runem 0.0.28__py3-none-any.whl → 0.0.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
runem/job_execute.py CHANGED
@@ -1,4 +1,3 @@
1
- import inspect
2
1
  import os
3
2
  import pathlib
4
3
  import typing
@@ -10,15 +9,25 @@ from runem.config_metadata import ConfigMetadata
10
9
  from runem.informative_dict import ReadOnlyInformativeDict
11
10
  from runem.job import Job
12
11
  from runem.job_wrapper import get_job_wrapper
13
- from runem.log import log
14
- from runem.types import FilePathListLookup, JobConfig, JobFunction, JobReturn, JobTags
12
+ from runem.log import error, log
13
+ from runem.types import (
14
+ FilePathListLookup,
15
+ JobConfig,
16
+ JobFunction,
17
+ JobReturn,
18
+ JobTags,
19
+ JobTiming,
20
+ TimingEntries,
21
+ TimingEntry,
22
+ )
15
23
 
16
24
 
17
25
  def job_execute_inner(
18
26
  job_config: JobConfig,
19
27
  config_metadata: ConfigMetadata,
20
28
  file_lists: FilePathListLookup,
21
- ) -> typing.Tuple[typing.Tuple[str, timedelta], JobReturn]:
29
+ **kwargs: typing.Any,
30
+ ) -> typing.Tuple[JobTiming, JobReturn]:
22
31
  """Wrapper for running a job inside a sub-process.
23
32
 
24
33
  Returns the time information and any reports the job generated
@@ -38,7 +47,19 @@ def job_execute_inner(
38
47
  if not file_list:
39
48
  # no files to work on
40
49
  log(f"WARNING: skipping job '{label}', no files for job")
41
- return (f"{label}: no files!", timedelta(0)), None
50
+ return {
51
+ "job": (f"{label}: no files!", timedelta(0)),
52
+ "commands": [],
53
+ }, None
54
+
55
+ sub_command_timings: TimingEntries = []
56
+
57
+ def _record_sub_job_time(label: str, timing: timedelta) -> None:
58
+ """Record timing information for sub-commands/tasks, atomically.
59
+
60
+ For example inside of run_command() calls
61
+ """
62
+ sub_command_timings.append((label, timing))
42
63
 
43
64
  if (
44
65
  "ctx" in job_config
@@ -52,30 +73,26 @@ def job_execute_inner(
52
73
  os.chdir(root_path)
53
74
 
54
75
  start = timer()
55
- func_signature = inspect.signature(function)
56
76
  if config_metadata.args.verbose:
57
77
  log(f"job: running: '{Job.get_job_name(job_config)}'")
58
78
  reports: JobReturn
59
79
  try:
60
- if "args" in func_signature.parameters:
61
- reports = function( # type: ignore # FIXME: which function do we have?
62
- config_metadata.args, config_metadata.options, file_list
63
- )
64
- else:
65
- reports = function(
66
- options=ReadOnlyInformativeDict(config_metadata.options), # type: ignore
67
- file_list=file_list,
68
- procs=config_metadata.args.procs,
69
- root_path=root_path,
70
- verbose=config_metadata.args.verbose,
71
- # unpack useful data points from the job_config
72
- label=Job.get_job_name(job_config),
73
- job=job_config,
74
- )
80
+ reports = function(
81
+ options=ReadOnlyInformativeDict(config_metadata.options), # type: ignore
82
+ file_list=file_list,
83
+ procs=config_metadata.args.procs,
84
+ root_path=root_path,
85
+ verbose=config_metadata.args.verbose,
86
+ # unpack useful data points from the job_config
87
+ label=Job.get_job_name(job_config),
88
+ job=job_config,
89
+ record_sub_job_time=_record_sub_job_time,
90
+ **kwargs,
91
+ )
75
92
  except BaseException: # pylint: disable=broad-exception-caught
76
93
  # log that we hit an error on this job and re-raise
77
94
  log(decorate=False)
78
- log(f"job: ERROR: job '{Job.get_job_name(job_config)}' failed to complete!")
95
+ error(f"job: job '{Job.get_job_name(job_config)}' failed to complete!")
79
96
  # re-raise
80
97
  raise
81
98
 
@@ -83,8 +100,8 @@ def job_execute_inner(
83
100
  time_taken: timedelta = timedelta(seconds=end - start)
84
101
  if config_metadata.args.verbose:
85
102
  log(f"job: DONE: '{label}': {time_taken}")
86
- timing_data = (label, time_taken)
87
- return (timing_data, reports)
103
+ this_job_timing_data: TimingEntry = (label, time_taken)
104
+ return ({"job": this_job_timing_data, "commands": sub_command_timings}, reports)
88
105
 
89
106
 
90
107
  def job_execute(
@@ -92,13 +109,19 @@ def job_execute(
92
109
  running_jobs: typing.Dict[str, str],
93
110
  config_metadata: ConfigMetadata,
94
111
  file_lists: FilePathListLookup,
95
- ) -> typing.Tuple[typing.Tuple[str, timedelta], JobReturn]:
112
+ **kwargs: typing.Any,
113
+ ) -> typing.Tuple[JobTiming, JobReturn]:
96
114
  """Thin-wrapper around job_execute_inner needed for mocking in tests.
97
115
 
98
116
  Needed for faster tests.
99
117
  """
100
118
  this_id: str = str(uuid.uuid4())
101
119
  running_jobs[this_id] = Job.get_job_name(job_config)
102
- results = job_execute_inner(job_config, config_metadata, file_lists)
120
+ results = job_execute_inner(
121
+ job_config,
122
+ config_metadata,
123
+ file_lists,
124
+ **kwargs,
125
+ )
103
126
  del running_jobs[this_id]
104
127
  return results
runem/job_filter.py CHANGED
@@ -35,7 +35,7 @@ def _should_filter_out_by_tags(
35
35
  if verbose:
36
36
  log(
37
37
  (
38
- f"not running job '{job['label']}' because it doesn't have "
38
+ f"not running job '{Job.get_job_name(job)}' because it doesn't have "
39
39
  f"any of the following tags: {printable_set(tags)}"
40
40
  )
41
41
  )
@@ -46,7 +46,7 @@ def _should_filter_out_by_tags(
46
46
  if verbose:
47
47
  log(
48
48
  (
49
- f"not running job '{job['label']}' because it contains the "
49
+ f"not running job '{Job.get_job_name(job)}' because it contains the "
50
50
  f"following tags: {printable_set(has_tags_to_avoid)}"
51
51
  )
52
52
  )
@@ -5,6 +5,12 @@ from runem.run_command import run_command
5
5
  from runem.types import JobConfig
6
6
 
7
7
 
8
+ def validate_simple_command(command_string: str) -> typing.List[str]:
9
+ # use shlex to handle parsing of the command string, a non-trivial problem.
10
+ split_command: typing.List[str] = shlex.split(command_string)
11
+ return split_command
12
+
13
+
8
14
  def job_runner_simple_command(
9
15
  **kwargs: typing.Any,
10
16
  ) -> None:
@@ -17,7 +23,7 @@ def job_runner_simple_command(
17
23
  command_string: str = job_config["command"]
18
24
 
19
25
  # use shlex to handle parsing of the command string, a non-trivial problem.
20
- result = shlex.split(command_string)
26
+ result = validate_simple_command(command_string)
21
27
 
22
28
  # preserve quotes for consistent handling of strings and avoid the "word
23
29
  # splitting" problem for unix-like shells.
runem/job_wrapper.py CHANGED
@@ -1,19 +1,25 @@
1
1
  import pathlib
2
2
 
3
- from runem.job_runner_simple_command import job_runner_simple_command
3
+ from runem.job_runner_simple_command import (
4
+ job_runner_simple_command,
5
+ validate_simple_command,
6
+ )
4
7
  from runem.job_wrapper_python import get_job_wrapper_py_func
5
- from runem.types import JobConfig, JobFunction
8
+ from runem.types import JobFunction, JobWrapper
6
9
 
7
10
 
8
- def get_job_wrapper(job_config: JobConfig, cfg_filepath: pathlib.Path) -> JobFunction:
11
+ def get_job_wrapper(job_wrapper: JobWrapper, cfg_filepath: pathlib.Path) -> JobFunction:
9
12
  """Given a job-description determines the job-runner, returning it as a function.
10
13
 
11
14
  NOTE: Side-effects: also re-addresses the job-config in the case of functions, see
12
15
  get_job_wrapper_py_func.
13
16
  """
14
- if "command" in job_config:
17
+ if "command" in job_wrapper:
18
+ # validate that the command is "understandable" and usable.
19
+ command_string: str = job_wrapper["command"]
20
+ validate_simple_command(command_string)
15
21
  return job_runner_simple_command # type: ignore # NO_COMMIT
16
22
 
17
23
  # if we do not have a simple command address assume we have just an addressed
18
24
  # function
19
- return get_job_wrapper_py_func(job_config, cfg_filepath)
25
+ return get_job_wrapper_py_func(job_wrapper, cfg_filepath)
@@ -3,7 +3,7 @@ import sys
3
3
  from importlib.util import module_from_spec
4
4
  from importlib.util import spec_from_file_location as module_spec_from_file_location
5
5
 
6
- from runem.types import FunctionNotFound, JobConfig, JobFunction
6
+ from runem.types import FunctionNotFound, JobFunction, JobWrapper
7
7
 
8
8
 
9
9
  def _load_python_function_from_module(
@@ -86,22 +86,22 @@ def _find_job_module(cfg_filepath: pathlib.Path, module_file_path: str) -> pathl
86
86
 
87
87
 
88
88
  def get_job_wrapper_py_func(
89
- job_config: JobConfig, cfg_filepath: pathlib.Path
89
+ job_wrapper: JobWrapper, cfg_filepath: pathlib.Path
90
90
  ) -> JobFunction:
91
91
  """For a job, dynamically loads the associated python job-function.
92
92
 
93
93
  Side-effects: also re-addressed the job-config.
94
94
  """
95
- function_to_load: str = job_config["addr"]["function"]
95
+ function_to_load: str = job_wrapper["addr"]["function"]
96
96
  try:
97
97
  module_file_path: pathlib.Path = _find_job_module(
98
- cfg_filepath, job_config["addr"]["file"]
98
+ cfg_filepath, job_wrapper["addr"]["file"]
99
99
  )
100
100
  except FunctionNotFound as err:
101
101
  raise FunctionNotFound(
102
102
  (
103
- f"Whilst loading job '{job_config['label']}' runem failed to find "
104
- f"job.addr.file '{job_config['addr']['file']}' looking for "
103
+ "runem failed to find "
104
+ f"job.addr.file '{job_wrapper['addr']['file']}' looking for "
105
105
  f"job.addr.function '{function_to_load}'"
106
106
  )
107
107
  ) from err
@@ -118,5 +118,5 @@ def get_job_wrapper_py_func(
118
118
  )
119
119
 
120
120
  # re-write the job-config file-path for the module with the one that worked
121
- job_config["addr"]["file"] = str(module_file_path)
121
+ job_wrapper["addr"]["file"] = str(module_file_path)
122
122
  return function
runem/log.py CHANGED
@@ -14,3 +14,11 @@ def log(msg: str = "", decorate: bool = True, end: typing.Optional[str] = None)
14
14
  # print in a blocking manner, waiting for system resources to free up if a
15
15
  # runem job is contending on stdout or similar.
16
16
  blocking_print(msg, end=end)
17
+
18
+
19
+ def warn(msg: str) -> None:
20
+ log(f"WARNING: {msg}")
21
+
22
+
23
+ def error(msg: str) -> None:
24
+ log(f"ERROR: {msg}")
runem/report.py CHANGED
@@ -14,6 +14,7 @@ from runem.types import (
14
14
  PhaseName,
15
15
  ReportUrlInfo,
16
16
  ReportUrls,
17
+ TimingEntries,
17
18
  )
18
19
 
19
20
  try:
@@ -22,7 +23,7 @@ except ImportError: # pragma: FIXME: add code coverage
22
23
  termplotlib = None
23
24
 
24
25
 
25
- def _align_bar_graphs_workaround(original_text: str) -> None:
26
+ def _align_bar_graphs_workaround(original_text: str) -> str:
26
27
  """Module termplotlib doesn't align floats, this fixes that.
27
28
 
28
29
  This makes it so we can align the point in the floating point string, without it,
@@ -39,40 +40,98 @@ def _align_bar_graphs_workaround(original_text: str) -> None:
39
40
  r"\[.*?(\d+)\.", lambda m: f"[{m.group(1):>{max_width}}.", original_text
40
41
  )
41
42
 
42
- print(formatted_text)
43
+ return formatted_text
44
+
45
+
46
+ def replace_bar_graph_characters(text: str, end_str: str, replace_char: str) -> str:
47
+ """Replaces block characters in lines containing `end_str` with the given char.
48
+
49
+ Args:
50
+ text (str): The text to process; it is split into lines on newline characters.
51
+ replace_char (str): The character to replace all blocks with, on lines containing `end_str`.
52
+
53
+ Returns:
54
+ str: The modified text with block characters replaced
55
+ on specified lines.
56
+ """
57
+ # Define the block character and its light shade replacement
58
+ block_chars = (
59
+ "▏▎▍▋▊▉█▌▐▄▀─" # Extend this string with any additional block characters you use
60
+ "░·" # also include the chars we might replace with for special bars
61
+ )
62
+
63
+ text_lines: typing.List[str] = text.split("\n")
64
+
65
+ # Process each line, replacing block characters if `end_str` is present
66
+ modified_lines = [
67
+ (
68
+ line.translate(str.maketrans(block_chars, replace_char * len(block_chars)))
69
+ if end_str in line
70
+ else line
71
+ )
72
+ for line in text_lines
73
+ ]
74
+
75
+ return "\n".join(modified_lines)
76
+
77
+
78
+ def _semi_shade_phase_totals(text: str) -> str:
79
+ light_shade_char = "░"
80
+ return replace_bar_graph_characters(text, "(user-time)", light_shade_char)
81
+
82
+
83
+ def _dot_jobs(text: str) -> str:
84
+ dot_char = "·"
85
+ return replace_bar_graph_characters(text, "(+)", dot_char)
43
86
 
44
87
 
45
88
  def _plot_times(
46
- overall_run_time: timedelta,
89
+ wall_clock_for_runem_main: timedelta,
47
90
  phase_run_oder: OrderedPhases,
48
91
  timing_data: JobRunTimesByPhase,
49
- ) -> timedelta:
92
+ ) -> typing.Tuple[timedelta, timedelta]:
50
93
  """Prints a report to terminal on how well we performed.
51
94
 
52
95
  Also calculates the wall-clock time-saved for the user.
96
+
97
+ Returns the total system time spent and the time-saved. (system-time-spent,
98
+ wall-clock-time-saved)
53
99
  """
54
100
  labels: typing.List[str] = []
55
101
  times: typing.List[float] = []
56
- job_time_sum: timedelta = timedelta() # init to 0
57
- for phase in phase_run_oder:
102
+
103
+ # Track active processing time for jobs, distinct from wall-clock time (the
104
+ # time the user experiences).
105
+ system_time_spent: timedelta = timedelta() # init to 0
106
+
107
+ for idx, phase in enumerate(phase_run_oder):
108
+ not_last_phase: bool = idx < len(phase_run_oder) - 1
109
+ utf8_phase = " ├" if not_last_phase else " └"
110
+ utf8_phase_group = " │" if not_last_phase else " "
58
111
  # log(f"Phase '{phase}' jobs took:")
59
- phase_total_time: float = 0.0
60
112
  phase_start_idx = len(labels)
61
- for label, job_time in timing_data[phase]:
62
- if job_time.total_seconds() == 0:
63
- continue
64
- labels.append(f"│├{phase}.{label}")
65
- times.append(job_time.total_seconds())
66
- job_time_sum += job_time
67
- phase_total_time += job_time.total_seconds()
68
- labels.insert(phase_start_idx, f"├{phase} (total)")
69
- times.insert(phase_start_idx, phase_total_time)
70
-
71
- for label, job_time in reversed(timing_data["_app"]):
72
- labels.insert(0, f"├runem.{label}")
73
- times.insert(0, job_time.total_seconds())
74
- labels.insert(0, "runem")
75
- times.insert(0, overall_run_time.total_seconds())
113
+
114
+ phase_job_times: timedelta = _gen_jobs_report(
115
+ phase,
116
+ labels,
117
+ times,
118
+ utf8_phase_group,
119
+ timing_data[phase],
120
+ )
121
+ labels.insert(phase_start_idx, f"{utf8_phase}{phase} (user-time)")
122
+ times.insert(phase_start_idx, phase_job_times.total_seconds())
123
+ system_time_spent += phase_job_times
124
+
125
+ runem_app_timing: typing.List[JobTiming] = timing_data["_app"]
126
+ job_metadata: JobTiming
127
+ for idx, job_metadata in enumerate(reversed(runem_app_timing)):
128
+ last_group: bool = idx == 0 # reverse sorted
129
+ utf8_group = "├" if not last_group else "└"
130
+ job_label, job_time_total = job_metadata["job"]
131
+ labels.insert(0, f"{utf8_group}runem.{job_label}")
132
+ times.insert(0, job_time_total.total_seconds())
133
+ labels.insert(0, "runem (total wall-clock)")
134
+ times.insert(0, wall_clock_for_runem_main.total_seconds())
76
135
  if termplotlib:
77
136
  fig = termplotlib.figure()
78
137
  # cspell:disable-next-line
@@ -81,14 +140,66 @@ def _plot_times(
81
140
  labels,
82
141
  force_ascii=False,
83
142
  )
143
+ shaded_bar_graph: str = _semi_shade_phase_totals(fig.get_string())
144
+ dotted_bar_graph: str = _dot_jobs(shaded_bar_graph)
145
+
84
146
  # ensure the graphs get aligned nicely.
85
- _align_bar_graphs_workaround(fig.get_string())
147
+ final_bar_graph: str = _align_bar_graphs_workaround(dotted_bar_graph)
148
+ print(final_bar_graph)
86
149
  else: # pragma: FIXME: add code coverage
87
- for label, time in zip(labels, times):
88
- log(f"{label}: {time}s")
150
+ for job_label, time in zip(labels, times):
151
+ log(f"{job_label}: {time}s")
89
152
 
90
- time_saved: timedelta = job_time_sum - overall_run_time
91
- return time_saved
153
+ wall_clock_time_saved: timedelta = system_time_spent - wall_clock_for_runem_main
154
+ return system_time_spent, wall_clock_time_saved
155
+
156
+
157
+ def _gen_jobs_report(
158
+ phase: PhaseName,
159
+ labels: typing.List[str],
160
+ times: typing.List[float],
161
+ utf8_phase_group: str,
162
+ job_timings: typing.List[JobTiming],
163
+ ) -> timedelta:
164
+ """Gathers the reports for sub-jobs.
165
+
166
+ Split out from _plot_times as the code was getting complex
167
+ """
168
+ job_timing: JobTiming
169
+
170
+ # Filter out JobTiming instances with non-zero total_seconds
171
+ non_zero_timing_data: typing.List[JobTiming] = [
172
+ job_timing
173
+ for job_timing in job_timings
174
+ if job_timing["job"][1].total_seconds() != 0
175
+ ]
176
+
177
+ job_time_sum: timedelta = timedelta() # init to 0
178
+ for idx, job_timing in enumerate(non_zero_timing_data):
179
+ not_last: bool = idx < len(non_zero_timing_data) - 1
180
+ utf8_job = "├" if not_last else "└"
181
+ utf8_sub_jobs = "│" if not_last else " "
182
+ job_label, job_time_total = job_timing["job"]
183
+ job_bar_label: str = f"{job_label}"
184
+ labels.append(f"{utf8_phase_group}{utf8_job}{job_bar_label}")
185
+ times.append(job_time_total.total_seconds())
186
+ job_time_sum += job_time_total
187
+ sub_command_times: TimingEntries = job_timing["commands"]
188
+
189
+ if len(sub_command_times) <= 1:
190
+ # we only have one or fewer sub-commands, just show the job-time
191
+ continue
192
+
193
+ # also print the sub-components of the job as we have more than one
194
+ for idx, (sub_job_label, sub_job_time) in enumerate(sub_command_times):
195
+ sub_utf8 = "├"
196
+ if idx == len(sub_command_times) - 1:
197
+ sub_utf8 = "└"
198
+ labels.append(
199
+ f"{utf8_phase_group}{utf8_sub_jobs}{sub_utf8}{sub_job_label} (+)"
200
+ )
201
+ times.append(sub_job_time.total_seconds())
202
+ return job_time_sum
92
203
 
93
204
 
94
205
  def _print_reports_by_phase(
@@ -107,8 +218,8 @@ def _print_reports_by_phase(
107
218
  def report_on_run(
108
219
  phase_run_oder: OrderedPhases,
109
220
  job_run_metadatas: JobRunMetadatasByPhase,
110
- overall_runtime: timedelta,
111
- ) -> timedelta:
221
+ wall_clock_for_runem_main: timedelta,
222
+ ) -> typing.Tuple[timedelta, timedelta]:
112
223
  """Generate high-level reports AND prints out any reports returned by jobs.
113
224
 
114
225
  IMPORTANT: returns the system time spent and the wall-clock time saved to the user.
@@ -130,9 +241,8 @@ def report_on_run(
130
241
  report_data[phase].extend(reports["reportUrls"])
131
242
 
132
243
  # Now plot the times on the terminal to give a visual report of the timing.
133
- # Also, calculate the time saved by runem, a key selling-point metric
134
- time_saved: timedelta = _plot_times(
135
- overall_run_time=overall_runtime,
244
+ time_metrics: typing.Tuple[timedelta, timedelta] = _plot_times(
245
+ wall_clock_for_runem_main=wall_clock_for_runem_main,
136
246
  phase_run_oder=phase_run_oder,
137
247
  timing_data=timing_data,
138
248
  )
@@ -140,6 +250,7 @@ def report_on_run(
140
250
  # Penultimate-ly print out the available reports grouped by run-phase.
141
251
  _print_reports_by_phase(phase_run_oder, report_data)
142
252
 
143
- # Return the key metric for runem, the wall-clock time saved to the user
253
+ # Return the key metrics for runem, the system vs wall-clock time saved to
254
+ # the user
144
255
  # TODO: write this to disk
145
- return time_saved
256
+ return time_metrics
runem/run_command.py CHANGED
@@ -1,9 +1,11 @@
1
1
  import os
2
2
  import pathlib
3
3
  import typing
4
+ from datetime import timedelta
4
5
  from subprocess import PIPE as SUBPROCESS_PIPE
5
6
  from subprocess import STDOUT as SUBPROCESS_STDOUT
6
7
  from subprocess import Popen
8
+ from timeit import default_timer as timer
7
9
 
8
10
  from runem.log import log
9
11
 
@@ -18,6 +20,10 @@ class RunCommandUnhandledError(RuntimeError):
18
20
  pass
19
21
 
20
22
 
23
+ # A function type for recording timing information.
24
+ RecordSubJobTimeType = typing.Callable[[str, timedelta], None]
25
+
26
+
21
27
  def parse_stdout(stdout: str, prefix: str) -> str:
22
28
  """Prefixes each line of the output with a given label, except trailing new
23
29
  lines."""
@@ -91,11 +97,16 @@ def run_command( # noqa: C901
91
97
  ignore_fails: bool = False,
92
98
  valid_exit_ids: typing.Optional[typing.Tuple[int, ...]] = None,
93
99
  cwd: typing.Optional[pathlib.Path] = None,
100
+ record_sub_job_time: typing.Optional[RecordSubJobTimeType] = None,
94
101
  **kwargs: typing.Any,
95
102
  ) -> str:
96
103
  """Runs the given command, returning stdout or throwing on any error."""
97
104
  cmd_string = " ".join(cmd)
98
105
 
106
+ if record_sub_job_time is not None:
107
+ # start the capture of how long this sub-task takes.
108
+ start = timer()
109
+
99
110
  run_env: typing.Dict[str, str] = _prepare_environment(
100
111
  env_overrides,
101
112
  )
@@ -174,4 +185,11 @@ def run_command( # noqa: C901
174
185
 
175
186
  if verbose:
176
187
  log(f"running: done: {label}: {cmd_string}")
188
+
189
+ if record_sub_job_time is not None:
190
+ # Capture how long this run took
191
+ end = timer()
192
+ time_taken: timedelta = timedelta(seconds=end - start)
193
+ record_sub_job_time(label, time_taken)
194
+
177
195
  return stdout