PyPI - runem - Versions diffs - 0.0.28__py3-none-any.whl → 0.0.30__py3-none-any.whl - Mend

runem 0.0.28__py3-none-any.whl → 0.0.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

runem/VERSION +1 -1
runem/cli.py +1 -0
runem/command_line.py +33 -8
runem/config.py +58 -9
runem/config_metadata.py +8 -0
runem/config_parse.py +188 -13
runem/files.py +32 -7
runem/hook_manager.py +116 -0
runem/job_execute.py +49 -26
runem/job_filter.py +2 -2
runem/job_runner_simple_command.py +7 -1
runem/job_wrapper.py +11 -5
runem/job_wrapper_python.py +7 -7
runem/log.py +8 -0
runem/report.py +145 -34
runem/run_command.py +18 -0
runem/runem.py +46 -19
runem/types.py +62 -5
{runem-0.0.28.dist-info → runem-0.0.30.dist-info}/METADATA +25 -34
runem-0.0.30.dist-info/RECORD +33 -0
{runem-0.0.28.dist-info → runem-0.0.30.dist-info}/WHEEL +1 -1
runem-0.0.28.dist-info/RECORD +0 -32
{runem-0.0.28.dist-info → runem-0.0.30.dist-info}/LICENSE +0 -0
{runem-0.0.28.dist-info → runem-0.0.30.dist-info}/entry_points.txt +0 -0
{runem-0.0.28.dist-info → runem-0.0.30.dist-info}/top_level.txt +0 -0

runem/job_execute.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import inspect
 import os
 import pathlib
 import typing
@@ -10,15 +9,25 @@ from runem.config_metadata import ConfigMetadata
 from runem.informative_dict import ReadOnlyInformativeDict
 from runem.job import Job
 from runem.job_wrapper import get_job_wrapper
-from runem.log import log
-from runem.types import FilePathListLookup, JobConfig, JobFunction, JobReturn, JobTags
+from runem.log import error, log
+from runem.types import (
+    FilePathListLookup,
+    JobConfig,
+    JobFunction,
+    JobReturn,
+    JobTags,
+    JobTiming,
+    TimingEntries,
+    TimingEntry,
+)
 def job_execute_inner(
     job_config: JobConfig,
     config_metadata: ConfigMetadata,
     file_lists: FilePathListLookup,
-) -> typing.Tuple[typing.Tuple[str, timedelta], JobReturn]:
+    **kwargs: typing.Any,
+) -> typing.Tuple[JobTiming, JobReturn]:
     """Wrapper for running a job inside a sub-process.
     Returns the time information and any reports the job generated
@@ -38,7 +47,19 @@ def job_execute_inner(
     if not file_list:
         # no files to work on
         log(f"WARNING: skipping job '{label}', no files for job")
-        return (f"{label}: no files!", timedelta(0)), None
+        return {
+            "job": (f"{label}: no files!", timedelta(0)),
+            "commands": [],
+        }, None
+    sub_command_timings: TimingEntries = []
+    def _record_sub_job_time(label: str, timing: timedelta) -> None:
+        """Record timing information for sub-commands/tasks, atomically.
+        For example inside of run_command() calls
+        """
+        sub_command_timings.append((label, timing))
     if (
         "ctx" in job_config
@@ -52,30 +73,26 @@ def job_execute_inner(
         os.chdir(root_path)
     start = timer()
-    func_signature = inspect.signature(function)
     if config_metadata.args.verbose:
         log(f"job: running: '{Job.get_job_name(job_config)}'")
     reports: JobReturn
     try:
-        if "args" in func_signature.parameters:
-            reports = function(  # type: ignore  # FIXME: which function do we have?
-                config_metadata.args, config_metadata.options, file_list
-            )
-        else:
-            reports = function(
-                options=ReadOnlyInformativeDict(config_metadata.options),  # type: ignore
-                file_list=file_list,
-                procs=config_metadata.args.procs,
-                root_path=root_path,
-                verbose=config_metadata.args.verbose,
-                # unpack useful data points from the job_config
-                label=Job.get_job_name(job_config),
-                job=job_config,
-            )
+        reports = function(
+            options=ReadOnlyInformativeDict(config_metadata.options),  # type: ignore
+            file_list=file_list,
+            procs=config_metadata.args.procs,
+            root_path=root_path,
+            verbose=config_metadata.args.verbose,
+            # unpack useful data points from the job_config
+            label=Job.get_job_name(job_config),
+            job=job_config,
+            record_sub_job_time=_record_sub_job_time,
+            **kwargs,
+        )
     except BaseException:  # pylint: disable=broad-exception-caught
         # log that we hit an error on this job and re-raise
         log(decorate=False)
-        log(f"job: ERROR: job '{Job.get_job_name(job_config)}' failed to complete!")
+        error(f"job: job '{Job.get_job_name(job_config)}' failed to complete!")
         # re-raise
         raise
@@ -83,8 +100,8 @@ def job_execute_inner(
     time_taken: timedelta = timedelta(seconds=end - start)
     if config_metadata.args.verbose:
         log(f"job: DONE: '{label}': {time_taken}")
-    timing_data = (label, time_taken)
-    return (timing_data, reports)
+    this_job_timing_data: TimingEntry = (label, time_taken)
+    return ({"job": this_job_timing_data, "commands": sub_command_timings}, reports)
 def job_execute(
@@ -92,13 +109,19 @@ def job_execute(
     running_jobs: typing.Dict[str, str],
     config_metadata: ConfigMetadata,
     file_lists: FilePathListLookup,
-) -> typing.Tuple[typing.Tuple[str, timedelta], JobReturn]:
+    **kwargs: typing.Any,
+) -> typing.Tuple[JobTiming, JobReturn]:
     """Thin-wrapper around job_execute_inner needed for mocking in tests.
     Needed for faster tests.
     """
     this_id: str = str(uuid.uuid4())
     running_jobs[this_id] = Job.get_job_name(job_config)
-    results = job_execute_inner(job_config, config_metadata, file_lists)
+    results = job_execute_inner(
+        job_config,
+        config_metadata,
+        file_lists,
+        **kwargs,
+    )
     del running_jobs[this_id]
     return results

runem/job_filter.py CHANGED Viewed

@@ -35,7 +35,7 @@ def _should_filter_out_by_tags(
         if verbose:
             log(
                 (
-                    f"not running job '{job['label']}' because it doesn't have "
+                    f"not running job '{Job.get_job_name(job)}' because it doesn't have "
                     f"any of the following tags: {printable_set(tags)}"
                 )
             )
@@ -46,7 +46,7 @@ def _should_filter_out_by_tags(
         if verbose:
             log(
                 (
-                    f"not running job '{job['label']}' because it contains the "
+                    f"not running job '{Job.get_job_name(job)}' because it contains the "
                     f"following tags: {printable_set(has_tags_to_avoid)}"
                 )
             )

runem/job_runner_simple_command.py CHANGED Viewed

@@ -5,6 +5,12 @@ from runem.run_command import run_command
 from runem.types import JobConfig
+def validate_simple_command(command_string: str) -> typing.List[str]:
+    # use shlex to handle parsing of the command string, a non-trivial problem.
+    split_command: typing.List[str] = shlex.split(command_string)
+    return split_command
 def job_runner_simple_command(
     **kwargs: typing.Any,
 ) -> None:
@@ -17,7 +23,7 @@ def job_runner_simple_command(
     command_string: str = job_config["command"]
     # use shlex to handle parsing of the command string, a non-trivial problem.
-    result = shlex.split(command_string)
+    result = validate_simple_command(command_string)
     # preserve quotes for consistent handling of strings and avoid the "word
     # splitting" problem for unix-like shells.

runem/job_wrapper.py CHANGED Viewed

@@ -1,19 +1,25 @@
 import pathlib
-from runem.job_runner_simple_command import job_runner_simple_command
+from runem.job_runner_simple_command import (
+    job_runner_simple_command,
+    validate_simple_command,
+)
 from runem.job_wrapper_python import get_job_wrapper_py_func
-from runem.types import JobConfig, JobFunction
+from runem.types import JobFunction, JobWrapper
-def get_job_wrapper(job_config: JobConfig, cfg_filepath: pathlib.Path) -> JobFunction:
+def get_job_wrapper(job_wrapper: JobWrapper, cfg_filepath: pathlib.Path) -> JobFunction:
     """Given a job-description determines the job-runner, returning it as a function.
     NOTE: Side-effects: also re-addressed the job-config in the case of functions see
           get_job_function.
     """
-    if "command" in job_config:
+    if "command" in job_wrapper:
+        # validate that the command is "understandable" and usable.
+        command_string: str = job_wrapper["command"]
+        validate_simple_command(command_string)
         return job_runner_simple_command  # type: ignore # NO_COMMIT
     # if we do not have a simple command address assume we have just an addressed
     # function
-    return get_job_wrapper_py_func(job_config, cfg_filepath)
+    return get_job_wrapper_py_func(job_wrapper, cfg_filepath)

runem/job_wrapper_python.py CHANGED Viewed

@@ -3,7 +3,7 @@ import sys
 from importlib.util import module_from_spec
 from importlib.util import spec_from_file_location as module_spec_from_file_location
-from runem.types import FunctionNotFound, JobConfig, JobFunction
+from runem.types import FunctionNotFound, JobFunction, JobWrapper
 def _load_python_function_from_module(
@@ -86,22 +86,22 @@ def _find_job_module(cfg_filepath: pathlib.Path, module_file_path: str) -> pathl
 def get_job_wrapper_py_func(
-    job_config: JobConfig, cfg_filepath: pathlib.Path
+    job_wrapper: JobWrapper, cfg_filepath: pathlib.Path
 ) -> JobFunction:
     """For a job, dynamically loads the associated python job-function.
     Side-effects: also re-addressed the job-config.
     """
-    function_to_load: str = job_config["addr"]["function"]
+    function_to_load: str = job_wrapper["addr"]["function"]
     try:
         module_file_path: pathlib.Path = _find_job_module(
-            cfg_filepath, job_config["addr"]["file"]
+            cfg_filepath, job_wrapper["addr"]["file"]
         )
     except FunctionNotFound as err:
         raise FunctionNotFound(
             (
-                f"Whilst loading job '{job_config['label']}' runem failed to find "
-                f"job.addr.file '{job_config['addr']['file']}' looking for "
+                "runem failed to find "
+                f"job.addr.file '{job_wrapper['addr']['file']}' looking for "
                 f"job.addr.function '{function_to_load}'"
             )
         ) from err
@@ -118,5 +118,5 @@ def get_job_wrapper_py_func(
     )
     # re-write the job-config file-path for the module with the one that worked
-    job_config["addr"]["file"] = str(module_file_path)
+    job_wrapper["addr"]["file"] = str(module_file_path)
     return function

runem/log.py CHANGED Viewed

@@ -14,3 +14,11 @@ def log(msg: str = "", decorate: bool = True, end: typing.Optional[str] = None)
     # print in a blocking manner, waiting for system resources to free up if a
     # runem job is contending on stdout or similar.
     blocking_print(msg, end=end)
+def warn(msg: str) -> None:
+    log(f"WARNING: {msg}")
+def error(msg: str) -> None:
+    log(f"ERROR: {msg}")

runem/report.py CHANGED Viewed

@@ -14,6 +14,7 @@ from runem.types import (
     PhaseName,
     ReportUrlInfo,
     ReportUrls,
+    TimingEntries,
 )
 try:
@@ -22,7 +23,7 @@ except ImportError:  # pragma: FIXME: add code coverage
     termplotlib = None
-def _align_bar_graphs_workaround(original_text: str) -> None:
+def _align_bar_graphs_workaround(original_text: str) -> str:
     """Module termplotlib doesn't align floats, this fixes that.
     This makes it so we can align the point in the floating point string, without it,
@@ -39,40 +40,98 @@ def _align_bar_graphs_workaround(original_text: str) -> None:
         r"\[.*?(\d+)\.", lambda m: f"[{m.group(1):>{max_width}}.", original_text
     )
-    print(formatted_text)
+    return formatted_text
+def replace_bar_graph_characters(text: str, end_str: str, replace_char: str) -> str:
+    """Replaces block characters in lines containing `end_str` with give char.
+    Args:
+        text_lines (List[str]): A list of strings, each representing a line of text.
+        replace_char (str): The character to replace all bocks with
+    Returns:
+        List[str]: The modified list of strings with block characters replaced
+                   on specified lines.
+    """
+    # Define the block character and its light shade replacement
+    block_chars = (
+        "▏▎▍▋▊▉█▌▐▄▀─"  # Extend this string with any additional block characters you use
+        "░·"  # also include the chars we might replace with for special bars
+    )
+    text_lines: typing.List[str] = text.split("\n")
+    # Process each line, replacing block characters if `end_str` is present
+    modified_lines = [
+        (
+            line.translate(str.maketrans(block_chars, replace_char * len(block_chars)))
+            if end_str in line
+            else line
+        )
+        for line in text_lines
+    ]
+    return "\n".join(modified_lines)
+def _semi_shade_phase_totals(text: str) -> str:
+    light_shade_char = "░"
+    return replace_bar_graph_characters(text, "(user-time)", light_shade_char)
+def _dot_jobs(text: str) -> str:
+    dot_char = "·"
+    return replace_bar_graph_characters(text, "(+)", dot_char)
 def _plot_times(
-    overall_run_time: timedelta,
+    wall_clock_for_runem_main: timedelta,
     phase_run_oder: OrderedPhases,
     timing_data: JobRunTimesByPhase,
-) -> timedelta:
+) -> typing.Tuple[timedelta, timedelta]:
     """Prints a report to terminal on how well we performed.
     Also calculates the wall-clock time-saved for the user.
+    Returns the total system time spent and the time-saved.     (system-time-spent,
+    wall-clock-time-saved)
     """
     labels: typing.List[str] = []
     times: typing.List[float] = []
-    job_time_sum: timedelta = timedelta()  # init to 0
-    for phase in phase_run_oder:
+    # Track active processing time for jobs, distinct from wall-clock time (the
+    # time the user experiences).
+    system_time_spent: timedelta = timedelta()  # init to 0
+    for idx, phase in enumerate(phase_run_oder):
+        not_last_phase: bool = idx < len(phase_run_oder) - 1
+        utf8_phase = " ├" if not_last_phase else " └"
+        utf8_phase_group = " │" if not_last_phase else "  "
         # log(f"Phase '{phase}' jobs took:")
-        phase_total_time: float = 0.0
         phase_start_idx = len(labels)
-        for label, job_time in timing_data[phase]:
-            if job_time.total_seconds() == 0:
-                continue
-            labels.append(f"│├{phase}.{label}")
-            times.append(job_time.total_seconds())
-            job_time_sum += job_time
-            phase_total_time += job_time.total_seconds()
-        labels.insert(phase_start_idx, f"├{phase} (total)")
-        times.insert(phase_start_idx, phase_total_time)
-    for label, job_time in reversed(timing_data["_app"]):
-        labels.insert(0, f"├runem.{label}")
-        times.insert(0, job_time.total_seconds())
-    labels.insert(0, "runem")
-    times.insert(0, overall_run_time.total_seconds())
+        phase_job_times: timedelta = _gen_jobs_report(
+            phase,
+            labels,
+            times,
+            utf8_phase_group,
+            timing_data[phase],
+        )
+        labels.insert(phase_start_idx, f"{utf8_phase}{phase} (user-time)")
+        times.insert(phase_start_idx, phase_job_times.total_seconds())
+        system_time_spent += phase_job_times
+    runem_app_timing: typing.List[JobTiming] = timing_data["_app"]
+    job_metadata: JobTiming
+    for idx, job_metadata in enumerate(reversed(runem_app_timing)):
+        last_group: bool = idx == 0  # reverse sorted
+        utf8_group = "├" if not last_group else "└"
+        job_label, job_time_total = job_metadata["job"]
+        labels.insert(0, f"{utf8_group}runem.{job_label}")
+        times.insert(0, job_time_total.total_seconds())
+    labels.insert(0, "runem (total wall-clock)")
+    times.insert(0, wall_clock_for_runem_main.total_seconds())
     if termplotlib:
         fig = termplotlib.figure()
         # cspell:disable-next-line
@@ -81,14 +140,66 @@ def _plot_times(
             labels,
             force_ascii=False,
         )
+        shaded_bar_graph: str = _semi_shade_phase_totals(fig.get_string())
+        dotted_bar_graph: str = _dot_jobs(shaded_bar_graph)
         # ensure the graphs get aligned nicely.
-        _align_bar_graphs_workaround(fig.get_string())
+        final_bar_graph: str = _align_bar_graphs_workaround(dotted_bar_graph)
+        print(final_bar_graph)
     else:  # pragma: FIXME: add code coverage
-        for label, time in zip(labels, times):
-            log(f"{label}: {time}s")
+        for job_label, time in zip(labels, times):
+            log(f"{job_label}: {time}s")
-    time_saved: timedelta = job_time_sum - overall_run_time
-    return time_saved
+    wall_clock_time_saved: timedelta = system_time_spent - wall_clock_for_runem_main
+    return system_time_spent, wall_clock_time_saved
+def _gen_jobs_report(
+    phase: PhaseName,
+    labels: typing.List[str],
+    times: typing.List[float],
+    utf8_phase_group: str,
+    job_timings: typing.List[JobTiming],
+) -> timedelta:
+    """Gathers the reports for sub-jobs.
+    Split out from _plot_times as the code was getting complex
+    """
+    job_timing: JobTiming
+    # Filter out JobTiming instances with non-zero total_seconds
+    non_zero_timing_data: typing.List[JobTiming] = [
+        job_timing
+        for job_timing in job_timings
+        if job_timing["job"][1].total_seconds() != 0
+    ]
+    job_time_sum: timedelta = timedelta()  # init to 0
+    for idx, job_timing in enumerate(non_zero_timing_data):
+        not_last: bool = idx < len(non_zero_timing_data) - 1
+        utf8_job = "├" if not_last else "└"
+        utf8_sub_jobs = "│" if not_last else " "
+        job_label, job_time_total = job_timing["job"]
+        job_bar_label: str = f"{job_label}"
+        labels.append(f"{utf8_phase_group}{utf8_job}{job_bar_label}")
+        times.append(job_time_total.total_seconds())
+        job_time_sum += job_time_total
+        sub_command_times: TimingEntries = job_timing["commands"]
+        if len(sub_command_times) <= 1:
+            # we only have one or fewer sub-commands, just show the job-time
+            continue
+        # also print the sub-components of the job as we have more than one
+        for idx, (sub_job_label, sub_job_time) in enumerate(sub_command_times):
+            sub_utf8 = "├"
+            if idx == len(sub_command_times) - 1:
+                sub_utf8 = "└"
+            labels.append(
+                f"{utf8_phase_group}{utf8_sub_jobs}{sub_utf8}{sub_job_label} (+)"
+            )
+            times.append(sub_job_time.total_seconds())
+    return job_time_sum
 def _print_reports_by_phase(
@@ -107,8 +218,8 @@ def _print_reports_by_phase(
 def report_on_run(
     phase_run_oder: OrderedPhases,
     job_run_metadatas: JobRunMetadatasByPhase,
-    overall_runtime: timedelta,
-) -> timedelta:
+    wall_clock_for_runem_main: timedelta,
+) -> typing.Tuple[timedelta, timedelta]:
     """Generate high-level reports AND prints out any reports returned by jobs.
     IMPORTANT: returns the wall-clock time saved to the user.
@@ -130,9 +241,8 @@ def report_on_run(
                 report_data[phase].extend(reports["reportUrls"])
     # Now plot the times on the terminal to give a visual report of the timing.
-    # Also, calculate the time saved by runem, a key selling-point metric
-    time_saved: timedelta = _plot_times(
-        overall_run_time=overall_runtime,
+    time_metrics: typing.Tuple[timedelta, timedelta] = _plot_times(
+        wall_clock_for_runem_main=wall_clock_for_runem_main,
         phase_run_oder=phase_run_oder,
         timing_data=timing_data,
     )
@@ -140,6 +250,7 @@ def report_on_run(
     # Penultimate-ly print out the available reports grouped by run-phase.
     _print_reports_by_phase(phase_run_oder, report_data)
-    # Return the key metric for runem, the wall-clock time saved to the user
+    # Return the key metrics for runem, the system vs wall-clock time saved to
+    # the user
     # TODO: write this to disk
-    return time_saved
+    return time_metrics

runem/run_command.py CHANGED Viewed

@@ -1,9 +1,11 @@
 import os
 import pathlib
 import typing
+from datetime import timedelta
 from subprocess import PIPE as SUBPROCESS_PIPE
 from subprocess import STDOUT as SUBPROCESS_STDOUT
 from subprocess import Popen
+from timeit import default_timer as timer
 from runem.log import log
@@ -18,6 +20,10 @@ class RunCommandUnhandledError(RuntimeError):
     pass
+# A function type for recording timing information.
+RecordSubJobTimeType = typing.Callable[[str, timedelta], None]
 def parse_stdout(stdout: str, prefix: str) -> str:
     """Prefixes each line of the output with a given label, except trailing new
     lines."""
@@ -91,11 +97,16 @@ def run_command(  # noqa: C901
     ignore_fails: bool = False,
     valid_exit_ids: typing.Optional[typing.Tuple[int, ...]] = None,
     cwd: typing.Optional[pathlib.Path] = None,
+    record_sub_job_time: typing.Optional[RecordSubJobTimeType] = None,
     **kwargs: typing.Any,
 ) -> str:
     """Runs the given command, returning stdout or throwing on any error."""
     cmd_string = " ".join(cmd)
+    if record_sub_job_time is not None:
+        # start the capture of how long this sub-task takes.
+        start = timer()
     run_env: typing.Dict[str, str] = _prepare_environment(
         env_overrides,
     )
@@ -174,4 +185,11 @@ def run_command(  # noqa: C901
     if verbose:
         log(f"running: done: {label}: {cmd_string}")
+    if record_sub_job_time is not None:
+        # Capture how long this run took
+        end = timer()
+        time_taken: timedelta = timedelta(seconds=end - start)
+        record_sub_job_time(label, time_taken)
     return stdout

runem 0.0.28__py3-none-any.whl → 0.0.30__py3-none-any.whl

runem 0.0.28__py3-none-any.whl → 0.0.30__py3-none-any.whl