PyPI - runnable - Versions diffs - 0.34.0a1__py3-none-any.whl → 1.0.0__py3-none-any.whl - Mend

runnable 0.34.0a1__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of runnable might be problematic. Click here for more details.

Files changed (49) hide show

extensions/catalog/any_path.py +13 -2
extensions/job_executor/__init__.py +7 -5
extensions/job_executor/emulate.py +106 -0
extensions/job_executor/k8s.py +8 -8
extensions/job_executor/local_container.py +13 -14
extensions/nodes/__init__.py +0 -0
extensions/nodes/conditional.py +243 -0
extensions/nodes/fail.py +72 -0
extensions/nodes/map.py +350 -0
extensions/nodes/parallel.py +159 -0
extensions/nodes/stub.py +89 -0
extensions/nodes/success.py +72 -0
extensions/nodes/task.py +92 -0
extensions/pipeline_executor/__init__.py +27 -27
extensions/pipeline_executor/argo.py +52 -46
extensions/pipeline_executor/emulate.py +112 -0
extensions/pipeline_executor/local.py +4 -4
extensions/pipeline_executor/local_container.py +19 -79
extensions/pipeline_executor/mocked.py +5 -9
extensions/pipeline_executor/retry.py +6 -10
runnable/__init__.py +2 -11
runnable/catalog.py +6 -23
runnable/cli.py +145 -48
runnable/context.py +520 -28
runnable/datastore.py +51 -54
runnable/defaults.py +12 -34
runnable/entrypoints.py +82 -440
runnable/exceptions.py +35 -34
runnable/executor.py +13 -20
runnable/gantt.py +1141 -0
runnable/graph.py +1 -1
runnable/names.py +1 -1
runnable/nodes.py +20 -16
runnable/parameters.py +108 -51
runnable/sdk.py +125 -204
runnable/tasks.py +62 -85
runnable/utils.py +6 -268
runnable-1.0.0.dist-info/METADATA +122 -0
runnable-1.0.0.dist-info/RECORD +73 -0
{runnable-0.34.0a1.dist-info → runnable-1.0.0.dist-info}/entry_points.txt +9 -8
extensions/nodes/nodes.py +0 -778
extensions/nodes/torch.py +0 -273
extensions/nodes/torch_config.py +0 -76
extensions/tasks/torch.py +0 -286
extensions/tasks/torch_config.py +0 -76
runnable-0.34.0a1.dist-info/METADATA +0 -267
runnable-0.34.0a1.dist-info/RECORD +0 -67
{runnable-0.34.0a1.dist-info → runnable-1.0.0.dist-info}/WHEEL +0 -0
{runnable-0.34.0a1.dist-info → runnable-1.0.0.dist-info}/licenses/LICENSE +0 -0

runnable/tasks.py CHANGED Viewed

@@ -5,7 +5,6 @@ import io
 import json
 import logging
 import os
-import runpy
 import subprocess
 import sys
 from datetime import datetime
@@ -26,7 +25,7 @@ from runnable.datastore import (
     Parameter,
     StepAttempt,
 )
-from runnable.defaults import TypeMapVariable
+from runnable.defaults import MapVariableType
 logger = logging.getLogger(defaults.LOGGER_NAME)
@@ -49,7 +48,29 @@ class TeeIO(io.StringIO):
         self.output_stream.flush()
-sys.stdout = TeeIO()
+@contextlib.contextmanager
+def redirect_output():
+    # Set the stream handlers to use the custom TeeIO class
+    # Backup the original stdout and stderr
+    original_stdout = sys.stdout
+    original_stderr = sys.stderr
+    # Redirect stdout and stderr to custom TeeStream objects
+    sys.stdout = TeeIO(sys.stdout)
+    sys.stderr = TeeIO(sys.stderr)
+    # Replace stream for all StreamHandlers to use the new sys.stdout
+    for handler in logging.getLogger().handlers:
+        if isinstance(handler, logging.StreamHandler):
+            handler.stream = sys.stdout
+    try:
+        yield sys.stdout, sys.stderr
+    finally:
+        # Restore the original stdout and stderr
+        sys.stdout = original_stdout
+        sys.stderr = original_stderr
 class TaskReturns(BaseModel):
@@ -80,7 +101,7 @@ class BaseTaskType(BaseModel):
     def set_secrets_as_env_variables(self):
         # Preparing the environment for the task execution
         for key in self.secrets:
-            secret_value = context.run_context.secrets_handler.get(key)
+            secret_value = context.run_context.secrets.get(key)
             os.environ[key] = secret_value
     def delete_secrets_from_env_variables(self):
@@ -91,7 +112,7 @@ class BaseTaskType(BaseModel):
     def execute_command(
         self,
-        map_variable: TypeMapVariable = None,
+        map_variable: MapVariableType = None,
     ) -> StepAttempt:
         """The function to execute the command.
@@ -131,7 +152,7 @@ class BaseTaskType(BaseModel):
         finally:
             self.delete_secrets_from_env_variables()
-    def resolve_unreduced_parameters(self, map_variable: TypeMapVariable = None):
+    def resolve_unreduced_parameters(self, map_variable: MapVariableType = None):
         """Resolve the unreduced parameters."""
         params = self._context.run_log_store.get_parameters(
             run_id=self._context.run_id
@@ -154,7 +175,7 @@ class BaseTaskType(BaseModel):
     @contextlib.contextmanager
     def execution_context(
-        self, map_variable: TypeMapVariable = None, allow_complex: bool = True
+        self, map_variable: MapVariableType = None, allow_complex: bool = True
     ):
         params = self.resolve_unreduced_parameters(map_variable=map_variable)
         logger.info(f"Parameters available for the execution: {params}")
@@ -268,7 +289,7 @@ class PythonTaskType(BaseTaskType):  # pylint: disable=too-few-public-methods
     def execute_command(
         self,
-        map_variable: TypeMapVariable = None,
+        map_variable: MapVariableType = None,
     ) -> StepAttempt:
         """Execute the notebook as defined by the command."""
         attempt_log = StepAttempt(status=defaults.FAIL, start_time=str(datetime.now()))
@@ -290,13 +311,21 @@ class PythonTaskType(BaseTaskType):  # pylint: disable=too-few-public-methods
                     logger.info(
                         f"Calling {func} from {module} with {filtered_parameters}"
                     )
-                    out_file = TeeIO()
-                    with contextlib.redirect_stdout(out_file):
+                    context.progress.stop()  # redirecting stdout clashes with rich progress
+                    with redirect_output() as (buffer, stderr_buffer):
                         user_set_parameters = f(
                             **filtered_parameters
                         )  # This is a tuple or single value
-                    task_console.print(out_file.getvalue())
+                        print(
+                            stderr_buffer.getvalue()
+                        )  # To print the logging statements
+                    # TODO: Avoid double print!!
+                    with task_console.capture():
+                        task_console.log(buffer.getvalue())
+                        task_console.log(stderr_buffer.getvalue())
+                    context.progress.start()
                 except Exception as e:
                     raise exceptions.CommandCallError(
                         f"Function call: {self.command} did not succeed.\n"
@@ -355,69 +384,6 @@ class PythonTaskType(BaseTaskType):  # pylint: disable=too-few-public-methods
         return attempt_log
-class TorchTaskType(BaseTaskType):
-    task_type: str = Field(default="torch", serialization_alias="command_type")
-    entrypoint: str = Field(default="torch.distributed.run", frozen=True)
-    args_to_torchrun: dict[str, str | bool] = Field(default_factory=dict)  # For example
-    # {"nproc_per_node": 2, "nnodes": 1,}
-    script_to_call: str  # For example train/script.py
-    def execute_command(
-        self, map_variable: Dict[str, str | int | float] | None = None
-    ) -> StepAttempt:
-        attempt_log = StepAttempt(status=defaults.FAIL, start_time=str(datetime.now()))
-        with (
-            self.execution_context(
-                map_variable=map_variable, allow_complex=False
-            ) as params,
-            self.expose_secrets() as _,
-        ):
-            try:
-                entry_point_args = [self.entrypoint]
-                for key, value in self.args_to_torchrun.items():
-                    entry_point_args.append(f"--{key}")
-                    if type(value) is not bool:
-                        entry_point_args.append(str(value))
-                entry_point_args.append(self.script_to_call)
-                for key, value in params.items():
-                    entry_point_args.append(f"--{key}")
-                    if type(value.value) is not bool:  # type: ignore
-                        entry_point_args.append(str(value.value))  # type: ignore
-                # TODO: Check the typing here
-                logger.info("Calling the user script with the following parameters:")
-                logger.info(entry_point_args)
-                out_file = TeeIO()
-                try:
-                    with contextlib.redirect_stdout(out_file):
-                        sys.argv = entry_point_args
-                        runpy.run_module(self.entrypoint, run_name="__main__")
-                    task_console.print(out_file.getvalue())
-                except Exception as e:
-                    raise exceptions.CommandCallError(
-                        f"Call to entrypoint {self.entrypoint} with {self.script_to_call} did not succeed."
-                    ) from e
-                finally:
-                    sys.argv = sys.argv[:1]
-                attempt_log.status = defaults.SUCCESS
-            except Exception as _e:
-                msg = f"Call to entrypoint {self.entrypoint} with {self.script_to_call} did not succeed."
-                attempt_log.message = msg
-                task_console.print_exception(show_locals=False)
-                task_console.log(_e, style=defaults.error_style)
-        attempt_log.end_time = str(datetime.now())
-        return attempt_log
 class NotebookTaskType(BaseTaskType):
     """
     --8<-- [start:notebook_reference]
@@ -482,14 +448,15 @@ class NotebookTaskType(BaseTaskType):
         return command
-    def get_notebook_output_path(self, map_variable: TypeMapVariable = None) -> str:
+    def get_notebook_output_path(self, map_variable: MapVariableType = None) -> str:
         tag = ""
         map_variable = map_variable or {}
         for key, value in map_variable.items():
             tag += f"{key}_{value}_"
-        if hasattr(self._context.executor, "_context_node"):
-            tag += self._context.executor._context_node.name
+        if isinstance(self._context, context.PipelineContext):
+            assert self._context.pipeline_executor._context_node
+            tag += self._context.pipeline_executor._context_node.name
         tag = "".join(x for x in tag if x.isalnum()).strip("-")
@@ -500,7 +467,7 @@ class NotebookTaskType(BaseTaskType):
     def execute_command(
         self,
-        map_variable: TypeMapVariable = None,
+        map_variable: MapVariableType = None,
     ) -> StepAttempt:
         """Execute the python notebook as defined by the command.
@@ -555,12 +522,20 @@ class NotebookTaskType(BaseTaskType):
                 }
                 kwds.update(ploomber_optional_args)
-                out_file = TeeIO()
-                with contextlib.redirect_stdout(out_file):
+                context.progress.stop()  # redirecting stdout clashes with rich progress
+                with redirect_output() as (buffer, stderr_buffer):
                     pm.execute_notebook(**kwds)
-                task_console.print(out_file.getvalue())
-                context.run_context.catalog_handler.put(name=notebook_output_path)
+                    print(stderr_buffer.getvalue())  # To print the logging statements
+                with task_console.capture():
+                    task_console.log(buffer.getvalue())
+                    task_console.log(stderr_buffer.getvalue())
+                context.progress.start()
+                context.run_context.catalog.put(name=notebook_output_path)
                 client = PloomberClient.from_path(path=notebook_output_path)
                 namespace = client.get_namespace()
@@ -678,7 +653,7 @@ class ShellTaskType(BaseTaskType):
     def execute_command(
         self,
-        map_variable: TypeMapVariable = None,
+        map_variable: MapVariableType = None,
     ) -> StepAttempt:
         # Using shell=True as we want to have chained commands to be executed in the same shell.
         """Execute the shell command as defined by the command.
@@ -702,7 +677,7 @@ class ShellTaskType(BaseTaskType):
         # Expose secrets as environment variables
         if self.secrets:
             for key in self.secrets:
-                secret_value = context.run_context.secrets_handler.get(key)
+                secret_value = context.run_context.secrets.get(key)
                 subprocess_env[key] = secret_value
         try:
@@ -728,6 +703,7 @@ class ShellTaskType(BaseTaskType):
                 capture = False
                 return_keys = {x.name: x for x in self.returns}
+                context.progress.stop()  # redirecting stdout clashes with rich progress
                 proc = subprocess.Popen(
                     command,
                     shell=True,
@@ -751,6 +727,7 @@ class ShellTaskType(BaseTaskType):
                         continue
                     task_console.print(line, style=defaults.warning_style)
+                context.progress.start()
                 output_parameters: Dict[str, Parameter] = {}
                 metrics: Dict[str, Parameter] = {}

runnable/utils.py CHANGED Viewed

@@ -11,17 +11,16 @@ from collections import OrderedDict
 from datetime import datetime
 from pathlib import Path
 from string import Template as str_template
-from typing import TYPE_CHECKING, Any, Dict, Mapping, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, Tuple, Union
 from ruamel.yaml import YAML
-from stevedore import driver
 import runnable.context as context
 from runnable import console, defaults, names
-from runnable.defaults import TypeMapVariable
+from runnable.defaults import MapVariableType
 if TYPE_CHECKING:  # pragma: no cover
-    from runnable.nodes import BaseNode
+    pass
 logger = logging.getLogger(defaults.LOGGER_NAME)
@@ -66,6 +65,7 @@ def safe_make_dir(directory: Union[str, Path]):
     Path(directory).mkdir(parents=True, exist_ok=True)
+# TODO: remove this
 def generate_run_id(run_id: str = "") -> str:
     """Generate a new run_id.
@@ -147,19 +147,6 @@ def get_module_and_attr_names(command: str) -> Tuple[str, str]:
     return module, func
-def get_dag_hash(dag: Dict[str, Any]) -> str:
-    """Generates the hash of the dag definition.
-    Args:
-        dag (dict): The dictionary object containing the dag definition
-    Returns:
-        str: The hash of the dag definition
-    """
-    dag_str = json.dumps(dag, sort_keys=True, ensure_ascii=True)
-    return hashlib.sha1(dag_str.encode("utf-8")).hexdigest()
 def load_yaml(file_path: str, load_type: str = "safe") -> Dict[str, Any]:
     """Loads an yaml and returns the dictionary.
@@ -314,29 +301,6 @@ def remove_prefix(text: str, prefix: str) -> str:
     return text  # or whatever is given
-def get_tracked_data() -> Dict[str, str]:
-    """Scans the environment variables to find any user tracked variables that have a prefix runnable_TRACK_
-    Removes the environment variable to prevent any clashes in the future steps.
-    Returns:
-        dict: A dictionary of user tracked data
-    """
-    tracked_data = {}
-    for env_var, value in os.environ.items():
-        if env_var.startswith(defaults.TRACK_PREFIX):
-            key = remove_prefix(env_var, defaults.TRACK_PREFIX)
-            try:
-                tracked_data[key.lower()] = json.loads(value)
-            except json.decoder.JSONDecodeError:
-                logger.warning(
-                    f"Tracker {key} could not be JSON decoded, adding the literal value"
-                )
-                tracked_data[key.lower()] = value
-            del os.environ[env_var]
-    return tracked_data
 def diff_dict(d1: Dict[str, Any], d2: Dict[str, Any]) -> Dict[str, Any]:
     """
     Given two dicts d1 and d2, return a new dict that has upsert items from d1.
@@ -359,25 +323,6 @@ def diff_dict(d1: Dict[str, Any], d2: Dict[str, Any]) -> Dict[str, Any]:
     return diff
-# def hash_bytestr_iter(bytesiter, hasher, ashexstr=True):  # pylint: disable=C0116
-#     """Hashes the given bytesiter using the given hasher."""
-#     for block in bytesiter:  # pragma: no cover
-#         hasher.update(block)
-#     return hasher.hexdigest() if ashexstr else hasher.digest()  # pragma: no cover
-# def file_as_blockiter(afile, blocksize=65536):  # pylint: disable=C0116
-#     """From a StackOverflow answer: that is used to generate a MD5 hash of a large files.
-#     # https://stackoverflow.com/questions/3431825/generating-an-md5-checksum-of-a-file.
-#     """
-#     with afile:  # pragma: no cover
-#         block = afile.read(blocksize)
-#         while len(block) > 0:
-#             yield block
-#             block = afile.read(blocksize)
 def get_data_hash(file_name: str) -> str:
     """Returns the hash of the data file.
@@ -397,193 +342,7 @@ def get_data_hash(file_name: str) -> str:
     return file_hash.hexdigest()
-# TODO: This is not the right place for this.
-def get_node_execution_command(
-    node: BaseNode,
-    map_variable: TypeMapVariable = None,
-    over_write_run_id: str = "",
-    log_level: str = "",
-) -> str:
-    """A utility function to standardize execution call to a node via command line.
-    Args:
-        executor (object): The executor class.
-        node (object): The Node to execute
-        map_variable (str, optional): If the node belongs to a map step. Defaults to None.
-    Returns:
-        str: The execution command to run a node via command line.
-    """
-    run_id = context.run_context.run_id
-    if over_write_run_id:
-        run_id = over_write_run_id
-    log_level = log_level or logging.getLevelName(logger.getEffectiveLevel())
-    action = (
-        f"runnable execute-single-node {run_id} "
-        f"{context.run_context.pipeline_file} "
-        f"{node._command_friendly_name()} "
-        f"--log-level {log_level} "
-    )
-    if context.run_context.from_sdk:
-        action = action + "--mode python "
-    if map_variable:
-        action = action + f"--map-variable '{json.dumps(map_variable)}' "
-    if context.run_context.configuration_file:
-        action = action + f"--config {context.run_context.configuration_file} "
-    if context.run_context.parameters_file:
-        action = action + f"--parameters-file {context.run_context.parameters_file} "
-    if context.run_context.tag:
-        action = action + f"--tag {context.run_context.tag}"
-    return action
-# TODO: This is not the right place for this.
-def get_fan_command(
-    mode: str,
-    node: BaseNode,
-    run_id: str,
-    map_variable: TypeMapVariable = None,
-    log_level: str = "",
-) -> str:
-    """
-    An utility function to return the fan "in or out" command
-    Args:
-        executor (BaseExecutor): The executor class
-        mode (str): in or out
-        node (BaseNode): The composite node that we are fanning in or out
-        run_id (str): The run id.
-        map_variable (dict, optional): If the node is a map, we have the map variable. Defaults to None.
-    Returns:
-        str: The fan in or out command
-    """
-    log_level = log_level or logging.getLevelName(logger.getEffectiveLevel())
-    action = (
-        f"runnable fan {run_id} "
-        f"{node._command_friendly_name()} "  # step name
-        f"{context.run_context.pipeline_file} "  # yaml or python
-        f"{mode} "  # in or out
-        f"--log-level {log_level} "
-    )
-    if context.run_context.configuration_file:
-        action = action + f" --config-file {context.run_context.configuration_file} "
-    if context.run_context.parameters_file:
-        action = action + f" --parameters-file {context.run_context.parameters_file}"
-    if map_variable:
-        action = action + f" --map-variable '{json.dumps(map_variable)}'"
-    if context.run_context.from_sdk:  # execution mode
-        action = action + " --mode python "
-    return action
-# TODO: This is not the right place for this.
-def get_job_execution_command(over_write_run_id: str = "") -> str:
-    """Get the execution command to run a job via command line.
-    This function should be used by all executors to submit jobs in remote environment
-    """
-    run_id = context.run_context.run_id
-    if over_write_run_id:
-        run_id = over_write_run_id
-    log_level = logging.getLevelName(logger.getEffectiveLevel())
-    action = (
-        f"runnable execute-job {context.run_context.job_definition_file} {run_id} "
-        f" --log-level {log_level}"
-    )
-    if context.run_context.configuration_file:
-        action = action + f" --config {context.run_context.configuration_file}"
-    if context.run_context.parameters_file:
-        action = action + f" --parameters {context.run_context.parameters_file}"
-    if context.run_context.from_sdk:
-        action = action + " --mode python "
-    if context.run_context.tag:
-        action = action + f" --tag {context.run_context.tag}"
-    return action
-def get_provider_by_name_and_type(
-    service_type: str, service_details: defaults.ServiceConfig
-):
-    """Given a service type, one of executor, run_log_store, catalog, secrets and the config
-    return the exact child class implementing the service.
-    We use stevedore to do the work for us.
-    Args:
-        service_type (str): One of executor, run_log_store, catalog, secrets
-        service_details (dict): The config used to instantiate the service.
-    Raises:
-        Exception: If the service by that name does not exist
-    Returns:
-        object: A service object
-    """
-    namespace = service_type
-    service_name = service_details["type"]
-    service_config: Mapping[str, Any] = {}
-    if "config" in service_details:
-        service_config = service_details.get("config", {})
-    logger.debug(
-        f"Trying to get a service of {service_type} of the name {service_name} with config: {service_config}"
-    )
-    try:
-        mgr = driver.DriverManager(
-            namespace=namespace,
-            name=service_name,
-            invoke_on_load=True,
-            invoke_kwds={**service_config},
-        )
-        return mgr.driver
-    except Exception as _e:
-        logger.exception(
-            f"Could not find the service of type: {service_type} with config: {service_details}"
-        )
-        raise Exception(
-            f"Could not find the service of type: {service_type} with config: {service_details}"
-        ) from _e
-def get_run_config() -> dict:
-    """Given an executor with assigned services, return the run_config.
-    Args:
-        executor (object): The executor with all the services assigned.
-    Returns:
-        dict: The run_config.
-    """
-    run_config = context.run_context.model_dump(by_alias=True)
-    return run_config
-def json_to_ordered_dict(json_str: str) -> TypeMapVariable:
+def json_to_ordered_dict(json_str: str) -> MapVariableType:
     """Decode a JSON str into OrderedDict.
     Args:
@@ -598,27 +357,6 @@ def json_to_ordered_dict(json_str: str) -> TypeMapVariable:
     return OrderedDict()
-def set_runnable_environment_variables(
-    run_id: str = "", configuration_file: str = "", tag: str = ""
-) -> None:
-    """Set the environment variables used by runnable. This function should be called during the prepare configurations
-    by all executors.
-    Args:
-        run_id (str, optional): The run id of the execution. Defaults to None.
-        configuration_file (str, optional): The configuration file if used. Defaults to None.
-        tag (str, optional): The tag associated with a run. Defaults to None.
-    """
-    if run_id:
-        os.environ[defaults.ENV_RUN_ID] = run_id
-    if configuration_file:
-        os.environ[defaults.RUNNABLE_CONFIG_FILE] = configuration_file
-    if tag:
-        os.environ[defaults.RUNNABLE_RUN_TAG] = tag
 def gather_variables() -> Dict[str, str]:
     """Gather all the environment variables used by runnable. All the variables start with runnable_VAR_.
@@ -635,7 +373,7 @@ def gather_variables() -> Dict[str, str]:
     return variables
-def make_log_file_name(name: str, map_variable: TypeMapVariable) -> str:
+def make_log_file_name(name: str, map_variable: MapVariableType) -> str:
     random_tag = "".join(random.choices(string.ascii_uppercase + string.digits, k=3))
     log_file_name = name

runnable 0.34.0a1__py3-none-any.whl → 1.0.0__py3-none-any.whl

Potentially problematic release.

runnable 0.34.0a1__py3-none-any.whl → 1.0.0__py3-none-any.whl