runnable 0.35.0-py3-none-any.whl → 0.36.1-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry; it is provided for informational purposes only.
- extensions/job_executor/__init__.py +3 -4
- extensions/job_executor/emulate.py +106 -0
- extensions/job_executor/k8s.py +8 -8
- extensions/job_executor/local_container.py +13 -14
- extensions/nodes/__init__.py +0 -0
- extensions/nodes/conditional.py +7 -5
- extensions/nodes/fail.py +72 -0
- extensions/nodes/map.py +350 -0
- extensions/nodes/parallel.py +159 -0
- extensions/nodes/stub.py +89 -0
- extensions/nodes/success.py +72 -0
- extensions/nodes/task.py +92 -0
- extensions/pipeline_executor/__init__.py +24 -26
- extensions/pipeline_executor/argo.py +20 -20
- extensions/pipeline_executor/emulate.py +112 -0
- extensions/pipeline_executor/local.py +4 -4
- extensions/pipeline_executor/local_container.py +19 -79
- extensions/pipeline_executor/mocked.py +5 -9
- extensions/pipeline_executor/retry.py +6 -10
- runnable/__init__.py +0 -10
- runnable/catalog.py +1 -21
- runnable/cli.py +0 -59
- runnable/context.py +519 -28
- runnable/datastore.py +51 -54
- runnable/defaults.py +12 -34
- runnable/entrypoints.py +82 -440
- runnable/exceptions.py +35 -34
- runnable/executor.py +13 -20
- runnable/names.py +1 -1
- runnable/nodes.py +16 -15
- runnable/parameters.py +2 -2
- runnable/sdk.py +66 -205
- runnable/tasks.py +62 -81
- runnable/utils.py +6 -268
- {runnable-0.35.0.dist-info → runnable-0.36.1.dist-info}/METADATA +1 -4
- runnable-0.36.1.dist-info/RECORD +72 -0
- {runnable-0.35.0.dist-info → runnable-0.36.1.dist-info}/entry_points.txt +8 -7
- extensions/nodes/nodes.py +0 -778
- extensions/tasks/torch.py +0 -286
- extensions/tasks/torch_config.py +0 -76
- runnable-0.35.0.dist-info/RECORD +0 -66
- {runnable-0.35.0.dist-info → runnable-0.36.1.dist-info}/WHEEL +0 -0
- {runnable-0.35.0.dist-info → runnable-0.36.1.dist-info}/licenses/LICENSE +0 -0
extensions/nodes/task.py
ADDED
@@ -0,0 +1,92 @@
+import logging
+from datetime import datetime
+from typing import Any, Dict
+
+from pydantic import ConfigDict, Field
+
+from runnable import datastore, defaults
+from runnable.datastore import StepLog
+from runnable.defaults import MapVariableType
+from runnable.nodes import ExecutableNode
+from runnable.tasks import BaseTaskType, create_task
+
+logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+class TaskNode(ExecutableNode):
+    """
+    A node of type Task.
+
+    This node does the actual function execution of the graph in all cases.
+    """
+
+    executable: BaseTaskType = Field(exclude=True)
+    node_type: str = Field(default="task", serialization_alias="type")
+
+    # It is technically not allowed as parse_from_config filters them.
+    # This is just to get the task level configuration to be present during serialization.
+    model_config = ConfigDict(extra="allow")
+
+    @classmethod
+    def parse_from_config(cls, config: Dict[str, Any]) -> "TaskNode":
+        # separate task config from node config
+        task_config = {
+            k: v for k, v in config.items() if k not in TaskNode.model_fields.keys()
+        }
+        node_config = {
+            k: v for k, v in config.items() if k in TaskNode.model_fields.keys()
+        }
+
+        executable = create_task(task_config)
+        return cls(executable=executable, **node_config, **task_config)
+
+    def get_summary(self) -> Dict[str, Any]:
+        summary = {
+            "name": self.name,
+            "type": self.node_type,
+            "executable": self.executable.get_summary(),
+            "catalog": self._get_catalog_settings(),
+        }
+
+        return summary
+
+    def execute(
+        self,
+        mock=False,
+        map_variable: MapVariableType = None,
+        attempt_number: int = 1,
+    ) -> StepLog:
+        """
+        All that we do in runnable is to come to this point where we actually execute the command.
+
+        Args:
+            executor (_type_): The executor class
+            mock (bool, optional): If we should just mock and not execute. Defaults to False.
+            map_variable (dict, optional): If the node is part of internal branch. Defaults to None.
+
+        Returns:
+            StepAttempt: The attempt object
+        """
+        step_log = self._context.run_log_store.get_step_log(
+            self._get_step_log_name(map_variable), self._context.run_id
+        )
+
+        if not mock:
+            # Do not run if we are mocking the execution, could be useful for caching and dry runs
+            attempt_log = self.executable.execute_command(map_variable=map_variable)
+            attempt_log.attempt_number = attempt_number
+        else:
+            attempt_log = datastore.StepAttempt(
+                status=defaults.SUCCESS,
+                start_time=str(datetime.now()),
+                end_time=str(datetime.now()),
+                attempt_number=attempt_number,
+            )
+
+        logger.info(f"attempt_log: {attempt_log}")
+        logger.info(f"Step {self.name} completed with status: {attempt_log.status}")
+
+        step_log.status = attempt_log.status
+        step_log.attempts.append(attempt_log)
+
+        return step_log
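Within the added `TaskNode`, `parse_from_config` splits one flat config mapping in two: keys that are `TaskNode` model fields stay on the node, everything else is handed to `create_task`. A runnable sketch of that split, with hypothetical config keys and a stand-in for `TaskNode.model_fields`:

```python
# Hypothetical node config; "command" and "command_type" are illustrative
# task-level keys, "name" and "next" stand in for TaskNode model fields.
config = {
    "name": "train",
    "next": "evaluate",
    "command": "pipeline.train",
    "command_type": "python",
}
node_fields = {"name", "next", "node_type"}  # stand-in for TaskNode.model_fields

# Same comprehension pattern as parse_from_config: unknown keys become
# task config, known model fields become node config.
task_config = {k: v for k, v in config.items() if k not in node_fields}
node_config = {k: v for k, v in config.items() if k in node_fields}

assert task_config == {"command": "pipeline.train", "command_type": "python"}
assert node_config == {"name": "train", "next": "evaluate"}
```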
extensions/pipeline_executor/__init__.py
CHANGED
@@ -13,7 +13,7 @@ from runnable import (
     utils,
 )
 from runnable.datastore import DataCatalog, JsonParameter, RunLog, StepLog
-from runnable.defaults import
+from runnable.defaults import MapVariableType
 from runnable.executor import BasePipelineExecutor
 from runnable.graph import Graph
 from runnable.nodes import BaseNode
@@ -40,7 +40,7 @@ class GenericPipelineExecutor(BasePipelineExecutor):
 
     @property
     def _context(self):
-        assert context.run_context
+        assert isinstance(context.run_context, context.PipelineContext)
         return context.run_context
 
     def _get_parameters(self) -> Dict[str, JsonParameter]:
@@ -104,7 +104,7 @@ class GenericPipelineExecutor(BasePipelineExecutor):
         )
 
         # Update run_config
-        run_config =
+        run_config = self._context.model_dump()
         logger.debug(f"run_config as seen by executor: {run_config}")
         self._context.run_log_store.set_run_config(
             run_id=self._context.run_id, run_config=run_config
@@ -154,12 +154,12 @@ class GenericPipelineExecutor(BasePipelineExecutor):
         data_catalogs = []
         for name_pattern in node_catalog_settings.get(stage) or []:
             if stage == "get":
-                data_catalog = self._context.
+                data_catalog = self._context.catalog.get(
                     name=name_pattern,
                 )
 
             elif stage == "put":
-                data_catalog = self._context.
+                data_catalog = self._context.catalog.put(
                     name=name_pattern, allow_file_not_found_exc=allow_file_no_found_exc
                 )
             else:
@@ -189,14 +189,15 @@ class GenericPipelineExecutor(BasePipelineExecutor):
             map_variable=map_variable,
         )
         task_console.save_text(log_file_name)
+        task_console.export_text(clear=True)
         # Put the log file in the catalog
-        self._context.
+        self._context.catalog.put(name=log_file_name)
         os.remove(log_file_name)
 
     def _execute_node(
         self,
         node: BaseNode,
-        map_variable:
+        map_variable: MapVariableType = None,
         mock: bool = False,
     ):
         """
@@ -250,6 +251,10 @@ class GenericPipelineExecutor(BasePipelineExecutor):
         console.print(f"Summary of the step: {step_log.internal_name}")
         console.print(step_log.get_summary(), style=defaults.info_style)
 
+        self.add_task_log_to_catalog(
+            name=self._context_node.internal_name, map_variable=map_variable
+        )
+
         self._context_node = None
 
         self._context.run_log_store.add_step_log(step_log, self._context.run_id)
@@ -266,7 +271,7 @@ class GenericPipelineExecutor(BasePipelineExecutor):
         """
         step_log.code_identities.append(utils.get_git_code_identity())
 
-    def execute_from_graph(self, node: BaseNode, map_variable:
+    def execute_from_graph(self, node: BaseNode, map_variable: MapVariableType = None):
         """
         This is the entry point to from the graph execution.
 
@@ -315,8 +320,6 @@ class GenericPipelineExecutor(BasePipelineExecutor):
             node.execute_as_graph(map_variable=map_variable)
             return
 
-        task_console.export_text(clear=True)
-
         task_name = node._resolve_map_placeholders(node.internal_name, map_variable)
         console.print(
             f":runner: Executing the node {task_name} ... ", style="bold color(208)"
@@ -324,7 +327,7 @@ class GenericPipelineExecutor(BasePipelineExecutor):
         self.trigger_node_execution(node=node, map_variable=map_variable)
 
     def trigger_node_execution(
-        self, node: BaseNode, map_variable:
+        self, node: BaseNode, map_variable: MapVariableType = None
     ):
         """
         Call this method only if we are responsible for traversing the graph via
@@ -342,7 +345,7 @@ class GenericPipelineExecutor(BasePipelineExecutor):
         pass
 
     def _get_status_and_next_node_name(
-        self, current_node: BaseNode, dag: Graph, map_variable:
+        self, current_node: BaseNode, dag: Graph, map_variable: MapVariableType = None
     ) -> tuple[str, str]:
         """
         Given the current node and the graph, returns the name of the next node to execute.
@@ -380,7 +383,7 @@ class GenericPipelineExecutor(BasePipelineExecutor):
 
         return step_log.status, next_node_name
 
-    def execute_graph(self, dag: Graph, map_variable:
+    def execute_graph(self, dag: Graph, map_variable: MapVariableType = None):
         """
         The parallelization is controlled by the nodes and not by this function.
 
@@ -409,7 +412,7 @@ class GenericPipelineExecutor(BasePipelineExecutor):
             dag.internal_branch_name or "Graph",
             map_variable,
         )
-        branch_execution_task =
+        branch_execution_task = context.progress.add_task(
            f"[dark_orange]Executing {branch_task_name}",
            total=1,
        )
@@ -429,7 +432,7 @@ class GenericPipelineExecutor(BasePipelineExecutor):
 
             depth = " " * ((task_name.count(".")) or 1 - 1)
 
-            task_execution =
+            task_execution = context.progress.add_task(
                 f"{depth}Executing {task_name}", total=1
             )
 
@@ -440,20 +443,20 @@ class GenericPipelineExecutor(BasePipelineExecutor):
                 )
 
                 if status == defaults.SUCCESS:
-
+                    context.progress.update(
                         task_execution,
                         description=f"{depth}[green] {task_name} Completed",
                         completed=True,
                         overflow="fold",
                     )
                 else:
-
+                    context.progress.update(
                         task_execution,
                         description=f"{depth}[red] {task_name} Failed",
                         completed=True,
                     )  # type ignore
             except Exception as e:  # noqa: E722
-
+                context.progress.update(
                     task_execution,
                     description=f"{depth}[red] {task_name} Errored",
                     completed=True,
@@ -461,11 +464,6 @@ class GenericPipelineExecutor(BasePipelineExecutor):
                 console.print(e, style=defaults.error_style)
                 logger.exception(e)
                 raise
-            finally:
-                # Add task log to the catalog
-                self.add_task_log_to_catalog(
-                    name=working_on.internal_name, map_variable=map_variable
-                )
 
             console.rule(style="[dark orange]")
 
@@ -475,7 +473,7 @@ class GenericPipelineExecutor(BasePipelineExecutor):
             current_node = next_node_name
 
         if branch_execution_task:
-
+            context.progress.update(
                 branch_execution_task,
                 description=f"[green3] {branch_task_name} completed",
                 completed=True,
@@ -567,7 +565,7 @@ class GenericPipelineExecutor(BasePipelineExecutor):
 
         return effective_node_config
 
-    def fan_out(self, node: BaseNode, map_variable:
+    def fan_out(self, node: BaseNode, map_variable: MapVariableType = None):
         """
         This method is used to appropriately fan-out the execution of a composite node.
         This is only useful when we want to execute a composite node during 3rd party orchestrators.
@@ -599,7 +597,7 @@ class GenericPipelineExecutor(BasePipelineExecutor):
 
         node.fan_out(map_variable=map_variable)
 
-    def fan_in(self, node: BaseNode, map_variable:
+    def fan_in(self, node: BaseNode, map_variable: MapVariableType = None):
         """
         This method is used to appropriately fan-in after the execution of a composite node.
         This is only useful when we want to execute a composite node during 3rd party orchestrators.
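The dominant change in this file is completing the `map_variable` annotations with `MapVariableType` from `runnable.defaults`. The alias itself is not shown in this diff; a sketch assuming it is an optional mapping from a map node's iterate-as name to the current iteration value:

```python
from typing import Dict, Optional, Union

# Assumed shape of the alias; the real definition in runnable/defaults.py is
# not visible in this diff.
MapVariableType = Optional[Dict[str, Union[str, int, float]]]

def resolve_map_placeholders(internal_name: str, map_variable: MapVariableType = None) -> str:
    # Illustrative stand-in for BaseNode._resolve_map_placeholders: substitute
    # each iterate-as placeholder with the current iteration value.
    for key, value in (map_variable or {}).items():
        internal_name = internal_name.replace("{" + key + "}", str(value))
    return internal_name

assert resolve_map_placeholders("map_step.{chunk}", {"chunk": 3}) == "map_step.3"
```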
extensions/pipeline_executor/argo.py
CHANGED
@@ -21,13 +21,12 @@ from pydantic.alias_generators import to_camel
 from ruamel.yaml import YAML
 
 from extensions.nodes.conditional import ConditionalNode
-from extensions.nodes.
-
-
-# from extensions.nodes.torch import TorchNode
+from extensions.nodes.map import MapNode
+from extensions.nodes.parallel import ParallelNode
+from extensions.nodes.task import TaskNode
 from extensions.pipeline_executor import GenericPipelineExecutor
-from runnable import defaults
-from runnable.defaults import
+from runnable import defaults
+from runnable.defaults import MapVariableType
 from runnable.graph import Graph, search_node_by_internal_name
 from runnable.nodes import BaseNode
 
@@ -453,7 +452,7 @@ class ArgoExecutor(GenericPipelineExecutor):
     """
 
     service_name: str = "argo"
-
+    _should_setup_run_log_at_traversal: bool = PrivateAttr(default=False)
     mock: bool = False
 
     model_config = ConfigDict(
@@ -535,13 +534,13 @@ class ArgoExecutor(GenericPipelineExecutor):
         parameters: Optional[list[Parameter]],
         task_name: str,
     ):
-        map_variable:
+        map_variable: MapVariableType = {}
         for parameter in parameters or []:
             map_variable[parameter.name] = (  # type: ignore
                 "{{inputs.parameters." + str(parameter.name) + "}}"
             )
 
-        fan_command =
+        fan_command = self._context.get_fan_command(
             mode=mode,
             node=node,
             run_id=self._run_id_as_parameter,
@@ -590,7 +589,7 @@ class ArgoExecutor(GenericPipelineExecutor):
         task_name: str,
         inputs: Optional[Inputs] = None,
     ) -> ContainerTemplate:
-        assert node.node_type in ["task", "
+        assert node.node_type in ["task", "success", "stub", "fail"]
 
         node_override = None
         if hasattr(node, "overrides"):
@@ -606,17 +605,17 @@ class ArgoExecutor(GenericPipelineExecutor):
 
         inputs = inputs or Inputs(parameters=[])
 
-        map_variable:
+        map_variable: MapVariableType = {}
         for parameter in inputs.parameters or []:
             map_variable[parameter.name] = (  # type: ignore
                 "{{inputs.parameters." + str(parameter.name) + "}}"
             )
 
         # command = "runnable execute-single-node"
-        command =
+        command = self._context.get_node_callable_command(
             node=node,
-            over_write_run_id=self._run_id_as_parameter,
             map_variable=map_variable,
+            over_write_run_id=self._run_id_as_parameter,
             log_level=self._log_level_as_parameter,
         )
 
@@ -653,7 +652,7 @@ class ArgoExecutor(GenericPipelineExecutor):
     def _set_env_vars_to_task(
         self, working_on: BaseNode, container_template: CoreContainerTemplate
     ):
-        if working_on.node_type not in ["task"
+        if working_on.node_type not in ["task"]:
             return
 
         global_envs: dict[str, str] = {}
@@ -715,6 +714,7 @@ class ArgoExecutor(GenericPipelineExecutor):
         assert parent_dag_template.dag
 
         parent_dag_template.dag.tasks.append(on_failure_task)
+
         self._gather_tasks_for_dag_template(
             on_failure_dag,
             dag=dag,
@@ -762,7 +762,7 @@ class ArgoExecutor(GenericPipelineExecutor):
             depends = task_name
 
         match working_on.node_type:
-            case "task" | "success" | "stub":
+            case "task" | "success" | "stub" | "fail":
                 template_of_container = self._create_container_template(
                     working_on,
                     task_name=task_name,
@@ -958,7 +958,7 @@ class ArgoExecutor(GenericPipelineExecutor):
                 f,
             )
 
-    def _implicitly_fail(self, node: BaseNode, map_variable:
+    def _implicitly_fail(self, node: BaseNode, map_variable: MapVariableType):
         assert self._context.dag
         _, current_branch = search_node_by_internal_name(
             dag=self._context.dag, internal_name=node.internal_name
@@ -1005,7 +1005,7 @@ class ArgoExecutor(GenericPipelineExecutor):
 
         self._implicitly_fail(node, map_variable)
 
-    def fan_out(self, node: BaseNode, map_variable:
+    def fan_out(self, node: BaseNode, map_variable: MapVariableType = None):
         # This could be the first step of the graph
         self._use_volumes()
 
@@ -1031,7 +1031,7 @@ class ArgoExecutor(GenericPipelineExecutor):
         with open("/tmp/output.txt", mode="w", encoding="utf-8") as myfile:
             json.dump(node.get_parameter_value(), myfile, indent=4)
 
-    def fan_in(self, node: BaseNode, map_variable:
+    def fan_in(self, node: BaseNode, map_variable: MapVariableType = None):
         self._use_volumes()
         super().fan_in(node, map_variable)
 
@@ -1042,9 +1042,9 @@ class ArgoExecutor(GenericPipelineExecutor):
             case "chunked-fs":
                 self._context.run_log_store.log_folder = self._container_log_location
 
-        match self._context.
+        match self._context.catalog.service_name:
             case "file-system":
-                self._context.
+                self._context.catalog.catalog_location = (
                     self._container_catalog_location
                 )
 
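The fan templates and container templates above both build `map_variable` entries as Argo input-parameter expressions that Argo resolves at runtime. A self-contained sketch of that construction, using a stand-in dataclass instead of the package's pydantic `Parameter` model:

```python
from dataclasses import dataclass

@dataclass
class Parameter:  # stand-in for the pydantic Parameter model in argo.py
    name: str

def build_map_variable(parameters: list[Parameter]) -> dict[str, str]:
    # Each value is an Argo template expression, substituted by Argo at
    # runtime, mirroring the "{{inputs.parameters.<name>}}" loop in the diff.
    return {p.name: "{{inputs.parameters." + str(p.name) + "}}" for p in parameters}

assert build_map_variable([Parameter("chunk")]) == {
    "chunk": "{{inputs.parameters.chunk}}"
}
```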
extensions/pipeline_executor/emulate.py
ADDED
@@ -0,0 +1,112 @@
+import logging
+import shlex
+import subprocess
+import sys
+
+from pydantic import PrivateAttr
+
+from extensions.pipeline_executor import GenericPipelineExecutor
+from runnable import defaults
+from runnable.defaults import MapVariableType
+from runnable.nodes import BaseNode
+
+logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+class Emulator(GenericPipelineExecutor):
+    """
+    In the mode of local execution, we run everything on the local computer.
+
+    This has some serious implications on the amount of time it would take to complete the run.
+    Also ensure that the local compute is good enough for the compute to happen of all the steps.
+
+    Example config:
+
+    ```yaml
+    pipeline-executor:
+      type: local
+    ```
+
+    """
+
+    service_name: str = "emulator"
+
+    _should_setup_run_log_at_traversal: bool = PrivateAttr(default=True)
+
+    def trigger_node_execution(
+        self, node: BaseNode, map_variable: MapVariableType = None
+    ):
+        """
+        In this mode of execution, we prepare for the node execution and execute the node
+
+        Args:
+            node (BaseNode): [description]
+            map_variable (str, optional): [description]. Defaults to ''.
+        """
+        command = self._context.get_node_callable_command(
+            node, map_variable=map_variable
+        )
+
+        self.run_click_command(command)
+        # execute the command in a forked process
+
+        step_log = self._context.run_log_store.get_step_log(
+            node._get_step_log_name(map_variable), self._context.run_id
+        )
+        if step_log.status != defaults.SUCCESS:
+            msg = "Node execution inside the emulate failed. Please check the logs.\n"
+            logger.error(msg)
+            step_log.status = defaults.FAIL
+            self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+
+    def execute_node(self, node: BaseNode, map_variable: MapVariableType = None):
+        """
+        For local execution, we just execute the node.
+
+        Args:
+            node (BaseNode): _description_
+            map_variable (dict[str, str], optional): _description_. Defaults to None.
+        """
+        self._execute_node(node=node, map_variable=map_variable)
+
+    def run_click_command(self, command: str) -> str:
+        """
+        Execute a Click-based CLI command in the current virtual environment.
+
+        Args:
+            args: List of Click command arguments (including subcommands and options)
+
+        Returns:
+            Combined stdout/stderr output as string
+        """
+        # For Click commands installed via setup.py entry_points
+        # command = [sys.executable, '-m', 'your_package.cli'] + args
+
+        # For direct module execution
+        sub_command = [sys.executable, "-m", "runnable.cli"] + shlex.split(command)[1:]
+
+        process = subprocess.Popen(
+            sub_command,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            universal_newlines=True,
+            bufsize=1,
+        )
+
+        output = []
+        try:
+            while True:
+                line = process.stdout.readline()  # type: ignore
+                if not line and process.poll() is not None:
+                    break
+                print(line, end="")
+                output.append(line)
+        finally:
+            process.stdout.close()  # type: ignore
+
+        if process.returncode != 0:
+            raise subprocess.CalledProcessError(
+                process.returncode, command, "".join(output)
+            )
+
+        return "".join(output)
extensions/pipeline_executor/local.py
CHANGED
@@ -4,7 +4,7 @@ from pydantic import Field, PrivateAttr
 
 from extensions.pipeline_executor import GenericPipelineExecutor
 from runnable import defaults
-from runnable.defaults import
+from runnable.defaults import MapVariableType
 from runnable.nodes import BaseNode
 
 logger = logging.getLogger(defaults.LOGGER_NAME)
@@ -32,14 +32,14 @@ class LocalExecutor(GenericPipelineExecutor):
 
     _is_local: bool = PrivateAttr(default=True)
 
-    def execute_from_graph(self, node: BaseNode, map_variable:
+    def execute_from_graph(self, node: BaseNode, map_variable: MapVariableType = None):
         if not self.object_serialisation:
             self._context.object_serialisation = False
 
         super().execute_from_graph(node=node, map_variable=map_variable)
 
     def trigger_node_execution(
-        self, node: BaseNode, map_variable:
+        self, node: BaseNode, map_variable: MapVariableType = None
     ):
         """
         In this mode of execution, we prepare for the node execution and execute the node
@@ -50,7 +50,7 @@ class LocalExecutor(GenericPipelineExecutor):
         """
         self.execute_node(node=node, map_variable=map_variable)
 
-    def execute_node(self, node: BaseNode, map_variable:
+    def execute_node(self, node: BaseNode, map_variable: MapVariableType = None):
         """
         For local execution, we just execute the node.
 