runnable-0.34.0a1-py3-none-any.whl → runnable-1.0.0-py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.

Potentially problematic release.


This version of runnable might be problematic.

Files changed (49)
  1. extensions/catalog/any_path.py +13 -2
  2. extensions/job_executor/__init__.py +7 -5
  3. extensions/job_executor/emulate.py +106 -0
  4. extensions/job_executor/k8s.py +8 -8
  5. extensions/job_executor/local_container.py +13 -14
  6. extensions/nodes/__init__.py +0 -0
  7. extensions/nodes/conditional.py +243 -0
  8. extensions/nodes/fail.py +72 -0
  9. extensions/nodes/map.py +350 -0
  10. extensions/nodes/parallel.py +159 -0
  11. extensions/nodes/stub.py +89 -0
  12. extensions/nodes/success.py +72 -0
  13. extensions/nodes/task.py +92 -0
  14. extensions/pipeline_executor/__init__.py +27 -27
  15. extensions/pipeline_executor/argo.py +52 -46
  16. extensions/pipeline_executor/emulate.py +112 -0
  17. extensions/pipeline_executor/local.py +4 -4
  18. extensions/pipeline_executor/local_container.py +19 -79
  19. extensions/pipeline_executor/mocked.py +5 -9
  20. extensions/pipeline_executor/retry.py +6 -10
  21. runnable/__init__.py +2 -11
  22. runnable/catalog.py +6 -23
  23. runnable/cli.py +145 -48
  24. runnable/context.py +520 -28
  25. runnable/datastore.py +51 -54
  26. runnable/defaults.py +12 -34
  27. runnable/entrypoints.py +82 -440
  28. runnable/exceptions.py +35 -34
  29. runnable/executor.py +13 -20
  30. runnable/gantt.py +1141 -0
  31. runnable/graph.py +1 -1
  32. runnable/names.py +1 -1
  33. runnable/nodes.py +20 -16
  34. runnable/parameters.py +108 -51
  35. runnable/sdk.py +125 -204
  36. runnable/tasks.py +62 -85
  37. runnable/utils.py +6 -268
  38. runnable-1.0.0.dist-info/METADATA +122 -0
  39. runnable-1.0.0.dist-info/RECORD +73 -0
  40. {runnable-0.34.0a1.dist-info → runnable-1.0.0.dist-info}/entry_points.txt +9 -8
  41. extensions/nodes/nodes.py +0 -778
  42. extensions/nodes/torch.py +0 -273
  43. extensions/nodes/torch_config.py +0 -76
  44. extensions/tasks/torch.py +0 -286
  45. extensions/tasks/torch_config.py +0 -76
  46. runnable-0.34.0a1.dist-info/METADATA +0 -267
  47. runnable-0.34.0a1.dist-info/RECORD +0 -67
  48. {runnable-0.34.0a1.dist-info → runnable-1.0.0.dist-info}/WHEEL +0 -0
  49. {runnable-0.34.0a1.dist-info → runnable-1.0.0.dist-info}/licenses/LICENSE +0 -0
runnable/sdk.py CHANGED
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
+import inspect
 import logging
-import os
 import re
 from abc import ABC, abstractmethod
 from pathlib import Path
@@ -16,26 +16,17 @@ from pydantic import (
     field_validator,
     model_validator,
 )
-from rich.progress import (
-    BarColumn,
-    Progress,
-    SpinnerColumn,
-    TextColumn,
-    TimeElapsedColumn,
-)
-from rich.table import Column
 from typing_extensions import Self
 
-from extensions.nodes.nodes import (
-    FailNode,
-    MapNode,
-    ParallelNode,
-    StubNode,
-    SuccessNode,
-    TaskNode,
-)
-from runnable import console, defaults, entrypoints, exceptions, graph, utils
-from runnable.executor import BaseJobExecutor, BasePipelineExecutor
+from extensions.nodes.conditional import ConditionalNode
+from extensions.nodes.fail import FailNode
+from extensions.nodes.map import MapNode
+from extensions.nodes.parallel import ParallelNode
+from extensions.nodes.stub import StubNode
+from extensions.nodes.success import SuccessNode
+from extensions.nodes.task import TaskNode
+from runnable import defaults, graph
+from runnable.executor import BaseJobExecutor
 from runnable.nodes import TraversalNode
 from runnable.tasks import BaseTaskType as RunnableTask
 from runnable.tasks import TaskReturns
@@ -49,7 +40,7 @@ StepType = Union[
     "ShellTask",
     "Parallel",
     "Map",
-    "TorchTask",
+    "Conditional",
 ]
 
 
@@ -69,6 +60,7 @@ class Catalog(BaseModel):
     Attributes:
         get (List[str]): List of glob patterns to get from central catalog to the compute data folder.
         put (List[str]): List of glob patterns to put into central catalog from the compute data folder.
+        store_copy (bool): Whether to store a copy of the data in the central catalog.
 
     Examples:
         >>> from runnable import Catalog
@@ -83,6 +75,7 @@ class Catalog(BaseModel):
     # compute_data_folder: str = Field(default="", alias="compute_data_folder")
     get: List[str] = Field(default_factory=list, alias="get")
     put: List[str] = Field(default_factory=list, alias="put")
+    store_copy: bool = Field(default=True, alias="store_copy")
 
 
 class BaseTraversal(ABC, BaseModel):
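
Note: store_copy defaults to True, so existing pipelines keep the old behavior of persisting artifacts into the central catalog; judging by the diff, setting it to False skips storing the copy. A minimal sketch of the new flag from the SDK (the glob patterns are illustrative):

    from runnable import Catalog

    # Hypothetical patterns; store_copy=False skips persisting a copy
    # of the matched files into the central catalog
    catalog = Catalog(get=["data/*.csv"], put=["output/*.parquet"], store_copy=False)
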
@@ -193,6 +186,9 @@ class BaseTask(BaseTraversal):
             "This method should be implemented in the child class"
         )
 
+    def as_pipeline(self) -> "Pipeline":
+        return Pipeline(steps=[self], name=self.internal_name)  # type: ignore
+
 
 class PythonTask(BaseTask):
     """
@@ -282,26 +278,6 @@ class PythonTask(BaseTask):
         return node.executable
 
 
-class TorchTask(BaseTask):
-    entrypoint: str = Field(
-        alias="entrypoint", default="torch.distributed.run", frozen=True
-    )
-    args_to_torchrun: Dict[str, Any] = Field(
-        default_factory=dict, alias="args_to_torchrun"
-    )
-
-    script_to_call: str
-
-    @computed_field
-    def command_type(self) -> str:
-        return "torch"
-
-    def create_job(self) -> RunnableTask:
-        self.terminate_with_success = True
-        node = self.create_node()
-        return node.executable
-
-
 class NotebookTask(BaseTask):
     """
     An execution node of the pipeline of notebook.
@@ -481,6 +457,9 @@ class Stub(BaseTraversal):
 
         return StubNode.parse_from_config(self.model_dump(exclude_none=True))
 
+    def as_pipeline(self) -> "Pipeline":
+        return Pipeline(steps=[self])
+
 
 class Parallel(BaseTraversal):
     """
@@ -520,6 +499,53 @@ class Parallel(BaseTraversal):
         return node
 
 
+class Conditional(BaseTraversal):
+    branches: Dict[str, "Pipeline"]
+    parameter: str  # the name of the parameter should be isalnum
+
+    @field_validator("parameter")
+    @classmethod
+    def validate_parameter(cls, parameter: str) -> str:
+        if not parameter.isalnum():
+            raise AssertionError(
+                "The parameter name should be alphanumeric and not empty"
+            )
+        return parameter
+
+    @field_validator("branches")
+    @classmethod
+    def validate_branches(
+        cls, branches: Dict[str, "Pipeline"]
+    ) -> Dict[str, "Pipeline"]:
+        for branch_name in branches.keys():
+            if not branch_name.isalnum():
+                raise ValueError(f"Branch '{branch_name}' must be alphanumeric.")
+        return branches
+
+    @computed_field  # type: ignore
+    @property
+    def graph_branches(self) -> Dict[str, graph.Graph]:
+        return {
+            name: pipeline._dag.model_copy() for name, pipeline in self.branches.items()
+        }
+
+    def create_node(self) -> ConditionalNode:
+        if not self.next_node:
+            if not (self.terminate_with_failure or self.terminate_with_success):
+                raise AssertionError(
+                    "A node not being terminated must have a user defined next node"
+                )
+
+        node = ConditionalNode(
+            name=self.name,
+            branches=self.graph_branches,
+            internal_name="",
+            next_node=self.next_node,
+            parameter=self.parameter,
+        )
+        return node
+
+
 class Map(BaseTraversal):
     """
     A node that iterates over a list of items and executes a pipeline for each item.
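
Note: Conditional is the new step type behind the "Conditional" entry in StepType. It routes execution to whichever branch pipeline's key matches the runtime value of parameter, and both the parameter name and the branch names must be alphanumeric. A hedged usage sketch, assuming Conditional is exported at the top level like the other step types (step and branch names are illustrative):

    from runnable import Conditional, Pipeline, Stub

    route = Conditional(
        name="route",
        parameter="mode",  # the runtime value of "mode" selects a branch below
        branches={
            "train": Stub(name="train", terminate_with_success=True).as_pipeline(),
            "infer": Stub(name="infer", terminate_with_success=True).as_pipeline(),
        },
        terminate_with_success=True,
    )

    pipeline = Pipeline(steps=[route])
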
@@ -543,7 +569,6 @@ class Map(BaseTraversal):
     iterate_on: str
     iterate_as: str
     reducer: Optional[str] = Field(default=None, alias="reducer")
-    overrides: Dict[str, Any] = Field(default_factory=dict)
 
     @computed_field  # type: ignore
     @property
@@ -564,7 +589,6 @@ class Map(BaseTraversal):
             next_node=self.next_node,
             iterate_on=self.iterate_on,
             iterate_as=self.iterate_as,
-            overrides=self.overrides,
             reducer=self.reducer,
         )
 
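
Note: with overrides removed, Map is configured purely by iterate_on/iterate_as plus an optional reducer. A hedged sketch of the surviving surface (names are illustrative, and the branch field is assumed to carry the per-item pipeline as in earlier releases):

    from runnable import Map, Stub

    # Runs the branch pipeline once per item of the "chunks" parameter,
    # binding each item to "chunk"
    process = Map(
        name="process",
        iterate_on="chunks",
        iterate_as="chunk",
        branch=Stub(name="work", terminate_with_success=True).as_pipeline(),
        terminate_with_success=True,
    )
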
@@ -735,6 +759,15 @@ class Pipeline(BaseModel):
                 return False
         return True
 
+    def get_caller(self) -> str:
+        caller_stack = inspect.stack()[2]
+        relative_to_root = str(Path(caller_stack.filename).relative_to(Path.cwd()))
+
+        module_name = re.sub(r"\b.py\b", "", relative_to_root.replace("/", "."))
+        module_to_call = f"{module_name}.{caller_stack.function}"
+
+        return module_to_call
+
 
     def execute(
         self,
@@ -752,106 +785,31 @@ class Pipeline(BaseModel):
             # Immediately return as this call is only for getting the pipeline definition
             return {}
 
-        logger.setLevel(log_level)
-
-        run_id = utils.generate_run_id(run_id=run_id)
+        from runnable import context
 
-        parameters_file = os.environ.get("RUNNABLE_PARAMETERS_FILE", parameters_file)
-
-        tag = os.environ.get("RUNNABLE_tag", tag)
+        logger.setLevel(log_level)
 
-        configuration_file = os.environ.get(
-            "RUNNABLE_CONFIGURATION_FILE", configuration_file
-        )
-        run_context = entrypoints.prepare_configurations(
+        service_configurations = context.ServiceConfigurations(
             configuration_file=configuration_file,
-            run_id=run_id,
-            tag=tag,
-            parameters_file=parameters_file,
+            execution_context=context.ExecutionContext.PIPELINE,
         )
 
-        assert isinstance(run_context.executor, BasePipelineExecutor)
-
-        utils.set_runnable_environment_variables(
-            run_id=run_id, configuration_file=configuration_file, tag=tag
-        )
-
-        dag_definition = self._dag.model_dump(by_alias=True, exclude_none=True)
-        run_context.from_sdk = True
-        run_context.dag = graph.create_graph(dag_definition)
-
-        console.print("Working with context:")
-        console.print(run_context)
-        console.rule(style="[dark orange]")
-
-        if not run_context.executor._is_local:
-            # We are not working with executor that does not work in local environment
-            import inspect
-
-            caller_stack = inspect.stack()[1]
-            relative_to_root = str(Path(caller_stack.filename).relative_to(Path.cwd()))
-
-            module_name = re.sub(r"\b.py\b", "", relative_to_root.replace("/", "."))
-            module_to_call = f"{module_name}.{caller_stack.function}"
-
-            run_context.pipeline_file = f"{module_to_call}.py"
-            run_context.from_sdk = True
-
-        # Prepare for graph execution
-        run_context.executor._set_up_run_log(exists_ok=False)
-
-        with Progress(
-            SpinnerColumn(spinner_name="runner"),
-            TextColumn(
-                "[progress.description]{task.description}", table_column=Column(ratio=2)
-            ),
-            BarColumn(table_column=Column(ratio=1), style="dark_orange"),
-            TimeElapsedColumn(table_column=Column(ratio=1)),
-            console=console,
-            expand=True,
-        ) as progress:
-            pipeline_execution_task = progress.add_task(
-                "[dark_orange] Starting execution .. ", total=1
-            )
-            try:
-                run_context.progress = progress
+        configurations = {
+            "pipeline_definition_file": self.get_caller(),
+            "parameters_file": parameters_file,
+            "tag": tag,
+            "run_id": run_id,
+            "execution_mode": context.ExecutionMode.PYTHON,
+            "configuration_file": configuration_file,
+            **service_configurations.services,
+        }
 
-                run_context.executor.execute_graph(dag=run_context.dag)
+        run_context = context.PipelineContext.model_validate(configurations)
+        context.run_context = run_context
 
-                if not run_context.executor._is_local:
-                    # non local executors just traverse the graph and do nothing
-                    return {}
+        assert isinstance(run_context, context.PipelineContext)
 
-                run_log = run_context.run_log_store.get_run_log_by_id(
-                    run_id=run_context.run_id, full=False
-                )
-
-                if run_log.status == defaults.SUCCESS:
-                    progress.update(
-                        pipeline_execution_task,
-                        description="[green] Success",
-                        completed=True,
-                    )
-                else:
-                    progress.update(
-                        pipeline_execution_task,
-                        description="[red] Failed",
-                        completed=True,
-                    )
-                    raise exceptions.ExecutionFailedError(run_context.run_id)
-            except Exception as e:  # noqa: E722
-                console.print(e, style=defaults.error_style)
-                progress.update(
-                    pipeline_execution_task,
-                    description="[red] Errored execution",
-                    completed=True,
-                )
-                raise
-
-        if run_context.executor._is_local:
-            return run_context.run_log_store.get_run_log_by_id(
-                run_id=run_context.run_id
-            )
+        run_context.execute()
 
 
 class BaseJob(BaseModel):
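
Note: the orchestration that previously lived inline here (rich progress rendering, run-log bookkeeping, local-versus-remote branching) now sits behind context.PipelineContext.execute(); the SDK only assembles a configuration mapping and validates it into the context. The user-facing call surface appears unchanged; a minimal sketch:

    from runnable import Stub

    pipeline = Stub(name="hello", terminate_with_success=True).as_pipeline()

    # Same entry point as before; configuration and parameters files are optional
    pipeline.execute()
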
@@ -875,11 +833,25 @@ class BaseJob(BaseModel):
     def get_task(self) -> RunnableTask:
         raise NotImplementedError
 
+    def get_caller(self) -> str:
+        caller_stack = inspect.stack()[2]
+        relative_to_root = str(Path(caller_stack.filename).relative_to(Path.cwd()))
+
+        module_name = re.sub(r"\b.py\b", "", relative_to_root.replace("/", "."))
+        module_to_call = f"{module_name}.{caller_stack.function}"
+
+        return module_to_call
+
     def return_catalog_settings(self) -> Optional[List[str]]:
         if self.catalog is None:
             return []
         return self.catalog.put
 
+    def return_bool_catalog_store_copy(self) -> bool:
+        if self.catalog is None:
+            return True
+        return self.catalog.store_copy
+
     def _is_called_for_definition(self) -> bool:
         """
         If the run context is set, we are coming in only to get the pipeline definition.
@@ -901,65 +873,33 @@ class BaseJob(BaseModel):
         if self._is_called_for_definition():
             # Immediately return as this call is only for getting the job definition
             return {}
-        logger.setLevel(log_level)
-
-        run_id = utils.generate_run_id(run_id=job_id)
-
-        parameters_file = os.environ.get("RUNNABLE_PARAMETERS_FILE", parameters_file)
-
-        tag = os.environ.get("RUNNABLE_tag", tag)
+        from runnable import context
 
-        configuration_file = os.environ.get(
-            "RUNNABLE_CONFIGURATION_FILE", configuration_file
-        )
+        logger.setLevel(log_level)
 
-        run_context = entrypoints.prepare_configurations(
+        service_configurations = context.ServiceConfigurations(
             configuration_file=configuration_file,
-            run_id=run_id,
-            tag=tag,
-            parameters_file=parameters_file,
-            is_job=True,
-        )
-
-        assert isinstance(run_context.executor, BaseJobExecutor)
-        run_context.from_sdk = True
-
-        utils.set_runnable_environment_variables(
-            run_id=run_id, configuration_file=configuration_file, tag=tag
+            execution_context=context.ExecutionContext.JOB,
         )
 
-        console.print("Working with context:")
-        console.print(run_context)
-        console.rule(style="[dark orange]")
-
-        if not run_context.executor._is_local:
-            # We are not working with executor that does not work in local environment
-            import inspect
-
-            caller_stack = inspect.stack()[1]
-            relative_to_root = str(Path(caller_stack.filename).relative_to(Path.cwd()))
-
-            module_name = re.sub(r"\b.py\b", "", relative_to_root.replace("/", "."))
-            module_to_call = f"{module_name}.{caller_stack.function}"
-
-            run_context.job_definition_file = f"{module_to_call}.py"
-
-        job = self.get_task()
-        catalog_settings = self.return_catalog_settings()
+        configurations = {
+            "job_definition_file": self.get_caller(),
+            "parameters_file": parameters_file,
+            "tag": tag,
+            "run_id": job_id,
+            "execution_mode": context.ExecutionMode.PYTHON,
+            "configuration_file": configuration_file,
+            "job": self.get_task(),
+            "catalog_settings": self.return_catalog_settings(),
+            **service_configurations.services,
+        }
 
-        try:
-            run_context.executor.submit_job(job, catalog_settings=catalog_settings)
-        finally:
-            run_context.executor.add_task_log_to_catalog("job")
+        run_context = context.JobContext.model_validate(configurations)
+        run_context.catalog_store_copy = self.return_bool_catalog_store_copy()
 
-        logger.info(
-            "Executing the job from the user. We are still in the caller's compute environment"
-        )
+        assert isinstance(run_context.job_executor, BaseJobExecutor)
 
-        if run_context.executor._is_local:
-            return run_context.run_log_store.get_run_log_by_id(
-                run_id=run_context.run_id
-            )
+        run_context.execute()
 
 
 class PythonJob(BaseJob):
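
Note: jobs follow the same context-driven flow, with the job callable and catalog settings passed into context.JobContext and the catalog's store_copy flag threaded through separately. A hedged sketch (the function and patterns are illustrative):

    from runnable import Catalog, PythonJob

    def train():
        print("training...")

    job = PythonJob(
        function=train,
        catalog=Catalog(put=["model/*.bin"], store_copy=False),
    )
    job.execute()
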
@@ -983,25 +923,6 @@ class PythonJob(BaseJob):
         return task.create_node().executable
 
 
-class TorchJob(BaseJob):
-    entrypoint: str = Field(default="torch.distributed.run", frozen=True)
-    args_to_torchrun: dict[str, str | bool | int | float] = Field(
-        default_factory=dict
-    )  # For example
-    # {"nproc_per_node": 2, "nnodes": 1,}
-
-    script_to_call: str  # For example train/script.py
-
-    def get_task(self) -> RunnableTask:
-        # Piggy bank on existing tasks as a hack
-        task = TorchTask(
-            name="dummy",
-            terminate_with_success=True,
-            **self.model_dump(exclude_defaults=True, exclude_none=True),
-        )
-        return task.create_node().executable
-
-
 class NotebookJob(BaseJob):
     notebook: str = Field(serialization_alias="command")
     optional_ploomber_args: Optional[Dict[str, Any]] = Field(