runnable 0.35.0-py3-none-any.whl → 0.36.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- extensions/job_executor/__init__.py +3 -4
- extensions/job_executor/emulate.py +106 -0
- extensions/job_executor/k8s.py +8 -8
- extensions/job_executor/local_container.py +13 -14
- extensions/nodes/__init__.py +0 -0
- extensions/nodes/conditional.py +7 -5
- extensions/nodes/fail.py +72 -0
- extensions/nodes/map.py +350 -0
- extensions/nodes/parallel.py +159 -0
- extensions/nodes/stub.py +89 -0
- extensions/nodes/success.py +72 -0
- extensions/nodes/task.py +92 -0
- extensions/pipeline_executor/__init__.py +24 -26
- extensions/pipeline_executor/argo.py +20 -20
- extensions/pipeline_executor/emulate.py +112 -0
- extensions/pipeline_executor/local.py +4 -4
- extensions/pipeline_executor/local_container.py +19 -79
- extensions/pipeline_executor/mocked.py +5 -9
- extensions/pipeline_executor/retry.py +6 -10
- runnable/__init__.py +0 -10
- runnable/catalog.py +1 -21
- runnable/cli.py +0 -59
- runnable/context.py +519 -28
- runnable/datastore.py +51 -54
- runnable/defaults.py +12 -34
- runnable/entrypoints.py +82 -440
- runnable/exceptions.py +35 -34
- runnable/executor.py +13 -20
- runnable/names.py +1 -1
- runnable/nodes.py +16 -15
- runnable/parameters.py +2 -2
- runnable/sdk.py +66 -205
- runnable/tasks.py +62 -81
- runnable/utils.py +6 -268
- {runnable-0.35.0.dist-info → runnable-0.36.1.dist-info}/METADATA +1 -4
- runnable-0.36.1.dist-info/RECORD +72 -0
- {runnable-0.35.0.dist-info → runnable-0.36.1.dist-info}/entry_points.txt +8 -7
- extensions/nodes/nodes.py +0 -778
- extensions/tasks/torch.py +0 -286
- extensions/tasks/torch_config.py +0 -76
- runnable-0.35.0.dist-info/RECORD +0 -66
- {runnable-0.35.0.dist-info → runnable-0.36.1.dist-info}/WHEEL +0 -0
- {runnable-0.35.0.dist-info → runnable-0.36.1.dist-info}/licenses/LICENSE +0 -0
runnable/sdk.py
CHANGED
@@ -1,7 +1,7 @@
 from __future__ import annotations

+import inspect
 import logging
-import os
 import re
 from abc import ABC, abstractmethod
 from pathlib import Path
@@ -16,27 +16,17 @@ from pydantic import (
     field_validator,
     model_validator,
 )
-from rich.progress import (
-    BarColumn,
-    Progress,
-    SpinnerColumn,
-    TextColumn,
-    TimeElapsedColumn,
-)
-from rich.table import Column
 from typing_extensions import Self

 from extensions.nodes.conditional import ConditionalNode
-from extensions.nodes.nodes import (
-    FailNode,
-    MapNode,
-    ParallelNode,
-    StubNode,
-    SuccessNode,
-    TaskNode,
-)
-from runnable import console, defaults, entrypoints, exceptions, graph, utils
-from runnable.executor import BaseJobExecutor, BasePipelineExecutor
+from extensions.nodes.fail import FailNode
+from extensions.nodes.map import MapNode
+from extensions.nodes.parallel import ParallelNode
+from extensions.nodes.stub import StubNode
+from extensions.nodes.success import SuccessNode
+from extensions.nodes.task import TaskNode
+from runnable import defaults, graph
+from runnable.executor import BaseJobExecutor
 from runnable.nodes import TraversalNode
 from runnable.tasks import BaseTaskType as RunnableTask
 from runnable.tasks import TaskReturns
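Note: extensions/nodes/nodes.py (removed in this release, -778 lines in the file list above) was split into one module per node type. A hedged sketch of the import migration for code that reached into the old monolithic module:

    # Before 0.36.x (module deleted in this release):
    # from extensions.nodes.nodes import StubNode, TaskNode

    # From 0.36.x, each node class lives in its own module:
    from extensions.nodes.stub import StubNode
    from extensions.nodes.task import TaskNode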
@@ -50,7 +40,6 @@ StepType = Union[
     "ShellTask",
     "Parallel",
     "Map",
-    "TorchTask",
     "Conditional",
 ]

@@ -196,7 +185,7 @@ class BaseTask(BaseTraversal):
     )

     def as_pipeline(self) -> "Pipeline":
-        return Pipeline(steps=[self])  # type: ignore
+        return Pipeline(steps=[self], name=self.internal_name)  # type: ignore


 class PythonTask(BaseTask):
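Note: a task promoted to a pipeline via as_pipeline() now names the pipeline after the task's internal_name instead of leaving it anonymous. A minimal sketch of the SDK surface (the function= keyword follows the documented PythonTask constructor; treat the exact arguments as illustrative):

    from runnable import PythonTask

    def train():
        print("training")

    # One-step pipeline; in 0.36.x its name follows the task's internal_name.
    pipeline = PythonTask(
        name="train", function=train, terminate_with_success=True
    ).as_pipeline()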
@@ -287,27 +276,6 @@ class PythonTask(BaseTask):
         return node.executable


-class TorchTask(BaseTask):
-    # entrypoint: str = Field(
-    #     alias="entrypoint", default="torch.distributed.run", frozen=True
-    # )
-    # args_to_torchrun: Dict[str, Any] = Field(
-    #     default_factory=dict, alias="args_to_torchrun"
-    # )
-
-    script_to_call: str
-    accelerate_config_file: str
-
-    @computed_field
-    def command_type(self) -> str:
-        return "torch"
-
-    def create_job(self) -> RunnableTask:
-        self.terminate_with_success = True
-        node = self.create_node()
-        return node.executable
-
-
 class NotebookTask(BaseTask):
     """
     An execution node of the pipeline of notebook.
@@ -487,6 +455,9 @@ class Stub(BaseTraversal):

         return StubNode.parse_from_config(self.model_dump(exclude_none=True))

+    def as_pipeline(self) -> "Pipeline":
+        return Pipeline(steps=[self])
+

 class Parallel(BaseTraversal):
     """
@@ -786,6 +757,15 @@ class Pipeline(BaseModel):
                 return False
         return True

+    def get_caller(self) -> str:
+        caller_stack = inspect.stack()[2]
+        relative_to_root = str(Path(caller_stack.filename).relative_to(Path.cwd()))
+
+        module_name = re.sub(r"\b.py\b", "", relative_to_root.replace("/", "."))
+        module_to_call = f"{module_name}.{caller_stack.function}"
+
+        return module_to_call
+
     def execute(
         self,
         configuration_file: str = "",
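Note: get_caller() walks two frames up the stack (the user function that invoked execute()), relativizes the caller's file against the working directory, and rewrites it as a dotted module reference. A standalone sketch of the same transformation (path and function name are illustrative):

    import re

    def module_reference(relative_path: str, function: str) -> str:
        # "examples/pipeline.py" called from main() -> "examples.pipeline.main"
        module_name = re.sub(r"\b.py\b", "", relative_path.replace("/", "."))
        return f"{module_name}.{function}"

    print(module_reference("examples/pipeline.py", "main"))  # examples.pipeline.main

The pattern keeps the unescaped dot from the source, so it strips any single character followed by "py" at a word boundary, not only a literal ".py".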
@@ -803,106 +783,31 @@ class Pipeline(BaseModel):
             # Immediately return as this call is only for getting the pipeline definition
             return {}

-
-
-        run_id = utils.generate_run_id(run_id=run_id)
-
-        parameters_file = os.environ.get("RUNNABLE_PARAMETERS_FILE", parameters_file)
+        from runnable import context

-
+        logger.setLevel(log_level)

-        configuration_file = os.environ.get(
-            "RUNNABLE_CONFIGURATION_FILE", configuration_file
-        )
-        run_context = entrypoints.prepare_configurations(
+        service_configurations = context.ServiceConfigurations(
             configuration_file=configuration_file,
-            run_id=run_id,
-            tag=tag,
-            parameters_file=parameters_file,
-        )
-
-        assert isinstance(run_context.executor, BasePipelineExecutor)
-
-        utils.set_runnable_environment_variables(
-            run_id=run_id, configuration_file=configuration_file, tag=tag
+            execution_context=context.ExecutionContext.PIPELINE,
         )

-
-
-
-
-
-
-
-
-
-            # We are not working with executor that does not work in local environment
-            import inspect
-
-            caller_stack = inspect.stack()[1]
-            relative_to_root = str(Path(caller_stack.filename).relative_to(Path.cwd()))
-
-            module_name = re.sub(r"\b.py\b", "", relative_to_root.replace("/", "."))
-            module_to_call = f"{module_name}.{caller_stack.function}"
-
-            run_context.pipeline_file = f"{module_to_call}.py"
-            run_context.from_sdk = True
-
-        # Prepare for graph execution
-        run_context.executor._set_up_run_log(exists_ok=False)
-
-        with Progress(
-            SpinnerColumn(spinner_name="runner"),
-            TextColumn(
-                "[progress.description]{task.description}", table_column=Column(ratio=2)
-            ),
-            BarColumn(table_column=Column(ratio=1), style="dark_orange"),
-            TimeElapsedColumn(table_column=Column(ratio=1)),
-            console=console,
-            expand=True,
-        ) as progress:
-            pipeline_execution_task = progress.add_task(
-                "[dark_orange] Starting execution .. ", total=1
-            )
-            try:
-                run_context.progress = progress
-
-                run_context.executor.execute_graph(dag=run_context.dag)
+        configurations = {
+            "pipeline_definition_file": self.get_caller(),
+            "parameters_file": parameters_file,
+            "tag": tag,
+            "run_id": run_id,
+            "execution_mode": context.ExecutionMode.PYTHON,
+            "configuration_file": configuration_file,
+            **service_configurations.services,
+        }

-
-
-                return {}
+        run_context = context.PipelineContext.model_validate(configurations)
+        context.run_context = run_context

-                run_log = run_context.run_log_store.get_run_log_by_id(
-                    run_id=run_context.run_id, full=False
-                )
+        assert isinstance(run_context, context.PipelineContext)

-                if run_log.status == defaults.SUCCESS:
-                    progress.update(
-                        pipeline_execution_task,
-                        description="[green] Success",
-                        completed=True,
-                    )
-                else:
-                    progress.update(
-                        pipeline_execution_task,
-                        description="[red] Failed",
-                        completed=True,
-                    )
-                    raise exceptions.ExecutionFailedError(run_context.run_id)
-            except Exception as e:  # noqa: E722
-                console.print(e, style=defaults.error_style)
-                progress.update(
-                    pipeline_execution_task,
-                    description="[red] Errored execution",
-                    completed=True,
-                )
-                raise
-
-        if run_context.executor._is_local:
-            return run_context.run_log_store.get_run_log_by_id(
-                run_id=run_context.run_id
-            )
+        run_context.execute()


 class BaseJob(BaseModel):
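Note: the SDK no longer drives Rich progress bars and the executor from execute() itself; it assembles a plain configuration mapping and hands control to a validated context object. A condensed sketch of the new flow, using only names visible in this diff (ServiceConfigurations.services is assumed to be a dict of resolved service configurations):

    from runnable import context

    # Roughly what Pipeline.execute() now does in 0.36.x:
    service_configurations = context.ServiceConfigurations(
        configuration_file="config.yaml",  # optional
        execution_context=context.ExecutionContext.PIPELINE,
    )

    configurations = {
        "pipeline_definition_file": "examples.pipeline.main",  # from get_caller()
        "parameters_file": "",
        "tag": "",
        "run_id": "",
        "execution_mode": context.ExecutionMode.PYTHON,
        "configuration_file": "config.yaml",
        **service_configurations.services,  # executor, run log store, catalog, ...
    }

    run_context = context.PipelineContext.model_validate(configurations)
    context.run_context = run_context
    run_context.execute()  # run log setup, progress display and graph traversal now live here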
@@ -926,6 +831,15 @@ class BaseJob(BaseModel):
     def get_task(self) -> RunnableTask:
         raise NotImplementedError

+    def get_caller(self) -> str:
+        caller_stack = inspect.stack()[2]
+        relative_to_root = str(Path(caller_stack.filename).relative_to(Path.cwd()))
+
+        module_name = re.sub(r"\b.py\b", "", relative_to_root.replace("/", "."))
+        module_to_call = f"{module_name}.{caller_stack.function}"
+
+        return module_to_call
+
     def return_catalog_settings(self) -> Optional[List[str]]:
         if self.catalog is None:
             return []
@@ -952,65 +866,32 @@ class BaseJob(BaseModel):
         if self._is_called_for_definition():
             # Immediately return as this call is only for getting the job definition
             return {}
-
-
-        run_id = utils.generate_run_id(run_id=job_id)
-
-        parameters_file = os.environ.get("RUNNABLE_PARAMETERS_FILE", parameters_file)
-
-        tag = os.environ.get("RUNNABLE_tag", tag)
+        from runnable import context

-        configuration_file = os.environ.get(
-            "RUNNABLE_CONFIGURATION_FILE", configuration_file
-        )
+        logger.setLevel(log_level)

-        run_context = entrypoints.prepare_configurations(
+        service_configurations = context.ServiceConfigurations(
             configuration_file=configuration_file,
-            run_id=run_id,
-            tag=tag,
-            parameters_file=parameters_file,
-            is_job=True,
-        )
-
-        assert isinstance(run_context.executor, BaseJobExecutor)
-        run_context.from_sdk = True
-
-        utils.set_runnable_environment_variables(
-            run_id=run_id, configuration_file=configuration_file, tag=tag
+            execution_context=context.ExecutionContext.JOB,
         )

-
-
-
-
-
-
-
-
-
-
-
-        module_name = re.sub(r"\b.py\b", "", relative_to_root.replace("/", "."))
-        module_to_call = f"{module_name}.{caller_stack.function}"
-
-        run_context.job_definition_file = f"{module_to_call}.py"
-
-        job = self.get_task()
-        catalog_settings = self.return_catalog_settings()
+        configurations = {
+            "job_definition_file": self.get_caller(),
+            "parameters_file": parameters_file,
+            "tag": tag,
+            "run_id": job_id,
+            "execution_mode": context.ExecutionMode.PYTHON,
+            "configuration_file": configuration_file,
+            "job": self.get_task(),
+            "catalog_settings": self.return_catalog_settings(),
+            **service_configurations.services,
+        }

-        try:
-            run_context.executor.submit_job(job, catalog_settings=catalog_settings)
-        finally:
-            run_context.executor.add_task_log_to_catalog("job")
+        run_context = context.JobContext.model_validate(configurations)

-
-            "Executing the job from the user. We are still in the caller's compute environment"
-        )
+        assert isinstance(run_context.job_executor, BaseJobExecutor)

-
-        return run_context.run_log_store.get_run_log_by_id(
-            run_id=run_context.run_id
-        )
+        run_context.execute()


 class PythonJob(BaseJob):
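Note: jobs follow the same restructuring; the configurations mapping gains "job" and "catalog_settings" entries and is validated into a context.JobContext instead of a PipelineContext. At the SDK surface the call pattern is unchanged. A hedged usage sketch (constructor arguments assumed from the documented PythonJob API):

    from runnable import PythonJob

    def train_model():
        print("training...")

    job = PythonJob(function=train_model)
    job.execute()  # builds a JobContext from the configurations dict and runs it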
@@ -1034,26 +915,6 @@ class PythonJob(BaseJob):
         return task.create_node().executable


-class TorchJob(BaseJob):
-    # entrypoint: str = Field(default="torch.distributed.run", frozen=True)
-    # args_to_torchrun: dict[str, str | bool | int | float] = Field(
-    #     default_factory=dict
-    # )  # For example
-    # {"nproc_per_node": 2, "nnodes": 1,}
-
-    script_to_call: str  # For example train/script.py
-    accelerate_config_file: str
-
-    def get_task(self) -> RunnableTask:
-        # Piggy bank on existing tasks as a hack
-        task = TorchTask(
-            name="dummy",
-            terminate_with_success=True,
-            **self.model_dump(exclude_defaults=True, exclude_none=True),
-        )
-        return task.create_node().executable
-
-
 class NotebookJob(BaseJob):
     notebook: str = Field(serialization_alias="command")
     optional_ploomber_args: Optional[Dict[str, Any]] = Field(
runnable/tasks.py
CHANGED
@@ -25,7 +25,7 @@ from runnable.datastore import (
     Parameter,
     StepAttempt,
 )
-from runnable.defaults import
+from runnable.defaults import MapVariableType

 logger = logging.getLogger(defaults.LOGGER_NAME)

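Note: every execute_command signature below now takes map_variable: MapVariableType = None. The alias itself is defined in runnable/defaults.py and not shown in this diff; judging from the concrete annotation on the removed TorchTaskType (Dict[str, str | int | float] | None), it is presumably close to:

    # Assumed shape of the alias in runnable/defaults.py (not part of this diff):
    from typing import Dict, Optional, Union

    MapVariableType = Optional[Dict[str, Union[str, int, float]]]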
@@ -48,7 +48,29 @@ class TeeIO(io.StringIO):
         self.output_stream.flush()


-
+@contextlib.contextmanager
+def redirect_output():
+    # Set the stream handlers to use the custom TeeIO class
+
+    # Backup the original stdout and stderr
+    original_stdout = sys.stdout
+    original_stderr = sys.stderr
+
+    # Redirect stdout and stderr to custom TeeStream objects
+    sys.stdout = TeeIO(sys.stdout)
+    sys.stderr = TeeIO(sys.stderr)
+
+    # Replace stream for all StreamHandlers to use the new sys.stdout
+    for handler in logging.getLogger().handlers:
+        if isinstance(handler, logging.StreamHandler):
+            handler.stream = sys.stdout
+
+    try:
+        yield sys.stdout, sys.stderr
+    finally:
+        # Restore the original stdout and stderr
+        sys.stdout = original_stdout
+        sys.stderr = original_stderr


 class TaskReturns(BaseModel):
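Note: the tee pattern above captures everything a task writes while still echoing it to the real streams, so output can be logged to the run log and shown live. A self-contained sketch of the idea (TeeIO's write/flush forwarding is assumed from its usage here):

    import contextlib
    import io
    import sys

    class TeeIO(io.StringIO):
        """Buffer writes while forwarding them to the wrapped stream."""

        def __init__(self, output_stream):
            super().__init__()
            self.output_stream = output_stream

        def write(self, s):
            self.output_stream.write(s)  # echo to the original stream
            return super().write(s)      # and keep a copy in the buffer

        def flush(self):
            self.output_stream.flush()

    @contextlib.contextmanager
    def redirect_output():
        original_stdout, original_stderr = sys.stdout, sys.stderr
        sys.stdout, sys.stderr = TeeIO(sys.stdout), TeeIO(sys.stderr)
        try:
            yield sys.stdout, sys.stderr
        finally:
            sys.stdout, sys.stderr = original_stdout, original_stderr

    with redirect_output() as (out, err):
        print("hello from the task")

    print("captured:", out.getvalue())  # -> hello from the task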
@@ -79,7 +101,7 @@ class BaseTaskType(BaseModel):
     def set_secrets_as_env_variables(self):
         # Preparing the environment for the task execution
         for key in self.secrets:
-            secret_value = context.run_context.
+            secret_value = context.run_context.secrets.get(key)
             os.environ[key] = secret_value

     def delete_secrets_from_env_variables(self):
@@ -90,7 +112,7 @@ class BaseTaskType(BaseModel):

     def execute_command(
         self,
-        map_variable:
+        map_variable: MapVariableType = None,
     ) -> StepAttempt:
         """The function to execute the command.

@@ -130,7 +152,7 @@ class BaseTaskType(BaseModel):
         finally:
             self.delete_secrets_from_env_variables()

-    def resolve_unreduced_parameters(self, map_variable:
+    def resolve_unreduced_parameters(self, map_variable: MapVariableType = None):
         """Resolve the unreduced parameters."""
         params = self._context.run_log_store.get_parameters(
             run_id=self._context.run_id
@@ -153,7 +175,7 @@ class BaseTaskType(BaseModel):

     @contextlib.contextmanager
     def execution_context(
-        self, map_variable:
+        self, map_variable: MapVariableType = None, allow_complex: bool = True
     ):
         params = self.resolve_unreduced_parameters(map_variable=map_variable)
         logger.info(f"Parameters available for the execution: {params}")
@@ -267,7 +289,7 @@ class PythonTaskType(BaseTaskType):  # pylint: disable=too-few-public-methods

     def execute_command(
         self,
-        map_variable:
+        map_variable: MapVariableType = None,
     ) -> StepAttempt:
         """Execute the notebook as defined by the command."""
         attempt_log = StepAttempt(status=defaults.FAIL, start_time=str(datetime.now()))
@@ -289,13 +311,21 @@ class PythonTaskType(BaseTaskType):  # pylint: disable=too-few-public-methods
                 logger.info(
                     f"Calling {func} from {module} with {filtered_parameters}"
                 )
-                out_file = TeeIO()
-
-                with contextlib.redirect_stdout(out_file):
+                context.progress.stop()  # redirecting stdout clashes with rich progress
+                with redirect_output() as (buffer, stderr_buffer):
                     user_set_parameters = f(
                         **filtered_parameters
                     )  # This is a tuple or single value
-
+
+                print(
+                    stderr_buffer.getvalue()
+                )  # To print the logging statements
+
+                # TODO: Avoid double print!!
+                with task_console.capture():
+                    task_console.log(buffer.getvalue())
+                    task_console.log(stderr_buffer.getvalue())
+                context.progress.start()
             except Exception as e:
                 raise exceptions.CommandCallError(
                     f"Function call: {self.command} did not succeed.\n"
@@ -354,66 +384,6 @@ class PythonTaskType(BaseTaskType):  # pylint: disable=too-few-public-methods
         return attempt_log


-class TorchTaskType(BaseTaskType):
-    task_type: str = Field(default="torch", serialization_alias="command_type")
-    accelerate_config_file: str
-
-    script_to_call: str  # For example train/script.py
-
-    def execute_command(
-        self, map_variable: Dict[str, str | int | float] | None = None
-    ) -> StepAttempt:
-        from accelerate.commands import launch
-
-        attempt_log = StepAttempt(status=defaults.FAIL, start_time=str(datetime.now()))
-
-        with (
-            self.execution_context(
-                map_variable=map_variable, allow_complex=False
-            ) as params,
-            self.expose_secrets() as _,
-        ):
-            try:
-                script_args = []
-                for key, value in params.items():
-                    script_args.append(f"--{key}")
-                    if type(value.value) is not bool:
-                        script_args.append(str(value.value))
-
-                # TODO: Check the typing here
-
-                logger.info("Calling the user script with the following parameters:")
-                logger.info(script_args)
-                out_file = TeeIO()
-                try:
-                    with contextlib.redirect_stdout(out_file):
-                        parser = launch.launch_command_parser()
-                        args = parser.parse_args(self.script_to_call)
-                        args.training_script = self.script_to_call
-                        args.config_file = self.accelerate_config_file
-                        args.training_script_args = script_args
-
-                        launch.launch_command(args)
-                    task_console.print(out_file.getvalue())
-                except Exception as e:
-                    raise exceptions.CommandCallError(
-                        f"Call to script{self.script_to_call} did not succeed."
-                    ) from e
-                finally:
-                    sys.argv = sys.argv[:1]
-
-                attempt_log.status = defaults.SUCCESS
-            except Exception as _e:
-                msg = f"Call to script: {self.script_to_call} did not succeed."
-                attempt_log.message = msg
-                task_console.print_exception(show_locals=False)
-                task_console.log(_e, style=defaults.error_style)
-
-        attempt_log.end_time = str(datetime.now())
-
-        return attempt_log
-
-
 class NotebookTaskType(BaseTaskType):
     """
     --8<-- [start:notebook_reference]
@@ -478,14 +448,15 @@ class NotebookTaskType(BaseTaskType):

         return command

-    def get_notebook_output_path(self, map_variable:
+    def get_notebook_output_path(self, map_variable: MapVariableType = None) -> str:
         tag = ""
         map_variable = map_variable or {}
         for key, value in map_variable.items():
             tag += f"{key}_{value}_"

-        if
-
+        if isinstance(self._context, context.PipelineContext):
+            assert self._context.pipeline_executor._context_node
+            tag += self._context.pipeline_executor._context_node.name

         tag = "".join(x for x in tag if x.isalnum()).strip("-")

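Note: the output notebook's name is tagged with the map variables and, under a pipeline run, the current node's name, then reduced to alphanumerics. The tagging logic in isolation (the node name is hypothetical):

    map_variable = {"chunk": 1}
    tag = ""
    for key, value in map_variable.items():
        tag += f"{key}_{value}_"
    tag += "process-chunk"  # stand-in for _context_node.name
    tag = "".join(x for x in tag if x.isalnum()).strip("-")
    print(tag)  # chunk1processchunk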
@@ -496,7 +467,7 @@ class NotebookTaskType(BaseTaskType):

     def execute_command(
         self,
-        map_variable:
+        map_variable: MapVariableType = None,
     ) -> StepAttempt:
         """Execute the python notebook as defined by the command.

@@ -551,12 +522,20 @@ class NotebookTaskType(BaseTaskType):
             }
             kwds.update(ploomber_optional_args)

-            out_file = TeeIO()
-            with contextlib.redirect_stdout(out_file):
+            context.progress.stop()  # redirecting stdout clashes with rich progress
+
+            with redirect_output() as (buffer, stderr_buffer):
                 pm.execute_notebook(**kwds)
-            task_console.print(out_file.getvalue())

-
+            print(stderr_buffer.getvalue())  # To print the logging statements
+
+            with task_console.capture():
+                task_console.log(buffer.getvalue())
+                task_console.log(stderr_buffer.getvalue())
+
+            context.progress.start()
+
+            context.run_context.catalog.put(name=notebook_output_path)

             client = PloomberClient.from_path(path=notebook_output_path)
             namespace = client.get_namespace()
@@ -674,7 +653,7 @@ class ShellTaskType(BaseTaskType):

     def execute_command(
         self,
-        map_variable:
+        map_variable: MapVariableType = None,
     ) -> StepAttempt:
         # Using shell=True as we want to have chained commands to be executed in the same shell.
         """Execute the shell command as defined by the command.
@@ -698,7 +677,7 @@ class ShellTaskType(BaseTaskType):
         # Expose secrets as environment variables
         if self.secrets:
             for key in self.secrets:
-                secret_value = context.run_context.
+                secret_value = context.run_context.secrets.get(key)
                 subprocess_env[key] = secret_value

         try:
@@ -724,6 +703,7 @@ class ShellTaskType(BaseTaskType):
             capture = False
             return_keys = {x.name: x for x in self.returns}

+            context.progress.stop()  # redirecting stdout clashes with rich progress
             proc = subprocess.Popen(
                 command,
                 shell=True,
@@ -747,6 +727,7 @@ class ShellTaskType(BaseTaskType):
                         continue
                     task_console.print(line, style=defaults.warning_style)

+            context.progress.start()
             output_parameters: Dict[str, Parameter] = {}
             metrics: Dict[str, Parameter] = {}
