PyPI - runnable - Versions diffs - 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

runnable 0.2.0py3-none-any.whl → 0.4.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

runnable/__init__.py +12 -1
runnable/catalog.py +2 -2
runnable/cli.py +5 -5
runnable/datastore.py +3 -2
runnable/defaults.py +21 -18
runnable/entrypoints.py +41 -77
runnable/executor.py +6 -16
runnable/extensions/catalog/file_system/implementation.py +2 -1
runnable/extensions/executor/__init__.py +20 -9
runnable/extensions/executor/argo/implementation.py +6 -5
runnable/extensions/executor/argo/specification.yaml +1 -1
runnable/extensions/executor/k8s_job/implementation_FF.py +4 -4
runnable/extensions/executor/local/implementation.py +1 -0
runnable/extensions/executor/local_container/implementation.py +4 -10
runnable/extensions/executor/mocked/implementation.py +2 -33
runnable/extensions/nodes.py +40 -60
runnable/integration.py +2 -2
runnable/interaction.py +9 -4
runnable/nodes.py +19 -7
runnable/parameters.py +1 -1
runnable/sdk.py +181 -59
runnable/tasks.py +124 -121
runnable/utils.py +11 -11
{runnable-0.2.0.dist-info → runnable-0.4.0.dist-info}/METADATA +53 -53
{runnable-0.2.0.dist-info → runnable-0.4.0.dist-info}/RECORD +28 -28
{runnable-0.2.0.dist-info → runnable-0.4.0.dist-info}/WHEEL +1 -1
{runnable-0.2.0.dist-info → runnable-0.4.0.dist-info}/LICENSE +0 -0
{runnable-0.2.0.dist-info → runnable-0.4.0.dist-info}/entry_points.txt +0 -0

runnable/sdk.py CHANGED Viewed

@@ -3,11 +3,10 @@ from __future__ import annotations
 import logging
 import os
 from abc import ABC, abstractmethod
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Callable, Dict, List, Optional, Union
-from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, computed_field, field_validator, model_validator
+from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, computed_field, model_validator
 from rich import print
-from ruamel.yaml import YAML
 from typing_extensions import Self
 from runnable import defaults, entrypoints, graph, utils
@@ -16,11 +15,8 @@ from runnable.nodes import TraversalNode
 logger = logging.getLogger(defaults.LOGGER_NAME)
-StepType = Union["Stub", "Task", "Success", "Fail", "Parallel", "Map"]
-TraversalTypes = Union["Stub", "Task", "Parallel", "Map"]
-ALLOWED_COMMAND_TYPES = ["shell", "python", "notebook"]
+StepType = Union["Stub", "PythonTask", "NotebookTask", "ShellTask", "Success", "Fail", "Parallel", "Map"]
+TraversalTypes = Union["Stub", "PythonTask", "NotebookTask", "ShellTask", "Parallel", "Map"]
 class Catalog(BaseModel):
@@ -33,7 +29,7 @@ class Catalog(BaseModel):
         put (List[str]): List of glob patterns to put into central catalog from the compute data folder.
     Examples:
-        >>> from magnus import Catalog, Task
+        >>> from runnable import Catalog, Task
         >>> catalog = Catalog(compute_data_folder="/path/to/data", get=["*.csv"], put=["*.csv"])
         >>> task = Task(name="task", catalog=catalog, command="echo 'hello'")
@@ -107,7 +103,7 @@ class BaseTraversal(ABC, BaseModel):
         ...
-class Task(BaseTraversal):
+class BaseTask(BaseTraversal):
     """
     An execution node of the pipeline.
     Please refer to [concepts](concepts/task.md) for more information.
@@ -133,10 +129,10 @@ class Task(BaseTraversal):
             executor:
               type: local-container
               config:
-                docker_image: "magnus/magnus:latest"
+                docker_image: "runnable/runnable:latest"
                 overrides:
                   custom_docker_image:
-                    docker_image: "magnus/magnus:custom"
+                    docker_image: "runnable/runnable:custom"
             ```
             ### Task specific configuration
             ```python
@@ -148,48 +144,173 @@ class Task(BaseTraversal):
         optional_ploomber_args (Optional[Dict[str, Any]]): Any optional ploomber args.
             Only used when command_type is 'notebook', defaults to {}
         output_cell_tag (Optional[str]): The tag of the output cell.
-            Only used when command_type is 'notebook', defaults to "magnus_output"
+            Only used when command_type is 'notebook', defaults to "runnable_output"
         terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
         terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
         on_failure (str): The name of the node to execute if the step fails.
     """
-    command: str = Field(alias="command")
-    command_type: str = Field(default="python")
     catalog: Optional[Catalog] = Field(default=None, alias="catalog")
     overrides: Dict[str, Any] = Field(default_factory=dict, alias="overrides")
+    def create_node(self) -> TaskNode:
+        if not self.next_node:
+            if not (self.terminate_with_failure or self.terminate_with_success):
+                raise AssertionError("A node not being terminated must have a user defined next node")
+        print(self.model_dump(exclude_none=True))
+        return TaskNode.parse_from_config(self.model_dump(exclude_none=True))
+class PythonTask(BaseTask):
+    """
+    An execution node of the pipeline of python functions.
+    Please refer to [concepts](concepts/task.md) for more information.
+    Attributes:
+        name (str): The name of the node.
+        function (callable): The function to execute.
+        catalog (Optional[Catalog]): The catalog to sync data from/to.
+            Please see Catalog about the structure of the catalog.
+        overrides (Dict[str, Any]): Any overrides to the command.
+            Individual tasks can override the global configuration config by referring to the
+            specific override.
+            For example,
+            ### Global configuration
+            ```yaml
+            executor:
+              type: local-container
+              config:
+                docker_image: "runnable/runnable:latest"
+                overrides:
+                  custom_docker_image:
+                    docker_image: "runnable/runnable:custom"
+            ```
+            ### Task specific configuration
+            ```python
+            task = PythonTask(name="task", function=function,
+                    overrides={'local-container': custom_docker_image})
+            ```
+        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+        terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
+        on_failure (str): The name of the node to execute if the step fails.
+    """
+    function: Callable = Field(exclude=True)
+    @computed_field
+    def command_type(self) -> str:
+        return "python"
+    @computed_field
+    def command(self) -> str:
+        module = self.function.__module__
+        name = self.function.__name__
+        return f"{module}.{name}"
+class NotebookTask(BaseTask):
+    """
+    An execution node of the pipeline of type notebook.
+    Please refer to [concepts](concepts/task.md) for more information.
+    Attributes:
+        name (str): The name of the node.
+        notebook: The path to the notebook
+        catalog (Optional[Catalog]): The catalog to sync data from/to.
+            Please see Catalog about the structure of the catalog.
+        returns: A list of the names of variables to return from the notebook.
+        overrides (Dict[str, Any]): Any overrides to the command.
+            Individual tasks can override the global configuration config by referring to the
+            specific override.
+            For example,
+            ### Global configuration
+            ```yaml
+            executor:
+              type: local-container
+              config:
+                docker_image: "runnable/runnable:latest"
+                overrides:
+                  custom_docker_image:
+                    docker_image: "runnable/runnable:custom"
+            ```
+            ### Task specific configuration
+            ```python
+            task = NotebookTask(name="task", notebook="evaluation.ipynb",
+                    overrides={'local-container': custom_docker_image})
+            ```
+        notebook_output_path (Optional[str]): The path to save the notebook output.
+            Only used when command_type is 'notebook', defaults to command+_out.ipynb
+        optional_ploomber_args (Optional[Dict[str, Any]]): Any optional ploomber args.
+            Only used when command_type is 'notebook', defaults to {}
+        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+        terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
+        on_failure (str): The name of the node to execute if the step fails.
+    """
+    notebook: str = Field(alias="command")
     notebook_output_path: Optional[str] = Field(default=None, alias="notebook_output_path")
     optional_ploomber_args: Optional[Dict[str, Any]] = Field(default=None, alias="optional_ploomber_args")
-    output_cell_tag: Optional[str] = Field(default=None, alias="output_cell_tag")
+    returns: List[str] = Field(default_factory=list, alias="returns")
-    @field_validator("command_type", mode="before")
-    @classmethod
-    def validate_command_type(cls, value: str) -> str:
-        if value not in ALLOWED_COMMAND_TYPES:
-            raise ValueError(f"Invalid command_type: {value}")
-        return value
+    @computed_field
+    def command_type(self) -> str:
+        return "notebook"
-    @model_validator(mode="after")
-    def check_notebook_args(self) -> "Task":
-        if self.command_type != "notebook":
-            assert (
-                self.notebook_output_path is None
-            ), "Only command_types of 'notebook' can be used with notebook_output_path"
-            assert (
-                self.optional_ploomber_args is None
-            ), "Only command_types of 'notebook' can be used with optional_ploomber_args"
+class ShellTask(BaseTask):
+    """
+    An execution node of the pipeline of type shell.
+    Please refer to [concepts](concepts/task.md) for more information.
-            assert self.output_cell_tag is None, "Only command_types of 'notebook' can be used with output_cell_tag"
-        return self
+    Attributes:
+        name (str): The name of the node.
+        command: The shell command to execute.
+        catalog (Optional[Catalog]): The catalog to sync data from/to.
+            Please see Catalog about the structure of the catalog.
+        returns: A list of the names of variables to capture from environment variables of shell.
+        overrides (Dict[str, Any]): Any overrides to the command.
+            Individual tasks can override the global configuration config by referring to the
+            specific override.
-    def create_node(self) -> TaskNode:
-        if not self.next_node:
-            if not (self.terminate_with_failure or self.terminate_with_success):
-                raise AssertionError("A node not being terminated must have a user defined next node")
-        return TaskNode.parse_from_config(self.model_dump(exclude_none=True))
+            For example,
+            ### Global configuration
+            ```yaml
+            executor:
+              type: local-container
+              config:
+                docker_image: "runnable/runnable:latest"
+                overrides:
+                  custom_docker_image:
+                    docker_image: "runnable/runnable:custom"
+            ```
+            ### Task specific configuration
+            ```python
+            task = ShellTask(name="task", command="exit 0",
+                    overrides={'local-container': custom_docker_image})
+            ```
+        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+        terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
+        on_failure (str): The name of the node to execute if the step fails.
+    """
+    command: str = Field(alias="command")
+    returns: List[str] = Field(default_factory=list, alias="returns")
+    @computed_field
+    def command_type(self) -> str:
+        return "shell"
 class Stub(BaseTraversal):
@@ -341,7 +462,8 @@ class Pipeline(BaseModel):
     A Pipeline is a directed acyclic graph of Steps that define a workflow.
     Attributes:
-        steps (List[Stub | Task | Parallel | Map | Success | Fail]): A list of Steps that make up the Pipeline.
+        steps (List[Stub | PythonTask | NotebookTask | ShellTask | Parallel | Map | Success | Fail]):
+            A list of Steps that make up the Pipeline.
         start_at (Stub | PythonTask | NotebookTask | ShellTask | Parallel | Map): The name of the first Step in the Pipeline.
         name (str, optional): The name of the Pipeline. Defaults to "".
         description (str, optional): A description of the Pipeline. Defaults to "".
@@ -385,6 +507,9 @@ class Pipeline(BaseModel):
         self._dag.check_graph()
+    def return_dag(self) -> graph.Graph:
+        return self._dag
     def execute(
         self,
         configuration_file: str = "",
@@ -393,7 +518,6 @@ class Pipeline(BaseModel):
         parameters_file: str = "",
         use_cached: str = "",
         log_level: str = defaults.LOG_LEVEL,
-        output_pipeline_definition: str = "magnus-pipeline.yaml",
     ):
         """
         *Execute* the Pipeline.
@@ -408,7 +532,7 @@ class Pipeline(BaseModel):
         Args:
             configuration_file (str, optional): The path to the configuration file. Defaults to "".
-                The configuration file can be overridden by the environment variable MAGNUS_CONFIGURATION_FILE.
+                The configuration file can be overridden by the environment variable RUNNABLE_CONFIGURATION_FILE.
             run_id (str, optional): The ID of the run. Defaults to "".
             tag (str, optional): The tag of the run. Defaults to "".
@@ -419,18 +543,18 @@ class Pipeline(BaseModel):
                 Provide the run_id of the older execution to recover.
             log_level (str, optional): The log level. Defaults to defaults.LOG_LEVEL.
-            output_pipeline_definition (str, optional): The path to the output pipeline definition file.
-                Defaults to "magnus-pipeline.yaml".
-                Only applicable for the execution via SDK for non ```local``` executors.
         """
-        from runnable.extensions.executor.local.implementation import LocalExecutor
-        from runnable.extensions.executor.mocked.implementation import MockedExecutor
+        # py_to_yaml is used by non local executors to generate the yaml representation of the pipeline.
+        py_to_yaml = os.environ.get("RUNNABLE_PY_TO_YAML", "false")
+        if py_to_yaml == "true":
+            return
         logger.setLevel(log_level)
         run_id = utils.generate_run_id(run_id=run_id)
-        configuration_file = os.environ.get("MAGNUS_CONFIGURATION_FILE", configuration_file)
+        configuration_file = os.environ.get("RUNNABLE_CONFIGURATION_FILE", configuration_file)
         run_context = entrypoints.prepare_configurations(
             configuration_file=configuration_file,
             run_id=run_id,
@@ -440,7 +564,7 @@ class Pipeline(BaseModel):
         )
         run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
-        utils.set_magnus_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
+        utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
         dag_definition = self._dag.model_dump(by_alias=True, exclude_none=True)
@@ -449,17 +573,14 @@ class Pipeline(BaseModel):
         print("Working with context:")
         print(run_context)
-        if not (isinstance(run_context.executor, LocalExecutor) or isinstance(run_context.executor, MockedExecutor)):
-            logger.debug(run_context.dag.model_dump(by_alias=True))
-            yaml = YAML()
+        if not run_context.executor._local:
+            # We are working with non local executor
+            import inspect
-            with open(output_pipeline_definition, "w", encoding="utf-8") as f:
-                yaml.dump(
-                    {"dag": run_context.dag.model_dump(by_alias=True, exclude_none=True)},
-                    f,
-                )
+            caller_stack = inspect.stack()[1]
+            module_to_call = f"{caller_stack.filename.replace('/', '.').replace('.py', '')}.{caller_stack.function}"
-            return
+            run_context.pipeline_file = f"{module_to_call}.py"
         # Prepare for graph execution
         run_context.executor.prepare_for_graph_execution()
@@ -467,4 +588,5 @@ class Pipeline(BaseModel):
         logger.info("Executing the graph")
         run_context.executor.execute_graph(dag=run_context.dag)
-        return run_context.run_log_store.get_run_log_by_id(run_id=run_context.run_id)
+        if run_context.executor._local:
+            return run_context.run_log_store.get_run_log_by_id(run_id=run_context.run_id)

runnable 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

runnable 0.2.0py3-none-any.whl → 0.4.0py3-none-any.whl