PyPI - runnable - Versions diffs - 0.12.2__py3-none-any.whl → 0.13.0__py3-none-any.whl - Mend

runnable 0.12.2py3-none-any.whl → 0.13.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

runnable/extensions/executor/{local/implementation.py → local.py} +0 -2
runnable/extensions/nodes.py +16 -1
runnable/sdk.py +150 -101
runnable/tasks.py +148 -3
runnable-0.13.0.dist-info/METADATA +270 -0
{runnable-0.12.2.dist-info → runnable-0.13.0.dist-info}/RECORD +9 -10
{runnable-0.12.2.dist-info → runnable-0.13.0.dist-info}/WHEEL +1 -1
{runnable-0.12.2.dist-info → runnable-0.13.0.dist-info}/entry_points.txt +1 -1
runnable/extensions/executor/local/__init__.py +0 -0
runnable-0.12.2.dist-info/METADATA +0 -453
{runnable-0.12.2.dist-info → runnable-0.13.0.dist-info}/LICENSE +0 -0

runnable/extensions/executor/{local/implementation.py → local.py} RENAMED Viewed

@@ -19,8 +19,6 @@ class LocalExecutor(GenericExecutor):
     Example config:
     execution:
       type: local
-      config:
-        enable_parallel: True or False to enable parallel.
     """

runnable/extensions/nodes.py CHANGED Viewed

@@ -797,12 +797,27 @@ class DagNode(CompositeNode):
 class StubNode(ExecutableNode):
     """
     Stub is a convenience design node.
     It always returns success in the attempt log and does nothing.
     This node is very similar to pass state in Step functions.
     This node type could be handy when designing the pipeline and stubbing functions
+    --8<-- [start:stub_reference]
+    A stub execution node of the pipeline.
+    Please refer to define pipeline/tasks/stub for more information.
+    As part of the dag definition, a stub task is defined as follows:
+    dag:
+      steps:
+        stub_task: # The name of the node
+          type: stub
+          on_failure: The name of the step to traverse in case of failure
+          next: The next node to execute after this task, use "success" to terminate the pipeline successfully
+            or "fail" to terminate the pipeline with an error.
+    It can take an arbitrary number of parameters, which is handy to temporarily silence a task node.
+    --8<-- [end:stub_reference]
     """
     node_type: str = Field(default="stub", serialization_alias="type")

runnable/sdk.py CHANGED Viewed

@@ -61,11 +61,9 @@ class Catalog(BaseModel):
         put (List[str]): List of glob patterns to put into central catalog from the compute data folder.
     Examples:
-        >>> from runnable import Catalog, Task
+        >>> from runnable import Catalog
         >>> catalog = Catalog(compute_data_folder="/path/to/data", get=["*.csv"], put=["*.csv"])
-        >>> task = Task(name="task", catalog=catalog, command="echo 'hello'")
     """
     model_config = ConfigDict(extra="forbid")  # Need to be for command, would be validated later
@@ -143,50 +141,7 @@ class BaseTraversal(ABC, BaseModel):
 class BaseTask(BaseTraversal):
     """
-    An execution node of the pipeline.
-    Please refer to [concepts](concepts/task.md) for more information.
-    Attributes:
-        name (str): The name of the node.
-        command (str): The command to execute.
-            - For python functions, [dotted path](concepts/task.md/#python_functions) to the function.
-            - For shell commands: command to execute in the shell.
-            - For notebooks: path to the notebook.
-        command_type (str): The type of command to execute.
-            Can be one of "shell", "python", or "notebook".
-        catalog (Optional[Catalog]): The catalog to sync data from/to.
-            Please see Catalog about the structure of the catalog.
-        overrides (Dict[str, Any]): Any overrides to the command.
-            Individual tasks can override the global configuration config by referring to the
-            specific override.
-            For example,
-            ### Global configuration
-            ```yaml
-            executor:
-              type: local-container
-              config:
-                docker_image: "runnable/runnable:latest"
-                overrides:
-                  custom_docker_image:
-                    docker_image: "runnable/runnable:custom"
-            ```
-            ### Task specific configuration
-            ```python
-            task = Task(name="task", command="echo 'hello'", command_type="shell",
-                    overrides={'local-container': custom_docker_image})
-            ```
-        notebook_output_path (Optional[str]): The path to save the notebook output.
-            Only used when command_type is 'notebook', defaults to command+_out.ipynb
-        optional_ploomber_args (Optional[Dict[str, Any]]): Any optional ploomber args.
-            Only used when command_type is 'notebook', defaults to {}
-        output_cell_tag (Optional[str]): The tag of the output cell.
-            Only used when command_type is 'notebook', defaults to "runnable_output"
-        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
-        terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
-        on_failure (str): The name of the node to execute if the step fails.
+    Base task type which has catalog, overrides, returns and secrets.
     """
     catalog: Optional[Catalog] = Field(default=None, alias="catalog")
@@ -220,12 +175,50 @@ class BaseTask(BaseTraversal):
 class PythonTask(BaseTask):
     """
     An execution node of the pipeline of python functions.
+    Please refer to [concepts](concepts/task.md/#python_functions) for more information.
     Attributes:
         name (str): The name of the node.
         function (callable): The function to execute.
-        catalog (Optional[Catalog]): The catalog to sync data from/to.
-            Please see Catalog about the structure of the catalog.
+        terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
+                Defaults to False.
+        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+                Defaults to False.
+        on_failure (str): The name of the node to execute if the step fails.
+        returns List[Union[str, TaskReturns]] : A list of the names of variables to return from the task.
+            The names should match the order of the variables returned by the function.
+            ```TaskReturns```: can be JSON friendly variables, objects or metrics.
+            By default, all variables are assumed to be JSON friendly and will be serialized to JSON.
+            Pydantic models are readily supported and will be serialized to JSON.
+            To return a python object, please use ```pickled(<name>)```.
+            It is advised to use ```pickled(<name>)``` for big JSON friendly variables.
+            For example,
+            ```python
+            from runnable import pickled
+            def f():
+                ...
+                x = 1
+                return x, df # A simple JSON friendly variable and a python object.
+            task = PythonTask(name="task", function=f, returns=["x", pickled("df")])
+            ```
+            To mark any JSON friendly variable as a ```metric```, please use ```metric(x)```.
+            Metric variables should be JSON friendly and can be treated just like any other parameter.
+        catalog Optional[Catalog]: The files sync data from/to, refer to Catalog.
+        secrets List[str]: List of secrets to pass to the task. They are exposed as environment variables
+            and removed after execution.
         overrides (Dict[str, Any]): Any overrides to the command.
             Individual tasks can override the global configuration config by referring to the
             specific override.
@@ -246,11 +239,6 @@ class PythonTask(BaseTask):
             task = PythonTask(name="task", function="function'",
                     overrides={'local-container': custom_docker_image})
             ```
-        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
-        terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
-        on_failure (str): The name of the node to execute if the step fails.
     """
     function: Callable = Field(exclude=True)
@@ -269,15 +257,52 @@ class PythonTask(BaseTask):
 class NotebookTask(BaseTask):
     """
-    An execution node of the pipeline of type notebook.
-    Please refer to [concepts](concepts/task.md) for more information.
+    An execution node of the pipeline of notebook.
+    Please refer to [concepts](concepts/task.md/#notebooks) for more information.
+    We internally use [Ploomber engine](https://github.com/ploomber/ploomber-engine) to execute the notebook.
     Attributes:
         name (str): The name of the node.
-        notebook: The path to the notebook
-        catalog (Optional[Catalog]): The catalog to sync data from/to.
-            Please see Catalog about the structure of the catalog.
-        returns: A list of the names of variables to return from the notebook.
+        notebook (str): The path to the notebook relative to the project root.
+        optional_ploomber_args (Dict[str, Any]): Any optional ploomber args, please refer to
+            [Ploomber engine](https://github.com/ploomber/ploomber-engine) for more information.
+        terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
+                Defaults to False.
+        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+                Defaults to False.
+        on_failure (str): The name of the node to execute if the step fails.
+        returns List[Union[str, TaskReturns]] : A list of the names of variables to return from the task.
+            The names should match the order of the variables returned by the function.
+            ```TaskReturns```: can be JSON friendly variables, objects or metrics.
+            By default, all variables are assumed to be JSON friendly and will be serialized to JSON.
+            Pydantic models are readily supported and will be serialized to JSON.
+            To return a python object, please use ```pickled(<name>)```.
+            It is advised to use ```pickled(<name>)``` for big JSON friendly variables.
+            For example,
+            ```python
+            from runnable import pickled
+            # assume, example.ipynb is the notebook with df and x as variables in some cells.
+            task = NotebookTask(name="task", notebook="example.ipynb", returns=["x", pickled("df")])
+            ```
+            To mark any JSON friendly variable as a ```metric```, please use ```metric(x)```.
+            Metric variables should be JSON friendly and can be treated just like any other parameter.
+        catalog Optional[Catalog]: The files sync data from/to, refer to Catalog.
+        secrets List[str]: List of secrets to pass to the task. They are exposed as environment variables
+        and removed after execution.
         overrides (Dict[str, Any]): Any overrides to the command.
             Individual tasks can override the global configuration config by referring to the
             specific override.
@@ -295,18 +320,9 @@ class NotebookTask(BaseTask):
             ```
             ### Task specific configuration
             ```python
-            task = NotebookTask(name="task", notebook="evaluation.ipynb",
+            task = NotebookTask(name="task", notebook="example.ipynb",
                     overrides={'local-container': custom_docker_image})
             ```
-        notebook_output_path (Optional[str]): The path to save the notebook output.
-            Only used when command_type is 'notebook', defaults to command+_out.ipynb
-        optional_ploomber_args (Optional[Dict[str, Any]]): Any optional ploomber args.
-            Only used when command_type is 'notebook', defaults to {}
-        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
-        terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
-        on_failure (str): The name of the node to execute if the step fails.
     """
     notebook: str = Field(serialization_alias="command")
@@ -319,15 +335,33 @@ class NotebookTask(BaseTask):
 class ShellTask(BaseTask):
     """
-    An execution node of the pipeline of type shell.
-    Please refer to [concepts](concepts/task.md) for more information.
+    An execution node of the pipeline of shell script.
+    Please refer to [concepts](concepts/task.md/#shell) for more information.
     Attributes:
         name (str): The name of the node.
-        command: The shell command to execute.
-        catalog (Optional[Catalog]): The catalog to sync data from/to.
-            Please see Catalog about the structure of the catalog.
-        returns: A list of the names of variables to capture from environment variables of shell.
+        command (str): The shell command to execute.
+        terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
+                Defaults to False.
+        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+                Defaults to False.
+        on_failure (str): The name of the node to execute if the step fails.
+        returns List[str] : A list of the names of environment variables to collect from the task.
+            The names should match the order of the variables returned by the function.
+            Shell based tasks can only return JSON friendly variables.
+            To mark any JSON friendly variable as a ```metric```, please use ```metric(x)```.
+            Metric variables should be JSON friendly and can be treated just like any other parameter.
+        catalog Optional[Catalog]: The files sync data from/to, refer to Catalog.
+        secrets List[str]: List of secrets to pass to the task. They are exposed as environment variables
+        and removed after execution.
         overrides (Dict[str, Any]): Any overrides to the command.
             Individual tasks can override the global configuration config by referring to the
             specific override.
@@ -345,14 +379,10 @@ class ShellTask(BaseTask):
             ```
             ### Task specific configuration
             ```python
-            task = ShellTask(name="task", command="exit 0",
+            task = ShellTask(name="task", command="export x=1",
                     overrides={'local-container': custom_docker_image})
             ```
-        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
-        terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
-        on_failure (str): The name of the node to execute if the step fails.
     """
     command: str = Field(alias="command")
@@ -364,16 +394,20 @@ class ShellTask(BaseTask):
 class Stub(BaseTraversal):
     """
-    A node that does nothing.
+    A node that passes through the pipeline with no action. Just like ```pass``` in Python.
+    Please refer to [concepts](concepts/task.md/#stub) for more information.
     A stub node can take an arbitrary number of arguments.
-    Please refer to [concepts](concepts/stub.md) for more information.
     Attributes:
         name (str): The name of the node.
-        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+        command (str): The path to the notebook relative the project root.
         terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
+                Defaults to False.
+        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+                Defaults to False.
+        on_failure (str): The name of the node to execute if the step fails.
     """
     model_config = ConfigDict(extra="ignore")
@@ -422,12 +456,13 @@ class Map(BaseTraversal):
     Please refer to [concepts](concepts/map.md) for more information.
     Attributes:
-        branch: The pipeline to execute for each item.
+        branch (Pipeline): The pipeline to execute for each item.
-        iterate_on: The name of the parameter to iterate over.
+        iterate_on (str): The name of the parameter to iterate over.
             The parameter should be defined either by previous steps or statically at the start of execution.
-        iterate_as: The name of the iterable to be passed to functions.
+        iterate_as (str): The name of the iterable to be passed to functions.
+        reducer (Callable): The function to reduce the results of the branches.
         overrides (Dict[str, Any]): Any overrides to the command.
@@ -510,29 +545,44 @@ class Fail(BaseModel):
 class Pipeline(BaseModel):
     """
-    A Pipeline is a directed acyclic graph of Steps that define a workflow.
+    A Pipeline is a sequence of Steps.
     Attributes:
-        steps (List[Stub | PythonTask | NotebookTask | ShellTask | Parallel | Map | Success | Fail]):
+        steps (List[Stub | PythonTask | NotebookTask | ShellTask | Parallel | Map]):
             A list of Steps that make up the Pipeline.
-        start_at (Stub | Task | Parallel | Map): The name of the first Step in the Pipeline.
+            The order of steps is important as it determines the order of execution.
+            Any on-failure behavior should be the first step in ```on_failure``` pipelines.
+        on_failure (List[Pipeline], optional): A list of Pipelines to execute in case of failure.
+            For example, for the below pipeline:
+                step1 >> step2
+                and step1 to reach step3 in case of failure.
+                failure_pipeline = Pipeline(steps=[step1, step3])
+                pipeline = Pipeline(steps=[step1, step2], on_failure=[failure_pipeline])
         name (str, optional): The name of the Pipeline. Defaults to "".
         description (str, optional): A description of the Pipeline. Defaults to "".
-        add_terminal_nodes (bool, optional): Whether to add terminal nodes to the Pipeline. Defaults to True.
-    The default behavior is to add "success" and "fail" nodes to the Pipeline.
-    To add custom success and fail nodes, set add_terminal_nodes=False and create success
-    and fail nodes manually.
+    The pipeline implicitly adds success and fail nodes.
     """
-    steps: List[Union[StepType, List[StepType]]]
+    steps: List[Union[StepType, List["Pipeline"]]]
     name: str = ""
     description: str = ""
-    add_terminal_nodes: bool = True  # Adds "success" and "fail" nodes
     internal_branch_name: str = ""
+    @property
+    def add_terminal_nodes(self) -> bool:
+        return True
     _dag: graph.Graph = PrivateAttr()
     model_config = ConfigDict(extra="forbid")
@@ -590,6 +640,7 @@ class Pipeline(BaseModel):
                 Any definition of pipeline should have one node that terminates with success.
         """
         # TODO: Bug with repeat names
+        # TODO: https://github.com/AstraZeneca/runnable/issues/156
         success_path: List[StepType] = []
         on_failure_paths: List[List[StepType]] = []
@@ -598,7 +649,7 @@ class Pipeline(BaseModel):
             if isinstance(step, (Stub, PythonTask, NotebookTask, ShellTask, Parallel, Map)):
                 success_path.append(step)
                 continue
-            on_failure_paths.append(step)
+            # on_failure_paths.append(step)
         if not success_path:
             raise Exception("There should be some success path")
@@ -654,21 +705,19 @@ class Pipeline(BaseModel):
         Traverse and execute all the steps of the pipeline, eg. [local execution](configurations/executors/local.md).
-        Or create the ```yaml``` representation of the pipeline for other executors.
+        Or create the representation of the pipeline for other executors.
         Please refer to [concepts](concepts/executor.md) for more information.
         Args:
             configuration_file (str, optional): The path to the configuration file. Defaults to "".
-                The configuration file can be overridden by the environment variable runnable_CONFIGURATION_FILE.
+                The configuration file can be overridden by the environment variable RUNNABLE_CONFIGURATION_FILE.
             run_id (str, optional): The ID of the run. Defaults to "".
             tag (str, optional): The tag of the run. Defaults to "".
                 Use to group multiple runs.
             parameters_file (str, optional): The path to the parameters file. Defaults to "".
-            use_cached (str, optional): Whether to use cached results. Defaults to "".
-                Provide the run_id of the older execution to recover.
             log_level (str, optional): The log level. Defaults to defaults.LOG_LEVEL.
         """

runnable/tasks.py CHANGED Viewed

@@ -188,7 +188,56 @@ def task_return_to_parameter(task_return: TaskReturns, value: Any) -> Parameter:
 class PythonTaskType(BaseTaskType):  # pylint: disable=too-few-public-methods
-    """The task class for python command."""
+    """
+    --8<-- [start:python_reference]
+    An execution node of the pipeline of python functions.
+    Please refer to define pipeline/tasks/python for more information.
+    As part of the dag definition, a python task is defined as follows:
+    dag:
+      steps:
+        python_task: # The name of the node
+          type: task
+          command_type: python # this is default
+          command: my_module.my_function # the dotted path to the function. Please refer to the yaml section of
+            define pipeline/tasks/python for concrete details.
+          returns:
+            - name: # The name to assign the return value
+              kind: json # the default value is json,
+                can be object for python objects and metric for metrics
+          secrets:
+            - my_secret_key # A list of secrets to expose by secrets manager
+          catalog:
+            get:
+              - A list of glob patterns to get from the catalog to the local file system
+            put:
+              - A list of glob patterns to put to the catalog from the local file system
+          on_failure: The name of the step to traverse in case of failure
+          overrides:
+            Individual tasks can override the global configuration config by referring to the
+            specific override.
+            For example,
+            #Global configuration
+            executor:
+            type: local-container
+            config:
+              docker_image: "runnable/runnable:latest"
+              overrides:
+                custom_docker_image:
+                  docker_image: "runnable/runnable:custom"
+            ## In the node definition
+            overrides:
+              local-container:
+                docker_image: "runnable/runnable:custom"
+            This instruction will override the docker image for the local-container executor.
+          next: The next node to execute after this task, use "success" to terminate the pipeline successfully
+            or "fail" to terminate the pipeline with an error.
+    --8<-- [end:python_reference]
+    """
     task_type: str = Field(default="python", serialization_alias="command_type")
     command: str
@@ -277,7 +326,56 @@ class PythonTaskType(BaseTaskType):  # pylint: disable=too-few-public-methods
 class NotebookTaskType(BaseTaskType):
-    """The task class for Notebook based execution."""
+    """
+    --8<-- [start:notebook_reference]
+    An execution node of the pipeline of notebook execution.
+    Please refer to define pipeline/tasks/notebook for more information.
+    As part of the dag definition, a notebook task is defined as follows:
+    dag:
+      steps:
+        notebook_task: # The name of the node
+          type: task
+          command_type: notebook
+          command: the path to the notebook relative to project root.
+          optional_ploomber_args: a dictionary of arguments to be passed to ploomber engine
+          returns:
+            - name: # The name to assign the return value
+              kind: json # the default value is json,
+                can be object for python objects and metric for metrics
+          secrets:
+            - my_secret_key # A list of secrets to expose by secrets manager
+          catalog:
+            get:
+              - A list of glob patterns to get from the catalog to the local file system
+            put:
+              - A list of glob patterns to put to the catalog from the local file system
+          on_failure: The name of the step to traverse in case of failure
+          overrides:
+            Individual tasks can override the global configuration config by referring to the
+            specific override.
+            For example,
+            #Global configuration
+            executor:
+            type: local-container
+            config:
+              docker_image: "runnable/runnable:latest"
+              overrides:
+                custom_docker_image:
+                  docker_image: "runnable/runnable:custom"
+            ## In the node definition
+            overrides:
+              local-container:
+                docker_image: "runnable/runnable:custom"
+            This instruction will override the docker image for the local-container executor.
+          next: The next node to execute after this task, use "success" to terminate the pipeline successfully
+            or "fail" to terminate the pipeline with an error.
+    --8<-- [end:notebook_reference]
+    """
     task_type: str = Field(default="notebook", serialization_alias="command_type")
     command: str
@@ -410,7 +508,54 @@ class NotebookTaskType(BaseTaskType):
 class ShellTaskType(BaseTaskType):
     """
-    The task class for shell based commands.
+    --8<-- [start:shell_reference]
+    An execution node of the pipeline of shell execution.
+    Please refer to define pipeline/tasks/shell for more information.
+    As part of the dag definition, a shell task is defined as follows:
+    dag:
+      steps:
+        shell_task: # The name of the node
+          type: task
+          command_type: shell
+          command: The command to execute, it could be multiline
+          optional_ploomber_args: a dictionary of arguments to be passed to ploomber engine
+          returns:
+            - name: # The name to assign the return value
+              kind: json # the default value is json,
+                can be object for python objects and metric for metrics
+          secrets:
+            - my_secret_key # A list of secrets to expose by secrets manager
+          catalog:
+            get:
+              - A list of glob patterns to get from the catalog to the local file system
+            put:
+              - A list of glob patterns to put to the catalog from the local file system
+          on_failure: The name of the step to traverse in case of failure
+          overrides:
+            Individual tasks can override the global configuration config by referring to the
+            specific override.
+            For example,
+            #Global configuration
+            executor:
+            type: local-container
+            config:
+              docker_image: "runnable/runnable:latest"
+              overrides:
+                custom_docker_image:
+                  docker_image: "runnable/runnable:custom"
+            ## In the node definition
+            overrides:
+              local-container:
+                docker_image: "runnable/runnable:custom"
+            This instruction will override the docker image for the local-container executor.
+          next: The next node to execute after this task, use "success" to terminate the pipeline successfully
+            or "fail" to terminate the pipeline with an error.
+    --8<-- [end:shell_reference]
     """
     task_type: str = Field(default="shell", serialization_alias="command_type")

runnable 0.12.2__py3-none-any.whl → 0.13.0__py3-none-any.whl

runnable 0.12.2py3-none-any.whl → 0.13.0py3-none-any.whl