PyPI - runnable - Versions diffs - 0.12.1__py3-none-any.whl → 0.12.3__py3-none-any.whl - Mend

runnable 0.12.1-py3-none-any.whl → 0.12.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

runnable/__init__.py +2 -0
runnable/defaults.py +1 -1
runnable/entrypoints.py +7 -6
runnable/extensions/executor/__init__.py +10 -0
runnable/extensions/executor/local_container/implementation.py +54 -1
runnable/sdk.py +150 -101
runnable/tasks.py +23 -24
runnable-0.12.3.dist-info/METADATA +270 -0
{runnable-0.12.1.dist-info → runnable-0.12.3.dist-info}/RECORD +12 -12
{runnable-0.12.1.dist-info → runnable-0.12.3.dist-info}/entry_points.txt +1 -0
runnable-0.12.1.dist-info/METADATA +0 -453
{runnable-0.12.1.dist-info → runnable-0.12.3.dist-info}/LICENSE +0 -0
{runnable-0.12.1.dist-info → runnable-0.12.3.dist-info}/WHEEL +0 -0

runnable/__init__.py CHANGED Viewed

@@ -15,6 +15,8 @@ logger = logging.getLogger(defaults.LOGGER_NAME)
 console = Console(record=True)
 console.print(":runner: Lets go!!")
+task_console = Console(record=True)
 from runnable.sdk import (  # noqa
     Catalog,
     Fail,

runnable/defaults.py CHANGED Viewed

@@ -77,7 +77,7 @@ DEFAULT_CONTAINER_OUTPUT_PARAMETERS = "parameters.json"
 DEFAULT_EXECUTOR = ServiceConfig(type="local", config={})
 DEFAULT_RUN_LOG_STORE = ServiceConfig(type="file-system", config={})
 DEFAULT_CATALOG = ServiceConfig(type="file-system", config={})
-DEFAULT_SECRETS = ServiceConfig(type="do-nothing", config={})
+DEFAULT_SECRETS = ServiceConfig(type="env-secrets", config={})
 DEFAULT_EXPERIMENT_TRACKER = ServiceConfig(type="do-nothing", config={})
 DEFAULT_PICKLER = ServiceConfig(type="pickle", config={})

runnable/entrypoints.py CHANGED Viewed

@@ -9,7 +9,7 @@ from rich.progress import BarColumn, Progress, TextColumn, TimeElapsedColumn
 from rich.table import Column
 import runnable.context as context
-from runnable import console, defaults, graph, utils
+from runnable import console, defaults, graph, task_console, utils
 from runnable.defaults import RunnableConfig, ServiceConfig
 logger = logging.getLogger(defaults.LOGGER_NAME)
@@ -165,6 +165,7 @@ def execute(
         tag=tag,
         parameters_file=parameters_file,
     )
     console.print("Working with context:")
     console.print(run_context)
     console.rule(style="[dark orange]")
@@ -239,7 +240,7 @@ def execute_single_node(
     """
     from runnable import nodes
-    console.print(f"Executing the single node: {step_name} with map variable: {map_variable}")
+    task_console.print(f"Executing the single node: {step_name} with map variable: {map_variable}")
     configuration_file = os.environ.get("RUNNABLE_CONFIGURATION_FILE", configuration_file)
@@ -250,9 +251,9 @@ def execute_single_node(
         tag=tag,
         parameters_file=parameters_file,
     )
-    console.print("Working with context:")
-    console.print(run_context)
-    console.rule(style="[dark orange]")
+    task_console.print("Working with context:")
+    task_console.print(run_context)
+    task_console.rule(style="[dark orange]")
     executor = run_context.executor
     run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
@@ -281,7 +282,7 @@ def execute_single_node(
             node=node_to_execute,
             map_variable=map_variable_dict,
         )
-        console.save_text(log_file_name)
+        task_console.save_text(log_file_name)
         # Put the log file in the catalog
         run_context.catalog_handler.put(name=log_file_name, run_id=run_context.run_id)

runnable/extensions/executor/__init__.py CHANGED Viewed

@@ -11,6 +11,7 @@ from runnable import (
     exceptions,
     integration,
     parameters,
+    task_console,
     utils,
 )
 from runnable.datastore import DataCatalog, JsonParameter, RunLog, StepLog
@@ -340,10 +341,18 @@ class GenericExecutor(BaseExecutor):
             node.execute_as_graph(map_variable=map_variable, **kwargs)
             return
+        task_console.export_text(clear=True)
         task_name = node._resolve_map_placeholders(node.internal_name, map_variable)
         console.print(f":runner: Executing the node {task_name} ... ", style="bold color(208)")
         self.trigger_job(node=node, map_variable=map_variable, **kwargs)
+        log_file_name = utils.make_log_file_name(node=node, map_variable=map_variable)
+        task_console.save_text(log_file_name, clear=True)
+        self._context.catalog_handler.put(name=log_file_name, run_id=self._context.run_id)
+        os.remove(log_file_name)
     def trigger_job(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
         """
         Call this method only if we are responsible for traversing the graph via
@@ -493,6 +502,7 @@ class GenericExecutor(BaseExecutor):
         logger.info(f"Finished execution of the {branch} with status {run_log.status}")
+        # We are in the root dag
         if dag == self._context.dag:
             run_log = cast(RunLog, run_log)
             console.print("Completed Execution, Summary:", style="bold color(208)")

runnable/extensions/executor/local_container/implementation.py CHANGED Viewed

@@ -5,7 +5,7 @@ from typing import Dict, cast
 from pydantic import Field
 from rich import print
-from runnable import defaults, utils
+from runnable import console, defaults, task_console, utils
 from runnable.datastore import StepLog
 from runnable.defaults import TypeMapVariable
 from runnable.extensions.executor import GenericExecutor
@@ -96,6 +96,59 @@ class LocalContainerExecutor(GenericExecutor):
         """
         return self._execute_node(node, map_variable, **kwargs)
+    def execute_from_graph(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
+        """
+        This is the entry point to from the graph execution.
+        While the self.execute_graph is responsible for traversing the graph, this function is responsible for
+        actual execution of the node.
+        If the node type is:
+            * task : We can delegate to _execute_node after checking the eligibility for re-run in cases of a re-run
+            * success: We can delegate to _execute_node
+            * fail: We can delegate to _execute_node
+        For nodes that are internally graphs:
+            * parallel: Delegate the responsibility of execution to the node.execute_as_graph()
+            * dag: Delegate the responsibility of execution to the node.execute_as_graph()
+            * map: Delegate the responsibility of execution to the node.execute_as_graph()
+        Transpilers will NEVER use this method and will NEVER call ths method.
+        This method should only be used by interactive executors.
+        Args:
+            node (Node): The node to execute
+            map_variable (dict, optional): If the node if of a map state, this corresponds to the value of iterable.
+                    Defaults to None.
+        """
+        step_log = self._context.run_log_store.create_step_log(node.name, node._get_step_log_name(map_variable))
+        self.add_code_identities(node=node, step_log=step_log)
+        step_log.step_type = node.node_type
+        step_log.status = defaults.PROCESSING
+        self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+        logger.info(f"Executing node: {node.get_summary()}")
+        # Add the step log to the database as per the situation.
+        # If its a terminal node, complete it now
+        if node.node_type in ["success", "fail"]:
+            self._execute_node(node, map_variable=map_variable, **kwargs)
+            return
+        # We call an internal function to iterate the sub graphs and execute them
+        if node.is_composite:
+            node.execute_as_graph(map_variable=map_variable, **kwargs)
+            return
+        task_console.export_text(clear=True)
+        task_name = node._resolve_map_placeholders(node.internal_name, map_variable)
+        console.print(f":runner: Executing the node {task_name} ... ", style="bold color(208)")
+        self.trigger_job(node=node, map_variable=map_variable, **kwargs)
     def execute_job(self, node: TaskNode):
         """
         Set up the step log and call the execute node

runnable/sdk.py CHANGED Viewed

@@ -61,11 +61,9 @@ class Catalog(BaseModel):
         put (List[str]): List of glob patterns to put into central catalog from the compute data folder.
     Examples:
-        >>> from runnable import Catalog, Task
+        >>> from runnable import Catalog
         >>> catalog = Catalog(compute_data_folder="/path/to/data", get=["*.csv"], put=["*.csv"])
-        >>> task = Task(name="task", catalog=catalog, command="echo 'hello'")
     """
     model_config = ConfigDict(extra="forbid")  # Need to be for command, would be validated later
@@ -143,50 +141,7 @@ class BaseTraversal(ABC, BaseModel):
 class BaseTask(BaseTraversal):
     """
-    An execution node of the pipeline.
-    Please refer to [concepts](concepts/task.md) for more information.
-    Attributes:
-        name (str): The name of the node.
-        command (str): The command to execute.
-            - For python functions, [dotted path](concepts/task.md/#python_functions) to the function.
-            - For shell commands: command to execute in the shell.
-            - For notebooks: path to the notebook.
-        command_type (str): The type of command to execute.
-            Can be one of "shell", "python", or "notebook".
-        catalog (Optional[Catalog]): The catalog to sync data from/to.
-            Please see Catalog about the structure of the catalog.
-        overrides (Dict[str, Any]): Any overrides to the command.
-            Individual tasks can override the global configuration config by referring to the
-            specific override.
-            For example,
-            ### Global configuration
-            ```yaml
-            executor:
-              type: local-container
-              config:
-                docker_image: "runnable/runnable:latest"
-                overrides:
-                  custom_docker_image:
-                    docker_image: "runnable/runnable:custom"
-            ```
-            ### Task specific configuration
-            ```python
-            task = Task(name="task", command="echo 'hello'", command_type="shell",
-                    overrides={'local-container': custom_docker_image})
-            ```
-        notebook_output_path (Optional[str]): The path to save the notebook output.
-            Only used when command_type is 'notebook', defaults to command+_out.ipynb
-        optional_ploomber_args (Optional[Dict[str, Any]]): Any optional ploomber args.
-            Only used when command_type is 'notebook', defaults to {}
-        output_cell_tag (Optional[str]): The tag of the output cell.
-            Only used when command_type is 'notebook', defaults to "runnable_output"
-        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
-        terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
-        on_failure (str): The name of the node to execute if the step fails.
+    Base task type which has catalog, overrides, returns and secrets.
     """
     catalog: Optional[Catalog] = Field(default=None, alias="catalog")
@@ -220,12 +175,50 @@ class BaseTask(BaseTraversal):
 class PythonTask(BaseTask):
     """
     An execution node of the pipeline of python functions.
+    Please refer to [concepts](concepts/task.md/#python_functions) for more information.
     Attributes:
         name (str): The name of the node.
         function (callable): The function to execute.
-        catalog (Optional[Catalog]): The catalog to sync data from/to.
-            Please see Catalog about the structure of the catalog.
+        terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
+                Defaults to False.
+        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+                Defaults to False.
+        on_failure (str): The name of the node to execute if the step fails.
+        returns List[Union[str, TaskReturns]] : A list of the names of variables to return from the task.
+            The names should match the order of the variables returned by the function.
+            ```TaskReturns```: can be JSON friendly variables, objects or metrics.
+            By default, all variables are assumed to be JSON friendly and will be serialized to JSON.
+            Pydantic models are readily supported and will be serialized to JSON.
+            To return a python object, please use ```pickled(<name>)```.
+            It is advised to use ```pickled(<name>)``` for big JSON friendly variables.
+            For example,
+            ```python
+            from runnable import pickled
+            def f():
+                ...
+                x = 1
+                return x, df # A simple JSON friendly variable and a python object.
+            task = PythonTask(name="task", function=f, returns=["x", pickled(df)]))
+            ```
+            To mark any JSON friendly variable as a ```metric```, please use ```metric(x)```.
+            Metric variables should be JSON friendly and can be treated just like any other parameter.
+        catalog Optional[Catalog]: The files sync data from/to, refer to Catalog.
+        secrets List[str]: List of secrets to pass to the task. They are exposed as environment variables
+            and removed after execution.
         overrides (Dict[str, Any]): Any overrides to the command.
             Individual tasks can override the global configuration config by referring to the
             specific override.
@@ -246,11 +239,6 @@ class PythonTask(BaseTask):
             task = PythonTask(name="task", function="function'",
                     overrides={'local-container': custom_docker_image})
             ```
-        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
-        terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
-        on_failure (str): The name of the node to execute if the step fails.
     """
     function: Callable = Field(exclude=True)
@@ -269,15 +257,52 @@ class PythonTask(BaseTask):
 class NotebookTask(BaseTask):
     """
-    An execution node of the pipeline of type notebook.
-    Please refer to [concepts](concepts/task.md) for more information.
+    An execution node of the pipeline of notebook.
+    Please refer to [concepts](concepts/task.md/#notebooks) for more information.
+    We internally use [Ploomber engine](https://github.com/ploomber/ploomber-engine) to execute the notebook.
     Attributes:
         name (str): The name of the node.
-        notebook: The path to the notebook
-        catalog (Optional[Catalog]): The catalog to sync data from/to.
-            Please see Catalog about the structure of the catalog.
-        returns: A list of the names of variables to return from the notebook.
+        notebook (str): The path to the notebook relative the project root.
+        optional_ploomber_args (Dict[str, Any]): Any optional ploomber args, please refer to
+            [Ploomber engine](https://github.com/ploomber/ploomber-engine) for more information.
+        terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
+                Defaults to False.
+        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+                Defaults to False.
+        on_failure (str): The name of the node to execute if the step fails.
+        returns List[Union[str, TaskReturns]] : A list of the names of variables to return from the task.
+            The names should match the order of the variables returned by the function.
+            ```TaskReturns```: can be JSON friendly variables, objects or metrics.
+            By default, all variables are assumed to be JSON friendly and will be serialized to JSON.
+            Pydantic models are readily supported and will be serialized to JSON.
+            To return a python object, please use ```pickled(<name>)```.
+            It is advised to use ```pickled(<name>)``` for big JSON friendly variables.
+            For example,
+            ```python
+            from runnable import pickled
+            # assume, example.ipynb is the notebook with df and x as variables in some cells.
+            task = Notebook(name="task", notebook="example.ipynb", returns=["x", pickled(df)]))
+            ```
+            To mark any JSON friendly variable as a ```metric```, please use ```metric(x)```.
+            Metric variables should be JSON friendly and can be treated just like any other parameter.
+        catalog Optional[Catalog]: The files sync data from/to, refer to Catalog.
+        secrets List[str]: List of secrets to pass to the task. They are exposed as environment variables
+        and removed after execution.
         overrides (Dict[str, Any]): Any overrides to the command.
             Individual tasks can override the global configuration config by referring to the
             specific override.
@@ -295,18 +320,9 @@ class NotebookTask(BaseTask):
             ```
             ### Task specific configuration
             ```python
-            task = NotebookTask(name="task", notebook="evaluation.ipynb",
+            task = NotebookTask(name="task", notebook="example.ipynb",
                     overrides={'local-container': custom_docker_image})
             ```
-        notebook_output_path (Optional[str]): The path to save the notebook output.
-            Only used when command_type is 'notebook', defaults to command+_out.ipynb
-        optional_ploomber_args (Optional[Dict[str, Any]]): Any optional ploomber args.
-            Only used when command_type is 'notebook', defaults to {}
-        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
-        terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
-        on_failure (str): The name of the node to execute if the step fails.
     """
     notebook: str = Field(serialization_alias="command")
@@ -319,15 +335,33 @@ class NotebookTask(BaseTask):
 class ShellTask(BaseTask):
     """
-    An execution node of the pipeline of type shell.
-    Please refer to [concepts](concepts/task.md) for more information.
+    An execution node of the pipeline of shell script.
+    Please refer to [concepts](concepts/task.md/#shell) for more information.
     Attributes:
         name (str): The name of the node.
-        command: The shell command to execute.
-        catalog (Optional[Catalog]): The catalog to sync data from/to.
-            Please see Catalog about the structure of the catalog.
-        returns: A list of the names of variables to capture from environment variables of shell.
+        command (str): The path to the notebook relative the project root.
+        terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
+                Defaults to False.
+        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+                Defaults to False.
+        on_failure (str): The name of the node to execute if the step fails.
+        returns List[str] : A list of the names of environment variables to collect from the task.
+            The names should match the order of the variables returned by the function.
+            Shell based tasks can only return JSON friendly variables.
+            To mark any JSON friendly variable as a ```metric```, please use ```metric(x)```.
+            Metric variables should be JSON friendly and can be treated just like any other parameter.
+        catalog Optional[Catalog]: The files sync data from/to, refer to Catalog.
+        secrets List[str]: List of secrets to pass to the task. They are exposed as environment variables
+        and removed after execution.
         overrides (Dict[str, Any]): Any overrides to the command.
             Individual tasks can override the global configuration config by referring to the
             specific override.
@@ -345,14 +379,10 @@ class ShellTask(BaseTask):
             ```
             ### Task specific configuration
             ```python
-            task = ShellTask(name="task", command="exit 0",
+            task = ShellTask(name="task", command="export x=1",
                     overrides={'local-container': custom_docker_image})
             ```
-        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
-        terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
-        on_failure (str): The name of the node to execute if the step fails.
     """
     command: str = Field(alias="command")
@@ -364,16 +394,20 @@ class ShellTask(BaseTask):
 class Stub(BaseTraversal):
     """
-    A node that does nothing.
+    A node that passes through the pipeline with no action. Just like ```pass``` in Python.
+    Please refer to [concepts](concepts/task.md/#stub) for more information.
     A stub node can tak arbitrary number of arguments.
-    Please refer to [concepts](concepts/stub.md) for more information.
     Attributes:
         name (str): The name of the node.
-        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+        command (str): The path to the notebook relative the project root.
         terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
+                Defaults to False.
+        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+                Defaults to False.
+        on_failure (str): The name of the node to execute if the step fails.
     """
     model_config = ConfigDict(extra="ignore")
@@ -422,12 +456,13 @@ class Map(BaseTraversal):
     Please refer to [concepts](concepts/map.md) for more information.
     Attributes:
-        branch: The pipeline to execute for each item.
+        branch (Pipeline): The pipeline to execute for each item.
-        iterate_on: The name of the parameter to iterate over.
+        iterate_on (str): The name of the parameter to iterate over.
             The parameter should be defined either by previous steps or statically at the start of execution.
-        iterate_as: The name of the iterable to be passed to functions.
+        iterate_as (str): The name of the iterable to be passed to functions.
+        reducer (Callable): The function to reduce the results of the branches.
         overrides (Dict[str, Any]): Any overrides to the command.
@@ -510,29 +545,44 @@ class Fail(BaseModel):
 class Pipeline(BaseModel):
     """
-    A Pipeline is a directed acyclic graph of Steps that define a workflow.
+    A Pipeline is a sequence of Steps.
     Attributes:
-        steps (List[Stub | PythonTask | NotebookTask | ShellTask | Parallel | Map | Success | Fail]):
+        steps (List[Stub | PythonTask | NotebookTask | ShellTask | Parallel | Map]]):
             A list of Steps that make up the Pipeline.
-        start_at (Stub | Task | Parallel | Map): The name of the first Step in the Pipeline.
+            The order of steps is important as it determines the order of execution.
+            Any on failure behavior should the first step in ```on_failure``` pipelines.
+        on_failure (List[List[Pipeline], optional): A list of Pipelines to execute in case of failure.
+            For example, for the below pipeline:
+                step1 >> step2
+                and step1 to reach step3 in case of failure.
+                failure_pipeline = Pipeline(steps=[step1, step3])
+                pipeline = Pipeline(steps=[step1, step2, on_failure=[failure_pipeline])
         name (str, optional): The name of the Pipeline. Defaults to "".
         description (str, optional): A description of the Pipeline. Defaults to "".
-        add_terminal_nodes (bool, optional): Whether to add terminal nodes to the Pipeline. Defaults to True.
-    The default behavior is to add "success" and "fail" nodes to the Pipeline.
-    To add custom success and fail nodes, set add_terminal_nodes=False and create success
-    and fail nodes manually.
+    The pipeline implicitly add success and fail nodes.
     """
-    steps: List[Union[StepType, List[StepType]]]
+    steps: List[Union[StepType, List["Pipeline"]]]
     name: str = ""
     description: str = ""
-    add_terminal_nodes: bool = True  # Adds "success" and "fail" nodes
     internal_branch_name: str = ""
+    @property
+    def add_terminal_nodes(self) -> bool:
+        return True
     _dag: graph.Graph = PrivateAttr()
     model_config = ConfigDict(extra="forbid")
@@ -590,6 +640,7 @@ class Pipeline(BaseModel):
                 Any definition of pipeline should have one node that terminates with success.
         """
         # TODO: Bug with repeat names
+        # TODO: https://github.com/AstraZeneca/runnable/issues/156
         success_path: List[StepType] = []
         on_failure_paths: List[List[StepType]] = []
@@ -598,7 +649,7 @@ class Pipeline(BaseModel):
             if isinstance(step, (Stub, PythonTask, NotebookTask, ShellTask, Parallel, Map)):
                 success_path.append(step)
                 continue
-            on_failure_paths.append(step)
+            # on_failure_paths.append(step)
         if not success_path:
             raise Exception("There should be some success path")
@@ -654,21 +705,19 @@ class Pipeline(BaseModel):
         Traverse and execute all the steps of the pipeline, eg. [local execution](configurations/executors/local.md).
-        Or create the ```yaml``` representation of the pipeline for other executors.
+        Or create the representation of the pipeline for other executors.
         Please refer to [concepts](concepts/executor.md) for more information.
         Args:
             configuration_file (str, optional): The path to the configuration file. Defaults to "".
-                The configuration file can be overridden by the environment variable runnable_CONFIGURATION_FILE.
+                The configuration file can be overridden by the environment variable RUNNABLE_CONFIGURATION_FILE.
             run_id (str, optional): The ID of the run. Defaults to "".
             tag (str, optional): The tag of the run. Defaults to "".
                 Use to group multiple runs.
             parameters_file (str, optional): The path to the parameters file. Defaults to "".
-            use_cached (str, optional): Whether to use cached results. Defaults to "".
-                Provide the run_id of the older execution to recover.
             log_level (str, optional): The log level. Defaults to defaults.LOG_LEVEL.
         """

runnable 0.12.1__py3-none-any.whl → 0.12.3__py3-none-any.whl

runnable 0.12.1-py3-none-any.whl → 0.12.3-py3-none-any.whl