runnable 0.3.0__tar.gz → 0.5.0__tar.gz

Files changed (70)
  1. {runnable-0.3.0 → runnable-0.5.0}/PKG-INFO +1 -1
  2. {runnable-0.3.0 → runnable-0.5.0}/pyproject.toml +2 -1
  3. {runnable-0.3.0 → runnable-0.5.0}/runnable/__init__.py +12 -1
  4. {runnable-0.3.0 → runnable-0.5.0}/runnable/cli.py +1 -4
  5. {runnable-0.3.0 → runnable-0.5.0}/runnable/context.py +0 -2
  6. {runnable-0.3.0 → runnable-0.5.0}/runnable/datastore.py +0 -4
  7. {runnable-0.3.0 → runnable-0.5.0}/runnable/defaults.py +1 -1
  8. {runnable-0.3.0 → runnable-0.5.0}/runnable/entrypoints.py +3 -16
  9. {runnable-0.3.0 → runnable-0.5.0}/runnable/executor.py +1 -41
  10. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/__init__.py +4 -98
  11. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/mocked/implementation.py +1 -26
  12. runnable-0.5.0/runnable/extensions/executor/retry/implementation.py +305 -0
  13. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/file_system/implementation.py +0 -2
  14. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/generic_chunked.py +0 -2
  15. runnable-0.5.0/runnable/extensions/secrets/env_secrets/__init__.py +0 -0
  16. {runnable-0.3.0 → runnable-0.5.0}/runnable/sdk.py +157 -38
  17. {runnable-0.3.0 → runnable-0.5.0}/LICENSE +0 -0
  18. {runnable-0.3.0 → runnable-0.5.0}/README.md +0 -0
  19. {runnable-0.3.0 → runnable-0.5.0}/runnable/catalog.py +0 -0
  20. {runnable-0.3.0 → runnable-0.5.0}/runnable/exceptions.py +0 -0
  21. {runnable-0.3.0 → runnable-0.5.0}/runnable/experiment_tracker.py +0 -0
  22. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/__init__.py +0 -0
  23. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/catalog/__init__.py +0 -0
  24. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/catalog/file_system/__init__.py +0 -0
  25. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/catalog/file_system/implementation.py +0 -0
  26. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
  27. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/catalog/k8s_pvc/implementation.py +0 -0
  28. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/catalog/k8s_pvc/integration.py +0 -0
  29. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/argo/__init__.py +0 -0
  30. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/argo/implementation.py +0 -0
  31. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/argo/specification.yaml +0 -0
  32. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/k8s_job/__init__.py +0 -0
  33. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/k8s_job/implementation_FF.py +0 -0
  34. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/k8s_job/integration_FF.py +0 -0
  35. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/local/__init__.py +0 -0
  36. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/local/implementation.py +0 -0
  37. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/local_container/__init__.py +0 -0
  38. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/local_container/implementation.py +0 -0
  39. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/mocked/__init__.py +0 -0
  40. {runnable-0.3.0/runnable/extensions/experiment_tracker → runnable-0.5.0/runnable/extensions/executor/retry}/__init__.py +0 -0
  41. {runnable-0.3.0/runnable/extensions/experiment_tracker/mlflow → runnable-0.5.0/runnable/extensions/experiment_tracker}/__init__.py +0 -0
  42. {runnable-0.3.0/runnable/extensions/run_log_store → runnable-0.5.0/runnable/extensions/experiment_tracker/mlflow}/__init__.py +0 -0
  43. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/experiment_tracker/mlflow/implementation.py +0 -0
  44. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/nodes.py +0 -0
  45. {runnable-0.3.0/runnable/extensions/run_log_store/chunked_file_system → runnable-0.5.0/runnable/extensions/run_log_store}/__init__.py +0 -0
  46. {runnable-0.3.0/runnable/extensions/run_log_store/chunked_k8s_pvc → runnable-0.5.0/runnable/extensions/run_log_store/chunked_file_system}/__init__.py +0 -0
  47. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/chunked_file_system/implementation.py +0 -0
  48. {runnable-0.3.0/runnable/extensions/run_log_store/file_system → runnable-0.5.0/runnable/extensions/run_log_store/chunked_k8s_pvc}/__init__.py +0 -0
  49. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +0 -0
  50. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +0 -0
  51. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/db/implementation_FF.py +0 -0
  52. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/db/integration_FF.py +0 -0
  53. {runnable-0.3.0/runnable/extensions/run_log_store/k8s_pvc → runnable-0.5.0/runnable/extensions/run_log_store/file_system}/__init__.py +0 -0
  54. {runnable-0.3.0/runnable/extensions/secrets → runnable-0.5.0/runnable/extensions/run_log_store/k8s_pvc}/__init__.py +0 -0
  55. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/k8s_pvc/implementation.py +0 -0
  56. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/k8s_pvc/integration.py +0 -0
  57. {runnable-0.3.0/runnable/extensions/secrets/dotenv → runnable-0.5.0/runnable/extensions/secrets}/__init__.py +0 -0
  58. {runnable-0.3.0/runnable/extensions/secrets/env_secrets → runnable-0.5.0/runnable/extensions/secrets/dotenv}/__init__.py +0 -0
  59. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/secrets/dotenv/implementation.py +0 -0
  60. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/secrets/env_secrets/implementation.py +0 -0
  61. {runnable-0.3.0 → runnable-0.5.0}/runnable/graph.py +0 -0
  62. {runnable-0.3.0 → runnable-0.5.0}/runnable/integration.py +0 -0
  63. {runnable-0.3.0 → runnable-0.5.0}/runnable/interaction.py +0 -0
  64. {runnable-0.3.0 → runnable-0.5.0}/runnable/names.py +0 -0
  65. {runnable-0.3.0 → runnable-0.5.0}/runnable/nodes.py +0 -0
  66. {runnable-0.3.0 → runnable-0.5.0}/runnable/parameters.py +0 -0
  67. {runnable-0.3.0 → runnable-0.5.0}/runnable/pickler.py +0 -0
  68. {runnable-0.3.0 → runnable-0.5.0}/runnable/secrets.py +0 -0
  69. {runnable-0.3.0 → runnable-0.5.0}/runnable/tasks.py +0 -0
  70. {runnable-0.3.0 → runnable-0.5.0}/runnable/utils.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: runnable
- Version: 0.3.0
+ Version: 0.5.0
  Summary: A Compute agnostic pipelining software
  Home-page: https://github.com/vijayvammi/runnable
  License: Apache-2.0
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "runnable"
- version = "0.3.0"
+ version = "0.5.0"
  description = "A Compute agnostic pipelining software"
  authors = ["Vijay Vammi <mesanthu@gmail.com>"]
  license = "Apache-2.0"
@@ -68,6 +68,7 @@ runnable= 'runnable.cli:cli'
  "local-container" = "runnable.extensions.executor.local_container.implementation:LocalContainerExecutor"
  "argo" = "runnable.extensions.executor.argo.implementation:ArgoExecutor"
  "mocked" = "runnable.extensions.executor.mocked.implementation:MockedExecutor"
+ "retry" = "runnable.extensions.executor.retry.implementation:RetryExecutor"

  # Plugins for Catalog
  [tool.poetry.plugins."catalog"]
@@ -24,7 +24,18 @@ from runnable.interaction import (
  set_parameter,
  track_this,
  ) # noqa
- from runnable.sdk import Stub, Pipeline, Task, Parallel, Map, Catalog, Success, Fail # noqa
+ from runnable.sdk import (
+ Stub,
+ Pipeline,
+ Parallel,
+ Map,
+ Catalog,
+ Success,
+ Fail,
+ PythonTask,
+ NotebookTask,
+ ShellTask,
+ ) # noqa


  # TODO: Think of model registry as a central place to store models.
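In 0.5.0 the single `Task` export is replaced by task-type specific classes (`PythonTask`, `NotebookTask`, `ShellTask`). A minimal sketch of the new SDK surface is below; the field names come from the sdk.py docstrings later in this diff, but the exact call pattern (in particular `start_at` and `Pipeline.execute()`) is an assumption rather than verified usage.

```python
# Hedged sketch of the 0.5.0 SDK exports; signatures are assumed from the
# docstrings in this diff, not verified against the released package.
from runnable import Pipeline, PythonTask


def train_model():
    print("training...")


# PythonTask now takes the callable itself; the command string is computed
# from the function's module and name (see sdk.py further down).
train = PythonTask(name="train", function=train_model, terminate_with_success=True)

pipeline = Pipeline(steps=[train], start_at=train)
pipeline.execute()  # assumed entry point; see the Pipeline changes at the end of this diff
```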
@@ -41,8 +41,7 @@ def cli():
  )
  @click.option("--tag", default="", help="A tag attached to the run")
  @click.option("--run-id", help="An optional run_id, one would be generated if not provided")
- @click.option("--use-cached", help="Provide the previous run_id to re-run.", show_default=True)
- def execute(file, config_file, parameters_file, log_level, tag, run_id, use_cached): # pragma: no cover
+ def execute(file, config_file, parameters_file, log_level, tag, run_id): # pragma: no cover
  """
  Execute a pipeline

@@ -59,7 +58,6 @@ def execute(file, config_file, parameters_file, log_level, tag, run_id, use_cach
  [default: ]
  --run-id TEXT An optional run_id, one would be generated if not
  provided
- --use-cached TEXT Provide the previous run_id to re-run.
  """
  logger.setLevel(log_level)
  entrypoints.execute(
@@ -67,7 +65,6 @@ def execute(file, config_file, parameters_file, log_level, tag, run_id, use_cach
  pipeline_file=file,
  tag=tag,
  run_id=run_id,
- use_cached=use_cached,
  parameters_file=parameters_file,
  )

@@ -24,8 +24,6 @@ class Context(BaseModel):
  tag: str = ""
  run_id: str = ""
  variables: Dict[str, str] = {}
- use_cached: bool = False
- original_run_id: str = ""
  dag: Optional[Graph] = None
  dag_hash: str = ""
  execution_plan: str = ""
@@ -169,9 +169,7 @@ class RunLog(BaseModel):

  run_id: str
  dag_hash: Optional[str] = None
- use_cached: bool = False
  tag: Optional[str] = ""
- original_run_id: Optional[str] = ""
  status: str = defaults.FAIL
  steps: OrderedDict[str, StepLog] = Field(default_factory=OrderedDict)
  parameters: Dict[str, Any] = Field(default_factory=dict)
@@ -659,9 +657,7 @@ class BufferRunLogstore(BaseRunLogStore):
  self.run_log = RunLog(
  run_id=run_id,
  dag_hash=dag_hash,
- use_cached=use_cached,
  tag=tag,
- original_run_id=original_run_id,
  status=status,
  )
  return self.run_log
@@ -35,7 +35,7 @@ class ServiceConfig(TypedDict):
  config: Mapping[str, Any]


- class runnableConfig(TypedDict, total=False):
+ class RunnableConfig(TypedDict, total=False):
  run_log_store: Optional[ServiceConfig]
  secrets: Optional[ServiceConfig]
  catalog: Optional[ServiceConfig]
@@ -9,12 +9,12 @@ from rich import print

  import runnable.context as context
  from runnable import defaults, graph, utils
- from runnable.defaults import ServiceConfig, runnableConfig
+ from runnable.defaults import RunnableConfig, ServiceConfig

  logger = logging.getLogger(defaults.LOGGER_NAME)


- def get_default_configs() -> runnableConfig:
+ def get_default_configs() -> RunnableConfig:
  """
  User can provide extensions as part of their code base, runnable-config.yaml provides the place to put them.
  """
@@ -37,7 +37,6 @@ def prepare_configurations(
  configuration_file: str = "",
  pipeline_file: str = "",
  tag: str = "",
- use_cached: str = "",
  parameters_file: str = "",
  force_local_executor: bool = False,
  ) -> context.Context:
@@ -51,7 +50,6 @@
  pipeline_file (str): The config/dag file
  run_id (str): The run id of the run.
  tag (str): If a tag is provided at the run time
- use_cached (str): Provide the run_id of the older run

  Returns:
  executor.BaseExecutor : A prepared executor as per the dag/config
@@ -64,7 +62,7 @@ def prepare_configurations(
  if configuration_file:
  templated_configuration = utils.load_yaml(configuration_file) or {}

- configuration: runnableConfig = cast(runnableConfig, templated_configuration)
+ configuration: RunnableConfig = cast(RunnableConfig, templated_configuration)

  # Run log settings, configuration over-rides everything
  run_log_config: Optional[ServiceConfig] = configuration.get("run_log_store", None)
@@ -141,11 +139,6 @@ def prepare_configurations(
  run_context.pipeline_file = pipeline_file
  run_context.dag = dag

- run_context.use_cached = False
- if use_cached:
- run_context.use_cached = True
- run_context.original_run_id = use_cached
-
  context.run_context = run_context

  return run_context
@@ -156,7 +149,6 @@ def execute(
  pipeline_file: str,
  tag: str = "",
  run_id: str = "",
- use_cached: str = "",
  parameters_file: str = "",
  ):
  # pylint: disable=R0914,R0913
@@ -168,10 +160,8 @@
  pipeline_file (str): The config/dag file
  run_id (str): The run id of the run.
  tag (str): If a tag is provided at the run time
- use_cached (str): The previous run_id to use.
  parameters_file (str): The parameters being sent in to the application
  """
- # Re run settings
  run_id = utils.generate_run_id(run_id=run_id)

  run_context = prepare_configurations(
@@ -179,7 +169,6 @@
  pipeline_file=pipeline_file,
  run_id=run_id,
  tag=tag,
- use_cached=use_cached,
  parameters_file=parameters_file,
  )
  print("Working with context:")
@@ -231,7 +220,6 @@ def execute_single_node(
  pipeline_file=pipeline_file,
  run_id=run_id,
  tag=tag,
- use_cached="",
  parameters_file=parameters_file,
  )
  print("Working with context:")
@@ -416,7 +404,6 @@ def fan(
  pipeline_file=pipeline_file,
  run_id=run_id,
  tag=tag,
- use_cached="",
  parameters_file=parameters_file,
  )
  print("Working with context:")
@@ -9,7 +9,7 @@ from pydantic import BaseModel, ConfigDict

  import runnable.context as context
  from runnable import defaults
- from runnable.datastore import DataCatalog, RunLog, StepLog
+ from runnable.datastore import DataCatalog, StepLog
  from runnable.defaults import TypeMapVariable
  from runnable.graph import Graph

@@ -36,9 +36,6 @@ class BaseExecutor(ABC, BaseModel):

  overrides: dict = {}

- # TODO: This needs to go away
- _previous_run_log: Optional[RunLog] = None
- _single_step: str = ""
  _local: bool = False # This is a flag to indicate whether the executor is local or not.

  _context_step_log = None # type : StepLog
@@ -60,21 +57,6 @@
  """
  ...

- # TODO: This needs to go away
- @abstractmethod
- def _set_up_for_re_run(self, parameters: Dict[str, Any]) -> None:
- """
- Set up the executor for using a previous execution.
-
- Retrieve the older run log, error out if it does not exist.
- Sync the catalogs from the previous run log with the current one.
-
- Update the parameters of this execution with the previous one. The previous one take precedence.
-
- Args:
- parameters (Dict[str, Any]): The parameters for the current execution.
- """
-
  @abstractmethod
  def _set_up_run_log(self, exists_ok=False):
  """
@@ -293,28 +275,6 @@ class BaseExecutor(ABC, BaseModel):
  """
  ...

- # TODO: This needs to go away
- @abstractmethod
- def _is_step_eligible_for_rerun(self, node: BaseNode, map_variable: TypeMapVariable = None):
- """
- In case of a re-run, this method checks to see if the previous run step status to determine if a re-run is
- necessary.
- * True: If its not a re-run.
- * True: If its a re-run and we failed in the last run or the corresponding logs do not exist.
- * False: If its a re-run and we succeeded in the last run.
-
- Most cases, this logic need not be touched
-
- Args:
- node (Node): The node to check against re-run
- map_variable (dict, optional): If the node if of a map state, this corresponds to the value of iterable..
- Defaults to None.
-
- Returns:
- bool: Eligibility for re-run. True means re-run, False means skip to the next step.
- """
- ...
-
  @abstractmethod
  def send_return_code(self, stage="traversal"):
  """
@@ -3,12 +3,12 @@ import json
  import logging
  import os
  from abc import abstractmethod
- from typing import Any, Dict, List, Optional, cast
+ from typing import Any, Dict, List, Optional

  from rich import print

  from runnable import context, defaults, exceptions, integration, parameters, utils
- from runnable.datastore import DataCatalog, RunLog, StepLog
+ from runnable.datastore import DataCatalog, StepLog
  from runnable.defaults import TypeMapVariable
  from runnable.executor import BaseExecutor
  from runnable.experiment_tracker import get_tracked_data
@@ -40,20 +40,6 @@ class GenericExecutor(BaseExecutor):
  def _context(self):
  return context.run_context

- @property
- def step_decorator_run_id(self):
- """
- TODO: Experimental feature, design is not mature yet.
-
- This function is used by the decorator function.
- The design idea is we can over-ride this method in different implementations to retrieve the run_id.
- But is it really intrusive to ask to set the environmental variable runnable_RUN_ID?
-
- Returns:
- _type_: _description_
- """
- return os.environ.get("runnable_RUN_ID", None)
-
  def _get_parameters(self) -> Dict[str, Any]:
  """
  Consolidate the parameters from the environment variables
@@ -72,28 +58,6 @@ class GenericExecutor(BaseExecutor):
  params.update(parameters.get_user_set_parameters())
  return params

- def _set_up_for_re_run(self, parameters: Dict[str, Any]) -> None:
- try:
- attempt_run_log = self._context.run_log_store.get_run_log_by_id(
- run_id=self._context.original_run_id, full=False
- )
- except exceptions.RunLogNotFoundError as e:
- msg = (
- f"Expected a run log with id: {self._context.original_run_id} "
- "but it does not exist in the run log store. "
- "If the original execution was in a different environment, ensure that it is available in the current "
- "environment."
- )
- logger.exception(msg)
- raise Exception(msg) from e
-
- # Sync the previous run log catalog to this one.
- self._context.catalog_handler.sync_between_runs(
- previous_run_id=self._context.original_run_id, run_id=self._context.run_id
- )
-
- parameters.update(cast(RunLog, attempt_run_log).parameters)
-
  def _set_up_run_log(self, exists_ok=False):
  """
  Create a run log and put that in the run log store
@@ -115,22 +79,16 @@ class GenericExecutor(BaseExecutor):
  raise

  # Consolidate and get the parameters
- parameters = self._get_parameters()
-
- # TODO: This needs to go away
- if self._context.use_cached:
- self._set_up_for_re_run(parameters=parameters)
+ params = self._get_parameters()

  self._context.run_log_store.create_run_log(
  run_id=self._context.run_id,
  tag=self._context.tag,
  status=defaults.PROCESSING,
  dag_hash=self._context.dag_hash,
- use_cached=self._context.use_cached,
- original_run_id=self._context.original_run_id,
  )
  # Any interaction with run log store attributes should happen via API if available.
- self._context.run_log_store.set_parameters(run_id=self._context.run_id, parameters=parameters)
+ self._context.run_log_store.set_parameters(run_id=self._context.run_id, parameters=params)

  # Update run_config
  run_config = utils.get_run_config()
@@ -409,17 +367,6 @@ class GenericExecutor(BaseExecutor):
  self._execute_node(node, map_variable=map_variable, **kwargs)
  return

- # TODO: This needs to go away
- # In single step
- if (self._single_step and not node.name == self._single_step) or not self._is_step_eligible_for_rerun(
- node, map_variable=map_variable
- ):
- # If the node name does not match, we move on to the next node.
- # If previous run was successful, move on to the next step
- step_log.mock = True
- step_log.status = defaults.SUCCESS
- self._context.run_log_store.add_step_log(step_log, self._context.run_id)
- return
  # We call an internal function to iterate the sub graphs and execute them
  if node.is_composite:
  self._context.run_log_store.add_step_log(step_log, self._context.run_id)
@@ -543,47 +490,6 @@ class GenericExecutor(BaseExecutor):
  run_log = self._context.run_log_store.get_run_log_by_id(run_id=self._context.run_id, full=True)
  print(json.dumps(run_log.model_dump(), indent=4))

- # TODO: This needs to go away
- def _is_step_eligible_for_rerun(self, node: BaseNode, map_variable: TypeMapVariable = None):
- """
- In case of a re-run, this method checks to see if the previous run step status to determine if a re-run is
- necessary.
- * True: If its not a re-run.
- * True: If its a re-run and we failed in the last run or the corresponding logs do not exist.
- * False: If its a re-run and we succeeded in the last run.
-
- Most cases, this logic need not be touched
-
- Args:
- node (Node): The node to check against re-run
- map_variable (dict, optional): If the node if of a map state, this corresponds to the value of iterable..
- Defaults to None.
-
- Returns:
- bool: Eligibility for re-run. True means re-run, False means skip to the next step.
- """
- if self._context.use_cached:
- node_step_log_name = node._get_step_log_name(map_variable=map_variable)
- logger.info(f"Scanning previous run logs for node logs of: {node_step_log_name}")
-
- try:
- previous_node_log = self._context.run_log_store.get_step_log(
- internal_name=node_step_log_name, run_id=self._context.original_run_id
- )
- except exceptions.StepLogNotFoundError:
- logger.warning(f"Did not find the node {node.name} in previous run log")
- return True # We should re-run the node.
-
- logger.info(f"The original step status: {previous_node_log.status}")
-
- if previous_node_log.status == defaults.SUCCESS:
- return False # We need not run the node
-
- logger.info(f"The new execution should start executing graph from this node {node.name}")
- return True
-
- return True
-
  def send_return_code(self, stage="traversal"):
  """
  Convenience function used by pipeline to send return code to the caller of the cli
@@ -32,9 +32,6 @@ class MockedExecutor(GenericExecutor):
  def _context(self):
  return context.run_context

- def _set_up_for_re_run(self, parameters: Dict[str, Any]) -> None:
- raise Exception("MockedExecutor does not support re-run")
-
  def execute_from_graph(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
  """
  This is the entry point to from the graph execution.
@@ -85,7 +82,7 @@ class MockedExecutor(GenericExecutor):
  # node is not patched, so mock it
  step_log.mock = True
  else:
- # node is mocked, change the executable to python with the
+ # node is patched
  # command as the patch value
  executable_type = node_to_send.executable.__class__
  executable = create_executable(
@@ -94,7 +91,6 @@
  node_name=node.name,
  )
  node_to_send.executable = executable
- pass

  # Executor specific way to trigger a job
  self._context.run_log_store.add_step_log(step_log, self._context.run_id)
@@ -117,27 +113,6 @@
  self.prepare_for_node_execution()
  self.execute_node(node=node, map_variable=map_variable, **kwargs)

- # TODO: This needs to go away
- def _is_step_eligible_for_rerun(self, node: BaseNode, map_variable: TypeMapVariable = None):
- """
- In case of a re-run, this method checks to see if the previous run step status to determine if a re-run is
- necessary.
- * True: If its not a re-run.
- * True: If its a re-run and we failed in the last run or the corresponding logs do not exist.
- * False: If its a re-run and we succeeded in the last run.
-
- Most cases, this logic need not be touched
-
- Args:
- node (Node): The node to check against re-run
- map_variable (dict, optional): If the node if of a map state, this corresponds to the value of iterable..
- Defaults to None.
-
- Returns:
- bool: Eligibility for re-run. True means re-run, False means skip to the next step.
- """
- return True
-
  def _resolve_executor_config(self, node: BaseNode):
  """
  The overrides section can contain specific over-rides to an global executor config.
@@ -0,0 +1,305 @@
+ import copy
+ import json
+ import logging
+ from functools import cached_property
+ from typing import Any, Dict, List, Optional
+
+ from rich import print
+
+ from runnable import context, defaults, exceptions, parameters, utils
+ from runnable.datastore import DataCatalog, RunLog
+ from runnable.defaults import TypeMapVariable
+ from runnable.experiment_tracker import get_tracked_data
+ from runnable.extensions.executor import GenericExecutor
+ from runnable.graph import Graph
+ from runnable.nodes import BaseNode
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+ class RetryExecutor(GenericExecutor):
+ """
+ The skeleton of an executor class.
+ Any implementation of an executor should inherit this class and over-ride accordingly.
+
+ This is a loaded base class which has a lot of methods already implemented for "typical" executions.
+ Look at the function docs to understand how to use them appropriately.
+
+ For any implementation:
+ 1). Who/when should the run log be set up?
+ 2). Who/When should the step log be set up?
+
+ """
+
+ service_name: str = "retry"
+ service_type: str = "executor"
+ run_id: str
+
+ _local: bool = True
+ _original_run_log: Optional[RunLog] = None
+
+ @property
+ def _context(self):
+ return context.run_context
+
+ @cached_property
+ def original_run_log(self):
+ self.original_run_log = self._context.run_log_store.get_run_log_by_id(
+ run_id=self.run_id,
+ full=True,
+ )
+
+ def _set_up_for_re_run(self, params: Dict[str, Any]) -> None:
+ # Sync the previous run log catalog to this one.
+ self._context.catalog_handler.sync_between_runs(previous_run_id=self.run_id, run_id=self._context.run_id)
+
+ params.update(self.original_run_log.parameters)
+
+ def _set_up_run_log(self, exists_ok=False):
+ """
+ Create a run log and put that in the run log store
+
+ If exists_ok, we allow the run log to be already present in the run log store.
+ """
+ super()._set_up_run_log(exists_ok=exists_ok)
+
+ params = self._get_parameters()
+
+ self._set_up_for_re_run(params=params)
+
+ def _execute_node(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
+ """
+ This is the entry point when we do the actual execution of the function.
+ DO NOT Over-ride this function.
+
+ While in interactive execution, we just compute, in 3rd party interactive execution, we need to reach
+ this function.
+
+ In most cases,
+ * We get the corresponding step_log of the node and the parameters.
+ * We sync the catalog to GET any data sets that are in the catalog
+ * We call the execute method of the node for the actual compute and retry it as many times as asked.
+ * If the node succeeds, we get any of the user defined metrics provided by the user.
+ * We sync the catalog to PUT any data sets that are in the catalog.
+
+ Args:
+ node (Node): The node to execute
+ map_variable (dict, optional): If the node is of a map state, map_variable is the value of the iterable.
+ Defaults to None.
+ """
+ step_log = self._context.run_log_store.get_step_log(node._get_step_log_name(map_variable), self._context.run_id)
+ """
+ By now, all the parameters are part of the run log as a dictionary.
+ We set them as environment variables, serialized as json strings.
+ """
+ params = self._context.run_log_store.get_parameters(run_id=self._context.run_id)
+ params_copy = copy.deepcopy(params)
+ # This is only for the API to work.
+ parameters.set_user_defined_params_as_environment_variables(params)
+
+ attempt = self.step_attempt_number
+ logger.info(f"Trying to execute node: {node.internal_name}, attempt : {attempt}")
+
+ attempt_log = self._context.run_log_store.create_attempt_log()
+ self._context_step_log = step_log
+ self._context_node = node
+
+ data_catalogs_get: Optional[List[DataCatalog]] = self._sync_catalog(step_log, stage="get")
+ try:
+ attempt_log = node.execute(
+ executor=self,
+ mock=step_log.mock,
+ map_variable=map_variable,
+ params=params,
+ **kwargs,
+ )
+ except Exception as e:
+ # Any exception here is a runnable exception as node suppresses exceptions.
+ msg = "This is clearly runnable fault, please report a bug and the logs"
+ logger.exception(msg)
+ raise Exception(msg) from e
+ finally:
+ attempt_log.attempt_number = attempt
+ step_log.attempts.append(attempt_log)
+
+ tracked_data = get_tracked_data()
+
+ self._context.experiment_tracker.publish_data(tracked_data)
+ parameters_out = attempt_log.output_parameters
+
+ if attempt_log.status == defaults.FAIL:
+ logger.exception(f"Node: {node} failed")
+ step_log.status = defaults.FAIL
+ else:
+ # Mock is always set to False, bad design??
+ # TODO: Stub nodes should not sync back data
+ # TODO: Errors in catalog syncing should point to Fail step
+ # TODO: Even for a failed execution, the catalog can happen
+ step_log.status = defaults.SUCCESS
+ self._sync_catalog(step_log, stage="put", synced_catalogs=data_catalogs_get)
+ step_log.user_defined_metrics = tracked_data
+
+ diff_parameters = utils.diff_dict(params_copy, parameters_out)
+ self._context.run_log_store.set_parameters(self._context.run_id, diff_parameters)
+
+ # Remove the step context
+ parameters.get_user_set_parameters(remove=True)
+ self._context_step_log = None
+ self._context_node = None # type: ignore
+ self._context_metrics = {}
+
+ self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+
+ def execute_from_graph(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
+ """
+ This is the entry point to from the graph execution.
+
+ While the self.execute_graph is responsible for traversing the graph, this function is responsible for
+ actual execution of the node.
+
+ If the node type is:
+ * task : We can delegate to _execute_node after checking the eligibility for re-run in cases of a re-run
+ * success: We can delegate to _execute_node
+ * fail: We can delegate to _execute_node
+
+ For nodes that are internally graphs:
+ * parallel: Delegate the responsibility of execution to the node.execute_as_graph()
+ * dag: Delegate the responsibility of execution to the node.execute_as_graph()
+ * map: Delegate the responsibility of execution to the node.execute_as_graph()
+
+ Transpilers will NEVER use this method and will NEVER call ths method.
+ This method should only be used by interactive executors.
+
+ Args:
+ node (Node): The node to execute
+ map_variable (dict, optional): If the node if of a map state, this corresponds to the value of iterable.
+ Defaults to None.
+ """
+ step_log = self._context.run_log_store.create_step_log(node.name, node._get_step_log_name(map_variable))
+
+ self.add_code_identities(node=node, step_log=step_log)
+
+ step_log.step_type = node.node_type
+ step_log.status = defaults.PROCESSING
+
+ # Add the step log to the database as per the situation.
+ # If its a terminal node, complete it now
+ if node.node_type in ["success", "fail"]:
+ self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+ self._execute_node(node, map_variable=map_variable, **kwargs)
+ return
+
+ # In single step
+ if not self._is_step_eligible_for_rerun(node, map_variable=map_variable):
+ # If the node name does not match, we move on to the next node.
+ # If previous run was successful, move on to the next step
+ step_log.mock = True
+ step_log.status = defaults.SUCCESS
+ self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+ return
+
+ # We call an internal function to iterate the sub graphs and execute them
+ if node.is_composite:
+ self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+ node.execute_as_graph(map_variable=map_variable, **kwargs)
+ return
+
+ # Executor specific way to trigger a job
+ self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+ self.execute_node(node=node, map_variable=map_variable, **kwargs)
+
+ def execute_graph(self, dag: Graph, map_variable: TypeMapVariable = None, **kwargs):
+ """
+ The parallelization is controlled by the nodes and not by this function.
+
+ Transpilers should over ride this method to do the translation of dag to the platform specific way.
+ Interactive methods should use this to traverse and execute the dag.
+ - Use execute_from_graph to handle sub-graphs
+
+ Logically the method should:
+ * Start at the dag.start_at of the dag.
+ * Call the self.execute_from_graph(node)
+ * depending upon the status of the execution, either move to the success node or failure node.
+
+ Args:
+ dag (Graph): The directed acyclic graph to traverse and execute.
+ map_variable (dict, optional): If the node if of a map state, this corresponds to the value of the iterable.
+ Defaults to None.
+ """
+ current_node = dag.start_at
+ previous_node = None
+ logger.info(f"Running the execution with {current_node}")
+
+ while True:
+ working_on = dag.get_node_by_name(current_node)
+
+ if previous_node == current_node:
+ raise Exception("Potentially running in a infinite loop")
+
+ previous_node = current_node
+
+ logger.info(f"Creating execution log for {working_on}")
+ self.execute_from_graph(working_on, map_variable=map_variable, **kwargs)
+
+ _, next_node_name = self._get_status_and_next_node_name(
+ current_node=working_on, dag=dag, map_variable=map_variable
+ )
+
+ if working_on.node_type in ["success", "fail"]:
+ break
+
+ current_node = next_node_name
+
+ run_log = self._context.run_log_store.get_branch_log(
+ working_on._get_branch_log_name(map_variable), self._context.run_id
+ )
+
+ branch = "graph"
+ if working_on.internal_branch_name:
+ branch = working_on.internal_branch_name
+
+ logger.info(f"Finished execution of the {branch} with status {run_log.status}")
+
+ # get the final run log
+ if branch == "graph":
+ run_log = self._context.run_log_store.get_run_log_by_id(run_id=self._context.run_id, full=True)
+ print(json.dumps(run_log.model_dump(), indent=4))
+
+ def _is_step_eligible_for_rerun(self, node: BaseNode, map_variable: TypeMapVariable = None):
+ """
+ In case of a re-run, this method checks to see if the previous run step status to determine if a re-run is
+ necessary.
+ * True: If its not a re-run.
+ * True: If its a re-run and we failed in the last run or the corresponding logs do not exist.
+ * False: If its a re-run and we succeeded in the last run.
+
+ Most cases, this logic need not be touched
+
+ Args:
+ node (Node): The node to check against re-run
+ map_variable (dict, optional): If the node if of a map state, this corresponds to the value of iterable..
+ Defaults to None.
+
+ Returns:
+ bool: Eligibility for re-run. True means re-run, False means skip to the next step.
+ """
+
+ node_step_log_name = node._get_step_log_name(map_variable=map_variable)
+ logger.info(f"Scanning previous run logs for node logs of: {node_step_log_name}")
+
+ try:
+ previous_attempt_log, _ = self.original_run_log.search_step_by_internal_name(node_step_log_name)
+ except exceptions.StepLogNotFoundError:
+ logger.warning(f"Did not find the node {node.name} in previous run log")
+ return True # We should re-run the node.
+
+ logger.info(f"The original step status: {previous_attempt_log.status}")
+
+ if previous_attempt_log.status == defaults.SUCCESS:
+ return False # We need not run the node
+
+ logger.info(f"The new execution should start executing graph from this node {node.name}")
+ return True
+
+ def execute_node(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
+ self._execute_node(node, map_variable=map_variable, **kwargs)
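The retry executor replaces the removed `--use-cached` flow: instead of a CLI flag, the original run's id is supplied as the executor's `run_id` field, and the plugin is registered as `"retry"` in pyproject.toml above. A hedged sketch of a configuration that `prepare_configurations` might load is shown below; the `executor`/`type`/`config` key names are assumptions modelled on the `ServiceConfig` and `RunnableConfig` shapes shown earlier in this diff, not something this diff confirms.

```python
# Hypothetical configuration selecting the retry executor. Only the plugin
# name "retry" and the RetryExecutor.run_id field come from this diff; the
# surrounding key names are assumptions.
retry_configuration = {
    "executor": {
        "type": "retry",
        "config": {"run_id": "previous-run-id"},  # the earlier run to retry
    }
}
```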
@@ -108,9 +108,7 @@ class FileSystemRunLogstore(BaseRunLogStore):
  run_log = RunLog(
  run_id=run_id,
  dag_hash=dag_hash,
- use_cached=use_cached,
  tag=tag,
- original_run_id=original_run_id,
  status=status,
  )
  self.write_to_folder(run_log)
@@ -305,9 +305,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
  run_log = RunLog(
  run_id=run_id,
  dag_hash=dag_hash,
- use_cached=use_cached,
  tag=tag,
- original_run_id=original_run_id,
  status=status,
  )

@@ -3,9 +3,9 @@ from __future__ import annotations
  import logging
  import os
  from abc import ABC, abstractmethod
- from typing import Any, Dict, List, Optional, Union
+ from typing import Any, Callable, Dict, List, Optional, Union

- from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, computed_field, field_validator, model_validator
+ from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, computed_field, model_validator
  from rich import print
  from typing_extensions import Self

@@ -15,11 +15,8 @@ from runnable.nodes import TraversalNode

  logger = logging.getLogger(defaults.LOGGER_NAME)

- StepType = Union["Stub", "Task", "Success", "Fail", "Parallel", "Map"]
- TraversalTypes = Union["Stub", "Task", "Parallel", "Map"]
-
-
- ALLOWED_COMMAND_TYPES = ["shell", "python", "notebook"]
+ StepType = Union["Stub", "PythonTask", "NotebookTask", "ShellTask", "Success", "Fail", "Parallel", "Map"]
+ TraversalTypes = Union["Stub", "PythonTask", "NotebookTask", "ShellTask", "Parallel", "Map"]


  class Catalog(BaseModel):
@@ -106,10 +103,7 @@ class BaseTraversal(ABC, BaseModel):
  ...


- ## TODO: Add python task, shell task, and notebook task.
-
-
- class Task(BaseTraversal):
+ class BaseTask(BaseTraversal):
  """
  An execution node of the pipeline.
  Please refer to [concepts](concepts/task.md) for more information.
@@ -157,41 +151,166 @@ class Task(BaseTraversal):

  """

- command: str = Field(alias="command")
- command_type: str = Field(default="python")
  catalog: Optional[Catalog] = Field(default=None, alias="catalog")
  overrides: Dict[str, Any] = Field(default_factory=dict, alias="overrides")

+ def create_node(self) -> TaskNode:
+ if not self.next_node:
+ if not (self.terminate_with_failure or self.terminate_with_success):
+ raise AssertionError("A node not being terminated must have a user defined next node")
+
+ print(self.model_dump(exclude_none=True))
+ return TaskNode.parse_from_config(self.model_dump(exclude_none=True))
+
+
+ class PythonTask(BaseTask):
+ """
+ An execution node of the pipeline of python functions.
+ Please refer to [concepts](concepts/task.md) for more information.
+
+ Attributes:
+ name (str): The name of the node.
+ function (callable): The function to execute.
+ catalog (Optional[Catalog]): The catalog to sync data from/to.
+ Please see Catalog about the structure of the catalog.
+ overrides (Dict[str, Any]): Any overrides to the command.
+ Individual tasks can override the global configuration config by referring to the
+ specific override.
+
+ For example,
+ ### Global configuration
+ ```yaml
+ executor:
+ type: local-container
+ config:
+ docker_image: "runnable/runnable:latest"
+ overrides:
+ custom_docker_image:
+ docker_image: "runnable/runnable:custom"
+ ```
+ ### Task specific configuration
+ ```python
+ task = PythonTask(name="task", function="function'",
+ overrides={'local-container': custom_docker_image})
+ ```
+
+ terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+ terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
+ on_failure (str): The name of the node to execute if the step fails.
+
+ """
+
+ function: Callable = Field(exclude=True)
+
+ @computed_field
+ def command_type(self) -> str:
+ return "python"
+
+ @computed_field
+ def command(self) -> str:
+ module = self.function.__module__
+ name = self.function.__name__
+
+ return f"{module}.{name}"
+
+
+ class NotebookTask(BaseTask):
+ """
+ An execution node of the pipeline of type notebook.
+ Please refer to [concepts](concepts/task.md) for more information.
+
+ Attributes:
+ name (str): The name of the node.
+ notebook: The path to the notebook
+ catalog (Optional[Catalog]): The catalog to sync data from/to.
+ Please see Catalog about the structure of the catalog.
+ returns: A list of the names of variables to return from the notebook.
+ overrides (Dict[str, Any]): Any overrides to the command.
+ Individual tasks can override the global configuration config by referring to the
+ specific override.
+
+ For example,
+ ### Global configuration
+ ```yaml
+ executor:
+ type: local-container
+ config:
+ docker_image: "runnable/runnable:latest"
+ overrides:
+ custom_docker_image:
+ docker_image: "runnable/runnable:custom"
+ ```
+ ### Task specific configuration
+ ```python
+ task = NotebookTask(name="task", notebook="evaluation.ipynb",
+ overrides={'local-container': custom_docker_image})
+ ```
+ notebook_output_path (Optional[str]): The path to save the notebook output.
+ Only used when command_type is 'notebook', defaults to command+_out.ipynb
+ optional_ploomber_args (Optional[Dict[str, Any]]): Any optional ploomber args.
+ Only used when command_type is 'notebook', defaults to {}
+
+ terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+ terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
+ on_failure (str): The name of the node to execute if the step fails.
+
+ """
+
+ notebook: str = Field(alias="command")
+
  notebook_output_path: Optional[str] = Field(default=None, alias="notebook_output_path")
  optional_ploomber_args: Optional[Dict[str, Any]] = Field(default=None, alias="optional_ploomber_args")
- output_cell_tag: Optional[str] = Field(default=None, alias="output_cell_tag")
+ returns: List[str] = Field(default_factory=list, alias="returns")

- @field_validator("command_type", mode="before")
- @classmethod
- def validate_command_type(cls, value: str) -> str:
- if value not in ALLOWED_COMMAND_TYPES:
- raise ValueError(f"Invalid command_type: {value}")
- return value
+ @computed_field
+ def command_type(self) -> str:
+ return "notebook"

- @model_validator(mode="after")
- def check_notebook_args(self) -> "Task":
- if self.command_type != "notebook":
- assert (
- self.notebook_output_path is None
- ), "Only command_types of 'notebook' can be used with notebook_output_path"

- assert (
- self.optional_ploomber_args is None
- ), "Only command_types of 'notebook' can be used with optional_ploomber_args"
+ class ShellTask(BaseTask):
+ """
+ An execution node of the pipeline of type shell.
+ Please refer to [concepts](concepts/task.md) for more information.

- assert self.output_cell_tag is None, "Only command_types of 'notebook' can be used with output_cell_tag"
- return self
+ Attributes:
+ name (str): The name of the node.
+ command: The shell command to execute.
+ catalog (Optional[Catalog]): The catalog to sync data from/to.
+ Please see Catalog about the structure of the catalog.
+ returns: A list of the names of variables to capture from environment variables of shell.
+ overrides (Dict[str, Any]): Any overrides to the command.
+ Individual tasks can override the global configuration config by referring to the
+ specific override.

- def create_node(self) -> TaskNode:
- if not self.next_node:
- if not (self.terminate_with_failure or self.terminate_with_success):
- raise AssertionError("A node not being terminated must have a user defined next node")
- return TaskNode.parse_from_config(self.model_dump(exclude_none=True))
+ For example,
+ ### Global configuration
+ ```yaml
+ executor:
+ type: local-container
+ config:
+ docker_image: "runnable/runnable:latest"
+ overrides:
+ custom_docker_image:
+ docker_image: "runnable/runnable:custom"
+ ```
+ ### Task specific configuration
+ ```python
+ task = ShellTask(name="task", command="exit 0",
+ overrides={'local-container': custom_docker_image})
+ ```
+
+ terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+ terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
+ on_failure (str): The name of the node to execute if the step fails.
+
+ """
+
+ command: str = Field(alias="command")
+ returns: List[str] = Field(default_factory=list, alias="returns")
+
+ @computed_field
+ def command_type(self) -> str:
+ return "shell"


  class Stub(BaseTraversal):
@@ -343,7 +462,8 @@ class Pipeline(BaseModel):
  A Pipeline is a directed acyclic graph of Steps that define a workflow.

  Attributes:
- steps (List[Stub | Task | Parallel | Map | Success | Fail]): A list of Steps that make up the Pipeline.
+ steps (List[Stub | PythonTask | NotebookTask | ShellTask | Parallel | Map | Success | Fail]):
+ A list of Steps that make up the Pipeline.
  start_at (Stub | Task | Parallel | Map): The name of the first Step in the Pipeline.
  name (str, optional): The name of the Pipeline. Defaults to "".
  description (str, optional): A description of the Pipeline. Defaults to "".
@@ -440,7 +560,6 @@ class Pipeline(BaseModel):
  run_id=run_id,
  tag=tag,
  parameters_file=parameters_file,
- use_cached=use_cached,
  )

  run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value