runnable 0.14.0-py3-none-any.whl → 0.16.0-py3-none-any.whl
- runnable/__init__.py +1 -1
- runnable/catalog.py +2 -0
- runnable/cli.py +264 -307
- runnable/context.py +10 -3
- runnable/datastore.py +145 -22
- runnable/defaults.py +13 -54
- runnable/entrypoints.py +197 -185
- runnable/exceptions.py +22 -0
- runnable/executor.py +114 -86
- runnable/graph.py +0 -1
- runnable/nodes.py +36 -6
- runnable/sdk.py +132 -36
- runnable/tasks.py +6 -15
- runnable/utils.py +22 -30
- {runnable-0.14.0.dist-info → runnable-0.16.0.dist-info}/METADATA +6 -3
- runnable-0.16.0.dist-info/RECORD +23 -0
- {runnable-0.14.0.dist-info → runnable-0.16.0.dist-info}/entry_points.txt +12 -7
- runnable/integration.py +0 -197
- runnable-0.14.0.dist-info/RECORD +0 -24
- {runnable-0.14.0.dist-info → runnable-0.16.0.dist-info}/WHEEL +0 -0
- {runnable-0.14.0.dist-info → runnable-0.16.0.dist-info}/licenses/LICENSE +0 -0
runnable/executor.py
CHANGED
@@ -5,17 +5,17 @@ import os
 from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING, Any, Dict, List, Optional
 
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, PrivateAttr
 
 import runnable.context as context
 from runnable import defaults
-from runnable.datastore import DataCatalog, StepLog
+from runnable.datastore import DataCatalog, JobLog, StepLog
 from runnable.defaults import TypeMapVariable
 from runnable.graph import Graph
 
 if TYPE_CHECKING:  # pragma: no cover
-    from extensions.nodes.nodes import TaskNode
     from runnable.nodes import BaseNode
+    from runnable.tasks import BaseTaskType
 
 logger = logging.getLogger(defaults.LOGGER_NAME)
 
@@ -34,14 +34,10 @@ class BaseExecutor(ABC, BaseModel):
     service_name: str = ""
     service_type: str = "executor"
 
-
-
-    _local: bool = (
+    _is_local: bool = (
         False  # This is a flag to indicate whether the executor is local or not.
     )
 
-    # TODO: Change this to _is_local
-    _context_node: Optional[BaseNode] = None
     model_config = ConfigDict(extra="forbid")
 
     @property
@@ -68,33 +64,62 @@ class BaseExecutor(ABC, BaseModel):
         """
         ...
 
+    # TODO: Make this attempt number
+    @property
+    def step_attempt_number(self) -> int:
+        """
+        The attempt number of the current step.
+        Orchestrators should use this step to submit multiple attempts of the job.
+
+        Returns:
+            int: The attempt number of the current step. Defaults to 1.
+        """
+        return int(os.environ.get(defaults.ATTEMPT_NUMBER, 1))
+
     @abstractmethod
-    def
+    def send_return_code(self, stage="traversal"):
+        """
+        Convenience function used by pipeline to send return code to the caller of the cli
+
+        Raises:
+            Exception: If the pipeline execution failed
         """
-
-
+        ...
+
+
+class BaseJobExecutor(BaseExecutor):
+    service_type: str = "job_executor"
 
-
+    @abstractmethod
+    def submit_job(self, job: BaseTaskType, catalog_settings: Optional[List[str]]):
+        """
+        Local executors should
+        - create the run log
+        - and call an execute_job
 
-
-
+        Non local executors should
+        - transpile the job to the platform specific job spec
+        - submit the job to call execute_job
         """
         ...
 
     @abstractmethod
-    def
+    def add_code_identities(self, job_log: JobLog, **kwargs):
         """
-
+        Add code identities specific to the implementation.
+
+        The Base class has an implementation of adding git code identities.
 
         Args:
-
-
+            step_log (object): The step log object
+            node (BaseNode): The node we are adding the step log for
         """
         ...
 
     @abstractmethod
     def _sync_catalog(
-        self,
+        self,
+        catalog_settings: Optional[List[str]],
     ) -> Optional[List[DataCatalog]]:
         """
         1). Identify the catalog settings by over-riding node settings with the global settings.
@@ -117,6 +142,34 @@ class BaseExecutor(ABC, BaseModel):
         """
         ...
 
+    @abstractmethod
+    def execute_job(self, job: BaseTaskType, catalog_settings: Optional[List[str]]):
+        """
+        Focusses only on execution of the job.
+        """
+        ...
+
+
+# TODO: Consolidate execute_node, trigger_node_execution, _execute_node
+class BasePipelineExecutor(BaseExecutor):
+    service_type: str = "pipeline_executor"
+    overrides: dict = {}
+
+    _context_node: Optional[BaseNode] = PrivateAttr(default=None)
+
+    @abstractmethod
+    def add_code_identities(self, node: BaseNode, step_log: StepLog, **kwargs):
+        """
+        Add code identities specific to the implementation.
+
+        The Base class has an implementation of adding git code identities.
+
+        Args:
+            step_log (object): The step log object
+            node (BaseNode): The node we are adding the step log for
+        """
+        ...
+
     @abstractmethod
     def get_effective_compute_data_folder(self) -> Optional[str]:
         """
@@ -134,16 +187,30 @@ class BaseExecutor(ABC, BaseModel):
         """
         ...
 
-    @
-    def
+    @abstractmethod
+    def _sync_catalog(
+        self, stage: str, synced_catalogs=None
+    ) -> Optional[List[DataCatalog]]:
         """
-
-
+        1). Identify the catalog settings by over-riding node settings with the global settings.
+        2). For stage = get:
+                Identify the catalog items that are being asked to get from the catalog
+                And copy them to the local compute data folder
+        3). For stage = put:
+                Identify the catalog items that are being asked to put into the catalog
+                Copy the items from local compute folder to the catalog
+        4). Add the items onto the step log according to the stage
+
+        Args:
+            node (Node): The current node being processed
+            step_log (StepLog): The step log corresponding to that node
+            stage (str): One of get or put
+
+        Raises:
+            Exception: If the stage is not in one of get/put
 
-        Returns:
-            int: The attempt number of the current step. Defaults to 1.
         """
-
+        ...
 
     @abstractmethod
     def _execute_node(
@@ -190,19 +257,6 @@ class BaseExecutor(ABC, BaseModel):
         """
         ...
 
-    @abstractmethod
-    def add_code_identities(self, node: BaseNode, step_log: StepLog, **kwargs):
-        """
-        Add code identities specific to the implementation.
-
-        The Base class has an implementation of adding git code identities.
-
-        Args:
-            step_log (object): The step log object
-            node (BaseNode): The node we are adding the step log for
-        """
-        ...
-
     @abstractmethod
     def execute_from_graph(
         self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
@@ -233,29 +287,10 @@ class BaseExecutor(ABC, BaseModel):
         """
         ...
 
-    @abstractmethod
-    def trigger_job(
-        self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
-    ):
-        """
-        Executor specific way of triggering jobs when runnable does both traversal and execution
-
-        Transpilers will NEVER use this method and will NEVER call them.
-        Only interactive executors who need execute_from_graph will ever implement it.
-
-        Args:
-            node (BaseNode): The node to execute
-            map_variable (str, optional): If the node if of a map state, this corresponds to the value of iterable.
-                    Defaults to ''.
-
-        NOTE: We do not raise an exception as this method is not required by many extensions
-        """
-        ...
-
     @abstractmethod
     def _get_status_and_next_node_name(
         self, current_node: BaseNode, dag: Graph, map_variable: TypeMapVariable = None
-    ):
+    ) -> tuple[str, str]:
         """
         Given the current node and the graph, returns the name of the next node to execute.
 
@@ -294,17 +329,7 @@ class BaseExecutor(ABC, BaseModel):
         ...
 
     @abstractmethod
-    def
-        """
-        Convenience function used by pipeline to send return code to the caller of the cli
-
-        Raises:
-            Exception: If the pipeline execution failed
-        """
-        ...
-
-    @abstractmethod
-    def _resolve_executor_config(self, node: BaseNode):
+    def _resolve_executor_config(self, node: BaseNode) -> Dict[str, Any]:
         """
         The overrides section can contain specific over-rides to an global executor config.
         To avoid too much clutter in the dag definition, we allow the configuration file to have overrides block.
@@ -337,22 +362,6 @@ class BaseExecutor(ABC, BaseModel):
         """
         ...
 
-    @abstractmethod
-    def execute_job(self, node: TaskNode):
-        """
-        Executor specific way of executing a job (python function or a notebook).
-
-        Interactive executors should execute the job.
-        Transpilers should write the instructions.
-
-        Args:
-            node (BaseNode): The job node to execute
-
-        Raises:
-            NotImplementedError: Executors should choose to extend this functionality or not.
-        """
-        ...
-
     @abstractmethod
     def fan_out(self, node: BaseNode, map_variable: TypeMapVariable = None):
         """
@@ -397,3 +406,22 @@ class BaseExecutor(ABC, BaseModel):
 
         """
         ...
+
+    @abstractmethod
+    def trigger_node_execution(
+        self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
+    ):
+        """
+        Executor specific way of triggering jobs when runnable does both traversal and execution
+
+        Transpilers will NEVER use this method and will NEVER call them.
+        Only interactive executors who need execute_from_graph will ever implement it.
+
+        Args:
+            node (BaseNode): The node to execute
+            map_variable (str, optional): If the node if of a map state, this corresponds to the value of iterable.
+                    Defaults to ''.
+
+        NOTE: We do not raise an exception as this method is not required by many extensions
+        """
+        ...
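The headline change here: the single BaseExecutor is split into BaseJobExecutor (one-shot jobs) and BasePipelineExecutor (dag traversal), _local is renamed to _is_local, and trigger_job returns as trigger_node_execution. Below is a minimal sketch of a concrete job executor against the new surface; the class name and method bodies are illustrative assumptions, execute_command is assumed to exist on BaseTaskType, and the base classes may declare further abstract members not shown in this diff.

from typing import List, Optional

from runnable.datastore import DataCatalog, JobLog
from runnable.executor import BaseJobExecutor
from runnable.tasks import BaseTaskType


class InProcessJobExecutor(BaseJobExecutor):  # hypothetical name
    service_name: str = "in-process"
    _is_local: bool = True  # local executors create the run log and execute in place

    def submit_job(self, job: BaseTaskType, catalog_settings: Optional[List[str]]):
        # Local path from the submit_job docstring: set up, then call execute_job.
        self.execute_job(job, catalog_settings=catalog_settings)

    def execute_job(self, job: BaseTaskType, catalog_settings: Optional[List[str]]):
        job.execute_command()  # assumption: BaseTaskType exposes execute_command
        self._sync_catalog(catalog_settings)

    def add_code_identities(self, job_log: JobLog, **kwargs):
        pass  # e.g. attach a git SHA, as the base implementation is said to do

    def _sync_catalog(
        self, catalog_settings: Optional[List[str]]
    ) -> Optional[List[DataCatalog]]:
        return None  # no catalog movement in this sketch

    def send_return_code(self, stage="traversal"):
        pass  # a real executor raises if the execution failed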
runnable/graph.py
CHANGED
@@ -398,7 +398,6 @@ def create_node(name: str, step_config: dict, internal_branch_name: Optional[str
         node = node_mgr.parse_from_config(config=invoke_kwds)
         return node
     except KeyError:
-        # type is missing!!
         msg = "The node configuration does not contain the required key 'type'."
         logger.exception(step_config)
         raise Exception(msg)
runnable/nodes.py
CHANGED
@@ -435,16 +435,34 @@ class ExecutableNode(TraversalNode):
         return self.max_attempts
 
     def _get_branch_by_name(self, branch_name: str):
-        raise
+        raise exceptions.NodeMethodCallError(
+            "This is an executable node and does not have branches"
+        )
 
     def execute_as_graph(self, map_variable: TypeMapVariable = None, **kwargs):
-        raise
+        raise exceptions.NodeMethodCallError(
+            "This is an executable node and does not have a graph"
+        )
 
     def fan_in(self, map_variable: TypeMapVariable = None, **kwargs):
-        raise
+        raise exceptions.NodeMethodCallError(
+            "This is an executable node and does not have a fan in"
+        )
 
     def fan_out(self, map_variable: TypeMapVariable = None, **kwargs):
-        raise
+        raise exceptions.NodeMethodCallError(
+            "This is an executable node and does not have a fan out"
+        )
+
+    def prepare_for_job_execution(self):
+        raise exceptions.NodeMethodCallError(
+            "This is an executable node and does not have a prepare_for_job_execution"
+        )
+
+    def tear_down_after_job_execution(self):
+        raise exceptions.NodeMethodCallError(
+            "This is an executable node and does not have a tear_down_after_job_execution",
+        )
 
 
 class CompositeNode(TraversalNode):
@@ -455,7 +473,9 @@ class CompositeNode(TraversalNode):
         Returns:
             dict: catalog settings defined as per the node or None
         """
-        raise
+        raise exceptions.NodeMethodCallError(
+            "This is a composite node and does not have a catalog settings"
+        )
 
     def _get_max_attempts(self) -> int:
         raise Exception("This is a composite node and does not have a max_attempts")
@@ -467,10 +487,20 @@ class CompositeNode(TraversalNode):
         attempt_number: int = 1,
         **kwargs,
     ) -> StepLog:
-        raise
+        raise exceptions.NodeMethodCallError(
             "This is a composite node and does not have an execute function"
         )
 
+    def prepare_for_job_execution(self):
+        raise exceptions.NodeMethodCallError(
+            "This is an executable node and does not have a prepare_for_job_execution"
+        )
+
+    def tear_down_after_job_execution(self):
+        raise exceptions.NodeMethodCallError(
+            "This is an executable node and does not have a tear_down_after_job_execution"
+        )
+
 
 class TerminalNode(BaseNode):
     def _get_on_failure_node(self) -> str:
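All the bare raise statements in these stub methods now raise exceptions.NodeMethodCallError with a message; the +22 -0 change to runnable/exceptions.py in the file list above presumably adds that class. A hedged sketch of what the typed error enables for callers; the helper below is illustrative, not part of the package.

from runnable import exceptions


def branch_names(node) -> list:
    """Illustrative helper: treat 'no branches' as an empty list."""
    try:
        return [node._get_branch_by_name("map")]
    except exceptions.NodeMethodCallError:
        # ExecutableNode and friends now raise this typed error instead of a bare raise.
        return []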
runnable/sdk.py
CHANGED
@@ -35,7 +35,9 @@ from extensions.nodes.nodes import (
     TaskNode,
 )
 from runnable import console, defaults, entrypoints, exceptions, graph, utils
+from runnable.executor import BaseJobExecutor, BasePipelineExecutor
 from runnable.nodes import TraversalNode
+from runnable.tasks import BaseTaskType as RunnableTask
 from runnable.tasks import TaskReturns
 
 # TODO: This might have to be an extension
@@ -190,6 +192,11 @@ class BaseTask(BaseTraversal):
             self.model_dump(exclude_none=True, by_alias=True)
         )
 
+    def create_job(self) -> RunnableTask:
+        raise NotImplementedError(
+            "This method should be implemented in the child class"
+        )
+
 
 class PythonTask(BaseTask):
     """
@@ -273,6 +280,11 @@ class PythonTask(BaseTask):
 
         return f"{module}.{name}"
 
+    def create_job(self) -> RunnableTask:
+        self.terminate_with_success = True
+        node = self.create_node()
+        return node.executable
+
 
 class NotebookTask(BaseTask):
     """
@@ -353,6 +365,11 @@ class NotebookTask(BaseTask):
     def command_type(self) -> str:
         return "notebook"
 
+    def create_job(self) -> RunnableTask:
+        self.terminate_with_success = True
+        node = self.create_node()
+        return node.executable
+
 
 class ShellTask(BaseTask):
     """
@@ -621,6 +638,7 @@ class Pipeline(BaseModel):
     model_config = ConfigDict(extra="forbid")
 
     def _validate_path(self, path: List[StepType], failure_path: bool = False) -> None:
+        # TODO: Drastically simplify this
         # Check if one and only one step terminates with success
         # Check no more than one step terminates with failure
 
@@ -734,6 +752,16 @@ class Pipeline(BaseModel):
         dag_definition = self._dag.model_dump(by_alias=True, exclude_none=True)
         return graph.create_graph(dag_definition)
 
+    def _is_called_for_definition(self) -> bool:
+        """
+        If the run context is set, we are coming in only to get the pipeline definition.
+        """
+        from runnable.context import run_context
+
+        if run_context is None:
+            return False
+        return True
+
     def execute(
         self,
         configuration_file: str = "",
@@ -743,33 +771,12 @@ class Pipeline(BaseModel):
         log_level: str = defaults.LOG_LEVEL,
     ):
         """
-
-
-
-
-        Traverse and execute all the steps of the pipeline, eg. [local execution](configurations/executors/local.md).
-
-        Or create the representation of the pipeline for other executors.
-
-        Please refer to [concepts](concepts/executor.md) for more information.
-
-        Args:
-            configuration_file (str, optional): The path to the configuration file. Defaults to "".
-                The configuration file can be overridden by the environment variable RUNNABLE_CONFIGURATION_FILE.
-
-            run_id (str, optional): The ID of the run. Defaults to "".
-            tag (str, optional): The tag of the run. Defaults to "".
-                Use to group multiple runs.
-
-            parameters_file (str, optional): The path to the parameters file. Defaults to "".
-
-            log_level (str, optional): The log level. Defaults to defaults.LOG_LEVEL.
+        Overloaded method:
+        - Could be called by the user when executing the pipeline via SDK
+        - Could be called by the system itself when getting the pipeline definition
         """
-
-
-        py_to_yaml = os.environ.get("RUNNABLE_PY_TO_YAML", "false")
-
-        if py_to_yaml == "true":
+        if self._is_called_for_definition():
+            # Immediately return as this call is only for getting the pipeline definition
             return {}
 
         logger.setLevel(log_level)
@@ -785,21 +792,22 @@ class Pipeline(BaseModel):
             parameters_file=parameters_file,
         )
 
-        run_context.
+        assert isinstance(run_context.executor, BasePipelineExecutor)
+
         utils.set_runnable_environment_variables(
             run_id=run_id, configuration_file=configuration_file, tag=tag
         )
 
         dag_definition = self._dag.model_dump(by_alias=True, exclude_none=True)
-
+        run_context.from_sdk = True
         run_context.dag = graph.create_graph(dag_definition)
 
         console.print("Working with context:")
         console.print(run_context)
         console.rule(style="[dark orange]")
 
-        if not run_context.executor._local:
-            # We are not working with
+        if not run_context.executor._is_local:
+            # We are not working with executor that does not work in local environment
            import inspect
 
             caller_stack = inspect.stack()[1]
@@ -809,9 +817,10 @@ class Pipeline(BaseModel):
             module_to_call = f"{module_name}.{caller_stack.function}"
 
             run_context.pipeline_file = f"{module_to_call}.py"
+            run_context.from_sdk = True
 
         # Prepare for graph execution
-        run_context.executor.
+        run_context.executor._set_up_run_log(exists_ok=False)
 
         with Progress(
             SpinnerColumn(spinner_name="runner"),
@@ -823,14 +832,16 @@ class Pipeline(BaseModel):
             console=console,
             expand=True,
         ) as progress:
+            pipeline_execution_task = progress.add_task(
+                "[dark_orange] Starting execution .. ", total=1
+            )
             try:
                 run_context.progress = progress
-                pipeline_execution_task = progress.add_task(
-                    "[dark_orange] Starting execution .. ", total=1
-                )
+
                 run_context.executor.execute_graph(dag=run_context.dag)
 
-                if not run_context.executor._local:
+                if not run_context.executor._is_local:
+                    # non local executors just traverse the graph and do nothing
                     return {}
 
                 run_log = run_context.run_log_store.get_run_log_by_id(
@@ -859,7 +870,92 @@ class Pipeline(BaseModel):
             )
             raise
 
-        if run_context.executor._local:
+        if run_context.executor._is_local:
+            return run_context.run_log_store.get_run_log_by_id(
+                run_id=run_context.run_id
+            )
+
+
+class Job(BaseModel):
+    name: str
+    task: BaseTask
+
+    def return_task(self) -> RunnableTask:
+        return self.task.create_job()
+
+    def return_catalog_settings(self) -> Optional[List[str]]:
+        if self.task.catalog is None:
+            return []
+        return self.task.catalog.put
+
+    def _is_called_for_definition(self) -> bool:
+        """
+        If the run context is set, we are coming in only to get the pipeline definition.
+        """
+        from runnable.context import run_context
+
+        if run_context is None:
+            return False
+        return True
+
+    def execute(
+        self,
+        configuration_file: str = "",
+        job_id: str = "",
+        tag: str = "",
+        parameters_file: str = "",
+        log_level: str = defaults.LOG_LEVEL,
+    ):
+        if self._is_called_for_definition():
+            # Immediately return as this call is only for getting the job definition
+            return {}
+        logger.setLevel(log_level)
+
+        run_id = utils.generate_run_id(run_id=job_id)
+        configuration_file = os.environ.get(
+            "RUNNABLE_CONFIGURATION_FILE", configuration_file
+        )
+        run_context = entrypoints.prepare_configurations(
+            configuration_file=configuration_file,
+            run_id=run_id,
+            tag=tag,
+            parameters_file=parameters_file,
+            is_job=True,
+        )
+
+        assert isinstance(run_context.executor, BaseJobExecutor)
+        run_context.from_sdk = True
+
+        utils.set_runnable_environment_variables(
+            run_id=run_id, configuration_file=configuration_file, tag=tag
+        )
+
+        console.print("Working with context:")
+        console.print(run_context)
+        console.rule(style="[dark orange]")
+
+        if not run_context.executor._is_local:
+            # We are not working with executor that does not work in local environment
+            import inspect
+
+            caller_stack = inspect.stack()[1]
+            relative_to_root = str(Path(caller_stack.filename).relative_to(Path.cwd()))
+
+            module_name = re.sub(r"\b.py\b", "", relative_to_root.replace("/", "."))
+            module_to_call = f"{module_name}.{caller_stack.function}"
+
+            run_context.job_definition_file = f"{module_to_call}.py"
+
+        job = self.task.create_job()
+        catalog_settings = self.return_catalog_settings()
+
+        run_context.executor.submit_job(job, catalog_settings=catalog_settings)
+
+        logger.info(
+            "Executing the job from the user. We are still in the caller's compute environment"
+        )
+
+        if run_context.executor._is_local:
             return run_context.run_log_store.get_run_log_by_id(
                 run_id=run_context.run_id
             )
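The new Job model is the job-mode counterpart of Pipeline.execute: it builds a RunnableTask via create_job() and hands it to a BaseJobExecutor.submit_job. A usage sketch based only on the fields and signature added above; the task function and argument values are illustrative:

from runnable.sdk import Job, PythonTask


def say_hello():
    print("hello from a job")


job = Job(
    name="hello-job",
    task=PythonTask(name="hello", function=say_hello),
)
job.execute(job_id="hello-001", parameters_file="parameters.yaml")  # illustrative arguments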
runnable/tasks.py
CHANGED
@@ -156,19 +156,6 @@ class BaseTaskType(BaseModel):
         params = self.resolve_unreduced_parameters(map_variable=map_variable)
         logger.info(f"Parameters available for the execution: {params}")
 
-        for param_name, param in params.items():
-            # Any access to unreduced param should be replaced.
-            # The replacement is the context param
-            # It is possible that the unreduced param is not created as no upstream step
-            # has created it yet.
-            if param.reduced is False:
-                context_param = param_name
-                for _, v in map_variable.items():  # type: ignore
-                    context_param = f"{v}_{context_param}"
-
-                if context_param in params:
-                    params[param_name].value = params[context_param].value
-
         task_console.log("Parameters available for the execution:")
         task_console.log(params)
 
@@ -440,8 +427,12 @@ class NotebookTaskType(BaseTaskType):
 
     @property
     def notebook_output_path(self) -> str:
-        node_name = self._context.executor._context_node.internal_name
-        sane_name = "".join(x for x in node_name if x.isalnum())
+        # This is to accommodate jobs which does not have a context_node
+        if self._context.executor._context_node:
+            node_name = self._context.executor._context_node.internal_name
+            sane_name = "".join(x for x in node_name if x.isalnum())
+        else:
+            sane_name = ""
 
         output_path = Path(".", self.command)
         file_name = output_path.parent / (output_path.stem + f"{sane_name}_out.ipynb")
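The notebook_output_path fallback means a notebook executed as a job (no _context_node on the executor) simply drops the node-name infix from the output file. The path arithmetic from the two unchanged lines above, run standalone with an illustrative notebook name:

from pathlib import Path

command = "notebooks/eda.ipynb"  # illustrative notebook command
sane_name = ""  # the job case: no _context_node, per the new else branch

output_path = Path(".", command)
file_name = output_path.parent / (output_path.stem + f"{sane_name}_out.ipynb")
print(file_name)  # notebooks/eda_out.ipynb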
|