runnable-0.50.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- extensions/README.md +0 -0
- extensions/__init__.py +0 -0
- extensions/catalog/README.md +0 -0
- extensions/catalog/any_path.py +214 -0
- extensions/catalog/file_system.py +52 -0
- extensions/catalog/minio.py +72 -0
- extensions/catalog/pyproject.toml +14 -0
- extensions/catalog/s3.py +11 -0
- extensions/job_executor/README.md +0 -0
- extensions/job_executor/__init__.py +236 -0
- extensions/job_executor/emulate.py +70 -0
- extensions/job_executor/k8s.py +553 -0
- extensions/job_executor/k8s_job_spec.yaml +37 -0
- extensions/job_executor/local.py +35 -0
- extensions/job_executor/local_container.py +161 -0
- extensions/job_executor/pyproject.toml +16 -0
- extensions/nodes/README.md +0 -0
- extensions/nodes/__init__.py +0 -0
- extensions/nodes/conditional.py +301 -0
- extensions/nodes/fail.py +78 -0
- extensions/nodes/loop.py +394 -0
- extensions/nodes/map.py +477 -0
- extensions/nodes/parallel.py +281 -0
- extensions/nodes/pyproject.toml +15 -0
- extensions/nodes/stub.py +93 -0
- extensions/nodes/success.py +78 -0
- extensions/nodes/task.py +156 -0
- extensions/pipeline_executor/README.md +0 -0
- extensions/pipeline_executor/__init__.py +871 -0
- extensions/pipeline_executor/argo.py +1266 -0
- extensions/pipeline_executor/emulate.py +119 -0
- extensions/pipeline_executor/local.py +226 -0
- extensions/pipeline_executor/local_container.py +369 -0
- extensions/pipeline_executor/mocked.py +159 -0
- extensions/pipeline_executor/pyproject.toml +16 -0
- extensions/run_log_store/README.md +0 -0
- extensions/run_log_store/__init__.py +0 -0
- extensions/run_log_store/any_path.py +100 -0
- extensions/run_log_store/chunked_fs.py +122 -0
- extensions/run_log_store/chunked_minio.py +141 -0
- extensions/run_log_store/file_system.py +91 -0
- extensions/run_log_store/generic_chunked.py +549 -0
- extensions/run_log_store/minio.py +114 -0
- extensions/run_log_store/pyproject.toml +15 -0
- extensions/secrets/README.md +0 -0
- extensions/secrets/dotenv.py +62 -0
- extensions/secrets/pyproject.toml +15 -0
- runnable/__init__.py +108 -0
- runnable/catalog.py +141 -0
- runnable/cli.py +484 -0
- runnable/context.py +730 -0
- runnable/datastore.py +1058 -0
- runnable/defaults.py +159 -0
- runnable/entrypoints.py +390 -0
- runnable/exceptions.py +137 -0
- runnable/executor.py +561 -0
- runnable/gantt.py +1646 -0
- runnable/graph.py +501 -0
- runnable/names.py +546 -0
- runnable/nodes.py +593 -0
- runnable/parameters.py +217 -0
- runnable/pickler.py +96 -0
- runnable/sdk.py +1277 -0
- runnable/secrets.py +92 -0
- runnable/tasks.py +1268 -0
- runnable/telemetry.py +142 -0
- runnable/utils.py +423 -0
- runnable-0.50.0.dist-info/METADATA +189 -0
- runnable-0.50.0.dist-info/RECORD +72 -0
- runnable-0.50.0.dist-info/WHEEL +4 -0
- runnable-0.50.0.dist-info/entry_points.txt +53 -0
- runnable-0.50.0.dist-info/licenses/LICENSE +201 -0
extensions/nodes/parallel.py
ADDED

@@ -0,0 +1,281 @@
import logging
from copy import deepcopy
from multiprocessing import Pool
from typing import Any, Dict, Optional, cast

from pydantic import Field, field_serializer

from runnable import console, defaults, exceptions
from runnable.defaults import IterableParameterModel
from runnable.graph import Graph, create_graph
from runnable.nodes import CompositeNode

logger = logging.getLogger(defaults.LOGGER_NAME)


class ParallelNode(CompositeNode):
    """
    A composite node containing many graph objects within itself.

    The structure is generally:
        ParallelNode:
            Branch A:
                Sub graph definition
            Branch B:
                Sub graph definition
            . . .

    """

    node_type: str = Field(default="parallel", serialization_alias="type")
    branches: Dict[str, Graph]

    def get_summary(self) -> Dict[str, Any]:
        summary = {
            "name": self.name,
            "type": self.node_type,
            "branches": [branch.get_summary() for branch in self.branches.values()],
        }

        return summary

    @field_serializer("branches")
    def ser_branches(self, branches: Dict[str, Graph]) -> Dict[str, Graph]:
        ret: Dict[str, Graph] = {}

        for branch_name, branch in branches.items():
            ret[branch_name.split(".")[-1]] = branch

        return ret

    @classmethod
    def parse_from_config(cls, config: Dict[str, Any]) -> "ParallelNode":
        internal_name = cast(str, config.get("internal_name"))

        config_branches = config.pop("branches", {})
        branches = {}
        for branch_name, branch_config in config_branches.items():
            sub_graph = create_graph(
                deepcopy(branch_config),
                internal_branch_name=internal_name + "." + branch_name,
            )
            branches[internal_name + "." + branch_name] = sub_graph

        if not branches:
            raise Exception("A parallel node should have branches")
        return cls(branches=branches, **config)

    def _get_branch_by_name(self, branch_name: str) -> Graph:
        if branch_name in self.branches:
            return self.branches[branch_name]

        raise Exception(f"Branch {branch_name} does not exist")

    def fan_out(
        self,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """
        The general fan out method for a node of type Parallel.
        This method assumes that the step log has already been created.

        3rd party orchestrators should create the step log and use this method to create the branch logs.

        Args:
            executor (BaseExecutor): The executor class as defined by the config
            iter_variable (dict, optional): If the node is part of a map node. Defaults to None.
        """
        # Prepare the branch logs
        for internal_branch_name, _ in self.branches.items():
            effective_branch_name = self._resolve_map_placeholders(
                internal_branch_name, iter_variable=iter_variable
            )

            try:
                branch_log = self._context.run_log_store.get_branch_log(
                    effective_branch_name, self._context.run_id
                )
                console.print(f"Branch log already exists for {effective_branch_name}")
            except (exceptions.BranchLogNotFoundError, exceptions.EntityNotFoundError):
                branch_log = self._context.run_log_store.create_branch_log(
                    effective_branch_name
                )
                console.print(f"Branch log created for {effective_branch_name}")

            branch_log.status = defaults.PROCESSING
            self._context.run_log_store.add_branch_log(branch_log, self._context.run_id)

    def execute_as_graph(
        self,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """
        This function does the actual execution of the sub-branches of the parallel node.

        From a design perspective, this function should not be called if the execution is 3rd party orchestrated.

        The modes that render the job specifications do not need to interact with this node at all, as they have
        their own internal mechanisms of handling parallel states.
        If they do not, you can find a way using as-is nodes as hack nodes.

        The execution of a dag could result in
            * The dag being completely executed with a definite (fail, success) state in case of
              local or local-container execution
            * The dag being in a processing state with PROCESSING status in case of local-aws-batch

        Only fail state is considered failure during this phase of execution.

        Args:
            executor (Executor): The Executor as per the use config
            **kwargs: Optional kwargs passed around
        """
        self.fan_out(iter_variable=iter_variable)

        # Check if parallel execution is enabled and supported
        enable_parallel = getattr(
            self._context.pipeline_executor, "enable_parallel", False
        )
        supports_parallel_writes = getattr(
            self._context.run_log_store, "supports_parallel_writes", False
        )

        # Check if we're using a local executor (local or local-container)
        executor_service_name = getattr(
            self._context.pipeline_executor, "service_name", ""
        )
        is_local_executor = executor_service_name in ["local", "local-container"]

        if enable_parallel and is_local_executor:
            if not supports_parallel_writes:
                logger.warning(
                    "Parallel execution was requested but the run log store does not support parallel writes. "
                    "Falling back to sequential execution. Consider using a run log store with "
                    "supports_parallel_writes=True for parallel execution."
                )
                self._execute_sequentially(iter_variable)
            else:
                logger.info("Executing branches in parallel")
                self._execute_in_parallel(iter_variable)
        else:
            self._execute_sequentially(iter_variable)

        self.fan_in(iter_variable=iter_variable)

    def _execute_sequentially(
        self,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """Execute branches sequentially (original behavior)."""
        for _, branch in self.branches.items():
            self._context.pipeline_executor.execute_graph(
                branch, iter_variable=iter_variable
            )

    def _execute_in_parallel(
        self,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """Execute branches in parallel using multiprocessing."""
        from runnable.entrypoints import execute_single_branch

        # Prepare arguments for each branch
        branch_args = []
        for branch_name, branch in self.branches.items():
            branch_args.append((branch_name, branch, self._context, iter_variable))

        # Use multiprocessing Pool to execute branches in parallel
        with Pool() as pool:
            results = pool.starmap(execute_single_branch, branch_args)

        # Check if any branch failed
        if not all(results):
            failed_branches = [
                branch_name
                for (branch_name, _, _, _), result in zip(branch_args, results)
                if not result
            ]
            logger.error(f"The following branches failed: {failed_branches}")
            # Note: The actual failure handling and status update will be done in fan_in()

    def fan_in(
        self,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """
        The general fan in method for a node of type Parallel.

        3rd party orchestrators should use this method to find the status of the composite step.

        Args:
            executor (BaseExecutor): The executor class as defined by the config
            iter_variable (dict, optional): If the node is part of a map. Defaults to None.
        """
        effective_internal_name = self._resolve_map_placeholders(
            self.internal_name, iter_variable=iter_variable
        )
        step_success_bool = True
        for internal_branch_name, _ in self.branches.items():
            effective_branch_name = self._resolve_map_placeholders(
                internal_branch_name, iter_variable=iter_variable
            )
            branch_log = self._context.run_log_store.get_branch_log(
                effective_branch_name, self._context.run_id
            )

            if branch_log.status != defaults.SUCCESS:
                step_success_bool = False

        # Collate all the results and update the status of the step

        step_log = self._context.run_log_store.get_step_log(
            effective_internal_name, self._context.run_id
        )

        if step_success_bool:  # If none failed
            step_log.status = defaults.SUCCESS
        else:
            step_log.status = defaults.FAIL

        self._context.run_log_store.add_step_log(step_log, self._context.run_id)

        # If we failed, return without parameter rollback
        if not step_log.status == defaults.SUCCESS:
            return

        # Roll back parameters from all branches to parent scope
        parent_params = self._context.run_log_store.get_parameters(
            self._context.run_id, internal_branch_name=self.internal_branch_name
        )

        for internal_branch_name, _ in self.branches.items():
            effective_branch_name = self._resolve_map_placeholders(
                internal_branch_name, iter_variable=iter_variable
            )

            branch_params = self._context.run_log_store.get_parameters(
                self._context.run_id, internal_branch_name=effective_branch_name
            )

            # Merge branch parameters into parent (overwrite with branch values)
            # If multiple branches set the same parameter, last one wins
            parent_params.update(branch_params)

        self._context.run_log_store.set_parameters(
            parameters=parent_params,
            run_id=self._context.run_id,
            internal_branch_name=self.internal_branch_name,
        )

    async def execute_as_graph_async(
        self,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """Async parallel execution."""
        self.fan_out(iter_variable=iter_variable)  # sync - just creates branch logs

        for _, branch in self.branches.items():
            await self._context.pipeline_executor.execute_graph_async(
                branch, iter_variable=iter_variable
            )

        self.fan_in(iter_variable=iter_variable)  # sync - just collates status
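The fan-out path above hinges on multiprocessing.Pool.starmap: each branch becomes one tuple of arguments, a module-level worker executes it and reports success as a boolean, and the booleans are collated before fan_in() decides the composite status. The following is a minimal, self-contained sketch of that pattern; run_branch and its payloads are hypothetical stand-ins for runnable.entrypoints.execute_single_branch and the real branch arguments, which are not part of this hunk.

# Sketch of the Pool.starmap fan-out used by _execute_in_parallel above.
# run_branch is a hypothetical stand-in for execute_single_branch: it must be
# defined at module level so it can be pickled, and it returns True on success.
from multiprocessing import Pool


def run_branch(branch_name: str, payload: int) -> bool:
    # Pretend to execute one branch; report success or failure as a bool.
    try:
        _ = payload * 2  # placeholder for real branch work
        return True
    except Exception:
        return False


if __name__ == "__main__":
    branch_args = [("branch_a", 1), ("branch_b", 2), ("branch_c", 3)]

    # Each tuple in branch_args is unpacked into run_branch's arguments.
    with Pool() as pool:
        results = pool.starmap(run_branch, branch_args)

    # Mirror the failure collation: the real fan_in() later sets the step status.
    failed = [name for (name, _), ok in zip(branch_args, results) if not ok]
    print(f"failed branches: {failed}")

One practical constraint of this design is that everything handed to the pool, including the run context in the real code, has to be picklable, since starmap serializes the argument tuples before dispatching them to worker processes.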
extensions/nodes/pyproject.toml
ADDED

@@ -0,0 +1,15 @@
[project]
name = "nodes"
version = "0.0.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []


[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
packages = ["."]
extensions/nodes/stub.py
ADDED
@@ -0,0 +1,93 @@
import logging
from datetime import datetime
from typing import Any, Dict, Optional

from pydantic import ConfigDict, Field

from runnable import datastore, defaults
from runnable.datastore import StepLog
from runnable.defaults import IterableParameterModel
from runnable.nodes import ExecutableNode

logger = logging.getLogger(defaults.LOGGER_NAME)


class StubNode(ExecutableNode):
    """
    Stub is a convenience design node.
    It always returns success in the attempt log and does nothing.

    This node is very similar to the pass state in Step Functions.

    This node type could be handy when designing the pipeline and stubbing functions.
    --8<-- [start:stub_reference]
    A stub execution node of the pipeline.
    Please refer to define pipeline/tasks/stub for more information.

    As part of the dag definition, a stub task is defined as follows:

    dag:
      steps:
        stub_task: # The name of the node
          type: stub
          on_failure: The name of the step to traverse in case of failure
          next: The next node to execute after this task, use "success" to terminate the pipeline successfully
            or "fail" to terminate the pipeline with an error.

    It can take an arbitrary number of parameters, which is handy to temporarily silence a task node.
    --8<-- [end:stub_reference]
    """

    node_type: str = Field(default="stub", serialization_alias="type")
    model_config = ConfigDict(extra="ignore")

    def get_summary(self) -> Dict[str, Any]:
        summary = {
            "name": self.name,
            "type": self.node_type,
        }

        return summary

    @classmethod
    def parse_from_config(cls, config: Dict[str, Any]) -> "StubNode":
        return cls(**config)

    def execute(
        self,
        mock=False,
        iter_variable: Optional[IterableParameterModel] = None,
        attempt_number: int = 1,
    ) -> StepLog:
        """
        Do Nothing node.
        We just send a success attempt log back to the caller.

        Args:
            executor ([type]): [description]
            mock (bool, optional): [description]. Defaults to False.
            iter_variable (str, optional): [description]. Defaults to ''.

        Returns:
            [type]: [description]
        """
        step_log = self._context.run_log_store.get_step_log(
            self._get_step_log_name(iter_variable), self._context.run_id
        )

        attempt_log = datastore.StepAttempt(
            status=defaults.SUCCESS,
            start_time=str(datetime.now()),
            end_time=str(datetime.now()),
            attempt_number=attempt_number,
        )

        self._context.pipeline_executor.add_code_identities(
            node=self, attempt_log=attempt_log
        )

        step_log.status = attempt_log.status

        step_log.attempts.append(attempt_log)

        return step_log
extensions/nodes/success.py
ADDED

@@ -0,0 +1,78 @@
from datetime import datetime
from typing import Any, Dict, Optional, cast

from pydantic import Field

from runnable import datastore, defaults
from runnable.datastore import StepLog
from runnable.defaults import IterableParameterModel
from runnable.nodes import TerminalNode


class SuccessNode(TerminalNode):
    """
    A leaf node of the graph that represents a success node
    """

    node_type: str = Field(default="success", serialization_alias="type")

    @classmethod
    def parse_from_config(cls, config: Dict[str, Any]) -> "SuccessNode":
        return cast("SuccessNode", super().parse_from_config(config))

    def get_summary(self) -> Dict[str, Any]:
        summary = {
            "name": self.name,
            "type": self.node_type,
        }

        return summary

    def execute(
        self,
        mock=False,
        iter_variable: Optional[IterableParameterModel] = None,
        attempt_number: int = 1,
    ) -> StepLog:
        """
        Execute the success node.
        Set the run or branch log status to success.

        Args:
            executor (_type_): The executor class
            mock (bool, optional): If we should just mock and not perform anything. Defaults to False.
            iter_variable (dict, optional): If the node belongs to an internal branch. Defaults to None.

        Returns:
            StepAttempt: The step attempt object
        """
        step_log = self._context.run_log_store.get_step_log(
            self._get_step_log_name(iter_variable), self._context.run_id
        )

        attempt_log = datastore.StepAttempt(
            status=defaults.SUCCESS,
            start_time=str(datetime.now()),
            end_time=str(datetime.now()),
            attempt_number=attempt_number,
            retry_indicator=self._context.retry_indicator,
        )

        # Add code identities to the attempt
        self._context.pipeline_executor.add_code_identities(
            node=self, attempt_log=attempt_log
        )

        run_or_branch_log = self._context.run_log_store.get_branch_log(
            self._get_branch_log_name(iter_variable), self._context.run_id
        )
        run_or_branch_log.status = defaults.SUCCESS
        self._context.run_log_store.add_branch_log(
            run_or_branch_log, self._context.run_id
        )

        step_log.status = attempt_log.status

        step_log.attempts.append(attempt_log)

        return step_log
extensions/nodes/task.py
ADDED
@@ -0,0 +1,156 @@
import logging
from datetime import datetime
from typing import Any, Dict, Optional

from pydantic import ConfigDict, Field

from runnable import datastore, defaults
from runnable.datastore import StepLog
from runnable.defaults import IterableParameterModel
from runnable.nodes import ExecutableNode
from runnable.tasks import BaseTaskType, create_task

logger = logging.getLogger(defaults.LOGGER_NAME)


class TaskNode(ExecutableNode):
    """
    A node of type Task.

    This node does the actual function execution of the graph in all cases.
    """

    executable: BaseTaskType = Field(exclude=True)
    node_type: str = Field(default="task", serialization_alias="type")

    # It is technically not allowed as parse_from_config filters them.
    # This is just to get the task level configuration to be present during serialization.
    model_config = ConfigDict(extra="allow")

    @classmethod
    def parse_from_config(cls, config: Dict[str, Any]) -> "TaskNode":
        # separate task config from node config
        task_config = {
            k: v for k, v in config.items() if k not in TaskNode.model_fields.keys()
        }
        node_config = {
            k: v for k, v in config.items() if k in TaskNode.model_fields.keys()
        }

        executable = create_task(task_config)
        return cls(executable=executable, **node_config, **task_config)

    def get_summary(self) -> Dict[str, Any]:
        summary = {
            "name": self.name,
            "type": self.node_type,
            "executable": self.executable.get_summary(),
            "catalog": self._get_catalog_settings(),
        }

        return summary

    def execute(
        self,
        mock=False,
        iter_variable: Optional[IterableParameterModel] = None,
        attempt_number: int = 1,
    ) -> StepLog:
        """
        All that we do in runnable is to come to this point where we actually execute the command.

        Args:
            executor (_type_): The executor class
            mock (bool, optional): If we should just mock and not execute. Defaults to False.
            iter_variable: Optional iteration variable if the node is part of internal branch. Defaults to None.

        Returns:
            StepAttempt: The attempt object
        """
        step_log = self._context.run_log_store.get_step_log(
            self._get_step_log_name(iter_variable), self._context.run_id
        )

        # Set the branch scope for parameter operations
        self.executable.internal_branch_name = self._get_branch_log_name(iter_variable)

        if not mock:
            # Do not run if we are mocking the execution, could be useful for caching and dry runs
            attempt_log = self.executable.execute_command(iter_variable=iter_variable)
            attempt_log.attempt_number = attempt_number
            attempt_log.retry_indicator = self._context.retry_indicator
        else:
            attempt_log = datastore.StepAttempt(
                status=defaults.SUCCESS,
                start_time=str(datetime.now()),
                end_time=str(datetime.now()),
                attempt_number=attempt_number,
                retry_indicator=self._context.retry_indicator,
            )

        # Add code identities to the attempt
        self._context.pipeline_executor.add_code_identities(
            node=self, attempt_log=attempt_log
        )

        logger.info(f"attempt_log: {attempt_log}")
        logger.info(f"Step {self.name} completed with status: {attempt_log.status}")

        step_log.status = attempt_log.status
        step_log.attempts.append(attempt_log)

        return step_log

    async def execute_async(
        self,
        iter_variable: Optional[IterableParameterModel] = None,
        attempt_number: int = 1,
        mock: bool = False,
    ) -> StepLog:
        """Async task execution with fallback to sync."""
        step_log = self._context.run_log_store.get_step_log(
            self._get_step_log_name(iter_variable), self._context.run_id
        )

        # Set the branch scope for parameter operations
        self.executable.internal_branch_name = self._get_branch_log_name(iter_variable)

        if not mock:
            # Get event_callback from executor
            event_callback = self._context.pipeline_executor._event_callback

            # Try async first, fall back to sync
            try:
                attempt_log = await self.executable.execute_command_async(
                    iter_variable=iter_variable,
                    event_callback=event_callback,
                )
            except NotImplementedError:
                # Task doesn't support async, fall back to sync
                attempt_log = self.executable.execute_command(
                    iter_variable=iter_variable
                )

            attempt_log.attempt_number = attempt_number
            attempt_log.retry_indicator = self._context.retry_indicator
        else:
            attempt_log = datastore.StepAttempt(
                status=defaults.SUCCESS,
                start_time=str(datetime.now()),
                end_time=str(datetime.now()),
                attempt_number=attempt_number,
                retry_indicator=self._context.retry_indicator,
            )

        # Add code identities to the attempt
        self._context.pipeline_executor.add_code_identities(
            node=self, attempt_log=attempt_log
        )

        logger.info(f"attempt_log: {attempt_log}")
        logger.info(f"Step {self.name} completed with status: {attempt_log.status}")

        step_log.status = attempt_log.status
        step_log.attempts.append(attempt_log)

        return step_log
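TaskNode.execute_async above probes for an async implementation first and drops back to the blocking execute_command when the task type raises NotImplementedError. The sketch below isolates that control flow; SyncOnlyTask, AsyncTask, and run are hypothetical stand-ins, not part of the runnable API.

# Minimal sketch of the async-with-sync-fallback pattern used by execute_async.
# SyncOnlyTask and AsyncTask are hypothetical stand-ins for concrete task types;
# only the try/except NotImplementedError control flow is the point.
import asyncio


class SyncOnlyTask:
    def execute_command(self) -> str:
        return "ran synchronously"

    async def execute_command_async(self) -> str:
        # Mirrors a task type that has not implemented an async path.
        raise NotImplementedError


class AsyncTask(SyncOnlyTask):
    async def execute_command_async(self) -> str:
        await asyncio.sleep(0)  # placeholder for real async work
        return "ran asynchronously"


async def run(task: SyncOnlyTask) -> str:
    try:
        return await task.execute_command_async()
    except NotImplementedError:
        # Fall back to the blocking implementation, as execute_async does above.
        return task.execute_command()


if __name__ == "__main__":
    print(asyncio.run(run(SyncOnlyTask())))  # -> ran synchronously
    print(asyncio.run(run(AsyncTask())))     # -> ran asynchronously

Note that the fallback branch runs the synchronous command on the event loop thread, so other coroutines are blocked until it returns; that matches the behavior of the code in this hunk.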
File without changes