runnable-0.50.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. extensions/README.md +0 -0
  2. extensions/__init__.py +0 -0
  3. extensions/catalog/README.md +0 -0
  4. extensions/catalog/any_path.py +214 -0
  5. extensions/catalog/file_system.py +52 -0
  6. extensions/catalog/minio.py +72 -0
  7. extensions/catalog/pyproject.toml +14 -0
  8. extensions/catalog/s3.py +11 -0
  9. extensions/job_executor/README.md +0 -0
  10. extensions/job_executor/__init__.py +236 -0
  11. extensions/job_executor/emulate.py +70 -0
  12. extensions/job_executor/k8s.py +553 -0
  13. extensions/job_executor/k8s_job_spec.yaml +37 -0
  14. extensions/job_executor/local.py +35 -0
  15. extensions/job_executor/local_container.py +161 -0
  16. extensions/job_executor/pyproject.toml +16 -0
  17. extensions/nodes/README.md +0 -0
  18. extensions/nodes/__init__.py +0 -0
  19. extensions/nodes/conditional.py +301 -0
  20. extensions/nodes/fail.py +78 -0
  21. extensions/nodes/loop.py +394 -0
  22. extensions/nodes/map.py +477 -0
  23. extensions/nodes/parallel.py +281 -0
  24. extensions/nodes/pyproject.toml +15 -0
  25. extensions/nodes/stub.py +93 -0
  26. extensions/nodes/success.py +78 -0
  27. extensions/nodes/task.py +156 -0
  28. extensions/pipeline_executor/README.md +0 -0
  29. extensions/pipeline_executor/__init__.py +871 -0
  30. extensions/pipeline_executor/argo.py +1266 -0
  31. extensions/pipeline_executor/emulate.py +119 -0
  32. extensions/pipeline_executor/local.py +226 -0
  33. extensions/pipeline_executor/local_container.py +369 -0
  34. extensions/pipeline_executor/mocked.py +159 -0
  35. extensions/pipeline_executor/pyproject.toml +16 -0
  36. extensions/run_log_store/README.md +0 -0
  37. extensions/run_log_store/__init__.py +0 -0
  38. extensions/run_log_store/any_path.py +100 -0
  39. extensions/run_log_store/chunked_fs.py +122 -0
  40. extensions/run_log_store/chunked_minio.py +141 -0
  41. extensions/run_log_store/file_system.py +91 -0
  42. extensions/run_log_store/generic_chunked.py +549 -0
  43. extensions/run_log_store/minio.py +114 -0
  44. extensions/run_log_store/pyproject.toml +15 -0
  45. extensions/secrets/README.md +0 -0
  46. extensions/secrets/dotenv.py +62 -0
  47. extensions/secrets/pyproject.toml +15 -0
  48. runnable/__init__.py +108 -0
  49. runnable/catalog.py +141 -0
  50. runnable/cli.py +484 -0
  51. runnable/context.py +730 -0
  52. runnable/datastore.py +1058 -0
  53. runnable/defaults.py +159 -0
  54. runnable/entrypoints.py +390 -0
  55. runnable/exceptions.py +137 -0
  56. runnable/executor.py +561 -0
  57. runnable/gantt.py +1646 -0
  58. runnable/graph.py +501 -0
  59. runnable/names.py +546 -0
  60. runnable/nodes.py +593 -0
  61. runnable/parameters.py +217 -0
  62. runnable/pickler.py +96 -0
  63. runnable/sdk.py +1277 -0
  64. runnable/secrets.py +92 -0
  65. runnable/tasks.py +1268 -0
  66. runnable/telemetry.py +142 -0
  67. runnable/utils.py +423 -0
  68. runnable-0.50.0.dist-info/METADATA +189 -0
  69. runnable-0.50.0.dist-info/RECORD +72 -0
  70. runnable-0.50.0.dist-info/WHEEL +4 -0
  71. runnable-0.50.0.dist-info/entry_points.txt +53 -0
  72. runnable-0.50.0.dist-info/licenses/LICENSE +201 -0
runnable/nodes.py ADDED
@@ -0,0 +1,593 @@
1
+ import logging
2
+ from abc import ABC, abstractmethod
3
+ from typing import Any, Dict, List, Optional
4
+
5
+ from pydantic import BaseModel, ConfigDict, Field, field_validator
6
+
7
+ import runnable.context as context
8
+ from runnable import defaults, exceptions
9
+ from runnable.datastore import StepLog
10
+ from runnable.defaults import IterableParameterModel
11
+ from runnable.graph import Graph
12
+
13
+ logger = logging.getLogger(defaults.LOGGER_NAME)
14
+
15
+ # --8<-- [start:docs]
16
+
17
+
18
+ class BaseNode(ABC, BaseModel):
19
+ """
20
+ Base class with common functionality provided for a Node of a graph.
21
+
22
+ A node of a graph can be:
23
+ * a single execution node, such as task, success, or fail,
24
+ * a graph in itself, such as parallel, dag, or map,
25
+ * or a convenience node, such as as-is.
26
+
27
+ The name is relative to the DAG.
28
+ The internal name of the node is its absolute name in dot path convention;
29
+ it maps one to one to the name in the run log.
30
+ The internal name of a node should always have an odd number of dot-separated segments.
31
+
32
+ The internal branch name applies only to branched nodes and is the name of the branch the node belongs to.
33
+ It should always have an even number of dot-separated segments.
34
+ """
35
+
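A quick illustration of the dot path convention above, using made-up names:

    # A node nested inside a branch: odd number of dot-separated segments.
    internal_name = "train_models.placeholder.fit"        # 3 segments
    # The branch that node belongs to: even number of segments.
    internal_branch_name = "train_models.placeholder"     # 2 segments
    assert len(internal_name.split(".")) % 2 == 1
    assert len(internal_branch_name.split(".")) % 2 == 0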
36
+ node_type: str = Field(serialization_alias="type")
37
+ name: str
38
+ internal_name: str = Field(exclude=True)
39
+ internal_branch_name: str = Field(default="", exclude=True)
40
+
41
+ is_composite: bool = Field(default=False, exclude=True)
42
+
43
+ @property
44
+ def _context(self):
45
+ current_context = context.get_run_context()
46
+ if current_context is None:
47
+ raise RuntimeError("No run context available")
48
+ if not isinstance(
49
+ current_context, (context.PipelineContext, context.AsyncPipelineContext)
50
+ ):
51
+ raise TypeError(
52
+ f"Expected PipelineContext or AsyncPipelineContext, got {type(current_context).__name__}"
53
+ )
54
+ return current_context
55
+
56
+ model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=False)
57
+
58
+ @field_validator("name")
59
+ @classmethod
60
+ def validate_name(cls, name: str):
61
+ if "." in name or "%" in name:
62
+ raise ValueError("Node names cannot have . or '%' in them")
63
+ return name
64
+
65
+ def _command_friendly_name(
66
+ self, replace_with=defaults.COMMAND_FRIENDLY_CHARACTER
67
+ ) -> str:
68
+ """
69
+ Replace spaces in the node name with the command-friendly character.
70
+ Spaces in node names are convenient for the user but cause issues when the name is used programmatically.
71
+
72
+ Returns:
73
+ str: The command friendly name of the node
74
+ """
75
+ return self.internal_name.replace(" ", replace_with)
76
+
77
+ @classmethod
78
+ def _get_internal_name_from_command_name(cls, command_name: str) -> str:
79
+ """
80
+ Replace runnable specific character (%) with whitespace.
81
+ The opposite of _command_friendly_name.
82
+
83
+ Args:
84
+ command_name (str): The command friendly node name
85
+
86
+ Returns:
87
+ str: The internal name of the step
88
+ """
89
+ return command_name.replace(defaults.COMMAND_FRIENDLY_CHARACTER, " ")
90
+
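A small round-trip sketch of the two helpers above, assuming defaults.COMMAND_FRIENDLY_CHARACTER is "%" as the docstring suggests:

    internal_name = "data prep.clean rows"                   # made-up internal name
    command_name = internal_name.replace(" ", "%")           # what _command_friendly_name() produces
    assert command_name == "data%prep.clean%rows"
    assert command_name.replace("%", " ") == internal_name   # _get_internal_name_from_command_name()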
91
+ @classmethod
92
+ def _resolve_iter_placeholders(
93
+ cls,
94
+ name: str,
95
+ iter_variable: Optional[IterableParameterModel] = None,
96
+ ) -> str:
97
+ """
98
+ Resolve iteration placeholders (map and loop) in node names.
99
+
100
+ Replaces MAP_PLACEHOLDER with map variable values and LOOP_PLACEHOLDER
101
+ with loop iteration indices in order.
102
+
103
+ Args:
104
+ name: The name containing placeholders
105
+ iter_variable: Iteration variables (map and loop)
106
+
107
+ Returns:
108
+ str: Name with placeholders resolved
109
+ """
110
+ if not iter_variable:
111
+ return name
112
+
113
+ resolved_name = name
114
+
115
+ # Resolve map placeholders
116
+ if iter_variable.map_variable:
117
+ for _, value in iter_variable.map_variable.items():
118
+ resolved_name = resolved_name.replace(
119
+ defaults.MAP_PLACEHOLDER, str(value.value), 1
120
+ )
121
+
122
+ # Resolve loop placeholders
123
+ if iter_variable.loop_variable:
124
+ for loop_index in iter_variable.loop_variable:
125
+ resolved_name = resolved_name.replace(
126
+ defaults.LOOP_PLACEHOLDER, str(loop_index.value), 1
127
+ )
128
+
129
+ return resolved_name
130
+
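A minimal sketch of the substitution performed above, using a made-up placeholder token in place of the real defaults.MAP_PLACEHOLDER value; each occurrence is replaced once, in order, with the current iteration value:

    name = "chunks.MAP_PLACEHOLDER.process"
    resolved = name.replace("MAP_PLACEHOLDER", "chunk_7", 1)
    assert resolved == "chunks.chunk_7.process"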
131
+ @classmethod
132
+ def _resolve_map_placeholders(
133
+ cls,
134
+ name: str,
135
+ iter_variable: Optional[IterableParameterModel] = None,
136
+ ) -> str:
137
+ """Deprecated: Use _resolve_iter_placeholders instead."""
138
+ return cls._resolve_iter_placeholders(name, iter_variable)
139
+
140
+ def _get_step_log_name(
141
+ self,
142
+ iter_variable: Optional[IterableParameterModel] = None,
143
+ ) -> str:
144
+ """
145
+ For every step in the dag, there is a corresponding step log name.
146
+ This method returns the step log name in dot path convention.
147
+
148
+ Step log names are "static" and equal to internal_name, except for nodes that belong to a map or loop branch.
149
+ For those nodes, the internal name contains a placeholder that is resolved at runtime.
150
+
151
+ Args:
152
+ iter_variable (IterableParameterModel, optional): The current map/loop iteration state used to resolve
153
+ placeholders in the name.
154
+
155
+ Returns:
156
+ str: The dot path name of the step log name
157
+ """
158
+ return self._resolve_map_placeholders(
159
+ self.internal_name, iter_variable=iter_variable
160
+ )
161
+
162
+ def _get_branch_log_name(
163
+ self,
164
+ iter_variable: Optional[IterableParameterModel] = None,
165
+ ) -> str:
166
+ """
167
+ For nodes that are internally branches, this method returns the branch log name.
168
+ The branch log name is in dot path convention.
169
+
170
+ For nodes that are not map, the internal branch name is equivalent to the branch name.
171
+ For map nodes, the internal branch name has a placeholder that is replaced at runtime.
172
+
173
+ Args:
174
+ iter_variable (IterableParameterModel, optional): The current map/loop iteration state used to resolve
175
+ placeholders in the name.
176
+
177
+ Returns:
178
+ str: The dot path name of the branch log
179
+ """
180
+ return self._resolve_map_placeholders(
181
+ self.internal_branch_name, iter_variable=iter_variable
182
+ )
183
+
184
+ def __str__(self) -> str: # pragma: no cover
185
+ """
186
+ String representation of the node.
187
+
188
+ Returns:
189
+ str: The string representation of the node.
190
+ """
191
+ return f"Node of type {self.node_type} and name {self.internal_name}"
192
+
193
+ @abstractmethod
194
+ def _get_on_failure_node(self) -> str:
195
+ """
196
+ Return the on_failure node defined in the config, or an empty string if none is defined.
197
+
198
+ The name is relative to the dag; the caller is expected to resolve it against the correct graph.
199
+
200
+ Returns:
201
+ str: The on_failure node defined by the dag, or ''
202
+ Abstract; concrete node classes provide the implementation.
203
+ """
204
+
205
+ @abstractmethod
206
+ def _get_next_node(self) -> str:
207
+ """
208
+ Return the next node as defined by the config.
209
+
210
+ Returns:
211
+ str: The node name, relative to the dag, as defined by the config
212
+ """
213
+
214
+ @abstractmethod
215
+ def _is_terminal_node(self) -> bool:
216
+ """
217
+ Returns whether the node is a terminal node, i.e. one with no next node.
218
+
219
+ Returns:
220
+ bool: True if the node is terminal (has no next node), False otherwise.
221
+ """
222
+
223
+ @abstractmethod
224
+ def _get_catalog_settings(self) -> Dict[str, Any]:
225
+ """
226
+ Return the catalog settings defined for the node, if any.
227
+
228
+ Returns:
229
+ dict: The catalog settings for the node; implementations may return an empty dict or raise if not applicable.
230
+ """
231
+
232
+ @abstractmethod
233
+ def _get_branch_by_name(self, branch_name: str) -> Graph:
234
+ """
235
+ Retrieve a branch by name.
236
+
237
+ The name is expected to follow a dot path convention.
238
+
239
+ Args:
240
+ branch_name (str): The dot path name of the branch to retrieve.
241
+
242
+ Raises:
243
+ Exception: If the node has no branches or no branch with the given name exists.
244
+ """
245
+
246
+ def _get_neighbors(self) -> List[str]:
247
+ """
248
+ Gets the connecting neighbor nodes, either the "next" node or "on_failure" node.
249
+
250
+ Returns:
251
+ list: List of connected neighbors for a given node. Empty if terminal node.
252
+ """
253
+ neighbors = []
254
+ try:
255
+ next_node = self._get_next_node()
256
+ neighbors += [next_node]
257
+ except exceptions.TerminalNodeError:
258
+ pass
259
+
260
+ try:
261
+ fail_node = self._get_on_failure_node()
262
+ if fail_node:
263
+ neighbors += [fail_node]
264
+ except exceptions.TerminalNodeError:
265
+ pass
266
+
267
+ return neighbors
268
+
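A hedged sketch of how a caller could walk a dag using _get_neighbors(); get_node_by_name is an assumed helper on Graph, not confirmed by this diff:

    def reachable_nodes(graph, start_name):
        seen, stack = set(), [start_name]
        while stack:
            name = stack.pop()
            if name in seen:
                continue
            seen.add(name)
            node = graph.get_node_by_name(name)   # assumed Graph helper
            stack.extend(node._get_neighbors())
        return seen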
269
+ @abstractmethod
270
+ def _get_executor_config(self, executor_type: str) -> str:
271
+ """
272
+ Return the executor override for this node for the given executor type, or an empty string if none is defined.
273
+
274
+ Args:
275
+ executor_type (str): The executor type that the override applies to.
276
+
277
+ Returns:
278
+ str: The executor override, if defined, or an empty string.
279
+ """
280
+
281
+ @abstractmethod
282
+ def _get_max_attempts(self) -> int:
283
+ """
284
+ The number of max attempts as defined by the config or 1.
285
+
286
+ Returns:
287
+ int: The number of maximum retries as defined by the config or 1.
288
+ """
289
+
290
+ @abstractmethod
291
+ def execute(
292
+ self,
293
+ mock=False,
294
+ iter_variable: Optional[IterableParameterModel] = None,
295
+ attempt_number: int = 1,
296
+ ) -> StepLog:
297
+ """
298
+ The actual function that does the execution of the command in the config.
299
+
300
+ Should only be implemented for task, success, fail and as-is and never for
301
+ composite nodes.
302
+
303
+ Args:
304
+ mock (bool, optional): Don't run, just pretend. Defaults to False.
305
+ iter_variable (IterableParameterModel, optional): The current map/loop iteration state,
306
+ if the node is part of a map or loop branch.
307
+ attempt_number (int, optional): The attempt number of this execution. Defaults to 1.
308
+
309
+ Returns:
310
+ StepLog: The step log of the execution.
311
+ """
312
+
313
+ async def execute_async(
314
+ self,
315
+ iter_variable: Optional[IterableParameterModel] = None,
316
+ attempt_number: int = 1,
317
+ mock: bool = False,
318
+ ) -> StepLog:
319
+ """
320
+ Async execution - default delegates to sync execute().
321
+
322
+ Override in subclasses that support true async execution (TaskNode).
323
+ Terminal nodes (SuccessNode, FailNode) use this default.
324
+ """
325
+ return self.execute(
326
+ iter_variable=iter_variable,
327
+ attempt_number=attempt_number,
328
+ mock=mock,
329
+ )
330
+
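A brief usage sketch of the default delegation above: a concrete, non-composite node that only implements execute() can still be awaited, since execute_async() simply calls it synchronously (node below is any such instance):

    import asyncio

    step_log = asyncio.run(node.execute_async(attempt_number=1))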
331
+ @abstractmethod
332
+ def execute_as_graph(
333
+ self,
334
+ iter_variable: Optional[IterableParameterModel] = None,
335
+ ):
336
+ """
337
+ This function would be called to set up the execution of the individual
338
+ branches of a composite node.
339
+
340
+ Function should only be implemented for composite nodes like dag, map, parallel.
341
+
342
+ Args:
343
+ iter_variable (IterableParameterModel, optional): The current map/loop iteration state, if any.
344
+
345
+ Raises:
346
+ NotImplementedError: Base class, hence not implemented.
347
+ """
348
+
349
+ @abstractmethod
350
+ def fan_out(
351
+ self,
352
+ iter_variable: Optional[IterableParameterModel] = None,
353
+ ):
354
+ """
355
+ This function would be called to set up the execution of the individual
356
+ branches of a composite node.
357
+
358
+ Function should only be implemented for composite nodes like dag, map, parallel.
359
+
360
+ Args:
361
+ iter_variable (IterableParameterModel, optional): The current map/loop iteration state,
362
+ if the node is part of a map or loop branch.
363
+
364
+ Raises:
365
+ Exception: If the node is not a composite node.
366
+ """
367
+
368
+ @abstractmethod
369
+ def fan_in(
370
+ self,
371
+ iter_variable: Optional[IterableParameterModel] = None,
372
+ ):
373
+ """
374
+ This function would be called to tear down the execution of the individual
375
+ branches of a composite node.
376
+
377
+ Function should only be implemented for composite nodes like dag, map, parallel.
378
+
379
+ Args:
380
+ iter_variable (IterableParameterModel, optional): The current map/loop iteration state,
381
+ if the node is part of a map or loop branch.
382
+
383
+ Raises:
384
+ Exception: If the node is not a composite node.
385
+ """
386
+
387
+ @classmethod
388
+ @abstractmethod
389
+ def parse_from_config(cls, config: Dict[str, Any]) -> "BaseNode":
390
+ """
391
+ Parse the config from the user and create the corresponding node.
392
+
393
+ Args:
394
+ config (Dict[str, Any]): The config of the node from the yaml or from the sdk.
395
+
396
+ Returns:
397
+ BaseNode: The corresponding node.
398
+ """
399
+
400
+ @abstractmethod
401
+ def get_summary(self) -> Dict[str, Any]:
402
+ """
403
+ Return the summary of the node
404
+
405
+ Returns:
406
+ Dict[str, Any]: A serializable summary of the node.
407
+ """
408
+
409
+
410
+ # --8<-- [end:docs]
411
+ class TraversalNode(BaseNode):
412
+ next_node: str = Field(serialization_alias="next")
413
+ on_failure: str = Field(default="")
414
+ overrides: Dict[str, str] = Field(default_factory=dict)
415
+
416
+ def _get_on_failure_node(self) -> str:
417
+ """
418
+ Return the on_failure node defined in the config, or an empty string if none is defined.
419
+
420
+ The name is relative to the dag; the caller is expected to resolve it against the correct graph.
421
+
422
+ Returns:
423
+ str: The on_failure node defined by the dag, or ''
424
+
425
+ """
426
+ return self.on_failure
427
+
428
+ def _get_next_node(self) -> str:
429
+ """
430
+ Return the next node as defined by the config.
431
+
432
+ Returns:
433
+ str: The node name, relative to the dag, as defined by the config
434
+ """
435
+
436
+ return self.next_node
437
+
438
+ def _is_terminal_node(self) -> bool:
439
+ """
440
+ Returns whether the node is a terminal node, i.e. one with no next node.
441
+
442
+ Returns:
443
+ bool: Always False; traversal nodes always define a next node.
444
+ """
445
+ return False
446
+
447
+ def _get_executor_config(self, executor_type) -> str:
448
+ return self.overrides.get(executor_type) or ""
449
+
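A small sketch mirroring the override lookup above with made-up override names (a plain dict stands in for a node's overrides field):

    overrides = {"argo": "gpu-override"}
    assert (overrides.get("argo") or "") == "gpu-override"
    assert (overrides.get("local") or "") == ""   # undefined executor type falls back to ""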
450
+
451
+ # Note: this model is defined in two places; see also runnable/sdk.py and keep the two in sync.
452
+ class CatalogStructure(BaseModel):
453
+ model_config = ConfigDict(extra="forbid")  # forbid unknown keys in catalog configuration
454
+
455
+ get: List[str] = Field(default_factory=list)
456
+ put: List[str] = Field(default_factory=list)
457
+ store_copy: bool = Field(default=True, alias="store_copy")
458
+
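A usage sketch of the CatalogStructure model defined above, with hypothetical glob patterns:

    catalog = CatalogStructure(get=["data/*.csv"], put=["model/*.pkl"])
    assert catalog.model_dump() == {
        "get": ["data/*.csv"],
        "put": ["model/*.pkl"],
        "store_copy": True,
    }
    # extra="forbid" means an unknown key such as fetch=[...] raises a pydantic ValidationError.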
459
+
460
+ class ExecutableNode(TraversalNode):
461
+ catalog: Optional[CatalogStructure] = Field(default=None)
462
+ max_attempts: int = Field(default=1, ge=1)
463
+
464
+ def _get_catalog_settings(self) -> Dict[str, Any]:
465
+ """
466
+ Return the catalog settings defined for the node, or an empty dict if none are defined.
467
+
468
+ Returns:
469
+ dict: The catalog settings as a dict, or an empty dict when no catalog is configured.
470
+ """
471
+ if self.catalog:
472
+ return self.catalog.model_dump()
473
+ return {}
474
+
475
+ def _get_max_attempts(self) -> int:
476
+ return self.max_attempts
477
+
478
+ def _get_branch_by_name(self, branch_name: str):
479
+ raise exceptions.NodeMethodCallError(
480
+ "This is an executable node and does not have branches"
481
+ )
482
+
483
+ def execute_as_graph(
484
+ self,
485
+ iter_variable: Optional[IterableParameterModel] = None,
486
+ ):
487
+ raise exceptions.NodeMethodCallError(
488
+ "This is an executable node and does not have a graph"
489
+ )
490
+
491
+ def fan_in(
492
+ self,
493
+ iter_variable: Optional[IterableParameterModel] = None,
494
+ ):
495
+ raise exceptions.NodeMethodCallError(
496
+ "This is an executable node and does not have a fan in"
497
+ )
498
+
499
+ def fan_out(
500
+ self,
501
+ iter_variable: Optional[IterableParameterModel] = None,
502
+ ):
503
+ raise exceptions.NodeMethodCallError(
504
+ "This is an executable node and does not have a fan out"
505
+ )
506
+
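A hedged sketch of what an executor can rely on for any executable (leaf) node, based on the fields and methods above; step is a hypothetical ExecutableNode subclass instance:

    attempts = step._get_max_attempts()        # always >= 1, enforced by Field(ge=1)
    settings = step._get_catalog_settings()    # {} when no catalog block is configured
    step_log = step.execute(mock=False, attempt_number=1)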
507
+
508
+ class CompositeNode(TraversalNode):
509
+ is_composite: bool = True
510
+
511
+ def _get_catalog_settings(self) -> Dict[str, Any]:
512
+ """
513
+ Composite nodes do not carry catalog settings of their own.
514
+
515
+ Raises:
516
+ NodeMethodCallError: Always, since catalog settings apply only to executable nodes.
517
+ """
518
+ raise exceptions.NodeMethodCallError(
519
+ "This is a composite node and does not have a catalog settings"
520
+ )
521
+
522
+ def _get_max_attempts(self) -> int:
523
+ raise Exception("This is a composite node and does not have a max_attempts")
524
+
525
+ def execute(
526
+ self,
527
+ mock=False,
528
+ iter_variable: Optional[IterableParameterModel] = None,
529
+ attempt_number: int = 1,
530
+ ) -> StepLog:
531
+ raise exceptions.NodeMethodCallError(
532
+ "This is a composite node and does not have an execute function"
533
+ )
534
+
535
+ async def execute_as_graph_async(
536
+ self,
537
+ iter_variable: Optional[IterableParameterModel] = None,
538
+ ):
539
+ """
540
+ Async execution of sub-graph.
541
+
542
+ Default raises NotImplementedError - override in subclasses
543
+ that support async execution (ParallelNode, MapNode, DagNode).
544
+ """
545
+ raise NotImplementedError(
546
+ f"{self.__class__.__name__} must implement execute_as_graph_async() "
547
+ f"for async execution support."
548
+ )
549
+
550
+
551
+ class TerminalNode(BaseNode):
552
+ def _get_on_failure_node(self) -> str:
553
+ return ""
554
+
555
+ def _get_next_node(self) -> str:
556
+ raise exceptions.TerminalNodeError()
557
+
558
+ def _is_terminal_node(self) -> bool:
559
+ return True
560
+
561
+ def _get_catalog_settings(self) -> Dict[str, Any]:
562
+ raise exceptions.TerminalNodeError()
563
+
564
+ def _get_branch_by_name(self, branch_name: str):
565
+ raise exceptions.TerminalNodeError()
566
+
567
+ def _get_executor_config(self, executor_type) -> str:
568
+ raise exceptions.TerminalNodeError()
569
+
570
+ def _get_max_attempts(self) -> int:
571
+ return 1
572
+
573
+ def execute_as_graph(
574
+ self,
575
+ iter_variable: Optional[IterableParameterModel] = None,
576
+ ):
577
+ raise exceptions.TerminalNodeError()
578
+
579
+ def fan_in(
580
+ self,
581
+ iter_variable: Optional[IterableParameterModel] = None,
582
+ ):
583
+ raise exceptions.TerminalNodeError()
584
+
585
+ def fan_out(
586
+ self,
587
+ iter_variable: Optional[IterableParameterModel] = None,
588
+ ):
589
+ raise exceptions.TerminalNodeError()
590
+
591
+ @classmethod
592
+ def parse_from_config(cls, config: Dict[str, Any]) -> "TerminalNode":
593
+ return cls(**config)
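A hedged sketch of parse_from_config with a concrete terminal node; DemoSuccessNode is invented here for illustration (the shipped success/fail nodes live under extensions/nodes/):

    class DemoSuccessNode(TerminalNode):
        node_type: str = "success"

        def execute(self, mock=False, iter_variable=None, attempt_number=1) -> StepLog:
            # A real implementation would build and return a StepLog for this attempt.
            raise NotImplementedError

        def get_summary(self):
            return {"name": self.name, "type": self.node_type}

    node = DemoSuccessNode.parse_from_config(
        {"name": "success", "internal_name": "success"}
    )
    assert node._is_terminal_node()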