runnable-0.50.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. extensions/README.md +0 -0
  2. extensions/__init__.py +0 -0
  3. extensions/catalog/README.md +0 -0
  4. extensions/catalog/any_path.py +214 -0
  5. extensions/catalog/file_system.py +52 -0
  6. extensions/catalog/minio.py +72 -0
  7. extensions/catalog/pyproject.toml +14 -0
  8. extensions/catalog/s3.py +11 -0
  9. extensions/job_executor/README.md +0 -0
  10. extensions/job_executor/__init__.py +236 -0
  11. extensions/job_executor/emulate.py +70 -0
  12. extensions/job_executor/k8s.py +553 -0
  13. extensions/job_executor/k8s_job_spec.yaml +37 -0
  14. extensions/job_executor/local.py +35 -0
  15. extensions/job_executor/local_container.py +161 -0
  16. extensions/job_executor/pyproject.toml +16 -0
  17. extensions/nodes/README.md +0 -0
  18. extensions/nodes/__init__.py +0 -0
  19. extensions/nodes/conditional.py +301 -0
  20. extensions/nodes/fail.py +78 -0
  21. extensions/nodes/loop.py +394 -0
  22. extensions/nodes/map.py +477 -0
  23. extensions/nodes/parallel.py +281 -0
  24. extensions/nodes/pyproject.toml +15 -0
  25. extensions/nodes/stub.py +93 -0
  26. extensions/nodes/success.py +78 -0
  27. extensions/nodes/task.py +156 -0
  28. extensions/pipeline_executor/README.md +0 -0
  29. extensions/pipeline_executor/__init__.py +871 -0
  30. extensions/pipeline_executor/argo.py +1266 -0
  31. extensions/pipeline_executor/emulate.py +119 -0
  32. extensions/pipeline_executor/local.py +226 -0
  33. extensions/pipeline_executor/local_container.py +369 -0
  34. extensions/pipeline_executor/mocked.py +159 -0
  35. extensions/pipeline_executor/pyproject.toml +16 -0
  36. extensions/run_log_store/README.md +0 -0
  37. extensions/run_log_store/__init__.py +0 -0
  38. extensions/run_log_store/any_path.py +100 -0
  39. extensions/run_log_store/chunked_fs.py +122 -0
  40. extensions/run_log_store/chunked_minio.py +141 -0
  41. extensions/run_log_store/file_system.py +91 -0
  42. extensions/run_log_store/generic_chunked.py +549 -0
  43. extensions/run_log_store/minio.py +114 -0
  44. extensions/run_log_store/pyproject.toml +15 -0
  45. extensions/secrets/README.md +0 -0
  46. extensions/secrets/dotenv.py +62 -0
  47. extensions/secrets/pyproject.toml +15 -0
  48. runnable/__init__.py +108 -0
  49. runnable/catalog.py +141 -0
  50. runnable/cli.py +484 -0
  51. runnable/context.py +730 -0
  52. runnable/datastore.py +1058 -0
  53. runnable/defaults.py +159 -0
  54. runnable/entrypoints.py +390 -0
  55. runnable/exceptions.py +137 -0
  56. runnable/executor.py +561 -0
  57. runnable/gantt.py +1646 -0
  58. runnable/graph.py +501 -0
  59. runnable/names.py +546 -0
  60. runnable/nodes.py +593 -0
  61. runnable/parameters.py +217 -0
  62. runnable/pickler.py +96 -0
  63. runnable/sdk.py +1277 -0
  64. runnable/secrets.py +92 -0
  65. runnable/tasks.py +1268 -0
  66. runnable/telemetry.py +142 -0
  67. runnable/utils.py +423 -0
  68. runnable-0.50.0.dist-info/METADATA +189 -0
  69. runnable-0.50.0.dist-info/RECORD +72 -0
  70. runnable-0.50.0.dist-info/WHEEL +4 -0
  71. runnable-0.50.0.dist-info/entry_points.txt +53 -0
  72. runnable-0.50.0.dist-info/licenses/LICENSE +201 -0
extensions/job_executor/local_container.py
@@ -0,0 +1,161 @@
+ import logging
+ from pathlib import Path
+ from typing import Dict, List, Optional
+
+ from pydantic import Field, PrivateAttr
+
+ from extensions.job_executor import GenericJobExecutor
+ from runnable import context, defaults
+ from runnable.tasks import BaseTaskType
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+ class LocalContainerJobExecutor(GenericJobExecutor):
+     """
+     The LocalJobExecutor is a job executor that runs the job locally.
+     """
+
+     service_name: str = "local-container"
+     docker_image: str
+     auto_remove_container: bool = True
+     environment: Dict[str, str] = Field(default_factory=dict)
+
+     _should_setup_run_log_at_traversal: bool = PrivateAttr(default=True)
+
+     _container_log_location = "/tmp/run_logs/"
+     _container_catalog_location = "/tmp/catalog/"
+     _container_secrets_location = "/tmp/dotenv"
+     _volumes: Dict[str, Dict[str, str]] = {}
+
+     def submit_job(self, job: BaseTaskType, catalog_settings=Optional[List[str]]):
+         """
+         This method gets invoked by the CLI.
+         """
+         self._set_up_run_log()
+         self._mount_volumes()
+
+         # Call the container job
+         job_log = self._context.run_log_store.create_job_log()
+         self._context.run_log_store.add_job_log(
+             run_id=self._context.run_id, job_log=job_log
+         )
+         self.spin_container()
+
+     def execute_job(self, job: BaseTaskType, catalog_settings=Optional[List[str]]):
+         """
+         This method gets invoked by the CLI.
+         """
+         self._use_volumes()
+         super().execute_job(job, catalog_settings=catalog_settings)
+
+     def spin_container(self):
+         """
+         This method spins up the container
+         """
+         import docker  # pylint: disable=C0415
+
+         try:
+             client = docker.from_env()
+             api_client = docker.APIClient()
+         except Exception as ex:
+             logger.exception("Could not get access to docker")
+             raise Exception(
+                 "Could not get the docker socket file, do you have docker installed?"
+             ) from ex
+
+         try:
+             assert isinstance(self._context, context.JobContext)
+             command = self._context.get_job_callable_command()
+             logger.info(f"Running the command {command}")
+
+             docker_image = self.docker_image
+             environment = self.environment
+
+             container = client.containers.create(
+                 image=docker_image,
+                 command=command,
+                 auto_remove=False,
+                 volumes=self._volumes,
+                 environment=environment,
+             )
+
+             container.start()
+             stream = api_client.logs(
+                 container=container.id, timestamps=True, stream=True, follow=True
+             )
+             while True:
+                 try:
+                     output = next(stream).decode("utf-8")
+                     output = output.strip("\r\n")
+                     logger.info(output)
+                     print(output)
+                 except StopIteration:
+                     logger.info("Docker Run completed")
+                     break
+
+             exit_status = api_client.inspect_container(container.id)["State"][
+                 "ExitCode"
+             ]
+
+             if self.auto_remove_container:
+                 container.remove(force=True)
+
+             if exit_status != 0:
+                 msg = f"Docker command failed with exit code {exit_status}"
+                 raise Exception(msg)
+
+         except Exception as _e:
+             logger.exception("Problems with spinning/running the container")
+             raise _e
+
+     def _mount_volumes(self):
+         """
+         Mount the volumes for the container
+         """
+         match self._context.run_log_store.service_name:
+             case "file-system":
+                 write_to = self._context.run_log_store.log_folder
+                 self._volumes[str(Path(write_to).resolve())] = {
+                     "bind": f"{self._container_log_location}",
+                     "mode": "rw",
+                 }
+             case "chunked-fs":
+                 write_to = self._context.run_log_store.log_folder
+                 self._volumes[str(Path(write_to).resolve())] = {
+                     "bind": f"{self._container_log_location}",
+                     "mode": "rw",
+                 }
+
+         match self._context.catalog.service_name:
+             case "file-system":
+                 catalog_location = self._context.catalog.catalog_location
+                 self._volumes[str(Path(catalog_location).resolve())] = {
+                     "bind": f"{self._container_catalog_location}",
+                     "mode": "rw",
+                 }
+
+         match self._context.secrets.service_name:
+             case "dotenv":
+                 secrets_location = self._context.secrets.location
+                 self._volumes[str(Path(secrets_location).resolve())] = {
+                     "bind": f"{self._container_secrets_location}",
+                     "mode": "ro",
+                 }
+
+     def _use_volumes(self):
+         match self._context.run_log_store.service_name:
+             case "file-system":
+                 self._context.run_log_store.log_folder = self._container_log_location
+             case "chunked-fs":
+                 self._context.run_log_store.log_folder = self._container_log_location
+
+         match self._context.catalog.service_name:
+             case "file-system":
+                 self._context.catalog.catalog_location = (
+                     self._container_catalog_location
+                 )
+
+         match self._context.secrets.service_name:
+             case "dotenv":
+                 self._context.secrets.location = self._container_secrets_location
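
For reference, the _volumes mapping assembled by _mount_volumes above uses the structure the Docker SDK for Python expects for bind mounts: host-path keys mapping to {"bind": ..., "mode": ...} dictionaries, passed straight to client.containers.create(volumes=...). A minimal sketch with hypothetical host paths (placeholders, not values taken from the package):

# Hypothetical illustration of the mapping built by _mount_volumes; host paths are placeholders.
volumes = {
    "/home/user/project/.run_log_store": {"bind": "/tmp/run_logs/", "mode": "rw"},
    "/home/user/project/.catalog": {"bind": "/tmp/catalog/", "mode": "rw"},
    "/home/user/project/.env": {"bind": "/tmp/dotenv", "mode": "ro"},
}
# docker-py accepts this dictionary directly, e.g.:
# client.containers.create(image="my-image:latest", command="python job.py", volumes=volumes)
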
extensions/job_executor/pyproject.toml
@@ -0,0 +1,16 @@
+ [project]
+ name = "job_executor"
+ version = "0.0.0"
+ description = "Add your description here"
+ readme = "README.md"
+ requires-python = ">=3.10"
+ dependencies = []
+
+
+ [build-system]
+ requires = ["hatchling"]
+ build-backend = "hatchling.build"
+
+
+ [tool.hatch.build.targets.wheel]
+ packages = ["."]
extensions/nodes/README.md: File without changes
extensions/nodes/__init__.py: File without changes
extensions/nodes/conditional.py
@@ -0,0 +1,301 @@
+ import logging
+ from copy import deepcopy
+ from typing import Any, Optional, cast
+
+ from pydantic import Field, field_serializer, field_validator
+
+ from runnable import console, defaults, exceptions
+ from runnable.datastore import Parameter
+ from runnable.defaults import IterableParameterModel
+ from runnable.graph import Graph, create_graph
+ from runnable.nodes import CompositeNode
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+ class ConditionalNode(CompositeNode):
+     """
+     parameter: name -> the parameter which is used for evaluation
+     default: Optional[branch] = branch to execute if nothing is matched.
+     branches: {
+         "case1" : branch1,
+         "case2: branch2,
+     }
+
+     Conceptually this is equal to:
+     match parameter:
+         case "case1":
+             branch1
+         case "case2":
+             branch2
+         case _:
+             default
+
+     """
+
+     node_type: str = Field(default="conditional", serialization_alias="type")
+
+     parameter: str  # the name of the parameter should be isalnum
+     default: Graph | None = Field(default=None)  # TODO: Think about the design of this
+     branches: dict[str, Graph]
+     # The keys of the branches should be isalnum()
+
+     @field_validator("parameter", mode="after")
+     @classmethod
+     def check_parameter(cls, parameter: str) -> str:
+         """
+         Validate that the parameter name is alphanumeric.
+
+         Args:
+             parameter (str): The parameter name to validate.
+
+         Raises:
+             ValueError: If the parameter name is not alphanumeric.
+
+         Returns:
+             str: The validated parameter name.
+         """
+         if not parameter.isalnum():
+             raise ValueError(f"Parameter '{parameter}' must be alphanumeric.")
+         return parameter
+
+     def get_parameter_value(self) -> str | int | bool | float:
+         """
+         Get the parameter value from the context.
+
+         Returns:
+             Any: The value of the parameter.
+         """
+         parameters: dict[str, Parameter] = self._context.run_log_store.get_parameters(
+             run_id=self._context.run_id
+         )
+
+         if self.parameter not in parameters:
+             raise Exception(f"Parameter {self.parameter} not found in parameters")
+
+         chosen_parameter_value = parameters[self.parameter].get_value()
+
+         assert isinstance(chosen_parameter_value, (int, float, bool, str)), (
+             f"Parameter '{self.parameter}' must be of type int, float, bool, or str, "
+             f"but got {type(chosen_parameter_value).__name__}."
+         )
+
+         return chosen_parameter_value
+
+     def get_summary(self) -> dict[str, Any]:
+         summary = {
+             "name": self.name,
+             "type": self.node_type,
+             "branches": [branch.get_summary() for branch in self.branches.values()],
+             "parameter": self.parameter,
+             "default": self.default.get_summary() if self.default else None,
+         }
+
+         return summary
+
+     @field_serializer("branches")
+     def ser_branches(self, branches: dict[str, Graph]) -> dict[str, Graph]:
+         ret: dict[str, Graph] = {}
+
+         for branch_name, branch in branches.items():
+             ret[branch_name.split(".")[-1]] = branch
+
+         return ret
+
+     @classmethod
+     def parse_from_config(cls, config: dict[str, Any]) -> "ConditionalNode":
+         internal_name = cast(str, config.get("internal_name"))
+
+         config_branches = config.pop("branches", {})
+         branches = {}
+         for branch_name, branch_config in config_branches.items():
+             sub_graph = create_graph(
+                 deepcopy(branch_config),
+                 internal_branch_name=internal_name + "." + branch_name,
+             )
+             branches[internal_name + "." + branch_name] = sub_graph
+
+         if not branches:
+             raise Exception("A parallel node should have branches")
+         return cls(branches=branches, **config)
+
+     def _get_branch_by_name(self, branch_name: str) -> Graph:
+         if branch_name in self.branches:
+             return self.branches[branch_name]
+
+         raise Exception(f"Branch {branch_name} does not exist")
+
+     def fan_out(
+         self,
+         iter_variable: Optional[IterableParameterModel] = None,
+     ):
+         """
+         This method is restricted to creating branch logs.
+         """
+         parameter_value = self.get_parameter_value()
+
+         hit_once = False
+
+         for internal_branch_name, _ in self.branches.items():
+             # the match is done on the last part of the branch name
+             result = str(parameter_value) == internal_branch_name.split(".")[-1]
+
+             if not result:
+                 # Need not create a branch log for this branch
+                 continue
+
+             effective_branch_name = self._resolve_map_placeholders(
+                 internal_branch_name, iter_variable=iter_variable
+             )
+
+             hit_once = True
+             try:
+                 branch_log = self._context.run_log_store.get_branch_log(
+                     effective_branch_name, self._context.run_id
+                 )
+                 console.print(f"Branch log already exists for {effective_branch_name}")
+             except exceptions.BranchLogNotFoundError:
+                 branch_log = self._context.run_log_store.create_branch_log(
+                     effective_branch_name
+                 )
+                 console.print(f"Branch log created for {effective_branch_name}")
+
+             branch_log.status = defaults.PROCESSING
+             self._context.run_log_store.add_branch_log(branch_log, self._context.run_id)
+
+         if not hit_once:
+             raise Exception(
+                 "None of the branches were true. Please check your evaluate statements"
+             )
+
+     def execute_as_graph(
+         self,
+         iter_variable: Optional[IterableParameterModel] = None,
+     ):
+         """
+         This function does the actual execution of the sub-branches of the parallel node.
+
+         From a design perspective, this function should not be called if the execution is 3rd party orchestrated.
+
+         The modes that render the job specifications, do not need to interact with this node at all as they have their
+         own internal mechanisms of handing parallel states.
+         If they do not, you can find a way using as-is nodes as hack nodes.
+
+         The execution of a dag, could result in
+             * The dag being completely executed with a definite (fail, success) state in case of
+                 local or local-container execution
+             * The dag being in a processing state with PROCESSING status in case of local-aws-batch
+
+         Only fail state is considered failure during this phase of execution.
+
+         Args:
+             executor (Executor): The Executor as per the use config
+             **kwargs: Optional kwargs passed around
+         """
+         self.fan_out(iter_variable=iter_variable)
+         parameter_value = self.get_parameter_value()
+
+         for internal_branch_name, branch in self.branches.items():
+             result = str(parameter_value) == internal_branch_name.split(".")[-1]
+
+             if result:
+                 # if the condition is met, execute the graph
+                 logger.debug(f"Executing graph for {branch}")
+                 self._context.pipeline_executor.execute_graph(
+                     branch, iter_variable=iter_variable
+                 )
+
+         self.fan_in(iter_variable=iter_variable)
+
+     def fan_in(
+         self,
+         iter_variable: Optional[IterableParameterModel] = None,
+     ):
+         """
+         The general fan in method for a node of type Parallel.
+
+         3rd party orchestrators should use this method to find the status of the composite step.
+
+         Args:
+             executor (BaseExecutor): The executor class as defined by the config
+             map_variable (dict, optional): If the node is part of a map. Defaults to None.
+         """
+         effective_internal_name = self._resolve_map_placeholders(
+             self.internal_name, iter_variable=iter_variable
+         )
+
+         step_success_bool: bool = True
+         parameter_value = self.get_parameter_value()
+         executed_branch_name = None
+
+         for internal_branch_name, _ in self.branches.items():
+             result = str(parameter_value) == internal_branch_name.split(".")[-1]
+
+             if not result:
+                 # The branch would not have been executed
+                 continue
+
+             effective_branch_name = self._resolve_map_placeholders(
+                 internal_branch_name, iter_variable=iter_variable
+             )
+             executed_branch_name = effective_branch_name
+
+             branch_log = self._context.run_log_store.get_branch_log(
+                 effective_branch_name, self._context.run_id
+             )
+
+             if branch_log.status != defaults.SUCCESS:
+                 step_success_bool = False
+
+         step_log = self._context.run_log_store.get_step_log(
+             effective_internal_name, self._context.run_id
+         )
+
+         if step_success_bool:  # If none failed
+             step_log.status = defaults.SUCCESS
+         else:
+             step_log.status = defaults.FAIL
+
+         self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+
+         # If we failed, return without parameter rollback
+         if not step_log.status == defaults.SUCCESS:
+             return
+
+         # Roll back parameters from executed branch to parent scope
+         if executed_branch_name:
+             parent_params = self._context.run_log_store.get_parameters(
+                 self._context.run_id, internal_branch_name=self.internal_branch_name
+             )
+
+             branch_params = self._context.run_log_store.get_parameters(
+                 self._context.run_id, internal_branch_name=executed_branch_name
+             )
+
+             # Merge branch parameters into parent (overwrite with branch values)
+             parent_params.update(branch_params)
+
+             self._context.run_log_store.set_parameters(
+                 parameters=parent_params,
+                 run_id=self._context.run_id,
+                 internal_branch_name=self.internal_branch_name,
+             )
+
+     async def execute_as_graph_async(
+         self,
+         iter_variable: Optional[IterableParameterModel] = None,
+     ):
+         """Async conditional execution."""
+         self.fan_out(iter_variable=iter_variable)  # sync
+         parameter_value = self.get_parameter_value()
+
+         for internal_branch_name, branch in self.branches.items():
+             result = str(parameter_value) == internal_branch_name.split(".")[-1]
+
+             if result:
+                 logger.debug(f"Executing graph for {branch}")
+                 await self._context.pipeline_executor.execute_graph_async(
+                     branch, iter_variable=iter_variable
+                 )
+
+         self.fan_in(iter_variable=iter_variable)  # sync
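
Both fan_out and fan_in above pick the branch to run by comparing the stringified parameter value against the last dot-separated segment of each internal branch name. A small self-contained sketch of that matching rule, using illustrative branch names that are not taken from the package:

# Mirrors the comparison used in ConditionalNode.fan_out / fan_in.
def matching_branches(parameter_value, internal_branch_names):
    """Return the branch names whose last segment equals str(parameter_value)."""
    return [
        name
        for name in internal_branch_names
        if str(parameter_value) == name.split(".")[-1]
    ]

# A boolean parameter value of True selects "pipeline.check.True" but not "pipeline.check.False".
print(matching_branches(True, ["pipeline.check.True", "pipeline.check.False"]))
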
extensions/nodes/fail.py
@@ -0,0 +1,78 @@
+ from datetime import datetime
+ from typing import Any, Dict, Optional, cast
+
+ from pydantic import Field
+
+ from runnable import datastore, defaults
+ from runnable.datastore import StepLog
+ from runnable.defaults import IterableParameterModel
+ from runnable.nodes import TerminalNode
+
+
+ class FailNode(TerminalNode):
+     """
+     A leaf node of the graph that represents a failure node
+     """
+
+     node_type: str = Field(default="fail", serialization_alias="type")
+
+     @classmethod
+     def parse_from_config(cls, config: Dict[str, Any]) -> "FailNode":
+         return cast("FailNode", super().parse_from_config(config))
+
+     def get_summary(self) -> Dict[str, Any]:
+         summary = {
+             "name": self.name,
+             "type": self.node_type,
+         }
+
+         return summary
+
+     def execute(
+         self,
+         mock=False,
+         iter_variable: Optional[IterableParameterModel] = None,
+         attempt_number: int = 1,
+     ) -> StepLog:
+         """
+         Execute the failure node.
+         Set the run or branch log status to failure.
+
+         Args:
+             executor (_type_): the executor class
+             mock (bool, optional): If we should just mock and not do the actual execution. Defaults to False.
+             iter_variable (dict, optional): If the node belongs to internal branches. Defaults to None.
+
+         Returns:
+             StepAttempt: The step attempt object
+         """
+         step_log = self._context.run_log_store.get_step_log(
+             self._get_step_log_name(iter_variable), self._context.run_id
+         )
+
+         attempt_log = datastore.StepAttempt(
+             status=defaults.SUCCESS,
+             start_time=str(datetime.now()),
+             end_time=str(datetime.now()),
+             attempt_number=attempt_number,
+             retry_indicator=self._context.retry_indicator,
+         )
+
+         # Add code identities to the attempt
+         self._context.pipeline_executor.add_code_identities(
+             node=self, attempt_log=attempt_log
+         )
+
+         run_or_branch_log = self._context.run_log_store.get_branch_log(
+             self._get_branch_log_name(iter_variable), self._context.run_id
+         )
+         run_or_branch_log.status = defaults.FAIL
+         self._context.run_log_store.add_branch_log(
+             run_or_branch_log, self._context.run_id
+         )
+
+         step_log.status = attempt_log.status
+
+         step_log.attempts.append(attempt_log)
+
+         return step_log