runnable-0.50.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- extensions/README.md +0 -0
- extensions/__init__.py +0 -0
- extensions/catalog/README.md +0 -0
- extensions/catalog/any_path.py +214 -0
- extensions/catalog/file_system.py +52 -0
- extensions/catalog/minio.py +72 -0
- extensions/catalog/pyproject.toml +14 -0
- extensions/catalog/s3.py +11 -0
- extensions/job_executor/README.md +0 -0
- extensions/job_executor/__init__.py +236 -0
- extensions/job_executor/emulate.py +70 -0
- extensions/job_executor/k8s.py +553 -0
- extensions/job_executor/k8s_job_spec.yaml +37 -0
- extensions/job_executor/local.py +35 -0
- extensions/job_executor/local_container.py +161 -0
- extensions/job_executor/pyproject.toml +16 -0
- extensions/nodes/README.md +0 -0
- extensions/nodes/__init__.py +0 -0
- extensions/nodes/conditional.py +301 -0
- extensions/nodes/fail.py +78 -0
- extensions/nodes/loop.py +394 -0
- extensions/nodes/map.py +477 -0
- extensions/nodes/parallel.py +281 -0
- extensions/nodes/pyproject.toml +15 -0
- extensions/nodes/stub.py +93 -0
- extensions/nodes/success.py +78 -0
- extensions/nodes/task.py +156 -0
- extensions/pipeline_executor/README.md +0 -0
- extensions/pipeline_executor/__init__.py +871 -0
- extensions/pipeline_executor/argo.py +1266 -0
- extensions/pipeline_executor/emulate.py +119 -0
- extensions/pipeline_executor/local.py +226 -0
- extensions/pipeline_executor/local_container.py +369 -0
- extensions/pipeline_executor/mocked.py +159 -0
- extensions/pipeline_executor/pyproject.toml +16 -0
- extensions/run_log_store/README.md +0 -0
- extensions/run_log_store/__init__.py +0 -0
- extensions/run_log_store/any_path.py +100 -0
- extensions/run_log_store/chunked_fs.py +122 -0
- extensions/run_log_store/chunked_minio.py +141 -0
- extensions/run_log_store/file_system.py +91 -0
- extensions/run_log_store/generic_chunked.py +549 -0
- extensions/run_log_store/minio.py +114 -0
- extensions/run_log_store/pyproject.toml +15 -0
- extensions/secrets/README.md +0 -0
- extensions/secrets/dotenv.py +62 -0
- extensions/secrets/pyproject.toml +15 -0
- runnable/__init__.py +108 -0
- runnable/catalog.py +141 -0
- runnable/cli.py +484 -0
- runnable/context.py +730 -0
- runnable/datastore.py +1058 -0
- runnable/defaults.py +159 -0
- runnable/entrypoints.py +390 -0
- runnable/exceptions.py +137 -0
- runnable/executor.py +561 -0
- runnable/gantt.py +1646 -0
- runnable/graph.py +501 -0
- runnable/names.py +546 -0
- runnable/nodes.py +593 -0
- runnable/parameters.py +217 -0
- runnable/pickler.py +96 -0
- runnable/sdk.py +1277 -0
- runnable/secrets.py +92 -0
- runnable/tasks.py +1268 -0
- runnable/telemetry.py +142 -0
- runnable/utils.py +423 -0
- runnable-0.50.0.dist-info/METADATA +189 -0
- runnable-0.50.0.dist-info/RECORD +72 -0
- runnable-0.50.0.dist-info/WHEEL +4 -0
- runnable-0.50.0.dist-info/entry_points.txt +53 -0
- runnable-0.50.0.dist-info/licenses/LICENSE +201 -0
extensions/pipeline_executor/local_container.py
@@ -0,0 +1,369 @@

````python
import logging
import os
from pathlib import Path
from typing import Dict, Optional

from pydantic import Field, PrivateAttr

from extensions.pipeline_executor import GenericPipelineExecutor
from runnable import defaults
from runnable.datastore import StepAttempt
from runnable.defaults import IterableParameterModel
from runnable.nodes import BaseNode

logger = logging.getLogger(defaults.LOGGER_NAME)


class LocalContainerExecutor(GenericPipelineExecutor):
    """
    In the mode of local-container, we execute all the commands in a container.

    Ensure that the local compute has enough resources to finish all your jobs.

    Configuration options:

    ```yaml
    pipeline-executor:
      type: local-container
      config:
        docker_image: <required>
        auto_remove_container: true/false
        environment:
          key: value
        overrides:
          alternate_config:
            docker_image: <required>
            auto_remove_container: true/false
            environment:
              key: value
    ```

    - ```docker_image```: The default docker image to use for all the steps.
    - ```auto_remove_container```: Remove the container after execution.
    - ```environment```: Environment variables to pass to the container.

    Overrides give you the ability to override the default docker image for a single step.
    A step can then refer to the alternate_config in the task definition.

    Example:

    ```python
    from runnable import PythonTask

    task = PythonTask(
        name="alt_task",
        overrides={
            "local-container": "alternate_config"
        }
    )
    ```

    In the above example, ```alt_task``` will run in the docker image/configuration
    as defined in the alternate_config.

    ```runnable``` does not build the docker image for you; it is left to the user to build
    and ensure that the docker image provided is the correct one.
    """

    service_name: str = "local-container"
    enable_parallel: bool = Field(default=False)

    docker_image: str
    auto_remove_container: bool = True
    environment: Dict[str, str] = Field(default_factory=dict)

    _should_setup_run_log_at_traversal: bool = PrivateAttr(default=True)

    _container_log_location = "/tmp/run_logs/"
    _container_catalog_location = "/tmp/catalog/"
    _container_secrets_location = "/tmp/dotenv"
    _volumes: Dict[str, Dict[str, str]] = {}

    def _get_docker_image_digest(self, docker_image: str) -> str | None:
        """
        Retrieve the docker image digest, trying local first, then pulling if needed.

        Args:
            docker_image: The docker image name/tag

        Returns:
            The image digest (sha256:...) or None if retrieval fails
        """
        import docker  # pylint: disable=C0415

        try:
            client = docker.from_env()

            # Try to get digest from local image first
            try:
                image = client.images.get(docker_image)
                # Get the RepoDigest which contains the sha256 digest
                if image.attrs.get("RepoDigests"):
                    # RepoDigests is a list like ["registry/repo@sha256:..."]
                    for digest in image.attrs["RepoDigests"]:
                        if "@sha256:" in digest:
                            return digest.split("@")[1]  # Return just "sha256:..."

                # If no RepoDigest, try to get the image ID (less ideal but better than nothing)
                if image.id:
                    return image.id

            except docker.errors.ImageNotFound:
                # Image not found locally, try to pull it
                logger.info(
                    f"Docker image {docker_image} not found locally, pulling..."
                )
                try:
                    pulled_image = client.images.pull(docker_image)

                    # Get digest from pulled image
                    if pulled_image.attrs.get("RepoDigests"):
                        for digest in pulled_image.attrs["RepoDigests"]:
                            if "@sha256:" in digest:
                                return digest.split("@")[1]  # Return just "sha256:..."

                    if pulled_image.id:
                        return pulled_image.id

                except Exception as pull_ex:
                    logger.warning(
                        f"Failed to pull docker image {docker_image}: {pull_ex}"
                    )

        except Exception as ex:
            logger.warning(
                f"Failed to retrieve docker image digest for {docker_image}: {ex}"
            )

        return None

    def add_code_identities(self, node: BaseNode, attempt_log: StepAttempt):
        """
        Call the Base class to add the git code identity and add docker identity

        Args:
            node (BaseNode): The node we are adding the code identity
            attempt_log (StepAttempt): The step attempt log corresponding to the node
        """

        super().add_code_identities(node, attempt_log)

        if node.node_type in ["success", "fail"]:
            # Need not add code identities if we are in a success or fail node
            return

        # Add docker image digest as code identity if available, fall back to image name
        docker_digest = os.getenv("RUNNABLE_CODE_ID_DOCKER_IMAGE_DIGEST")

        if not docker_digest:
            # Fall back to docker image name if digest not available
            executor_config = self._resolve_executor_config(node)
            docker_digest = executor_config.get("docker_image", None)

        if docker_digest:
            code_id = self._context.run_log_store.create_code_identity()

            code_id.code_identifier = docker_digest
            code_id.code_identifier_type = "docker"
            code_id.code_identifier_dependable = True
            code_id.code_identifier_url = "local docker host"
            attempt_log.code_identities.append(code_id)

            logger.debug(f"Added docker image code identity: {docker_digest[:50]}...")

    def execute_node(
        self,
        node: BaseNode,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """
        We are already in the container, we just execute the node.
        The node is already prepared for execution.
        """
        self._use_volumes()
        return self._execute_node(node, iter_variable)

    def trigger_node_execution(
        self,
        node: BaseNode,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """
        We come into this step via execute from graph, use trigger job to spin up the container.

        In local container execution, we just spin the container to execute runnable execute_single_node.

        Args:
            node (BaseNode): The node we are currently executing
            iter_variable (str, optional): If the node is part of the map branch. Defaults to ''.
        """
        self._mount_volumes()
        executor_config = self._resolve_executor_config(node)
        auto_remove_container = executor_config.get("auto_remove_container", True)

        logger.debug("Here is the resolved executor config")
        logger.debug(executor_config)

        command = self._context.get_node_callable_command(
            node, iter_variable=iter_variable
        )

        self._spin_container(
            node=node,
            command=command,
            iter_variable=iter_variable,
            auto_remove_container=auto_remove_container,
        )

        step_log = self._context.run_log_store.get_step_log(
            node._get_step_log_name(iter_variable), self._context.run_id
        )
        if step_log.status != defaults.SUCCESS:
            msg = (
                "Node execution inside the container failed. Please check the logs.\n"
                "Note: If you do not see any docker issue from your side and the code works properly on local execution, "
                "please raise a bug report."
            )
            logger.error(msg)
            step_log.status = defaults.FAIL
            self._context.run_log_store.add_step_log(step_log, self._context.run_id)
            raise Exception(msg)

    def _spin_container(
        self,
        node: BaseNode,
        command: str,
        iter_variable: Optional[IterableParameterModel] = None,
        auto_remove_container: bool = True,
    ):
        """
        During the flow run, we have to spin up a container with the docker image mentioned
        and the right log locations
        """
        # Conditional import
        import docker  # pylint: disable=C0415

        try:
            client = docker.from_env()
            api_client = docker.APIClient()
        except Exception as ex:
            logger.exception("Could not get access to docker")
            raise Exception(
                "Could not get the docker socket file, do you have docker installed?"
            ) from ex

        try:
            logger.info(f"Running the command {command}")
            # Overrides global config with local
            executor_config = self._resolve_executor_config(node)

            docker_image = executor_config.get("docker_image", None)
            environment = executor_config.get("environment", {})
            environment.update(self._context.variables)
            if not docker_image:
                raise Exception(
                    f"Please provide a docker_image using executor_config of the step {node.name} or at global config"
                )

            # Retrieve docker image digest and pass it as environment variable
            digest = self._get_docker_image_digest(docker_image)
            if digest:
                environment["RUNNABLE_CODE_ID_DOCKER_IMAGE_DIGEST"] = digest
                logger.info(f"Retrieved docker image digest: {digest[:12]}...")
            else:
                logger.warning(
                    f"Could not retrieve digest for docker image: {docker_image}"
                )

            container = client.containers.create(
                image=docker_image,
                command=command,
                auto_remove=False,
                volumes=self._volumes,
                # network_mode="host",
                environment=environment,
            )

            # print(container.__dict__)

            container.start()
            stream = api_client.logs(
                container=container.id, timestamps=True, stream=True, follow=True
            )
            while True:
                try:
                    output = next(stream).decode("utf-8")
                    output = output.strip("\r\n")
                    logger.info(output)
                    print(output)
                except StopIteration:
                    logger.info("Docker Run completed")
                    break

            exit_status = api_client.inspect_container(container.id)["State"][
                "ExitCode"
            ]

            if auto_remove_container:
                container.remove(force=True)

            if exit_status != 0:
                msg = f"Docker command failed with exit code {exit_status}"
                raise Exception(msg)

        except Exception as _e:
            logger.exception("Problems with spinning/running the container")
            raise _e

    def _mount_volumes(self):
        """
        Mount the volumes for the container
        """
        # TODO: There should be an abstraction on top of service providers
        match self._context.run_log_store.service_name:
            case "file-system":
                write_to = self._context.run_log_store.log_folder
                self._volumes[str(Path(write_to).resolve())] = {
                    "bind": f"{self._container_log_location}",
                    "mode": "rw",
                }
            case "chunked-fs":
                write_to = self._context.run_log_store.log_folder
                self._volumes[str(Path(write_to).resolve())] = {
                    "bind": f"{self._container_log_location}",
                    "mode": "rw",
                }

        match self._context.catalog.service_name:
            case "file-system":
                catalog_location = self._context.catalog.catalog_location
                self._volumes[str(Path(catalog_location).resolve())] = {
                    "bind": f"{self._container_catalog_location}",
                    "mode": "rw",
                }

        match self._context.secrets.service_name:
            case "dotenv":
                secrets_location = self._context.secrets.location
                self._volumes[str(Path(secrets_location).resolve())] = {
                    "bind": f"{self._container_secrets_location}",
                    "mode": "ro",
                }

    def _use_volumes(self):
        match self._context.run_log_store.service_name:
            case "file-system":
                self._context.run_log_store.log_folder = self._container_log_location
            case "chunked-fs":
                self._context.run_log_store.log_folder = self._container_log_location

        match self._context.catalog.service_name:
            case "file-system":
                self._context.catalog.catalog_location = (
                    self._container_catalog_location
                )

        match self._context.secrets.service_name:
            case "dotenv":
                self._context.secrets.location = self._container_secrets_location
````
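For orientation, here is a minimal sketch of the volume mapping that `_mount_volumes` builds when the run log store and catalog use `file-system` and the secrets provider is `dotenv`. The host paths are hypothetical; only the container-side bind targets and modes come from the class constants above. This dictionary is what `_spin_container` hands to `client.containers.create(volumes=...)`.

```python
# Hypothetical host paths; the bind targets mirror _container_log_location,
# _container_catalog_location and _container_secrets_location defined on
# LocalContainerExecutor.
volumes = {
    "/home/user/project/.run_log_store": {"bind": "/tmp/run_logs/", "mode": "rw"},
    "/home/user/project/.catalog": {"bind": "/tmp/catalog/", "mode": "rw"},
    "/home/user/project/.env": {"bind": "/tmp/dotenv", "mode": "ro"},
}
```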
extensions/pipeline_executor/mocked.py
@@ -0,0 +1,159 @@

```python
import copy
import logging
from typing import Any, Dict, Optional, Type, cast

from pydantic import ConfigDict, Field

from extensions.nodes.task import TaskNode
from extensions.pipeline_executor import GenericPipelineExecutor
from runnable import defaults
from runnable.defaults import IterableParameterModel
from runnable.nodes import BaseNode
from runnable.tasks import BaseTaskType

logger = logging.getLogger(defaults.LOGGER_NAME)


def create_executable(
    params: Dict[str, Any],
    model: Type[BaseTaskType],
    node_name: str,
) -> BaseTaskType:
    class EasyModel(model):  # type: ignore
        model_config = ConfigDict(extra="ignore")

    swallow_all = EasyModel(node_name=node_name, **params)
    return swallow_all


class MockedExecutor(GenericPipelineExecutor):
    service_name: str = "mocked"
    _is_local: bool = True

    model_config = ConfigDict(extra="ignore")

    patches: Dict[str, Any] = Field(default_factory=dict)

    def execute_from_graph(
        self,
        node: BaseNode,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """
        This is the entry point from the graph execution.

        While self.execute_graph is responsible for traversing the graph, this function is responsible for
        the actual execution of the node.

        If the node type is:
            * task: We can delegate to _execute_node after checking the eligibility for re-run in cases of a re-run
            * success: We can delegate to _execute_node
            * fail: We can delegate to _execute_node

        For nodes that are internally graphs:
            * parallel: Delegate the responsibility of execution to node.execute_as_graph()
            * dag: Delegate the responsibility of execution to node.execute_as_graph()
            * map: Delegate the responsibility of execution to node.execute_as_graph()

        Transpilers will NEVER use this method and will NEVER call this method.
        This method should only be used by interactive executors.

        Args:
            node (Node): The node to execute
            iter_variable (dict, optional): If the node is of a map state, this corresponds to the value of the iterable.
                Defaults to None.
        """
        step_log = self._context.run_log_store.create_step_log(
            node.name, node._get_step_log_name(iter_variable)
        )

        step_log.step_type = node.node_type
        step_log.status = defaults.PROCESSING

        self._context.run_log_store.add_step_log(step_log, self._context.run_id)

        logger.info(f"Executing node: {node.get_summary()}")

        # Add the step log to the database as per the situation.
        # If it is a terminal node, complete it now
        if node.node_type in ["success", "fail"]:
            self._context.run_log_store.add_step_log(step_log, self._context.run_id)
            self._execute_node(node, iter_variable=iter_variable)
            return

        # We call an internal function to iterate the sub graphs and execute them
        if node.is_composite:
            self._context.run_log_store.add_step_log(step_log, self._context.run_id)
            node.execute_as_graph(iter_variable=iter_variable)
            return

        if node.name not in self.patches:
            # node is not patched, so mock it
            self._execute_node(node, iter_variable=iter_variable, mock=True)
        else:
            # node is patched, rebuild its executable with the patch value as the command
            node_to_send: TaskNode = cast(TaskNode, node).model_copy(deep=True)
            executable_type = node_to_send.executable.__class__
            executable = create_executable(
                self.patches[node.name],
                executable_type,
                node_name=node.name,
            )
            node_to_send.executable = executable
            self._execute_node(node_to_send, iter_variable=iter_variable, mock=False)

    def _resolve_executor_config(self, node: BaseNode):
        """
        The overrides section can contain specific overrides to the global executor config.
        To avoid too much clutter in the dag definition, we allow the configuration file to have an overrides block.
        The nodes can override the global config by referring to a key in the overrides.

        This function also applies variables to the effective node config.

        For example:
        # configuration.yaml
        execution:
          type: cloud-implementation
          config:
            k1: v1
            k3: v3
          overrides:
            custom_config:
              k1: v11
              k2: v2 # Could be a mapping internally.

        # in pipeline definition.yaml
        dag:
          steps:
            step1:
              overrides:
                cloud-implementation: custom_config

        This method should resolve the node_config to {'k1': 'v11', 'k2': 'v2', 'k3': 'v3'}

        Args:
            node (BaseNode): The current node being processed.

        """
        effective_node_config = copy.deepcopy(self.model_dump())

        return effective_node_config

    def execute_node(
        self,
        node: BaseNode,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """
        The entry point for all executors apart from local.
        We have already prepared for node execution.

        Args:
            node (BaseNode): The node to execute
            iter_variable (dict, optional): If the node is part of a map, send in the map dictionary. Defaults to None.

        Raises:
            NotImplementedError: _description_
        """
        ...
```
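To illustrate how `patches` is consumed, here is a minimal sketch assuming the executor can be instantiated directly; the step name `train` and the dotted command path are hypothetical. A step named in `patches` has its task rebuilt through `create_executable` with the supplied parameters, while every other task node is executed with `mock=True`, i.e. skipped.

```python
# A sketch only: assumes MockedExecutor can be constructed stand-alone.
from extensions.pipeline_executor.mocked import MockedExecutor

executor = MockedExecutor(
    patches={
        # Hypothetical step name and command; this dict becomes the parameters
        # of the rebuilt task via create_executable.
        "train": {"command": "tests.fakes.fake_train"},
    }
)
```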
extensions/pipeline_executor/pyproject.toml
@@ -0,0 +1,16 @@

```toml
[project]
name = "pipeline_executor"
version = "0.0.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []


[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"


[tool.hatch.build.targets.wheel]
packages = ["."]
```
extensions/run_log_store/README.md
File without changes

extensions/run_log_store/__init__.py
File without changes
extensions/run_log_store/any_path.py
@@ -0,0 +1,100 @@

```python
import logging
from abc import abstractmethod
from typing import Any, Dict

from runnable import defaults, exceptions
from runnable.datastore import BaseRunLogStore, RunLog

logger = logging.getLogger(defaults.LOGGER_NAME)


class AnyPathRunLogStore(BaseRunLogStore):
    """
    In this type of Run Log store, we use a file system to store the JSON run log.

    Every single run is stored as a different file, which makes it compatible across other store types.

    When to use:
        When locally testing a pipeline and you have the need to compare across runs.
        It is fully featured and perfectly fine if your local environment is where you would do everything.

    Do not use:
        If you need parallelization on local, this run log would not support it.

    Example config:

    run_log:
      type: file-system
      config:
        log_folder: The folder to output the logs. Defaults to .run_log_store

    """

    service_name: str = "file-system"
    log_folder: str = defaults.LOG_LOCATION_FOLDER

    def get_summary(self) -> Dict[str, Any]:
        summary = {"Type": self.service_name, "Location": self.log_folder}

        return summary

    @abstractmethod
    def write_to_path(self, run_log: RunLog): ...

    @abstractmethod
    def read_from_path(self, run_id: str) -> RunLog: ...

    def create_run_log(
        self,
        run_id: str,
        dag_hash: str = "",
        use_cached: bool = False,
        tag: str = "",
        original_run_id: str = "",
        status: str = defaults.CREATED,
    ) -> RunLog:
        """
        # Creates a Run log
        # Adds it to the db
        """

        try:
            self.get_run_log_by_id(run_id=run_id, full=False)
            raise exceptions.RunLogExistsError(run_id=run_id)
        except exceptions.RunLogNotFoundError:
            pass

        logger.info(f"{self.service_name} Creating a Run Log for : {run_id}")
        run_log = RunLog(
            run_id=run_id,
            dag_hash=dag_hash,
            tag=tag,
            status=status,
        )
        self.write_to_path(run_log)
        return run_log

    def get_run_log_by_id(
        self,
        run_id: str,
        full: bool = False,
    ) -> RunLog:
        """
        # Returns the run_log defined by id
        # Raises Exception if not found
        """
        try:
            logger.info(f"{self.service_name} Getting a Run Log for : {run_id}")
            run_log = self.read_from_path(run_id)
            return run_log
        except FileNotFoundError as e:
            raise exceptions.RunLogNotFoundError(run_id) from e

    def put_run_log(self, run_log: RunLog):
        """
        # Puts the run_log into the database
        """
        logger.info(
            f"{self.service_name} Putting the run log in the DB: {run_log.run_id}"
        )
        self.write_to_path(run_log)
```
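The two abstract methods are all a concrete store has to provide. Below is a minimal sketch of such a subclass, assuming `RunLog` is a pydantic model (so `model_dump_json`/`model_validate_json` are available); the folder layout is illustrative and not necessarily what the shipped `file_system.py` does.

```python
from pathlib import Path

from extensions.run_log_store.any_path import AnyPathRunLogStore
from runnable.datastore import RunLog


class FolderRunLogStore(AnyPathRunLogStore):
    """Illustrative store that keeps one JSON file per run under log_folder."""

    def write_to_path(self, run_log: RunLog):
        # Create the log folder lazily and write <run_id>.json
        folder = Path(self.log_folder)
        folder.mkdir(parents=True, exist_ok=True)
        (folder / f"{run_log.run_id}.json").write_text(run_log.model_dump_json())

    def read_from_path(self, run_id: str) -> RunLog:
        # Missing files raise FileNotFoundError, which get_run_log_by_id
        # translates into RunLogNotFoundError.
        contents = (Path(self.log_folder) / f"{run_id}.json").read_text()
        return RunLog.model_validate_json(contents)
```

Because `read_from_path` raises `FileNotFoundError` for a missing run, `get_run_log_by_id` in the base class turns it into `RunLogNotFoundError`, which is exactly what `create_run_log` relies on to detect duplicates.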