runnable 0.17.0__py3-none-any.whl → 0.18.0__py3-none-any.whl
- extensions/README.md +0 -0
- extensions/__init__.py +0 -0
- extensions/catalog/README.md +0 -0
- extensions/catalog/file_system.py +253 -0
- extensions/catalog/pyproject.toml +14 -0
- extensions/job_executor/README.md +0 -0
- extensions/job_executor/__init__.py +160 -0
- extensions/job_executor/k8s.py +362 -0
- extensions/job_executor/k8s_job_spec.yaml +37 -0
- extensions/job_executor/local.py +61 -0
- extensions/job_executor/local_container.py +192 -0
- extensions/job_executor/pyproject.toml +16 -0
- extensions/nodes/README.md +0 -0
- extensions/nodes/nodes.py +954 -0
- extensions/nodes/pyproject.toml +15 -0
- extensions/pipeline_executor/README.md +0 -0
- extensions/pipeline_executor/__init__.py +644 -0
- extensions/pipeline_executor/argo.py +1307 -0
- extensions/pipeline_executor/argo_specification.yaml +51 -0
- extensions/pipeline_executor/local.py +62 -0
- extensions/pipeline_executor/local_container.py +363 -0
- extensions/pipeline_executor/mocked.py +161 -0
- extensions/pipeline_executor/pyproject.toml +16 -0
- extensions/pipeline_executor/retry.py +180 -0
- extensions/run_log_store/README.md +0 -0
- extensions/run_log_store/__init__.py +0 -0
- extensions/run_log_store/chunked_fs.py +113 -0
- extensions/run_log_store/db/implementation_FF.py +163 -0
- extensions/run_log_store/db/integration_FF.py +0 -0
- extensions/run_log_store/file_system.py +145 -0
- extensions/run_log_store/generic_chunked.py +599 -0
- extensions/run_log_store/pyproject.toml +15 -0
- extensions/secrets/README.md +0 -0
- extensions/secrets/dotenv.py +62 -0
- extensions/secrets/pyproject.toml +15 -0
- runnable/sdk.py +40 -99
- {runnable-0.17.0.dist-info → runnable-0.18.0.dist-info}/METADATA +1 -7
- runnable-0.18.0.dist-info/RECORD +58 -0
- runnable-0.17.0.dist-info/RECORD +0 -23
- {runnable-0.17.0.dist-info → runnable-0.18.0.dist-info}/WHEEL +0 -0
- {runnable-0.17.0.dist-info → runnable-0.18.0.dist-info}/entry_points.txt +0 -0
- {runnable-0.17.0.dist-info → runnable-0.18.0.dist-info}/licenses/LICENSE +0 -0
extensions/pipeline_executor/argo_specification.yaml

@@ -0,0 +1,51 @@

```yaml
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  generateName: runnable-dag
spec:
  activeDeadlineSeconds: int # max run time of the workflow
  entrypoint: str
  nodeSelector: Dict[str, str] # global node selector
  parallelism: # global level
  podGC: OnPodCompletion
  resources: # Should be converted to podSpecPatch
    limits:
    requests:
  podSpecPatch: json str representation of resources for defaults
  retryStrategy: # global level for all templates
    limit: int
    retryPolicy: # global level for all templates
    backoff:
      duration: str
      factor: int
      maxDuration: str
  serviceAccountName: str # Optionally required
  templateDefaults:
    activeDeadlineSeconds: int, for a template
    timeout: str # max time including the wait time
    failFast: true
  volumes:
  templates:
    activeDeadlineSeconds: # override
    nodeSelector: # override
    retryStrategy: # override
    tolerations: # override
    container:
      command:
      env:
      image:
      imagePullPolicy:
      volumeMounts:
      resources:
        limits:
        requests:
    dag:
      tasks:
        depends:
        continueOn:
  tolerations: # global level for all templates
    effect: str
    key: str
    operator: str
    value: str
  volumes:
```
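The specification above is a field reference rather than a deployable manifest: `int`, `str`, and `Dict[str, str]` stand in for concrete values. As a rough sketch only, the snippet below fills the skeleton with plausible values and serialises it with PyYAML; the field names come from the skeleton, while every concrete value is an assumption.

```python
# A hedged sketch: fill the Argo Workflow skeleton above with illustrative
# values and serialise it. Requires PyYAML (pip install pyyaml).
import yaml

workflow = {
    "apiVersion": "argoproj.io/v1alpha1",
    "kind": "Workflow",
    "metadata": {"generateName": "runnable-dag"},
    "spec": {
        "activeDeadlineSeconds": 7200,  # max run time, assumed value
        "entrypoint": "runnable-dag",   # assumed template name
        "parallelism": 2,
        "retryStrategy": {
            "limit": 0,
            "retryPolicy": "Always",
            "backoff": {"duration": "120", "factor": 2, "maxDuration": "3600"},
        },
        "templates": [],  # populated from the pipeline's dag by the executor
    },
}

print(yaml.safe_dump(workflow, sort_keys=False))
```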
extensions/pipeline_executor/local.py

@@ -0,0 +1,62 @@

```python
import logging

from pydantic import Field, PrivateAttr

from extensions.pipeline_executor import GenericPipelineExecutor
from runnable import defaults
from runnable.defaults import TypeMapVariable
from runnable.nodes import BaseNode

logger = logging.getLogger(defaults.LOGGER_NAME)


class LocalExecutor(GenericPipelineExecutor):
    """
    In the mode of local execution, we run everything on the local computer.

    This has serious implications for how long a run takes, so ensure that the
    local machine has enough compute for all of the steps.

    Example config:
    execution:
      type: local
    """

    service_name: str = "local"

    object_serialisation: bool = Field(default=True)

    _is_local: bool = PrivateAttr(default=True)

    def execute_from_graph(
        self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
    ):
        if not self.object_serialisation:
            self._context.object_serialisation = False

        super().execute_from_graph(node=node, map_variable=map_variable, **kwargs)

    def trigger_node_execution(
        self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
    ):
        """
        In this mode of execution, we prepare for the node execution and execute the node.

        Args:
            node (BaseNode): The node to execute
            map_variable (TypeMapVariable, optional): The map variable, if the node is part of a map branch.
                Defaults to None.
        """
        self.execute_node(node=node, map_variable=map_variable, **kwargs)

    def execute_node(
        self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
    ):
        """
        For local execution, we just execute the node.

        Args:
            node (BaseNode): The node to execute
            map_variable (TypeMapVariable, optional): The map variable, if the node is part of a map branch.
                Defaults to None.
        """
        self._execute_node(node=node, map_variable=map_variable, **kwargs)
```
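LocalExecutor is the default executor, so a pipeline runs in-process without any configuration file. A minimal sketch, assuming `Pipeline` and `PythonTask` are exported by runnable's SDK (see runnable/sdk.py in the file list above) with the same signatures as earlier releases:

```python
# A hedged sketch of running a pipeline with LocalExecutor, the default.
# Pipeline and PythonTask are assumed SDK exports; signatures may differ.
from runnable import Pipeline, PythonTask


def hello():
    print("hello from the local executor")


def main():
    # terminate_with_success marks this as the last step of the dag
    task = PythonTask(name="hello", function=hello, terminate_with_success=True)
    Pipeline(steps=[task]).execute()  # every step runs in the current process


if __name__ == "__main__":
    main()
```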
extensions/pipeline_executor/local_container.py

@@ -0,0 +1,363 @@

```python
import logging
from pathlib import Path
from typing import Dict

from pydantic import Field
from rich import print

from extensions.pipeline_executor import GenericPipelineExecutor
from runnable import console, defaults, task_console, utils
from runnable.datastore import StepLog
from runnable.defaults import TypeMapVariable
from runnable.nodes import BaseNode

logger = logging.getLogger(defaults.LOGGER_NAME)


class LocalContainerExecutor(GenericPipelineExecutor):
    """
    In the mode of local-container, we execute all the commands in a container.

    Ensure that the local compute has enough resources to finish all your jobs.

    The image of the run could either be provided as a default in the configuration of the execution engine,
    i.e.:
    execution:
      type: 'local-container'
      config:
        docker_image: the image you want the code to run in.

    or the default image could be overridden for a single node by providing a docker_image in the step config,
    i.e.:
    dag:
      steps:
        step:
          executor_config:
            local-container:
              docker_image: The image that you want that single step to run in.
    This image would be used for that step only.

    This mode does not build the docker image with the latest code for you; it is left to the user to build
    it and ensure that the docker image provided is the correct one.

    Example config:
    execution:
      type: local-container
      config:
        docker_image: The default docker image to use if the node does not provide one.
    """

    service_name: str = "local-container"
    docker_image: str
    auto_remove_container: bool = True
    environment: Dict[str, str] = Field(default_factory=dict)

    _is_local: bool = False

    _container_log_location = "/tmp/run_logs/"
    _container_catalog_location = "/tmp/catalog/"
    _container_secrets_location = "/tmp/dotenv"
    _volumes: Dict[str, Dict[str, str]] = {}

    def add_code_identities(self, node: BaseNode, step_log: StepLog, **kwargs):
        """
        Call the base class to add the git code identity, then add the docker identity.

        Args:
            node (BaseNode): The node we are adding the code identity for
            step_log (StepLog): The step log corresponding to the node
        """

        super().add_code_identities(node, step_log)

        if node.node_type in ["success", "fail"]:
            # Need not add code identities if we are in a success or fail node
            return

        executor_config = self._resolve_executor_config(node)

        docker_image = executor_config.get("docker_image", None)
        if docker_image:
            code_id = self._context.run_log_store.create_code_identity()

            code_id.code_identifier = utils.get_local_docker_image_id(docker_image)
            code_id.code_identifier_type = "docker"
            code_id.code_identifier_dependable = True
            code_id.code_identifier_url = "local docker host"
            step_log.code_identities.append(code_id)

    def execute_node(
        self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
    ):
        """
        We are already in the container; we just execute the node.
        The node is already prepared for execution.
        """
        self._use_volumes()
        return self._execute_node(node, map_variable, **kwargs)

    def execute_from_graph(
        self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
    ):
        """
        This is the entry point from the graph execution.

        While self.execute_graph is responsible for traversing the graph, this function is responsible for
        the actual execution of the node.

        If the node type is:
            * task: We can delegate to _execute_node after checking the eligibility for re-run in cases of a re-run
            * success: We can delegate to _execute_node
            * fail: We can delegate to _execute_node

        For nodes that are internally graphs:
            * parallel: Delegate the responsibility of execution to node.execute_as_graph()
            * dag: Delegate the responsibility of execution to node.execute_as_graph()
            * map: Delegate the responsibility of execution to node.execute_as_graph()

        Transpilers will NEVER use this method and will NEVER call this method.
        This method should only be used by interactive executors.

        Args:
            node (Node): The node to execute
            map_variable (dict, optional): If the node is of a map state, this corresponds to the value of the iterable.
                Defaults to None.
        """
        step_log = self._context.run_log_store.create_step_log(
            node.name, node._get_step_log_name(map_variable)
        )

        self.add_code_identities(node=node, step_log=step_log)

        step_log.step_type = node.node_type
        step_log.status = defaults.PROCESSING

        self._context.run_log_store.add_step_log(step_log, self._context.run_id)

        logger.info(f"Executing node: {node.get_summary()}")

        # Add the step log to the database as per the situation.
        # If it is a terminal node, complete it now
        if node.node_type in ["success", "fail"]:
            self._execute_node(node, map_variable=map_variable, **kwargs)
            return

        # We call an internal function to iterate the sub graphs and execute them
        if node.is_composite:
            node.execute_as_graph(map_variable=map_variable, **kwargs)
            return

        task_console.export_text(clear=True)

        task_name = node._resolve_map_placeholders(node.internal_name, map_variable)
        console.print(
            f":runner: Executing the node {task_name} ... ", style="bold color(208)"
        )
        self.trigger_node_execution(node=node, map_variable=map_variable, **kwargs)

    # def execute_job(self, node: TaskNode):
    #     """
    #     Set up the step log and call the execute node
    #
    #     Args:
    #         node (BaseNode): _description_
    #     """

    #     step_log = self._context.run_log_store.create_step_log(
    #         node.name, node._get_step_log_name(map_variable=None)
    #     )

    #     self.add_code_identities(node=node, step_log=step_log)

    #     step_log.step_type = node.node_type
    #     step_log.status = defaults.PROCESSING
    #     self._context.run_log_store.add_step_log(step_log, self._context.run_id)

    #     command = utils.get_job_execution_command(node)
    #     self._spin_container(node=node, command=command)

    #     # Check the step log status and warn if necessary. Docker errors are generally suppressed.
    #     step_log = self._context.run_log_store.get_step_log(
    #         node._get_step_log_name(map_variable=None), self._context.run_id
    #     )
    #     if step_log.status != defaults.SUCCESS:
    #         msg = (
    #             "Node execution inside the container failed. Please check the logs.\n"
    #             "Note: If you do not see any docker issue from your side and the code works properly on local execution, "
    #             "please raise a bug report."
    #         )
    #         logger.warning(msg)

    def trigger_node_execution(
        self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
    ):
        """
        We come into this step via execute_from_graph and use trigger job to spin up the container.

        In local container execution, we just spin the container to execute runnable execute_single_node.

        Args:
            node (BaseNode): The node we are currently executing
            map_variable (TypeMapVariable, optional): Set if the node is part of a map branch. Defaults to None.
        """
        self._mount_volumes()
        executor_config = self._resolve_executor_config(node)
        auto_remove_container = executor_config.get("auto_remove_container", True)

        logger.debug("Here is the resolved executor config")
        logger.debug(executor_config)

        command = utils.get_node_execution_command(node, map_variable=map_variable)

        self._spin_container(
            node=node,
            command=command,
            map_variable=map_variable,
            auto_remove_container=auto_remove_container,
            **kwargs,
        )

        step_log = self._context.run_log_store.get_step_log(
            node._get_step_log_name(map_variable), self._context.run_id
        )
        if step_log.status != defaults.SUCCESS:
            msg = (
                "Node execution inside the container failed. Please check the logs.\n"
                "Note: If you do not see any docker issue from your side and the code works properly on local execution, "
                "please raise a bug report."
            )
            logger.error(msg)
            step_log.status = defaults.FAIL
            self._context.run_log_store.add_step_log(step_log, self._context.run_id)

    def _spin_container(
        self,
        node: BaseNode,
        command: str,
        map_variable: TypeMapVariable = None,
        auto_remove_container: bool = True,
        **kwargs,
    ):
        """
        During the flow run, we have to spin up a container with the docker image mentioned
        and the right log locations.
        """
        # Conditional import
        import docker  # pylint: disable=C0415

        try:
            client = docker.from_env()
            api_client = docker.APIClient()
        except Exception as ex:
            logger.exception("Could not get access to docker")
            raise Exception(
                "Could not get the docker socket file, do you have docker installed?"
            ) from ex

        try:
            logger.info(f"Running the command {command}")
            print(command)
            # Overrides global config with local
            executor_config = self._resolve_executor_config(node)

            docker_image = executor_config.get("docker_image", None)
            environment = executor_config.get("environment", {})
            environment.update(self._context.variables)
            if not docker_image:
                raise Exception(
                    f"Please provide a docker_image using executor_config of the step {node.name} or at global config"
                )

            # TODO: Should consider using getpass.getuser() when running the docker container? Volume permissions
            container = client.containers.create(
                image=docker_image,
                command=command,
                auto_remove=False,
                volumes=self._volumes,
                network_mode="host",
                environment=environment,
            )

            # print(container.__dict__)

            container.start()
            stream = api_client.logs(
                container=container.id, timestamps=True, stream=True, follow=True
            )
            while True:
                try:
                    output = next(stream).decode("utf-8")
                    output = output.strip("\r\n")
                    logger.info(output)
                    print(output)
                except StopIteration:
                    logger.info("Docker Run completed")
                    break

            exit_status = api_client.inspect_container(container.id)["State"][
                "ExitCode"
            ]

            if auto_remove_container:
                container.remove(force=True)

            if exit_status != 0:
                msg = f"Docker command failed with exit code {exit_status}"
                raise Exception(msg)

        except Exception as _e:
            logger.exception("Problems with spinning/running the container")
            raise _e

    def _mount_volumes(self):
        """
        Mount the volumes for the container.
        """
        match self._context.run_log_store.service_name:
            case "file-system":
                write_to = self._context.run_log_store.log_folder
                self._volumes[str(Path(write_to).resolve())] = {
                    "bind": f"{self._container_log_location}",
                    "mode": "rw",
                }
            case "chunked-fs":
                write_to = self._context.run_log_store.log_folder
                self._volumes[str(Path(write_to).resolve())] = {
                    "bind": f"{self._container_log_location}",
                    "mode": "rw",
                }

        match self._context.catalog_handler.service_name:
            case "file-system":
                catalog_location = self._context.catalog_handler.catalog_location
                self._volumes[str(Path(catalog_location).resolve())] = {
                    "bind": f"{self._container_catalog_location}",
                    "mode": "rw",
                }

        match self._context.secrets_handler.service_name:
            case "dotenv":
                secrets_location = self._context.secrets_handler.location
                self._volumes[str(Path(secrets_location).resolve())] = {
                    "bind": f"{self._container_secrets_location}",
                    "mode": "ro",
                }

    def _use_volumes(self):
        match self._context.run_log_store.service_name:
            case "file-system":
                self._context.run_log_store.log_folder = self._container_log_location
            case "chunked-fs":
                self._context.run_log_store.log_folder = self._container_log_location

        match self._context.catalog_handler.service_name:
            case "file-system":
                self._context.catalog_handler.catalog_location = (
                    self._container_catalog_location
                )

        match self._context.secrets_handler.service_name:
            case "dotenv":
                self._context.secrets_handler.location = (
                    self._container_secrets_location
                )
```
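The `_volumes` mapping built by `_mount_volumes` uses docker-py's volume format: `{host_path: {"bind": container_path, "mode": "rw" | "ro"}}`. Below is a standalone sketch of the docker-py calls `_spin_container` relies on; the image, command, paths, and environment variable are illustrative assumptions:

```python
# A hedged sketch of the docker-py flow used by _spin_container.
# Requires docker (pip install docker) and a running docker daemon.
from pathlib import Path

import docker

client = docker.from_env()

# Same volume format _mount_volumes builds: host path -> bind spec.
volumes = {
    str(Path(".run_log_store").resolve()): {"bind": "/tmp/run_logs/", "mode": "rw"},
    str(Path(".catalog").resolve()): {"bind": "/tmp/catalog/", "mode": "rw"},
}

container = client.containers.create(
    image="python:3.10-slim",           # stands in for docker_image
    command="python -c 'print(42)'",    # stands in for the node execution command
    volumes=volumes,
    network_mode="host",
    environment={"GREETING": "hello"},  # hypothetical variable
)
container.start()
result = container.wait()  # e.g. {"Error": None, "StatusCode": 0}
print(container.logs().decode())
container.remove(force=True)

if result["StatusCode"] != 0:
    raise RuntimeError(f"Docker command failed with exit code {result['StatusCode']}")
```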
extensions/pipeline_executor/mocked.py

@@ -0,0 +1,161 @@

```python
import copy
import logging
from typing import Any, Dict, Type, cast

from pydantic import ConfigDict, Field

from extensions.nodes.nodes import TaskNode
from extensions.pipeline_executor import GenericPipelineExecutor
from runnable import context, defaults
from runnable.defaults import TypeMapVariable
from runnable.nodes import BaseNode
from runnable.tasks import BaseTaskType

logger = logging.getLogger(defaults.LOGGER_NAME)


def create_executable(
    params: Dict[str, Any], model: Type[BaseTaskType], node_name: str
) -> BaseTaskType:
    class EasyModel(model):  # type: ignore
        model_config = ConfigDict(extra="ignore")

    swallow_all = EasyModel(node_name=node_name, **params)
    return swallow_all


class MockedExecutor(GenericPipelineExecutor):
    service_name: str = "mocked"
    _is_local: bool = True

    model_config = ConfigDict(extra="ignore")

    patches: Dict[str, Any] = Field(default_factory=dict)

    @property
    def _context(self):
        return context.run_context

    def execute_from_graph(
        self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
    ):
        """
        This is the entry point from the graph execution.

        While self.execute_graph is responsible for traversing the graph, this function is responsible for
        the actual execution of the node.

        If the node type is:
            * task: We can delegate to _execute_node after checking the eligibility for re-run in cases of a re-run
            * success: We can delegate to _execute_node
            * fail: We can delegate to _execute_node

        For nodes that are internally graphs:
            * parallel: Delegate the responsibility of execution to node.execute_as_graph()
            * dag: Delegate the responsibility of execution to node.execute_as_graph()
            * map: Delegate the responsibility of execution to node.execute_as_graph()

        Transpilers will NEVER use this method and will NEVER call this method.
        This method should only be used by interactive executors.

        Args:
            node (Node): The node to execute
            map_variable (dict, optional): If the node is of a map state, this corresponds to the value of the iterable.
                Defaults to None.
        """
        step_log = self._context.run_log_store.create_step_log(
            node.name, node._get_step_log_name(map_variable)
        )

        self.add_code_identities(node=node, step_log=step_log)

        step_log.step_type = node.node_type
        step_log.status = defaults.PROCESSING

        self._context.run_log_store.add_step_log(step_log, self._context.run_id)

        logger.info(f"Executing node: {node.get_summary()}")

        # Add the step log to the database as per the situation.
        # If it is a terminal node, complete it now
        if node.node_type in ["success", "fail"]:
            self._context.run_log_store.add_step_log(step_log, self._context.run_id)
            self._execute_node(node, map_variable=map_variable, **kwargs)
            return

        # We call an internal function to iterate the sub graphs and execute them
        if node.is_composite:
            self._context.run_log_store.add_step_log(step_log, self._context.run_id)
            node.execute_as_graph(map_variable=map_variable, **kwargs)
            return

        if node.name not in self.patches:
            # node is not patched, so mock it
            self._execute_node(node, map_variable=map_variable, mock=True, **kwargs)
        else:
            # node is patched; rebuild its executable from the patch values
            node_to_send: TaskNode = cast(TaskNode, node).model_copy(deep=True)
            executable_type = node_to_send.executable.__class__
            executable = create_executable(
                self.patches[node.name],
                executable_type,
                node_name=node.name,
            )
            node_to_send.executable = executable
            self._execute_node(
                node_to_send, map_variable=map_variable, mock=False, **kwargs
            )

    def _resolve_executor_config(self, node: BaseNode):
        """
        The overrides section can contain specific overrides to a global executor config.
        To avoid too much clutter in the dag definition, we allow the configuration file to have an overrides block.
        The nodes can override the global config by referring to a key in the overrides.

        This function also applies variables to the effective node config.

        For example:
        # configuration.yaml
        execution:
          type: cloud-implementation
          config:
            k1: v1
            k3: v3
          overrides:
            custom_config:
              k1: v11
              k2: v2 # Could be a mapping internally.

        # in pipeline definition.yaml
        dag:
          steps:
            step1:
              overrides:
                cloud-implementation: custom_config

        This method should resolve the node_config to {'k1': 'v11', 'k2': 'v2', 'k3': 'v3'}

        Args:
            node (BaseNode): The current node being processed.

        """
        effective_node_config = copy.deepcopy(self.model_dump())

        return effective_node_config

    def execute_node(
        self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
    ):
        """
        The entry point for all executors apart from local.
        We have already prepared for node execution.

        Args:
            node (BaseNode): The node to execute
            map_variable (dict, optional): If the node is part of a map, send in the map dictionary. Defaults to None.
        """
        ...
```
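Steps named in `patches` run with a real executable rebuilt from the patch values via `create_executable`; every other step is executed with `mock=True`. A hedged instantiation sketch follows (in practice the executor is normally selected through the run configuration, and the step name and command below are made up):

```python
# A hedged sketch of the patches mapping MockedExecutor consumes.
from extensions.pipeline_executor.mocked import MockedExecutor

executor = MockedExecutor(
    patches={
        # only the step named "train" runs for real; these patch values are
        # fed to create_executable to rebuild its task (names are made up)
        "train": {"command": "examples.functions.cheap_training_stand_in"},
    }
)
# any step not listed in patches is executed with mock=True (a no-op)
```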
extensions/pipeline_executor/pyproject.toml

@@ -0,0 +1,16 @@

```toml
[project]
name = "pipeline_executor"
version = "0.0.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []


[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"


[tool.hatch.build.targets.wheel]
packages = ["."]
```