runnable 0.50.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. extensions/README.md +0 -0
  2. extensions/__init__.py +0 -0
  3. extensions/catalog/README.md +0 -0
  4. extensions/catalog/any_path.py +214 -0
  5. extensions/catalog/file_system.py +52 -0
  6. extensions/catalog/minio.py +72 -0
  7. extensions/catalog/pyproject.toml +14 -0
  8. extensions/catalog/s3.py +11 -0
  9. extensions/job_executor/README.md +0 -0
  10. extensions/job_executor/__init__.py +236 -0
  11. extensions/job_executor/emulate.py +70 -0
  12. extensions/job_executor/k8s.py +553 -0
  13. extensions/job_executor/k8s_job_spec.yaml +37 -0
  14. extensions/job_executor/local.py +35 -0
  15. extensions/job_executor/local_container.py +161 -0
  16. extensions/job_executor/pyproject.toml +16 -0
  17. extensions/nodes/README.md +0 -0
  18. extensions/nodes/__init__.py +0 -0
  19. extensions/nodes/conditional.py +301 -0
  20. extensions/nodes/fail.py +78 -0
  21. extensions/nodes/loop.py +394 -0
  22. extensions/nodes/map.py +477 -0
  23. extensions/nodes/parallel.py +281 -0
  24. extensions/nodes/pyproject.toml +15 -0
  25. extensions/nodes/stub.py +93 -0
  26. extensions/nodes/success.py +78 -0
  27. extensions/nodes/task.py +156 -0
  28. extensions/pipeline_executor/README.md +0 -0
  29. extensions/pipeline_executor/__init__.py +871 -0
  30. extensions/pipeline_executor/argo.py +1266 -0
  31. extensions/pipeline_executor/emulate.py +119 -0
  32. extensions/pipeline_executor/local.py +226 -0
  33. extensions/pipeline_executor/local_container.py +369 -0
  34. extensions/pipeline_executor/mocked.py +159 -0
  35. extensions/pipeline_executor/pyproject.toml +16 -0
  36. extensions/run_log_store/README.md +0 -0
  37. extensions/run_log_store/__init__.py +0 -0
  38. extensions/run_log_store/any_path.py +100 -0
  39. extensions/run_log_store/chunked_fs.py +122 -0
  40. extensions/run_log_store/chunked_minio.py +141 -0
  41. extensions/run_log_store/file_system.py +91 -0
  42. extensions/run_log_store/generic_chunked.py +549 -0
  43. extensions/run_log_store/minio.py +114 -0
  44. extensions/run_log_store/pyproject.toml +15 -0
  45. extensions/secrets/README.md +0 -0
  46. extensions/secrets/dotenv.py +62 -0
  47. extensions/secrets/pyproject.toml +15 -0
  48. runnable/__init__.py +108 -0
  49. runnable/catalog.py +141 -0
  50. runnable/cli.py +484 -0
  51. runnable/context.py +730 -0
  52. runnable/datastore.py +1058 -0
  53. runnable/defaults.py +159 -0
  54. runnable/entrypoints.py +390 -0
  55. runnable/exceptions.py +137 -0
  56. runnable/executor.py +561 -0
  57. runnable/gantt.py +1646 -0
  58. runnable/graph.py +501 -0
  59. runnable/names.py +546 -0
  60. runnable/nodes.py +593 -0
  61. runnable/parameters.py +217 -0
  62. runnable/pickler.py +96 -0
  63. runnable/sdk.py +1277 -0
  64. runnable/secrets.py +92 -0
  65. runnable/tasks.py +1268 -0
  66. runnable/telemetry.py +142 -0
  67. runnable/utils.py +423 -0
  68. runnable-0.50.0.dist-info/METADATA +189 -0
  69. runnable-0.50.0.dist-info/RECORD +72 -0
  70. runnable-0.50.0.dist-info/WHEEL +4 -0
  71. runnable-0.50.0.dist-info/entry_points.txt +53 -0
  72. runnable-0.50.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,119 @@
1
+ import logging
2
+ import shlex
3
+ import subprocess
4
+ import sys
5
+ from typing import Optional
6
+
7
+ from pydantic import PrivateAttr
8
+
9
+ from extensions.pipeline_executor import GenericPipelineExecutor
10
+ from runnable import defaults
11
+ from runnable.defaults import IterableParameterModel
12
+ from runnable.nodes import BaseNode
13
+
14
+ logger = logging.getLogger(defaults.LOGGER_NAME)
15
+
16
+
17
class Emulator(GenericPipelineExecutor):
    """
    Executes every node of the pipeline on the local computer, but forks each
    node into a fresh Python process (``python -m runnable.cli ...``) — i.e.
    it emulates how remote executors trigger nodes through the CLI.

    This has some serious implications on the amount of time it would take to
    complete the run. Also ensure that the local compute is good enough for
    the compute to happen of all the steps.

    Example config:

    ```yaml
    pipeline-executor:
      type: emulator
    ```

    """

    service_name: str = "emulator"

    # The run log is set up by this executor at graph traversal time.
    _should_setup_run_log_at_traversal: bool = PrivateAttr(default=True)

    def trigger_node_execution(
        self,
        node: BaseNode,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """
        Prepare for the node execution and execute the node in a forked
        process via the CLI.

        Args:
            node (BaseNode): The node to execute.
            iter_variable (Optional[IterableParameterModel]): Iteration
                variable for map/loop branches. Defaults to None.
        """
        command = self._context.get_node_callable_command(
            node, iter_variable=iter_variable
        )

        # Execute the command in a forked process.
        self.run_click_command(command)

        # The forked process recorded its status in the run log store;
        # verify it and mark the step as failed if it did not succeed.
        step_log = self._context.run_log_store.get_step_log(
            node._get_step_log_name(iter_variable), self._context.run_id
        )
        if step_log.status != defaults.SUCCESS:
            msg = "Node execution inside the emulate failed. Please check the logs.\n"
            logger.error(msg)
            step_log.status = defaults.FAIL
            self._context.run_log_store.add_step_log(step_log, self._context.run_id)

    def execute_node(
        self,
        node: BaseNode,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """
        For local execution, we just execute the node in-process.

        Args:
            node (BaseNode): The node to execute.
            iter_variable (Optional[IterableParameterModel]): Iteration
                variable for map/loop branches. Defaults to None.
        """
        self._execute_node(node=node, iter_variable=iter_variable)

    def run_click_command(self, command: str) -> str:
        """
        Execute a Click-based CLI command in the current virtual environment,
        streaming its output as it runs.

        Args:
            command (str): Shell-style command string. The first token (the
                executable name) is dropped and the remaining tokens are
                passed to ``python -m runnable.cli``.

        Returns:
            str: Combined stdout/stderr output of the subprocess.

        Raises:
            subprocess.CalledProcessError: If the subprocess exits non-zero;
                the combined output is attached to the exception.
        """
        # Run the CLI module directly with the current interpreter so the
        # child process uses the same virtual environment as the parent.
        sub_command = [sys.executable, "-m", "runnable.cli"] + shlex.split(command)[1:]

        output = []
        # The context manager guarantees the pipe is closed and the child is
        # waited on (returncode populated), even if streaming raises.
        with subprocess.Popen(
            sub_command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
            bufsize=1,
        ) as process:
            assert process.stdout is not None  # guaranteed by stdout=PIPE
            for line in process.stdout:
                # Mirror the child's output live while also capturing it.
                print(line, end="")
                output.append(line)

        if process.returncode != 0:
            raise subprocess.CalledProcessError(
                process.returncode, command, "".join(output)
            )

        return "".join(output)
@@ -0,0 +1,226 @@
1
+ import logging
2
+ import os
3
+ from typing import List, Optional
4
+
5
+ from pydantic import Field, PrivateAttr
6
+
7
+ from extensions.pipeline_executor import GenericPipelineExecutor
8
+ from runnable import console, defaults
9
+ from runnable.datastore import DataCatalog
10
+ from runnable.defaults import IterableParameterModel
11
+ from runnable.graph import Graph
12
+ from runnable.nodes import BaseNode
13
+
14
+ logger = logging.getLogger(defaults.LOGGER_NAME)
15
+
16
+
17
class LocalExecutor(GenericPipelineExecutor):
    """
    In the mode of local execution, we run everything on the local computer.

    This has some serious implications on the amount of time it would take to complete the run.
    Also ensure that the local compute is good enough for the compute to happen of all the steps.

    Example config:

    ```yaml
    pipeline-executor:
      type: local
      config:
        enable_parallel: false # Enable parallel execution for parallel/map nodes
    ```

    """

    service_name: str = "local"
    # Whether parallel/map branches are allowed to run concurrently.
    enable_parallel: bool = Field(default=False)

    # TODO: Not fully done
    # When False, object (de)serialisation of parameters is switched off on
    # the context (see execute_from_graph / execute_from_graph_async).
    object_serialisation: bool = Field(default=True)

    # Marks this executor as running in-process on the local machine.
    _is_local: bool = PrivateAttr(default=True)

    def execute_from_graph(
        self,
        node: BaseNode,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """
        Entry point for a node during synchronous graph traversal.

        Propagates the object_serialisation switch to the context, then
        delegates to the generic implementation.

        Args:
            node (BaseNode): The node to execute.
            iter_variable (Optional[IterableParameterModel]): Iteration
                variable for map/loop branches. Defaults to None.
        """
        if not self.object_serialisation:
            self._context.object_serialisation = False

        super().execute_from_graph(node=node, iter_variable=iter_variable)

    def trigger_node_execution(
        self,
        node: BaseNode,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """
        In this mode of execution, we prepare for the node execution and execute the node.

        Locally there is no remote hand-off, so triggering is a direct call
        to execute_node.

        Args:
            node (BaseNode): The node to execute.
            iter_variable (Optional[IterableParameterModel]): Iteration
                variable for map/loop branches. Defaults to None.
        """
        self.execute_node(node=node, iter_variable=iter_variable)

    def execute_node(
        self,
        node: BaseNode,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """
        For local execution, we just execute the node in-process.

        Args:
            node (BaseNode): The node to execute.
            iter_variable (Optional[IterableParameterModel]): Iteration
                variable for map/loop branches. Defaults to None.
        """
        self._execute_node(node=node, iter_variable=iter_variable)

    # ═══════════════════════════════════════════════════════════════
    # Async Path - implement async methods for local execution
    # ═══════════════════════════════════════════════════════════════

    async def execute_graph_async(
        self,
        dag: Graph,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """
        Async graph traversal.

        Walks the dag from its start node, executing each node via the async
        path and following the status-determined edge until a terminal
        (success/fail) node is reached.

        Args:
            dag (Graph): The graph (or branch sub-graph) to traverse.
            iter_variable (Optional[IterableParameterModel]): Iteration
                variable for map/loop branches. Defaults to None.

        Raises:
            Exception: If traversal would visit the same node twice in a row
                (guard against infinite loops).
        """
        current_node = dag.start_at
        previous_node = None
        logger.info(f"Running async execution with {current_node}")

        branch_task_name: str = ""
        if dag.internal_branch_name:
            # Resolve map placeholders so logs show the concrete branch name.
            branch_task_name = BaseNode._resolve_map_placeholders(
                dag.internal_branch_name or "Graph",
                iter_variable,
            )
            console.print(
                f":runner: Executing the branch {branch_task_name} ... ",
                style="bold color(208)",
            )

        while True:
            working_on = dag.get_node_by_name(current_node)
            task_name = working_on._resolve_map_placeholders(
                working_on.internal_name, iter_variable
            )

            # A node that resolves to itself as the next node would loop forever.
            if previous_node == current_node:
                raise Exception("Potentially running in an infinite loop")
            previous_node = current_node

            try:
                await self.execute_from_graph_async(
                    working_on, iter_variable=iter_variable
                )
                # Sync helper - no await needed
                status, next_node_name = self._get_status_and_next_node_name(
                    current_node=working_on, dag=dag, iter_variable=iter_variable
                )

                if status == defaults.SUCCESS:
                    console.print(f":white_check_mark: Node {task_name} succeeded")
                else:
                    console.print(f":x: Node {task_name} failed")
            except Exception as e:
                console.print(":x: Error during execution", style="bold red")
                console.print(e, style=defaults.error_style)
                logger.exception(e)
                raise

            console.rule(style="[dark orange]")

            # Terminal node types end the traversal; otherwise follow the edge.
            if working_on.node_type in ["success", "fail"]:
                break
            current_node = next_node_name

        # Sync helper - no await needed
        self._finalize_graph_execution(working_on, dag, iter_variable)

    async def execute_from_graph_async(
        self,
        node: BaseNode,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """
        Async node execution entry point.

        Mirrors execute_from_graph: propagates the serialisation switch,
        prepares the step log (which may decide to skip the node), then
        dispatches terminal, composite and task nodes to their respective
        async paths.

        Args:
            node (BaseNode): The node to execute.
            iter_variable (Optional[IterableParameterModel]): Iteration
                variable for map/loop branches. Defaults to None.
        """
        if not self.object_serialisation:
            self._context.object_serialisation = False

        # Sync helper - no await needed; returns None when the node is skipped.
        step_log = self._prepare_node_for_execution(node, iter_variable)
        if step_log is None:
            return  # Skipped

        logger.info(f"Executing node: {node.get_summary()}")

        if node.node_type in ["success", "fail"]:
            await self._execute_node_async(node, iter_variable=iter_variable)
            return

        # Composite nodes (e.g. parallel/map) traverse their own sub-graph.
        if node.is_composite:
            await node.execute_as_graph_async(iter_variable=iter_variable)
            return

        task_name = node._resolve_map_placeholders(node.internal_name, iter_variable)
        console.print(
            f":runner: Executing the node {task_name} ... ", style="bold color(208)"
        )
        await self.trigger_node_execution_async(node=node, iter_variable=iter_variable)

    async def trigger_node_execution_async(
        self,
        node: BaseNode,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        """
        Async trigger for node execution; locally this is a direct call.

        Args:
            node (BaseNode): The node to execute.
            iter_variable (Optional[IterableParameterModel]): Iteration
                variable for map/loop branches. Defaults to None.
        """
        await self._execute_node_async(node=node, iter_variable=iter_variable)

    async def _execute_node_async(
        self,
        node: BaseNode,
        iter_variable: Optional[IterableParameterModel] = None,
        mock: bool = False,
    ):
        """
        Async node execution wrapper.

        Performs the catalog "get" stage, awaits the node's own async
        execution, then performs the catalog "put" stage and records the
        step log in the run log store.

        Args:
            node (BaseNode): The node to execute.
            iter_variable (Optional[IterableParameterModel]): Iteration
                variable for map/loop branches. Defaults to None.
            mock (bool): Passed through to node.execute_async.
                Defaults to False.
        """
        current_attempt_number = self._calculate_attempt_number(node, iter_variable)
        # Publish the attempt number via the environment under
        # defaults.ATTEMPT_NUMBER (presumably read by the task code — confirm).
        os.environ[defaults.ATTEMPT_NUMBER] = str(current_attempt_number)

        logger.info(
            f"Trying to execute node: {node.internal_name}, attempt: {current_attempt_number}"
        )

        self._context_node = node

        # Sync - catalog get
        data_catalogs_get: Optional[List[DataCatalog]] = self._sync_catalog(stage="get")
        logger.debug(f"data_catalogs_get: {data_catalogs_get}")

        # ASYNC - execute the node
        step_log = await node.execute_async(
            iter_variable=iter_variable,
            attempt_number=current_attempt_number,
            mock=mock,
        )

        # Sync - catalog put and finalization.
        # A failed step may not have produced its outputs; per the flag name,
        # missing files are tolerated in that case.
        allow_file_not_found_exc = step_log.status != defaults.SUCCESS
        data_catalogs_put: Optional[List[DataCatalog]] = self._sync_catalog(
            stage="put", allow_file_no_found_exc=allow_file_not_found_exc
        )
        logger.debug(f"data_catalogs_put: {data_catalogs_put}")
        step_log.add_data_catalogs(data_catalogs_put or [])
        step_log.add_data_catalogs(data_catalogs_get or [])

        console.print(f"Summary of the step: {step_log.internal_name}")
        console.print(step_log.get_summary(), style=defaults.info_style)

        self.add_task_log_to_catalog(
            name=self._context_node.internal_name, iter_variable=iter_variable
        )
        self._context_node = None

        self._context.run_log_store.add_step_log(step_log, self._context.run_id)