PyPI - runnable - Versions diffs - 0.50.0__py3-none-any.whl - Mend

runnable 0.50.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (72) hide show

extensions/README.md +0 -0
extensions/__init__.py +0 -0
extensions/catalog/README.md +0 -0
extensions/catalog/any_path.py +214 -0
extensions/catalog/file_system.py +52 -0
extensions/catalog/minio.py +72 -0
extensions/catalog/pyproject.toml +14 -0
extensions/catalog/s3.py +11 -0
extensions/job_executor/README.md +0 -0
extensions/job_executor/__init__.py +236 -0
extensions/job_executor/emulate.py +70 -0
extensions/job_executor/k8s.py +553 -0
extensions/job_executor/k8s_job_spec.yaml +37 -0
extensions/job_executor/local.py +35 -0
extensions/job_executor/local_container.py +161 -0
extensions/job_executor/pyproject.toml +16 -0
extensions/nodes/README.md +0 -0
extensions/nodes/__init__.py +0 -0
extensions/nodes/conditional.py +301 -0
extensions/nodes/fail.py +78 -0
extensions/nodes/loop.py +394 -0
extensions/nodes/map.py +477 -0
extensions/nodes/parallel.py +281 -0
extensions/nodes/pyproject.toml +15 -0
extensions/nodes/stub.py +93 -0
extensions/nodes/success.py +78 -0
extensions/nodes/task.py +156 -0
extensions/pipeline_executor/README.md +0 -0
extensions/pipeline_executor/__init__.py +871 -0
extensions/pipeline_executor/argo.py +1266 -0
extensions/pipeline_executor/emulate.py +119 -0
extensions/pipeline_executor/local.py +226 -0
extensions/pipeline_executor/local_container.py +369 -0
extensions/pipeline_executor/mocked.py +159 -0
extensions/pipeline_executor/pyproject.toml +16 -0
extensions/run_log_store/README.md +0 -0
extensions/run_log_store/__init__.py +0 -0
extensions/run_log_store/any_path.py +100 -0
extensions/run_log_store/chunked_fs.py +122 -0
extensions/run_log_store/chunked_minio.py +141 -0
extensions/run_log_store/file_system.py +91 -0
extensions/run_log_store/generic_chunked.py +549 -0
extensions/run_log_store/minio.py +114 -0
extensions/run_log_store/pyproject.toml +15 -0
extensions/secrets/README.md +0 -0
extensions/secrets/dotenv.py +62 -0
extensions/secrets/pyproject.toml +15 -0
runnable/__init__.py +108 -0
runnable/catalog.py +141 -0
runnable/cli.py +484 -0
runnable/context.py +730 -0
runnable/datastore.py +1058 -0
runnable/defaults.py +159 -0
runnable/entrypoints.py +390 -0
runnable/exceptions.py +137 -0
runnable/executor.py +561 -0
runnable/gantt.py +1646 -0
runnable/graph.py +501 -0
runnable/names.py +546 -0
runnable/nodes.py +593 -0
runnable/parameters.py +217 -0
runnable/pickler.py +96 -0
runnable/sdk.py +1277 -0
runnable/secrets.py +92 -0
runnable/tasks.py +1268 -0
runnable/telemetry.py +142 -0
runnable/utils.py +423 -0
runnable-0.50.0.dist-info/METADATA +189 -0
runnable-0.50.0.dist-info/RECORD +72 -0
runnable-0.50.0.dist-info/WHEEL +4 -0
runnable-0.50.0.dist-info/entry_points.txt +53 -0
runnable-0.50.0.dist-info/licenses/LICENSE +201 -0

extensions/run_log_store/generic_chunked.py ADDED Viewed

@@ -0,0 +1,549 @@
+import json
+import logging
+from abc import abstractmethod
+from enum import Enum
+from typing import Any, Dict, Union
+from runnable import defaults, exceptions
+from runnable.datastore import (
+    BaseRunLogStore,
+    BranchLog,
+    RunLog,
+    StepLog,
+)
+logger = logging.getLogger(defaults.LOGGER_NAME)
+class ChunkedRunLogStore(BaseRunLogStore):
+    """
+    A generic implementation of a RunLogStore that stores RunLogs in chunks.
+    """
+    service_name: str = ""
+    supports_parallel_writes: bool = True
+    class LogTypes(Enum):
+        """
+        Kinds of log this store persists.
+        Each value is the file-name stem that ``get_file_name`` builds on.
+        """
+        RUN_LOG = "RunLog"
+        BRANCH_LOG = "BranchLog"
+    class ModelTypes(Enum):
+        """
+        Model class used to validate each kind of log.
+        Member names match ``LogTypes`` so ``retrieve`` can look up the model
+        from a ``LogTypes`` member's name.
+        """
+        RUN_LOG = RunLog
+        BRANCH_LOG = BranchLog
+    def get_file_name(self, log_type: LogTypes, name: str = "") -> str:
+        """
+        Get the exact file name for a log type.
+        Args:
+            log_type (LogTypes): Either RUN_LOG or BRANCH_LOG
+            name (str, optional): The internal_branch_name for BranchLog. Defaults to ''.
+        Raises:
+            Exception: If log_type is not recognized or name is missing for BRANCH_LOG
+        Returns:
+            str: The exact file name
+        """
+        if log_type == self.LogTypes.RUN_LOG:
+            return self.LogTypes.RUN_LOG.value
+        if log_type == self.LogTypes.BRANCH_LOG:
+            if not name:
+                raise Exception("Name (internal_branch_name) required for BRANCH_LOG")
+            return f"{self.LogTypes.BRANCH_LOG.value}-{name}"
+        raise Exception(f"Unexpected log type: {log_type}")
+    @abstractmethod
+    def _exists(self, run_id: str, name: str) -> bool:
+        """
+        Check if a file exists in the persistence layer.
+        Abstract hook for concrete stores. ``store`` calls it to choose between
+        insert and update; ``retrieve`` calls it to decide whether to raise
+        ``EntityNotFoundError``.
+        Args:
+            run_id (str): The run id
+            name (str): The exact file name to check, as built by ``get_file_name``
+        Returns:
+            bool: True if file exists, False otherwise
+        """
+        ...
+    @abstractmethod
+    def _list_branch_logs(self, run_id: str) -> list[str]:
+        """
+        List all branch log file names for a run_id.
+        Abstract hook for concrete stores. ``_prepare_full_run_log`` passes each
+        returned name straight to ``_retrieve``, so the names must be usable
+        there unchanged.
+        Args:
+            run_id (str): The run id
+        Returns:
+            list[str]: List of branch log file names (e.g., ["BranchLog-map.1", "BranchLog-map.2"])
+        """
+        ...
+    @abstractmethod
+    def _store(self, run_id: str, contents: dict, name: str, insert: bool = False):
+        """
+        Store the contents against the name in the persistence layer.
+        Abstract hook for concrete stores; called by ``store`` after it has
+        merged ``contents`` with anything already saved under ``name``.
+        Args:
+            run_id (str): The run id
+            contents (dict): The dict to store
+            name (str): The name to store as
+            insert (bool): True when ``store`` found no existing file under
+                ``name``; False when an existing file is being replaced.
+                Defaults to False.
+        """
+        ...
+    @abstractmethod
+    def _retrieve(self, run_id: str, name: str) -> dict:
+        """
+        Does the job of retrieving from the persistent layer.
+        Abstract hook for concrete stores. Callers in this class validate the
+        returned dict into a ``RunLog`` or ``BranchLog``.
+        Args:
+            run_id (str): The run id the file belongs to
+            name (str): the name of the file to retrieve
+        Returns:
+            dict: The contents
+        """
+        ...
+    def store(self, run_id: str, log_type: LogTypes, contents: dict, name: str = ""):
+        """Store a log in the persistence layer.
+        Args:
+            run_id (str): The run id to store against
+            log_type (LogTypes): The type of log to store (RUN_LOG or BRANCH_LOG)
+            contents (dict): The dict of contents to store
+            name (str, optional): The internal_branch_name for BRANCH_LOG. Defaults to ''.
+        """
+        file_name = self.get_file_name(log_type=log_type, name=name)
+        # Check if file exists to determine if this is an update or insert
+        insert = not self._exists(run_id=run_id, name=file_name)
+        if not insert:
+            # File exists - merge with existing contents
+            existing_contents = self._retrieve(run_id=run_id, name=file_name)
+            contents = dict(existing_contents, **contents)
+        self._store(run_id=run_id, contents=contents, name=file_name, insert=insert)
+    def retrieve(self, run_id: str, log_type: LogTypes, name: str = "") -> Any:
+        """
+        Retrieve a log model by type and name.
+        Args:
+            run_id (str): The run id
+            log_type (LogTypes): Either RUN_LOG or BRANCH_LOG
+            name (str, optional): The internal_branch_name for BRANCH_LOG. Defaults to ''.
+        Raises:
+            Exception: If name is missing for BRANCH_LOG
+            EntityNotFoundError: If the file is not found
+        Returns:
+            Union[RunLog, BranchLog]: The requested log object
+        """
+        if log_type == self.LogTypes.BRANCH_LOG and not name:
+            raise Exception("Name (internal_branch_name) required for BRANCH_LOG")
+        file_name = self.get_file_name(log_type=log_type, name=name)
+        if not self._exists(run_id=run_id, name=file_name):
+            raise exceptions.EntityNotFoundError()
+        contents = self._retrieve(run_id=run_id, name=file_name)
+        model_class = self.ModelTypes[log_type.name].value
+        return model_class.model_validate(contents)
+    def _get_parent_branch(self, name: str) -> Union[str, None]:
+        """
+        Returns the name of the parent branch.
+        If the step is part of main dag, return None.
+        Args:
+            name (str): The name of the step.
+        Returns:
+            str: The name of the branch containing the step.
+        """
+        dot_path = name.split(".")
+        if len(dot_path) == 1:
+            return None
+        # Ignore the step name
+        return ".".join(dot_path[:-1])
+    def _get_parent_step(self, name: str) -> Union[str, None]:
+        """
+        Returns the step containing the step, useful when we have steps within a branch.
+        Returns None, if the step belongs to parent dag.
+        Args:
+            name (str): The name of the step to find the parent step it belongs to.
+        Returns:
+            str: The parent step the step belongs to, None if the step belongs to parent dag.
+        """
+        dot_path = name.split(".")
+        if len(dot_path) == 1:
+            return None
+        # Ignore the branch.step_name
+        return ".".join(dot_path[:-2])
+    def _prepare_full_run_log(self, run_log: RunLog):
+        """
+        Populate run log with branch logs.
+        Since branches now contain their own steps and parameters,
+        we just need to attach branches to their parent steps.
+        """
+        run_id = run_log.run_id
+        # Get all branch log file names
+        branch_file_names = self._list_branch_logs(run_id=run_id)
+        if not branch_file_names:
+            return
+        # Load all branch logs
+        branch_logs: Dict[str, BranchLog] = {}
+        for file_name in branch_file_names:
+            contents = self._retrieve(run_id=run_id, name=file_name)
+            branch_log = BranchLog.model_validate(contents)
+            branch_logs[branch_log.internal_name] = branch_log
+        # Attach branches to their parent steps
+        for branch_name, branch_log in branch_logs.items():
+            # For a branch like "conditional.heads", parent step is "conditional"
+            # For a branch like "map.a.nested", parent step is "map.a"
+            dot_path = branch_name.split(".")
+            if len(dot_path) < 2:
+                # Branches must have at least step.branch format
+                continue
+            parent_step_name = ".".join(dot_path[:-1])
+            # Find parent step (could be in run_log or another branch)
+            parent_branch_name = self._get_parent_branch(parent_step_name)
+            if parent_branch_name and parent_branch_name in branch_logs:
+                parent_step = branch_logs[parent_branch_name].steps.get(
+                    parent_step_name
+                )
+            else:
+                parent_step = run_log.steps.get(parent_step_name)
+            if parent_step:
+                parent_step.branches[branch_name] = branch_log
+    def create_run_log(
+        self,
+        run_id: str,
+        dag_hash: str = "",
+        use_cached: bool = False,
+        tag: str = "",
+        original_run_id: str = "",
+        status: str = defaults.CREATED,
+    ):
+        """
+        Creates a Run Log object by using the config
+        Logically the method should do the following:
+            * Creates a Run log
+            * Adds it to the db
+            * Return the log
+        """
+        try:
+            self.get_run_log_by_id(run_id=run_id, full=False)
+            raise exceptions.RunLogExistsError(run_id=run_id)
+        except exceptions.RunLogNotFoundError:
+            pass
+        logger.info(f"{self.service_name} Creating a Run Log for : {run_id}")
+        run_log = RunLog(
+            run_id=run_id,
+            dag_hash=dag_hash,
+            tag=tag,
+            status=status,
+        )
+        self.store(
+            run_id=run_id,
+            contents=json.loads(run_log.model_dump_json()),
+            log_type=self.LogTypes.RUN_LOG,
+        )
+        return run_log
+    def get_run_log_by_id(self, run_id: str, full: bool = False) -> RunLog:
+        """
+        Retrieves a Run log from the database using the config and the run_id
+        Args:
+            run_id (str): The run_id of the run
+            full (bool): return the full run log store or only the RunLog object
+        Returns:
+            RunLog: The RunLog object identified by the run_id
+        Logically the method should:
+            * Returns the run_log defined by id from the data store defined by the config
+        """
+        try:
+            logger.info(f"{self.service_name} Getting a Run Log for : {run_id}")
+            run_log = self.retrieve(run_id=run_id, log_type=self.LogTypes.RUN_LOG)
+            if full:
+                self._prepare_full_run_log(run_log=run_log)
+            return run_log
+        except exceptions.EntityNotFoundError as e:
+            raise exceptions.RunLogNotFoundError(run_id) from e
+    def put_run_log(self, run_log: RunLog):
+        """
+        Puts the Run Log in the database as defined by the config
+        Args:
+            run_log (RunLog): The Run log of the run
+        Logically the method should:
+            Puts the run_log into the database
+        Raises:
+            NotImplementedError: This is a base class and therefore has no default implementation
+        """
+        run_id = run_log.run_id
+        self.store(
+            run_id=run_id,
+            contents=json.loads(run_log.model_dump_json()),
+            log_type=self.LogTypes.RUN_LOG,
+        )
+    def get_parameters(self, run_id: str, internal_branch_name: str = "") -> dict:
+        """
+        Get parameters from RunLog or BranchLog.
+        Args:
+            run_id (str): The run_id of the run
+            internal_branch_name (str): If provided, get from that branch
+        Returns:
+            dict: Parameters from the specified scope
+        """
+        if internal_branch_name:
+            branch = self.retrieve(
+                run_id=run_id,
+                log_type=self.LogTypes.BRANCH_LOG,
+                name=internal_branch_name,
+            )
+            return branch.parameters
+        run_log = self.get_run_log_by_id(run_id=run_id)
+        return run_log.parameters
+    def set_parameters(
+        self, run_id: str, parameters: dict, internal_branch_name: str = ""
+    ):
+        """
+        Set parameters on RunLog or BranchLog.
+        Args:
+            run_id (str): The run_id of the run
+            parameters (dict): Parameters to set
+            internal_branch_name (str): If provided, set on that branch
+        """
+        if internal_branch_name:
+            branch = self.retrieve(
+                run_id=run_id,
+                log_type=self.LogTypes.BRANCH_LOG,
+                name=internal_branch_name,
+            )
+            branch.parameters.update(parameters)
+            self.store(
+                run_id=run_id,
+                log_type=self.LogTypes.BRANCH_LOG,
+                contents=json.loads(branch.model_dump_json()),
+                name=internal_branch_name,
+            )
+        else:
+            run_log = self.get_run_log_by_id(run_id=run_id)
+            run_log.parameters.update(parameters)
+            self.put_run_log(run_log)
+    def get_run_config(self, run_id: str) -> dict:
+        """
+        Given a run_id, return the run_config used to perform the run.
+        Args:
+            run_id (str): The run_id of the run
+        Returns:
+            dict: The run config used for the run
+        """
+        run_log = self.get_run_log_by_id(run_id=run_id)
+        return run_log.run_config
+    def set_run_config(self, run_id: str, run_config: dict):
+        """Set the run config used to run the run_id
+        Args:
+            run_id (str): The run_id of the run
+            run_config (dict): The run_config of the run
+        """
+        run_log = self.get_run_log_by_id(run_id=run_id)
+        run_log.run_config.update(run_config)
+        self.put_run_log(run_log=run_log)
+    def get_step_log(self, internal_name: str, run_id: str) -> StepLog:
+        """
+        Get a step log from the datastore for run_id and the internal naming of the step log
+        The internal naming of the step log is a dot path convention.
+        The method should:
+            * Call get_run_log_by_id(run_id) to retrieve the run_log
+            * Identify the step location by decoding the internal naming
+            * Return the step log
+        Args:
+            internal_name (str): The internal name of the step log
+            run_id (str): The run_id of the run
+        Returns:
+            StepLog: The step log object for the step defined by the internal naming and run_id
+        Raises:
+            RunLogNotFoundError: If the run log for run_id is not found in the datastore
+            StepLogNotFoundError: If the step log for internal_name is not found in the datastore for run_id
+        """
+        logger.info(
+            f"{self.service_name} Getting the step log: {internal_name} of {run_id}"
+        )
+        # Determine if step is in a branch or root
+        parent_branch = self._get_parent_branch(internal_name)
+        if not parent_branch:
+            # Root-level step - get from RunLog
+            run_log = self.get_run_log_by_id(run_id=run_id)
+            if internal_name not in run_log.steps:
+                raise exceptions.StepLogNotFoundError(
+                    run_id=run_id, step_name=internal_name
+                )
+            return run_log.steps[internal_name]
+        else:
+            # Branch step - get from BranchLog
+            try:
+                branch_log = self.retrieve(
+                    run_id=run_id,
+                    log_type=self.LogTypes.BRANCH_LOG,
+                    name=parent_branch,
+                )
+                if internal_name not in branch_log.steps:
+                    raise exceptions.StepLogNotFoundError(
+                        run_id=run_id, step_name=internal_name
+                    )
+                return branch_log.steps[internal_name]
+            except exceptions.EntityNotFoundError as e:
+                raise exceptions.StepLogNotFoundError(
+                    run_id=run_id, step_name=internal_name
+                ) from e
+    def add_step_log(self, step_log: StepLog, run_id: str):
+        """
+        Add the step log to its parent (RunLog or BranchLog).
+        Args:
+            step_log (StepLog): The Step log to add
+            run_id (str): The run id of the run
+        """
+        logger.info(f"{self.service_name} Adding step log: {step_log.internal_name}")
+        internal_name = step_log.internal_name
+        parent_branch = self._get_parent_branch(internal_name)
+        if not parent_branch:
+            # Root-level step - add to RunLog
+            run_log = self.get_run_log_by_id(run_id=run_id)
+            run_log.steps[internal_name] = step_log
+            self.put_run_log(run_log)
+        else:
+            # Branch step - add to BranchLog
+            branch_log = self.retrieve(
+                run_id=run_id,
+                log_type=self.LogTypes.BRANCH_LOG,
+                name=parent_branch,
+            )
+            branch_log.steps[internal_name] = step_log
+            self.store(
+                run_id=run_id,
+                log_type=self.LogTypes.BRANCH_LOG,
+                contents=json.loads(branch_log.model_dump_json()),
+                name=parent_branch,
+            )
+    def get_branch_log(
+        self, internal_branch_name: str, run_id: str
+    ) -> Union[BranchLog, RunLog]:
+        """
+        Returns the branch log by the internal branch name for the run id
+        If the internal branch name is none, returns the run log
+        Args:
+            internal_branch_name (str): The internal branch name to retrieve.
+            run_id (str): The run id of interest
+        Returns:
+            BranchLog: The branch log or the run log as requested.
+        """
+        try:
+            if not internal_branch_name:
+                return self.get_run_log_by_id(run_id=run_id)
+            branch = self.retrieve(
+                run_id=run_id,
+                log_type=self.LogTypes.BRANCH_LOG,
+                name=internal_branch_name,
+            )
+            return branch
+        except exceptions.EntityNotFoundError as e:
+            raise exceptions.BranchLogNotFoundError(
+                run_id=run_id, branch_name=internal_branch_name
+            ) from e
+    def add_branch_log(
+        self,
+        branch_log: Union[BranchLog, RunLog],
+        run_id: str,
+    ):
+        """
+        The method should:
+        # Get the run log
+        # Get the branch and step containing the branch
+        # Add the branch to the step
+        # Write the run_log
+        The branch log could some times be a Run log and should be handled appropriately
+        Args:
+            branch_log (BranchLog): The branch log/run log to add to the database
+            run_id (str): The run id to which the branch/run log is added
+        """
+        if not isinstance(branch_log, BranchLog):
+            self.put_run_log(branch_log)
+            return
+        internal_branch_name = branch_log.internal_name
+        logger.info(
+            f"{self.service_name} Adding the branch log to DB: {branch_log.internal_name}"
+        )
+        self.store(
+            run_id=run_id,
+            log_type=self.LogTypes.BRANCH_LOG,
+            contents=json.loads(branch_log.model_dump_json()),
+            name=internal_branch_name,
+        )

extensions/run_log_store/minio.py ADDED Viewed

@@ -0,0 +1,114 @@
+import json
+import logging
+from functools import lru_cache
+from typing import Any, Dict
+from cloudpathlib import S3Client, S3Path
+from pydantic import Field, SecretStr
+from extensions.run_log_store.any_path import AnyPathRunLogStore
+from runnable import defaults
+from runnable.datastore import RunLog
+logger = logging.getLogger(defaults.LOGGER_NAME)
+@lru_cache
+def get_minio_client(
+    endpoint_url: str, aws_access_key_id: str, aws_secret_access_key: str
+) -> S3Client:
+    return S3Client(
+        endpoint_url=endpoint_url,
+        aws_access_key_id=aws_access_key_id,
+        aws_secret_access_key=aws_secret_access_key,
+    )
+class MinioRunLogStore(AnyPathRunLogStore):
+    """
+    In this type of Run Log store, we use a file system to store the JSON run log.
+    Every single run is stored as a different file which makes it compatible across other store types.
+    When to use:
+        When locally testing a pipeline and have the need to compare across runs.
+        Its fully featured and perfectly fine if your local environment is where you would do everything.
+    Do not use:
+        If you need parallelization on local, this run log would not support it.
+    Example config:
+    run_log:
+      type: file-system
+      config:
+        log_folder: The folder to out the logs. Defaults to .run_log_store
+    """
+    service_name: str = "minio"
+    endpoint_url: str = Field(default="http://localhost:9002")
+    aws_access_key_id: SecretStr = SecretStr(secret_value="minioadmin")
+    aws_secret_access_key: SecretStr = SecretStr(secret_value="minioadmin")
+    bucket: str = Field(default="runnable/run-logs")
+    def get_summary(self) -> Dict[str, Any]:
+        summary = {
+            "Type": self.service_name,
+            "Location": f"{self.endpoint_url}/{self.bucket}",
+        }
+        return summary
+    def get_run_log_bucket(self) -> S3Path:
+        run_id = self._context.run_id
+        return S3Path(
+            f"s3://{self.bucket}/{run_id}/",
+            client=get_minio_client(
+                self.endpoint_url,
+                self.aws_access_key_id.get_secret_value(),
+                self.aws_secret_access_key.get_secret_value(),
+            ),
+        )
+    def write_to_path(self, run_log: RunLog):
+        """
+        Write the run log to the folder
+        Args:
+            run_log (RunLog): The run log to be added to the database
+        """
+        run_log_bucket = self.get_run_log_bucket()
+        run_log_bucket.mkdir(parents=True, exist_ok=True)
+        run_log_object = run_log_bucket / f"{run_log.run_id}.json"
+        run_log_object.write_text(
+            json.dumps(run_log.model_dump_json(), ensure_ascii=True, indent=4)
+        )
+    def read_from_path(self, run_id: str) -> RunLog:
+        """
+        Look into the run log folder for the run log for the run id.
+        If the run log does not exist, raise an exception. If it does, decode it
+        as a RunLog and return it
+        Args:
+            run_id (str): The requested run id to retrieve the run log store
+        Raises:
+            FileNotFoundError: If the Run Log has not been found.
+        Returns:
+            RunLog: The decoded Run log
+        """
+        run_log_bucket = self.get_run_log_bucket()
+        run_log_object = run_log_bucket / f"{run_id}.json"
+        run_log_text = json.loads(run_log_object.read_text())
+        run_log = RunLog(**json.loads(run_log_text))
+        return run_log

extensions/run_log_store/pyproject.toml ADDED Viewed

@@ -0,0 +1,15 @@
+[project]
+name = "run_log_store"
+version = "0.0.0"
+description = "Extensions to run log store"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = []
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[tool.hatch.build.targets.wheel]
+packages = ["."]

extensions/secrets/README.md ADDED Viewed

File without changes