runnable 0.17.1-py3-none-any.whl → 0.19.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
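To reproduce a diff like this locally, you can download both wheels and compare their unpacked trees. Below is a minimal sketch, assuming a Unix-like system with pip and diff on the PATH; everything beyond the package name and versions in the header above is illustrative:

    import subprocess
    import tempfile
    import zipfile
    from pathlib import Path

    def fetch(version: str, dest: Path) -> Path:
        # Download just the wheel (no dependencies), then unpack it.
        dest.mkdir(parents=True, exist_ok=True)
        subprocess.run(
            ["pip", "download", f"runnable=={version}", "--no-deps", "-d", str(dest)],
            check=True,
        )
        wheel = next(dest.glob("runnable-*.whl"))
        out = dest / "unpacked"
        zipfile.ZipFile(wheel).extractall(out)
        return out

    with tempfile.TemporaryDirectory() as d:
        old = fetch("0.17.1", Path(d) / "old")
        new = fetch("0.19.0", Path(d) / "new")
        subprocess.run(["diff", "-r", str(old), str(new)])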
Files changed (47)
  1. extensions/README.md +0 -0
  2. extensions/__init__.py +0 -0
  3. extensions/catalog/README.md +0 -0
  4. extensions/catalog/file_system.py +253 -0
  5. extensions/catalog/pyproject.toml +14 -0
  6. extensions/job_executor/README.md +0 -0
  7. extensions/job_executor/__init__.py +160 -0
  8. extensions/job_executor/k8s.py +484 -0
  9. extensions/job_executor/k8s_job_spec.yaml +37 -0
  10. extensions/job_executor/local.py +61 -0
  11. extensions/job_executor/local_container.py +192 -0
  12. extensions/job_executor/pyproject.toml +16 -0
  13. extensions/nodes/README.md +0 -0
  14. extensions/nodes/nodes.py +954 -0
  15. extensions/nodes/pyproject.toml +15 -0
  16. extensions/pipeline_executor/README.md +0 -0
  17. extensions/pipeline_executor/__init__.py +644 -0
  18. extensions/pipeline_executor/argo.py +1307 -0
  19. extensions/pipeline_executor/argo_specification.yaml +51 -0
  20. extensions/pipeline_executor/local.py +62 -0
  21. extensions/pipeline_executor/local_container.py +362 -0
  22. extensions/pipeline_executor/mocked.py +161 -0
  23. extensions/pipeline_executor/pyproject.toml +16 -0
  24. extensions/pipeline_executor/retry.py +180 -0
  25. extensions/run_log_store/README.md +0 -0
  26. extensions/run_log_store/__init__.py +0 -0
  27. extensions/run_log_store/chunked_fs.py +113 -0
  28. extensions/run_log_store/db/implementation_FF.py +163 -0
  29. extensions/run_log_store/db/integration_FF.py +0 -0
  30. extensions/run_log_store/file_system.py +145 -0
  31. extensions/run_log_store/generic_chunked.py +599 -0
  32. extensions/run_log_store/pyproject.toml +15 -0
  33. extensions/secrets/README.md +0 -0
  34. extensions/secrets/dotenv.py +62 -0
  35. extensions/secrets/pyproject.toml +15 -0
  36. runnable/__init__.py +1 -0
  37. runnable/catalog.py +1 -2
  38. runnable/entrypoints.py +1 -5
  39. runnable/executor.py +1 -1
  40. runnable/parameters.py +0 -9
  41. runnable/utils.py +5 -25
  42. {runnable-0.17.1.dist-info → runnable-0.19.0.dist-info}/METADATA +1 -7
  43. runnable-0.19.0.dist-info/RECORD +58 -0
  44. {runnable-0.17.1.dist-info → runnable-0.19.0.dist-info}/entry_points.txt +1 -0
  45. runnable-0.17.1.dist-info/RECORD +0 -23
  46. {runnable-0.17.1.dist-info → runnable-0.19.0.dist-info}/WHEEL +0 -0
  47. {runnable-0.17.1.dist-info → runnable-0.19.0.dist-info}/licenses/LICENSE +0 -0
extensions/pipeline_executor/retry.py
@@ -0,0 +1,180 @@
+ import logging
+ from functools import cached_property
+ from typing import Any, Dict, Optional
+
+ from extensions.pipeline_executor import GenericPipelineExecutor
+ from runnable import context, defaults, exceptions
+ from runnable.datastore import RunLog
+ from runnable.defaults import TypeMapVariable
+ from runnable.nodes import BaseNode
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+ class RetryExecutor(GenericPipelineExecutor):
+     """
+     The skeleton of an executor class.
+     Any implementation of an executor should inherit this class and override accordingly.
+
+     This is a loaded base class which has a lot of methods already implemented for "typical" executions.
+     Look at the function docs to understand how to use them appropriately.
+
+     For any implementation:
+     1. Who/when should the run log be set up?
+     2. Who/when should the step log be set up?
+     """
+
+     service_name: str = "retry"
+     service_type: str = "executor"
+     run_id: str
+
+     _is_local: bool = True
+     _original_run_log: Optional[RunLog] = None
+     _restart_initiated: bool = False
+
+     @property
+     def _context(self):
+         return context.run_context
+
+     @cached_property
+     def original_run_log(self):
+         return self._context.run_log_store.get_run_log_by_id(
+             run_id=self.run_id,
+             full=True,
+         )
+
+     def _set_up_for_re_run(self, params: Dict[str, Any]) -> None:
+         # Sync the previous run's catalog to this one.
+         self._context.catalog_handler.sync_between_runs(
+             previous_run_id=self.run_id, run_id=self._context.run_id
+         )
+
+         params.update(self.original_run_log.parameters)
+
+     def _set_up_run_log(self, exists_ok=False):
+         """
+         Create a run log and put it in the run log store.
+
+         If exists_ok, we allow the run log to already be present in the run log store.
+         """
+         super()._set_up_run_log(exists_ok=exists_ok)
+
+         # Should the parameters be copied from the previous execution?
+         # self._set_up_for_re_run(params=params)
+
+     def execute_from_graph(
+         self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
+     ):
+         """
+         This is the entry point for the graph execution.
+
+         While self.execute_graph is responsible for traversing the graph, this function is responsible for
+         the actual execution of the node.
+
+         If the node type is:
+             * task: We can delegate to _execute_node after checking the eligibility for re-run in case of a re-run
+             * success: We can delegate to _execute_node
+             * fail: We can delegate to _execute_node
+
+         For nodes that are internally graphs:
+             * parallel: Delegate the responsibility of execution to node.execute_as_graph()
+             * dag: Delegate the responsibility of execution to node.execute_as_graph()
+             * map: Delegate the responsibility of execution to node.execute_as_graph()
+
+         Transpilers will NEVER call this method; it should only be used by interactive executors.
+
+         Args:
+             node (Node): The node to execute
+             map_variable (dict, optional): If the node is of a map state, this corresponds to the value of the iterable.
+                 Defaults to None.
+         """
+         step_log = self._context.run_log_store.create_step_log(
+             node.name, node._get_step_log_name(map_variable)
+         )
+
+         self.add_code_identities(node=node, step_log=step_log)
+
+         step_log.step_type = node.node_type
+         step_log.status = defaults.PROCESSING
+
+         # Add the step log to the database as per the situation.
+         # If it is a terminal node, complete it now.
+         if node.node_type in ["success", "fail"]:
+             self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+             self._execute_node(node, map_variable=map_variable, **kwargs)
+             return
+
+         # In a retry step:
+         if not self._is_step_eligible_for_rerun(node, map_variable=map_variable):
+             # If the node name does not match, we move on to the next node.
+             # If the previous run was successful, move on to the next step.
+             step_log.mock = True
+             step_log.status = defaults.SUCCESS
+             self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+             return
+
+         # We call an internal function to iterate the sub graphs and execute them.
+         if node.is_composite:
+             self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+             node.execute_as_graph(map_variable=map_variable, **kwargs)
+             return
+
+         # Executor specific way to trigger a job.
+         self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+         self.execute_node(node=node, map_variable=map_variable, **kwargs)
+
+     def _is_step_eligible_for_rerun(
+         self, node: BaseNode, map_variable: TypeMapVariable = None
+     ):
+         """
+         In case of a re-run, this method checks the step status of the previous run to determine whether a re-run is
+         necessary.
+             * True: If it is not a re-run.
+             * True: If it is a re-run and the step failed in the last run, or the corresponding logs do not exist.
+             * False: If it is a re-run and the step succeeded in the last run.
+
+         In most cases, this logic need not be touched.
+
+         Args:
+             node (Node): The node to check against re-run
+             map_variable (dict, optional): If the node is of a map state, this corresponds to the value of the iterable.
+                 Defaults to None.
+
+         Returns:
+             bool: Eligibility for re-run. True means re-run, False means skip to the next step.
+         """
+
+         node_step_log_name = node._get_step_log_name(map_variable=map_variable)
+         logger.info(
+             f"Scanning previous run logs for node logs of: {node_step_log_name}"
+         )
+
+         if self._restart_initiated:
+             return True
+
+         try:
+             previous_attempt_log, _ = (
+                 self.original_run_log.search_step_by_internal_name(node_step_log_name)
+             )
+         except exceptions.StepLogNotFoundError:
+             logger.warning(f"Did not find the node {node.name} in previous run log")
+             self._restart_initiated = True
+             return True  # We should re-run the node.
+
+         logger.info(f"The original step status: {previous_attempt_log.status}")
+
+         if previous_attempt_log.status == defaults.SUCCESS:
+             return False  # We need not run the node.
+
+         logger.info(
+             f"The new execution should start executing graph from this node {node.name}"
+         )
+         self._restart_initiated = True
+         return True
+
+     def execute_node(
+         self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
+     ):
+         self._execute_node(node, map_variable=map_variable, **kwargs)
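The heart of the retry behaviour above is the three-way decision in _is_step_eligible_for_rerun: steps that succeeded in the previous run are mocked and skipped, and the first failed or missing step flips _restart_initiated so that every subsequent step re-runs. A self-contained toy sketch of just that decision, with a plain dict standing in for the previous RunLog (the step names and the should_rerun helper are illustrative, not part of runnable's API):

    # Toy model of RetryExecutor's skip/re-run decision.
    previous_statuses = {"fetch": "SUCCESS", "train": "FAIL"}  # hypothetical prior run
    restart_initiated = False

    def should_rerun(step: str) -> bool:
        global restart_initiated
        if restart_initiated:  # everything after the first re-run point re-runs
            return True
        if previous_statuses.get(step) == "SUCCESS":
            return False  # succeeded last time: mock it and skip
        restart_initiated = True  # failed or missing: re-run from here on
        return True

    for step in ["fetch", "train", "report"]:
        print(step, "re-run" if should_rerun(step) else "skip (mocked)")
    # fetch -> skip (mocked), train -> re-run, report -> re-run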
extensions/run_log_store/README.md — file without changes
extensions/run_log_store/__init__.py — file without changes
extensions/run_log_store/chunked_fs.py
@@ -0,0 +1,113 @@
+ import json
+ import logging
+ from pathlib import Path
+ from string import Template
+ from typing import Any, Dict, Optional, Sequence, Union
+
+ from extensions.run_log_store.generic_chunked import ChunkedRunLogStore
+ from runnable import defaults, utils
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+ T = Union[str, Path]
+
+
+ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
+     """
+     File system run log store that chunks the run log into thread-safe pieces.
+     This enables executions to be parallel.
+     """
+
+     service_name: str = "chunked-fs"
+     log_folder: str = defaults.LOG_LOCATION_FOLDER
+
+     def get_summary(self) -> Dict[str, Any]:
+         summary = {"Type": self.service_name, "Location": self.log_folder}
+
+         return summary
+
+     def get_matches(
+         self, run_id: str, name: str, multiple_allowed: bool = False
+     ) -> Optional[Union[Sequence[T], T]]:
+         """
+         Get the files matching the pattern name*.
+
+         Args:
+             run_id (str): The run id
+             name (str): The prefix of the file name to check in the run log store.
+         """
+         log_folder = self.log_folder_with_run_id(run_id=run_id)
+         sub_name = Template(name).safe_substitute({"creation_time": ""})
+
+         matches = list(log_folder.glob(f"{sub_name}*"))
+
+         if matches:
+             if not multiple_allowed:
+                 if len(matches) > 1:
+                     msg = f"Multiple matches found for {name} while multiple is not allowed"
+                     raise Exception(msg)
+                 return matches[0]
+             return matches
+
+         return None
+
+     def log_folder_with_run_id(self, run_id: str) -> Path:
+         """
+         Utility function to get the log folder for a run id.
+
+         Args:
+             run_id (str): The run id
+
+         Returns:
+             Path: The path to the log folder for the run id
+         """
+         return Path(self.log_folder) / run_id
+
+     def safe_suffix_json(self, name: Union[Path, str]) -> str:
+         """
+         Safely attach a .json suffix to a file name.
+
+         Args:
+             name (Path): The name of the file, with or without the json suffix
+
+         Returns:
+             str: The name of the file with .json
+         """
+         if str(name).endswith("json"):
+             return str(name)
+
+         return str(name) + ".json"
+
+     def _store(self, run_id: str, contents: dict, name: Union[Path, str], insert=False):
+         """
+         Store the contents against the name in the run's log folder.
+
+         Args:
+             run_id (str): The run id
+             contents (dict): The dict to store
+             name (str): The name to store it as
+         """
+         if insert:
+             name = self.log_folder_with_run_id(run_id=run_id) / name
+
+         utils.safe_make_dir(self.log_folder_with_run_id(run_id=run_id))
+
+         with open(self.safe_suffix_json(name), "w") as fw:
+             json.dump(contents, fw, ensure_ascii=True, indent=4)
+
+     def _retrieve(self, name: Union[str, Path]) -> dict:
+         """
+         Does the job of retrieving from the folder.
+
+         Args:
+             name (str): The name of the file to retrieve
+
+         Returns:
+             dict: The contents
+         """
+         contents: dict = {}
+
+         with open(self.safe_suffix_json(name), "r") as fr:
+             contents = json.load(fr)
+
+         return contents
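Worth noting is how get_matches locates chunks: chunk names are string.Template patterns with a ${creation_time} placeholder, and substituting an empty string turns the pattern into a glob prefix. A standalone sketch of that trick (the parameter-${creation_time} pattern is made up for illustration; the real patterns come from ChunkedRunLogStore):

    from pathlib import Path
    from string import Template
    from tempfile import TemporaryDirectory

    name = "parameter-${creation_time}"  # hypothetical chunk-name pattern
    prefix = Template(name).safe_substitute({"creation_time": ""})  # -> "parameter-"

    with TemporaryDirectory() as d:
        folder = Path(d)
        (folder / "parameter-2024-01-01.json").touch()
        (folder / "parameter-2024-01-02.json").touch()
        matches = sorted(folder.glob(f"{prefix}*"))  # the same glob get_matches builds
        print([m.name for m in matches])
    # ['parameter-2024-01-01.json', 'parameter-2024-01-02.json']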
extensions/run_log_store/db/implementation_FF.py
@@ -0,0 +1,163 @@
+ import datetime
+ import json
+ import logging
+ from pathlib import Path
+ from string import Template
+ from typing import Any, Dict, List, Optional, Union, cast
+
+ from runnable import defaults, utils
+ from runnable.extensions.run_log_store.generic_chunked import ChunkedRunLogStore
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+ class DBRunLogStore(ChunkedRunLogStore):
+     """
+     Database-backed run log store that fragments the run log into thread-safe chunks.
+     This enables executions to be parallel.
+     """
+
+     service_name: str = "chunked-fs"
+     connection_string: str
+     db_name: str
+
+     _DB_LOG: Any = None
+     _engine: Any = None
+     _session: Any = None
+     _connection_string: str = ""
+     _base: Any = None
+
+     def model_post_init(self, _: Any) -> None:
+         run_context = self._context
+
+         secrets = cast(Dict[str, str], run_context.secrets_handler.get())
+         connection_string = Template(self.connection_string).safe_substitute(**secrets)
+
+         try:
+             import sqlalchemy
+             from sqlalchemy import Column, DateTime, Integer, Sequence, Text
+             from sqlalchemy.orm import declarative_base, sessionmaker
+
+             Base = declarative_base()
+
+             class DBLog(Base):
+                 """
+                 Base table for storing run logs in the database.
+
+                 In this model, we fragment the run log into logical units that are concurrency-safe.
+                 """
+
+                 __tablename__ = self.db_name
+                 pk = Column(Integer, Sequence("id_seq"), primary_key=True)
+                 run_id = Column(Text, index=True)
+                 attribute_key = Column(
+                     Text
+                 )  # run_log, step_internal_name, parameter_key, etc.
+                 attribute_type = Column(Text)  # RunLog, Step, Branch, Parameter
+                 attribute_value = Column(Text)  # The JSON string
+                 created_at = Column(DateTime, default=datetime.datetime.utcnow)
+
+             self._engine = sqlalchemy.create_engine(
+                 connection_string, pool_pre_ping=True
+             )
+             self._session = sessionmaker(bind=self._engine)
+             self._DB_LOG = DBLog
+             self._connection_string = connection_string
+             self._base = Base
+
+         except ImportError as _e:
+             logger.exception("Unable to import SQLAlchemy, is it installed?")
+             msg = "SQLAlchemy is required for this extension. Please install it."
+             raise Exception(msg) from _e
+
+     def create_tables(self):
+         import sqlalchemy
+
+         engine = sqlalchemy.create_engine(self._connection_string)
+         self._base.metadata.create_all(engine)
+
+     def get_matches(
+         self, run_id: str, name: str, multiple_allowed: bool = False
+     ) -> Optional[Union[List[Path], Path]]:
+         """
+         Get the files matching the pattern name*.
+
+         Args:
+             run_id (str): The run id
+             name (str): The prefix of the file name to check in the run log store.
+         """
+         log_folder = self.log_folder_with_run_id(run_id=run_id)
+
+         sub_name = Template(name).safe_substitute({"creation_time": ""})
+
+         matches = list(log_folder.glob(f"{sub_name}*"))
+         if matches:
+             if not multiple_allowed:
+                 if len(matches) > 1:
+                     msg = f"Multiple matches found for {name} while multiple is not allowed"
+                     raise Exception(msg)
+                 return matches[0]
+             return matches
+
+         return None
+
+     def log_folder_with_run_id(self, run_id: str) -> Path:
+         """
+         Utility function to get the log folder for a run id.
+
+         Args:
+             run_id (str): The run id
+
+         Returns:
+             Path: The path to the log folder for the run id
+         """
+         return Path(self.log_folder) / run_id
+
+     def safe_suffix_json(self, name: Union[Path, str]) -> str:
+         """
+         Safely attach a .json suffix to a file name.
+
+         Args:
+             name (Path): The name of the file, with or without the json suffix
+
+         Returns:
+             str: The name of the file with .json
+         """
+         if str(name).endswith("json"):
+             return str(name)
+
+         return str(name) + ".json"
+
+     def _store(self, run_id: str, contents: dict, name: Union[Path, str], insert=False):
+         """
+         Store the contents against the name in the folder.
+
+         Args:
+             run_id (str): The run id
+             contents (dict): The dict to store
+             name (str): The name to store it as
+         """
+         if insert:
+             name = self.log_folder_with_run_id(run_id=run_id) / name
+
+         utils.safe_make_dir(self.log_folder_with_run_id(run_id=run_id))
+
+         with open(self.safe_suffix_json(name), "w") as fw:
+             json.dump(contents, fw, ensure_ascii=True, indent=4)
+
+     def _retrieve(self, name: Path) -> dict:
+         """
+         Does the job of retrieving from the folder.
+
+         Args:
+             name (str): The name of the file to retrieve
+
+         Returns:
+             dict: The contents
+         """
+         contents: dict = {}
+
+         with open(self.safe_suffix_json(name), "r") as fr:
+             contents = json.load(fr)
+
+         return contents
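The DBLog table above stores each logical unit of a run log as one row: attribute_key names the unit (for example, a step's internal name), attribute_type tags its kind, and attribute_value carries the JSON payload. A hedged sketch of that row model against in-memory SQLite, requiring SQLAlchemy 1.4+ (the table name and inserted payload are made up; the columns mirror the DBLog definition above):

    import datetime
    import json

    import sqlalchemy
    from sqlalchemy import Column, DateTime, Integer, Text
    from sqlalchemy.orm import declarative_base, sessionmaker

    Base = declarative_base()

    class DBLog(Base):  # mirrors the DBLog definition in model_post_init above
        __tablename__ = "run_logs"  # self.db_name in the extension; fixed here
        pk = Column(Integer, primary_key=True)
        run_id = Column(Text, index=True)
        attribute_key = Column(Text)    # run_log, step_internal_name, parameter_key
        attribute_type = Column(Text)   # RunLog, Step, Branch, Parameter
        attribute_value = Column(Text)  # the JSON string
        created_at = Column(DateTime, default=datetime.datetime.utcnow)

    engine = sqlalchemy.create_engine("sqlite:///:memory:")
    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)

    with Session() as session:  # one row per concurrency-safe fragment
        session.add(DBLog(
            run_id="run-001",
            attribute_key="train",  # a step's internal name
            attribute_type="Step",
            attribute_value=json.dumps({"status": "SUCCESS"}),  # made-up payload
        ))
        session.commit()
        row = session.query(DBLog).filter_by(run_id="run-001").one()
        print(row.attribute_type, json.loads(row.attribute_value))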
extensions/run_log_store/db/integration_FF.py — file without changes
extensions/run_log_store/file_system.py
@@ -0,0 +1,145 @@
+ import json
+ import logging
+ from pathlib import Path
+ from typing import Any, Dict
+
+ from runnable import defaults, exceptions, utils
+ from runnable.datastore import BaseRunLogStore, RunLog
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+ class FileSystemRunLogstore(BaseRunLogStore):
+     """
+     In this type of run log store, we use the file system to store the JSON run log.
+
+     Every single run is stored as a different file, which makes it compatible across other store types.
+
+     When to use:
+         When locally testing a pipeline and you need to compare across runs.
+         It's fully featured and perfectly fine if your local environment is where you would do everything.
+
+     Do not use:
+         If you need parallelization locally; this run log store does not support it.
+
+     Example config:
+
+     run_log:
+       type: file-system
+       config:
+         log_folder: The folder to put the logs in. Defaults to .run_log_store
+     """
+
+     service_name: str = "file-system"
+     log_folder: str = defaults.LOG_LOCATION_FOLDER
+
+     @property
+     def log_folder_name(self):
+         return self.log_folder
+
+     def get_summary(self) -> Dict[str, Any]:
+         summary = {"Type": self.service_name, "Location": self.log_folder}
+
+         return summary
+
+     def write_to_folder(self, run_log: RunLog):
+         """
+         Write the run log to the folder.
+
+         Args:
+             run_log (RunLog): The run log to be added to the store
+         """
+         write_to = self.log_folder_name
+         utils.safe_make_dir(write_to)
+
+         write_to_path = Path(write_to)
+         run_id = run_log.run_id
+         json_file_path = write_to_path / f"{run_id}.json"
+
+         with json_file_path.open("w") as fw:
+             json.dump(run_log.model_dump(), fw, ensure_ascii=True, indent=4)  # pylint: disable=no-member
+
+     def get_from_folder(self, run_id: str) -> RunLog:
+         """
+         Look into the run log folder for the run log of the run id.
+
+         If the run log does not exist, raise an exception. If it does, decode it
+         as a RunLog and return it.
+
+         Args:
+             run_id (str): The run id whose run log is requested
+
+         Raises:
+             FileNotFoundError: If the run log has not been found.
+
+         Returns:
+             RunLog: The decoded run log
+         """
+         write_to = self.log_folder_name
+
+         read_from_path = Path(write_to)
+         json_file_path = read_from_path / f"{run_id}.json"
+
+         if not json_file_path.exists():
+             raise FileNotFoundError(f"Expected {json_file_path} is not present")
+
+         with json_file_path.open("r") as fr:
+             json_str = json.load(fr)
+             run_log = RunLog(**json_str)  # pylint: disable=no-member
+         return run_log
+
+     def create_run_log(
+         self,
+         run_id: str,
+         dag_hash: str = "",
+         use_cached: bool = False,
+         tag: str = "",
+         original_run_id: str = "",
+         status: str = defaults.CREATED,
+     ) -> RunLog:
+         """
+         Creates a run log and adds it to the store.
+         """
+
+         try:
+             self.get_run_log_by_id(run_id=run_id, full=False)
+             raise exceptions.RunLogExistsError(run_id=run_id)
+         except exceptions.RunLogNotFoundError:
+             pass
+
+         logger.info(f"{self.service_name} Creating a Run Log for : {run_id}")
+         run_log = RunLog(
+             run_id=run_id,
+             dag_hash=dag_hash,
+             tag=tag,
+             status=status,
+         )
+         self.write_to_folder(run_log)
+         return run_log
+
+     def get_run_log_by_id(
+         self,
+         run_id: str,
+         full: bool = False,
+     ) -> RunLog:
+         """
+         Returns the run log defined by the id; raises an exception if not found.
+         """
+         try:
+             logger.info(f"{self.service_name} Getting a Run Log for : {run_id}")
+             run_log = self.get_from_folder(run_id)
+             return run_log
+         except FileNotFoundError as e:
+             raise exceptions.RunLogNotFoundError(run_id) from e
+
+     def put_run_log(self, run_log: RunLog):
+         """
+         Puts the run log into the store.
+         """
+         logger.info(
+             f"{self.service_name} Putting the run log in the DB: {run_log.run_id}"
+         )
+         self.write_to_folder(run_log)
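The net effect of FileSystemRunLogstore is one pretty-printed JSON file per run at <log_folder>/<run_id>.json. A dependency-free sketch of the same round trip the store performs (the payload dict is a stand-in; the real store serializes a full RunLog model via model_dump):

    import json
    from pathlib import Path
    from tempfile import TemporaryDirectory

    with TemporaryDirectory() as d:
        log_folder = Path(d) / ".run_log_store"  # the default log_folder
        log_folder.mkdir(parents=True, exist_ok=True)  # what utils.safe_make_dir does

        run_id = "run-001"
        run_log = {"run_id": run_id, "status": "SUCCESS", "steps": {}}  # stand-in payload

        # write_to_folder: dump the model as ASCII, indented JSON
        path = log_folder / f"{run_id}.json"
        path.write_text(json.dumps(run_log, ensure_ascii=True, indent=4))

        # get_from_folder: read it back, raising if the file is absent
        if not path.exists():
            raise FileNotFoundError(f"Expected {path} is not present")
        print(json.loads(path.read_text())["status"])  # SUCCESS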