PyPI - runnable - Versions diffs - 0.26.0__tar.gz → 0.28.0__tar.gz - Mend

runnable 0.26.0.tar.gz → 0.28.0.tar.gz

Files changed (63) hide show

{runnable-0.26.0 → runnable-0.28.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: runnable
-Version: 0.26.0
+Version: 0.28.0
 Summary: Add your description here
 Author-email: "Vammi, Vijay" <vijay.vammi@astrazeneca.com>
 License-File: LICENSE

{runnable-0.26.0 → runnable-0.28.0}/extensions/catalog/minio.py RENAMED Viewed

@@ -4,6 +4,7 @@ from pathlib import Path
 from typing import Any
 from cloudpathlib import CloudPath, S3Client, S3Path
+from pydantic import Field, SecretStr
 from extensions.catalog.any_path import AnyPathCatalog
 from runnable import defaults
@@ -25,9 +26,9 @@ def get_minio_client(
 class MinioCatalog(AnyPathCatalog):
     service_name: str = "minio"
-    endpoint_url: str = "http://localhost:9002"
-    aws_access_key_id: str = "minioadmin"
-    aws_secret_access_key: str = "minioadmin"
+    endpoint_url: str = Field(default="http://localhost:9002")
+    aws_access_key_id: SecretStr = SecretStr(secret_value="minioadmin")
+    aws_secret_access_key: SecretStr = SecretStr(secret_value="minioadmin")
     bucket: str = "runnable"
     def get_summary(self) -> dict[str, Any]:
@@ -44,7 +45,9 @@ class MinioCatalog(AnyPathCatalog):
         return S3Path(
             f"s3://{self.bucket}/{run_id}/{self.compute_data_folder}".strip("."),
             client=get_minio_client(
-                self.endpoint_url, self.aws_access_key_id, self.aws_secret_access_key
+                self.endpoint_url,
+                self.aws_access_key_id.get_secret_value(),
+                self.aws_secret_access_key.get_secret_value(),
             ),
         )

runnable-0.28.0/extensions/run_log_store/any_path.py ADDED Viewed

@@ -0,0 +1,100 @@
+import logging
+from abc import abstractmethod
+from typing import Any, Dict
+from runnable import defaults, exceptions
+from runnable.datastore import BaseRunLogStore, RunLog
+logger = logging.getLogger(defaults.LOGGER_NAME)
+class AnyPathRunLogStore(BaseRunLogStore):
+    """
+    In this type of Run Log store, we use a file system to store the JSON run log.
+    Every single run is stored as a different file which makes it compatible across other store types.
+    When to use:
+        When locally testing a pipeline and have the need to compare across runs.
+        It's fully featured and perfectly fine if your local environment is where you would do everything.
+    Do not use:
+        If you need parallelization on local, this run log would not support it.
+    Example config:
+    run_log:
+      type: file-system
+      config:
+        log_folder: The folder to put the logs. Defaults to .run_log_store
+    """
+    service_name: str = "file-system"
+    log_folder: str = defaults.LOG_LOCATION_FOLDER
+    def get_summary(self) -> Dict[str, Any]:
+        summary = {"Type": self.service_name, "Location": self.log_folder}
+        return summary
+    @abstractmethod
+    def write_to_path(self, run_log: RunLog): ...
+    @abstractmethod
+    def read_from_path(self, run_id: str) -> RunLog: ...
+    def create_run_log(
+        self,
+        run_id: str,
+        dag_hash: str = "",
+        use_cached: bool = False,
+        tag: str = "",
+        original_run_id: str = "",
+        status: str = defaults.CREATED,
+    ) -> RunLog:
+        """
+        # Creates a Run log
+        # Adds it to the db
+        """
+        try:
+            self.get_run_log_by_id(run_id=run_id, full=False)
+            raise exceptions.RunLogExistsError(run_id=run_id)
+        except exceptions.RunLogNotFoundError:
+            pass
+        logger.info(f"{self.service_name} Creating a Run Log for : {run_id}")
+        run_log = RunLog(
+            run_id=run_id,
+            dag_hash=dag_hash,
+            tag=tag,
+            status=status,
+        )
+        self.write_to_path(run_log)
+        return run_log
+    def get_run_log_by_id(
+        self,
+        run_id: str,
+        full: bool = False,
+    ) -> RunLog:
+        """
+        # Returns the run_log defined by id
+        # Raises Exception if not found
+        """
+        try:
+            logger.info(f"{self.service_name} Getting a Run Log for : {run_id}")
+            run_log = self.read_from_path(run_id)
+            return run_log
+        except FileNotFoundError as e:
+            raise exceptions.RunLogNotFoundError(run_id) from e
+    def put_run_log(self, run_log: RunLog):
+        """
+        # Puts the run_log into the database
+        """
+        logger.info(
+            f"{self.service_name} Putting the run log in the DB: {run_log.run_id}"
+        )
+        self.write_to_path(run_log)

{runnable-0.26.0 → runnable-0.28.0}/extensions/run_log_store/chunked_fs.py RENAMED Viewed

@@ -2,15 +2,13 @@ import json
 import logging
 from pathlib import Path
 from string import Template
-from typing import Any, Dict, Optional, Sequence, Union
+from typing import Any, Dict, Union
 from extensions.run_log_store.generic_chunked import ChunkedRunLogStore
 from runnable import defaults, utils
 logger = logging.getLogger(defaults.LOGGER_NAME)
-T = Union[str, Path]
 class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
     """
@@ -28,7 +26,7 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
     def get_matches(
         self, run_id: str, name: str, multiple_allowed: bool = False
-    ) -> Optional[Union[Sequence[T], T]]:
+    ) -> str | list[str] | None:
         """
         Get contents of files matching the pattern name*
@@ -46,8 +44,8 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
                 if len(matches) > 1:
                     msg = f"Multiple matches found for {name} while multiple is not allowed"
                     raise Exception(msg)
-                return matches[0]
-            return matches
+                return str(matches[0])
+            return [str(match) for match in matches]
         return None
@@ -78,7 +76,7 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
         return str(name) + ".json"
-    def _store(self, run_id: str, contents: dict, name: Union[Path, str], insert=False):
+    def _store(self, run_id: str, contents: dict, name: str, insert=False):
         """
         Store the contents against the name in the folder.
@@ -87,15 +85,17 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
             contents (dict): The dict to store
             name (str): The name to store as
         """
+        log_folder_with_run_id = self.log_folder_with_run_id(run_id=run_id)
         if insert:
-            name = self.log_folder_with_run_id(run_id=run_id) / name
+            name = str(log_folder_with_run_id / name)
-        utils.safe_make_dir(self.log_folder_with_run_id(run_id=run_id))
+        utils.safe_make_dir(log_folder_with_run_id)
         with open(self.safe_suffix_json(name), "w") as fw:
             json.dump(contents, fw, ensure_ascii=True, indent=4)
-    def _retrieve(self, name: Union[str, Path]) -> dict:
+    def _retrieve(self, run_id: str, name: str) -> dict:
         """
         Does the job of retrieving from the folder.
@@ -105,6 +105,7 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
         Returns:
             dict: The contents
         """
         contents: dict = {}
         with open(self.safe_suffix_json(name), "r") as fr:

runnable-0.28.0/extensions/run_log_store/chunked_minio.py ADDED Viewed

@@ -0,0 +1,131 @@
+import json
+import logging
+from functools import lru_cache
+from string import Template
+from typing import Any, Dict
+from cloudpathlib import S3Client, S3Path
+from pydantic import Field, SecretStr
+from extensions.run_log_store.generic_chunked import ChunkedRunLogStore
+from runnable import defaults
+logger = logging.getLogger(defaults.LOGGER_NAME)
+@lru_cache
+def get_minio_client(
+    endpoint_url: str, aws_access_key_id: str, aws_secret_access_key: str
+) -> S3Client:
+    return S3Client(
+        endpoint_url=endpoint_url,
+        aws_access_key_id=aws_access_key_id,
+        aws_secret_access_key=aws_secret_access_key,
+    )
+class ChunkedMinioRunLogStore(ChunkedRunLogStore):
+    """
+    Minio (S3-compatible) run log store that chunks the run log into thread safe chunks.
+    This enables executions to be parallel.
+    """
+    service_name: str = "chunked-minio"
+    endpoint_url: str = Field(default="http://localhost:9002")
+    aws_access_key_id: SecretStr = SecretStr(secret_value="minioadmin")
+    aws_secret_access_key: SecretStr = SecretStr(secret_value="minioadmin")
+    bucket: str = Field(default="runnable/run-logs")
+    def get_summary(self) -> Dict[str, Any]:
+        summary = {
+            "Type": self.service_name,
+            "Location": f"{self.endpoint_url}/{self.bucket}",
+        }
+        return summary
+    def get_run_log_bucket(self) -> S3Path:
+        run_id = self._context.run_id
+        return S3Path(
+            f"s3://{self.bucket}/{run_id}/",
+            client=get_minio_client(
+                self.endpoint_url,
+                self.aws_access_key_id.get_secret_value(),
+                self.aws_secret_access_key.get_secret_value(),
+            ),
+        )
+    def get_matches(
+        self, run_id: str, name: str, multiple_allowed: bool = False
+    ) -> None | str | list[str]:
+        """
+        Get the paths of the objects matching the pattern name*
+        Args:
+            run_id (str): The run id
+            name (str): The prefix of the object name to check in the run log store.
+        """
+        run_log_bucket = self.get_run_log_bucket()
+        run_log_bucket.mkdir(parents=True, exist_ok=True)
+        sub_name = Template(name).safe_substitute({"creation_time": ""})
+        matches = list(run_log_bucket.glob(f"{sub_name}*"))
+        if matches:
+            if not multiple_allowed:
+                if len(matches) > 1:
+                    msg = f"Multiple matches found for {name} while multiple is not allowed"
+                    raise Exception(msg)
+                return str(matches[0])
+            return [str(match) for match in matches]
+        return None
+    def _store(self, run_id: str, contents: dict, name: str, insert=False):
+        """
+        Store the contents against the name in the folder.
+        Args:
+            run_id (str): The run id
+            contents (dict): The dict to store
+            name (str): The name to store as
+        """
+        if insert:
+            name = str(self.get_run_log_bucket() / name)
+        self.get_run_log_bucket().mkdir(parents=True, exist_ok=True)
+        obj = S3Path(
+            name,
+            client=get_minio_client(
+                self.endpoint_url,
+                self.aws_access_key_id.get_secret_value(),
+                self.aws_secret_access_key.get_secret_value(),
+            ),
+        )
+        obj.write_text(json.dumps(contents, ensure_ascii=True, indent=4))
+    def _retrieve(self, run_id: str, name: str) -> dict:
+        """
+        Does the job of retrieving from the folder.
+        Args:
+            name (str): the name of the file to retrieve
+        Returns:
+            dict: The contents
+        """
+        obj = S3Path(
+            name,
+            client=get_minio_client(
+                self.endpoint_url,
+                self.aws_access_key_id.get_secret_value(),
+                self.aws_secret_access_key.get_secret_value(),
+            ),
+        )
+        run_log_text = json.loads(obj.read_text())
+        return run_log_text

{runnable-0.26.0 → runnable-0.28.0}/extensions/run_log_store/file_system.py RENAMED Viewed

@@ -3,13 +3,14 @@ import logging
 from pathlib import Path
 from typing import Any, Dict
-from runnable import defaults, exceptions, utils
-from runnable.datastore import BaseRunLogStore, RunLog
+from extensions.run_log_store.any_path import AnyPathRunLogStore
+from runnable import defaults, utils
+from runnable.datastore import RunLog
 logger = logging.getLogger(defaults.LOGGER_NAME)
-class FileSystemRunLogstore(BaseRunLogStore):
+class FileSystemRunLogstore(AnyPathRunLogStore):
     """
     In this type of Run Log store, we use a file system to store the JSON run log.
@@ -43,7 +44,7 @@ class FileSystemRunLogstore(BaseRunLogStore):
         return summary
-    def write_to_folder(self, run_log: RunLog):
+    def write_to_path(self, run_log: RunLog):
         """
         Write the run log to the folder
@@ -60,7 +61,7 @@ class FileSystemRunLogstore(BaseRunLogStore):
         with json_file_path.open("w") as fw:
             json.dump(run_log.model_dump(), fw, ensure_ascii=True, indent=4)  # pylint: disable=no-member
-    def get_from_folder(self, run_id: str) -> RunLog:
+    def read_from_path(self, run_id: str) -> RunLog:
         """
         Look into the run log folder for the run log for the run id.
@@ -88,58 +89,3 @@ class FileSystemRunLogstore(BaseRunLogStore):
             json_str = json.load(fr)
             run_log = RunLog(**json_str)  # pylint: disable=no-member
         return run_log
-    def create_run_log(
-        self,
-        run_id: str,
-        dag_hash: str = "",
-        use_cached: bool = False,
-        tag: str = "",
-        original_run_id: str = "",
-        status: str = defaults.CREATED,
-    ) -> RunLog:
-        """
-        # Creates a Run log
-        # Adds it to the db
-        """
-        try:
-            self.get_run_log_by_id(run_id=run_id, full=False)
-            raise exceptions.RunLogExistsError(run_id=run_id)
-        except exceptions.RunLogNotFoundError:
-            pass
-        logger.info(f"{self.service_name} Creating a Run Log for : {run_id}")
-        run_log = RunLog(
-            run_id=run_id,
-            dag_hash=dag_hash,
-            tag=tag,
-            status=status,
-        )
-        self.write_to_folder(run_log)
-        return run_log
-    def get_run_log_by_id(
-        self,
-        run_id: str,
-        full: bool = False,
-    ) -> RunLog:
-        """
-        # Returns the run_log defined by id
-        # Raises Exception if not found
-        """
-        try:
-            logger.info(f"{self.service_name} Getting a Run Log for : {run_id}")
-            run_log = self.get_from_folder(run_id)
-            return run_log
-        except FileNotFoundError as e:
-            raise exceptions.RunLogNotFoundError(run_id) from e
-    def put_run_log(self, run_log: RunLog):
-        """
-        # Puts the run_log into the database
-        """
-        logger.info(
-            f"{self.service_name} Putting the run log in the DB: {run_log.run_id}"
-        )
-        self.write_to_folder(run_log)

{runnable-0.26.0 → runnable-0.28.0}/extensions/run_log_store/generic_chunked.py RENAMED Viewed

@@ -1,10 +1,10 @@
+import json
 import logging
 import time
 from abc import abstractmethod
 from enum import Enum
-from pathlib import Path
 from string import Template
-from typing import Any, Dict, Optional, Sequence, Union
+from typing import Any, Dict, Union
 from runnable import defaults, exceptions
 from runnable.datastore import (
@@ -21,9 +21,6 @@ from runnable.datastore import (
 logger = logging.getLogger(defaults.LOGGER_NAME)
-T = Union[str, Path]  # Holds str, path
 class EntityNotFoundError(Exception):
     pass
@@ -87,7 +84,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
     @abstractmethod
     def get_matches(
         self, run_id: str, name: str, multiple_allowed: bool = False
-    ) -> Optional[Union[Sequence[T], T]]:
+    ) -> None | str | list[str]:
         """
         Get contents of persistence layer matching the pattern name*
@@ -98,7 +95,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
         ...
     @abstractmethod
-    def _store(self, run_id: str, contents: dict, name: T, insert: bool = False):
+    def _store(self, run_id: str, contents: dict, name: str, insert: bool = False):
         """
         Store the contents against the name in the persistence layer.
@@ -110,7 +107,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
         ...
     @abstractmethod
-    def _retrieve(self, name: T) -> dict:
+    def _retrieve(self, run_id: str, name: str) -> dict:
         """
         Does the job of retrieving from the persistent layer.
@@ -140,9 +137,10 @@ class ChunkedRunLogStore(BaseRunLogStore):
         insert = False
         if match:
-            existing_contents = self._retrieve(name=match)  # type: ignore
+            assert isinstance(match, str)
+            existing_contents = self._retrieve(run_id=run_id, name=match)
             contents = dict(existing_contents, **contents)
-            name_to_give = match  # type: ignore
+            name_to_give = match
         else:
             name_to_give = Template(naming_pattern).safe_substitute(
                 {"creation_time": str(int(time.time_ns()))}
@@ -190,13 +188,15 @@ class ChunkedRunLogStore(BaseRunLogStore):
         if matches:
             if not multiple_allowed:
-                contents = self._retrieve(name=matches)  # type: ignore
+                assert isinstance(matches, str)
+                contents = self._retrieve(run_id=run_id, name=matches)
                 model = self.ModelTypes[log_type.name].value
                 return model(**contents)
+            assert isinstance(matches, list)
             models = []
-            for match in matches:  # type: ignore
-                contents = self._retrieve(name=match)
+            for match in matches:
+                contents = self._retrieve(run_id=run_id, name=match)
                 model = self.ModelTypes[log_type.name].value
                 models.append(model(**contents))
             return models
@@ -225,7 +225,9 @@ class ChunkedRunLogStore(BaseRunLogStore):
             # No branch logs are found
             return {}
         # Forcing get_matches to always return a list is a better design
-        epoch_created = [str(match).split("-")[-1] for match in matches]  # type: ignore
+        assert isinstance(matches, list)
+        epoch_created = [str(match).split("-")[-1] for match in matches]
         # sort matches by epoch created
         epoch_created, matches = zip(*sorted(zip(epoch_created, matches)))  # type: ignore
@@ -234,7 +236,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
         for match in matches:
             model = self.ModelTypes[log_type.name].value
-            log_model = model(**self._retrieve(match))
+            log_model = model(**self._retrieve(run_id=run_id, name=match))
             logs[log_model.internal_name] = log_model  # type: ignore
         return logs
@@ -341,7 +343,9 @@ class ChunkedRunLogStore(BaseRunLogStore):
         )
         self.store(
-            run_id=run_id, contents=run_log.model_dump(), log_type=self.LogTypes.RUN_LOG
+            run_id=run_id,
+            contents=json.loads(run_log.model_dump_json()),
+            log_type=self.LogTypes.RUN_LOG,
         )
         return run_log
@@ -388,7 +392,9 @@ class ChunkedRunLogStore(BaseRunLogStore):
         """
         run_id = run_log.run_id
         self.store(
-            run_id=run_id, contents=run_log.model_dump(), log_type=self.LogTypes.RUN_LOG
+            run_id=run_id,
+            contents=json.loads(run_log.model_dump_json()),
+            log_type=self.LogTypes.RUN_LOG,
         )
     def get_parameters(self, run_id: str) -> dict:
@@ -447,7 +453,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
             self.store(
                 run_id=run_id,
                 log_type=self.LogTypes.PARAMETER,
-                contents={key: value.model_dump(by_alias=True)},
+                contents={key: json.loads(value.model_dump_json(by_alias=True))},
                 name=key,
             )
@@ -538,7 +544,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
         self.store(
             run_id=run_id,
             log_type=self.LogTypes.STEP_LOG,
-            contents=step_log.model_dump(),
+            contents=json.loads(step_log.model_dump_json()),
             name=step_log.internal_name,
         )
@@ -594,6 +600,6 @@ class ChunkedRunLogStore(BaseRunLogStore):
         self.store(
             run_id=run_id,
             log_type=self.LogTypes.BRANCH_LOG,
-            contents=branch_log.model_dump(),
+            contents=json.loads(branch_log.model_dump_json()),
             name=internal_branch_name,
         )

runnable-0.28.0/extensions/run_log_store/minio.py ADDED Viewed

@@ -0,0 +1,114 @@
+import json
+import logging
+from functools import lru_cache
+from typing import Any, Dict
+from cloudpathlib import S3Client, S3Path
+from pydantic import Field, SecretStr
+from extensions.run_log_store.any_path import AnyPathRunLogStore
+from runnable import defaults
+from runnable.datastore import RunLog
+logger = logging.getLogger(defaults.LOGGER_NAME)
+@lru_cache
+def get_minio_client(
+    endpoint_url: str, aws_access_key_id: str, aws_secret_access_key: str
+) -> S3Client:
+    return S3Client(
+        endpoint_url=endpoint_url,
+        aws_access_key_id=aws_access_key_id,
+        aws_secret_access_key=aws_secret_access_key,
+    )
+class MinioRunLogStore(AnyPathRunLogStore):
+    """
+    In this type of Run Log store, we use a Minio (S3-compatible) bucket to store the JSON run log.
+    Every single run is stored as a different file which makes it compatible across other store types.
+    When to use:
+        When locally testing a pipeline and have the need to compare across runs.
+        It's fully featured and perfectly fine if your local environment is where you would do everything.
+    Do not use:
+        If you need parallelization on local, this run log would not support it.
+    Example config:
+    run_log:
+      type: minio
+      config:
+        bucket: The bucket to put the logs in. Defaults to runnable/run-logs
+    """
+    service_name: str = "minio"
+    endpoint_url: str = Field(default="http://localhost:9002")
+    aws_access_key_id: SecretStr = SecretStr(secret_value="minioadmin")
+    aws_secret_access_key: SecretStr = SecretStr(secret_value="minioadmin")
+    bucket: str = Field(default="runnable/run-logs")
+    def get_summary(self) -> Dict[str, Any]:
+        summary = {
+            "Type": self.service_name,
+            "Location": f"{self.endpoint_url}/{self.bucket}",
+        }
+        return summary
+    def get_run_log_bucket(self) -> S3Path:
+        run_id = self._context.run_id
+        return S3Path(
+            f"s3://{self.bucket}/{run_id}/",
+            client=get_minio_client(
+                self.endpoint_url,
+                self.aws_access_key_id.get_secret_value(),
+                self.aws_secret_access_key.get_secret_value(),
+            ),
+        )
+    def write_to_path(self, run_log: RunLog):
+        """
+        Write the run log to the folder
+        Args:
+            run_log (RunLog): The run log to be added to the database
+        """
+        run_log_bucket = self.get_run_log_bucket()
+        run_log_bucket.mkdir(parents=True, exist_ok=True)
+        run_log_object = run_log_bucket / f"{run_log.run_id}.json"
+        run_log_object.write_text(
+            json.dumps(run_log.model_dump_json(), ensure_ascii=True, indent=4)
+        )
+    def read_from_path(self, run_id: str) -> RunLog:
+        """
+        Look into the run log folder for the run log for the run id.
+        If the run log does not exist, raise an exception. If it does, decode it
+        as a RunLog and return it
+        Args:
+            run_id (str): The requested run id to retrieve the run log store
+        Raises:
+            FileNotFoundError: If the Run Log has not been found.
+        Returns:
+            RunLog: The decoded Run log
+        """
+        run_log_bucket = self.get_run_log_bucket()
+        run_log_object = run_log_bucket / f"{run_id}.json"
+        run_log_text = json.loads(run_log_object.read_text())
+        run_log = RunLog(**json.loads(run_log_text))
+        return run_log

{runnable-0.26.0 → runnable-0.28.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "runnable"
-version = "0.26.0"
+version = "0.28.0"
 description = "Add your description here"
 readme = "README.md"
 authors = [
@@ -122,7 +122,9 @@ include = [
 [project.entry-points.'run_log_store']
 "buffered" = "runnable.datastore:BufferRunLogstore"
 file-system = "extensions.run_log_store.file_system:FileSystemRunLogstore"
+"minio" = "extensions.run_log_store.minio:MinioRunLogStore"
 "chunked-fs" = "extensions.run_log_store.chunked_fs:ChunkedFileSystemRunLogStore"
+"chunked-minio" = "extensions.run_log_store.chunked_minio:ChunkedMinioRunLogStore"
 [project.entry-points.'pickler']
 "pickle" = "runnable.pickler:NativePickler"