PyPI - runnable - Versions diffs - 0.26.0__tar.gz → 0.27.0__tar.gz - Mend

runnable 0.26.0tar.gz → 0.27.0tar.gz

Files changed (62) hide show

{runnable-0.26.0 → runnable-0.27.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: runnable
-Version: 0.26.0
+Version: 0.27.0
 Summary: Add your description here
 Author-email: "Vammi, Vijay" <vijay.vammi@astrazeneca.com>
 License-File: LICENSE

{runnable-0.26.0 → runnable-0.27.0}/extensions/catalog/minio.py RENAMED Viewed

@@ -4,6 +4,7 @@ from pathlib import Path
 from typing import Any
 from cloudpathlib import CloudPath, S3Client, S3Path
+from pydantic import Field, SecretStr
 from extensions.catalog.any_path import AnyPathCatalog
 from runnable import defaults
@@ -25,9 +26,9 @@ def get_minio_client(
 class MinioCatalog(AnyPathCatalog):
     service_name: str = "minio"
-    endpoint_url: str = "http://localhost:9002"
-    aws_access_key_id: str = "minioadmin"
-    aws_secret_access_key: str = "minioadmin"
+    endpoint_url: str = Field(default="http://localhost:9002")
+    aws_access_key_id: SecretStr = SecretStr(secret_value="minioadmin")
+    aws_secret_access_key: SecretStr = SecretStr(secret_value="minioadmin")
     bucket: str = "runnable"
     def get_summary(self) -> dict[str, Any]:
@@ -44,7 +45,9 @@ class MinioCatalog(AnyPathCatalog):
         return S3Path(
             f"s3://{self.bucket}/{run_id}/{self.compute_data_folder}".strip("."),
             client=get_minio_client(
-                self.endpoint_url, self.aws_access_key_id, self.aws_secret_access_key
+                self.endpoint_url,
+                self.aws_access_key_id.get_secret_value(),
+                self.aws_secret_access_key.get_secret_value(),
             ),
         )

runnable-0.26.0/extensions/run_log_store/file_system.py → runnable-0.27.0/extensions/run_log_store/any_path.py RENAMED Viewed

@@ -1,15 +1,14 @@
-import json
 import logging
-from pathlib import Path
+from abc import abstractmethod
 from typing import Any, Dict
-from runnable import defaults, exceptions, utils
+from runnable import defaults, exceptions
 from runnable.datastore import BaseRunLogStore, RunLog
 logger = logging.getLogger(defaults.LOGGER_NAME)
-class FileSystemRunLogstore(BaseRunLogStore):
+class AnyPathRunLogStore(BaseRunLogStore):
     """
     In this type of Run Log store, we use a file system to store the JSON run log.
@@ -43,51 +42,11 @@ class FileSystemRunLogstore(BaseRunLogStore):
         return summary
-    def write_to_folder(self, run_log: RunLog):
-        """
-        Write the run log to the folder
-        Args:
-            run_log (RunLog): The run log to be added to the database
-        """
-        write_to = self.log_folder_name
-        utils.safe_make_dir(write_to)
-        write_to_path = Path(write_to)
-        run_id = run_log.run_id
-        json_file_path = write_to_path / f"{run_id}.json"
-        with json_file_path.open("w") as fw:
-            json.dump(run_log.model_dump(), fw, ensure_ascii=True, indent=4)  # pylint: disable=no-member
-    def get_from_folder(self, run_id: str) -> RunLog:
-        """
-        Look into the run log folder for the run log for the run id.
-        If the run log does not exist, raise an exception. If it does, decode it
-        as a RunLog and return it
+    @abstractmethod
+    def write_to_path(self, run_log: RunLog): ...
-        Args:
-            run_id (str): The requested run id to retrieve the run log store
-        Raises:
-            FileNotFoundError: If the Run Log has not been found.
-        Returns:
-            RunLog: The decoded Run log
-        """
-        write_to = self.log_folder_name
-        read_from_path = Path(write_to)
-        json_file_path = read_from_path / f"{run_id}.json"
-        if not json_file_path.exists():
-            raise FileNotFoundError(f"Expected {json_file_path} is not present")
-        with json_file_path.open("r") as fr:
-            json_str = json.load(fr)
-            run_log = RunLog(**json_str)  # pylint: disable=no-member
-        return run_log
+    @abstractmethod
+    def read_from_path(self, run_id: str) -> RunLog: ...
     def create_run_log(
         self,
@@ -116,7 +75,7 @@ class FileSystemRunLogstore(BaseRunLogStore):
             tag=tag,
             status=status,
         )
-        self.write_to_folder(run_log)
+        self.write_to_path(run_log)
         return run_log
     def get_run_log_by_id(
@@ -130,7 +89,7 @@ class FileSystemRunLogstore(BaseRunLogStore):
         """
         try:
             logger.info(f"{self.service_name} Getting a Run Log for : {run_id}")
-            run_log = self.get_from_folder(run_id)
+            run_log = self.read_from_path(run_id)
             return run_log
         except FileNotFoundError as e:
             raise exceptions.RunLogNotFoundError(run_id) from e
@@ -142,4 +101,4 @@ class FileSystemRunLogstore(BaseRunLogStore):
         logger.info(
             f"{self.service_name} Putting the run log in the DB: {run_log.run_id}"
         )
-        self.write_to_folder(run_log)
+        self.write_to_path(run_log)

{runnable-0.26.0 → runnable-0.27.0}/extensions/run_log_store/chunked_fs.py RENAMED Viewed

@@ -2,14 +2,16 @@ import json
 import logging
 from pathlib import Path
 from string import Template
-from typing import Any, Dict, Optional, Sequence, Union
+from typing import Any, Dict, Optional, Union
+from cloudpathlib import CloudPath
 from extensions.run_log_store.generic_chunked import ChunkedRunLogStore
 from runnable import defaults, utils
 logger = logging.getLogger(defaults.LOGGER_NAME)
-T = Union[str, Path]
+MixT = Union[CloudPath, Path]
 class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
@@ -28,7 +30,7 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
     def get_matches(
         self, run_id: str, name: str, multiple_allowed: bool = False
-    ) -> Optional[Union[Sequence[T], T]]:
+    ) -> Optional[Union[list[Path], list[CloudPath], MixT]]:
         """
         Get contents of files matching the pattern name*
@@ -78,7 +80,7 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
         return str(name) + ".json"
-    def _store(self, run_id: str, contents: dict, name: Union[Path, str], insert=False):
+    def _store(self, run_id: str, contents: dict, name: MixT, insert=False):
         """
         Store the contents against the name in the folder.
@@ -87,15 +89,16 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
             contents (dict): The dict to store
             name (str): The name to store as
         """
+        log_folder_with_run_id = self.log_folder_with_run_id(run_id=run_id)
         if insert:
-            name = self.log_folder_with_run_id(run_id=run_id) / name
+            name = log_folder_with_run_id / name
-        utils.safe_make_dir(self.log_folder_with_run_id(run_id=run_id))
+        utils.safe_make_dir(log_folder_with_run_id)
-        with open(self.safe_suffix_json(name), "w") as fw:
+        with open(log_folder_with_run_id / self.safe_suffix_json(name.name), "w") as fw:
             json.dump(contents, fw, ensure_ascii=True, indent=4)
-    def _retrieve(self, name: Union[str, Path]) -> dict:
+    def _retrieve(self, run_id: str, name: MixT) -> dict:
         """
         Does the job of retrieving from the folder.
@@ -106,8 +109,9 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
             dict: The contents
         """
         contents: dict = {}
+        log_folder_with_run_id = self.log_folder_with_run_id(run_id=run_id)
-        with open(self.safe_suffix_json(name), "r") as fr:
+        with open(log_folder_with_run_id / self.safe_suffix_json(name.name), "r") as fr:
             contents = json.load(fr)
         return contents

runnable-0.27.0/extensions/run_log_store/file_system.py ADDED Viewed

@@ -0,0 +1,91 @@
+import json
+import logging
+from pathlib import Path
+from typing import Any, Dict
+from extensions.run_log_store.any_path import AnyPathRunLogStore
+from runnable import defaults, utils
+from runnable.datastore import RunLog
+logger = logging.getLogger(defaults.LOGGER_NAME)
+class FileSystemRunLogstore(AnyPathRunLogStore):
+    """
+    Run log store that keeps the JSON run log on the local file system.
+    Each run gets its own <run_id>.json file inside the configured log folder.
+    When to use:
+        When locally testing a pipeline and you need to compare across runs.
+        It is fully featured and perfectly fine if your local environment is where you do everything.
+    Do not use:
+        If you need parallelization on local, this run log would not support it.
+    Example config:
+    run_log:
+      type: file-system
+      config:
+        log_folder: The folder to put the logs in. Defaults to .run_log_store
+    """
+    service_name: str = "file-system"
+    log_folder: str = defaults.LOG_LOCATION_FOLDER
+    @property
+    def log_folder_name(self):
+        """The folder that holds the run log files."""
+        return self.log_folder
+    def get_summary(self) -> Dict[str, Any]:
+        """Describe this store by its service name and log folder."""
+        return {"Type": self.service_name, "Location": self.log_folder}
+    def write_to_path(self, run_log: RunLog):
+        """
+        Serialise the run log to <log folder>/<run_id>.json, creating the folder if needed.
+        Args:
+            run_log (RunLog): The run log to persist
+        """
+        folder = self.log_folder_name
+        utils.safe_make_dir(folder)
+        target = Path(folder) / f"{run_log.run_id}.json"
+        with target.open("w") as handle:
+            json.dump(run_log.model_dump(), handle, ensure_ascii=True, indent=4)  # pylint: disable=no-member
+    def read_from_path(self, run_id: str) -> RunLog:
+        """
+        Load <log folder>/<run_id>.json and decode it as a RunLog.
+        Args:
+            run_id (str): The run id whose run log is requested
+        Raises:
+            FileNotFoundError: If there is no run log file for the run id.
+        Returns:
+            RunLog: The decoded run log
+        """
+        source = Path(self.log_folder_name) / f"{run_id}.json"
+        if not source.exists():
+            raise FileNotFoundError(f"Expected {source} is not present")
+        with source.open("r") as handle:
+            payload = json.load(handle)
+        return RunLog(**payload)  # pylint: disable=no-member

{runnable-0.26.0 → runnable-0.27.0}/extensions/run_log_store/generic_chunked.py RENAMED Viewed

@@ -4,7 +4,9 @@ from abc import abstractmethod
 from enum import Enum
 from pathlib import Path
 from string import Template
-from typing import Any, Dict, Optional, Sequence, Union
+from typing import Any, Dict, Optional, Union
+from cloudpathlib import CloudPath
 from runnable import defaults, exceptions
 from runnable.datastore import (
@@ -21,7 +23,7 @@ from runnable.datastore import (
 logger = logging.getLogger(defaults.LOGGER_NAME)
-T = Union[str, Path]  # Holds str, path
+MixT = Union[CloudPath, Path]  # Holds a CloudPath or a local Path
 class EntityNotFoundError(Exception):
@@ -87,7 +89,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
     @abstractmethod
     def get_matches(
         self, run_id: str, name: str, multiple_allowed: bool = False
-    ) -> Optional[Union[Sequence[T], T]]:
+    ) -> Optional[Union[list[Path], list[CloudPath], MixT]]:
         """
         Get contents of persistence layer matching the pattern name*
@@ -98,7 +100,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
         ...
     @abstractmethod
-    def _store(self, run_id: str, contents: dict, name: T, insert: bool = False):
+    def _store(self, run_id: str, contents: dict, name: MixT, insert: bool = False):
         """
         Store the contents against the name in the persistence layer.
@@ -110,7 +112,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
         ...
     @abstractmethod
-    def _retrieve(self, name: T) -> dict:
+    def _retrieve(self, run_id: str, name: MixT) -> dict:
         """
         Does the job of retrieving from the persistent layer.
@@ -140,7 +142,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
         insert = False
         if match:
-            existing_contents = self._retrieve(name=match)  # type: ignore
+            existing_contents = self._retrieve(run_id=run_id, name=match)  # type: ignore
             contents = dict(existing_contents, **contents)
             name_to_give = match  # type: ignore
         else:
@@ -149,7 +151,9 @@ class ChunkedRunLogStore(BaseRunLogStore):
             )
             insert = True
-        self._store(run_id=run_id, contents=contents, name=name_to_give, insert=insert)
+        self._store(
+            run_id=run_id, contents=contents, name=Path(name_to_give), insert=insert
+        )
     def retrieve(
         self, run_id: str, log_type: LogTypes, name: str = "", multiple_allowed=False
@@ -190,13 +194,13 @@ class ChunkedRunLogStore(BaseRunLogStore):
         if matches:
             if not multiple_allowed:
-                contents = self._retrieve(name=matches)  # type: ignore
+                contents = self._retrieve(run_id=run_id, name=matches)  # type: ignore
                 model = self.ModelTypes[log_type.name].value
                 return model(**contents)
             models = []
             for match in matches:  # type: ignore
-                contents = self._retrieve(name=match)
+                contents = self._retrieve(run_id=run_id, name=match)
                 model = self.ModelTypes[log_type.name].value
                 models.append(model(**contents))
             return models
@@ -225,7 +229,9 @@ class ChunkedRunLogStore(BaseRunLogStore):
             # No branch logs are found
             return {}
         # Forcing get_matches to always return a list is a better design
-        epoch_created = [str(match).split("-")[-1] for match in matches]  # type: ignore
+        assert isinstance(matches, list)
+        epoch_created = [str(match).split("-")[-1] for match in matches]
         # sort matches by epoch created
         epoch_created, matches = zip(*sorted(zip(epoch_created, matches)))  # type: ignore
@@ -234,7 +240,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
         for match in matches:
             model = self.ModelTypes[log_type.name].value
-            log_model = model(**self._retrieve(match))
+            log_model = model(**self._retrieve(run_id=run_id, name=match))
             logs[log_model.internal_name] = log_model  # type: ignore
         return logs

runnable-0.27.0/extensions/run_log_store/minio.py ADDED Viewed

@@ -0,0 +1,111 @@
+import json
+import logging
+from functools import lru_cache
+from typing import Any, Dict
+from cloudpathlib import S3Client, S3Path
+from pydantic import Field, SecretStr
+from extensions.run_log_store.any_path import AnyPathRunLogStore
+from runnable import defaults
+from runnable.datastore import RunLog
+logger = logging.getLogger(defaults.LOGGER_NAME)
+@lru_cache
+def get_minio_client(
+    endpoint_url: str, aws_access_key_id: str, aws_secret_access_key: str
+) -> S3Client:
+    """
+    Build an S3Client for the given endpoint and credentials.
+    The result is memoised by lru_cache, so repeated calls with the same
+    three arguments return the same client object.
+    """
+    connection_settings = {
+        "endpoint_url": endpoint_url,
+        "aws_access_key_id": aws_access_key_id,
+        "aws_secret_access_key": aws_secret_access_key,
+    }
+    return S3Client(**connection_settings)
+class MinioRunLogStore(AnyPathRunLogStore):
+    """
+    In this type of Run Log store, we use a Minio (S3 compatible) bucket to store the JSON run log.
+    Every single run is stored as a different object, s3://<bucket>/<run_id>/<run_id>.json,
+    written with the same json.dumps(model_dump()) call the file-system run log store uses.
+    When to use:
+        When the run logs should live in object storage instead of a local folder.
+    Do not use:
+        If several processes update the same run log in parallel; write_to_path
+        replaces the whole object on every call.
+    Example config:
+    run_log:
+      type: minio
+      config:
+        endpoint_url: The Minio endpoint. Defaults to http://localhost:9002
+        aws_access_key_id: The access key. Defaults to minioadmin
+        aws_secret_access_key: The secret key. Defaults to minioadmin
+        bucket: The bucket (and optional key prefix) placed after s3://. Defaults to runnable/run-logs
+    """
+    # Matches the "minio" entry point name and the service name of MinioCatalog.
+    service_name: str = "minio"
+    endpoint_url: str = Field(default="http://localhost:9002")
+    # Credentials are SecretStr; they are only unwrapped when the client is built.
+    aws_access_key_id: SecretStr = SecretStr(secret_value="minioadmin")
+    aws_secret_access_key: SecretStr = SecretStr(secret_value="minioadmin")
+    bucket: str = Field(default="runnable/run-logs")
+    def get_summary(self) -> Dict[str, Any]:
+        """
+        Describe this store by its service name and where the run logs are kept.
+        Returns:
+            Dict[str, Any]: "Type" and "Location" (endpoint and bucket, never the credentials)
+        """
+        summary = {
+            "Type": self.service_name,
+            "Location": f"{self.endpoint_url}/{self.bucket}",
+        }
+        return summary
+    def get_run_log_bucket(self) -> S3Path:
+        """
+        Build the S3 prefix, s3://<bucket>/<run_id>/, under which the run log object lives.
+        Returns:
+            S3Path: The prefix, bound to a client for the configured endpoint
+        """
+        # NOTE(review): the prefix uses the run id of the active context, not the
+        # run id being read or written - confirm the two always match.
+        run_id = self._context.run_id
+        return S3Path(
+            f"s3://{self.bucket}/{run_id}/",
+            client=get_minio_client(
+                self.endpoint_url,
+                self.aws_access_key_id.get_secret_value(),
+                self.aws_secret_access_key.get_secret_value(),
+            ),
+        )
+    def write_to_path(self, run_log: RunLog):
+        """
+        Write the run log to the bucket as <run_id>.json
+        Args:
+            run_log (RunLog): The run log to be added to the database
+        """
+        run_log_bucket = self.get_run_log_bucket()
+        run_log_bucket.mkdir(parents=True, exist_ok=True)
+        run_log_object = run_log_bucket / f"{run_log.run_id}.json"
+        # Dump the model as a dict so the object holds an indented JSON document;
+        # dumping the output of model_dump_json() would store one quoted string.
+        run_log_object.write_text(
+            json.dumps(run_log.model_dump(), ensure_ascii=True, indent=4)
+        )
+    def read_from_path(self, run_id: str) -> RunLog:
+        """
+        Look into the bucket for the run log for the run id.
+        If the run log does not exist, raise an exception. If it does, decode it
+        as a RunLog and return it
+        Args:
+            run_id (str): The requested run id to retrieve the run log store
+        Raises:
+            FileNotFoundError: If the Run Log has not been found.
+        Returns:
+            RunLog: The decoded Run log
+        """
+        run_log_object = self.get_run_log_bucket() / f"{run_id}.json"
+        # get_run_log_by_id translates FileNotFoundError into RunLogNotFoundError,
+        # so a missing object has to surface as exactly that exception type.
+        if not run_log_object.exists():
+            raise FileNotFoundError(f"Expected {run_log_object} is not present")
+        contents = json.loads(run_log_object.read_text())
+        if isinstance(contents, str):
+            # Objects written before this fix hold a JSON string that itself
+            # contains the JSON document; decode the inner document as well.
+            contents = json.loads(contents)
+        return RunLog(**contents)

{runnable-0.26.0 → runnable-0.27.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "runnable"
-version = "0.26.0"
+version = "0.27.0"
 description = "Add your description here"
 readme = "README.md"
 authors = [
@@ -122,6 +122,7 @@ include = [
 [project.entry-points.'run_log_store']
 "buffered" = "runnable.datastore:BufferRunLogstore"
 file-system = "extensions.run_log_store.file_system:FileSystemRunLogstore"
+"minio" = "extensions.run_log_store.minio:MinioRunLogStore"
 "chunked-fs" = "extensions.run_log_store.chunked_fs:ChunkedFileSystemRunLogStore"
 [project.entry-points.'pickler']