runnable 0.26.0__tar.gz → 0.28.0__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. {runnable-0.26.0 → runnable-0.28.0}/PKG-INFO +1 -1
  2. {runnable-0.26.0 → runnable-0.28.0}/extensions/catalog/minio.py +7 -4
  3. runnable-0.28.0/extensions/run_log_store/any_path.py +100 -0
  4. {runnable-0.26.0 → runnable-0.28.0}/extensions/run_log_store/chunked_fs.py +11 -10
  5. runnable-0.28.0/extensions/run_log_store/chunked_minio.py +131 -0
  6. {runnable-0.26.0 → runnable-0.28.0}/extensions/run_log_store/file_system.py +6 -60
  7. {runnable-0.26.0 → runnable-0.28.0}/extensions/run_log_store/generic_chunked.py +26 -20
  8. runnable-0.28.0/extensions/run_log_store/minio.py +114 -0
  9. {runnable-0.26.0 → runnable-0.28.0}/pyproject.toml +3 -1
  10. {runnable-0.26.0 → runnable-0.28.0}/.gitignore +0 -0
  11. {runnable-0.26.0 → runnable-0.28.0}/LICENSE +0 -0
  12. {runnable-0.26.0 → runnable-0.28.0}/README.md +0 -0
  13. {runnable-0.26.0 → runnable-0.28.0}/extensions/README.md +0 -0
  14. {runnable-0.26.0 → runnable-0.28.0}/extensions/__init__.py +0 -0
  15. {runnable-0.26.0 → runnable-0.28.0}/extensions/catalog/README.md +0 -0
  16. {runnable-0.26.0 → runnable-0.28.0}/extensions/catalog/any_path.py +0 -0
  17. {runnable-0.26.0 → runnable-0.28.0}/extensions/catalog/file_system.py +0 -0
  18. {runnable-0.26.0 → runnable-0.28.0}/extensions/catalog/pyproject.toml +0 -0
  19. {runnable-0.26.0 → runnable-0.28.0}/extensions/catalog/s3.py +0 -0
  20. {runnable-0.26.0 → runnable-0.28.0}/extensions/job_executor/README.md +0 -0
  21. {runnable-0.26.0 → runnable-0.28.0}/extensions/job_executor/__init__.py +0 -0
  22. {runnable-0.26.0 → runnable-0.28.0}/extensions/job_executor/k8s.py +0 -0
  23. {runnable-0.26.0 → runnable-0.28.0}/extensions/job_executor/k8s_job_spec.yaml +0 -0
  24. {runnable-0.26.0 → runnable-0.28.0}/extensions/job_executor/local.py +0 -0
  25. {runnable-0.26.0 → runnable-0.28.0}/extensions/job_executor/local_container.py +0 -0
  26. {runnable-0.26.0 → runnable-0.28.0}/extensions/job_executor/pyproject.toml +0 -0
  27. {runnable-0.26.0 → runnable-0.28.0}/extensions/nodes/README.md +0 -0
  28. {runnable-0.26.0 → runnable-0.28.0}/extensions/nodes/nodes.py +0 -0
  29. {runnable-0.26.0 → runnable-0.28.0}/extensions/nodes/pyproject.toml +0 -0
  30. {runnable-0.26.0 → runnable-0.28.0}/extensions/pipeline_executor/README.md +0 -0
  31. {runnable-0.26.0 → runnable-0.28.0}/extensions/pipeline_executor/__init__.py +0 -0
  32. {runnable-0.26.0 → runnable-0.28.0}/extensions/pipeline_executor/argo.py +0 -0
  33. {runnable-0.26.0 → runnable-0.28.0}/extensions/pipeline_executor/local.py +0 -0
  34. {runnable-0.26.0 → runnable-0.28.0}/extensions/pipeline_executor/local_container.py +0 -0
  35. {runnable-0.26.0 → runnable-0.28.0}/extensions/pipeline_executor/mocked.py +0 -0
  36. {runnable-0.26.0 → runnable-0.28.0}/extensions/pipeline_executor/pyproject.toml +0 -0
  37. {runnable-0.26.0 → runnable-0.28.0}/extensions/pipeline_executor/retry.py +0 -0
  38. {runnable-0.26.0 → runnable-0.28.0}/extensions/run_log_store/README.md +0 -0
  39. {runnable-0.26.0 → runnable-0.28.0}/extensions/run_log_store/__init__.py +0 -0
  40. {runnable-0.26.0 → runnable-0.28.0}/extensions/run_log_store/db/implementation_FF.py +0 -0
  41. {runnable-0.26.0 → runnable-0.28.0}/extensions/run_log_store/db/integration_FF.py +0 -0
  42. {runnable-0.26.0 → runnable-0.28.0}/extensions/run_log_store/pyproject.toml +0 -0
  43. {runnable-0.26.0 → runnable-0.28.0}/extensions/secrets/README.md +0 -0
  44. {runnable-0.26.0 → runnable-0.28.0}/extensions/secrets/dotenv.py +0 -0
  45. {runnable-0.26.0 → runnable-0.28.0}/extensions/secrets/pyproject.toml +0 -0
  46. {runnable-0.26.0 → runnable-0.28.0}/runnable/__init__.py +0 -0
  47. {runnable-0.26.0 → runnable-0.28.0}/runnable/catalog.py +0 -0
  48. {runnable-0.26.0 → runnable-0.28.0}/runnable/cli.py +0 -0
  49. {runnable-0.26.0 → runnable-0.28.0}/runnable/context.py +0 -0
  50. {runnable-0.26.0 → runnable-0.28.0}/runnable/datastore.py +0 -0
  51. {runnable-0.26.0 → runnable-0.28.0}/runnable/defaults.py +0 -0
  52. {runnable-0.26.0 → runnable-0.28.0}/runnable/entrypoints.py +0 -0
  53. {runnable-0.26.0 → runnable-0.28.0}/runnable/exceptions.py +0 -0
  54. {runnable-0.26.0 → runnable-0.28.0}/runnable/executor.py +0 -0
  55. {runnable-0.26.0 → runnable-0.28.0}/runnable/graph.py +0 -0
  56. {runnable-0.26.0 → runnable-0.28.0}/runnable/names.py +0 -0
  57. {runnable-0.26.0 → runnable-0.28.0}/runnable/nodes.py +0 -0
  58. {runnable-0.26.0 → runnable-0.28.0}/runnable/parameters.py +0 -0
  59. {runnable-0.26.0 → runnable-0.28.0}/runnable/pickler.py +0 -0
  60. {runnable-0.26.0 → runnable-0.28.0}/runnable/sdk.py +0 -0
  61. {runnable-0.26.0 → runnable-0.28.0}/runnable/secrets.py +0 -0
  62. {runnable-0.26.0 → runnable-0.28.0}/runnable/tasks.py +0 -0
  63. {runnable-0.26.0 → runnable-0.28.0}/runnable/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: runnable
3
- Version: 0.26.0
3
+ Version: 0.28.0
4
4
  Summary: Add your description here
5
5
  Author-email: "Vammi, Vijay" <vijay.vammi@astrazeneca.com>
6
6
  License-File: LICENSE
@@ -4,6 +4,7 @@ from pathlib import Path
4
4
  from typing import Any
5
5
 
6
6
  from cloudpathlib import CloudPath, S3Client, S3Path
7
+ from pydantic import Field, SecretStr
7
8
 
8
9
  from extensions.catalog.any_path import AnyPathCatalog
9
10
  from runnable import defaults
@@ -25,9 +26,9 @@ def get_minio_client(
25
26
  class MinioCatalog(AnyPathCatalog):
26
27
  service_name: str = "minio"
27
28
 
28
- endpoint_url: str = "http://localhost:9002"
29
- aws_access_key_id: str = "minioadmin"
30
- aws_secret_access_key: str = "minioadmin"
29
+ endpoint_url: str = Field(default="http://localhost:9002")
30
+ aws_access_key_id: SecretStr = SecretStr(secret_value="minioadmin")
31
+ aws_secret_access_key: SecretStr = SecretStr(secret_value="minioadmin")
31
32
  bucket: str = "runnable"
32
33
 
33
34
  def get_summary(self) -> dict[str, Any]:
@@ -44,7 +45,9 @@ class MinioCatalog(AnyPathCatalog):
44
45
  return S3Path(
45
46
  f"s3://{self.bucket}/{run_id}/{self.compute_data_folder}".strip("."),
46
47
  client=get_minio_client(
47
- self.endpoint_url, self.aws_access_key_id, self.aws_secret_access_key
48
+ self.endpoint_url,
49
+ self.aws_access_key_id.get_secret_value(),
50
+ self.aws_secret_access_key.get_secret_value(),
48
51
  ),
49
52
  )
50
53
 
@@ -0,0 +1,100 @@
1
+ import logging
2
+ from abc import abstractmethod
3
+ from typing import Any, Dict
4
+
5
+ from runnable import defaults, exceptions
6
+ from runnable.datastore import BaseRunLogStore, RunLog
7
+
8
+ logger = logging.getLogger(defaults.LOGGER_NAME)
9
+
10
+
11
+ class AnyPathRunLogStore(BaseRunLogStore):
12
+ """
13
+ In this type of Run Log store, we use a file system to store the JSON run log.
14
+
15
+ Every single run is stored as a different file which makes it compatible across other store types.
16
+
17
+ When to use:
18
+ When locally testing a pipeline and have the need to compare across runs.
19
+ It's fully featured and perfectly fine if your local environment is where you would do everything.
20
+
21
+ Do not use:
22
+ If you need parallelization on local, this run log would not support it.
23
+
24
+ Example config:
25
+
26
+ run_log:
27
+ type: file-system
28
+ config:
29
+ log_folder: The folder to put the logs. Defaults to .run_log_store
30
+
31
+ """
32
+
33
+ service_name: str = "file-system"
34
+ log_folder: str = defaults.LOG_LOCATION_FOLDER
35
+
36
+ def get_summary(self) -> Dict[str, Any]:
37
+ summary = {"Type": self.service_name, "Location": self.log_folder}
38
+
39
+ return summary
40
+
41
+ @abstractmethod
42
+ def write_to_path(self, run_log: RunLog): ...
43
+
44
+ @abstractmethod
45
+ def read_from_path(self, run_id: str) -> RunLog: ...
46
+
47
+ def create_run_log(
48
+ self,
49
+ run_id: str,
50
+ dag_hash: str = "",
51
+ use_cached: bool = False,
52
+ tag: str = "",
53
+ original_run_id: str = "",
54
+ status: str = defaults.CREATED,
55
+ ) -> RunLog:
56
+ """
57
+ # Creates a Run log
58
+ # Adds it to the db
59
+ """
60
+
61
+ try:
62
+ self.get_run_log_by_id(run_id=run_id, full=False)
63
+ raise exceptions.RunLogExistsError(run_id=run_id)
64
+ except exceptions.RunLogNotFoundError:
65
+ pass
66
+
67
+ logger.info(f"{self.service_name} Creating a Run Log for : {run_id}")
68
+ run_log = RunLog(
69
+ run_id=run_id,
70
+ dag_hash=dag_hash,
71
+ tag=tag,
72
+ status=status,
73
+ )
74
+ self.write_to_path(run_log)
75
+ return run_log
76
+
77
+ def get_run_log_by_id(
78
+ self,
79
+ run_id: str,
80
+ full: bool = False,
81
+ ) -> RunLog:
82
+ """
83
+ # Returns the run_log defined by id
84
+ # Raises Exception if not found
85
+ """
86
+ try:
87
+ logger.info(f"{self.service_name} Getting a Run Log for : {run_id}")
88
+ run_log = self.read_from_path(run_id)
89
+ return run_log
90
+ except FileNotFoundError as e:
91
+ raise exceptions.RunLogNotFoundError(run_id) from e
92
+
93
+ def put_run_log(self, run_log: RunLog):
94
+ """
95
+ # Puts the run_log into the database
96
+ """
97
+ logger.info(
98
+ f"{self.service_name} Putting the run log in the DB: {run_log.run_id}"
99
+ )
100
+ self.write_to_path(run_log)
@@ -2,15 +2,13 @@ import json
2
2
  import logging
3
3
  from pathlib import Path
4
4
  from string import Template
5
- from typing import Any, Dict, Optional, Sequence, Union
5
+ from typing import Any, Dict, Union
6
6
 
7
7
  from extensions.run_log_store.generic_chunked import ChunkedRunLogStore
8
8
  from runnable import defaults, utils
9
9
 
10
10
  logger = logging.getLogger(defaults.LOGGER_NAME)
11
11
 
12
- T = Union[str, Path]
13
-
14
12
 
15
13
  class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
16
14
  """
@@ -28,7 +26,7 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
28
26
 
29
27
  def get_matches(
30
28
  self, run_id: str, name: str, multiple_allowed: bool = False
31
- ) -> Optional[Union[Sequence[T], T]]:
29
+ ) -> str | list[str] | None:
32
30
  """
33
31
  Get contents of files matching the pattern name*
34
32
 
@@ -46,8 +44,8 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
46
44
  if len(matches) > 1:
47
45
  msg = f"Multiple matches found for {name} while multiple is not allowed"
48
46
  raise Exception(msg)
49
- return matches[0]
50
- return matches
47
+ return str(matches[0])
48
+ return [str(match) for match in matches]
51
49
 
52
50
  return None
53
51
 
@@ -78,7 +76,7 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
78
76
 
79
77
  return str(name) + ".json"
80
78
 
81
- def _store(self, run_id: str, contents: dict, name: Union[Path, str], insert=False):
79
+ def _store(self, run_id: str, contents: dict, name: str, insert=False):
82
80
  """
83
81
  Store the contents against the name in the folder.
84
82
 
@@ -87,15 +85,17 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
87
85
  contents (dict): The dict to store
88
86
  name (str): The name to store as
89
87
  """
88
+
89
+ log_folder_with_run_id = self.log_folder_with_run_id(run_id=run_id)
90
90
  if insert:
91
- name = self.log_folder_with_run_id(run_id=run_id) / name
91
+ name = str(log_folder_with_run_id / name)
92
92
 
93
- utils.safe_make_dir(self.log_folder_with_run_id(run_id=run_id))
93
+ utils.safe_make_dir(log_folder_with_run_id)
94
94
 
95
95
  with open(self.safe_suffix_json(name), "w") as fw:
96
96
  json.dump(contents, fw, ensure_ascii=True, indent=4)
97
97
 
98
- def _retrieve(self, name: Union[str, Path]) -> dict:
98
+ def _retrieve(self, run_id: str, name: str) -> dict:
99
99
  """
100
100
  Does the job of retrieving from the folder.
101
101
 
@@ -105,6 +105,7 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
105
105
  Returns:
106
106
  dict: The contents
107
107
  """
108
+
108
109
  contents: dict = {}
109
110
 
110
111
  with open(self.safe_suffix_json(name), "r") as fr:
@@ -0,0 +1,131 @@
1
+ import json
2
+ import logging
3
+ from functools import lru_cache
4
+ from string import Template
5
+ from typing import Any, Dict
6
+
7
+ from cloudpathlib import S3Client, S3Path
8
+ from pydantic import Field, SecretStr
9
+
10
+ from extensions.run_log_store.generic_chunked import ChunkedRunLogStore
11
+ from runnable import defaults
12
+
13
+ logger = logging.getLogger(defaults.LOGGER_NAME)
14
+
15
+
16
+ @lru_cache
17
+ def get_minio_client(
18
+ endpoint_url: str, aws_access_key_id: str, aws_secret_access_key: str
19
+ ) -> S3Client:
20
+ return S3Client(
21
+ endpoint_url=endpoint_url,
22
+ aws_access_key_id=aws_access_key_id,
23
+ aws_secret_access_key=aws_secret_access_key,
24
+ )
25
+
26
+
27
+ class ChunkedMinioRunLogStore(ChunkedRunLogStore):
28
+ """
29
+ Minio (S3-compatible) run log store that chunks the run log into thread safe chunks.
30
+ This enables executions to be parallel.
31
+ """
32
+
33
+ service_name: str = "chunked-minio"
34
+ endpoint_url: str = Field(default="http://localhost:9002")
35
+ aws_access_key_id: SecretStr = SecretStr(secret_value="minioadmin")
36
+ aws_secret_access_key: SecretStr = SecretStr(secret_value="minioadmin")
37
+ bucket: str = Field(default="runnable/run-logs")
38
+
39
+ def get_summary(self) -> Dict[str, Any]:
40
+ summary = {
41
+ "Type": self.service_name,
42
+ "Location": f"{self.endpoint_url}/{self.bucket}",
43
+ }
44
+
45
+ return summary
46
+
47
+ def get_run_log_bucket(self) -> S3Path:
48
+ run_id = self._context.run_id
49
+
50
+ return S3Path(
51
+ f"s3://{self.bucket}/{run_id}/",
52
+ client=get_minio_client(
53
+ self.endpoint_url,
54
+ self.aws_access_key_id.get_secret_value(),
55
+ self.aws_secret_access_key.get_secret_value(),
56
+ ),
57
+ )
58
+
59
+ def get_matches(
60
+ self, run_id: str, name: str, multiple_allowed: bool = False
61
+ ) -> None | str | list[str]:
62
+ """
63
+ Get contents of files matching the pattern name*
64
+
65
+ Args:
66
+ run_id (str): The run id
67
+ name (str): The suffix of the file name to check in the run log store.
68
+ """
69
+ run_log_bucket = self.get_run_log_bucket()
70
+ run_log_bucket.mkdir(parents=True, exist_ok=True)
71
+
72
+ sub_name = Template(name).safe_substitute({"creation_time": ""})
73
+ matches = list(run_log_bucket.glob(f"{sub_name}*"))
74
+
75
+ if matches:
76
+ if not multiple_allowed:
77
+ if len(matches) > 1:
78
+ msg = f"Multiple matches found for {name} while multiple is not allowed"
79
+ raise Exception(msg)
80
+ return str(matches[0])
81
+ return [str(match) for match in matches]
82
+
83
+ return None
84
+
85
+ def _store(self, run_id: str, contents: dict, name: str, insert=False):
86
+ """
87
+ Store the contents against the name in the folder.
88
+
89
+ Args:
90
+ run_id (str): The run id
91
+ contents (dict): The dict to store
92
+ name (str): The name to store as
93
+ """
94
+
95
+ if insert:
96
+ name = str(self.get_run_log_bucket() / name)
97
+
98
+ self.get_run_log_bucket().mkdir(parents=True, exist_ok=True)
99
+ obj = S3Path(
100
+ name,
101
+ client=get_minio_client(
102
+ self.endpoint_url,
103
+ self.aws_access_key_id.get_secret_value(),
104
+ self.aws_secret_access_key.get_secret_value(),
105
+ ),
106
+ )
107
+
108
+ obj.write_text(json.dumps(contents, ensure_ascii=True, indent=4))
109
+
110
+ def _retrieve(self, run_id: str, name: str) -> dict:
111
+ """
112
+ Does the job of retrieving from the folder.
113
+
114
+ Args:
115
+ name (str): the name of the file to retrieve
116
+
117
+ Returns:
118
+ dict: The contents
119
+ """
120
+
121
+ obj = S3Path(
122
+ name,
123
+ client=get_minio_client(
124
+ self.endpoint_url,
125
+ self.aws_access_key_id.get_secret_value(),
126
+ self.aws_secret_access_key.get_secret_value(),
127
+ ),
128
+ )
129
+
130
+ run_log_text = json.loads(obj.read_text())
131
+ return run_log_text
@@ -3,13 +3,14 @@ import logging
3
3
  from pathlib import Path
4
4
  from typing import Any, Dict
5
5
 
6
- from runnable import defaults, exceptions, utils
7
- from runnable.datastore import BaseRunLogStore, RunLog
6
+ from extensions.run_log_store.any_path import AnyPathRunLogStore
7
+ from runnable import defaults, utils
8
+ from runnable.datastore import RunLog
8
9
 
9
10
  logger = logging.getLogger(defaults.LOGGER_NAME)
10
11
 
11
12
 
12
- class FileSystemRunLogstore(BaseRunLogStore):
13
+ class FileSystemRunLogstore(AnyPathRunLogStore):
13
14
  """
14
15
  In this type of Run Log store, we use a file system to store the JSON run log.
15
16
 
@@ -43,7 +44,7 @@ class FileSystemRunLogstore(BaseRunLogStore):
43
44
 
44
45
  return summary
45
46
 
46
- def write_to_folder(self, run_log: RunLog):
47
+ def write_to_path(self, run_log: RunLog):
47
48
  """
48
49
  Write the run log to the folder
49
50
 
@@ -60,7 +61,7 @@ class FileSystemRunLogstore(BaseRunLogStore):
60
61
  with json_file_path.open("w") as fw:
61
62
  json.dump(run_log.model_dump(), fw, ensure_ascii=True, indent=4) # pylint: disable=no-member
62
63
 
63
- def get_from_folder(self, run_id: str) -> RunLog:
64
+ def read_from_path(self, run_id: str) -> RunLog:
64
65
  """
65
66
  Look into the run log folder for the run log for the run id.
66
67
 
@@ -88,58 +89,3 @@ class FileSystemRunLogstore(BaseRunLogStore):
88
89
  json_str = json.load(fr)
89
90
  run_log = RunLog(**json_str) # pylint: disable=no-member
90
91
  return run_log
91
-
92
- def create_run_log(
93
- self,
94
- run_id: str,
95
- dag_hash: str = "",
96
- use_cached: bool = False,
97
- tag: str = "",
98
- original_run_id: str = "",
99
- status: str = defaults.CREATED,
100
- ) -> RunLog:
101
- """
102
- # Creates a Run log
103
- # Adds it to the db
104
- """
105
-
106
- try:
107
- self.get_run_log_by_id(run_id=run_id, full=False)
108
- raise exceptions.RunLogExistsError(run_id=run_id)
109
- except exceptions.RunLogNotFoundError:
110
- pass
111
-
112
- logger.info(f"{self.service_name} Creating a Run Log for : {run_id}")
113
- run_log = RunLog(
114
- run_id=run_id,
115
- dag_hash=dag_hash,
116
- tag=tag,
117
- status=status,
118
- )
119
- self.write_to_folder(run_log)
120
- return run_log
121
-
122
- def get_run_log_by_id(
123
- self,
124
- run_id: str,
125
- full: bool = False,
126
- ) -> RunLog:
127
- """
128
- # Returns the run_log defined by id
129
- # Raises Exception if not found
130
- """
131
- try:
132
- logger.info(f"{self.service_name} Getting a Run Log for : {run_id}")
133
- run_log = self.get_from_folder(run_id)
134
- return run_log
135
- except FileNotFoundError as e:
136
- raise exceptions.RunLogNotFoundError(run_id) from e
137
-
138
- def put_run_log(self, run_log: RunLog):
139
- """
140
- # Puts the run_log into the database
141
- """
142
- logger.info(
143
- f"{self.service_name} Putting the run log in the DB: {run_log.run_id}"
144
- )
145
- self.write_to_folder(run_log)
@@ -1,10 +1,10 @@
1
+ import json
1
2
  import logging
2
3
  import time
3
4
  from abc import abstractmethod
4
5
  from enum import Enum
5
- from pathlib import Path
6
6
  from string import Template
7
- from typing import Any, Dict, Optional, Sequence, Union
7
+ from typing import Any, Dict, Union
8
8
 
9
9
  from runnable import defaults, exceptions
10
10
  from runnable.datastore import (
@@ -21,9 +21,6 @@ from runnable.datastore import (
21
21
  logger = logging.getLogger(defaults.LOGGER_NAME)
22
22
 
23
23
 
24
- T = Union[str, Path] # Holds str, path
25
-
26
-
27
24
  class EntityNotFoundError(Exception):
28
25
  pass
29
26
 
@@ -87,7 +84,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
87
84
  @abstractmethod
88
85
  def get_matches(
89
86
  self, run_id: str, name: str, multiple_allowed: bool = False
90
- ) -> Optional[Union[Sequence[T], T]]:
87
+ ) -> None | str | list[str]:
91
88
  """
92
89
  Get contents of persistence layer matching the pattern name*
93
90
 
@@ -98,7 +95,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
98
95
  ...
99
96
 
100
97
  @abstractmethod
101
- def _store(self, run_id: str, contents: dict, name: T, insert: bool = False):
98
+ def _store(self, run_id: str, contents: dict, name: str, insert: bool = False):
102
99
  """
103
100
  Store the contents against the name in the persistence layer.
104
101
 
@@ -110,7 +107,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
110
107
  ...
111
108
 
112
109
  @abstractmethod
113
- def _retrieve(self, name: T) -> dict:
110
+ def _retrieve(self, run_id: str, name: str) -> dict:
114
111
  """
115
112
  Does the job of retrieving from the persistent layer.
116
113
 
@@ -140,9 +137,10 @@ class ChunkedRunLogStore(BaseRunLogStore):
140
137
  insert = False
141
138
 
142
139
  if match:
143
- existing_contents = self._retrieve(name=match) # type: ignore
140
+ assert isinstance(match, str)
141
+ existing_contents = self._retrieve(run_id=run_id, name=match)
144
142
  contents = dict(existing_contents, **contents)
145
- name_to_give = match # type: ignore
143
+ name_to_give = match
146
144
  else:
147
145
  name_to_give = Template(naming_pattern).safe_substitute(
148
146
  {"creation_time": str(int(time.time_ns()))}
@@ -190,13 +188,15 @@ class ChunkedRunLogStore(BaseRunLogStore):
190
188
 
191
189
  if matches:
192
190
  if not multiple_allowed:
193
- contents = self._retrieve(name=matches) # type: ignore
191
+ assert isinstance(matches, str)
192
+ contents = self._retrieve(run_id=run_id, name=matches)
194
193
  model = self.ModelTypes[log_type.name].value
195
194
  return model(**contents)
196
195
 
196
+ assert isinstance(matches, list)
197
197
  models = []
198
- for match in matches: # type: ignore
199
- contents = self._retrieve(name=match)
198
+ for match in matches:
199
+ contents = self._retrieve(run_id=run_id, name=match)
200
200
  model = self.ModelTypes[log_type.name].value
201
201
  models.append(model(**contents))
202
202
  return models
@@ -225,7 +225,9 @@ class ChunkedRunLogStore(BaseRunLogStore):
225
225
  # No branch logs are found
226
226
  return {}
227
227
  # Forcing get_matches to always return a list is a better design
228
- epoch_created = [str(match).split("-")[-1] for match in matches] # type: ignore
228
+
229
+ assert isinstance(matches, list)
230
+ epoch_created = [str(match).split("-")[-1] for match in matches]
229
231
 
230
232
  # sort matches by epoch created
231
233
  epoch_created, matches = zip(*sorted(zip(epoch_created, matches))) # type: ignore
@@ -234,7 +236,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
234
236
 
235
237
  for match in matches:
236
238
  model = self.ModelTypes[log_type.name].value
237
- log_model = model(**self._retrieve(match))
239
+ log_model = model(**self._retrieve(run_id=run_id, name=match))
238
240
  logs[log_model.internal_name] = log_model # type: ignore
239
241
 
240
242
  return logs
@@ -341,7 +343,9 @@ class ChunkedRunLogStore(BaseRunLogStore):
341
343
  )
342
344
 
343
345
  self.store(
344
- run_id=run_id, contents=run_log.model_dump(), log_type=self.LogTypes.RUN_LOG
346
+ run_id=run_id,
347
+ contents=json.loads(run_log.model_dump_json()),
348
+ log_type=self.LogTypes.RUN_LOG,
345
349
  )
346
350
  return run_log
347
351
 
@@ -388,7 +392,9 @@ class ChunkedRunLogStore(BaseRunLogStore):
388
392
  """
389
393
  run_id = run_log.run_id
390
394
  self.store(
391
- run_id=run_id, contents=run_log.model_dump(), log_type=self.LogTypes.RUN_LOG
395
+ run_id=run_id,
396
+ contents=json.loads(run_log.model_dump_json()),
397
+ log_type=self.LogTypes.RUN_LOG,
392
398
  )
393
399
 
394
400
  def get_parameters(self, run_id: str) -> dict:
@@ -447,7 +453,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
447
453
  self.store(
448
454
  run_id=run_id,
449
455
  log_type=self.LogTypes.PARAMETER,
450
- contents={key: value.model_dump(by_alias=True)},
456
+ contents={key: json.loads(value.model_dump_json(by_alias=True))},
451
457
  name=key,
452
458
  )
453
459
 
@@ -538,7 +544,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
538
544
  self.store(
539
545
  run_id=run_id,
540
546
  log_type=self.LogTypes.STEP_LOG,
541
- contents=step_log.model_dump(),
547
+ contents=json.loads(step_log.model_dump_json()),
542
548
  name=step_log.internal_name,
543
549
  )
544
550
 
@@ -594,6 +600,6 @@ class ChunkedRunLogStore(BaseRunLogStore):
594
600
  self.store(
595
601
  run_id=run_id,
596
602
  log_type=self.LogTypes.BRANCH_LOG,
597
- contents=branch_log.model_dump(),
603
+ contents=json.loads(branch_log.model_dump_json()),
598
604
  name=internal_branch_name,
599
605
  )
@@ -0,0 +1,114 @@
1
+ import json
2
+ import logging
3
+ from functools import lru_cache
4
+ from typing import Any, Dict
5
+
6
+ from cloudpathlib import S3Client, S3Path
7
+ from pydantic import Field, SecretStr
8
+
9
+ from extensions.run_log_store.any_path import AnyPathRunLogStore
10
+ from runnable import defaults
11
+ from runnable.datastore import RunLog
12
+
13
+ logger = logging.getLogger(defaults.LOGGER_NAME)
14
+
15
+
16
+ @lru_cache
17
+ def get_minio_client(
18
+ endpoint_url: str, aws_access_key_id: str, aws_secret_access_key: str
19
+ ) -> S3Client:
20
+ return S3Client(
21
+ endpoint_url=endpoint_url,
22
+ aws_access_key_id=aws_access_key_id,
23
+ aws_secret_access_key=aws_secret_access_key,
24
+ )
25
+
26
+
27
+ class MinioRunLogStore(AnyPathRunLogStore):
28
+ """
29
+ In this type of Run Log store, we use a Minio (S3-compatible) bucket to store the JSON run log.
30
+
31
+ Every single run is stored as a different file which makes it compatible across other store types.
32
+
33
+ When to use:
34
+ When locally testing a pipeline and have the need to compare across runs.
35
+ It's fully featured and perfectly fine if your local environment is where you would do everything.
36
+
37
+ Do not use:
38
+ If you need parallelization on local, this run log would not support it.
39
+
40
+ Example config:
41
+
42
+ run_log:
43
+ type: file-system
44
+ config:
45
+ log_folder: The folder to put the logs. Defaults to .run_log_store
46
+
47
+ """
48
+
49
+ service_name: str = "minio"
50
+
51
+ endpoint_url: str = Field(default="http://localhost:9002")
52
+ aws_access_key_id: SecretStr = SecretStr(secret_value="minioadmin")
53
+ aws_secret_access_key: SecretStr = SecretStr(secret_value="minioadmin")
54
+ bucket: str = Field(default="runnable/run-logs")
55
+
56
+ def get_summary(self) -> Dict[str, Any]:
57
+ summary = {
58
+ "Type": self.service_name,
59
+ "Location": f"{self.endpoint_url}/{self.bucket}",
60
+ }
61
+
62
+ return summary
63
+
64
+ def get_run_log_bucket(self) -> S3Path:
65
+ run_id = self._context.run_id
66
+
67
+ return S3Path(
68
+ f"s3://{self.bucket}/{run_id}/",
69
+ client=get_minio_client(
70
+ self.endpoint_url,
71
+ self.aws_access_key_id.get_secret_value(),
72
+ self.aws_secret_access_key.get_secret_value(),
73
+ ),
74
+ )
75
+
76
+ def write_to_path(self, run_log: RunLog):
77
+ """
78
+ Write the run log to the folder
79
+
80
+ Args:
81
+ run_log (RunLog): The run log to be added to the database
82
+ """
83
+ run_log_bucket = self.get_run_log_bucket()
84
+ run_log_bucket.mkdir(parents=True, exist_ok=True)
85
+
86
+ run_log_object = run_log_bucket / f"{run_log.run_id}.json"
87
+ run_log_object.write_text(
88
+ json.dumps(run_log.model_dump_json(), ensure_ascii=True, indent=4)
89
+ )
90
+
91
+ def read_from_path(self, run_id: str) -> RunLog:
92
+ """
93
+ Look into the run log folder for the run log for the run id.
94
+
95
+ If the run log does not exist, raise an exception. If it does, decode it
96
+ as a RunLog and return it
97
+
98
+ Args:
99
+ run_id (str): The requested run id to retrieve the run log store
100
+
101
+ Raises:
102
+ FileNotFoundError: If the Run Log has not been found.
103
+
104
+ Returns:
105
+ RunLog: The decoded Run log
106
+ """
107
+ run_log_bucket = self.get_run_log_bucket()
108
+
109
+ run_log_object = run_log_bucket / f"{run_id}.json"
110
+
111
+ run_log_text = json.loads(run_log_object.read_text())
112
+ run_log = RunLog(**json.loads(run_log_text))
113
+
114
+ return run_log
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "runnable"
3
- version = "0.26.0"
3
+ version = "0.28.0"
4
4
  description = "Add your description here"
5
5
  readme = "README.md"
6
6
  authors = [
@@ -122,7 +122,9 @@ include = [
122
122
  [project.entry-points.'run_log_store']
123
123
  "buffered" = "runnable.datastore:BufferRunLogstore"
124
124
  file-system = "extensions.run_log_store.file_system:FileSystemRunLogstore"
125
+ "minio" = "extensions.run_log_store.minio:MinioRunLogStore"
125
126
  "chunked-fs" = "extensions.run_log_store.chunked_fs:ChunkedFileSystemRunLogStore"
127
+ "chunked-minio" = "extensions.run_log_store.chunked_minio:ChunkedMinioRunLogStore"
126
128
 
127
129
  [project.entry-points.'pickler']
128
130
  "pickle" = "runnable.pickler:NativePickler"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes