runnable 0.27.0__py3-none-any.whl → 0.28.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -144,14 +144,10 @@ class GenericJobExecutor(BaseJobExecutor):
144
144
  logger.info("No catalog settings found")
145
145
  return None
146
146
 
147
- compute_data_folder = self._context.catalog_handler.compute_data_folder
148
-
149
147
  data_catalogs = []
150
148
  for name_pattern in catalog_settings:
151
149
  data_catalog = self._context.catalog_handler.put(
152
150
  name=name_pattern,
153
- run_id=self._context.run_id,
154
- compute_data_folder=compute_data_folder,
155
151
  )
156
152
 
157
153
  logger.debug(f"Added data catalog: {data_catalog} to job log")
@@ -168,7 +164,5 @@ class GenericJobExecutor(BaseJobExecutor):
168
164
  )
169
165
  task_console.save_text(log_file_name)
170
166
  # Put the log file in the catalog
171
- self._context.catalog_handler.put(
172
- name=log_file_name, run_id=self._context.run_id
173
- )
167
+ self._context.catalog_handler.put(name=log_file_name)
174
168
  os.remove(log_file_name)
@@ -33,10 +33,6 @@ class AnyPathRunLogStore(BaseRunLogStore):
33
33
  service_name: str = "file-system"
34
34
  log_folder: str = defaults.LOG_LOCATION_FOLDER
35
35
 
36
- @property
37
- def log_folder_name(self):
38
- return self.log_folder
39
-
40
36
  def get_summary(self) -> Dict[str, Any]:
41
37
  summary = {"Type": self.service_name, "Location": self.log_folder}
42
38
 
@@ -2,17 +2,13 @@ import json
2
2
  import logging
3
3
  from pathlib import Path
4
4
  from string import Template
5
- from typing import Any, Dict, Optional, Union
6
-
7
- from cloudpathlib import CloudPath
5
+ from typing import Any, Dict, Union
8
6
 
9
7
  from extensions.run_log_store.generic_chunked import ChunkedRunLogStore
10
8
  from runnable import defaults, utils
11
9
 
12
10
  logger = logging.getLogger(defaults.LOGGER_NAME)
13
11
 
14
- MixT = Union[CloudPath, Path]
15
-
16
12
 
17
13
  class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
18
14
  """
@@ -30,7 +26,7 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
30
26
 
31
27
  def get_matches(
32
28
  self, run_id: str, name: str, multiple_allowed: bool = False
33
- ) -> Optional[Union[list[Path], list[CloudPath], MixT]]:
29
+ ) -> str | list[str] | None:
34
30
  """
35
31
  Get contents of files matching the pattern name*
36
32
 
@@ -48,8 +44,8 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
48
44
  if len(matches) > 1:
49
45
  msg = f"Multiple matches found for {name} while multiple is not allowed"
50
46
  raise Exception(msg)
51
- return matches[0]
52
- return matches
47
+ return str(matches[0])
48
+ return [str(match) for match in matches]
53
49
 
54
50
  return None
55
51
 
@@ -80,7 +76,7 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
80
76
 
81
77
  return str(name) + ".json"
82
78
 
83
- def _store(self, run_id: str, contents: dict, name: MixT, insert=False):
79
+ def _store(self, run_id: str, contents: dict, name: str, insert=False):
84
80
  """
85
81
  Store the contents against the name in the folder.
86
82
 
@@ -89,16 +85,17 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
89
85
  contents (dict): The dict to store
90
86
  name (str): The name to store as
91
87
  """
88
+
92
89
  log_folder_with_run_id = self.log_folder_with_run_id(run_id=run_id)
93
90
  if insert:
94
- name = log_folder_with_run_id / name
91
+ name = str(log_folder_with_run_id / name)
95
92
 
96
93
  utils.safe_make_dir(log_folder_with_run_id)
97
94
 
98
- with open(log_folder_with_run_id / self.safe_suffix_json(name.name), "w") as fw:
95
+ with open(self.safe_suffix_json(name), "w") as fw:
99
96
  json.dump(contents, fw, ensure_ascii=True, indent=4)
100
97
 
101
- def _retrieve(self, run_id: str, name: MixT) -> dict:
98
+ def _retrieve(self, run_id: str, name: str) -> dict:
102
99
  """
103
100
  Does the job of retrieving from the folder.
104
101
 
@@ -108,10 +105,10 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
108
105
  Returns:
109
106
  dict: The contents
110
107
  """
108
+
111
109
  contents: dict = {}
112
- log_folder_with_run_id = self.log_folder_with_run_id(run_id=run_id)
113
110
 
114
- with open(log_folder_with_run_id / self.safe_suffix_json(name.name), "r") as fr:
111
+ with open(self.safe_suffix_json(name), "r") as fr:
115
112
  contents = json.load(fr)
116
113
 
117
114
  return contents
@@ -0,0 +1,131 @@
1
+ import json
2
+ import logging
3
+ from functools import lru_cache
4
+ from string import Template
5
+ from typing import Any, Dict
6
+
7
+ from cloudpathlib import S3Client, S3Path
8
+ from pydantic import Field, SecretStr
9
+
10
+ from extensions.run_log_store.generic_chunked import ChunkedRunLogStore
11
+ from runnable import defaults
12
+
13
+ logger = logging.getLogger(defaults.LOGGER_NAME)
14
+
15
+
16
+ @lru_cache
17
+ def get_minio_client(
18
+ endpoint_url: str, aws_access_key_id: str, aws_secret_access_key: str
19
+ ) -> S3Client:
20
+ return S3Client(
21
+ endpoint_url=endpoint_url,
22
+ aws_access_key_id=aws_access_key_id,
23
+ aws_secret_access_key=aws_secret_access_key,
24
+ )
25
+
26
+
27
+ class ChunkedMinioRunLogStore(ChunkedRunLogStore):
28
+ """
29
+ File system run log store but chunks the run log into thread safe chunks.
30
+ This enables executions to be parallel.
31
+ """
32
+
33
+ service_name: str = "chunked-minio"
34
+ endpoint_url: str = Field(default="http://localhost:9002")
35
+ aws_access_key_id: SecretStr = SecretStr(secret_value="minioadmin")
36
+ aws_secret_access_key: SecretStr = SecretStr(secret_value="minioadmin")
37
+ bucket: str = Field(default="runnable/run-logs")
38
+
39
+ def get_summary(self) -> Dict[str, Any]:
40
+ summary = {
41
+ "Type": self.service_name,
42
+ "Location": f"{self.endpoint_url}/{self.bucket}",
43
+ }
44
+
45
+ return summary
46
+
47
+ def get_run_log_bucket(self) -> S3Path:
48
+ run_id = self._context.run_id
49
+
50
+ return S3Path(
51
+ f"s3://{self.bucket}/{run_id}/",
52
+ client=get_minio_client(
53
+ self.endpoint_url,
54
+ self.aws_access_key_id.get_secret_value(),
55
+ self.aws_secret_access_key.get_secret_value(),
56
+ ),
57
+ )
58
+
59
+ def get_matches(
60
+ self, run_id: str, name: str, multiple_allowed: bool = False
61
+ ) -> None | str | list[str]:
62
+ """
63
+ Get contents of files matching the pattern name*
64
+
65
+ Args:
66
+ run_id (str): The run id
67
+ name (str): The suffix of the file name to check in the run log store.
68
+ """
69
+ run_log_bucket = self.get_run_log_bucket()
70
+ run_log_bucket.mkdir(parents=True, exist_ok=True)
71
+
72
+ sub_name = Template(name).safe_substitute({"creation_time": ""})
73
+ matches = list(run_log_bucket.glob(f"{sub_name}*"))
74
+
75
+ if matches:
76
+ if not multiple_allowed:
77
+ if len(matches) > 1:
78
+ msg = f"Multiple matches found for {name} while multiple is not allowed"
79
+ raise Exception(msg)
80
+ return str(matches[0])
81
+ return [str(match) for match in matches]
82
+
83
+ return None
84
+
85
+ def _store(self, run_id: str, contents: dict, name: str, insert=False):
86
+ """
87
+ Store the contents against the name in the folder.
88
+
89
+ Args:
90
+ run_id (str): The run id
91
+ contents (dict): The dict to store
92
+ name (str): The name to store as
93
+ """
94
+
95
+ if insert:
96
+ name = str(self.get_run_log_bucket() / name)
97
+
98
+ self.get_run_log_bucket().mkdir(parents=True, exist_ok=True)
99
+ obj = S3Path(
100
+ name,
101
+ client=get_minio_client(
102
+ self.endpoint_url,
103
+ self.aws_access_key_id.get_secret_value(),
104
+ self.aws_secret_access_key.get_secret_value(),
105
+ ),
106
+ )
107
+
108
+ obj.write_text(json.dumps(contents, ensure_ascii=True, indent=4))
109
+
110
+ def _retrieve(self, run_id: str, name: str) -> dict:
111
+ """
112
+ Does the job of retrieving from the folder.
113
+
114
+ Args:
115
+ name (str): the name of the file to retrieve
116
+
117
+ Returns:
118
+ dict: The contents
119
+ """
120
+
121
+ obj = S3Path(
122
+ name,
123
+ client=get_minio_client(
124
+ self.endpoint_url,
125
+ self.aws_access_key_id.get_secret_value(),
126
+ self.aws_secret_access_key.get_secret_value(),
127
+ ),
128
+ )
129
+
130
+ run_log_text = json.loads(obj.read_text())
131
+ return run_log_text
@@ -1,12 +1,10 @@
1
+ import json
1
2
  import logging
2
3
  import time
3
4
  from abc import abstractmethod
4
5
  from enum import Enum
5
- from pathlib import Path
6
6
  from string import Template
7
- from typing import Any, Dict, Optional, Union
8
-
9
- from cloudpathlib import CloudPath
7
+ from typing import Any, Dict, Union
10
8
 
11
9
  from runnable import defaults, exceptions
12
10
  from runnable.datastore import (
@@ -23,9 +21,6 @@ from runnable.datastore import (
23
21
  logger = logging.getLogger(defaults.LOGGER_NAME)
24
22
 
25
23
 
26
- MixT = Union[CloudPath, Path] # Holds str, path
27
-
28
-
29
24
  class EntityNotFoundError(Exception):
30
25
  pass
31
26
 
@@ -89,7 +84,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
89
84
  @abstractmethod
90
85
  def get_matches(
91
86
  self, run_id: str, name: str, multiple_allowed: bool = False
92
- ) -> Optional[Union[list[Path], list[CloudPath], MixT]]:
87
+ ) -> None | str | list[str]:
93
88
  """
94
89
  Get contents of persistence layer matching the pattern name*
95
90
 
@@ -100,7 +95,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
100
95
  ...
101
96
 
102
97
  @abstractmethod
103
- def _store(self, run_id: str, contents: dict, name: MixT, insert: bool = False):
98
+ def _store(self, run_id: str, contents: dict, name: str, insert: bool = False):
104
99
  """
105
100
  Store the contents against the name in the persistence layer.
106
101
 
@@ -112,7 +107,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
112
107
  ...
113
108
 
114
109
  @abstractmethod
115
- def _retrieve(self, run_id: str, name: MixT) -> dict:
110
+ def _retrieve(self, run_id: str, name: str) -> dict:
116
111
  """
117
112
  Does the job of retrieving from the persistent layer.
118
113
 
@@ -142,18 +137,17 @@ class ChunkedRunLogStore(BaseRunLogStore):
142
137
  insert = False
143
138
 
144
139
  if match:
145
- existing_contents = self._retrieve(run_id=run_id, name=match) # type: ignore
140
+ assert isinstance(match, str)
141
+ existing_contents = self._retrieve(run_id=run_id, name=match)
146
142
  contents = dict(existing_contents, **contents)
147
- name_to_give = match # type: ignore
143
+ name_to_give = match
148
144
  else:
149
145
  name_to_give = Template(naming_pattern).safe_substitute(
150
146
  {"creation_time": str(int(time.time_ns()))}
151
147
  )
152
148
  insert = True
153
149
 
154
- self._store(
155
- run_id=run_id, contents=contents, name=Path(name_to_give), insert=insert
156
- )
150
+ self._store(run_id=run_id, contents=contents, name=name_to_give, insert=insert)
157
151
 
158
152
  def retrieve(
159
153
  self, run_id: str, log_type: LogTypes, name: str = "", multiple_allowed=False
@@ -194,12 +188,14 @@ class ChunkedRunLogStore(BaseRunLogStore):
194
188
 
195
189
  if matches:
196
190
  if not multiple_allowed:
197
- contents = self._retrieve(run_id=run_id, name=matches) # type: ignore
191
+ assert isinstance(matches, str)
192
+ contents = self._retrieve(run_id=run_id, name=matches)
198
193
  model = self.ModelTypes[log_type.name].value
199
194
  return model(**contents)
200
195
 
196
+ assert isinstance(matches, list)
201
197
  models = []
202
- for match in matches: # type: ignore
198
+ for match in matches:
203
199
  contents = self._retrieve(run_id=run_id, name=match)
204
200
  model = self.ModelTypes[log_type.name].value
205
201
  models.append(model(**contents))
@@ -347,7 +343,9 @@ class ChunkedRunLogStore(BaseRunLogStore):
347
343
  )
348
344
 
349
345
  self.store(
350
- run_id=run_id, contents=run_log.model_dump(), log_type=self.LogTypes.RUN_LOG
346
+ run_id=run_id,
347
+ contents=json.loads(run_log.model_dump_json()),
348
+ log_type=self.LogTypes.RUN_LOG,
351
349
  )
352
350
  return run_log
353
351
 
@@ -394,7 +392,9 @@ class ChunkedRunLogStore(BaseRunLogStore):
394
392
  """
395
393
  run_id = run_log.run_id
396
394
  self.store(
397
- run_id=run_id, contents=run_log.model_dump(), log_type=self.LogTypes.RUN_LOG
395
+ run_id=run_id,
396
+ contents=json.loads(run_log.model_dump_json()),
397
+ log_type=self.LogTypes.RUN_LOG,
398
398
  )
399
399
 
400
400
  def get_parameters(self, run_id: str) -> dict:
@@ -453,7 +453,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
453
453
  self.store(
454
454
  run_id=run_id,
455
455
  log_type=self.LogTypes.PARAMETER,
456
- contents={key: value.model_dump(by_alias=True)},
456
+ contents={key: json.loads(value.model_dump_json(by_alias=True))},
457
457
  name=key,
458
458
  )
459
459
 
@@ -544,7 +544,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
544
544
  self.store(
545
545
  run_id=run_id,
546
546
  log_type=self.LogTypes.STEP_LOG,
547
- contents=step_log.model_dump(),
547
+ contents=json.loads(step_log.model_dump_json()),
548
548
  name=step_log.internal_name,
549
549
  )
550
550
 
@@ -600,6 +600,6 @@ class ChunkedRunLogStore(BaseRunLogStore):
600
600
  self.store(
601
601
  run_id=run_id,
602
602
  log_type=self.LogTypes.BRANCH_LOG,
603
- contents=branch_log.model_dump(),
603
+ contents=json.loads(branch_log.model_dump_json()),
604
604
  name=internal_branch_name,
605
605
  )
@@ -46,7 +46,7 @@ class MinioRunLogStore(AnyPathRunLogStore):
46
46
 
47
47
  """
48
48
 
49
- service_name: str = "file-system"
49
+ service_name: str = "minio"
50
50
 
51
51
  endpoint_url: str = Field(default="http://localhost:9002")
52
52
  aws_access_key_id: SecretStr = SecretStr(secret_value="minioadmin")
@@ -54,7 +54,10 @@ class MinioRunLogStore(AnyPathRunLogStore):
54
54
  bucket: str = Field(default="runnable/run-logs")
55
55
 
56
56
  def get_summary(self) -> Dict[str, Any]:
57
- summary = {"Type": self.service_name, "Location": self.log_folder}
57
+ summary = {
58
+ "Type": self.service_name,
59
+ "Location": f"{self.endpoint_url}/{self.bucket}",
60
+ }
58
61
 
59
62
  return summary
60
63
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: runnable
3
- Version: 0.27.0
3
+ Version: 0.28.1
4
4
  Summary: Add your description here
5
5
  Author-email: "Vammi, Vijay" <vijay.vammi@astrazeneca.com>
6
6
  License-File: LICENSE
@@ -7,7 +7,7 @@ extensions/catalog/minio.py,sha256=R3GvfCxN1GTcs4bQIAWh79_GHDTVd14gnpKlzwFeKUI,2
7
7
  extensions/catalog/pyproject.toml,sha256=lLNxY6v04c8I5QK_zKw_E6sJTArSJRA_V-79ktaA3Hk,279
8
8
  extensions/catalog/s3.py,sha256=Sw5t8_kVRprn3uGGJCiHn7M9zw1CLaCOFj6YErtfG0o,287
9
9
  extensions/job_executor/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- extensions/job_executor/__init__.py,sha256=3zS2m6dg-L6SkKfL0kr4AxVUVmVJcepV6eipyMvQR6s,6006
10
+ extensions/job_executor/__init__.py,sha256=E2R6GV5cZTlZdqA5SVJ6ajZFh4oruM0k8AKHkpOZ3W8,5772
11
11
  extensions/job_executor/k8s.py,sha256=V5k6Rnf_sAFqptVbCrWs_x5sl3x3fSHwO96IZoiJxKU,15342
12
12
  extensions/job_executor/k8s_job_spec.yaml,sha256=7aFpxHdO_p6Hkc3YxusUOuAQTD1Myu0yTPX9DrhxbOg,1158
13
13
  extensions/job_executor/local.py,sha256=FvxTk0vyxdrbLOAyNkLyjvmmowypabWOSITQBK_ffVE,1907
@@ -26,11 +26,12 @@ extensions/pipeline_executor/pyproject.toml,sha256=ykTX7srR10PBYb8LsIwEj8vIPPIEZ
26
26
  extensions/pipeline_executor/retry.py,sha256=KGenhWrLLmOQgzMvqloXHDRJyoNs91t05rRW8aLW6FA,6969
27
27
  extensions/run_log_store/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
28
  extensions/run_log_store/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
- extensions/run_log_store/any_path.py,sha256=2Ff9-jd1mWazFXn4HvsF0MF-oQAdMNYrl698Kjpip4Q,2938
30
- extensions/run_log_store/chunked_fs.py,sha256=_2KmTaxK-p1e_YJqnQwyOqkLoCr80y-Wg1X2XdCC_9k,3546
29
+ extensions/run_log_store/any_path.py,sha256=0nN_LHbm2W6AHkerQmsVHq3EoybFQF8lxpCicacHo8Y,2861
30
+ extensions/run_log_store/chunked_fs.py,sha256=wHMKcAx6uFI4OOTp7QWCdGq9WvEFesbLp9VxHZU28l0,3341
31
+ extensions/run_log_store/chunked_minio.py,sha256=Itfkw4Ycf0uLCqxH3Uk_itmVgT7ipJp05yKfD22WBiY,4007
31
32
  extensions/run_log_store/file_system.py,sha256=hhrbhSnuzv8yzBr6DAu45NT8-sawPP86WA2-LY70vjw,2781
32
- extensions/run_log_store/generic_chunked.py,sha256=D08ADWK_rw4ed46_yeEankO-g9rwOjVYi4V-JaOOT6M,20445
33
- extensions/run_log_store/minio.py,sha256=pTHUzCHnehJ5JZoIrlZGjb4Cl-6yinp3MgqLZPvIxq4,3403
33
+ extensions/run_log_store/generic_chunked.py,sha256=bsGgChTDZN3dSbLmLJ9SIpcvArzVmzhTVAOYZytAUNc,20483
34
+ extensions/run_log_store/minio.py,sha256=omrKDSdRzmnVBg9xXkkdQb-icBIgBDRdpmwGRlMyCGk,3453
34
35
  extensions/run_log_store/pyproject.toml,sha256=YnmXsFvFG9uv_c0spLYBsNI_1sbktqxtHsOuClyvZ3g,288
35
36
  extensions/run_log_store/db/implementation_FF.py,sha256=euTnh0xzNF0e_DyfHQ4W-kG1AwTr8u7OuO3_cZkR5bM,5237
36
37
  extensions/run_log_store/db/integration_FF.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -55,8 +56,8 @@ runnable/sdk.py,sha256=T1nqDpLN9fULvvU9L-oY0EHqYdKUI9qk7oekLynm02Y,33568
55
56
  runnable/secrets.py,sha256=PXcEJw-4WPzeWRLfsatcPPyr1zkqgHzdRWRcS9vvpvM,2354
56
57
  runnable/tasks.py,sha256=X6xijut7ffwpfYDcXoN6y0AcRVd7fWHs676DJ00Kma4,29134
57
58
  runnable/utils.py,sha256=hBr7oGwGL2VgfITlQCTz-a1iwvvf7Mfl-HY8UdENZac,19929
58
- runnable-0.27.0.dist-info/METADATA,sha256=Bmncq3RAxN2eZGtlWQf5X8O0CkWRqmiCQiY-rH90odM,10047
59
- runnable-0.27.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
60
- runnable-0.27.0.dist-info/entry_points.txt,sha256=VZ-VOpcr16mj9jwxxMycwGNJlG_cziICRIe-LQrDwig,1760
61
- runnable-0.27.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
62
- runnable-0.27.0.dist-info/RECORD,,
59
+ runnable-0.28.1.dist-info/METADATA,sha256=j-mrRLCkfv8GkWDLLFDtheBccn9CP4H-NvKAwm2L9ak,10047
60
+ runnable-0.28.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
61
+ runnable-0.28.1.dist-info/entry_points.txt,sha256=ioMbWojILtdibYVgh1jXJ00SpK-tX3gy7oVGDq61cSk,1839
62
+ runnable-0.28.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
63
+ runnable-0.28.1.dist-info/RECORD,,
@@ -35,6 +35,7 @@ retry = extensions.pipeline_executor.retry:RetryExecutor
35
35
  [run_log_store]
36
36
  buffered = runnable.datastore:BufferRunLogstore
37
37
  chunked-fs = extensions.run_log_store.chunked_fs:ChunkedFileSystemRunLogStore
38
+ chunked-minio = extensions.run_log_store.chunked_minio:ChunkedMinioRunLogStore
38
39
  file-system = extensions.run_log_store.file_system:FileSystemRunLogstore
39
40
  minio = extensions.run_log_store.minio:MinioRunLogStore
40
41