runnable 0.27.0-py3-none-any.whl → 0.28.1-py3-none-any.whl

@@ -144,14 +144,10 @@ class GenericJobExecutor(BaseJobExecutor):
             logger.info("No catalog settings found")
             return None
 
-        compute_data_folder = self._context.catalog_handler.compute_data_folder
-
         data_catalogs = []
         for name_pattern in catalog_settings:
             data_catalog = self._context.catalog_handler.put(
                 name=name_pattern,
-                run_id=self._context.run_id,
-                compute_data_folder=compute_data_folder,
             )
 
             logger.debug(f"Added data catalog: {data_catalog} to job log")
@@ -168,7 +164,5 @@ class GenericJobExecutor(BaseJobExecutor):
         )
         task_console.save_text(log_file_name)
         # Put the log file in the catalog
-        self._context.catalog_handler.put(
-            name=log_file_name, run_id=self._context.run_id
-        )
+        self._context.catalog_handler.put(name=log_file_name)
         os.remove(log_file_name)
@@ -33,10 +33,6 @@ class AnyPathRunLogStore(BaseRunLogStore):
     service_name: str = "file-system"
     log_folder: str = defaults.LOG_LOCATION_FOLDER
 
-    @property
-    def log_folder_name(self):
-        return self.log_folder
-
     def get_summary(self) -> Dict[str, Any]:
         summary = {"Type": self.service_name, "Location": self.log_folder}
 
@@ -2,17 +2,13 @@ import json
 import logging
 from pathlib import Path
 from string import Template
-from typing import Any, Dict, Optional, Union
-
-from cloudpathlib import CloudPath
+from typing import Any, Dict, Union
 
 from extensions.run_log_store.generic_chunked import ChunkedRunLogStore
 from runnable import defaults, utils
 
 logger = logging.getLogger(defaults.LOGGER_NAME)
 
-MixT = Union[CloudPath, Path]
-
 
 class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
     """
@@ -30,7 +26,7 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
 
     def get_matches(
         self, run_id: str, name: str, multiple_allowed: bool = False
-    ) -> Optional[Union[list[Path], list[CloudPath], MixT]]:
+    ) -> str | list[str] | None:
         """
         Get contents of files matching the pattern name*
 
@@ -48,8 +44,8 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
                 if len(matches) > 1:
                     msg = f"Multiple matches found for {name} while multiple is not allowed"
                     raise Exception(msg)
-                return matches[0]
-            return matches
+                return str(matches[0])
+            return [str(match) for match in matches]
 
         return None
 
@@ -80,7 +76,7 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
 
         return str(name) + ".json"
 
-    def _store(self, run_id: str, contents: dict, name: MixT, insert=False):
+    def _store(self, run_id: str, contents: dict, name: str, insert=False):
         """
         Store the contents against the name in the folder.
 
@@ -89,16 +85,17 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
             contents (dict): The dict to store
             name (str): The name to store as
         """
+
         log_folder_with_run_id = self.log_folder_with_run_id(run_id=run_id)
         if insert:
-            name = log_folder_with_run_id / name
+            name = str(log_folder_with_run_id / name)
 
         utils.safe_make_dir(log_folder_with_run_id)
 
-        with open(log_folder_with_run_id / self.safe_suffix_json(name.name), "w") as fw:
+        with open(self.safe_suffix_json(name), "w") as fw:
             json.dump(contents, fw, ensure_ascii=True, indent=4)
 
-    def _retrieve(self, run_id: str, name: MixT) -> dict:
+    def _retrieve(self, run_id: str, name: str) -> dict:
         """
         Does the job of retrieving from the folder.
 
@@ -108,10 +105,10 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
         Returns:
             dict: The contents
         """
+
         contents: dict = {}
-        log_folder_with_run_id = self.log_folder_with_run_id(run_id=run_id)
 
-        with open(log_folder_with_run_id / self.safe_suffix_json(name.name), "r") as fr:
+        with open(self.safe_suffix_json(name), "r") as fr:
             contents = json.load(fr)
 
         return contents
@@ -0,0 +1,131 @@
+import json
+import logging
+from functools import lru_cache
+from string import Template
+from typing import Any, Dict
+
+from cloudpathlib import S3Client, S3Path
+from pydantic import Field, SecretStr
+
+from extensions.run_log_store.generic_chunked import ChunkedRunLogStore
+from runnable import defaults
+
+logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+@lru_cache
+def get_minio_client(
+    endpoint_url: str, aws_access_key_id: str, aws_secret_access_key: str
+) -> S3Client:
+    return S3Client(
+        endpoint_url=endpoint_url,
+        aws_access_key_id=aws_access_key_id,
+        aws_secret_access_key=aws_secret_access_key,
+    )
+
+
+class ChunkedMinioRunLogStore(ChunkedRunLogStore):
+    """
+    File system run log store but chunks the run log into thread safe chunks.
+    This enables executions to be parallel.
+    """
+
+    service_name: str = "chunked-minio"
+    endpoint_url: str = Field(default="http://localhost:9002")
+    aws_access_key_id: SecretStr = SecretStr(secret_value="minioadmin")
+    aws_secret_access_key: SecretStr = SecretStr(secret_value="minioadmin")
+    bucket: str = Field(default="runnable/run-logs")
+
+    def get_summary(self) -> Dict[str, Any]:
+        summary = {
+            "Type": self.service_name,
+            "Location": f"{self.endpoint_url}/{self.bucket}",
+        }
+
+        return summary
+
+    def get_run_log_bucket(self) -> S3Path:
+        run_id = self._context.run_id
+
+        return S3Path(
+            f"s3://{self.bucket}/{run_id}/",
+            client=get_minio_client(
+                self.endpoint_url,
+                self.aws_access_key_id.get_secret_value(),
+                self.aws_secret_access_key.get_secret_value(),
+            ),
+        )
+
+    def get_matches(
+        self, run_id: str, name: str, multiple_allowed: bool = False
+    ) -> None | str | list[str]:
+        """
+        Get contents of files matching the pattern name*
+
+        Args:
+            run_id (str): The run id
+            name (str): The suffix of the file name to check in the run log store.
+        """
+        run_log_bucket = self.get_run_log_bucket()
+        run_log_bucket.mkdir(parents=True, exist_ok=True)
+
+        sub_name = Template(name).safe_substitute({"creation_time": ""})
+        matches = list(run_log_bucket.glob(f"{sub_name}*"))
+
+        if matches:
+            if not multiple_allowed:
+                if len(matches) > 1:
+                    msg = f"Multiple matches found for {name} while multiple is not allowed"
+                    raise Exception(msg)
+                return str(matches[0])
+            return [str(match) for match in matches]
+
+        return None
+
+    def _store(self, run_id: str, contents: dict, name: str, insert=False):
+        """
+        Store the contents against the name in the folder.
+
+        Args:
+            run_id (str): The run id
+            contents (dict): The dict to store
+            name (str): The name to store as
+        """
+
+        if insert:
+            name = str(self.get_run_log_bucket() / name)
+
+        self.get_run_log_bucket().mkdir(parents=True, exist_ok=True)
+        obj = S3Path(
+            name,
+            client=get_minio_client(
+                self.endpoint_url,
+                self.aws_access_key_id.get_secret_value(),
+                self.aws_secret_access_key.get_secret_value(),
+            ),
+        )
+
+        obj.write_text(json.dumps(contents, ensure_ascii=True, indent=4))
+
+    def _retrieve(self, run_id: str, name: str) -> dict:
+        """
+        Does the job of retrieving from the folder.
+
+        Args:
+            name (str): the name of the file to retrieve
+
+        Returns:
+            dict: The contents
+        """
+
+        obj = S3Path(
+            name,
+            client=get_minio_client(
+                self.endpoint_url,
+                self.aws_access_key_id.get_secret_value(),
+                self.aws_secret_access_key.get_secret_value(),
+            ),
+        )
+
+        run_log_text = json.loads(obj.read_text())
+        return run_log_text
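
The new chunked-minio store above memoizes its S3 client: because get_minio_client is wrapped in functools.lru_cache, every S3Path built for the same endpoint and credentials reuses one client instead of creating a fresh one per chunk. A minimal sketch of that pattern, outside the package and with illustrative names, credentials and paths only:

import json
from functools import lru_cache

from cloudpathlib import S3Client, S3Path


@lru_cache
def get_client(endpoint_url: str, key: str, secret: str) -> S3Client:
    # Same (endpoint, key, secret) arguments -> same cached S3Client instance.
    return S3Client(
        endpoint_url=endpoint_url,
        aws_access_key_id=key,
        aws_secret_access_key=secret,
    )


first = get_client("http://localhost:9002", "minioadmin", "minioadmin")
second = get_client("http://localhost:9002", "minioadmin", "minioadmin")
assert first is second  # lru_cache returns the memoized client

# Chunks are then addressed as S3 paths under the run id, mirroring get_run_log_bucket.
chunk = S3Path("s3://runnable/run-logs/my-run/StepLog-train-123.json", client=first)
print(chunk.bucket, chunk.key)
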
@@ -1,12 +1,10 @@
+import json
 import logging
 import time
 from abc import abstractmethod
 from enum import Enum
-from pathlib import Path
 from string import Template
-from typing import Any, Dict, Optional, Union
-
-from cloudpathlib import CloudPath
+from typing import Any, Dict, Union
 
 from runnable import defaults, exceptions
 from runnable.datastore import (
@@ -23,9 +21,6 @@ from runnable.datastore import (
 logger = logging.getLogger(defaults.LOGGER_NAME)
 
 
-MixT = Union[CloudPath, Path]  # Holds str, path
-
-
 class EntityNotFoundError(Exception):
     pass
 
@@ -89,7 +84,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
     @abstractmethod
     def get_matches(
         self, run_id: str, name: str, multiple_allowed: bool = False
-    ) -> Optional[Union[list[Path], list[CloudPath], MixT]]:
+    ) -> None | str | list[str]:
        """
         Get contents of persistence layer matching the pattern name*
 
@@ -100,7 +95,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
         ...
 
     @abstractmethod
-    def _store(self, run_id: str, contents: dict, name: MixT, insert: bool = False):
+    def _store(self, run_id: str, contents: dict, name: str, insert: bool = False):
         """
         Store the contents against the name in the persistence layer.
 
@@ -112,7 +107,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
         ...
 
     @abstractmethod
-    def _retrieve(self, run_id: str, name: MixT) -> dict:
+    def _retrieve(self, run_id: str, name: str) -> dict:
         """
         Does the job of retrieving from the persistent layer.
 
@@ -142,18 +137,17 @@ class ChunkedRunLogStore(BaseRunLogStore):
         insert = False
 
         if match:
-            existing_contents = self._retrieve(run_id=run_id, name=match)  # type: ignore
+            assert isinstance(match, str)
+            existing_contents = self._retrieve(run_id=run_id, name=match)
             contents = dict(existing_contents, **contents)
-            name_to_give = match  # type: ignore
+            name_to_give = match
         else:
             name_to_give = Template(naming_pattern).safe_substitute(
                 {"creation_time": str(int(time.time_ns()))}
             )
             insert = True
 
-        self._store(
-            run_id=run_id, contents=contents, name=Path(name_to_give), insert=insert
-        )
+        self._store(run_id=run_id, contents=contents, name=name_to_give, insert=insert)
 
     def retrieve(
         self, run_id: str, log_type: LogTypes, name: str = "", multiple_allowed=False
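
The naming handling in store() above is easiest to see with the two Template calls side by side: the ${creation_time} placeholder is filled with time_ns() when a chunk is first written, and blanked to build the glob prefix when get_matches later searches for it. A small, self-contained illustration (the pattern string is hypothetical):

import time
from string import Template

naming_pattern = "StepLog-train_model-${creation_time}"

# Writing a new chunk: substitute a real timestamp, as store() does when no match exists.
name_to_give = Template(naming_pattern).safe_substitute(
    {"creation_time": str(int(time.time_ns()))}
)

# Searching for existing chunks: blank the placeholder and glob on the prefix,
# as the get_matches implementations do.
sub_name = Template(naming_pattern).safe_substitute({"creation_time": ""})

print(name_to_give)    # e.g. StepLog-train_model-1712345678901234567
print(f"{sub_name}*")  # StepLog-train_model-*
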
@@ -194,12 +188,14 @@ class ChunkedRunLogStore(BaseRunLogStore):
 
         if matches:
             if not multiple_allowed:
-                contents = self._retrieve(run_id=run_id, name=matches)  # type: ignore
+                assert isinstance(matches, str)
+                contents = self._retrieve(run_id=run_id, name=matches)
                 model = self.ModelTypes[log_type.name].value
                 return model(**contents)
 
+            assert isinstance(matches, list)
             models = []
-            for match in matches:  # type: ignore
+            for match in matches:
                 contents = self._retrieve(run_id=run_id, name=match)
                 model = self.ModelTypes[log_type.name].value
                 models.append(model(**contents))
@@ -347,7 +343,9 @@ class ChunkedRunLogStore(BaseRunLogStore):
         )
 
         self.store(
-            run_id=run_id, contents=run_log.model_dump(), log_type=self.LogTypes.RUN_LOG
+            run_id=run_id,
+            contents=json.loads(run_log.model_dump_json()),
+            log_type=self.LogTypes.RUN_LOG,
         )
         return run_log
 
@@ -394,7 +392,9 @@ class ChunkedRunLogStore(BaseRunLogStore):
         """
         run_id = run_log.run_id
         self.store(
-            run_id=run_id, contents=run_log.model_dump(), log_type=self.LogTypes.RUN_LOG
+            run_id=run_id,
+            contents=json.loads(run_log.model_dump_json()),
+            log_type=self.LogTypes.RUN_LOG,
         )
 
     def get_parameters(self, run_id: str) -> dict:
@@ -453,7 +453,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
             self.store(
                 run_id=run_id,
                 log_type=self.LogTypes.PARAMETER,
-                contents={key: value.model_dump(by_alias=True)},
+                contents={key: json.loads(value.model_dump_json(by_alias=True))},
                 name=key,
             )
 
@@ -544,7 +544,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
         self.store(
             run_id=run_id,
             log_type=self.LogTypes.STEP_LOG,
-            contents=step_log.model_dump(),
+            contents=json.loads(step_log.model_dump_json()),
             name=step_log.internal_name,
         )
 
@@ -600,6 +600,6 @@ class ChunkedRunLogStore(BaseRunLogStore):
         self.store(
             run_id=run_id,
             log_type=self.LogTypes.BRANCH_LOG,
-            contents=branch_log.model_dump(),
+            contents=json.loads(branch_log.model_dump_json()),
             name=internal_branch_name,
         )
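
The repeated switch from model_dump() to json.loads(model_dump_json()) in the hunks above ensures the stored dict contains only JSON-safe primitives before it reaches json.dump or write_text: model_dump() keeps Python objects such as datetimes, while the JSON round-trip coerces them to strings. A short illustration with a stand-in pydantic model (assumes pydantic v2; not one of the package's log models):

import json
from datetime import datetime

from pydantic import BaseModel


class ExampleLog(BaseModel):
    # Hypothetical model standing in for the run/step/branch logs.
    name: str
    created_at: datetime


log = ExampleLog(name="train", created_at=datetime(2024, 1, 1))

print(type(log.model_dump()["created_at"]))                   # <class 'datetime.datetime'>
print(type(json.loads(log.model_dump_json())["created_at"]))  # <class 'str'> (ISO 8601)

# json.dump(log.model_dump(), fp) would raise TypeError on the datetime;
# the JSON round-trip above is always serialisable.
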
@@ -46,7 +46,7 @@ class MinioRunLogStore(AnyPathRunLogStore):
 
     """
 
-    service_name: str = "file-system"
+    service_name: str = "minio"
 
     endpoint_url: str = Field(default="http://localhost:9002")
     aws_access_key_id: SecretStr = SecretStr(secret_value="minioadmin")
@@ -54,7 +54,10 @@ class MinioRunLogStore(AnyPathRunLogStore):
     bucket: str = Field(default="runnable/run-logs")
 
     def get_summary(self) -> Dict[str, Any]:
-        summary = {"Type": self.service_name, "Location": self.log_folder}
+        summary = {
+            "Type": self.service_name,
+            "Location": f"{self.endpoint_url}/{self.bucket}",
+        }
 
         return summary
 
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: runnable
-Version: 0.27.0
+Version: 0.28.1
 Summary: Add your description here
 Author-email: "Vammi, Vijay" <vijay.vammi@astrazeneca.com>
 License-File: LICENSE
@@ -7,7 +7,7 @@ extensions/catalog/minio.py,sha256=R3GvfCxN1GTcs4bQIAWh79_GHDTVd14gnpKlzwFeKUI,2
 extensions/catalog/pyproject.toml,sha256=lLNxY6v04c8I5QK_zKw_E6sJTArSJRA_V-79ktaA3Hk,279
 extensions/catalog/s3.py,sha256=Sw5t8_kVRprn3uGGJCiHn7M9zw1CLaCOFj6YErtfG0o,287
 extensions/job_executor/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-extensions/job_executor/__init__.py,sha256=3zS2m6dg-L6SkKfL0kr4AxVUVmVJcepV6eipyMvQR6s,6006
+extensions/job_executor/__init__.py,sha256=E2R6GV5cZTlZdqA5SVJ6ajZFh4oruM0k8AKHkpOZ3W8,5772
 extensions/job_executor/k8s.py,sha256=V5k6Rnf_sAFqptVbCrWs_x5sl3x3fSHwO96IZoiJxKU,15342
 extensions/job_executor/k8s_job_spec.yaml,sha256=7aFpxHdO_p6Hkc3YxusUOuAQTD1Myu0yTPX9DrhxbOg,1158
 extensions/job_executor/local.py,sha256=FvxTk0vyxdrbLOAyNkLyjvmmowypabWOSITQBK_ffVE,1907
@@ -26,11 +26,12 @@ extensions/pipeline_executor/pyproject.toml,sha256=ykTX7srR10PBYb8LsIwEj8vIPPIEZ
 extensions/pipeline_executor/retry.py,sha256=KGenhWrLLmOQgzMvqloXHDRJyoNs91t05rRW8aLW6FA,6969
 extensions/run_log_store/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 extensions/run_log_store/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-extensions/run_log_store/any_path.py,sha256=2Ff9-jd1mWazFXn4HvsF0MF-oQAdMNYrl698Kjpip4Q,2938
-extensions/run_log_store/chunked_fs.py,sha256=_2KmTaxK-p1e_YJqnQwyOqkLoCr80y-Wg1X2XdCC_9k,3546
+extensions/run_log_store/any_path.py,sha256=0nN_LHbm2W6AHkerQmsVHq3EoybFQF8lxpCicacHo8Y,2861
+extensions/run_log_store/chunked_fs.py,sha256=wHMKcAx6uFI4OOTp7QWCdGq9WvEFesbLp9VxHZU28l0,3341
+extensions/run_log_store/chunked_minio.py,sha256=Itfkw4Ycf0uLCqxH3Uk_itmVgT7ipJp05yKfD22WBiY,4007
 extensions/run_log_store/file_system.py,sha256=hhrbhSnuzv8yzBr6DAu45NT8-sawPP86WA2-LY70vjw,2781
-extensions/run_log_store/generic_chunked.py,sha256=D08ADWK_rw4ed46_yeEankO-g9rwOjVYi4V-JaOOT6M,20445
-extensions/run_log_store/minio.py,sha256=pTHUzCHnehJ5JZoIrlZGjb4Cl-6yinp3MgqLZPvIxq4,3403
+extensions/run_log_store/generic_chunked.py,sha256=bsGgChTDZN3dSbLmLJ9SIpcvArzVmzhTVAOYZytAUNc,20483
+extensions/run_log_store/minio.py,sha256=omrKDSdRzmnVBg9xXkkdQb-icBIgBDRdpmwGRlMyCGk,3453
 extensions/run_log_store/pyproject.toml,sha256=YnmXsFvFG9uv_c0spLYBsNI_1sbktqxtHsOuClyvZ3g,288
 extensions/run_log_store/db/implementation_FF.py,sha256=euTnh0xzNF0e_DyfHQ4W-kG1AwTr8u7OuO3_cZkR5bM,5237
 extensions/run_log_store/db/integration_FF.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -55,8 +56,8 @@ runnable/sdk.py,sha256=T1nqDpLN9fULvvU9L-oY0EHqYdKUI9qk7oekLynm02Y,33568
 runnable/secrets.py,sha256=PXcEJw-4WPzeWRLfsatcPPyr1zkqgHzdRWRcS9vvpvM,2354
 runnable/tasks.py,sha256=X6xijut7ffwpfYDcXoN6y0AcRVd7fWHs676DJ00Kma4,29134
 runnable/utils.py,sha256=hBr7oGwGL2VgfITlQCTz-a1iwvvf7Mfl-HY8UdENZac,19929
-runnable-0.27.0.dist-info/METADATA,sha256=Bmncq3RAxN2eZGtlWQf5X8O0CkWRqmiCQiY-rH90odM,10047
-runnable-0.27.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-runnable-0.27.0.dist-info/entry_points.txt,sha256=VZ-VOpcr16mj9jwxxMycwGNJlG_cziICRIe-LQrDwig,1760
-runnable-0.27.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-runnable-0.27.0.dist-info/RECORD,,
+runnable-0.28.1.dist-info/METADATA,sha256=j-mrRLCkfv8GkWDLLFDtheBccn9CP4H-NvKAwm2L9ak,10047
+runnable-0.28.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+runnable-0.28.1.dist-info/entry_points.txt,sha256=ioMbWojILtdibYVgh1jXJ00SpK-tX3gy7oVGDq61cSk,1839
+runnable-0.28.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+runnable-0.28.1.dist-info/RECORD,,
@@ -35,6 +35,7 @@ retry = extensions.pipeline_executor.retry:RetryExecutor
 [run_log_store]
 buffered = runnable.datastore:BufferRunLogstore
 chunked-fs = extensions.run_log_store.chunked_fs:ChunkedFileSystemRunLogStore
+chunked-minio = extensions.run_log_store.chunked_minio:ChunkedMinioRunLogStore
 file-system = extensions.run_log_store.file_system:FileSystemRunLogstore
 minio = extensions.run_log_store.minio:MinioRunLogStore