runnable 0.26.0__py3-none-any.whl → 0.28.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -4,6 +4,7 @@ from pathlib import Path
4
4
  from typing import Any
5
5
 
6
6
  from cloudpathlib import CloudPath, S3Client, S3Path
7
+ from pydantic import Field, SecretStr
7
8
 
8
9
  from extensions.catalog.any_path import AnyPathCatalog
9
10
  from runnable import defaults
@@ -25,9 +26,9 @@ def get_minio_client(
25
26
  class MinioCatalog(AnyPathCatalog):
26
27
  service_name: str = "minio"
27
28
 
28
- endpoint_url: str = "http://localhost:9002"
29
- aws_access_key_id: str = "minioadmin"
30
- aws_secret_access_key: str = "minioadmin"
29
+ endpoint_url: str = Field(default="http://localhost:9002")
30
+ aws_access_key_id: SecretStr = SecretStr(secret_value="minioadmin")
31
+ aws_secret_access_key: SecretStr = SecretStr(secret_value="minioadmin")
31
32
  bucket: str = "runnable"
32
33
 
33
34
  def get_summary(self) -> dict[str, Any]:
@@ -44,7 +45,9 @@ class MinioCatalog(AnyPathCatalog):
44
45
  return S3Path(
45
46
  f"s3://{self.bucket}/{run_id}/{self.compute_data_folder}".strip("."),
46
47
  client=get_minio_client(
47
- self.endpoint_url, self.aws_access_key_id, self.aws_secret_access_key
48
+ self.endpoint_url,
49
+ self.aws_access_key_id.get_secret_value(),
50
+ self.aws_secret_access_key.get_secret_value(),
48
51
  ),
49
52
  )
50
53
 
@@ -0,0 +1,100 @@
1
+ import logging
2
+ from abc import abstractmethod
3
+ from typing import Any, Dict
4
+
5
+ from runnable import defaults, exceptions
6
+ from runnable.datastore import BaseRunLogStore, RunLog
7
+
8
+ logger = logging.getLogger(defaults.LOGGER_NAME)
9
+
10
+
11
class AnyPathRunLogStore(BaseRunLogStore):
    """
    Base run log store that keeps every run log as one JSON document at a path.

    Sub-classes provide the actual persistence by implementing
    ``write_to_path`` and ``read_from_path``; creating, fetching and updating a
    run log is implemented here purely in terms of those two hooks.

    Every single run is stored as a different document, which keeps it
    compatible across other store types.

    When to use:
        When locally testing a pipeline and you need to compare across runs.
        It is fully featured and perfectly fine if your local environment is
        where you would do everything.

    Do not use:
        If you need parallelization on local, this run log would not support it.

    Example config:

    run_log:
      type: file-system
      config:
        log_folder: The folder to put the logs. Defaults to defaults.LOG_LOCATION_FOLDER

    """

    service_name: str = "file-system"
    log_folder: str = defaults.LOG_LOCATION_FOLDER

    def get_summary(self) -> Dict[str, Any]:
        """Return the store type and the location it writes to."""
        return {"Type": self.service_name, "Location": self.log_folder}

    @abstractmethod
    def write_to_path(self, run_log: RunLog): ...

    @abstractmethod
    def read_from_path(self, run_id: str) -> RunLog: ...

    def create_run_log(
        self,
        run_id: str,
        dag_hash: str = "",
        use_cached: bool = False,
        tag: str = "",
        original_run_id: str = "",
        status: str = defaults.CREATED,
    ) -> RunLog:
        """
        Create a new run log for ``run_id`` and persist it.

        ``use_cached`` and ``original_run_id`` are accepted but not used by
        this implementation.

        Raises:
            exceptions.RunLogExistsError: If a run log for ``run_id`` can
                already be read from the store.
        """
        # A successful lookup means the run id is already taken.
        try:
            self.get_run_log_by_id(run_id=run_id, full=False)
        except exceptions.RunLogNotFoundError:
            pass
        else:
            raise exceptions.RunLogExistsError(run_id=run_id)

        logger.info(f"{self.service_name} Creating a Run Log for : {run_id}")
        new_run_log = RunLog(
            run_id=run_id,
            dag_hash=dag_hash,
            tag=tag,
            status=status,
        )
        self.write_to_path(new_run_log)
        return new_run_log

    def get_run_log_by_id(
        self,
        run_id: str,
        full: bool = False,
    ) -> RunLog:
        """
        Return the run log stored for ``run_id``.

        ``full`` is accepted but not used by this implementation.

        Raises:
            exceptions.RunLogNotFoundError: If ``read_from_path`` raises
                ``FileNotFoundError``.
        """
        logger.info(f"{self.service_name} Getting a Run Log for : {run_id}")
        try:
            return self.read_from_path(run_id)
        except FileNotFoundError as missing:
            raise exceptions.RunLogNotFoundError(run_id) from missing

    def put_run_log(self, run_log: RunLog):
        """Persist ``run_log`` by writing it through ``write_to_path``."""
        logger.info(
            f"{self.service_name} Putting the run log in the DB: {run_log.run_id}"
        )
        self.write_to_path(run_log)
@@ -2,15 +2,13 @@ import json
2
2
  import logging
3
3
  from pathlib import Path
4
4
  from string import Template
5
- from typing import Any, Dict, Optional, Sequence, Union
5
+ from typing import Any, Dict, Union
6
6
 
7
7
  from extensions.run_log_store.generic_chunked import ChunkedRunLogStore
8
8
  from runnable import defaults, utils
9
9
 
10
10
  logger = logging.getLogger(defaults.LOGGER_NAME)
11
11
 
12
- T = Union[str, Path]
13
-
14
12
 
15
13
  class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
16
14
  """
@@ -28,7 +26,7 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
28
26
 
29
27
  def get_matches(
30
28
  self, run_id: str, name: str, multiple_allowed: bool = False
31
- ) -> Optional[Union[Sequence[T], T]]:
29
+ ) -> str | list[str] | None:
32
30
  """
33
31
  Get contents of files matching the pattern name*
34
32
 
@@ -46,8 +44,8 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
46
44
  if len(matches) > 1:
47
45
  msg = f"Multiple matches found for {name} while multiple is not allowed"
48
46
  raise Exception(msg)
49
- return matches[0]
50
- return matches
47
+ return str(matches[0])
48
+ return [str(match) for match in matches]
51
49
 
52
50
  return None
53
51
 
@@ -78,7 +76,7 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
78
76
 
79
77
  return str(name) + ".json"
80
78
 
81
- def _store(self, run_id: str, contents: dict, name: Union[Path, str], insert=False):
79
+ def _store(self, run_id: str, contents: dict, name: str, insert=False):
82
80
  """
83
81
  Store the contents against the name in the folder.
84
82
 
@@ -87,15 +85,17 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
87
85
  contents (dict): The dict to store
88
86
  name (str): The name to store as
89
87
  """
88
+
89
+ log_folder_with_run_id = self.log_folder_with_run_id(run_id=run_id)
90
90
  if insert:
91
- name = self.log_folder_with_run_id(run_id=run_id) / name
91
+ name = str(log_folder_with_run_id / name)
92
92
 
93
- utils.safe_make_dir(self.log_folder_with_run_id(run_id=run_id))
93
+ utils.safe_make_dir(log_folder_with_run_id)
94
94
 
95
95
  with open(self.safe_suffix_json(name), "w") as fw:
96
96
  json.dump(contents, fw, ensure_ascii=True, indent=4)
97
97
 
98
- def _retrieve(self, name: Union[str, Path]) -> dict:
98
+ def _retrieve(self, run_id: str, name: str) -> dict:
99
99
  """
100
100
  Does the job of retrieving from the folder.
101
101
 
@@ -105,6 +105,7 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
105
105
  Returns:
106
106
  dict: The contents
107
107
  """
108
+
108
109
  contents: dict = {}
109
110
 
110
111
  with open(self.safe_suffix_json(name), "r") as fr:
@@ -0,0 +1,131 @@
1
+ import json
2
+ import logging
3
+ from functools import lru_cache
4
+ from string import Template
5
+ from typing import Any, Dict
6
+
7
+ from cloudpathlib import S3Client, S3Path
8
+ from pydantic import Field, SecretStr
9
+
10
+ from extensions.run_log_store.generic_chunked import ChunkedRunLogStore
11
+ from runnable import defaults
12
+
13
+ logger = logging.getLogger(defaults.LOGGER_NAME)
14
+
15
+
16
@lru_cache
def get_minio_client(
    endpoint_url: str, aws_access_key_id: str, aws_secret_access_key: str
) -> S3Client:
    """
    Build an ``S3Client`` for the given endpoint and credentials.

    The function is wrapped in ``lru_cache``, so repeated calls with the same
    three arguments return the cached client instead of constructing a new one.
    """
    connection_settings = {
        "endpoint_url": endpoint_url,
        "aws_access_key_id": aws_access_key_id,
        "aws_secret_access_key": aws_secret_access_key,
    }
    return S3Client(**connection_settings)
25
+
26
+
27
class ChunkedMinioRunLogStore(ChunkedRunLogStore):
    """
    Chunked run log store that keeps the chunks as objects in a MinIO bucket.

    The run log is chunked into thread safe chunks.
    This enables executions to be parallel.
    """

    service_name: str = "chunked-minio"
    endpoint_url: str = Field(default="http://localhost:9002")
    aws_access_key_id: SecretStr = SecretStr(secret_value="minioadmin")
    aws_secret_access_key: SecretStr = SecretStr(secret_value="minioadmin")
    bucket: str = Field(default="runnable/run-logs")

    def _minio_client(self) -> S3Client:
        """Return the (cached) client for this store's endpoint and credentials."""
        return get_minio_client(
            self.endpoint_url,
            self.aws_access_key_id.get_secret_value(),
            self.aws_secret_access_key.get_secret_value(),
        )

    def get_summary(self) -> Dict[str, Any]:
        """Return the store type and the endpoint/bucket it writes to."""
        return {
            "Type": self.service_name,
            "Location": f"{self.endpoint_url}/{self.bucket}",
        }

    def get_run_log_bucket(self) -> S3Path:
        """
        Return the S3 path under which the current run's chunks live.

        The run id is taken from ``self._context.run_id``.
        """
        current_run_id = self._context.run_id
        return S3Path(
            f"s3://{self.bucket}/{current_run_id}/",
            client=self._minio_client(),
        )

    def get_matches(
        self, run_id: str, name: str, multiple_allowed: bool = False
    ) -> None | str | list[str]:
        """
        Find the stored objects whose name matches the pattern name*

        Args:
            run_id (str): The run id
            name (str): The suffix of the file name to check in the run log store.
            multiple_allowed (bool): When True, return every match as a list;
                otherwise at most one match is tolerated.

        Returns:
            None when nothing matches, a single path string when
            ``multiple_allowed`` is False, else a list of path strings.

        Raises:
            Exception: If several objects match while ``multiple_allowed`` is False.
        """
        run_folder = self.get_run_log_bucket()
        run_folder.mkdir(parents=True, exist_ok=True)

        # Blank out the creation time placeholder so the glob matches any timestamp.
        pattern_prefix = Template(name).safe_substitute({"creation_time": ""})
        found = list(run_folder.glob(f"{pattern_prefix}*"))

        if not found:
            return None

        if multiple_allowed:
            return [str(item) for item in found]

        if len(found) > 1:
            msg = f"Multiple matches found for {name} while multiple is not allowed"
            raise Exception(msg)

        return str(found[0])

    def _store(self, run_id: str, contents: dict, name: str, insert=False):
        """
        Store the contents against the name in the run's folder of the bucket.

        Args:
            run_id (str): The run id
            contents (dict): The dict to store
            name (str): The name to store as
            insert (bool): When True, ``name`` is joined onto the run's folder;
                otherwise it is used as given.
        """
        run_folder = self.get_run_log_bucket()
        if insert:
            name = str(run_folder / name)

        run_folder.mkdir(parents=True, exist_ok=True)

        serialised = json.dumps(contents, ensure_ascii=True, indent=4)
        S3Path(name, client=self._minio_client()).write_text(serialised)

    def _retrieve(self, run_id: str, name: str) -> dict:
        """
        Read the object called ``name`` and decode its JSON content.

        Args:
            run_id (str): The run id (not used by this implementation)
            name (str): the name of the file to retrieve

        Returns:
            dict: The contents
        """
        stored_object = S3Path(name, client=self._minio_client())
        return json.loads(stored_object.read_text())
@@ -3,13 +3,14 @@ import logging
3
3
  from pathlib import Path
4
4
  from typing import Any, Dict
5
5
 
6
- from runnable import defaults, exceptions, utils
7
- from runnable.datastore import BaseRunLogStore, RunLog
6
+ from extensions.run_log_store.any_path import AnyPathRunLogStore
7
+ from runnable import defaults, utils
8
+ from runnable.datastore import RunLog
8
9
 
9
10
  logger = logging.getLogger(defaults.LOGGER_NAME)
10
11
 
11
12
 
12
- class FileSystemRunLogstore(BaseRunLogStore):
13
+ class FileSystemRunLogstore(AnyPathRunLogStore):
13
14
  """
14
15
  In this type of Run Log store, we use a file system to store the JSON run log.
15
16
 
@@ -43,7 +44,7 @@ class FileSystemRunLogstore(BaseRunLogStore):
43
44
 
44
45
  return summary
45
46
 
46
- def write_to_folder(self, run_log: RunLog):
47
+ def write_to_path(self, run_log: RunLog):
47
48
  """
48
49
  Write the run log to the folder
49
50
 
@@ -60,7 +61,7 @@ class FileSystemRunLogstore(BaseRunLogStore):
60
61
  with json_file_path.open("w") as fw:
61
62
  json.dump(run_log.model_dump(), fw, ensure_ascii=True, indent=4) # pylint: disable=no-member
62
63
 
63
- def get_from_folder(self, run_id: str) -> RunLog:
64
+ def read_from_path(self, run_id: str) -> RunLog:
64
65
  """
65
66
  Look into the run log folder for the run log for the run id.
66
67
 
@@ -88,58 +89,3 @@ class FileSystemRunLogstore(BaseRunLogStore):
88
89
  json_str = json.load(fr)
89
90
  run_log = RunLog(**json_str) # pylint: disable=no-member
90
91
  return run_log
91
-
92
- def create_run_log(
93
- self,
94
- run_id: str,
95
- dag_hash: str = "",
96
- use_cached: bool = False,
97
- tag: str = "",
98
- original_run_id: str = "",
99
- status: str = defaults.CREATED,
100
- ) -> RunLog:
101
- """
102
- # Creates a Run log
103
- # Adds it to the db
104
- """
105
-
106
- try:
107
- self.get_run_log_by_id(run_id=run_id, full=False)
108
- raise exceptions.RunLogExistsError(run_id=run_id)
109
- except exceptions.RunLogNotFoundError:
110
- pass
111
-
112
- logger.info(f"{self.service_name} Creating a Run Log for : {run_id}")
113
- run_log = RunLog(
114
- run_id=run_id,
115
- dag_hash=dag_hash,
116
- tag=tag,
117
- status=status,
118
- )
119
- self.write_to_folder(run_log)
120
- return run_log
121
-
122
- def get_run_log_by_id(
123
- self,
124
- run_id: str,
125
- full: bool = False,
126
- ) -> RunLog:
127
- """
128
- # Returns the run_log defined by id
129
- # Raises Exception if not found
130
- """
131
- try:
132
- logger.info(f"{self.service_name} Getting a Run Log for : {run_id}")
133
- run_log = self.get_from_folder(run_id)
134
- return run_log
135
- except FileNotFoundError as e:
136
- raise exceptions.RunLogNotFoundError(run_id) from e
137
-
138
- def put_run_log(self, run_log: RunLog):
139
- """
140
- # Puts the run_log into the database
141
- """
142
- logger.info(
143
- f"{self.service_name} Putting the run log in the DB: {run_log.run_id}"
144
- )
145
- self.write_to_folder(run_log)
@@ -1,10 +1,10 @@
1
+ import json
1
2
  import logging
2
3
  import time
3
4
  from abc import abstractmethod
4
5
  from enum import Enum
5
- from pathlib import Path
6
6
  from string import Template
7
- from typing import Any, Dict, Optional, Sequence, Union
7
+ from typing import Any, Dict, Union
8
8
 
9
9
  from runnable import defaults, exceptions
10
10
  from runnable.datastore import (
@@ -21,9 +21,6 @@ from runnable.datastore import (
21
21
  logger = logging.getLogger(defaults.LOGGER_NAME)
22
22
 
23
23
 
24
- T = Union[str, Path] # Holds str, path
25
-
26
-
27
24
  class EntityNotFoundError(Exception):
28
25
  pass
29
26
 
@@ -87,7 +84,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
87
84
  @abstractmethod
88
85
  def get_matches(
89
86
  self, run_id: str, name: str, multiple_allowed: bool = False
90
- ) -> Optional[Union[Sequence[T], T]]:
87
+ ) -> None | str | list[str]:
91
88
  """
92
89
  Get contents of persistence layer matching the pattern name*
93
90
 
@@ -98,7 +95,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
98
95
  ...
99
96
 
100
97
  @abstractmethod
101
- def _store(self, run_id: str, contents: dict, name: T, insert: bool = False):
98
+ def _store(self, run_id: str, contents: dict, name: str, insert: bool = False):
102
99
  """
103
100
  Store the contents against the name in the persistence layer.
104
101
 
@@ -110,7 +107,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
110
107
  ...
111
108
 
112
109
  @abstractmethod
113
- def _retrieve(self, name: T) -> dict:
110
+ def _retrieve(self, run_id: str, name: str) -> dict:
114
111
  """
115
112
  Does the job of retrieving from the persistent layer.
116
113
 
@@ -140,9 +137,10 @@ class ChunkedRunLogStore(BaseRunLogStore):
140
137
  insert = False
141
138
 
142
139
  if match:
143
- existing_contents = self._retrieve(name=match) # type: ignore
140
+ assert isinstance(match, str)
141
+ existing_contents = self._retrieve(run_id=run_id, name=match)
144
142
  contents = dict(existing_contents, **contents)
145
- name_to_give = match # type: ignore
143
+ name_to_give = match
146
144
  else:
147
145
  name_to_give = Template(naming_pattern).safe_substitute(
148
146
  {"creation_time": str(int(time.time_ns()))}
@@ -190,13 +188,15 @@ class ChunkedRunLogStore(BaseRunLogStore):
190
188
 
191
189
  if matches:
192
190
  if not multiple_allowed:
193
- contents = self._retrieve(name=matches) # type: ignore
191
+ assert isinstance(matches, str)
192
+ contents = self._retrieve(run_id=run_id, name=matches)
194
193
  model = self.ModelTypes[log_type.name].value
195
194
  return model(**contents)
196
195
 
196
+ assert isinstance(matches, list)
197
197
  models = []
198
- for match in matches: # type: ignore
199
- contents = self._retrieve(name=match)
198
+ for match in matches:
199
+ contents = self._retrieve(run_id=run_id, name=match)
200
200
  model = self.ModelTypes[log_type.name].value
201
201
  models.append(model(**contents))
202
202
  return models
@@ -225,7 +225,9 @@ class ChunkedRunLogStore(BaseRunLogStore):
225
225
  # No branch logs are found
226
226
  return {}
227
227
  # Forcing get_matches to always return a list is a better design
228
- epoch_created = [str(match).split("-")[-1] for match in matches] # type: ignore
228
+
229
+ assert isinstance(matches, list)
230
+ epoch_created = [str(match).split("-")[-1] for match in matches]
229
231
 
230
232
  # sort matches by epoch created
231
233
  epoch_created, matches = zip(*sorted(zip(epoch_created, matches))) # type: ignore
@@ -234,7 +236,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
234
236
 
235
237
  for match in matches:
236
238
  model = self.ModelTypes[log_type.name].value
237
- log_model = model(**self._retrieve(match))
239
+ log_model = model(**self._retrieve(run_id=run_id, name=match))
238
240
  logs[log_model.internal_name] = log_model # type: ignore
239
241
 
240
242
  return logs
@@ -341,7 +343,9 @@ class ChunkedRunLogStore(BaseRunLogStore):
341
343
  )
342
344
 
343
345
  self.store(
344
- run_id=run_id, contents=run_log.model_dump(), log_type=self.LogTypes.RUN_LOG
346
+ run_id=run_id,
347
+ contents=json.loads(run_log.model_dump_json()),
348
+ log_type=self.LogTypes.RUN_LOG,
345
349
  )
346
350
  return run_log
347
351
 
@@ -388,7 +392,9 @@ class ChunkedRunLogStore(BaseRunLogStore):
388
392
  """
389
393
  run_id = run_log.run_id
390
394
  self.store(
391
- run_id=run_id, contents=run_log.model_dump(), log_type=self.LogTypes.RUN_LOG
395
+ run_id=run_id,
396
+ contents=json.loads(run_log.model_dump_json()),
397
+ log_type=self.LogTypes.RUN_LOG,
392
398
  )
393
399
 
394
400
  def get_parameters(self, run_id: str) -> dict:
@@ -447,7 +453,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
447
453
  self.store(
448
454
  run_id=run_id,
449
455
  log_type=self.LogTypes.PARAMETER,
450
- contents={key: value.model_dump(by_alias=True)},
456
+ contents={key: json.loads(value.model_dump_json(by_alias=True))},
451
457
  name=key,
452
458
  )
453
459
 
@@ -538,7 +544,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
538
544
  self.store(
539
545
  run_id=run_id,
540
546
  log_type=self.LogTypes.STEP_LOG,
541
- contents=step_log.model_dump(),
547
+ contents=json.loads(step_log.model_dump_json()),
542
548
  name=step_log.internal_name,
543
549
  )
544
550
 
@@ -594,6 +600,6 @@ class ChunkedRunLogStore(BaseRunLogStore):
594
600
  self.store(
595
601
  run_id=run_id,
596
602
  log_type=self.LogTypes.BRANCH_LOG,
597
- contents=branch_log.model_dump(),
603
+ contents=json.loads(branch_log.model_dump_json()),
598
604
  name=internal_branch_name,
599
605
  )
@@ -0,0 +1,114 @@
1
+ import json
2
+ import logging
3
+ from functools import lru_cache
4
+ from typing import Any, Dict
5
+
6
+ from cloudpathlib import S3Client, S3Path
7
+ from pydantic import Field, SecretStr
8
+
9
+ from extensions.run_log_store.any_path import AnyPathRunLogStore
10
+ from runnable import defaults
11
+ from runnable.datastore import RunLog
12
+
13
+ logger = logging.getLogger(defaults.LOGGER_NAME)
14
+
15
+
16
@lru_cache
def get_minio_client(
    endpoint_url: str, aws_access_key_id: str, aws_secret_access_key: str
) -> S3Client:
    """
    Create the ``S3Client`` used to talk to the MinIO endpoint.

    Because of ``lru_cache``, calling this again with an identical endpoint
    and key pair hands back the previously created client.
    """
    client_kwargs = dict(
        endpoint_url=endpoint_url,
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
    )
    return S3Client(**client_kwargs)
25
+
26
+
27
class MinioRunLogStore(AnyPathRunLogStore):
    """
    Run log store that keeps each run log as one JSON object in a MinIO bucket.

    Every single run is stored as a different object named ``<run_id>.json``
    under ``s3://<bucket>/<run_id>/``.

    Do not use:
        If you need parallelization, this run log would not support it.

    Example config:

    run_log:
      type: minio
      config:
        endpoint_url: The MinIO endpoint. Defaults to http://localhost:9002
        aws_access_key_id: The access key. Defaults to minioadmin
        aws_secret_access_key: The secret key. Defaults to minioadmin
        bucket: The bucket (and optional prefix). Defaults to runnable/run-logs

    """

    service_name: str = "minio"

    endpoint_url: str = Field(default="http://localhost:9002")
    aws_access_key_id: SecretStr = SecretStr(secret_value="minioadmin")
    aws_secret_access_key: SecretStr = SecretStr(secret_value="minioadmin")
    bucket: str = Field(default="runnable/run-logs")

    def get_summary(self) -> Dict[str, Any]:
        """Return the store type and the endpoint/bucket it writes to."""
        summary = {
            "Type": self.service_name,
            "Location": f"{self.endpoint_url}/{self.bucket}",
        }

        return summary

    def get_run_log_bucket(self) -> S3Path:
        """
        Return the S3 path of the folder holding the current run's log.

        The run id is taken from ``self._context.run_id``.
        """
        run_id = self._context.run_id

        return S3Path(
            f"s3://{self.bucket}/{run_id}/",
            client=get_minio_client(
                self.endpoint_url,
                self.aws_access_key_id.get_secret_value(),
                self.aws_secret_access_key.get_secret_value(),
            ),
        )

    def write_to_path(self, run_log: RunLog):
        """
        Write the run log to the bucket as an indented JSON object.

        Args:
            run_log (RunLog): The run log to be added to the database
        """
        run_log_bucket = self.get_run_log_bucket()
        run_log_bucket.mkdir(parents=True, exist_ok=True)

        run_log_object = run_log_bucket / f"{run_log.run_id}.json"

        # model_dump_json() already returns JSON text. Parse it back before
        # dumping so the stored object is a real, indented JSON document and
        # not a JSON string literal wrapping another JSON document.
        payload = json.loads(run_log.model_dump_json())
        run_log_object.write_text(json.dumps(payload, ensure_ascii=True, indent=4))

    def read_from_path(self, run_id: str) -> RunLog:
        """
        Look into the run log folder for the run log for the run id.

        If it exists, decode it as a RunLog and return it.

        Args:
            run_id (str): The requested run id to retrieve the run log store

        Raises:
            FileNotFoundError: Expected when the object is missing; this
                relies on the underlying read raising it -- TODO confirm
                against cloudpathlib.

        Returns:
            RunLog: The decoded Run log
        """
        run_log_bucket = self.get_run_log_bucket()

        run_log_object = run_log_bucket / f"{run_id}.json"

        payload = json.loads(run_log_object.read_text())
        if isinstance(payload, str):
            # Objects written by the earlier implementation were JSON-encoded
            # twice; decode the inner document so they stay readable.
            payload = json.loads(payload)

        return RunLog(**payload)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: runnable
3
- Version: 0.26.0
3
+ Version: 0.28.0
4
4
  Summary: Add your description here
5
5
  Author-email: "Vammi, Vijay" <vijay.vammi@astrazeneca.com>
6
6
  License-File: LICENSE
@@ -3,7 +3,7 @@ extensions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  extensions/catalog/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  extensions/catalog/any_path.py,sha256=aNjphoPIyllUfY2uNDFWD1ErM3Px6izSGr0-oGowN8k,7263
5
5
  extensions/catalog/file_system.py,sha256=T_qFPFfrmykoAMc1rjNi_DBb437me8WPRcFglwAK744,1767
6
- extensions/catalog/minio.py,sha256=D5ofitU75OJGZdPM8s-ALCHrSR6jawIe6blDo8ebiXM,2179
6
+ extensions/catalog/minio.py,sha256=R3GvfCxN1GTcs4bQIAWh79_GHDTVd14gnpKlzwFeKUI,2363
7
7
  extensions/catalog/pyproject.toml,sha256=lLNxY6v04c8I5QK_zKw_E6sJTArSJRA_V-79ktaA3Hk,279
8
8
  extensions/catalog/s3.py,sha256=Sw5t8_kVRprn3uGGJCiHn7M9zw1CLaCOFj6YErtfG0o,287
9
9
  extensions/job_executor/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -26,9 +26,12 @@ extensions/pipeline_executor/pyproject.toml,sha256=ykTX7srR10PBYb8LsIwEj8vIPPIEZ
26
26
  extensions/pipeline_executor/retry.py,sha256=KGenhWrLLmOQgzMvqloXHDRJyoNs91t05rRW8aLW6FA,6969
27
27
  extensions/run_log_store/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
28
  extensions/run_log_store/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
- extensions/run_log_store/chunked_fs.py,sha256=ElftNIwBmA2U2QAVGxruhcqepV312M2C9-GWVtiFaMM,3331
30
- extensions/run_log_store/file_system.py,sha256=SANQ3aFjQeUaq8euvdpwju-8uci9UxdiEDupXtLYppQ,4303
31
- extensions/run_log_store/generic_chunked.py,sha256=BX0j6S1Fwma3wuitHelUYm69FqXGToh10Zk2kamw6ZY,20253
29
+ extensions/run_log_store/any_path.py,sha256=0nN_LHbm2W6AHkerQmsVHq3EoybFQF8lxpCicacHo8Y,2861
30
+ extensions/run_log_store/chunked_fs.py,sha256=wHMKcAx6uFI4OOTp7QWCdGq9WvEFesbLp9VxHZU28l0,3341
31
+ extensions/run_log_store/chunked_minio.py,sha256=Itfkw4Ycf0uLCqxH3Uk_itmVgT7ipJp05yKfD22WBiY,4007
32
+ extensions/run_log_store/file_system.py,sha256=hhrbhSnuzv8yzBr6DAu45NT8-sawPP86WA2-LY70vjw,2781
33
+ extensions/run_log_store/generic_chunked.py,sha256=bsGgChTDZN3dSbLmLJ9SIpcvArzVmzhTVAOYZytAUNc,20483
34
+ extensions/run_log_store/minio.py,sha256=omrKDSdRzmnVBg9xXkkdQb-icBIgBDRdpmwGRlMyCGk,3453
32
35
  extensions/run_log_store/pyproject.toml,sha256=YnmXsFvFG9uv_c0spLYBsNI_1sbktqxtHsOuClyvZ3g,288
33
36
  extensions/run_log_store/db/implementation_FF.py,sha256=euTnh0xzNF0e_DyfHQ4W-kG1AwTr8u7OuO3_cZkR5bM,5237
34
37
  extensions/run_log_store/db/integration_FF.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -53,8 +56,8 @@ runnable/sdk.py,sha256=T1nqDpLN9fULvvU9L-oY0EHqYdKUI9qk7oekLynm02Y,33568
53
56
  runnable/secrets.py,sha256=PXcEJw-4WPzeWRLfsatcPPyr1zkqgHzdRWRcS9vvpvM,2354
54
57
  runnable/tasks.py,sha256=X6xijut7ffwpfYDcXoN6y0AcRVd7fWHs676DJ00Kma4,29134
55
58
  runnable/utils.py,sha256=hBr7oGwGL2VgfITlQCTz-a1iwvvf7Mfl-HY8UdENZac,19929
56
- runnable-0.26.0.dist-info/METADATA,sha256=IiPhsPo9Vws83V72pYoPNG7cdexyVi7Ctf49lsgv1bY,10047
57
- runnable-0.26.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
58
- runnable-0.26.0.dist-info/entry_points.txt,sha256=UCXvfBsVLpBjQY6znXNVzF6hof3Lro7oxtUD0t7kUp4,1704
59
- runnable-0.26.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
60
- runnable-0.26.0.dist-info/RECORD,,
59
+ runnable-0.28.0.dist-info/METADATA,sha256=Nxwf20GCaSSHfvxOqUmUiM1zuE3CgSd1Vq2wH3s9Ybg,10047
60
+ runnable-0.28.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
61
+ runnable-0.28.0.dist-info/entry_points.txt,sha256=ioMbWojILtdibYVgh1jXJ00SpK-tX3gy7oVGDq61cSk,1839
62
+ runnable-0.28.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
63
+ runnable-0.28.0.dist-info/RECORD,,
@@ -35,7 +35,9 @@ retry = extensions.pipeline_executor.retry:RetryExecutor
35
35
  [run_log_store]
36
36
  buffered = runnable.datastore:BufferRunLogstore
37
37
  chunked-fs = extensions.run_log_store.chunked_fs:ChunkedFileSystemRunLogStore
38
+ chunked-minio = extensions.run_log_store.chunked_minio:ChunkedMinioRunLogStore
38
39
  file-system = extensions.run_log_store.file_system:FileSystemRunLogstore
40
+ minio = extensions.run_log_store.minio:MinioRunLogStore
39
41
 
40
42
  [secrets]
41
43
  do-nothing = runnable.secrets:DoNothingSecretManager