runnable 0.26.0__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- extensions/catalog/minio.py +7 -4
- extensions/run_log_store/any_path.py +100 -0
- extensions/run_log_store/chunked_fs.py +11 -10
- extensions/run_log_store/chunked_minio.py +131 -0
- extensions/run_log_store/file_system.py +6 -60
- extensions/run_log_store/generic_chunked.py +26 -20
- extensions/run_log_store/minio.py +114 -0
- {runnable-0.26.0.dist-info → runnable-0.28.0.dist-info}/METADATA +1 -1
- {runnable-0.26.0.dist-info → runnable-0.28.0.dist-info}/RECORD +12 -9
- {runnable-0.26.0.dist-info → runnable-0.28.0.dist-info}/entry_points.txt +2 -0
- {runnable-0.26.0.dist-info → runnable-0.28.0.dist-info}/WHEEL +0 -0
- {runnable-0.26.0.dist-info → runnable-0.28.0.dist-info}/licenses/LICENSE +0 -0
extensions/catalog/minio.py
CHANGED
@@ -4,6 +4,7 @@ from pathlib import Path
 from typing import Any
 
 from cloudpathlib import CloudPath, S3Client, S3Path
+from pydantic import Field, SecretStr
 
 from extensions.catalog.any_path import AnyPathCatalog
 from runnable import defaults
@@ -25,9 +26,9 @@ def get_minio_client(
 class MinioCatalog(AnyPathCatalog):
     service_name: str = "minio"
 
-    endpoint_url: str = "http://localhost:9002"
-    aws_access_key_id:
-    aws_secret_access_key:
+    endpoint_url: str = Field(default="http://localhost:9002")
+    aws_access_key_id: SecretStr = SecretStr(secret_value="minioadmin")
+    aws_secret_access_key: SecretStr = SecretStr(secret_value="minioadmin")
     bucket: str = "runnable"
 
     def get_summary(self) -> dict[str, Any]:
@@ -44,7 +45,9 @@ class MinioCatalog(AnyPathCatalog):
         return S3Path(
             f"s3://{self.bucket}/{run_id}/{self.compute_data_folder}".strip("."),
             client=get_minio_client(
-                self.endpoint_url,
+                self.endpoint_url,
+                self.aws_access_key_id.get_secret_value(),
+                self.aws_secret_access_key.get_secret_value(),
             ),
         )
 
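Note: the catalog change above switches the MinIO credentials to pydantic SecretStr fields and only unwraps them with get_secret_value() when the S3 client is built. A minimal sketch of that behaviour, assuming only pydantic (the MinioSettings model below is illustrative, not part of the package):

    from pydantic import BaseModel, SecretStr

    class MinioSettings(BaseModel):  # hypothetical model for illustration
        aws_access_key_id: SecretStr = SecretStr("minioadmin")

    settings = MinioSettings()
    print(settings.aws_access_key_id)                     # **********  (masked in reprs and logs)
    print(settings.aws_access_key_id.get_secret_value())  # minioadmin  (raw value for client calls)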
extensions/run_log_store/any_path.py
ADDED
@@ -0,0 +1,100 @@
+import logging
+from abc import abstractmethod
+from typing import Any, Dict
+
+from runnable import defaults, exceptions
+from runnable.datastore import BaseRunLogStore, RunLog
+
+logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+class AnyPathRunLogStore(BaseRunLogStore):
+    """
+    In this type of Run Log store, we use a file system to store the JSON run log.
+
+    Every single run is stored as a different file which makes it compatible across other store types.
+
+    When to use:
+        When locally testing a pipeline and have the need to compare across runs.
+        Its fully featured and perfectly fine if your local environment is where you would do everything.
+
+    Do not use:
+        If you need parallelization on local, this run log would not support it.
+
+    Example config:
+
+    run_log:
+      type: file-system
+      config:
+        log_folder: The folder to out the logs. Defaults to .run_log_store
+
+    """
+
+    service_name: str = "file-system"
+    log_folder: str = defaults.LOG_LOCATION_FOLDER
+
+    def get_summary(self) -> Dict[str, Any]:
+        summary = {"Type": self.service_name, "Location": self.log_folder}
+
+        return summary
+
+    @abstractmethod
+    def write_to_path(self, run_log: RunLog): ...
+
+    @abstractmethod
+    def read_from_path(self, run_id: str) -> RunLog: ...
+
+    def create_run_log(
+        self,
+        run_id: str,
+        dag_hash: str = "",
+        use_cached: bool = False,
+        tag: str = "",
+        original_run_id: str = "",
+        status: str = defaults.CREATED,
+    ) -> RunLog:
+        """
+        # Creates a Run log
+        # Adds it to the db
+        """
+
+        try:
+            self.get_run_log_by_id(run_id=run_id, full=False)
+            raise exceptions.RunLogExistsError(run_id=run_id)
+        except exceptions.RunLogNotFoundError:
+            pass
+
+        logger.info(f"{self.service_name} Creating a Run Log for : {run_id}")
+        run_log = RunLog(
+            run_id=run_id,
+            dag_hash=dag_hash,
+            tag=tag,
+            status=status,
+        )
+        self.write_to_path(run_log)
+        return run_log
+
+    def get_run_log_by_id(
+        self,
+        run_id: str,
+        full: bool = False,
+    ) -> RunLog:
+        """
+        # Returns the run_log defined by id
+        # Raises Exception if not found
+        """
+        try:
+            logger.info(f"{self.service_name} Getting a Run Log for : {run_id}")
+            run_log = self.read_from_path(run_id)
+            return run_log
+        except FileNotFoundError as e:
+            raise exceptions.RunLogNotFoundError(run_id) from e
+
+    def put_run_log(self, run_log: RunLog):
+        """
+        # Puts the run_log into the database
+        """
+        logger.info(
+            f"{self.service_name} Putting the run log in the DB: {run_log.run_id}"
+        )
+        self.write_to_path(run_log)
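Note: AnyPathRunLogStore above factors create_run_log/get_run_log_by_id/put_run_log out of the concrete stores; a subclass only supplies write_to_path and read_from_path. A rough sketch of a minimal subclass, assuming the base class behaves as shown in the diff (the in-memory store below is hypothetical and not shipped in the wheel):

    from extensions.run_log_store.any_path import AnyPathRunLogStore
    from runnable.datastore import RunLog

    _MEMORY: dict = {}  # hypothetical in-memory backing "path"

    class InMemoryRunLogStore(AnyPathRunLogStore):
        service_name: str = "in-memory"

        def write_to_path(self, run_log: RunLog):
            # create_run_log() and put_run_log() in the base class both funnel here
            _MEMORY[run_log.run_id] = run_log.model_dump()

        def read_from_path(self, run_id: str) -> RunLog:
            if run_id not in _MEMORY:
                # the base class maps FileNotFoundError to RunLogNotFoundError
                raise FileNotFoundError(run_id)
            return RunLog(**_MEMORY[run_id])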
extensions/run_log_store/chunked_fs.py
CHANGED
@@ -2,15 +2,13 @@ import json
 import logging
 from pathlib import Path
 from string import Template
-from typing import Any, Dict,
+from typing import Any, Dict, Union
 
 from extensions.run_log_store.generic_chunked import ChunkedRunLogStore
 from runnable import defaults, utils
 
 logger = logging.getLogger(defaults.LOGGER_NAME)
 
-T = Union[str, Path]
-
 
 class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
     """
@@ -28,7 +26,7 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
 
     def get_matches(
         self, run_id: str, name: str, multiple_allowed: bool = False
-    ) ->
+    ) -> str | list[str] | None:
         """
         Get contents of files matching the pattern name*
 
@@ -46,8 +44,8 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
                 if len(matches) > 1:
                     msg = f"Multiple matches found for {name} while multiple is not allowed"
                     raise Exception(msg)
-                return matches[0]
-            return matches
+                return str(matches[0])
+            return [str(match) for match in matches]
 
         return None
 
@@ -78,7 +76,7 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
 
         return str(name) + ".json"
 
-    def _store(self, run_id: str, contents: dict, name:
+    def _store(self, run_id: str, contents: dict, name: str, insert=False):
         """
         Store the contents against the name in the folder.
 
@@ -87,15 +85,17 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
             contents (dict): The dict to store
             name (str): The name to store as
         """
+
+        log_folder_with_run_id = self.log_folder_with_run_id(run_id=run_id)
         if insert:
-            name =
+            name = str(log_folder_with_run_id / name)
 
-        utils.safe_make_dir(
+        utils.safe_make_dir(log_folder_with_run_id)
 
         with open(self.safe_suffix_json(name), "w") as fw:
             json.dump(contents, fw, ensure_ascii=True, indent=4)
 
-    def _retrieve(self,
+    def _retrieve(self, run_id: str, name: str) -> dict:
         """
         Does the job of retrieving from the folder.
 
@@ -105,6 +105,7 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
         Returns:
             dict: The contents
         """
+
         contents: dict = {}
 
         with open(self.safe_suffix_json(name), "r") as fr:
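Note: _store above now resolves chunk names under the per-run folder before writing, and safe_suffix_json appends the .json suffix. A rough sketch of the resulting on-disk path, assuming the default .run_log_store folder (the run id below is made up):

    from pathlib import Path

    log_folder_with_run_id = Path(".run_log_store") / "my-run-id"   # hypothetical run id
    name = str(log_folder_with_run_id / "RunLog")                   # the insert=True branch
    print(name + ".json")   # .run_log_store/my-run-id/RunLog.json -> the chunk written to disk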
extensions/run_log_store/chunked_minio.py
ADDED
@@ -0,0 +1,131 @@
+import json
+import logging
+from functools import lru_cache
+from string import Template
+from typing import Any, Dict
+
+from cloudpathlib import S3Client, S3Path
+from pydantic import Field, SecretStr
+
+from extensions.run_log_store.generic_chunked import ChunkedRunLogStore
+from runnable import defaults
+
+logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+@lru_cache
+def get_minio_client(
+    endpoint_url: str, aws_access_key_id: str, aws_secret_access_key: str
+) -> S3Client:
+    return S3Client(
+        endpoint_url=endpoint_url,
+        aws_access_key_id=aws_access_key_id,
+        aws_secret_access_key=aws_secret_access_key,
+    )
+
+
+class ChunkedMinioRunLogStore(ChunkedRunLogStore):
+    """
+    File system run log store but chunks the run log into thread safe chunks.
+    This enables executions to be parallel.
+    """
+
+    service_name: str = "chunked-minio"
+    endpoint_url: str = Field(default="http://localhost:9002")
+    aws_access_key_id: SecretStr = SecretStr(secret_value="minioadmin")
+    aws_secret_access_key: SecretStr = SecretStr(secret_value="minioadmin")
+    bucket: str = Field(default="runnable/run-logs")
+
+    def get_summary(self) -> Dict[str, Any]:
+        summary = {
+            "Type": self.service_name,
+            "Location": f"{self.endpoint_url}/{self.bucket}",
+        }
+
+        return summary
+
+    def get_run_log_bucket(self) -> S3Path:
+        run_id = self._context.run_id
+
+        return S3Path(
+            f"s3://{self.bucket}/{run_id}/",
+            client=get_minio_client(
+                self.endpoint_url,
+                self.aws_access_key_id.get_secret_value(),
+                self.aws_secret_access_key.get_secret_value(),
+            ),
+        )
+
+    def get_matches(
+        self, run_id: str, name: str, multiple_allowed: bool = False
+    ) -> None | str | list[str]:
+        """
+        Get contents of files matching the pattern name*
+
+        Args:
+            run_id (str): The run id
+            name (str): The suffix of the file name to check in the run log store.
+        """
+        run_log_bucket = self.get_run_log_bucket()
+        run_log_bucket.mkdir(parents=True, exist_ok=True)
+
+        sub_name = Template(name).safe_substitute({"creation_time": ""})
+        matches = list(run_log_bucket.glob(f"{sub_name}*"))
+
+        if matches:
+            if not multiple_allowed:
+                if len(matches) > 1:
+                    msg = f"Multiple matches found for {name} while multiple is not allowed"
+                    raise Exception(msg)
+                return str(matches[0])
+            return [str(match) for match in matches]
+
+        return None
+
+    def _store(self, run_id: str, contents: dict, name: str, insert=False):
+        """
+        Store the contents against the name in the folder.
+
+        Args:
+            run_id (str): The run id
+            contents (dict): The dict to store
+            name (str): The name to store as
+        """
+
+        if insert:
+            name = str(self.get_run_log_bucket() / name)
+
+        self.get_run_log_bucket().mkdir(parents=True, exist_ok=True)
+        obj = S3Path(
+            name,
+            client=get_minio_client(
+                self.endpoint_url,
+                self.aws_access_key_id.get_secret_value(),
+                self.aws_secret_access_key.get_secret_value(),
+            ),
+        )
+
+        obj.write_text(json.dumps(contents, ensure_ascii=True, indent=4))
+
+    def _retrieve(self, run_id: str, name: str) -> dict:
+        """
+        Does the job of retrieving from the folder.
+
+        Args:
+            name (str): the name of the file to retrieve
+
+        Returns:
+            dict: The contents
+        """
+
+        obj = S3Path(
+            name,
+            client=get_minio_client(
+                self.endpoint_url,
+                self.aws_access_key_id.get_secret_value(),
+                self.aws_secret_access_key.get_secret_value(),
+            ),
+        )
+
+        run_log_text = json.loads(obj.read_text())
+        return run_log_text
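Note: the chunked MinIO store addresses every chunk as a cloudpathlib S3Path bound to a client pointed at the MinIO endpoint. A hedged sketch of that addressing, using the local-dev defaults from the diff (the object key below is made up):

    from cloudpathlib import S3Client, S3Path

    client = S3Client(
        endpoint_url="http://localhost:9002",   # default from the diff
        aws_access_key_id="minioadmin",         # default from the diff
        aws_secret_access_key="minioadmin",
    )
    chunk = S3Path("s3://runnable/run-logs/my-run-id/RunLog-1700000000.json", client=client)
    # chunk.write_text(...) / chunk.read_text() mirror what _store and _retrieve do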
extensions/run_log_store/file_system.py
CHANGED
@@ -3,13 +3,14 @@ import logging
 from pathlib import Path
 from typing import Any, Dict
 
-from
-from runnable
+from extensions.run_log_store.any_path import AnyPathRunLogStore
+from runnable import defaults, utils
+from runnable.datastore import RunLog
 
 logger = logging.getLogger(defaults.LOGGER_NAME)
 
 
-class FileSystemRunLogstore(
+class FileSystemRunLogstore(AnyPathRunLogStore):
     """
     In this type of Run Log store, we use a file system to store the JSON run log.
 
@@ -43,7 +44,7 @@ class FileSystemRunLogstore(BaseRunLogStore):
 
         return summary
 
-    def
+    def write_to_path(self, run_log: RunLog):
         """
         Write the run log to the folder
 
@@ -60,7 +61,7 @@ class FileSystemRunLogstore(BaseRunLogStore):
         with json_file_path.open("w") as fw:
             json.dump(run_log.model_dump(), fw, ensure_ascii=True, indent=4)  # pylint: disable=no-member
 
-    def
+    def read_from_path(self, run_id: str) -> RunLog:
         """
         Look into the run log folder for the run log for the run id.
 
@@ -88,58 +89,3 @@ class FileSystemRunLogstore(BaseRunLogStore):
         json_str = json.load(fr)
         run_log = RunLog(**json_str)  # pylint: disable=no-member
         return run_log
-
-    def create_run_log(
-        self,
-        run_id: str,
-        dag_hash: str = "",
-        use_cached: bool = False,
-        tag: str = "",
-        original_run_id: str = "",
-        status: str = defaults.CREATED,
-    ) -> RunLog:
-        """
-        # Creates a Run log
-        # Adds it to the db
-        """
-
-        try:
-            self.get_run_log_by_id(run_id=run_id, full=False)
-            raise exceptions.RunLogExistsError(run_id=run_id)
-        except exceptions.RunLogNotFoundError:
-            pass
-
-        logger.info(f"{self.service_name} Creating a Run Log for : {run_id}")
-        run_log = RunLog(
-            run_id=run_id,
-            dag_hash=dag_hash,
-            tag=tag,
-            status=status,
-        )
-        self.write_to_folder(run_log)
-        return run_log
-
-    def get_run_log_by_id(
-        self,
-        run_id: str,
-        full: bool = False,
-    ) -> RunLog:
-        """
-        # Returns the run_log defined by id
-        # Raises Exception if not found
-        """
-        try:
-            logger.info(f"{self.service_name} Getting a Run Log for : {run_id}")
-            run_log = self.get_from_folder(run_id)
-            return run_log
-        except FileNotFoundError as e:
-            raise exceptions.RunLogNotFoundError(run_id) from e
-
-    def put_run_log(self, run_log: RunLog):
-        """
-        # Puts the run_log into the database
-        """
-        logger.info(
-            f"{self.service_name} Putting the run log in the DB: {run_log.run_id}"
-        )
-        self.write_to_folder(run_log)
extensions/run_log_store/generic_chunked.py
CHANGED
@@ -1,10 +1,10 @@
+import json
 import logging
 import time
 from abc import abstractmethod
 from enum import Enum
-from pathlib import Path
 from string import Template
-from typing import Any, Dict,
+from typing import Any, Dict, Union
 
 from runnable import defaults, exceptions
 from runnable.datastore import (
@@ -21,9 +21,6 @@ from runnable.datastore import (
 logger = logging.getLogger(defaults.LOGGER_NAME)
 
 
-T = Union[str, Path]  # Holds str, path
-
-
 class EntityNotFoundError(Exception):
     pass
 
@@ -87,7 +84,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
     @abstractmethod
     def get_matches(
         self, run_id: str, name: str, multiple_allowed: bool = False
-    ) ->
+    ) -> None | str | list[str]:
         """
         Get contents of persistence layer matching the pattern name*
 
@@ -98,7 +95,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
         ...
 
     @abstractmethod
-    def _store(self, run_id: str, contents: dict, name:
+    def _store(self, run_id: str, contents: dict, name: str, insert: bool = False):
         """
         Store the contents against the name in the persistence layer.
 
@@ -110,7 +107,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
         ...
 
     @abstractmethod
-    def _retrieve(self, name:
+    def _retrieve(self, run_id: str, name: str) -> dict:
         """
         Does the job of retrieving from the persistent layer.
 
@@ -140,9 +137,10 @@ class ChunkedRunLogStore(BaseRunLogStore):
         insert = False
 
         if match:
-
+            assert isinstance(match, str)
+            existing_contents = self._retrieve(run_id=run_id, name=match)
             contents = dict(existing_contents, **contents)
-            name_to_give = match
+            name_to_give = match
         else:
             name_to_give = Template(naming_pattern).safe_substitute(
                 {"creation_time": str(int(time.time_ns()))}
@@ -190,13 +188,15 @@ class ChunkedRunLogStore(BaseRunLogStore):
 
         if matches:
             if not multiple_allowed:
-
+                assert isinstance(matches, str)
+                contents = self._retrieve(run_id=run_id, name=matches)
                 model = self.ModelTypes[log_type.name].value
                 return model(**contents)
 
+            assert isinstance(matches, list)
             models = []
-            for match in matches:
-                contents = self._retrieve(name=match)
+            for match in matches:
+                contents = self._retrieve(run_id=run_id, name=match)
                 model = self.ModelTypes[log_type.name].value
                 models.append(model(**contents))
             return models
@@ -225,7 +225,9 @@ class ChunkedRunLogStore(BaseRunLogStore):
             # No branch logs are found
             return {}
         # Forcing get_matches to always return a list is a better design
-
+
+        assert isinstance(matches, list)
+        epoch_created = [str(match).split("-")[-1] for match in matches]
 
         # sort matches by epoch created
         epoch_created, matches = zip(*sorted(zip(epoch_created, matches)))  # type: ignore
@@ -234,7 +236,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
 
         for match in matches:
             model = self.ModelTypes[log_type.name].value
-            log_model = model(**self._retrieve(match))
+            log_model = model(**self._retrieve(run_id=run_id, name=match))
             logs[log_model.internal_name] = log_model  # type: ignore
 
         return logs
@@ -341,7 +343,9 @@ class ChunkedRunLogStore(BaseRunLogStore):
         )
 
         self.store(
-            run_id=run_id,
+            run_id=run_id,
+            contents=json.loads(run_log.model_dump_json()),
+            log_type=self.LogTypes.RUN_LOG,
         )
         return run_log
 
@@ -388,7 +392,9 @@ class ChunkedRunLogStore(BaseRunLogStore):
         """
        run_id = run_log.run_id
         self.store(
-            run_id=run_id,
+            run_id=run_id,
+            contents=json.loads(run_log.model_dump_json()),
+            log_type=self.LogTypes.RUN_LOG,
         )
 
     def get_parameters(self, run_id: str) -> dict:
@@ -447,7 +453,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
         self.store(
             run_id=run_id,
             log_type=self.LogTypes.PARAMETER,
-            contents={key: value.
+            contents={key: json.loads(value.model_dump_json(by_alias=True))},
             name=key,
         )
 
@@ -538,7 +544,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
         self.store(
             run_id=run_id,
             log_type=self.LogTypes.STEP_LOG,
-            contents=step_log.
+            contents=json.loads(step_log.model_dump_json()),
             name=step_log.internal_name,
         )
 
@@ -594,6 +600,6 @@ class ChunkedRunLogStore(BaseRunLogStore):
         self.store(
             run_id=run_id,
             log_type=self.LogTypes.BRANCH_LOG,
-            contents=branch_log.
+            contents=json.loads(branch_log.model_dump_json()),
             name=internal_branch_name,
         )
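Note: several store() call sites above now pass contents=json.loads(model.model_dump_json()) instead of handing over the model dump directly, so non-JSON-native values are serialised by pydantic first. A small sketch of the difference, assuming only pydantic (the Event model is illustrative):

    import json
    from datetime import datetime
    from pydantic import BaseModel

    class Event(BaseModel):  # hypothetical model for illustration
        at: datetime

    e = Event(at=datetime(2024, 1, 1))
    print(type(e.model_dump()["at"]))                   # <class 'datetime.datetime'>
    print(type(json.loads(e.model_dump_json())["at"]))  # <class 'str'>  (JSON-safe)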
extensions/run_log_store/minio.py
ADDED
@@ -0,0 +1,114 @@
+import json
+import logging
+from functools import lru_cache
+from typing import Any, Dict
+
+from cloudpathlib import S3Client, S3Path
+from pydantic import Field, SecretStr
+
+from extensions.run_log_store.any_path import AnyPathRunLogStore
+from runnable import defaults
+from runnable.datastore import RunLog
+
+logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+@lru_cache
+def get_minio_client(
+    endpoint_url: str, aws_access_key_id: str, aws_secret_access_key: str
+) -> S3Client:
+    return S3Client(
+        endpoint_url=endpoint_url,
+        aws_access_key_id=aws_access_key_id,
+        aws_secret_access_key=aws_secret_access_key,
+    )
+
+
+class MinioRunLogStore(AnyPathRunLogStore):
+    """
+    In this type of Run Log store, we use a file system to store the JSON run log.
+
+    Every single run is stored as a different file which makes it compatible across other store types.
+
+    When to use:
+        When locally testing a pipeline and have the need to compare across runs.
+        Its fully featured and perfectly fine if your local environment is where you would do everything.
+
+    Do not use:
+        If you need parallelization on local, this run log would not support it.
+
+    Example config:
+
+    run_log:
+      type: file-system
+      config:
+        log_folder: The folder to out the logs. Defaults to .run_log_store
+
+    """
+
+    service_name: str = "minio"
+
+    endpoint_url: str = Field(default="http://localhost:9002")
+    aws_access_key_id: SecretStr = SecretStr(secret_value="minioadmin")
+    aws_secret_access_key: SecretStr = SecretStr(secret_value="minioadmin")
+    bucket: str = Field(default="runnable/run-logs")
+
+    def get_summary(self) -> Dict[str, Any]:
+        summary = {
+            "Type": self.service_name,
+            "Location": f"{self.endpoint_url}/{self.bucket}",
+        }
+
+        return summary
+
+    def get_run_log_bucket(self) -> S3Path:
+        run_id = self._context.run_id
+
+        return S3Path(
+            f"s3://{self.bucket}/{run_id}/",
+            client=get_minio_client(
+                self.endpoint_url,
+                self.aws_access_key_id.get_secret_value(),
+                self.aws_secret_access_key.get_secret_value(),
+            ),
+        )
+
+    def write_to_path(self, run_log: RunLog):
+        """
+        Write the run log to the folder
+
+        Args:
+            run_log (RunLog): The run log to be added to the database
+        """
+        run_log_bucket = self.get_run_log_bucket()
+        run_log_bucket.mkdir(parents=True, exist_ok=True)
+
+        run_log_object = run_log_bucket / f"{run_log.run_id}.json"
+        run_log_object.write_text(
+            json.dumps(run_log.model_dump_json(), ensure_ascii=True, indent=4)
+        )
+
+    def read_from_path(self, run_id: str) -> RunLog:
+        """
+        Look into the run log folder for the run log for the run id.
+
+        If the run log does not exist, raise an exception. If it does, decode it
+        as a RunLog and return it
+
+        Args:
+            run_id (str): The requested run id to retrieve the run log store
+
+        Raises:
+            FileNotFoundError: If the Run Log has not been found.
+
+        Returns:
+            RunLog: The decoded Run log
+        """
+        run_log_bucket = self.get_run_log_bucket()
+
+        run_log_object = run_log_bucket / f"{run_id}.json"
+
+        run_log_text = json.loads(run_log_object.read_text())
+        run_log = RunLog(**json.loads(run_log_text))
+
+        return run_log
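Note: MinioRunLogStore writes one object per run under the configured bucket prefix, named after the run id. A hedged sketch of the resulting object key, using the defaults from the diff (the run id is made up):

    bucket = "runnable/run-logs"      # default from the diff
    run_id = "pleasant-turing-1234"   # hypothetical run id
    key = f"s3://{bucket}/{run_id}/{run_id}.json"
    print(key)   # s3://runnable/run-logs/pleasant-turing-1234/pleasant-turing-1234.json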
{runnable-0.26.0.dist-info → runnable-0.28.0.dist-info}/RECORD
CHANGED
@@ -3,7 +3,7 @@ extensions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 extensions/catalog/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 extensions/catalog/any_path.py,sha256=aNjphoPIyllUfY2uNDFWD1ErM3Px6izSGr0-oGowN8k,7263
 extensions/catalog/file_system.py,sha256=T_qFPFfrmykoAMc1rjNi_DBb437me8WPRcFglwAK744,1767
-extensions/catalog/minio.py,sha256=
+extensions/catalog/minio.py,sha256=R3GvfCxN1GTcs4bQIAWh79_GHDTVd14gnpKlzwFeKUI,2363
 extensions/catalog/pyproject.toml,sha256=lLNxY6v04c8I5QK_zKw_E6sJTArSJRA_V-79ktaA3Hk,279
 extensions/catalog/s3.py,sha256=Sw5t8_kVRprn3uGGJCiHn7M9zw1CLaCOFj6YErtfG0o,287
 extensions/job_executor/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -26,9 +26,12 @@ extensions/pipeline_executor/pyproject.toml,sha256=ykTX7srR10PBYb8LsIwEj8vIPPIEZ
 extensions/pipeline_executor/retry.py,sha256=KGenhWrLLmOQgzMvqloXHDRJyoNs91t05rRW8aLW6FA,6969
 extensions/run_log_store/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 extensions/run_log_store/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-extensions/run_log_store/
-extensions/run_log_store/
-extensions/run_log_store/
+extensions/run_log_store/any_path.py,sha256=0nN_LHbm2W6AHkerQmsVHq3EoybFQF8lxpCicacHo8Y,2861
+extensions/run_log_store/chunked_fs.py,sha256=wHMKcAx6uFI4OOTp7QWCdGq9WvEFesbLp9VxHZU28l0,3341
+extensions/run_log_store/chunked_minio.py,sha256=Itfkw4Ycf0uLCqxH3Uk_itmVgT7ipJp05yKfD22WBiY,4007
+extensions/run_log_store/file_system.py,sha256=hhrbhSnuzv8yzBr6DAu45NT8-sawPP86WA2-LY70vjw,2781
+extensions/run_log_store/generic_chunked.py,sha256=bsGgChTDZN3dSbLmLJ9SIpcvArzVmzhTVAOYZytAUNc,20483
+extensions/run_log_store/minio.py,sha256=omrKDSdRzmnVBg9xXkkdQb-icBIgBDRdpmwGRlMyCGk,3453
 extensions/run_log_store/pyproject.toml,sha256=YnmXsFvFG9uv_c0spLYBsNI_1sbktqxtHsOuClyvZ3g,288
 extensions/run_log_store/db/implementation_FF.py,sha256=euTnh0xzNF0e_DyfHQ4W-kG1AwTr8u7OuO3_cZkR5bM,5237
 extensions/run_log_store/db/integration_FF.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -53,8 +56,8 @@ runnable/sdk.py,sha256=T1nqDpLN9fULvvU9L-oY0EHqYdKUI9qk7oekLynm02Y,33568
 runnable/secrets.py,sha256=PXcEJw-4WPzeWRLfsatcPPyr1zkqgHzdRWRcS9vvpvM,2354
 runnable/tasks.py,sha256=X6xijut7ffwpfYDcXoN6y0AcRVd7fWHs676DJ00Kma4,29134
 runnable/utils.py,sha256=hBr7oGwGL2VgfITlQCTz-a1iwvvf7Mfl-HY8UdENZac,19929
-runnable-0.
-runnable-0.
-runnable-0.
-runnable-0.
-runnable-0.
+runnable-0.28.0.dist-info/METADATA,sha256=Nxwf20GCaSSHfvxOqUmUiM1zuE3CgSd1Vq2wH3s9Ybg,10047
+runnable-0.28.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+runnable-0.28.0.dist-info/entry_points.txt,sha256=ioMbWojILtdibYVgh1jXJ00SpK-tX3gy7oVGDq61cSk,1839
+runnable-0.28.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+runnable-0.28.0.dist-info/RECORD,,
{runnable-0.26.0.dist-info → runnable-0.28.0.dist-info}/entry_points.txt
CHANGED
@@ -35,7 +35,9 @@ retry = extensions.pipeline_executor.retry:RetryExecutor
 [run_log_store]
 buffered = runnable.datastore:BufferRunLogstore
 chunked-fs = extensions.run_log_store.chunked_fs:ChunkedFileSystemRunLogStore
+chunked-minio = extensions.run_log_store.chunked_minio:ChunkedMinioRunLogStore
 file-system = extensions.run_log_store.file_system:FileSystemRunLogstore
+minio = extensions.run_log_store.minio:MinioRunLogStore
 
 [secrets]
 do-nothing = runnable.secrets:DoNothingSecretManager
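Note: the two new run_log_store entry points make the MinIO stores discoverable by name. A sketch of resolving one of them through importlib.metadata, assuming the 0.28.0 wheel is installed:

    from importlib.metadata import entry_points

    eps = entry_points(group="run_log_store")            # group name from entry_points.txt
    minio_ep = next(ep for ep in eps if ep.name == "minio")
    print(minio_ep.value)   # extensions.run_log_store.minio:MinioRunLogStore
    cls = minio_ep.load()   # imports the implementation class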
{runnable-0.26.0.dist-info → runnable-0.28.0.dist-info}/WHEEL
File without changes
{runnable-0.26.0.dist-info → runnable-0.28.0.dist-info}/licenses/LICENSE
File without changes