runnable 0.25.0-py3-none-any.whl → 0.26.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- extensions/catalog/any_path.py +201 -0
- extensions/catalog/file_system.py +29 -230
- extensions/catalog/minio.py +69 -0
- extensions/catalog/s3.py +11 -0
- extensions/pipeline_executor/__init__.py +3 -34
- runnable/catalog.py +8 -28
- runnable/datastore.py +2 -2
- runnable/executor.py +0 -17
- runnable/tasks.py +1 -3
- runnable/utils.py +21 -18
- {runnable-0.25.0.dist-info → runnable-0.26.0.dist-info}/METADATA +4 -1
- {runnable-0.25.0.dist-info → runnable-0.26.0.dist-info}/RECORD +15 -12
- {runnable-0.25.0.dist-info → runnable-0.26.0.dist-info}/entry_points.txt +2 -0
- {runnable-0.25.0.dist-info → runnable-0.26.0.dist-info}/WHEEL +0 -0
- {runnable-0.25.0.dist-info → runnable-0.26.0.dist-info}/licenses/LICENSE +0 -0
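In short: this release refactors the catalog extensions around a new AnyPathCatalog base class built on pathlib and cloudpathlib. FileSystemCatalog shrinks to only the path-specific pieces, MinIO and S3 handlers are added, and the catalog API drops its run_id, compute_data_folder and synced_catalogs parameters in favour of the shared run context. cloudpathlib becomes a runtime dependency, with an optional s3 extra.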
extensions/catalog/any_path.py
ADDED
@@ -0,0 +1,201 @@
+import logging
+import os
+import shutil
+from abc import abstractmethod
+from pathlib import Path
+from typing import Any, Dict, List
+
+from cloudpathlib import CloudPath
+
+from runnable import defaults, utils
+from runnable.catalog import BaseCatalog
+from runnable.datastore import DataCatalog
+
+logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+class AnyPathCatalog(BaseCatalog):
+    """
+    A Catalog handler that uses the local file system for cataloging.
+
+    Note: Do not use this if the steps of the pipeline run on different compute environments.
+
+    Example config:
+
+    catalog:
+      type: file-system
+      config:
+        catalog_location: The location to store the catalog.
+        compute_data_folder: The folder to source the data from.
+
+    """
+
+    @abstractmethod
+    def get_summary(self) -> Dict[str, Any]: ...
+
+    @abstractmethod
+    def upload_to_catalog(self, file: Path) -> None: ...
+
+    @abstractmethod
+    def download_from_catalog(self, file: Path | CloudPath) -> None: ...
+
+    @abstractmethod
+    def get_catalog_location(self) -> Path | CloudPath:
+        """
+        For local file systems, this is the .catalog/run_id/compute_data_folder
+        For cloud systems, this is s3://bucket/run_id/compute_data_folder
+        """
+        ...
+
+    def get(self, name: str) -> List[DataCatalog]:
+        """
+        Get the file by matching glob pattern to the name
+
+        Args:
+            name ([str]): A glob matching the file name
+            run_id ([str]): The run id
+
+        Raises:
+            Exception: If the catalog location does not exist
+
+        Returns:
+            List(object) : A list of catalog objects
+        """
+        run_catalog = self.get_catalog_location()
+
+        # Iterate through the contents of the run_catalog and copy the files that fit the name pattern
+        # We should also return a list of data hashes
+        glob_files = run_catalog.glob(name)
+        logger.debug(
+            f"Glob identified {glob_files} as matches to from the catalog location: {run_catalog}"
+        )
+
+        data_catalogs = []
+        run_log_store = self._context.run_log_store
+        for file in glob_files:
+            if file.is_dir():
+                # Need not add a data catalog for the folder
+                continue
+
+            if str(file).endswith(".execution.log"):
+                continue
+
+            self.download_from_catalog(file)
+            relative_file_path = file.relative_to(run_catalog)  # type: ignore
+
+            data_catalog = run_log_store.create_data_catalog(str(relative_file_path))
+            data_catalog.catalog_relative_path = str(relative_file_path)
+            data_catalog.data_hash = utils.get_data_hash(str(relative_file_path))
+            data_catalog.stage = "get"
+            data_catalogs.append(data_catalog)
+
+        if not data_catalogs:
+            raise Exception(f"Did not find any files matching {name} in {run_catalog}")
+
+        return data_catalogs
+
+    def put(self, name: str) -> List[DataCatalog]:
+        """
+        Put the files matching the glob pattern into the catalog.
+
+        If previously synced catalogs are provided, and no changes were observed, we do not sync them.
+
+        Args:
+            name (str): The glob pattern of the files to catalog
+            run_id (str): The run id of the run
+            compute_data_folder (str, optional): The compute data folder to sync from. Defaults to settings default.
+            synced_catalogs (dict, optional): dictionary of previously synced catalogs. Defaults to None.
+
+        Raises:
+            Exception: If the compute data folder does not exist.
+
+        Returns:
+            List(object) : A list of catalog objects
+        """
+        run_id = self._context.run_id
+        logger.info(
+            f"Using the {self.service_name} catalog and trying to put {name} for run_id: {run_id}"
+        )
+
+        copy_from = Path(self.compute_data_folder)
+
+        if not copy_from.is_dir():
+            msg = (
+                f"Expected compute data folder to be present at: {copy_from} but not found. \n"
+                "Note: runnable does not create the compute data folder for you. Please ensure that the "
+                "folder exists.\n"
+            )
+            raise Exception(msg)
+
+        # Iterate through the contents of copy_from and if the name matches, we move them to the run_catalog
+        # We should also return a list of datastore.DataCatalog items
+        glob_files = copy_from.glob(name)
+        logger.debug(
+            f"Glob identified {glob_files} as matches to from the compute data folder: {copy_from}"
+        )
+
+        data_catalogs = []
+        run_log_store = self._context.run_log_store
+        for file in glob_files:
+            if file.is_dir():
+                # Need not add a data catalog for the folder
+                continue
+
+            relative_file_path = file.relative_to(copy_from)
+
+            data_catalog = run_log_store.create_data_catalog(str(relative_file_path))
+            data_catalog.catalog_relative_path = (
+                run_id + os.sep + str(relative_file_path)
+            )
+            data_catalog.data_hash = utils.get_data_hash(str(file))
+            data_catalog.stage = "put"
+            data_catalogs.append(data_catalog)
+
+            # TODO: Think about syncing only if the file is changed
+            self.upload_to_catalog(file)
+
+        if not data_catalogs:
+            raise Exception(f"Did not find any files matching {name} in {copy_from}")
+
+        return data_catalogs
+
+    def sync_between_runs(self, previous_run_id: str, run_id: str):
+        """
+        Given the previous run id, sync the catalogs between the current one and previous
+
+        Args:
+            previous_run_id (str): The previous run id to sync the catalogs from
+            run_id (str): The run_id to which the data catalogs should be synced to.
+
+        Raises:
+            Exception: If the previous run log does not exist in the catalog
+
+        """
+        logger.info(
+            f"Using the {self.service_name} catalog and syncing catalogs"
+            "between old: {previous_run_id} to new: {run_id}"
+        )
+
+        catalog_location = Path(self.get_catalog_location())
+        run_catalog = catalog_location / run_id
+        utils.safe_make_dir(run_catalog)
+
+        if not utils.does_dir_exist(catalog_location / previous_run_id):
+            msg = (
+                f"Catalogs from previous run : {previous_run_id} are not found.\n"
+                "Note: Please provision the catalog objects generated by previous run in the same catalog location"
+                " as the current run, even if the catalog handler for the previous run was different"
+            )
+            raise Exception(msg)
+
+        cataloged_files = list((catalog_location / previous_run_id).glob("*"))
+
+        for cataloged_file in cataloged_files:
+            if str(cataloged_file).endswith("execution.log"):
+                continue
+
+            if cataloged_file.is_file():
+                shutil.copy(cataloged_file, run_catalog / cataloged_file.name)
+            else:
+                shutil.copytree(cataloged_file, run_catalog / cataloged_file.name)
+            logger.info(f"Copied file from: {cataloged_file} to {run_catalog}")
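AnyPathCatalog is a template-method base: get, put and sync_between_runs carry the shared glob-match, hash and record logic, while subclasses supply only the storage-specific operations. A minimal sketch of what a new backend has to provide, assuming runnable 0.26.0 is installed; the class, its service name and the scratch directory are hypothetical:

import shutil
from pathlib import Path
from typing import Any

from cloudpathlib import CloudPath

from extensions.catalog.any_path import AnyPathCatalog


class ScratchCatalog(AnyPathCatalog):
    service_name: str = "scratch"
    root: str = "/tmp/scratch-catalog"  # hypothetical writable scratch location

    def get_summary(self) -> dict[str, Any]:
        return {"root": self.root}

    def get_catalog_location(self) -> Path:
        # <root>/<run_id>/<compute_data_folder>, mirroring the docstring above
        return Path(self.root) / self._context.run_id / self.compute_data_folder

    def download_from_catalog(self, file: Path | CloudPath) -> None:
        assert isinstance(file, Path)
        relative = file.relative_to(self.get_catalog_location())
        target = Path(self.compute_data_folder) / relative
        target.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(file, target)

    def upload_to_catalog(self, file: Path) -> None:
        destination = self.get_catalog_location() / file.relative_to(
            self.compute_data_folder
        )
        destination.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(file, destination)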
extensions/catalog/file_system.py
CHANGED
@@ -1,253 +1,52 @@
 import logging
-import os
 import shutil
 from pathlib import Path
-from typing import Any
+from typing import Any

-from
-from
-from runnable.datastore import DataCatalog
+from cloudpathlib import CloudPath
+from pydantic import Field

-
-
-
-class FileSystemCatalog(BaseCatalog):
-    """
-    A Catalog handler that uses the local file system for cataloging.
-
-    Note: Do not use this if the steps of the pipeline run on different compute environments.
+from extensions.catalog.any_path import AnyPathCatalog
+from runnable import defaults

-
-
-    catalog:
-      type: file-system
-      config:
-        catalog_location: The location to store the catalog.
-        compute_data_folder: The folder to source the data from.
+logger = logging.getLogger(defaults.LOGGER_NAME)

-    """

+class FileSystemCatalog(AnyPathCatalog):
     service_name: str = "file-system"
-    catalog_location: str = defaults.CATALOG_LOCATION_FOLDER

-
-        return self.catalog_location
+    catalog_location: str = Field(default=defaults.CATALOG_LOCATION_FOLDER)

-    def get_summary(self) ->
-
-        "
+    def get_summary(self) -> dict[str, Any]:
+        return {
+            "compute_data_folder": self.compute_data_folder,
+            "catalog_location": self.catalog_location,
         }

-
-
-
-        self, name: str, run_id: str, compute_data_folder: str = "", **kwargs
-    ) -> List[DataCatalog]:
-        """
-        Get the file by matching glob pattern to the name
+    def get_catalog_location(self) -> Path:
+        run_id = self._context.run_id
+        return Path(self.catalog_location) / run_id / self.compute_data_folder

-
-
-            run_id ([str]): The run id
+    def download_from_catalog(self, file: Path | CloudPath) -> None:
+        assert isinstance(file, Path)

-
-
-
-        Returns:
-            List(object) : A list of catalog objects
-        """
-        logger.info(
-            f"Using the {self.service_name} catalog and trying to get {name} for run_id: {run_id}"
-        )
+        run_catalog = self.get_catalog_location()
+        relative_file_path = file.relative_to(run_catalog)

         copy_to = self.compute_data_folder
-        if
-
-
-        copy_to = Path(copy_to)  # type: ignore
-
-        catalog_location = self.get_catalog_location()
-        run_catalog = Path(catalog_location) / run_id / copy_to
-
-        logger.debug(
-            f"Copying objects to {copy_to} from the run catalog location of {run_catalog}"
-        )
+        # Make the directory in the data folder if required
+        Path(copy_to / relative_file_path.parent).mkdir(parents=True, exist_ok=True)
+        shutil.copy(file, copy_to / relative_file_path)

-
-
-
-                "Note: Please make sure that some data was put in the catalog before trying to get from it.\n"
-            )
-            raise Exception(msg)
+    def upload_to_catalog(self, file: Path) -> None:
+        run_catalog = self.get_catalog_location()
+        run_catalog.mkdir(parents=True, exist_ok=True)

-        # Iterate through the contents of the run_catalog and copy the files that fit the name pattern
-        # We should also return a list of data hashes
-        glob_files = run_catalog.glob(name)
         logger.debug(
-            f"
+            f"Copying objects from {self.compute_data_folder} to the run catalog location of {run_catalog}"
         )

-
-        run_log_store = self._context.run_log_store
-        for file in glob_files:
-            if file.is_dir():
-                # Need not add a data catalog for the folder
-                continue
-
-            if str(file).endswith(".execution.log"):
-                continue
-
-            relative_file_path = file.relative_to(run_catalog)
-
-            data_catalog = run_log_store.create_data_catalog(str(relative_file_path))
-            data_catalog.catalog_handler_location = catalog_location
-            data_catalog.catalog_relative_path = str(relative_file_path)
-            data_catalog.data_hash = utils.get_data_hash(str(file))
-            data_catalog.stage = "get"
-            data_catalogs.append(data_catalog)
-
-            # Make the directory in the data folder if required
-            Path(copy_to / relative_file_path.parent).mkdir(parents=True, exist_ok=True)
-            shutil.copy(file, copy_to / relative_file_path)
-
-            logger.info(f"Copied {file} from {run_catalog} to {copy_to}")
-
-        if not data_catalogs:
-            raise Exception(f"Did not find any files matching {name} in {run_catalog}")
-
-        return data_catalogs
-
-    def put(
-        self,
-        name: str,
-        run_id: str,
-        compute_data_folder: str = "",
-        synced_catalogs: Optional[List[DataCatalog]] = None,
-        **kwargs,
-    ) -> List[DataCatalog]:
-        """
-        Put the files matching the glob pattern into the catalog.
-
-        If previously synced catalogs are provided, and no changes were observed, we do not sync them.
-
-        Args:
-            name (str): The glob pattern of the files to catalog
-            run_id (str): The run id of the run
-            compute_data_folder (str, optional): The compute data folder to sync from. Defaults to settings default.
-            synced_catalogs (dict, optional): dictionary of previously synced catalogs. Defaults to None.
-
-        Raises:
-            Exception: If the compute data folder does not exist.
-
-        Returns:
-            List(object) : A list of catalog objects
-        """
-        logger.info(
-            f"Using the {self.service_name} catalog and trying to put {name} for run_id: {run_id}"
-        )
-
-        copy_from = self.compute_data_folder
-        if compute_data_folder:
-            copy_from = compute_data_folder
-        copy_from = Path(copy_from)  # type: ignore
-
-        catalog_location = self.get_catalog_location()
-        run_catalog = Path(catalog_location) / run_id
-        utils.safe_make_dir(run_catalog)
-
-        logger.debug(
-            f"Copying objects from {copy_from} to the run catalog location of {run_catalog}"
-        )
-
-        if not utils.does_dir_exist(copy_from):
-            msg = (
-                f"Expected compute data folder to be present at: {compute_data_folder} but not found. \n"
-                "Note: runnable does not create the compute data folder for you. Please ensure that the "
-                "folder exists.\n"
-            )
-            raise Exception(msg)
-
-        # Iterate through the contents of copy_from and if the name matches, we move them to the run_catalog
-        # We should also return a list of datastore.DataCatalog items
-
-        glob_files = copy_from.glob(name)  # type: ignore
-        logger.debug(
-            f"Glob identified {glob_files} as matches to from the compute data folder: {copy_from}"
-        )
-
-        data_catalogs = []
-        run_log_store = self._context.run_log_store
-        for file in glob_files:
-            if file.is_dir():
-                # Need not add a data catalog for the folder
-                continue
-
-            relative_file_path = file.relative_to(".")
-
-            data_catalog = run_log_store.create_data_catalog(str(relative_file_path))
-            data_catalog.catalog_handler_location = catalog_location
-            data_catalog.catalog_relative_path = (
-                run_id + os.sep + str(relative_file_path)
-            )
-            data_catalog.data_hash = utils.get_data_hash(str(file))
-            data_catalog.stage = "put"
-            data_catalogs.append(data_catalog)
-
-            if is_catalog_out_of_sync(data_catalog, synced_catalogs):
-                logger.info(f"{data_catalog.name} was found to be changed, syncing")
-
-                # Make the directory in the catalog if required
-                Path(run_catalog / relative_file_path.parent).mkdir(
-                    parents=True, exist_ok=True
-                )
-                shutil.copy(file, run_catalog / relative_file_path)
-            else:
-                logger.info(
-                    f"{data_catalog.name} was found to be unchanged, ignoring syncing"
-                )
-
-        if not data_catalogs:
-            raise Exception(f"Did not find any files matching {name} in {copy_from}")
-
-        return data_catalogs
-
-    def sync_between_runs(self, previous_run_id: str, run_id: str):
-        """
-        Given the previous run id, sync the catalogs between the current one and previous
-
-        Args:
-            previous_run_id (str): The previous run id to sync the catalogs from
-            run_id (str): The run_id to which the data catalogs should be synced to.
-
-        Raises:
-            Exception: If the previous run log does not exist in the catalog
-
-        """
-        logger.info(
-            f"Using the {self.service_name} catalog and syncing catalogs"
-            "between old: {previous_run_id} to new: {run_id}"
-        )
-
-        catalog_location = Path(self.get_catalog_location())
-        run_catalog = catalog_location / run_id
-        utils.safe_make_dir(run_catalog)
-
-        if not utils.does_dir_exist(catalog_location / previous_run_id):
-            msg = (
-                f"Catalogs from previous run : {previous_run_id} are not found.\n"
-                "Note: Please provision the catalog objects generated by previous run in the same catalog location"
-                " as the current run, even if the catalog handler for the previous run was different"
-            )
-            raise Exception(msg)
-
-        cataloged_files = list((catalog_location / previous_run_id).glob("*"))
-
-        for cataloged_file in cataloged_files:
-            if str(cataloged_file).endswith("execution.log"):
-                continue
+        relative_file_path = file.relative_to(self.compute_data_folder)

-
-
-            else:
-                shutil.copytree(cataloged_file, run_catalog / cataloged_file.name)
-                logger.info(f"Copied file from: {cataloged_file} to {run_catalog}")
+        (run_catalog / relative_file_path.parent).mkdir(parents=True, exist_ok=True)
+        shutil.copy(file, run_catalog / relative_file_path)
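The rewritten FileSystemCatalog drops from 253 lines to 52 by inheriting get/put/sync_between_runs from AnyPathCatalog; only the path arithmetic and copy primitives remain. A quick sketch of the layout get_catalog_location() now produces, where ".catalog" and "data" stand in for runnable's defaults (CATALOG_LOCATION_FOLDER and COMPUTE_DATA_FOLDER) and the run id is made up:

from pathlib import Path

catalog_location = Path(".catalog")
run_id = "happy-pasteur-0528"  # hypothetical run id
compute_data_folder = "data"

run_catalog = catalog_location / run_id / compute_data_folder
print(run_catalog)  # .catalog/happy-pasteur-0528/data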
extensions/catalog/minio.py
ADDED
@@ -0,0 +1,69 @@
+import logging
+from functools import lru_cache
+from pathlib import Path
+from typing import Any
+
+from cloudpathlib import CloudPath, S3Client, S3Path
+
+from extensions.catalog.any_path import AnyPathCatalog
+from runnable import defaults
+
+logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+@lru_cache
+def get_minio_client(
+    endpoint_url: str, aws_access_key_id: str, aws_secret_access_key: str
+) -> S3Client:
+    return S3Client(
+        endpoint_url=endpoint_url,
+        aws_access_key_id=aws_access_key_id,
+        aws_secret_access_key=aws_secret_access_key,
+    )
+
+
+class MinioCatalog(AnyPathCatalog):
+    service_name: str = "minio"
+
+    endpoint_url: str = "http://localhost:9002"
+    aws_access_key_id: str = "minioadmin"
+    aws_secret_access_key: str = "minioadmin"
+    bucket: str = "runnable"
+
+    def get_summary(self) -> dict[str, Any]:
+        return {
+            "service_name": self.service_name,
+            "compute_data_folder": self.compute_data_folder,
+            "endpoint_url": self.endpoint_url,
+            "bucket": self.bucket,
+        }
+
+    def get_catalog_location(self) -> S3Path:
+        run_id = self._context.run_id
+
+        return S3Path(
+            f"s3://{self.bucket}/{run_id}/{self.compute_data_folder}".strip("."),
+            client=get_minio_client(
+                self.endpoint_url, self.aws_access_key_id, self.aws_secret_access_key
+            ),
+        )
+
+    def download_from_catalog(self, file: Path | CloudPath) -> None:
+        assert isinstance(file, S3Path)
+
+        relative_file_path = file.relative_to(self.get_catalog_location())
+
+        file_to_download = Path(self.compute_data_folder) / relative_file_path
+        file_to_download.parent.mkdir(parents=True, exist_ok=True)
+
+        file.download_to(file_to_download)
+
+    def upload_to_catalog(self, file: Path) -> None:
+        run_catalog = self.get_catalog_location()
+
+        relative_file_path = file.relative_to(self.compute_data_folder)
+        (run_catalog / relative_file_path.parent).mkdir(parents=True, exist_ok=True)
+
+        file_in_cloud = run_catalog / file
+        assert isinstance(file_in_cloud, S3Path)
+        file_in_cloud.upload_from(file)
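The MinIO handler leans entirely on cloudpathlib: S3Client accepts a custom endpoint_url, so the same S3Path API works against a local MinIO server. A minimal sketch of that round trip, using MinIO's default credentials (which match the handler's defaults above) and a made-up run id and file name:

from cloudpathlib import S3Client, S3Path

client = S3Client(
    endpoint_url="http://localhost:9002",
    aws_access_key_id="minioadmin",
    aws_secret_access_key="minioadmin",
)
remote = S3Path("s3://runnable/my-run-id/data/report.csv", client=client)

remote.upload_from("data/report.csv")   # roughly what upload_to_catalog does
remote.download_to("data/report.csv")   # roughly what download_from_catalog does

The @lru_cache on get_minio_client is worth noting: the three config values are plain strings, so the cache key is cheap, and reusing one S3Client avoids building a fresh boto3 session for every S3Path the handler constructs.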
extensions/catalog/s3.py
ADDED
@@ -0,0 +1,11 @@
+from cloudpathlib import S3Path
+
+from extensions.catalog.any_path import AnyPathCatalog
+
+
+class S3Catalog(AnyPathCatalog):
+    service_name: str = "s3"
+
+    def get_path(self, path: str) -> S3Path:
+        # TODO: Might need to assert the credentials are set
+        return S3Path(path)
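Unlike MinioCatalog, S3Catalog constructs S3Path without an explicit client, so cloudpathlib falls back to its default S3Client, which resolves credentials the standard boto3 way (environment variables, ~/.aws/credentials, instance profiles); that appears to be what the TODO about asserting credentials refers to. A sketch, with a made-up bucket layout:

from cloudpathlib import S3Path

path = S3Path("s3://my-bucket/my-run-id/data")  # hypothetical; uses ambient AWS credentials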
extensions/pipeline_executor/__init__.py
CHANGED
@@ -151,54 +151,25 @@ class GenericPipelineExecutor(BasePipelineExecutor):
             # Nothing to get/put from the catalog
             return None

-        compute_data_folder = self.get_effective_compute_data_folder()
-
         data_catalogs = []
         for name_pattern in node_catalog_settings.get(stage) or []:
             if stage == "get":
                 data_catalog = self._context.catalog_handler.get(
                     name=name_pattern,
-                    run_id=self._context.run_id,
-                    compute_data_folder=compute_data_folder,
                 )

             elif stage == "put":
                 data_catalog = self._context.catalog_handler.put(
                     name=name_pattern,
-                    run_id=self._context.run_id,
-                    compute_data_folder=compute_data_folder,
-                    synced_catalogs=synced_catalogs,
                 )
+            else:
+                raise Exception(f"Stage {stage} not supported")

             logger.debug(f"Added data catalog: {data_catalog} to step log")
             data_catalogs.extend(data_catalog)

         return data_catalogs

-    def get_effective_compute_data_folder(self) -> str:
-        """
-        Get the effective compute data folder for the given stage.
-        If there is nothing to catalog, we return None.
-
-        The default is the compute data folder of the catalog but this can be over-ridden by the node.
-
-        Args:
-            stage (str): The stage we are in the process of cataloging
-
-
-        Returns:
-            str: The compute data folder as defined by the node defaulting to catalog handler
-        """
-        assert isinstance(self._context_node, BaseNode)
-        compute_data_folder = self._context.catalog_handler.compute_data_folder
-
-        catalog_settings = self._context_node._get_catalog_settings()
-        effective_compute_data_folder = (
-            catalog_settings.get("compute_data_folder", "") or compute_data_folder
-        )
-
-        return effective_compute_data_folder
-
     @property
     def step_attempt_number(self) -> int:
         """
@@ -219,9 +190,7 @@ class GenericPipelineExecutor(BasePipelineExecutor):
         )
         task_console.save_text(log_file_name)
         # Put the log file in the catalog
-        self._context.catalog_handler.put(
-            name=log_file_name, run_id=self._context.run_id
-        )
+        self._context.catalog_handler.put(name=log_file_name)
         os.remove(log_file_name)

     def _execute_node(
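With compute_data_folder now owned by the catalog handler itself (see runnable/catalog.py below), the executor no longer resolves an "effective" data folder per node: get_effective_compute_data_folder() disappears here and from the BasePipelineExecutor interface, and _sync_catalog passes only the glob name, with an explicit error for unknown stages.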
runnable/catalog.py
CHANGED
@@ -2,7 +2,7 @@ import logging
 from abc import ABC, abstractmethod
 from typing import Any, Dict, List, Optional

-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, Field

 import runnable.context as context
 from runnable import defaults
@@ -43,6 +43,9 @@ class BaseCatalog(ABC, BaseModel):

     service_name: str = ""
     service_type: str = "catalog"
+
+    compute_data_folder: str = Field(default=defaults.COMPUTE_DATA_FOLDER)
+
     model_config = ConfigDict(extra="forbid")

     @abstractmethod
@@ -52,14 +55,8 @@ class BaseCatalog(ABC, BaseModel):
     def _context(self):
         return context.run_context

-    @property
-    def compute_data_folder(self) -> str:
-        return defaults.COMPUTE_DATA_FOLDER
-
     @abstractmethod
-    def get(
-        self, name: str, run_id: str, compute_data_folder: str = "", **kwargs
-    ) -> List[DataCatalog]:
+    def get(self, name: str) -> List[DataCatalog]:
         """
         Get the catalog item by 'name' for the 'run id' and store it in compute data folder.

@@ -79,14 +76,7 @@ class BaseCatalog(ABC, BaseModel):
         raise NotImplementedError

     @abstractmethod
-    def put(
-        self,
-        name: str,
-        run_id: str,
-        compute_data_folder: str = "",
-        synced_catalogs: Optional[List[DataCatalog]] = None,
-        **kwargs,
-    ) -> List[DataCatalog]:
+    def put(self, name: str) -> List[DataCatalog]:
         """
         Put the file by 'name' from the 'compute_data_folder' in the catalog for the run_id.

@@ -140,23 +130,14 @@ class DoNothingCatalog(BaseCatalog):
     def get_summary(self) -> Dict[str, Any]:
         return {}

-    def get(
-        self, name: str, run_id: str, compute_data_folder: str = "", **kwargs
-    ) -> List[DataCatalog]:
+    def get(self, name: str) -> List[DataCatalog]:
         """
         Does nothing
         """
         logger.info("Using a do-nothing catalog, doing nothing in get")
         return []

-    def put(
-        self,
-        name: str,
-        run_id: str,
-        compute_data_folder: str = "",
-        synced_catalogs: Optional[List[DataCatalog]] = None,
-        **kwargs,
-    ) -> List[DataCatalog]:
+    def put(self, name: str) -> List[DataCatalog]:
         """
         Does nothing
         """
@@ -168,4 +149,3 @@ class DoNothingCatalog(BaseCatalog):
         Does nothing
         """
         logger.info("Using a do-nothing catalog, doing nothing while sync between runs")
-        logger.info("Using a do-nothing catalog, doing nothing while sync between runs")
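The net effect on BaseCatalog: compute_data_folder moves from a read-only property to a regular pydantic field, so it can be overridden per catalog in config rather than always returning the package default, and get/put narrow to a single name argument. A small sketch of the field change, with "data" assumed as the value of defaults.COMPUTE_DATA_FOLDER:

from pydantic import BaseModel, ConfigDict, Field


class Catalog(BaseModel):  # stand-in for BaseCatalog, for illustration only
    model_config = ConfigDict(extra="forbid")
    compute_data_folder: str = Field(default="data")  # assumed default


print(Catalog().compute_data_folder)                               # data
print(Catalog(compute_data_folder="outputs").compute_data_folder)  # outputs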
runnable/datastore.py
CHANGED
@@ -114,7 +114,7 @@ class ObjectParameter(BaseModel):

         # If the object was serialised, get it from the catalog
         catalog_handler = context.run_context.catalog_handler
-        catalog_handler.get(name=self.file_name
+        catalog_handler.get(name=self.file_name)
         obj = context.run_context.pickler.load(path=self.file_name)
         os.remove(self.file_name)  # Remove after loading
         return obj
@@ -128,7 +128,7 @@ class ObjectParameter(BaseModel):
         context.run_context.pickler.dump(data=data, path=self.file_name)

         catalog_handler = context.run_context.catalog_handler
-        catalog_handler.put(name=self.file_name
+        catalog_handler.put(name=self.file_name)
         os.remove(self.file_name)  # Remove after loading

runnable/executor.py
CHANGED
@@ -173,23 +173,6 @@ class BasePipelineExecutor(BaseExecutor):
         """
         ...

-    @abstractmethod
-    def get_effective_compute_data_folder(self) -> Optional[str]:
-        """
-        Get the effective compute data folder for the given stage.
-        If there is nothing to catalog, we return None.
-
-        The default is the compute data folder of the catalog but this can be over-ridden by the node.
-
-        Args:
-            stage (str): The stage we are in the process of cataloging
-
-
-        Returns:
-            Optional[str]: The compute data folder as defined by catalog handler or the node or None.
-        """
-        ...
-
     @abstractmethod
     def _sync_catalog(
         self, stage: str, synced_catalogs=None
runnable/tasks.py
CHANGED
@@ -501,9 +501,7 @@ class NotebookTaskType(BaseTaskType):
             pm.execute_notebook(**kwds)
             task_console.print(out_file.getvalue())

-            context.run_context.catalog_handler.put(
-                name=notebook_output_path, run_id=context.run_context.run_id
-            )
+            context.run_context.catalog_handler.put(name=notebook_output_path)

             client = PloomberClient.from_path(path=notebook_output_path)
             namespace = client.get_namespace()
runnable/utils.py
CHANGED
@@ -359,26 +359,26 @@ def diff_dict(d1: Dict[str, Any], d2: Dict[str, Any]) -> Dict[str, Any]:
     return diff


-def hash_bytestr_iter(bytesiter, hasher, ashexstr=True):  # pylint: disable=C0116
-
-
-
-
+# def hash_bytestr_iter(bytesiter, hasher, ashexstr=True):  # pylint: disable=C0116
+#     """Hashes the given bytesiter using the given hasher."""
+#     for block in bytesiter:  # pragma: no cover
+#         hasher.update(block)
+#     return hasher.hexdigest() if ashexstr else hasher.digest()  # pragma: no cover


-def file_as_blockiter(afile, blocksize=65536):  # pylint: disable=C0116
-
-
+# def file_as_blockiter(afile, blocksize=65536):  # pylint: disable=C0116
+#     """From a StackOverflow answer: that is used to generate a MD5 hash of a large files.
+#     # https://stackoverflow.com/questions/3431825/generating-an-md5-checksum-of-a-file.

-
-
-
-
-
-
+#     """
+#     with afile:  # pragma: no cover
+#         block = afile.read(blocksize)
+#         while len(block) > 0:
+#             yield block
+#             block = afile.read(blocksize)


-def get_data_hash(file_name: str):
+def get_data_hash(file_name: str) -> str:
     """Returns the hash of the data file.

     Args:
@@ -389,9 +389,12 @@ def get_data_hash(file_name: str):
     """
     # https://stackoverflow.com/questions/3431825/generating-an-md5-checksum-of-a-file
     # TODO: For a big file, we should only hash the first few bytes
-
-
-
+    with open(file_name, "rb") as f:
+        file_hash = hashlib.md5()
+        for chunk in iter(lambda: f.read(4096), b""):
+            file_hash.update(chunk)
+
+        return file_hash.hexdigest()


     # TODO: This is not the right place for this.
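The dead hashing helpers are commented out and get_data_hash inlines the same idea: stream the file in 4 KiB blocks so large files never sit fully in memory, with MD5 serving as a change-detection fingerprint rather than a security measure. A self-contained check that the chunked digest matches hashing the whole file at once:

import hashlib
import tempfile

with tempfile.NamedTemporaryFile(delete=False) as f:
    f.write(b"hello " * 10_000)
    name = f.name

with open(name, "rb") as f:
    whole = hashlib.md5(f.read()).hexdigest()

chunked = hashlib.md5()
with open(name, "rb") as f:
    for chunk in iter(lambda: f.read(4096), b""):
        chunked.update(chunk)

assert chunked.hexdigest() == whole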
{runnable-0.25.0.dist-info → runnable-0.26.0.dist-info}/METADATA
CHANGED
@@ -1,12 +1,13 @@
 Metadata-Version: 2.4
 Name: runnable
-Version: 0.
+Version: 0.26.0
 Summary: Add your description here
 Author-email: "Vammi, Vijay" <vijay.vammi@astrazeneca.com>
 License-File: LICENSE
 Requires-Python: >=3.10
 Requires-Dist: click-plugins>=1.1.1
 Requires-Dist: click<=8.1.3
+Requires-Dist: cloudpathlib>=0.20.0
 Requires-Dist: dill>=0.3.9
 Requires-Dist: pydantic>=2.10.3
 Requires-Dist: python-dotenv>=1.0.1
@@ -23,6 +24,8 @@ Provides-Extra: k8s
 Requires-Dist: kubernetes>=31.0.0; extra == 'k8s'
 Provides-Extra: notebook
 Requires-Dist: ploomber-engine>=0.0.33; extra == 'notebook'
+Provides-Extra: s3
+Requires-Dist: cloudpathlib[s3]; extra == 's3'
 Description-Content-Type: text/markdown

{runnable-0.25.0.dist-info → runnable-0.26.0.dist-info}/RECORD
CHANGED
@@ -1,8 +1,11 @@
 extensions/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 extensions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 extensions/catalog/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-extensions/catalog/
+extensions/catalog/any_path.py,sha256=aNjphoPIyllUfY2uNDFWD1ErM3Px6izSGr0-oGowN8k,7263
+extensions/catalog/file_system.py,sha256=T_qFPFfrmykoAMc1rjNi_DBb437me8WPRcFglwAK744,1767
+extensions/catalog/minio.py,sha256=D5ofitU75OJGZdPM8s-ALCHrSR6jawIe6blDo8ebiXM,2179
 extensions/catalog/pyproject.toml,sha256=lLNxY6v04c8I5QK_zKw_E6sJTArSJRA_V-79ktaA3Hk,279
+extensions/catalog/s3.py,sha256=Sw5t8_kVRprn3uGGJCiHn7M9zw1CLaCOFj6YErtfG0o,287
 extensions/job_executor/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 extensions/job_executor/__init__.py,sha256=3zS2m6dg-L6SkKfL0kr4AxVUVmVJcepV6eipyMvQR6s,6006
 extensions/job_executor/k8s.py,sha256=V5k6Rnf_sAFqptVbCrWs_x5sl3x3fSHwO96IZoiJxKU,15342
@@ -14,7 +17,7 @@ extensions/nodes/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 extensions/nodes/nodes.py,sha256=WdOmep4uxmY2mTOtsuVZ5QhYl96jqJprkG6jkIg7BVg,34774
 extensions/nodes/pyproject.toml,sha256=YTu-ETN3JNFSkMzzWeOwn4m-O2nbRH-PmiPBALDCUw4,278
 extensions/pipeline_executor/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-extensions/pipeline_executor/__init__.py,sha256=
+extensions/pipeline_executor/__init__.py,sha256=lk_QmbfzXNrgpF_KvMPuPpzxp0B8SJobDHWrK_0Q5FE,24359
 extensions/pipeline_executor/argo.py,sha256=nnlR_D6arQMUSgAevnW1RXeN48SoB1wVcEfQ4TBireY,34543
 extensions/pipeline_executor/local.py,sha256=H8s6AdML_9_f-vdGG_6k0y9FbLqAqvA1S_7xMNyARzY,1946
 extensions/pipeline_executor/local_container.py,sha256=HOT9I-cPDCvgy6_bzNEtl4jPhTyeYSn1GK7lplH3vDA,12515
@@ -33,14 +36,14 @@ extensions/secrets/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
 extensions/secrets/dotenv.py,sha256=FbYYd_pVuJuVuIDIvXbzKuSSQ9GPq7xJXTDbJMTQbhM,1583
 extensions/secrets/pyproject.toml,sha256=mLJNImNcBlbLKHh-0ugVWT9V83R4RibyyYDtBCSqVF4,282
 runnable/__init__.py,sha256=n14AnTUUEYxXlTJ6-YLT0tMmeFb7Co_3kNldV6pgKSs,662
-runnable/catalog.py,sha256=
+runnable/catalog.py,sha256=W_erYbLZ-ffuA9RQuWVqz1DUJOuWayf32ne32IDbAbc,4358
 runnable/cli.py,sha256=3BiKSj95h2Drn__YlchMPZ5rBMafuRb2OGIsVpbsO5Y,8788
 runnable/context.py,sha256=by5uepmuCP0dmM9BmsliXihSes5QEFejwAsmekcqylE,1388
-runnable/datastore.py,sha256=
+runnable/datastore.py,sha256=ZobM1aVkgeUJ2fZYt63IFDsoNzObwc93hdByegS5YKQ,32396
 runnable/defaults.py,sha256=3o9IVGryyCE6PoQTOoaIaHHTbJGEzmdXMcwzOhwAYoI,3518
 runnable/entrypoints.py,sha256=xkUa568-7x9xALz13qW14DxS1nnLDKwLwdIBJZG-vM0,18982
 runnable/exceptions.py,sha256=LFbp0-Qxg2PAMLEVt7w2whhBxSG-5pzUEv5qN-Rc4_c,3003
-runnable/executor.py,sha256=
+runnable/executor.py,sha256=UCBBtyD0khl9QjT4SRTFMQDHDLWfJUC2U4_b3KQzaBE,15127
 runnable/graph.py,sha256=poQz5zcvq89ju_u5sYlunQLPbHnXTaUmjcvstPwvT4U,16536
 runnable/names.py,sha256=vn92Kv9ANROYSZX6Z4z1v_WA3WiEdIYmG6KEStBFZug,8134
 runnable/nodes.py,sha256=YU9u7r1ESzui1uVtJ1dgwdv1ozyJnF2k-MCFieT8CLI,17519
@@ -48,10 +51,10 @@ runnable/parameters.py,sha256=LyQb1d0SaFeI4PJ_yDYt9wArm9ThSPASWb36TwIdDUs,5213
 runnable/pickler.py,sha256=ydJ_eti_U1F4l-YacFp7BWm6g5vTn04UXye25S1HVok,2684
 runnable/sdk.py,sha256=T1nqDpLN9fULvvU9L-oY0EHqYdKUI9qk7oekLynm02Y,33568
 runnable/secrets.py,sha256=PXcEJw-4WPzeWRLfsatcPPyr1zkqgHzdRWRcS9vvpvM,2354
-runnable/tasks.py,sha256=
-runnable/utils.py,sha256=
-runnable-0.
-runnable-0.
-runnable-0.
-runnable-0.
-runnable-0.
+runnable/tasks.py,sha256=X6xijut7ffwpfYDcXoN6y0AcRVd7fWHs676DJ00Kma4,29134
+runnable/utils.py,sha256=hBr7oGwGL2VgfITlQCTz-a1iwvvf7Mfl-HY8UdENZac,19929
+runnable-0.26.0.dist-info/METADATA,sha256=IiPhsPo9Vws83V72pYoPNG7cdexyVi7Ctf49lsgv1bY,10047
+runnable-0.26.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+runnable-0.26.0.dist-info/entry_points.txt,sha256=UCXvfBsVLpBjQY6znXNVzF6hof3Lro7oxtUD0t7kUp4,1704
+runnable-0.26.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+runnable-0.26.0.dist-info/RECORD,,
{runnable-0.25.0.dist-info → runnable-0.26.0.dist-info}/entry_points.txt
CHANGED
@@ -4,6 +4,8 @@ runnable = runnable.cli:app
 [catalog]
 do-nothing = runnable.catalog:DoNothingCatalog
 file-system = extensions.catalog.file_system:FileSystemCatalog
+minio = extensions.catalog.minio:MinioCatalog
+s3 = extensions.catalog.s3:S3Catalog

 [job_executor]
 k8s-job = extensions.job_executor.k8s:K8sJobExecutor

{runnable-0.25.0.dist-info → runnable-0.26.0.dist-info}/WHEEL
File without changes

{runnable-0.25.0.dist-info → runnable-0.26.0.dist-info}/licenses/LICENSE
File without changes
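The two new handlers are wired through the same [catalog] entry-point group as the existing ones, which is how a type: minio line in a runnable config can resolve to the new class. A sketch of that lookup with importlib.metadata (Python >= 3.10, matching Requires-Python), assuming the wheel is installed:

from importlib.metadata import entry_points

# Resolve the "minio" catalog plugin by group and name, then load the class.
(minio_ep,) = entry_points(group="catalog", name="minio")
MinioCatalog = minio_ep.load()  # -> extensions.catalog.minio:MinioCatalog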