runnable 0.13.0__py3-none-any.whl → 0.16.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- runnable/__init__.py +1 -12
- runnable/catalog.py +29 -5
- runnable/cli.py +268 -215
- runnable/context.py +10 -3
- runnable/datastore.py +212 -53
- runnable/defaults.py +13 -55
- runnable/entrypoints.py +270 -183
- runnable/exceptions.py +28 -2
- runnable/executor.py +133 -86
- runnable/graph.py +37 -13
- runnable/nodes.py +50 -22
- runnable/parameters.py +27 -8
- runnable/pickler.py +1 -1
- runnable/sdk.py +230 -66
- runnable/secrets.py +3 -1
- runnable/tasks.py +99 -41
- runnable/utils.py +59 -39
- {runnable-0.13.0.dist-info → runnable-0.16.0.dist-info}/METADATA +28 -31
- runnable-0.16.0.dist-info/RECORD +23 -0
- {runnable-0.13.0.dist-info → runnable-0.16.0.dist-info}/WHEEL +1 -1
- runnable-0.16.0.dist-info/entry_points.txt +45 -0
- runnable/extensions/__init__.py +0 -0
- runnable/extensions/catalog/__init__.py +0 -21
- runnable/extensions/catalog/file_system/__init__.py +0 -0
- runnable/extensions/catalog/file_system/implementation.py +0 -234
- runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
- runnable/extensions/catalog/k8s_pvc/implementation.py +0 -16
- runnable/extensions/catalog/k8s_pvc/integration.py +0 -59
- runnable/extensions/executor/__init__.py +0 -649
- runnable/extensions/executor/argo/__init__.py +0 -0
- runnable/extensions/executor/argo/implementation.py +0 -1194
- runnable/extensions/executor/argo/specification.yaml +0 -51
- runnable/extensions/executor/k8s_job/__init__.py +0 -0
- runnable/extensions/executor/k8s_job/implementation_FF.py +0 -259
- runnable/extensions/executor/k8s_job/integration_FF.py +0 -69
- runnable/extensions/executor/local.py +0 -69
- runnable/extensions/executor/local_container/__init__.py +0 -0
- runnable/extensions/executor/local_container/implementation.py +0 -446
- runnable/extensions/executor/mocked/__init__.py +0 -0
- runnable/extensions/executor/mocked/implementation.py +0 -154
- runnable/extensions/executor/retry/__init__.py +0 -0
- runnable/extensions/executor/retry/implementation.py +0 -168
- runnable/extensions/nodes.py +0 -870
- runnable/extensions/run_log_store/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_file_system/implementation.py +0 -111
- runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +0 -21
- runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +0 -61
- runnable/extensions/run_log_store/db/implementation_FF.py +0 -157
- runnable/extensions/run_log_store/db/integration_FF.py +0 -0
- runnable/extensions/run_log_store/file_system/__init__.py +0 -0
- runnable/extensions/run_log_store/file_system/implementation.py +0 -140
- runnable/extensions/run_log_store/generic_chunked.py +0 -557
- runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
- runnable/extensions/run_log_store/k8s_pvc/implementation.py +0 -21
- runnable/extensions/run_log_store/k8s_pvc/integration.py +0 -56
- runnable/extensions/secrets/__init__.py +0 -0
- runnable/extensions/secrets/dotenv/__init__.py +0 -0
- runnable/extensions/secrets/dotenv/implementation.py +0 -100
- runnable/integration.py +0 -192
- runnable-0.13.0.dist-info/RECORD +0 -63
- runnable-0.13.0.dist-info/entry_points.txt +0 -41
- {runnable-0.13.0.dist-info → runnable-0.16.0.dist-info/licenses}/LICENSE +0 -0
runnable/extensions/__init__.py
DELETED
File without changes
|
@@ -1,21 +0,0 @@
|
|
1
|
-
from typing import List, Optional
|
2
|
-
|
3
|
-
from runnable.datastore import DataCatalog
|
4
|
-
|
5
|
-
|
6
|
-
def is_catalog_out_of_sync(catalog, synced_catalogs: "Optional[List[DataCatalog]]" = None) -> bool:
    """
    Check whether a catalog item is out of sync with previously cataloged objects.

    Args:
        catalog: The catalog item to check; its ``catalog_relative_path`` and
            ``data_hash`` attributes are compared.
        synced_catalogs: Catalog items synced in the past. ``None`` or an empty
            list means that nothing has been synced yet.

    Returns:
        bool: ``False`` only when the first previously synced item with the same
        relative path also has the same data hash (nothing to sync).
        ``True`` when the hash differs, when no synced item has that path, or
        when there are no synced items at all.
    """
    if not synced_catalogs:
        return True  # Nothing has been synced in the past

    for synced_catalog in synced_catalogs:
        if synced_catalog.catalog_relative_path == catalog.catalog_relative_path:
            # Only the first entry with a matching path is consulted.
            return synced_catalog.data_hash != catalog.data_hash

    return True  # The object does not exist, sync it
|
File without changes
|
@@ -1,234 +0,0 @@
|
|
1
|
-
import logging
|
2
|
-
import os
|
3
|
-
import shutil
|
4
|
-
from pathlib import Path
|
5
|
-
from typing import Any, Dict, List, Optional
|
6
|
-
|
7
|
-
from runnable import defaults, utils
|
8
|
-
from runnable.catalog import BaseCatalog
|
9
|
-
from runnable.datastore import DataCatalog
|
10
|
-
from runnable.extensions.catalog import is_catalog_out_of_sync
|
11
|
-
|
12
|
-
# Module-level logger, looked up by the name held in defaults.LOGGER_NAME.
logger = logging.getLogger(defaults.LOGGER_NAME)
|
13
|
-
|
14
|
-
|
15
|
-
class FileSystemCatalog(BaseCatalog):
    """
    A Catalog handler that uses the local file system for cataloging.

    Note: Do not use this if the steps of the pipeline run on different compute environments.

    Example config:

    catalog:
      type: file-system
      config:
        catalog_location: The location to store the catalog.
        compute_data_folder: The folder to source the data from.

    """

    service_name: str = "file-system"
    catalog_location: str = defaults.CATALOG_LOCATION_FOLDER

    def get_catalog_location(self):
        """Return the root location of the catalog; subclasses may override it."""
        return self.catalog_location

    def get_summary(self) -> Dict[str, Any]:
        """Return a summary of this catalog, currently just its location."""
        summary = {
            "Catalog Location": self.get_catalog_location(),
        }

        return summary

    def get(self, name: str, run_id: str, compute_data_folder: str = "", **kwargs) -> List[DataCatalog]:
        """
        Get the files matching the glob pattern from the catalog of the run.

        Matching files are copied from ``<catalog location>/<run_id>/<data folder>``
        into the data folder, keeping their relative paths. Directories and
        files ending in ``.execution.log`` are skipped.

        Args:
            name (str): A glob matching the file name
            run_id (str): The run id
            compute_data_folder (str, optional): The folder to copy into.
                Falls back to ``self.compute_data_folder`` when empty.

        Raises:
            Exception: If the catalog location does not exist, or if no file
                matched the pattern.

        Returns:
            List(object) : A list of catalog objects
        """
        logger.info(f"Using the {self.service_name} catalog and trying to get {name} for run_id: {run_id}")

        copy_to = self.compute_data_folder
        if compute_data_folder:
            copy_to = compute_data_folder

        copy_to = Path(copy_to)  # type: ignore

        catalog_location = self.get_catalog_location()
        run_catalog = Path(catalog_location) / run_id / copy_to

        logger.debug(f"Copying objects to {copy_to} from the run catalog location of {run_catalog}")

        if not utils.does_dir_exist(run_catalog):
            msg = (
                f"Expected Catalog to be present at: {run_catalog} but not found.\n"
                "Note: Please make sure that some data was put in the catalog before trying to get from it.\n"
            )
            raise Exception(msg)

        # Iterate through the contents of the run_catalog and copy the files that fit the name pattern
        # We should also return a list of data hashes
        # Materialize the matches so that the debug log shows the paths instead of a generator object.
        glob_files = list(run_catalog.glob(name))
        logger.debug(f"Glob identified {glob_files} as matches to from the catalog location: {run_catalog}")

        data_catalogs = []
        run_log_store = self._context.run_log_store
        for file in glob_files:
            if file.is_dir():
                # Need not add a data catalog for the folder
                continue

            if str(file).endswith(".execution.log"):
                continue

            relative_file_path = file.relative_to(run_catalog)

            data_catalog = run_log_store.create_data_catalog(str(relative_file_path))
            data_catalog.catalog_handler_location = catalog_location
            data_catalog.catalog_relative_path = str(relative_file_path)
            data_catalog.data_hash = utils.get_data_hash(str(file))
            data_catalog.stage = "get"
            data_catalogs.append(data_catalog)

            # Make the directory in the data folder if required
            Path(copy_to / relative_file_path.parent).mkdir(parents=True, exist_ok=True)
            shutil.copy(file, copy_to / relative_file_path)

            logger.info(f"Copied {file} from {run_catalog} to {copy_to}")

        if not data_catalogs:
            raise Exception(f"Did not find any files matching {name} in {run_catalog}")

        return data_catalogs

    def put(
        self,
        name: str,
        run_id: str,
        compute_data_folder: str = "",
        synced_catalogs: Optional[List[DataCatalog]] = None,
        **kwargs,
    ) -> List[DataCatalog]:
        """
        Put the files matching the glob pattern into the catalog.

        If previously synced catalogs are provided, and no changes were observed, we do not sync them.

        Args:
            name (str): The glob pattern of the files to catalog
            run_id (str): The run id of the run
            compute_data_folder (str, optional): The compute data folder to sync from.
                Falls back to ``self.compute_data_folder`` when empty.
            synced_catalogs (list, optional): Previously synced catalog objects. Defaults to None.

        Raises:
            Exception: If the compute data folder does not exist, or if no file
                matched the pattern.

        Returns:
            List(object) : A list of catalog objects
        """
        logger.info(f"Using the {self.service_name} catalog and trying to put {name} for run_id: {run_id}")

        copy_from = self.compute_data_folder
        if compute_data_folder:
            copy_from = compute_data_folder
        copy_from = Path(copy_from)  # type: ignore

        catalog_location = self.get_catalog_location()
        run_catalog = Path(catalog_location) / run_id
        utils.safe_make_dir(run_catalog)

        logger.debug(f"Copying objects from {copy_from} to the run catalog location of {run_catalog}")

        if not utils.does_dir_exist(copy_from):
            # Report the folder that was actually checked; the raw argument may be empty.
            msg = (
                f"Expected compute data folder to be present at: {copy_from} but not found. \n"
                "Note: runnable does not create the compute data folder for you. Please ensure that the "
                "folder exists.\n"
            )
            raise Exception(msg)

        # Iterate through the contents of copy_from and if the name matches, we move them to the run_catalog
        # We should also return a list of datastore.DataCatalog items

        # Materialize the matches so that the debug log shows the paths instead of a generator object.
        glob_files = list(copy_from.glob(name))  # type: ignore
        logger.debug(f"Glob identified {glob_files} as matches to from the compute data folder: {copy_from}")

        data_catalogs = []
        run_log_store = self._context.run_log_store
        for file in glob_files:
            if file.is_dir():
                # Need not add a data catalog for the folder
                continue

            # The path keeps the data folder prefix; get() looks under run_id/<data folder>.
            relative_file_path = file.relative_to(".")

            data_catalog = run_log_store.create_data_catalog(str(relative_file_path))
            data_catalog.catalog_handler_location = catalog_location
            data_catalog.catalog_relative_path = run_id + os.sep + str(relative_file_path)
            data_catalog.data_hash = utils.get_data_hash(str(file))
            data_catalog.stage = "put"
            data_catalogs.append(data_catalog)

            if is_catalog_out_of_sync(data_catalog, synced_catalogs):
                logger.info(f"{data_catalog.name} was found to be changed, syncing")

                # Make the directory in the catalog if required
                Path(run_catalog / relative_file_path.parent).mkdir(parents=True, exist_ok=True)
                shutil.copy(file, run_catalog / relative_file_path)
            else:
                logger.info(f"{data_catalog.name} was found to be unchanged, ignoring syncing")

        if not data_catalogs:
            raise Exception(f"Did not find any files matching {name} in {copy_from}")

        return data_catalogs

    def sync_between_runs(self, previous_run_id: str, run_id: str):
        """
        Given the previous run id, sync the catalogs between the current one and previous

        Every top-level entry of the previous run's catalog is copied into the
        catalog of the current run, except entries ending in ``execution.log``.

        Args:
            previous_run_id (str): The previous run id to sync the catalogs from
            run_id (str): The run_id to which the data catalogs should be synced to.

        Raises:
            Exception: If the previous run log does not exist in the catalog

        """
        logger.info(
            f"Using the {self.service_name} catalog and syncing catalogs "
            f"between old: {previous_run_id} to new: {run_id}"
        )

        catalog_location = Path(self.get_catalog_location())
        run_catalog = catalog_location / run_id
        utils.safe_make_dir(run_catalog)

        if not utils.does_dir_exist(catalog_location / previous_run_id):
            msg = (
                f"Catalogs from previous run : {previous_run_id} are not found.\n"
                "Note: Please provision the catalog objects generated by previous run in the same catalog location"
                " as the current run, even if the catalog handler for the previous run was different"
            )
            raise Exception(msg)

        cataloged_files = list((catalog_location / previous_run_id).glob("*"))

        for cataloged_file in cataloged_files:
            if str(cataloged_file).endswith("execution.log"):
                continue

            if cataloged_file.is_file():
                shutil.copy(cataloged_file, run_catalog / cataloged_file.name)
            else:
                shutil.copytree(cataloged_file, run_catalog / cataloged_file.name)
            logger.info(f"Copied file from: {cataloged_file} to {run_catalog}")
|
File without changes
|
@@ -1,16 +0,0 @@
|
|
1
|
-
import logging
|
2
|
-
from pathlib import Path
|
3
|
-
|
4
|
-
from runnable import defaults
|
5
|
-
from runnable.extensions.catalog.file_system.implementation import FileSystemCatalog
|
6
|
-
|
7
|
-
# Module-level logger, looked up by the name held in defaults.LOGGER_NAME.
logger = logging.getLogger(defaults.LOGGER_NAME)
|
8
|
-
|
9
|
-
|
10
|
-
class K8sPVCatalog(FileSystemCatalog):
    """
    File system catalog registered under the "k8s-pvc" service name.

    The catalog location is resolved relative to ``mount_path``.
    """

    service_name: str = "k8s-pvc"
    persistent_volume_name: str
    mount_path: str

    def get_catalog_location(self):
        """Return the catalog location joined onto ``mount_path``, as a string."""
        mounted_catalog = Path(self.mount_path) / self.catalog_location
        return str(mounted_catalog)
|
@@ -1,59 +0,0 @@
|
|
1
|
-
import logging
|
2
|
-
from typing import cast
|
3
|
-
|
4
|
-
from runnable import defaults
|
5
|
-
from runnable.integration import BaseIntegration
|
6
|
-
|
7
|
-
# Module-level logger, looked up by the name held in defaults.NAME.
# NOTE(review): the catalog implementation modules use defaults.LOGGER_NAME - confirm both resolve to the same logger.
logger = logging.getLogger(defaults.NAME)
|
8
|
-
|
9
|
-
|
10
|
-
class LocalCompute(BaseIntegration):
    """
    Integration of the "local" executor with the "k8s-pvc" catalog.

    Validation always fails for this pairing.
    """

    executor_type = "local"
    service_type = "catalog"  # One of secret, catalog, datastore
    service_provider = "k8s-pvc"  # The actual implementation of the service

    def validate(self, **kwargs):
        """Reject the pairing unconditionally by raising an Exception."""
        raise Exception("We can't use the local compute k8s pvc store integration.")
|
22
|
-
|
23
|
-
|
24
|
-
class LocalContainerCompute(BaseIntegration):
    """
    Integration of the "local-container" executor with the "k8s-pvc" catalog.

    Validation always fails for this pairing.
    """

    executor_type = "local-container"
    service_type = "catalog"  # One of secret, catalog, datastore
    service_provider = "k8s-pvc"  # The actual implementation of the service

    def validate(self, **kwargs):
        """Reject the pairing unconditionally by raising an Exception."""
        raise Exception("We can't use the local-container compute k8s pvc store integration.")
|
36
|
-
|
37
|
-
|
38
|
-
class ArgoCompute(BaseIntegration):
    """
    Integration of the "argo" executor with the "k8s-pvc" catalog.
    """

    executor_type = "argo"
    service_type = "catalog"  # One of secret, catalog, datastore
    service_provider = "k8s-pvc"  # The actual implementation of the service

    def configure_for_traversal(self, **kwargs):
        """
        Register the catalog's volume with the executor.

        A ``UserVolumeMounts`` entry is built from the catalog's
        ``persistent_volume_name`` and ``mount_path`` and appended to the
        executor's ``persistent_volumes``.
        """
        # Imported here rather than at module level.
        # NOTE(review): presumably to avoid an import cycle - confirm.
        from runnable.extensions.catalog.k8s_pvc.implementation import K8sPVCatalog
        from runnable.extensions.executor.argo.implementation import ArgoExecutor, UserVolumeMounts

        # The casts only narrow the types for the type checker.
        self.executor = cast(ArgoExecutor, self.executor)
        self.service = cast(K8sPVCatalog, self.service)

        pvc_name = self.service.persistent_volume_name
        pvc_mount_path = self.service.mount_path
        catalog_mount = UserVolumeMounts(name=pvc_name, mount_path=pvc_mount_path)

        self.executor.persistent_volumes.append(catalog_mount)
|