runnable 0.12.3__py3-none-any.whl → 0.14.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. runnable/__init__.py +0 -11
  2. runnable/catalog.py +27 -5
  3. runnable/cli.py +122 -26
  4. runnable/datastore.py +71 -35
  5. runnable/defaults.py +0 -1
  6. runnable/entrypoints.py +107 -32
  7. runnable/exceptions.py +6 -2
  8. runnable/executor.py +28 -9
  9. runnable/graph.py +37 -12
  10. runnable/integration.py +7 -2
  11. runnable/nodes.py +15 -17
  12. runnable/parameters.py +27 -8
  13. runnable/pickler.py +1 -1
  14. runnable/sdk.py +101 -33
  15. runnable/secrets.py +3 -1
  16. runnable/tasks.py +246 -34
  17. runnable/utils.py +41 -13
  18. {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info}/METADATA +25 -31
  19. runnable-0.14.0.dist-info/RECORD +24 -0
  20. {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info}/WHEEL +1 -1
  21. runnable-0.14.0.dist-info/entry_points.txt +40 -0
  22. runnable/extensions/__init__.py +0 -0
  23. runnable/extensions/catalog/__init__.py +0 -21
  24. runnable/extensions/catalog/file_system/__init__.py +0 -0
  25. runnable/extensions/catalog/file_system/implementation.py +0 -234
  26. runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
  27. runnable/extensions/catalog/k8s_pvc/implementation.py +0 -16
  28. runnable/extensions/catalog/k8s_pvc/integration.py +0 -59
  29. runnable/extensions/executor/__init__.py +0 -649
  30. runnable/extensions/executor/argo/__init__.py +0 -0
  31. runnable/extensions/executor/argo/implementation.py +0 -1194
  32. runnable/extensions/executor/argo/specification.yaml +0 -51
  33. runnable/extensions/executor/k8s_job/__init__.py +0 -0
  34. runnable/extensions/executor/k8s_job/implementation_FF.py +0 -259
  35. runnable/extensions/executor/k8s_job/integration_FF.py +0 -69
  36. runnable/extensions/executor/local/__init__.py +0 -0
  37. runnable/extensions/executor/local/implementation.py +0 -71
  38. runnable/extensions/executor/local_container/__init__.py +0 -0
  39. runnable/extensions/executor/local_container/implementation.py +0 -446
  40. runnable/extensions/executor/mocked/__init__.py +0 -0
  41. runnable/extensions/executor/mocked/implementation.py +0 -154
  42. runnable/extensions/executor/retry/__init__.py +0 -0
  43. runnable/extensions/executor/retry/implementation.py +0 -168
  44. runnable/extensions/nodes.py +0 -855
  45. runnable/extensions/run_log_store/__init__.py +0 -0
  46. runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
  47. runnable/extensions/run_log_store/chunked_file_system/implementation.py +0 -111
  48. runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
  49. runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +0 -21
  50. runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +0 -61
  51. runnable/extensions/run_log_store/db/implementation_FF.py +0 -157
  52. runnable/extensions/run_log_store/db/integration_FF.py +0 -0
  53. runnable/extensions/run_log_store/file_system/__init__.py +0 -0
  54. runnable/extensions/run_log_store/file_system/implementation.py +0 -140
  55. runnable/extensions/run_log_store/generic_chunked.py +0 -557
  56. runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
  57. runnable/extensions/run_log_store/k8s_pvc/implementation.py +0 -21
  58. runnable/extensions/run_log_store/k8s_pvc/integration.py +0 -56
  59. runnable/extensions/secrets/__init__.py +0 -0
  60. runnable/extensions/secrets/dotenv/__init__.py +0 -0
  61. runnable/extensions/secrets/dotenv/implementation.py +0 -100
  62. runnable-0.12.3.dist-info/RECORD +0 -64
  63. runnable-0.12.3.dist-info/entry_points.txt +0 -41
  64. {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,24 @@
1
+ runnable/__init__.py,sha256=WuJwXEBxjiz2E1jBapkOkYpIaCAPZ1Udyep0dnN4bkE,666
2
+ runnable/catalog.py,sha256=5eTYwZWqfVBXIIn8WbweTMqiXZ9ccvtJBnAiIxSQ3Vk,4835
3
+ runnable/cli.py,sha256=rBTvkNDetN6psHmDLa0kko8IHGvND5xMuO30hU_gcvY,9931
4
+ runnable/context.py,sha256=QhiXJHRcEBfSKB1ijvL5yB9w44x0HCe7VEiwK1cUJ9U,1124
5
+ runnable/datastore.py,sha256=Q_KKb4PNP2IXnUlR2bjOclDFAsAVJ_oNiCd5x0vB5jc,28127
6
+ runnable/defaults.py,sha256=HYkXNI2hg0Y-SsXySjliwdc-3FUGJvJV3TnarmMIFFs,4656
7
+ runnable/entrypoints.py,sha256=gMywHyoUheSAXCyqLMJ0QWK4IxiFVgEYwRDuZWsk-uI,18612
8
+ runnable/exceptions.py,sha256=3gyN2bhqYvaZF_bo8hA7I09u8aQCAeh8NclBp5lCH8w,2574
9
+ runnable/executor.py,sha256=Y-yCw4ZIz88nHn47QzCXvXm7VjByTIyBWzsqsaIpNP8,14653
10
+ runnable/graph.py,sha256=EuH0210DcbEFlc6J-aSvfXJOb0SqORUiTpgFYyb_KPM,16602
11
+ runnable/integration.py,sha256=IXBH20QKpFYW7pQwwbTI0qQvrg4kJseM0KMacQKli74,6791
12
+ runnable/names.py,sha256=vn92Kv9ANROYSZX6Z4z1v_WA3WiEdIYmG6KEStBFZug,8134
13
+ runnable/nodes.py,sha256=I9C65nj3kAHHXJSwn5QYximFjV7tbjBiTk0ayEgrmK4,16526
14
+ runnable/parameters.py,sha256=g_bJurLjuppFDiDpfFqy6BRF36o_EY0OC5APl7HJFok,5450
15
+ runnable/pickler.py,sha256=ydJ_eti_U1F4l-YacFp7BWm6g5vTn04UXye25S1HVok,2684
16
+ runnable/sdk.py,sha256=tEwTwcfm1KVfnEql3G_yJpgymDWOqoIIA4q3RzKmHp0,30365
17
+ runnable/secrets.py,sha256=PXcEJw-4WPzeWRLfsatcPPyr1zkqgHzdRWRcS9vvpvM,2354
18
+ runnable/tasks.py,sha256=QPCgH_D7YkN2oAi7-w6Ipt9IZ397SayjhAl_PPyVto8,29822
19
+ runnable/utils.py,sha256=THMHnWVrUhNKdIvUbeZdDiXnP1WEOuee9e9OB8zzW5M,20441
20
+ runnable-0.14.0.dist-info/METADATA,sha256=n3mrLZadanBuHaUz4h5WgtsfuXmkjaHdtsfOCSAmEEk,9994
21
+ runnable-0.14.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
22
+ runnable-0.14.0.dist-info/entry_points.txt,sha256=8yBeduXOnO3SUnafZQwzXiE8rQMPXGDbqueyL7G9euM,1297
23
+ runnable-0.14.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
24
+ runnable-0.14.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.9.0
2
+ Generator: hatchling 1.27.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -0,0 +1,40 @@
1
+ [console_scripts]
2
+ runnable = runnable.cli:cli
3
+
4
+ [catalog]
5
+ do-nothing = runnable.catalog:DoNothingCatalog
6
+ file-system = extensions.catalog.file_system:FileSystemCatalog
7
+
8
+ [executor]
9
+ argo = extensions.executor.argo:ArgoExecutor
10
+ local = extensions.executor.local:LocalExecutor
11
+ local-container = extensions.executor.local_container:LocalContainerExecutor
12
+ mocked = extensions.executor.mocked:MockedExecutor
13
+ retry = extensions.executor.retry:RetryExecutor
14
+
15
+ [nodes]
16
+ dag = extensions.nodes.nodes:DagNode
17
+ fail = extensions.nodes.nodes:FailNode
18
+ map = extensions.nodes.nodes:MapNode
19
+ parallel = extensions.nodes.nodes:ParallelNode
20
+ stub = extensions.nodes.nodes:StubNode
21
+ success = extensions.nodes.nodes:SuccessNode
22
+ task = extensions.nodes.nodes:TaskNode
23
+
24
+ [pickler]
25
+ pickle = runnable.pickler:NativePickler
26
+
27
+ [run_log_store]
28
+ buffered = runnable.datastore:BufferRunLogstore
29
+ chunked-fs = extensions.run_log_store.chunked_fs:ChunkedFileSystemRunLogStore
30
+ file-system = extensions.run_log_store.file_system:FileSystemRunLogstore
31
+
32
+ [secrets]
33
+ do-nothing = runnable.secrets:DoNothingSecretManager
34
+ dotenv = extensions.secrets.dotenv:DotEnvSecrets
35
+ env-secrets = runnable.secrets:EnvSecretsManager
36
+
37
+ [tasks]
38
+ notebook = runnable.tasks:NotebookTaskType
39
+ python = runnable.tasks:PythonTaskType
40
+ shell = runnable.tasks:ShellTaskType
File without changes
@@ -1,21 +0,0 @@
1
- from typing import List, Optional
2
-
3
- from runnable.datastore import DataCatalog
4
-
5
-
6
- def is_catalog_out_of_sync(catalog, synced_catalogs=Optional[List[DataCatalog]]) -> bool:
7
- """
8
- Check if the catalog items are out of sync from already cataloged objects.
9
- If they are, return False.
10
- If the object does not exist or synced catalog does not exist, return True
11
- """
12
- if not synced_catalogs:
13
- return True # If nothing has been synced in the past
14
-
15
- for synced_catalog in synced_catalogs:
16
- if synced_catalog.catalog_relative_path == catalog.catalog_relative_path:
17
- if synced_catalog.data_hash == catalog.data_hash:
18
- return False
19
- return True
20
-
21
- return True # The object does not exist, sync it
File without changes
@@ -1,234 +0,0 @@
1
- import logging
2
- import os
3
- import shutil
4
- from pathlib import Path
5
- from typing import Any, Dict, List, Optional
6
-
7
- from runnable import defaults, utils
8
- from runnable.catalog import BaseCatalog
9
- from runnable.datastore import DataCatalog
10
- from runnable.extensions.catalog import is_catalog_out_of_sync
11
-
12
- logger = logging.getLogger(defaults.LOGGER_NAME)
13
-
14
-
15
- class FileSystemCatalog(BaseCatalog):
16
- """
17
- A Catalog handler that uses the local file system for cataloging.
18
-
19
- Note: Do not use this if the steps of the pipeline run on different compute environments.
20
-
21
- Example config:
22
-
23
- catalog:
24
- type: file-system
25
- config:
26
- catalog_location: The location to store the catalog.
27
- compute_data_folder: The folder to source the data from.
28
-
29
- """
30
-
31
- service_name: str = "file-system"
32
- catalog_location: str = defaults.CATALOG_LOCATION_FOLDER
33
-
34
- def get_catalog_location(self):
35
- return self.catalog_location
36
-
37
- def get_summary(self) -> Dict[str, Any]:
38
- summary = {
39
- "Catalog Location": self.get_catalog_location(),
40
- }
41
-
42
- return summary
43
-
44
- def get(self, name: str, run_id: str, compute_data_folder: str = "", **kwargs) -> List[DataCatalog]:
45
- """
46
- Get the file by matching glob pattern to the name
47
-
48
- Args:
49
- name ([str]): A glob matching the file name
50
- run_id ([str]): The run id
51
-
52
- Raises:
53
- Exception: If the catalog location does not exist
54
-
55
- Returns:
56
- List(object) : A list of catalog objects
57
- """
58
- logger.info(f"Using the {self.service_name} catalog and trying to get {name} for run_id: {run_id}")
59
-
60
- copy_to = self.compute_data_folder
61
- if compute_data_folder:
62
- copy_to = compute_data_folder
63
-
64
- copy_to = Path(copy_to) # type: ignore
65
-
66
- catalog_location = self.get_catalog_location()
67
- run_catalog = Path(catalog_location) / run_id / copy_to
68
-
69
- logger.debug(f"Copying objects to {copy_to} from the run catalog location of {run_catalog}")
70
-
71
- if not utils.does_dir_exist(run_catalog):
72
- msg = (
73
- f"Expected Catalog to be present at: {run_catalog} but not found.\n"
74
- "Note: Please make sure that some data was put in the catalog before trying to get from it.\n"
75
- )
76
- raise Exception(msg)
77
-
78
- # Iterate through the contents of the run_catalog and copy the files that fit the name pattern
79
- # We should also return a list of data hashes
80
- glob_files = run_catalog.glob(name)
81
- logger.debug(f"Glob identified {glob_files} as matches to from the catalog location: {run_catalog}")
82
-
83
- data_catalogs = []
84
- run_log_store = self._context.run_log_store
85
- for file in glob_files:
86
- if file.is_dir():
87
- # Need not add a data catalog for the folder
88
- continue
89
-
90
- if str(file).endswith(".execution.log"):
91
- continue
92
-
93
- relative_file_path = file.relative_to(run_catalog)
94
-
95
- data_catalog = run_log_store.create_data_catalog(str(relative_file_path))
96
- data_catalog.catalog_handler_location = catalog_location
97
- data_catalog.catalog_relative_path = str(relative_file_path)
98
- data_catalog.data_hash = utils.get_data_hash(str(file))
99
- data_catalog.stage = "get"
100
- data_catalogs.append(data_catalog)
101
-
102
- # Make the directory in the data folder if required
103
- Path(copy_to / relative_file_path.parent).mkdir(parents=True, exist_ok=True)
104
- shutil.copy(file, copy_to / relative_file_path)
105
-
106
- logger.info(f"Copied {file} from {run_catalog} to {copy_to}")
107
-
108
- if not data_catalogs:
109
- raise Exception(f"Did not find any files matching {name} in {run_catalog}")
110
-
111
- return data_catalogs
112
-
113
- def put(
114
- self,
115
- name: str,
116
- run_id: str,
117
- compute_data_folder: str = "",
118
- synced_catalogs: Optional[List[DataCatalog]] = None,
119
- **kwargs,
120
- ) -> List[DataCatalog]:
121
- """
122
- Put the files matching the glob pattern into the catalog.
123
-
124
- If previously synced catalogs are provided, and no changes were observed, we do not sync them.
125
-
126
- Args:
127
- name (str): The glob pattern of the files to catalog
128
- run_id (str): The run id of the run
129
- compute_data_folder (str, optional): The compute data folder to sync from. Defaults to settings default.
130
- synced_catalogs (dict, optional): dictionary of previously synced catalogs. Defaults to None.
131
-
132
- Raises:
133
- Exception: If the compute data folder does not exist.
134
-
135
- Returns:
136
- List(object) : A list of catalog objects
137
- """
138
- logger.info(f"Using the {self.service_name} catalog and trying to put {name} for run_id: {run_id}")
139
-
140
- copy_from = self.compute_data_folder
141
- if compute_data_folder:
142
- copy_from = compute_data_folder
143
- copy_from = Path(copy_from) # type: ignore
144
-
145
- catalog_location = self.get_catalog_location()
146
- run_catalog = Path(catalog_location) / run_id
147
- utils.safe_make_dir(run_catalog)
148
-
149
- logger.debug(f"Copying objects from {copy_from} to the run catalog location of {run_catalog}")
150
-
151
- if not utils.does_dir_exist(copy_from):
152
- msg = (
153
- f"Expected compute data folder to be present at: {compute_data_folder} but not found. \n"
154
- "Note: runnable does not create the compute data folder for you. Please ensure that the "
155
- "folder exists.\n"
156
- )
157
- raise Exception(msg)
158
-
159
- # Iterate through the contents of copy_from and if the name matches, we move them to the run_catalog
160
- # We should also return a list of datastore.DataCatalog items
161
-
162
- glob_files = copy_from.glob(name) # type: ignore
163
- logger.debug(f"Glob identified {glob_files} as matches to from the compute data folder: {copy_from}")
164
-
165
- data_catalogs = []
166
- run_log_store = self._context.run_log_store
167
- for file in glob_files:
168
- if file.is_dir():
169
- # Need not add a data catalog for the folder
170
- continue
171
-
172
- relative_file_path = file.relative_to(".")
173
-
174
- data_catalog = run_log_store.create_data_catalog(str(relative_file_path))
175
- data_catalog.catalog_handler_location = catalog_location
176
- data_catalog.catalog_relative_path = run_id + os.sep + str(relative_file_path)
177
- data_catalog.data_hash = utils.get_data_hash(str(file))
178
- data_catalog.stage = "put"
179
- data_catalogs.append(data_catalog)
180
-
181
- if is_catalog_out_of_sync(data_catalog, synced_catalogs):
182
- logger.info(f"{data_catalog.name} was found to be changed, syncing")
183
-
184
- # Make the directory in the catalog if required
185
- Path(run_catalog / relative_file_path.parent).mkdir(parents=True, exist_ok=True)
186
- shutil.copy(file, run_catalog / relative_file_path)
187
- else:
188
- logger.info(f"{data_catalog.name} was found to be unchanged, ignoring syncing")
189
-
190
- if not data_catalogs:
191
- raise Exception(f"Did not find any files matching {name} in {copy_from}")
192
-
193
- return data_catalogs
194
-
195
- def sync_between_runs(self, previous_run_id: str, run_id: str):
196
- """
197
- Given the previous run id, sync the catalogs between the current one and previous
198
-
199
- Args:
200
- previous_run_id (str): The previous run id to sync the catalogs from
201
- run_id (str): The run_id to which the data catalogs should be synced to.
202
-
203
- Raises:
204
- Exception: If the previous run log does not exist in the catalog
205
-
206
- """
207
- logger.info(
208
- f"Using the {self.service_name} catalog and syncing catalogs"
209
- "between old: {previous_run_id} to new: {run_id}"
210
- )
211
-
212
- catalog_location = Path(self.get_catalog_location())
213
- run_catalog = catalog_location / run_id
214
- utils.safe_make_dir(run_catalog)
215
-
216
- if not utils.does_dir_exist(catalog_location / previous_run_id):
217
- msg = (
218
- f"Catalogs from previous run : {previous_run_id} are not found.\n"
219
- "Note: Please provision the catalog objects generated by previous run in the same catalog location"
220
- " as the current run, even if the catalog handler for the previous run was different"
221
- )
222
- raise Exception(msg)
223
-
224
- cataloged_files = list((catalog_location / previous_run_id).glob("*"))
225
-
226
- for cataloged_file in cataloged_files:
227
- if str(cataloged_file).endswith("execution.log"):
228
- continue
229
-
230
- if cataloged_file.is_file():
231
- shutil.copy(cataloged_file, run_catalog / cataloged_file.name)
232
- else:
233
- shutil.copytree(cataloged_file, run_catalog / cataloged_file.name)
234
- logger.info(f"Copied file from: {cataloged_file} to {run_catalog}")
File without changes
@@ -1,16 +0,0 @@
1
- import logging
2
- from pathlib import Path
3
-
4
- from runnable import defaults
5
- from runnable.extensions.catalog.file_system.implementation import FileSystemCatalog
6
-
7
- logger = logging.getLogger(defaults.LOGGER_NAME)
8
-
9
-
10
- class K8sPVCatalog(FileSystemCatalog):
11
- service_name: str = "k8s-pvc"
12
- persistent_volume_name: str
13
- mount_path: str
14
-
15
- def get_catalog_location(self):
16
- return str(Path(self.mount_path) / self.catalog_location)
@@ -1,59 +0,0 @@
1
- import logging
2
- from typing import cast
3
-
4
- from runnable import defaults
5
- from runnable.integration import BaseIntegration
6
-
7
- logger = logging.getLogger(defaults.NAME)
8
-
9
-
10
- class LocalCompute(BaseIntegration):
11
- """
12
- Integration between local and k8's pvc
13
- """
14
-
15
- executor_type = "local"
16
- service_type = "catalog" # One of secret, catalog, datastore
17
- service_provider = "k8s-pvc" # The actual implementation of the service
18
-
19
- def validate(self, **kwargs):
20
- msg = "We can't use the local compute k8s pvc store integration."
21
- raise Exception(msg)
22
-
23
-
24
- class LocalContainerCompute(BaseIntegration):
25
- """
26
- Integration between local-container and k8's pvc
27
- """
28
-
29
- executor_type = "local-container"
30
- service_type = "catalog" # One of secret, catalog, datastore
31
- service_provider = "k8s-pvc" # The actual implementation of the service
32
-
33
- def validate(self, **kwargs):
34
- msg = "We can't use the local-container compute k8s pvc store integration."
35
- raise Exception(msg)
36
-
37
-
38
- class ArgoCompute(BaseIntegration):
39
- """
40
- Integration between argo and k8's pvc
41
- """
42
-
43
- executor_type = "argo"
44
- service_type = "catalog" # One of secret, catalog, datastore
45
- service_provider = "k8s-pvc" # The actual implementation of the service
46
-
47
- def configure_for_traversal(self, **kwargs):
48
- from runnable.extensions.catalog.k8s_pvc.implementation import K8sPVCatalog
49
- from runnable.extensions.executor.argo.implementation import ArgoExecutor, UserVolumeMounts
50
-
51
- self.executor = cast(ArgoExecutor, self.executor)
52
- self.service = cast(K8sPVCatalog, self.service)
53
-
54
- volume_mount = UserVolumeMounts(
55
- name=self.service.persistent_volume_name,
56
- mount_path=self.service.mount_path,
57
- )
58
-
59
- self.executor.persistent_volumes.append(volume_mount)