runnable 0.12.3__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. runnable/__init__.py +0 -11
  2. runnable/catalog.py +27 -5
  3. runnable/cli.py +122 -26
  4. runnable/datastore.py +71 -35
  5. runnable/defaults.py +0 -1
  6. runnable/entrypoints.py +107 -32
  7. runnable/exceptions.py +6 -2
  8. runnable/executor.py +28 -9
  9. runnable/graph.py +37 -12
  10. runnable/integration.py +7 -2
  11. runnable/nodes.py +15 -17
  12. runnable/parameters.py +27 -8
  13. runnable/pickler.py +1 -1
  14. runnable/sdk.py +101 -33
  15. runnable/secrets.py +3 -1
  16. runnable/tasks.py +246 -34
  17. runnable/utils.py +41 -13
  18. {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info}/METADATA +25 -31
  19. runnable-0.14.0.dist-info/RECORD +24 -0
  20. {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info}/WHEEL +1 -1
  21. runnable-0.14.0.dist-info/entry_points.txt +40 -0
  22. runnable/extensions/__init__.py +0 -0
  23. runnable/extensions/catalog/__init__.py +0 -21
  24. runnable/extensions/catalog/file_system/__init__.py +0 -0
  25. runnable/extensions/catalog/file_system/implementation.py +0 -234
  26. runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
  27. runnable/extensions/catalog/k8s_pvc/implementation.py +0 -16
  28. runnable/extensions/catalog/k8s_pvc/integration.py +0 -59
  29. runnable/extensions/executor/__init__.py +0 -649
  30. runnable/extensions/executor/argo/__init__.py +0 -0
  31. runnable/extensions/executor/argo/implementation.py +0 -1194
  32. runnable/extensions/executor/argo/specification.yaml +0 -51
  33. runnable/extensions/executor/k8s_job/__init__.py +0 -0
  34. runnable/extensions/executor/k8s_job/implementation_FF.py +0 -259
  35. runnable/extensions/executor/k8s_job/integration_FF.py +0 -69
  36. runnable/extensions/executor/local/__init__.py +0 -0
  37. runnable/extensions/executor/local/implementation.py +0 -71
  38. runnable/extensions/executor/local_container/__init__.py +0 -0
  39. runnable/extensions/executor/local_container/implementation.py +0 -446
  40. runnable/extensions/executor/mocked/__init__.py +0 -0
  41. runnable/extensions/executor/mocked/implementation.py +0 -154
  42. runnable/extensions/executor/retry/__init__.py +0 -0
  43. runnable/extensions/executor/retry/implementation.py +0 -168
  44. runnable/extensions/nodes.py +0 -855
  45. runnable/extensions/run_log_store/__init__.py +0 -0
  46. runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
  47. runnable/extensions/run_log_store/chunked_file_system/implementation.py +0 -111
  48. runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
  49. runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +0 -21
  50. runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +0 -61
  51. runnable/extensions/run_log_store/db/implementation_FF.py +0 -157
  52. runnable/extensions/run_log_store/db/integration_FF.py +0 -0
  53. runnable/extensions/run_log_store/file_system/__init__.py +0 -0
  54. runnable/extensions/run_log_store/file_system/implementation.py +0 -140
  55. runnable/extensions/run_log_store/generic_chunked.py +0 -557
  56. runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
  57. runnable/extensions/run_log_store/k8s_pvc/implementation.py +0 -21
  58. runnable/extensions/run_log_store/k8s_pvc/integration.py +0 -56
  59. runnable/extensions/secrets/__init__.py +0 -0
  60. runnable/extensions/secrets/dotenv/__init__.py +0 -0
  61. runnable/extensions/secrets/dotenv/implementation.py +0 -100
  62. runnable-0.12.3.dist-info/RECORD +0 -64
  63. runnable-0.12.3.dist-info/entry_points.txt +0 -41
  64. {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,24 @@
1
+ runnable/__init__.py,sha256=WuJwXEBxjiz2E1jBapkOkYpIaCAPZ1Udyep0dnN4bkE,666
2
+ runnable/catalog.py,sha256=5eTYwZWqfVBXIIn8WbweTMqiXZ9ccvtJBnAiIxSQ3Vk,4835
3
+ runnable/cli.py,sha256=rBTvkNDetN6psHmDLa0kko8IHGvND5xMuO30hU_gcvY,9931
4
+ runnable/context.py,sha256=QhiXJHRcEBfSKB1ijvL5yB9w44x0HCe7VEiwK1cUJ9U,1124
5
+ runnable/datastore.py,sha256=Q_KKb4PNP2IXnUlR2bjOclDFAsAVJ_oNiCd5x0vB5jc,28127
6
+ runnable/defaults.py,sha256=HYkXNI2hg0Y-SsXySjliwdc-3FUGJvJV3TnarmMIFFs,4656
7
+ runnable/entrypoints.py,sha256=gMywHyoUheSAXCyqLMJ0QWK4IxiFVgEYwRDuZWsk-uI,18612
8
+ runnable/exceptions.py,sha256=3gyN2bhqYvaZF_bo8hA7I09u8aQCAeh8NclBp5lCH8w,2574
9
+ runnable/executor.py,sha256=Y-yCw4ZIz88nHn47QzCXvXm7VjByTIyBWzsqsaIpNP8,14653
10
+ runnable/graph.py,sha256=EuH0210DcbEFlc6J-aSvfXJOb0SqORUiTpgFYyb_KPM,16602
11
+ runnable/integration.py,sha256=IXBH20QKpFYW7pQwwbTI0qQvrg4kJseM0KMacQKli74,6791
12
+ runnable/names.py,sha256=vn92Kv9ANROYSZX6Z4z1v_WA3WiEdIYmG6KEStBFZug,8134
13
+ runnable/nodes.py,sha256=I9C65nj3kAHHXJSwn5QYximFjV7tbjBiTk0ayEgrmK4,16526
14
+ runnable/parameters.py,sha256=g_bJurLjuppFDiDpfFqy6BRF36o_EY0OC5APl7HJFok,5450
15
+ runnable/pickler.py,sha256=ydJ_eti_U1F4l-YacFp7BWm6g5vTn04UXye25S1HVok,2684
16
+ runnable/sdk.py,sha256=tEwTwcfm1KVfnEql3G_yJpgymDWOqoIIA4q3RzKmHp0,30365
17
+ runnable/secrets.py,sha256=PXcEJw-4WPzeWRLfsatcPPyr1zkqgHzdRWRcS9vvpvM,2354
18
+ runnable/tasks.py,sha256=QPCgH_D7YkN2oAi7-w6Ipt9IZ397SayjhAl_PPyVto8,29822
19
+ runnable/utils.py,sha256=THMHnWVrUhNKdIvUbeZdDiXnP1WEOuee9e9OB8zzW5M,20441
20
+ runnable-0.14.0.dist-info/METADATA,sha256=n3mrLZadanBuHaUz4h5WgtsfuXmkjaHdtsfOCSAmEEk,9994
21
+ runnable-0.14.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
22
+ runnable-0.14.0.dist-info/entry_points.txt,sha256=8yBeduXOnO3SUnafZQwzXiE8rQMPXGDbqueyL7G9euM,1297
23
+ runnable-0.14.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
24
+ runnable-0.14.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.9.0
2
+ Generator: hatchling 1.27.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -0,0 +1,40 @@
1
+ [console_scripts]
2
+ runnable = runnable.cli:cli
3
+
4
+ [catalog]
5
+ do-nothing = runnable.catalog:DoNothingCatalog
6
+ file-system = extensions.catalog.file_system:FileSystemCatalog
7
+
8
+ [executor]
9
+ argo = extensions.executor.argo:ArgoExecutor
10
+ local = extensions.executor.local:LocalExecutor
11
+ local-container = extensions.executor.local_container:LocalContainerExecutor
12
+ mocked = extensions.executor.mocked:MockedExecutor
13
+ retry = extensions.executor.retry:RetryExecutor
14
+
15
+ [nodes]
16
+ dag = extensions.nodes.nodes:DagNode
17
+ fail = extensions.nodes.nodes:FailNode
18
+ map = extensions.nodes.nodes:MapNode
19
+ parallel = extensions.nodes.nodes:ParallelNode
20
+ stub = extensions.nodes.nodes:StubNode
21
+ success = extensions.nodes.nodes:SuccessNode
22
+ task = extensions.nodes.nodes:TaskNode
23
+
24
+ [pickler]
25
+ pickle = runnable.pickler:NativePickler
26
+
27
+ [run_log_store]
28
+ buffered = runnable.datastore:BufferRunLogstore
29
+ chunked-fs = extensions.run_log_store.chunked_fs:ChunkedFileSystemRunLogStore
30
+ file-system = extensions.run_log_store.file_system:FileSystemRunLogstore
31
+
32
+ [secrets]
33
+ do-nothing = runnable.secrets:DoNothingSecretManager
34
+ dotenv = extensions.secrets.dotenv:DotEnvSecrets
35
+ env-secrets = runnable.secrets:EnvSecretsManager
36
+
37
+ [tasks]
38
+ notebook = runnable.tasks:NotebookTaskType
39
+ python = runnable.tasks:PythonTaskType
40
+ shell = runnable.tasks:ShellTaskType
File without changes
@@ -1,21 +0,0 @@
1
- from typing import List, Optional
2
-
3
- from runnable.datastore import DataCatalog
4
-
5
-
6
def is_catalog_out_of_sync(catalog, synced_catalogs: Optional[List["DataCatalog"]] = None) -> bool:
    """
    Decide whether a catalog item needs to be synced again.

    The item is compared against previously synced catalog objects by
    ``catalog_relative_path``; the first entry with the same path decides
    the outcome by comparing ``data_hash``.

    Args:
        catalog: The catalog object to check. It must expose
            ``catalog_relative_path`` and ``data_hash``.
        synced_catalogs: Catalog objects synced earlier. ``None`` or an
            empty list means nothing has been synced yet.

    Returns:
        bool: ``False`` only when an already synced object has the same
        relative path and the same data hash (the item is in sync).
        ``True`` when the hash differs, when the object was never synced,
        or when there are no synced catalogs at all.
    """
    # The type belongs in the annotation; the default must be None so that
    # calling with a single argument does not try to iterate a typing object.
    if not synced_catalogs:
        return True  # Nothing has been synced in the past

    for synced_catalog in synced_catalogs:
        if synced_catalog.catalog_relative_path == catalog.catalog_relative_path:
            # Same object: it is out of sync only if its content hash changed.
            return synced_catalog.data_hash != catalog.data_hash

    return True  # The object does not exist, sync it
File without changes
@@ -1,234 +0,0 @@
1
- import logging
2
- import os
3
- import shutil
4
- from pathlib import Path
5
- from typing import Any, Dict, List, Optional
6
-
7
- from runnable import defaults, utils
8
- from runnable.catalog import BaseCatalog
9
- from runnable.datastore import DataCatalog
10
- from runnable.extensions.catalog import is_catalog_out_of_sync
11
-
12
- logger = logging.getLogger(defaults.LOGGER_NAME)
13
-
14
-
15
class FileSystemCatalog(BaseCatalog):
    """
    A Catalog handler that uses the local file system for cataloging.

    Note: Do not use this if the steps of the pipeline run on different compute environments.

    Example config:

    catalog:
      type: file-system
      config:
        catalog_location: The location to store the catalog.
        compute_data_folder: The folder to source the data from.

    """

    service_name: str = "file-system"
    catalog_location: str = defaults.CATALOG_LOCATION_FOLDER

    def get_catalog_location(self):
        """Return the configured root location of the catalog."""
        return self.catalog_location

    def get_summary(self) -> Dict[str, Any]:
        """Return a small summary dictionary holding the catalog location."""
        summary = {
            "Catalog Location": self.get_catalog_location(),
        }

        return summary

    def get(self, name: str, run_id: str, compute_data_folder: str = "", **kwargs) -> List[DataCatalog]:
        """
        Get the files matching the glob pattern from the catalog of the run.

        Matching files are copied from ``<catalog_location>/<run_id>/<data folder>``
        into the data folder, keeping their relative layout.

        Args:
            name (str): A glob matching the file name
            run_id (str): The run id
            compute_data_folder (str, optional): The folder to copy the files to.
                When empty, ``self.compute_data_folder`` is used.

        Raises:
            Exception: If the catalog location of the run does not exist
            Exception: If no file matched the glob pattern

        Returns:
            List(object) : A list of catalog objects
        """
        logger.info(f"Using the {self.service_name} catalog and trying to get {name} for run_id: {run_id}")

        copy_to = self.compute_data_folder
        if compute_data_folder:
            copy_to = compute_data_folder

        copy_to = Path(copy_to)  # type: ignore

        catalog_location = self.get_catalog_location()
        run_catalog = Path(catalog_location) / run_id / copy_to

        logger.debug(f"Copying objects to {copy_to} from the run catalog location of {run_catalog}")

        if not utils.does_dir_exist(run_catalog):
            msg = (
                f"Expected Catalog to be present at: {run_catalog} but not found.\n"
                "Note: Please make sure that some data was put in the catalog before trying to get from it.\n"
            )
            raise Exception(msg)

        # Iterate through the contents of the run_catalog and copy the files that fit the name pattern
        # We should also return a list of data hashes
        # Materialise the matches so that the debug log shows the files and not a generator object.
        glob_files = list(run_catalog.glob(name))
        logger.debug(f"Glob identified {glob_files} as matches to from the catalog location: {run_catalog}")

        data_catalogs = []
        run_log_store = self._context.run_log_store
        for file in glob_files:
            if file.is_dir():
                # Need not add a data catalog for the folder
                continue

            if str(file).endswith(".execution.log"):
                # Execution logs are skipped, they are never copied back to the data folder
                continue

            relative_file_path = file.relative_to(run_catalog)

            data_catalog = run_log_store.create_data_catalog(str(relative_file_path))
            data_catalog.catalog_handler_location = catalog_location
            data_catalog.catalog_relative_path = str(relative_file_path)
            data_catalog.data_hash = utils.get_data_hash(str(file))
            data_catalog.stage = "get"
            data_catalogs.append(data_catalog)

            # Make the directory in the data folder if required
            Path(copy_to / relative_file_path.parent).mkdir(parents=True, exist_ok=True)
            shutil.copy(file, copy_to / relative_file_path)

            logger.info(f"Copied {file} from {run_catalog} to {copy_to}")

        if not data_catalogs:
            raise Exception(f"Did not find any files matching {name} in {run_catalog}")

        return data_catalogs

    def put(
        self,
        name: str,
        run_id: str,
        compute_data_folder: str = "",
        synced_catalogs: Optional[List[DataCatalog]] = None,
        **kwargs,
    ) -> List[DataCatalog]:
        """
        Put the files matching the glob pattern into the catalog.

        If previously synced catalogs are provided, and no changes were observed, we do not sync them.

        Args:
            name (str): The glob pattern of the files to catalog
            run_id (str): The run id of the run
            compute_data_folder (str, optional): The compute data folder to sync from.
                When empty, ``self.compute_data_folder`` is used.
            synced_catalogs (list, optional): list of previously synced catalog objects. Defaults to None.

        Raises:
            Exception: If the compute data folder does not exist.
            Exception: If no file matched the glob pattern

        Returns:
            List(object) : A list of catalog objects
        """
        logger.info(f"Using the {self.service_name} catalog and trying to put {name} for run_id: {run_id}")

        copy_from = self.compute_data_folder
        if compute_data_folder:
            copy_from = compute_data_folder
        copy_from = Path(copy_from)  # type: ignore

        catalog_location = self.get_catalog_location()
        run_catalog = Path(catalog_location) / run_id
        utils.safe_make_dir(run_catalog)

        logger.debug(f"Copying objects from {copy_from} to the run catalog location of {run_catalog}")

        if not utils.does_dir_exist(copy_from):
            # Report the folder that was actually checked; the argument may be empty
            # when the folder configured on the catalog is being used.
            msg = (
                f"Expected compute data folder to be present at: {copy_from} but not found. \n"
                "Note: runnable does not create the compute data folder for you. Please ensure that the "
                "folder exists.\n"
            )
            raise Exception(msg)

        # Iterate through the contents of copy_from and if the name matches, we move them to the run_catalog
        # We should also return a list of datastore.DataCatalog items

        # Materialise the matches before copying anything, so the debug log shows the files
        # and files created during the copy cannot be picked up by the glob.
        glob_files = list(copy_from.glob(name))  # type: ignore
        logger.debug(f"Glob identified {glob_files} as matches to from the compute data folder: {copy_from}")

        data_catalogs = []
        run_log_store = self._context.run_log_store
        for file in glob_files:
            if file.is_dir():
                # Need not add a data catalog for the folder
                continue

            relative_file_path = file.relative_to(".")

            data_catalog = run_log_store.create_data_catalog(str(relative_file_path))
            data_catalog.catalog_handler_location = catalog_location
            data_catalog.catalog_relative_path = run_id + os.sep + str(relative_file_path)
            data_catalog.data_hash = utils.get_data_hash(str(file))
            data_catalog.stage = "put"
            data_catalogs.append(data_catalog)

            if is_catalog_out_of_sync(data_catalog, synced_catalogs):
                logger.info(f"{data_catalog.name} was found to be changed, syncing")

                # Make the directory in the catalog if required
                Path(run_catalog / relative_file_path.parent).mkdir(parents=True, exist_ok=True)
                shutil.copy(file, run_catalog / relative_file_path)
            else:
                logger.info(f"{data_catalog.name} was found to be unchanged, ignoring syncing")

        if not data_catalogs:
            raise Exception(f"Did not find any files matching {name} in {copy_from}")

        return data_catalogs

    def sync_between_runs(self, previous_run_id: str, run_id: str):
        """
        Given the previous run id, sync the catalogs between the current one and previous

        Every top-level entry of the previous run's catalog, except execution logs,
        is copied into the catalog of the current run.

        Args:
            previous_run_id (str): The previous run id to sync the catalogs from
            run_id (str): The run_id to which the data catalogs should be synced to.

        Raises:
            Exception: If the previous run log does not exist in the catalog

        """
        # Both parts must be f-strings, otherwise the run ids are logged as literal placeholders.
        logger.info(
            f"Using the {self.service_name} catalog and syncing catalogs "
            f"between old: {previous_run_id} to new: {run_id}"
        )

        catalog_location = Path(self.get_catalog_location())
        run_catalog = catalog_location / run_id
        utils.safe_make_dir(run_catalog)

        if not utils.does_dir_exist(catalog_location / previous_run_id):
            msg = (
                f"Catalogs from previous run : {previous_run_id} are not found.\n"
                "Note: Please provision the catalog objects generated by previous run in the same catalog location"
                " as the current run, even if the catalog handler for the previous run was different"
            )
            raise Exception(msg)

        cataloged_files = list((catalog_location / previous_run_id).glob("*"))

        for cataloged_file in cataloged_files:
            if str(cataloged_file).endswith("execution.log"):
                continue

            if cataloged_file.is_file():
                shutil.copy(cataloged_file, run_catalog / cataloged_file.name)
            else:
                shutil.copytree(cataloged_file, run_catalog / cataloged_file.name)
            logger.info(f"Copied file from: {cataloged_file} to {run_catalog}")
File without changes
@@ -1,16 +0,0 @@
1
- import logging
2
- from pathlib import Path
3
-
4
- from runnable import defaults
5
- from runnable.extensions.catalog.file_system.implementation import FileSystemCatalog
6
-
7
- logger = logging.getLogger(defaults.LOGGER_NAME)
8
-
9
-
10
class K8sPVCatalog(FileSystemCatalog):
    """
    File system catalog whose location is resolved underneath a mount path.

    Only the catalog location differs from the parent class: it is the
    configured ``catalog_location`` joined onto ``mount_path``.
    """

    service_name: str = "k8s-pvc"
    persistent_volume_name: str
    mount_path: str

    def get_catalog_location(self):
        """Return ``mount_path`` joined with ``catalog_location`` as a string."""
        mounted_location = Path(self.mount_path) / self.catalog_location
        return str(mounted_location)
@@ -1,59 +0,0 @@
1
- import logging
2
- from typing import cast
3
-
4
- from runnable import defaults
5
- from runnable.integration import BaseIntegration
6
-
7
- logger = logging.getLogger(defaults.NAME)
8
-
9
-
10
class LocalCompute(BaseIntegration):
    """
    Integration between the local executor and the k8s pvc catalog.

    The combination is rejected: validation always raises.
    """

    executor_type = "local"
    service_type = "catalog"  # One of secret, catalog, datastore
    service_provider = "k8s-pvc"  # The actual implementation of the service

    def validate(self, **kwargs):
        """
        Reject the combination unconditionally.

        Raises:
            Exception: Always.
        """
        raise Exception("We can't use the local compute k8s pvc store integration.")
22
-
23
-
24
class LocalContainerCompute(BaseIntegration):
    """
    Integration between the local-container executor and the k8s pvc catalog.

    The combination is rejected: validation always raises.
    """

    executor_type = "local-container"
    service_type = "catalog"  # One of secret, catalog, datastore
    service_provider = "k8s-pvc"  # The actual implementation of the service

    def validate(self, **kwargs):
        """
        Reject the combination unconditionally.

        Raises:
            Exception: Always.
        """
        raise Exception("We can't use the local-container compute k8s pvc store integration.")
36
-
37
-
38
class ArgoCompute(BaseIntegration):
    """
    Integration between the argo executor and the k8s pvc catalog.

    Registers the catalog's persistent volume on the executor so that it is
    part of the executor's ``persistent_volumes``.
    """

    executor_type = "argo"
    service_type = "catalog"  # One of secret, catalog, datastore
    service_provider = "k8s-pvc"  # The actual implementation of the service

    def configure_for_traversal(self, **kwargs):
        """
        Append a volume mount built from the catalog's ``persistent_volume_name``
        and ``mount_path`` to the executor's ``persistent_volumes``.
        """
        # Imported here rather than at module level, as in the original.
        # NOTE(review): presumably to avoid import cycles or optional dependencies - confirm.
        from runnable.extensions.catalog.k8s_pvc.implementation import K8sPVCatalog
        from runnable.extensions.executor.argo.implementation import ArgoExecutor, UserVolumeMounts

        # The casts only narrow the types for the type checker; the objects are unchanged.
        self.executor = cast(ArgoExecutor, self.executor)
        self.service = cast(K8sPVCatalog, self.service)

        pvc_name = self.service.persistent_volume_name
        pvc_mount_path = self.service.mount_path
        pvc_mount = UserVolumeMounts(name=pvc_name, mount_path=pvc_mount_path)

        self.executor.persistent_volumes.append(pvc_mount)