runnable 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. runnable/__init__.py +34 -0
  2. runnable/catalog.py +141 -0
  3. runnable/cli.py +272 -0
  4. runnable/context.py +34 -0
  5. runnable/datastore.py +687 -0
  6. runnable/defaults.py +182 -0
  7. runnable/entrypoints.py +448 -0
  8. runnable/exceptions.py +94 -0
  9. runnable/executor.py +421 -0
  10. runnable/experiment_tracker.py +139 -0
  11. runnable/extensions/catalog/__init__.py +21 -0
  12. runnable/extensions/catalog/file_system/__init__.py +0 -0
  13. runnable/extensions/catalog/file_system/implementation.py +227 -0
  14. runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
  15. runnable/extensions/catalog/k8s_pvc/implementation.py +16 -0
  16. runnable/extensions/catalog/k8s_pvc/integration.py +59 -0
  17. runnable/extensions/executor/__init__.py +725 -0
  18. runnable/extensions/executor/argo/__init__.py +0 -0
  19. runnable/extensions/executor/argo/implementation.py +1183 -0
  20. runnable/extensions/executor/argo/specification.yaml +51 -0
  21. runnable/extensions/executor/k8s_job/__init__.py +0 -0
  22. runnable/extensions/executor/k8s_job/implementation_FF.py +259 -0
  23. runnable/extensions/executor/k8s_job/integration_FF.py +69 -0
  24. runnable/extensions/executor/local/__init__.py +0 -0
  25. runnable/extensions/executor/local/implementation.py +70 -0
  26. runnable/extensions/executor/local_container/__init__.py +0 -0
  27. runnable/extensions/executor/local_container/implementation.py +361 -0
  28. runnable/extensions/executor/mocked/__init__.py +0 -0
  29. runnable/extensions/executor/mocked/implementation.py +189 -0
  30. runnable/extensions/experiment_tracker/__init__.py +0 -0
  31. runnable/extensions/experiment_tracker/mlflow/__init__.py +0 -0
  32. runnable/extensions/experiment_tracker/mlflow/implementation.py +94 -0
  33. runnable/extensions/nodes.py +655 -0
  34. runnable/extensions/run_log_store/__init__.py +0 -0
  35. runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
  36. runnable/extensions/run_log_store/chunked_file_system/implementation.py +106 -0
  37. runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
  38. runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +21 -0
  39. runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +61 -0
  40. runnable/extensions/run_log_store/db/implementation_FF.py +157 -0
  41. runnable/extensions/run_log_store/db/integration_FF.py +0 -0
  42. runnable/extensions/run_log_store/file_system/__init__.py +0 -0
  43. runnable/extensions/run_log_store/file_system/implementation.py +136 -0
  44. runnable/extensions/run_log_store/generic_chunked.py +541 -0
  45. runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
  46. runnable/extensions/run_log_store/k8s_pvc/implementation.py +21 -0
  47. runnable/extensions/run_log_store/k8s_pvc/integration.py +56 -0
  48. runnable/extensions/secrets/__init__.py +0 -0
  49. runnable/extensions/secrets/dotenv/__init__.py +0 -0
  50. runnable/extensions/secrets/dotenv/implementation.py +100 -0
  51. runnable/extensions/secrets/env_secrets/__init__.py +0 -0
  52. runnable/extensions/secrets/env_secrets/implementation.py +42 -0
  53. runnable/graph.py +464 -0
  54. runnable/integration.py +205 -0
  55. runnable/interaction.py +404 -0
  56. runnable/names.py +546 -0
  57. runnable/nodes.py +501 -0
  58. runnable/parameters.py +183 -0
  59. runnable/pickler.py +102 -0
  60. runnable/sdk.py +472 -0
  61. runnable/secrets.py +95 -0
  62. runnable/tasks.py +395 -0
  63. runnable/utils.py +630 -0
  64. runnable-0.3.0.dist-info/METADATA +437 -0
  65. runnable-0.3.0.dist-info/RECORD +69 -0
  66. {runnable-0.1.0.dist-info → runnable-0.3.0.dist-info}/WHEEL +1 -1
  67. runnable-0.3.0.dist-info/entry_points.txt +44 -0
  68. runnable-0.1.0.dist-info/METADATA +0 -16
  69. runnable-0.1.0.dist-info/RECORD +0 -6
  70. /runnable/{.gitkeep → extensions/__init__.py} +0 -0
  71. {runnable-0.1.0.dist-info → runnable-0.3.0.dist-info}/LICENSE +0 -0
@@ -0,0 +1,227 @@
1
+ import logging
2
+ import os
3
+ import shutil
4
+ from pathlib import Path
5
+ from typing import List, Optional
6
+
7
+ from runnable import defaults, utils
8
+ from runnable.catalog import BaseCatalog
9
+ from runnable.datastore import DataCatalog
10
+ from runnable.extensions.catalog import is_catalog_out_of_sync
11
+
12
+ logger = logging.getLogger(defaults.LOGGER_NAME)
13
+
14
+
15
class FileSystemCatalog(BaseCatalog):
    """
    A Catalog handler that uses the local file system for cataloging.

    Note: Do not use this if the steps of the pipeline run on different compute environments.

    Example config:

    catalog:
      type: file-system
      config:
        catalog_location: The location to store the catalog.
        compute_data_folder: The folder to source the data from.

    """

    service_name: str = "file-system"
    catalog_location: str = defaults.CATALOG_LOCATION_FOLDER

    def get_catalog_location(self) -> str:
        """Return the root folder of the catalog on the local file system."""
        return self.catalog_location

    def get(self, name: str, run_id: str, compute_data_folder: str = "", **kwargs) -> List[DataCatalog]:
        """
        Get the file by matching glob pattern to the name.

        Args:
            name (str): A glob matching the file name
            run_id (str): The run id
            compute_data_folder (str, optional): Overrides the configured compute data folder when provided.

        Raises:
            Exception: If the catalog location does not exist, or no files matched the pattern.

        Returns:
            List[DataCatalog]: A list of data catalog objects for the copied files
        """
        logger.info(f"Using the {self.service_name} catalog and trying to get {name} for run_id: {run_id}")

        copy_to = self.compute_data_folder
        if compute_data_folder:
            copy_to = compute_data_folder

        copy_to = Path(copy_to)  # type: ignore

        catalog_location = self.get_catalog_location()
        run_catalog = Path(catalog_location) / run_id / copy_to

        logger.debug(f"Copying objects to {copy_to} from the run catalog location of {run_catalog}")

        if not utils.does_dir_exist(run_catalog):
            msg = (
                f"Expected Catalog to be present at: {run_catalog} but not found.\n"
                "Note: Please make sure that some data was put in the catalog before trying to get from it.\n"
            )
            raise Exception(msg)

        # Iterate through the contents of the run_catalog and copy the files that fit the name pattern
        # We should also return a list of data hashes
        # Materialized so the debug log shows the actual matches instead of a generator repr.
        glob_files = list(run_catalog.glob(name))
        logger.debug(f"Glob identified {glob_files} as matches to from the catalog location: {run_catalog}")

        data_catalogs = []
        run_log_store = self._context.run_log_store
        for file in glob_files:
            if file.is_dir():
                # Need not add a data catalog for the folder
                continue

            # Execution logs are cataloged separately; never copy them back to the data folder.
            if str(file).endswith(".execution.log"):
                continue

            relative_file_path = file.relative_to(run_catalog)

            data_catalog = run_log_store.create_data_catalog(str(relative_file_path))
            data_catalog.catalog_handler_location = catalog_location
            data_catalog.catalog_relative_path = str(relative_file_path)
            data_catalog.data_hash = utils.get_data_hash(str(file))
            data_catalog.stage = "get"
            data_catalogs.append(data_catalog)

            # Make the directory in the data folder if required
            Path(copy_to / relative_file_path.parent).mkdir(parents=True, exist_ok=True)
            shutil.copy(file, copy_to / relative_file_path)

            logger.info(f"Copied {file} from {run_catalog} to {copy_to}")

        if not data_catalogs:
            raise Exception(f"Did not find any files matching {name} in {run_catalog}")

        return data_catalogs

    def put(
        self,
        name: str,
        run_id: str,
        compute_data_folder: str = "",
        synced_catalogs: Optional[List[DataCatalog]] = None,
        **kwargs,
    ) -> List[DataCatalog]:
        """
        Put the files matching the glob pattern into the catalog.

        If previously synced catalogs are provided, and no changes were observed, we do not sync them.

        Args:
            name (str): The glob pattern of the files to catalog
            run_id (str): The run id of the run
            compute_data_folder (str, optional): The compute data folder to sync from. Defaults to settings default.
            synced_catalogs (list, optional): Previously synced data catalogs. Defaults to None.

        Raises:
            Exception: If the compute data folder does not exist, or no files matched the pattern.

        Returns:
            List[DataCatalog]: A list of data catalog objects for the files considered
        """
        logger.info(f"Using the {self.service_name} catalog and trying to put {name} for run_id: {run_id}")

        copy_from = self.compute_data_folder
        if compute_data_folder:
            copy_from = compute_data_folder
        copy_from = Path(copy_from)  # type: ignore

        catalog_location = self.get_catalog_location()
        run_catalog = Path(catalog_location) / run_id
        utils.safe_make_dir(run_catalog)

        logger.debug(f"Copying objects from {copy_from} to the run catalog location of {run_catalog}")

        if not utils.does_dir_exist(copy_from):
            # BUG FIX: report the folder actually checked (copy_from); the original used the
            # compute_data_folder argument, which is empty when the configured default is in use.
            msg = (
                f"Expected compute data folder to be present at: {copy_from} but not found. \n"
                "Note: runnable does not create the compute data folder for you. Please ensure that the "
                "folder exists.\n"
            )
            raise Exception(msg)

        # Iterate through the contents of copy_from and if the name matches, we move them to the run_catalog
        # We should also return a list of datastore.DataCatalog items

        # Materialized so the debug log shows the actual matches instead of a generator repr.
        glob_files = list(copy_from.glob(name))  # type: ignore
        logger.debug(f"Glob identified {glob_files} as matches to from the compute data folder: {copy_from}")

        data_catalogs = []
        run_log_store = self._context.run_log_store
        for file in glob_files:
            if file.is_dir():
                # Need not add a data catalog for the folder
                continue

            # Path relative to the current working directory; the same relative layout is
            # mirrored inside the run's catalog folder.
            relative_file_path = file.relative_to(".")

            data_catalog = run_log_store.create_data_catalog(str(relative_file_path))
            data_catalog.catalog_handler_location = catalog_location
            data_catalog.catalog_relative_path = run_id + os.sep + str(relative_file_path)
            data_catalog.data_hash = utils.get_data_hash(str(file))
            data_catalog.stage = "put"
            data_catalogs.append(data_catalog)

            if is_catalog_out_of_sync(data_catalog, synced_catalogs):
                logger.info(f"{data_catalog.name} was found to be changed, syncing")

                # Make the directory in the catalog if required
                Path(run_catalog / relative_file_path.parent).mkdir(parents=True, exist_ok=True)
                shutil.copy(file, run_catalog / relative_file_path)
            else:
                logger.info(f"{data_catalog.name} was found to be unchanged, ignoring syncing")

        if not data_catalogs:
            raise Exception(f"Did not find any files matching {name} in {copy_from}")

        return data_catalogs

    def sync_between_runs(self, previous_run_id: str, run_id: str):
        """
        Given the previous run id, sync the catalogs between the current one and previous

        Args:
            previous_run_id (str): The previous run id to sync the catalogs from
            run_id (str): The run_id to which the data catalogs should be synced to.

        Raises:
            Exception: If the previous run log does not exist in the catalog

        """
        # BUG FIX: the second line of this message was missing its f-prefix, so the
        # placeholders were logged literally; the concatenation also lacked a separating space.
        logger.info(
            f"Using the {self.service_name} catalog and syncing catalogs "
            f"between old: {previous_run_id} to new: {run_id}"
        )

        catalog_location = Path(self.get_catalog_location())
        run_catalog = catalog_location / run_id
        utils.safe_make_dir(run_catalog)

        if not utils.does_dir_exist(catalog_location / previous_run_id):
            msg = (
                f"Catalogs from previous run : {previous_run_id} are not found.\n"
                "Note: Please provision the catalog objects generated by previous run in the same catalog location"
                " as the current run, even if the catalog handler for the previous run was different"
            )
            raise Exception(msg)

        cataloged_files = list((catalog_location / previous_run_id).glob("*"))

        for cataloged_file in cataloged_files:
            # Execution logs belong to the previous run; do not carry them forward.
            if str(cataloged_file).endswith("execution.log"):
                continue

            # BUG FIX: removed a stray debug print(cataloged_file.name) left in the loop.
            if cataloged_file.is_file():
                shutil.copy(cataloged_file, run_catalog / cataloged_file.name)
            else:
                shutil.copytree(cataloged_file, run_catalog / cataloged_file.name)
            logger.info(f"Copied file from: {cataloged_file} to {run_catalog}")
File without changes
@@ -0,0 +1,16 @@
1
+ import logging
2
+ from pathlib import Path
3
+
4
+ from runnable import defaults
5
+ from runnable.extensions.catalog.file_system.implementation import FileSystemCatalog
6
+
7
+ logger = logging.getLogger(defaults.LOGGER_NAME)
8
+
9
+
10
class K8sPVCatalog(FileSystemCatalog):
    """A file-system catalog that stores its data under a mounted Kubernetes persistent volume."""

    service_name: str = "k8s-pvc"
    persistent_volume_name: str
    mount_path: str

    def get_catalog_location(self):
        """Resolve the catalog folder relative to the PVC mount point."""
        location = Path(self.mount_path) / self.catalog_location
        return str(location)
@@ -0,0 +1,59 @@
1
+ import logging
2
+ from typing import cast
3
+
4
+ from runnable import defaults
5
+ from runnable.integration import BaseIntegration
6
+
7
+ logger = logging.getLogger(defaults.NAME)
8
+
9
+
10
class LocalCompute(BaseIntegration):
    """
    Integration between the local executor and the k8s-pvc catalog.

    This pairing is unsupported, so validation always fails.
    """

    executor_type = "local"
    service_type = "catalog"  # One of secret, catalog, datastore
    service_provider = "k8s-pvc"  # The actual implementation of the service

    def validate(self, **kwargs):
        # A local run cannot reach a cluster-mounted persistent volume.
        raise Exception("We can't use the local compute k8s pvc store integration.")
22
+
23
+
24
class LocalContainerCompute(BaseIntegration):
    """
    Integration between the local-container executor and the k8s-pvc catalog.

    This pairing is unsupported, so validation always fails.
    """

    executor_type = "local-container"
    service_type = "catalog"  # One of secret, catalog, datastore
    service_provider = "k8s-pvc"  # The actual implementation of the service

    def validate(self, **kwargs):
        # A local container run cannot reach a cluster-mounted persistent volume.
        raise Exception("We can't use the local-container compute k8s pvc store integration.")
36
+
37
+
38
class ArgoCompute(BaseIntegration):
    """
    Integration between the argo executor and the k8s-pvc catalog.

    Registers the catalog's persistent volume as a mount on the executor.
    """

    executor_type = "argo"
    service_type = "catalog"  # One of secret, catalog, datastore
    service_provider = "k8s-pvc"  # The actual implementation of the service

    def configure_for_traversal(self, **kwargs):
        # Imported here rather than at module top — presumably to avoid import cycles; confirm.
        from runnable.extensions.catalog.k8s_pvc.implementation import K8sPVCatalog
        from runnable.extensions.executor.argo.implementation import ArgoExecutor, UserVolumeMounts

        executor = cast(ArgoExecutor, self.executor)
        catalog = cast(K8sPVCatalog, self.service)
        self.executor = executor
        self.service = catalog

        # Make the catalog's PV available to the workflow's containers.
        mount = UserVolumeMounts(
            name=catalog.persistent_volume_name,
            mount_path=catalog.mount_path,
        )
        executor.persistent_volumes.append(mount)