runnable 0.1.0-py3-none-any.whl → 0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. runnable/__init__.py +34 -0
  2. runnable/catalog.py +141 -0
  3. runnable/cli.py +272 -0
  4. runnable/context.py +34 -0
  5. runnable/datastore.py +686 -0
  6. runnable/defaults.py +179 -0
  7. runnable/entrypoints.py +484 -0
  8. runnable/exceptions.py +94 -0
  9. runnable/executor.py +431 -0
  10. runnable/experiment_tracker.py +139 -0
  11. runnable/extensions/catalog/__init__.py +21 -0
  12. runnable/extensions/catalog/file_system/__init__.py +0 -0
  13. runnable/extensions/catalog/file_system/implementation.py +226 -0
  14. runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
  15. runnable/extensions/catalog/k8s_pvc/implementation.py +16 -0
  16. runnable/extensions/catalog/k8s_pvc/integration.py +59 -0
  17. runnable/extensions/executor/__init__.py +714 -0
  18. runnable/extensions/executor/argo/__init__.py +0 -0
  19. runnable/extensions/executor/argo/implementation.py +1182 -0
  20. runnable/extensions/executor/argo/specification.yaml +51 -0
  21. runnable/extensions/executor/k8s_job/__init__.py +0 -0
  22. runnable/extensions/executor/k8s_job/implementation_FF.py +259 -0
  23. runnable/extensions/executor/k8s_job/integration_FF.py +69 -0
  24. runnable/extensions/executor/local/__init__.py +0 -0
  25. runnable/extensions/executor/local/implementation.py +69 -0
  26. runnable/extensions/executor/local_container/__init__.py +0 -0
  27. runnable/extensions/executor/local_container/implementation.py +367 -0
  28. runnable/extensions/executor/mocked/__init__.py +0 -0
  29. runnable/extensions/executor/mocked/implementation.py +220 -0
  30. runnable/extensions/experiment_tracker/__init__.py +0 -0
  31. runnable/extensions/experiment_tracker/mlflow/__init__.py +0 -0
  32. runnable/extensions/experiment_tracker/mlflow/implementation.py +94 -0
  33. runnable/extensions/nodes.py +675 -0
  34. runnable/extensions/run_log_store/__init__.py +0 -0
  35. runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
  36. runnable/extensions/run_log_store/chunked_file_system/implementation.py +106 -0
  37. runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
  38. runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +21 -0
  39. runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +61 -0
  40. runnable/extensions/run_log_store/db/implementation_FF.py +157 -0
  41. runnable/extensions/run_log_store/db/integration_FF.py +0 -0
  42. runnable/extensions/run_log_store/file_system/__init__.py +0 -0
  43. runnable/extensions/run_log_store/file_system/implementation.py +136 -0
  44. runnable/extensions/run_log_store/generic_chunked.py +541 -0
  45. runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
  46. runnable/extensions/run_log_store/k8s_pvc/implementation.py +21 -0
  47. runnable/extensions/run_log_store/k8s_pvc/integration.py +56 -0
  48. runnable/extensions/secrets/__init__.py +0 -0
  49. runnable/extensions/secrets/dotenv/__init__.py +0 -0
  50. runnable/extensions/secrets/dotenv/implementation.py +100 -0
  51. runnable/extensions/secrets/env_secrets/__init__.py +0 -0
  52. runnable/extensions/secrets/env_secrets/implementation.py +42 -0
  53. runnable/graph.py +464 -0
  54. runnable/integration.py +205 -0
  55. runnable/interaction.py +399 -0
  56. runnable/names.py +546 -0
  57. runnable/nodes.py +489 -0
  58. runnable/parameters.py +183 -0
  59. runnable/pickler.py +102 -0
  60. runnable/sdk.py +470 -0
  61. runnable/secrets.py +95 -0
  62. runnable/tasks.py +392 -0
  63. runnable/utils.py +630 -0
  64. runnable-0.2.0.dist-info/METADATA +437 -0
  65. runnable-0.2.0.dist-info/RECORD +69 -0
  66. runnable-0.2.0.dist-info/entry_points.txt +44 -0
  67. runnable-0.1.0.dist-info/METADATA +0 -16
  68. runnable-0.1.0.dist-info/RECORD +0 -6
  69. /runnable/{.gitkeep → extensions/__init__.py} +0 -0
  70. {runnable-0.1.0.dist-info → runnable-0.2.0.dist-info}/LICENSE +0 -0
  71. {runnable-0.1.0.dist-info → runnable-0.2.0.dist-info}/WHEEL +0 -0
runnable/exceptions.py ADDED
@@ -0,0 +1,94 @@
+ class RunLogExistsError(Exception):  # pragma: no cover
+     """Raised when a run log for the given run id already exists in the datastore."""
+
+     def __init__(self, run_id):
+         super().__init__()
+         self.message = f"Run id for {run_id} is already found in the datastore"
+
+
+ class RunLogNotFoundError(Exception):  # pragma: no cover
+     """Raised when a run log for the given run id is not found in the datastore."""
+
+     def __init__(self, run_id):
+         super().__init__()
+         self.message = f"Run id for {run_id} is not found in the datastore"
+
+
+ class StepLogNotFoundError(Exception):  # pragma: no cover
+     """Raised when a step log is not found in the datastore for the given run id."""
+
+     def __init__(self, run_id, name):
+         super().__init__()
+         self.message = f"Step log for {name} is not found in the datastore for Run id: {run_id}"
+
+
+ class BranchLogNotFoundError(Exception):  # pragma: no cover
+     """Raised when a branch log is not found in the datastore for the given run id."""
+
+     def __init__(self, run_id, name):
+         super().__init__()
+         self.message = f"Branch log for {name} is not found in the datastore for Run id: {run_id}"
+
+
+ class NodeNotFoundError(Exception):  # pragma: no cover
+     """Raised when a node of the given name is not found in the graph."""
+
+     def __init__(self, name):
+         super().__init__()
+         self.message = f"Node of name {name} is not found in the graph"
+
+
+ class BranchNotFoundError(Exception):  # pragma: no cover
+     """Raised when a branch of the given name is not found in the graph."""
+
+     def __init__(self, name):
+         super().__init__()
+         self.message = f"Branch of name {name} is not found in the graph"
+
+
+ class TerminalNodeError(Exception):  # pragma: no cover
+     """Raised when a next node is requested from a terminal node."""
+
+     def __init__(self):
+         super().__init__()
+         self.message = "Terminal Nodes do not have next node"
+
+
+ class SecretNotFoundError(Exception):  # pragma: no cover
+     """Raised when a secret of the given name is not found in the secrets store."""
+
+     def __init__(self, secret_name, secret_setting):
+         super().__init__()
+         self.message = f"No secret found by name: {secret_name} in {secret_setting}"
+
+
+ class ExecutionFailedError(Exception):  # pragma: no cover
+     """Raised when the pipeline execution failed for the given run id."""
+
+     def __init__(self, run_id: str):
+         super().__init__()
+         self.message = f"Execution failed for run id: {run_id}"
runnable/executor.py ADDED
@@ -0,0 +1,431 @@
+ from __future__ import annotations
+
+ import logging
+ import os
+ from abc import ABC, abstractmethod
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional
+
+ from pydantic import BaseModel, ConfigDict
+
+ import runnable.context as context
+ from runnable import defaults
+ from runnable.datastore import DataCatalog, RunLog, StepLog
+ from runnable.defaults import TypeMapVariable
+ from runnable.graph import Graph
+
+ if TYPE_CHECKING:  # pragma: no cover
+     from runnable.extensions.nodes import TaskNode
+     from runnable.nodes import BaseNode
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+ class BaseExecutor(ABC, BaseModel):
+     """
+     The skeleton of an executor class.
+     Any implementation of an executor should inherit this class and override accordingly.
+
+     There is an extension available in runnable/extensions/executor/__init__.py
+     which implements the most common functionality and is easier to
+     extend/override in most scenarios.
+     """
+
+     service_name: str = ""
+     service_type: str = "executor"
+
+     enable_parallel: bool = defaults.ENABLE_PARALLEL
+     overrides: dict = {}
+
+     _previous_run_log: Optional[RunLog] = None
+     _single_step: str = ""
+
+     _context_step_log = None  # type: StepLog
+     _context_node = None  # type: BaseNode
+     model_config = ConfigDict(extra="forbid")
+
+     @property
+     def _context(self):
+         return context.run_context
+
+     def _is_parallel_execution(self) -> bool:
+         """
+         Controls the parallelization of branches in map and parallel states.
+         Defaults to False and is left for the compute modes to decide.
+
+         Interactive executors like local and local-container need to make this decision.
+         For most transpilers it is inconsequential, as it is always True and supported by the platform.
+
+         Returns:
+             bool: True if the execution allows parallel execution of branches.
+         """
+         return self.enable_parallel
+
+     @abstractmethod
+     def _get_parameters(self) -> Dict[str, Any]:
+         """
+         Get the parameters for the execution.
+         The parameters can be defined in a parameters file and can be overridden by environment variables.
+
+         Returns:
+             Dict[str, Any]: The parameters for the execution.
+         """
+         ...
+
+     @abstractmethod
+     def _set_up_for_re_run(self, parameters: Dict[str, Any]) -> None:
+         """
+         Set up the executor for using a previous execution.
+
+         Retrieve the older run log; error out if it does not exist.
+         Sync the catalogs from the previous run log with the current one.
+
+         Update the parameters of this execution with the previous one. The previous ones take precedence.
+
+         Args:
+             parameters (Dict[str, Any]): The parameters for the current execution.
+         """
+
+     @abstractmethod
+     def _set_up_run_log(self, exists_ok=False):
+         """
+         Create a run log and put it in the run log store.
+
+         If exists_ok, we allow the run log to be already present in the run log store.
+         """
+         ...
+
+     @abstractmethod
+     def prepare_for_graph_execution(self):
+         """
+         This method should be called prior to calling execute_graph.
+         Perform any steps required before doing the graph execution.
+
+         The most common implementation is to prepare a run log for the run if the run uses local interactive compute.
+
+         But when actually rendering job specs (eg: AWS Step Functions, K8s), we check if the services are OK.
+         We do not set up a run log as it is not relevant.
+         """
+         ...
+
+     @abstractmethod
+     def prepare_for_node_execution(self):
+         """
+         Perform any modifications to the services prior to execution of the node.
+         """
+         ...
+
+     @abstractmethod
+     def _sync_catalog(self, step_log: StepLog, stage: str, synced_catalogs=None) -> Optional[List[DataCatalog]]:
+         """
+         1). Identify the catalog settings by overriding node settings with the global settings.
+         2). For stage = get:
+                 Identify the catalog items that are being asked to get from the catalog
+                 and copy them to the local compute data folder.
+         3). For stage = put:
+                 Identify the catalog items that are being asked to put into the catalog
+                 and copy them from the local compute folder to the catalog.
+         4). Add the items onto the step log according to the stage.
+
+         Args:
+             step_log (StepLog): The step log corresponding to the node being processed.
+             stage (str): One of get or put.
+
+         Raises:
+             Exception: If the stage is not one of get/put.
+         """
+         ...
+
+     @abstractmethod
+     def get_effective_compute_data_folder(self) -> Optional[str]:
+         """
+         Get the effective compute data folder for the given stage.
+         If there is nothing to catalog, we return None.
+
+         The default is the compute data folder of the catalog, but this can be overridden by the node.
+
+         Returns:
+             Optional[str]: The compute data folder as defined by the catalog handler or the node, or None.
+         """
+         ...
+
+     @property
+     def step_attempt_number(self) -> int:
+         """
+         The attempt number of the current step.
+         Orchestrators should use this to submit multiple attempts of the same job.
+
+         Returns:
+             int: The attempt number of the current step. Defaults to 1.
+         """
+         return int(os.environ.get(defaults.ATTEMPT_NUMBER, 1))
+
+     @abstractmethod
+     def _execute_node(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
+         """
+         This is the entry point when we do the actual execution of the function.
+
+         While in interactive execution we just compute, in 3rd party orchestration we need to reach
+         this function.
+
+         In most cases:
+         * We get the corresponding step_log of the node and the parameters.
+         * We sync the catalog to GET any data sets that are in the catalog.
+         * We call the execute method of the node for the actual compute and retry it as many times as asked.
+         * If the node succeeds, we collect any user-defined metrics.
+         * We sync the catalog to PUT any data sets that are in the catalog.
+
+         Args:
+             node (Node): The node to execute.
+             map_variable (dict, optional): If the node is of a map state, map_variable is the value of the iterable.
+                 Defaults to None.
+         """
+         ...
+
+     @abstractmethod
+     def execute_node(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
+         """
+         The entry point for all executors apart from local.
+         We have already prepared for node execution.
+
+         Args:
+             node (BaseNode): The node to execute.
+             map_variable (dict, optional): If the node is part of a map, send in the map dictionary. Defaults to None.
+         """
+         ...
+
+     @abstractmethod
+     def add_code_identities(self, node: BaseNode, step_log: StepLog, **kwargs):
+         """
+         Add code identities specific to the implementation.
+
+         The base class has an implementation of adding git code identities.
+
+         Args:
+             step_log (object): The step log object.
+             node (BaseNode): The node we are adding the step log for.
+         """
+         ...
+
+     @abstractmethod
+     def execute_from_graph(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
+         """
+         This is the entry point for a node's execution during graph traversal.
+
+         While self.execute_graph is responsible for traversing the graph, this function is responsible for
+         the actual execution of the node.
+
+         If the node type is:
+         * task: We can delegate to _execute_node after checking the eligibility for re-run in cases of a re-run.
+         * success: We can delegate to _execute_node.
+         * fail: We can delegate to _execute_node.
+
+         For nodes that are internally graphs:
+         * parallel: Delegate the responsibility of execution to node.execute_as_graph().
+         * dag: Delegate the responsibility of execution to node.execute_as_graph().
+         * map: Delegate the responsibility of execution to node.execute_as_graph().
+
+         Transpilers will NEVER use this method and will NEVER call this method.
+         This method should only be used by interactive executors.
+
+         Args:
+             node (Node): The node to execute.
+             map_variable (dict, optional): If the node is of a map state, this corresponds to the value of the iterable.
+                 Defaults to None.
+         """
+         ...
+
+     @abstractmethod
+     def trigger_job(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
+         """
+         Executor-specific way of triggering jobs when runnable does both traversal and execution.
+
+         Transpilers will NEVER use this method and will NEVER call it.
+         Only interactive executors who need execute_from_graph will ever implement it.
+
+         Args:
+             node (BaseNode): The node to execute.
+             map_variable (dict, optional): If the node is of a map state, this corresponds to the value of the iterable.
+                 Defaults to None.
+
+         NOTE: We do not raise an exception as this method is not required by many extensions.
+         """
+         ...
+
+     @abstractmethod
+     def _get_status_and_next_node_name(self, current_node: BaseNode, dag: Graph, map_variable: TypeMapVariable = None):
+         """
+         Given the current node and the graph, returns the name of the next node to execute.
+
+         The name is always relative to the graph that the node resides in.
+
+         If the current node succeeded, we return the next node as per the graph.
+         If the current node failed, we return the on-failure node of the node (if provided) or the global one.
+
+         Args:
+             current_node (BaseNode): The current node.
+             dag (Graph): The dag we are traversing.
+             map_variable (dict): If the node belongs to a map branch.
+         """
+         ...
+
+     @abstractmethod
+     def execute_graph(self, dag: Graph, map_variable: TypeMapVariable = None, **kwargs):
+         """
+         The parallelization is controlled by the nodes and not by this function.
+
+         Transpilers should override this method to translate the dag to the platform-specific form.
+         Interactive methods should use this to traverse and execute the dag.
+             - Use execute_from_graph to handle sub-graphs.
+
+         Logically, the method should:
+         * Start at dag.start_at of the dag.
+         * Call self.execute_from_graph(node).
+         * Depending upon the status of the execution, either move to the success node or the failure node.
+
+         Args:
+             dag (Graph): The directed acyclic graph to traverse and execute.
+             map_variable (dict, optional): If the node is of a map state, this corresponds to the value of the iterable.
+                 Defaults to None.
+         """
+         ...
+
+     @abstractmethod
+     def _is_step_eligible_for_rerun(self, node: BaseNode, map_variable: TypeMapVariable = None):
+         """
+         In case of a re-run, this method checks the step status of the previous run to determine if a re-run is
+         necessary.
+         * True: If it is not a re-run.
+         * True: If it is a re-run and we failed in the last run or the corresponding logs do not exist.
+         * False: If it is a re-run and we succeeded in the last run.
+
+         In most cases, this logic need not be touched.
+
+         Args:
+             node (Node): The node to check against re-run.
+             map_variable (dict, optional): If the node is of a map state, this corresponds to the value of the iterable.
+                 Defaults to None.
+
+         Returns:
+             bool: Eligibility for re-run. True means re-run, False means skip to the next step.
+         """
+         ...
+
+     @abstractmethod
+     def send_return_code(self, stage="traversal"):
+         """
+         Convenience function used by the pipeline to send a return code to the caller of the cli.
+
+         Raises:
+             Exception: If the pipeline execution failed.
+         """
+         ...
+
+     @abstractmethod
+     def _resolve_executor_config(self, node: BaseNode):
+         """
+         The overrides section can contain specific overrides to a global executor config.
+         To avoid too much clutter in the dag definition, we allow the configuration file to have an overrides block.
+         The nodes can override the global config by referring to a key in the overrides block.
+
+         For example:
+         # configuration.yaml
+         execution:
+           type: cloud-implementation
+           config:
+             k1: v1
+             k3: v3
+           overrides:
+             k2: v2 # Could be a mapping internally.
+
+         # in pipeline definition.yaml
+         dag:
+           steps:
+             step1:
+               overrides:
+                 cloud-implementation:
+                   k1: value_specific_to_node
+                   k2:
+
+         This method should resolve the node_config to {'k1': 'value_specific_to_node', 'k2': 'v2', 'k3': 'v3'}.
+
+         Args:
+             node (BaseNode): The current node being processed.
+         """
+         ...
+
+     @abstractmethod
+     def execute_job(self, node: TaskNode):
+         """
+         Executor-specific way of executing a job (python function or a notebook).
+
+         Interactive executors should execute the job.
+         Transpilers should write the instructions.
+
+         Args:
+             node (BaseNode): The job node to execute.
+
+         Raises:
+             NotImplementedError: Executors should choose to extend this functionality or not.
+         """
+         ...
+
+     @abstractmethod
+     def fan_out(self, node: BaseNode, map_variable: TypeMapVariable = None):
+         """
+         This method is used to appropriately fan out the execution of a composite node.
+         This is only useful when we want to execute a composite node on 3rd party orchestrators.
+
+         Reason: Transpilers typically try to run the leaf nodes but do not have any capacity to do anything for the
+         step which is composite. By calling this fan-out before calling the leaf nodes, we have an opportunity to
+         do the right set up (creating the step log, exposing the parameters, etc.) for the composite step.
+
+         All 3rd party orchestrators should use this method to fan out the execution of a composite node.
+         This ensures:
+         - The dot path notation is preserved; this method should create the step and call the node's fan_out to
+           create the branch logs and let the 3rd party do the actual step execution.
+         - 3rd party orchestrators get an opportunity to set up what is required for running a composite node.
+
+         Args:
+             node (BaseNode): The node to fan out.
+             map_variable (dict, optional): If the node is of a map state. Defaults to None.
+         """
+         ...
+
+     @abstractmethod
+     def fan_in(self, node: BaseNode, map_variable: TypeMapVariable = None):
+         """
+         This method is used to appropriately fan in after the execution of a composite node.
+         This is only useful when we want to execute a composite node on 3rd party orchestrators.
+
+         Reason: Transpilers typically try to run the leaf nodes but do not have any capacity to do anything for the
+         step which is composite. By calling this fan-in after calling the leaf nodes, we have an opportunity to
+         act depending upon the status of the individual branches.
+
+         All 3rd party orchestrators should use this method to fan in the execution of a composite node.
+         This ensures:
+         - The renderers get control over where to go depending upon the state of the composite node.
+         - The status of the step and its underlying branches are correctly updated.
+
+         Args:
+             node (BaseNode): The node to fan in.
+             map_variable (dict, optional): If the node is of a map state. Defaults to None.
+         """
+         ...
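
The `_resolve_executor_config` contract above boils down to a layered dictionary merge. Below is a minimal sketch of one plausible reading of the docstring's example; the variable names are illustrative, not the package's internals:

```python
# Global executor config and the configuration file's overrides block.
global_config = {"k1": "v1", "k3": "v3"}
named_overrides = {"k2": "v2"}

# Node-level overrides from the step definition; an empty value (None)
# means "take it from the overrides block".
node_overrides = {"k1": "value_specific_to_node", "k2": None}

effective = {**global_config, **named_overrides}
for key, value in node_overrides.items():
    # Node-level keys win, but None falls back to the already merged value.
    effective[key] = value if value is not None else effective.get(key)

assert effective == {"k1": "value_specific_to_node", "k2": "v2", "k3": "v3"}
```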
runnable/experiment_tracker.py ADDED
@@ -0,0 +1,139 @@
+ import contextlib
+ import json
+ import logging
+ import os
+ from abc import ABC, abstractmethod
+ from collections import defaultdict
+ from typing import Any, ContextManager, Dict, Tuple, Union
+
+ from pydantic import BaseModel, ConfigDict
+
+ import runnable.context as context
+ from runnable import defaults
+ from runnable.utils import remove_prefix
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+ def retrieve_step_details(key: str) -> Tuple[str, int]:
+     key = remove_prefix(key, defaults.TRACK_PREFIX)
+     data = key.split(defaults.STEP_INDICATOR)
+
+     key = data[0].lower()
+     step = 0
+
+     if len(data) > 1:
+         step = int(data[1])
+
+     return key, step
+
+
+ def get_tracked_data() -> Dict[str, Any]:
+     tracked_data: Dict[str, Any] = defaultdict(dict)
+     # Snapshot the environment: we delete keys while iterating.
+     for env_var, value in list(os.environ.items()):
+         if env_var.startswith(defaults.TRACK_PREFIX):
+             key, step = retrieve_step_details(env_var)
+
+             try:
+                 value = json.loads(value)
+             except json.decoder.JSONDecodeError:
+                 logger.warning(f"Tracker {key} could not be JSON decoded, adding the literal value")
+
+             tracked_data[key][step] = value
+             del os.environ[env_var]
+
+     for key, value in tracked_data.items():
+         if len(value) == 1:
+             # Collapse single-entry trackers from {step: value} to the bare value.
+             tracked_data[key] = next(iter(value.values()))
+
+     return tracked_data
+
+
+ # --8<-- [start:docs]
+
+
+ class BaseExperimentTracker(ABC, BaseModel):
+     """
+     Base experiment tracker class definition.
+     """
+
+     service_name: str = ""
+     service_type: str = "experiment_tracker"
+
+     @property
+     def _context(self):
+         return context.run_context
+
+     model_config = ConfigDict(extra="forbid")
+
+     @property
+     def client_context(self) -> ContextManager:
+         """
+         Returns the client context.
+         """
+         return contextlib.nullcontext()
+
+     def publish_data(self, tracked_data: Dict[str, Any]):
+         for key, value in tracked_data.items():
+             if isinstance(value, dict):
+                 for key2, value2 in value.items():
+                     self.log_metric(key, value2, step=key2)
+                 continue
+             self.log_metric(key, value)
+
+     @abstractmethod
+     def log_metric(self, key: str, value: Union[int, float], step: int = 0):
+         """
+         Sets the metric in the experiment tracker.
+
+         Args:
+             key (str): The key against which you want to store the value.
+             value (float): The value of the metric.
+             step (int): Optional step at which it was recorded.
+
+         Raises:
+             NotImplementedError: Base class, hence not implemented.
+         """
+         raise NotImplementedError
+
+     @abstractmethod
+     def log_parameter(self, key: str, value: Any):
+         """
+         Logs a parameter in the experiment tracker.
+
+         Args:
+             key (str): The key against which you want to store the value.
+             value (any): The value of the parameter.
+
+         Raises:
+             NotImplementedError: Base class, hence not implemented.
+         """
+         pass
+
+
+ # --8<-- [end:docs]
+
+
+ class DoNothingTracker(BaseExperimentTracker):
+     """
+     A do-nothing tracker.
+     """
+
+     service_name: str = "do-nothing"
+
+     def log_metric(self, key: str, value: Union[int, float], step: int = 0):
+         """
+         Sets the metric in the experiment tracker.
+
+         Args:
+             key (str): The key against which you want to store the value.
+             value (float): The value of the metric.
+         """
+         ...
+
+     def log_parameter(self, key: str, value: Any):
+         """
+         Since this is a do-nothing tracker, we don't need to log anything.
+         """
+         ...
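
`get_tracked_data` collects anything exported as prefixed environment variables and groups multi-step values by their step index. A minimal sketch of the environment-variable protocol, assuming illustrative values for the prefix and step indicator (the concrete values live in `runnable.defaults` and may differ):

```python
import json
import os

# Illustrative constants: stand-ins for defaults.TRACK_PREFIX and
# defaults.STEP_INDICATOR, whose real values are not shown in this diff.
TRACK_PREFIX = "RUNNABLE_TRACK_"
STEP_INDICATOR = "_STEP_"

# A task would export metrics as JSON-encoded, prefixed environment variables:
os.environ[TRACK_PREFIX + "ACCURACY"] = json.dumps(0.92)
os.environ[TRACK_PREFIX + "LOSS" + STEP_INDICATOR + "1"] = json.dumps(0.4)
os.environ[TRACK_PREFIX + "LOSS" + STEP_INDICATOR + "2"] = json.dumps(0.3)

# get_tracked_data() would then (under these assumptions) return:
#   {"accuracy": 0.92, "loss": {1: 0.4, 2: 0.3}}
# publish_data() forwards scalars via log_metric(key, value) and per-step
# dicts via log_metric(key, value, step=step).
```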
runnable/extensions/catalog/__init__.py ADDED
@@ -0,0 +1,21 @@
+ from typing import List, Optional
+
+ from runnable.datastore import DataCatalog
+
+
+ def is_catalog_out_of_sync(catalog, synced_catalogs: Optional[List[DataCatalog]] = None) -> bool:
+     """
+     Check if the catalog items are out of sync from already cataloged objects.
+     If they are in sync, return False.
+     If the object does not exist or the synced catalog does not exist, return True.
+     """
+     if not synced_catalogs:
+         return True  # If nothing has been synced in the past
+
+     for synced_catalog in synced_catalogs:
+         if synced_catalog.catalog_relative_path == catalog.catalog_relative_path:
+             if synced_catalog.data_hash == catalog.data_hash:
+                 return False
+             return True
+
+     return True  # The object does not exist, sync it
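
`is_catalog_out_of_sync` compares entries only by relative path and content hash, so duck-typed stand-ins are enough to illustrate it. A small sketch, assuming the package is installed; `FakeCatalogEntry` is a hypothetical stand-in for `runnable.datastore.DataCatalog`:

```python
from dataclasses import dataclass

from runnable.extensions.catalog import is_catalog_out_of_sync


@dataclass
class FakeCatalogEntry:
    # Stand-in: the check reads only these two attributes.
    catalog_relative_path: str
    data_hash: str


synced = [FakeCatalogEntry("data/model.pkl", "abc123")]

# Same path and hash: already synced, nothing to do.
assert not is_catalog_out_of_sync(FakeCatalogEntry("data/model.pkl", "abc123"), synced)
# Same path, different hash: contents changed, sync again.
assert is_catalog_out_of_sync(FakeCatalogEntry("data/model.pkl", "def456"), synced)
# Unknown path: never synced before, sync it.
assert is_catalog_out_of_sync(FakeCatalogEntry("data/report.csv", "789fff"), synced)
```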