runnable 0.1.0-py3-none-any.whl → 0.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. runnable/__init__.py +34 -0
  2. runnable/catalog.py +141 -0
  3. runnable/cli.py +272 -0
  4. runnable/context.py +34 -0
  5. runnable/datastore.py +687 -0
  6. runnable/defaults.py +182 -0
  7. runnable/entrypoints.py +448 -0
  8. runnable/exceptions.py +94 -0
  9. runnable/executor.py +421 -0
  10. runnable/experiment_tracker.py +139 -0
  11. runnable/extensions/catalog/__init__.py +21 -0
  12. runnable/extensions/catalog/file_system/__init__.py +0 -0
  13. runnable/extensions/catalog/file_system/implementation.py +227 -0
  14. runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
  15. runnable/extensions/catalog/k8s_pvc/implementation.py +16 -0
  16. runnable/extensions/catalog/k8s_pvc/integration.py +59 -0
  17. runnable/extensions/executor/__init__.py +725 -0
  18. runnable/extensions/executor/argo/__init__.py +0 -0
  19. runnable/extensions/executor/argo/implementation.py +1183 -0
  20. runnable/extensions/executor/argo/specification.yaml +51 -0
  21. runnable/extensions/executor/k8s_job/__init__.py +0 -0
  22. runnable/extensions/executor/k8s_job/implementation_FF.py +259 -0
  23. runnable/extensions/executor/k8s_job/integration_FF.py +69 -0
  24. runnable/extensions/executor/local/__init__.py +0 -0
  25. runnable/extensions/executor/local/implementation.py +70 -0
  26. runnable/extensions/executor/local_container/__init__.py +0 -0
  27. runnable/extensions/executor/local_container/implementation.py +361 -0
  28. runnable/extensions/executor/mocked/__init__.py +0 -0
  29. runnable/extensions/executor/mocked/implementation.py +189 -0
  30. runnable/extensions/experiment_tracker/__init__.py +0 -0
  31. runnable/extensions/experiment_tracker/mlflow/__init__.py +0 -0
  32. runnable/extensions/experiment_tracker/mlflow/implementation.py +94 -0
  33. runnable/extensions/nodes.py +655 -0
  34. runnable/extensions/run_log_store/__init__.py +0 -0
  35. runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
  36. runnable/extensions/run_log_store/chunked_file_system/implementation.py +106 -0
  37. runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
  38. runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +21 -0
  39. runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +61 -0
  40. runnable/extensions/run_log_store/db/implementation_FF.py +157 -0
  41. runnable/extensions/run_log_store/db/integration_FF.py +0 -0
  42. runnable/extensions/run_log_store/file_system/__init__.py +0 -0
  43. runnable/extensions/run_log_store/file_system/implementation.py +136 -0
  44. runnable/extensions/run_log_store/generic_chunked.py +541 -0
  45. runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
  46. runnable/extensions/run_log_store/k8s_pvc/implementation.py +21 -0
  47. runnable/extensions/run_log_store/k8s_pvc/integration.py +56 -0
  48. runnable/extensions/secrets/__init__.py +0 -0
  49. runnable/extensions/secrets/dotenv/__init__.py +0 -0
  50. runnable/extensions/secrets/dotenv/implementation.py +100 -0
  51. runnable/extensions/secrets/env_secrets/__init__.py +0 -0
  52. runnable/extensions/secrets/env_secrets/implementation.py +42 -0
  53. runnable/graph.py +464 -0
  54. runnable/integration.py +205 -0
  55. runnable/interaction.py +404 -0
  56. runnable/names.py +546 -0
  57. runnable/nodes.py +501 -0
  58. runnable/parameters.py +183 -0
  59. runnable/pickler.py +102 -0
  60. runnable/sdk.py +472 -0
  61. runnable/secrets.py +95 -0
  62. runnable/tasks.py +395 -0
  63. runnable/utils.py +630 -0
  64. runnable-0.3.0.dist-info/METADATA +437 -0
  65. runnable-0.3.0.dist-info/RECORD +69 -0
  66. {runnable-0.1.0.dist-info → runnable-0.3.0.dist-info}/WHEEL +1 -1
  67. runnable-0.3.0.dist-info/entry_points.txt +44 -0
  68. runnable-0.1.0.dist-info/METADATA +0 -16
  69. runnable-0.1.0.dist-info/RECORD +0 -6
  70. /runnable/{.gitkeep → extensions/__init__.py} +0 -0
  71. {runnable-0.1.0.dist-info → runnable-0.3.0.dist-info}/LICENSE +0 -0
runnable/extensions/executor/__init__.py
@@ -0,0 +1,725 @@
+ import copy
+ import json
+ import logging
+ import os
+ from abc import abstractmethod
+ from typing import Any, Dict, List, Optional, cast
+
+ from rich import print
+
+ from runnable import context, defaults, exceptions, integration, parameters, utils
+ from runnable.datastore import DataCatalog, RunLog, StepLog
+ from runnable.defaults import TypeMapVariable
+ from runnable.executor import BaseExecutor
+ from runnable.experiment_tracker import get_tracked_data
+ from runnable.extensions.nodes import TaskNode
+ from runnable.graph import Graph
+ from runnable.nodes import BaseNode
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+ class GenericExecutor(BaseExecutor):
+     """
+     The skeleton of an executor class.
+     Any implementation of an executor should inherit this class and override accordingly.
+
+     This is a loaded base class which has a lot of methods already implemented for "typical" executions.
+     Look at the function docs to understand how to use them appropriately.
+
+     For any implementation:
+     1). Who/when should the run log be set up?
+     2). Who/when should the step log be set up?
+
+     """
+
+     service_name: str = ""
+     service_type: str = "executor"
+
+     @property
+     def _context(self):
+         return context.run_context
+
+     @property
+     def step_decorator_run_id(self):
+         """
+         TODO: Experimental feature, design is not mature yet.
+
+         This function is used by the decorator function.
+         The design idea is that we can override this method in different implementations to retrieve the run_id.
+         But is it too intrusive to ask the user to set the environment variable runnable_RUN_ID?
+
+         Returns:
+             Optional[str]: The run id as set in the environment, None otherwise.
+         """
+         return os.environ.get("runnable_RUN_ID", None)
+
+     def _get_parameters(self) -> Dict[str, Any]:
+         """
+         Consolidate the parameters from the environment variables
+         and the parameters file.
+
+         The parameters defined in the environment variables take precedence over the parameters file.
+
+         Returns:
+             Dict[str, Any]: The consolidated parameters.
+         """
+         params: Dict[str, Any] = {}
+         if self._context.parameters_file:
+             params.update(utils.load_yaml(self._context.parameters_file))
+
+         # Overlay the parameters defined in the environment variables
+         params.update(parameters.get_user_set_parameters())
+         return params
+
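The precedence rule above amounts to a plain dict overlay: file values are loaded first, then environment values overwrite them on key collision. A minimal standalone sketch (the MY_PRM_ prefix and JSON-encoded values are illustrative assumptions, not runnable's actual convention):

    import json
    import os

    import yaml  # pip install pyyaml

    def consolidate_parameters(parameters_file=None, prefix="MY_PRM_"):
        """File values first; environment values overwrite on key collision."""
        params = {}
        if parameters_file:
            with open(parameters_file) as f:
                params.update(yaml.safe_load(f) or {})
        for key, value in os.environ.items():
            if key.startswith(prefix):
                name = key[len(prefix):].lower()
                try:
                    params[name] = json.loads(value)  # values may be JSON-encoded
                except json.JSONDecodeError:
                    params[name] = value  # fall back to the raw string
        return params
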
+     def _set_up_for_re_run(self, parameters: Dict[str, Any]) -> None:
+         try:
+             attempt_run_log = self._context.run_log_store.get_run_log_by_id(
+                 run_id=self._context.original_run_id, full=False
+             )
+         except exceptions.RunLogNotFoundError as e:
+             msg = (
+                 f"Expected a run log with id: {self._context.original_run_id} "
+                 "but it does not exist in the run log store. "
+                 "If the original execution was in a different environment, ensure that it is available in the current "
+                 "environment."
+             )
+             logger.exception(msg)
+             raise Exception(msg) from e
+
+         # Sync the previous run log catalog to this one.
+         self._context.catalog_handler.sync_between_runs(
+             previous_run_id=self._context.original_run_id, run_id=self._context.run_id
+         )
+
+         parameters.update(cast(RunLog, attempt_run_log).parameters)
+
+     def _set_up_run_log(self, exists_ok=False):
+         """
+         Create a run log and put it in the run log store.
+
+         If exists_ok, we allow the run log to already be present in the run log store.
+         """
+         try:
+             attempt_run_log = self._context.run_log_store.get_run_log_by_id(run_id=self._context.run_id, full=False)
+
+             logger.warning(f"The run log by id: {self._context.run_id} already exists")
+             raise exceptions.RunLogExistsError(
+                 f"The run log by id: {self._context.run_id} already exists and is {attempt_run_log.status}"
+             )
+         except exceptions.RunLogNotFoundError:
+             pass
+         except exceptions.RunLogExistsError:
+             if exists_ok:
+                 return
+             raise
+
+         # Consolidate and get the parameters
+         parameters = self._get_parameters()
+
+         # TODO: This needs to go away
+         if self._context.use_cached:
+             self._set_up_for_re_run(parameters=parameters)
+
+         self._context.run_log_store.create_run_log(
+             run_id=self._context.run_id,
+             tag=self._context.tag,
+             status=defaults.PROCESSING,
+             dag_hash=self._context.dag_hash,
+             use_cached=self._context.use_cached,
+             original_run_id=self._context.original_run_id,
+         )
+         # Any interaction with run log store attributes should happen via the API, if available.
+         self._context.run_log_store.set_parameters(run_id=self._context.run_id, parameters=parameters)
+
+         # Update run_config
+         run_config = utils.get_run_config()
+         self._context.run_log_store.set_run_config(run_id=self._context.run_id, run_config=run_config)
+
+     def prepare_for_graph_execution(self):
+         """
+         This method should be called prior to calling execute_graph.
+         Perform any steps required before doing the graph execution.
+
+         The most common implementation is to prepare a run log for the run if the run uses local interactive compute.
+
+         But in cases of actually rendering the job specs (e.g., AWS Step Functions, K8s), we check if the services
+         are OK. We do not set up a run log as it is not relevant.
+         """
+
+         integration.validate(self, self._context.run_log_store)
+         integration.configure_for_traversal(self, self._context.run_log_store)
+
+         integration.validate(self, self._context.catalog_handler)
+         integration.configure_for_traversal(self, self._context.catalog_handler)
+
+         integration.validate(self, self._context.secrets_handler)
+         integration.configure_for_traversal(self, self._context.secrets_handler)
+
+         integration.validate(self, self._context.experiment_tracker)
+         integration.configure_for_traversal(self, self._context.experiment_tracker)
+
+         self._set_up_run_log()
+
+     def prepare_for_node_execution(self):
+         """
+         Perform any modifications to the services prior to execution of the node.
+         """
+         integration.validate(self, self._context.run_log_store)
+         integration.configure_for_execution(self, self._context.run_log_store)
+
+         integration.validate(self, self._context.catalog_handler)
+         integration.configure_for_execution(self, self._context.catalog_handler)
+
+         integration.validate(self, self._context.secrets_handler)
+         integration.configure_for_execution(self, self._context.secrets_handler)
+
+         integration.validate(self, self._context.experiment_tracker)
+         integration.configure_for_execution(self, self._context.experiment_tracker)
+
+     def _sync_catalog(self, step_log: StepLog, stage: str, synced_catalogs=None) -> Optional[List[DataCatalog]]:
+         """
+         1). Identify the catalog settings by overriding node settings with the global settings.
+         2). For stage = get:
+                 Identify the catalog items that are being asked to get from the catalog
+                 and copy them to the local compute data folder.
+         3). For stage = put:
+                 Identify the catalog items that are being asked to put into the catalog.
+                 Copy the items from the local compute folder to the catalog.
+         4). Add the items onto the step log according to the stage.
+
+         Args:
+             step_log (StepLog): The step log corresponding to the current node
+             stage (str): One of get or put
+             synced_catalogs (optional): The catalogs already synced during the get stage
+
+         Raises:
+             Exception: If the stage is not one of get/put
+
+         """
+         if stage not in ["get", "put"]:
+             msg = (
+                 "Catalog service only accepts get/put possible actions as part of node execution. "
+                 f"Sync catalog of the executor: {self.service_name} asks for {stage} which is not accepted"
+             )
+             raise Exception(msg)
+
+         try:
+             node_catalog_settings = self._context_node._get_catalog_settings()
+         except exceptions.TerminalNodeError:
+             return None
+
+         if not (node_catalog_settings and stage in node_catalog_settings):
+             logger.info("No catalog settings found for stage: %s", stage)
+             # Nothing to get/put from the catalog
+             return None
+
+         compute_data_folder = self.get_effective_compute_data_folder()
+
+         data_catalogs = []
+         for name_pattern in node_catalog_settings.get(stage) or []:
+             if stage == "get":
+                 data_catalog = self._context.catalog_handler.get(
+                     name=name_pattern, run_id=self._context.run_id, compute_data_folder=compute_data_folder
+                 )
+             elif stage == "put":
+                 data_catalog = self._context.catalog_handler.put(
+                     name=name_pattern,
+                     run_id=self._context.run_id,
+                     compute_data_folder=compute_data_folder,
+                     synced_catalogs=synced_catalogs,
+                 )
+             else:
+                 raise Exception(f"Invalid stage: {stage}")
+             logger.info(f"Added data catalog: {data_catalog} to step log")
+             data_catalogs.extend(data_catalog)
+
+         if data_catalogs:
+             step_log.add_data_catalogs(data_catalogs)
+
+         return data_catalogs
+
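For a file-system catalog, the get/put stages reduce to copying files that match a glob between a run-scoped catalog folder and the compute data folder. A self-contained sketch (the .catalog/<run_id> layout is an assumption for illustration, loosely modeled on the file_system catalog extension):

    import shutil
    from pathlib import Path

    def sync_catalog(stage, pattern, run_id, compute_folder="data", catalog_root=".catalog"):
        """Copy files matching `pattern` between the compute folder and a run-scoped catalog."""
        if stage not in ("get", "put"):
            raise ValueError(f"stage must be get or put, got {stage}")
        catalog_folder = Path(catalog_root) / run_id
        src, dst = (catalog_folder, Path(compute_folder)) if stage == "get" else (Path(compute_folder), catalog_folder)
        dst.mkdir(parents=True, exist_ok=True)
        copied = []
        for item in src.glob(pattern):
            target = dst / item.name
            shutil.copy2(item, target)  # preserve timestamps/permissions
            copied.append(target)
        return copied
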
+     def get_effective_compute_data_folder(self) -> str:
+         """
+         Get the effective compute data folder for cataloging.
+
+         The default is the compute data folder of the catalog handler, but this can be overridden by the node.
+
+         Returns:
+             str: The compute data folder as defined by the node, defaulting to the catalog handler's.
+         """
+         compute_data_folder = self._context.catalog_handler.compute_data_folder
+
+         catalog_settings = self._context_node._get_catalog_settings()
+         effective_compute_data_folder = catalog_settings.get("compute_data_folder", "") or compute_data_folder
+
+         return effective_compute_data_folder
+
+     @property
+     def step_attempt_number(self) -> int:
+         """
+         The attempt number of the current step.
+         Orchestrators should use this property when submitting multiple attempts of the job.
+
+         Returns:
+             int: The attempt number of the current step. Defaults to 1.
+         """
+         return int(os.environ.get(defaults.ATTEMPT_NUMBER, 1))
+
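An orchestrator that retries a failed step is expected to communicate the attempt count back through the environment, so that step_attempt_number resolves correctly inside the step. A sketch of the caller side (the ATTEMPT_NUMBER variable name is illustrative; the real name comes from defaults.ATTEMPT_NUMBER):

    import os
    import subprocess

    def run_with_retries(cmd, max_attempts=3):
        """Re-invoke the step command, exporting the attempt number for the executor to read."""
        for attempt in range(1, max_attempts + 1):
            env = {**os.environ, "ATTEMPT_NUMBER": str(attempt)}  # name is illustrative
            result = subprocess.run(cmd, env=env)
            if result.returncode == 0:
                return 0
        return result.returncode
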
+     def _execute_node(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
+         """
+         This is the entry point when we do the actual execution of the function.
+         DO NOT override this function.
+
+         In interactive execution we just compute; in 3rd-party orchestrated execution, the orchestrator needs to
+         reach this function.
+
+         In most cases:
+         * We get the corresponding step_log of the node and the parameters.
+         * We sync the catalog to GET any data sets that are in the catalog.
+         * We call the execute method of the node for the actual compute and retry it as many times as asked.
+         * If the node succeeds, we get any of the user defined metrics provided by the user.
+         * We sync the catalog to PUT any data sets that are in the catalog.
+
+         Args:
+             node (Node): The node to execute
+             map_variable (dict, optional): If the node is of a map state, map_variable is the value of the iterable.
+                 Defaults to None.
+         """
+         step_log = self._context.run_log_store.get_step_log(node._get_step_log_name(map_variable), self._context.run_id)
+         """
+         By now, all the parameters are part of the run log as a dictionary.
+         We set them as environment variables, serialized as json strings.
+         """
+         params = self._context.run_log_store.get_parameters(run_id=self._context.run_id)
+         params_copy = copy.deepcopy(params)
+         # This is only for the API to work.
+         parameters.set_user_defined_params_as_environment_variables(params)
+
+         attempt = self.step_attempt_number
+         logger.info(f"Trying to execute node: {node.internal_name}, attempt : {attempt}")
+
+         attempt_log = self._context.run_log_store.create_attempt_log()
+         self._context_step_log = step_log
+         self._context_node = node
+
+         data_catalogs_get: Optional[List[DataCatalog]] = self._sync_catalog(step_log, stage="get")
+         try:
+             attempt_log = node.execute(
+                 executor=self,
+                 mock=step_log.mock,
+                 map_variable=map_variable,
+                 params=params,
+                 **kwargs,
+             )
+         except Exception as e:
+             # Any exception here is a runnable fault, as the node suppresses exceptions.
+             msg = "This is clearly a runnable fault, please report a bug along with the logs"
+             logger.exception(msg)
+             raise Exception(msg) from e
+         finally:
+             attempt_log.attempt_number = attempt
+             step_log.attempts.append(attempt_log)
+
+             tracked_data = get_tracked_data()
+
+             self._context.experiment_tracker.publish_data(tracked_data)
+             parameters_out = attempt_log.output_parameters
+
+             if attempt_log.status == defaults.FAIL:
+                 logger.exception(f"Node: {node} failed")
+                 step_log.status = defaults.FAIL
+             else:
+                 # Mock is always set to False, bad design??
+                 # TODO: Stub nodes should not sync back data
+                 # TODO: Errors in catalog syncing should point to Fail step
+                 # TODO: Even for a failed execution, the catalog sync can happen
+                 step_log.status = defaults.SUCCESS
+                 self._sync_catalog(step_log, stage="put", synced_catalogs=data_catalogs_get)
+                 step_log.user_defined_metrics = tracked_data
+
+                 diff_parameters = utils.diff_dict(params_copy, parameters_out)
+                 self._context.run_log_store.set_parameters(self._context.run_id, diff_parameters)
+
+             # Remove the step context
+             parameters.get_user_set_parameters(remove=True)
+             self._context_step_log = None
+             self._context_node = None  # type: ignore
+             self._context_metrics = {}  # type: ignore
+
+             self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+
+     def add_code_identities(self, node: BaseNode, step_log: StepLog, **kwargs):
+         """
+         Add code identities specific to the implementation.
+
+         The base class has an implementation of adding git code identities.
+
+         Args:
+             step_log (object): The step log object
+             node (BaseNode): The node we are adding the step log for
+         """
+         step_log.code_identities.append(utils.get_git_code_identity())
+
+     def execute_from_graph(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
+         """
+         This is the entry point from the graph execution.
+
+         While self.execute_graph is responsible for traversing the graph, this function is responsible for
+         the actual execution of the node.
+
+         If the node type is:
+         * task: We can delegate to _execute_node after checking the eligibility for re-run in cases of a re-run
+         * success: We can delegate to _execute_node
+         * fail: We can delegate to _execute_node
+
+         For nodes that are internally graphs:
+         * parallel: Delegate the responsibility of execution to the node.execute_as_graph()
+         * dag: Delegate the responsibility of execution to the node.execute_as_graph()
+         * map: Delegate the responsibility of execution to the node.execute_as_graph()
+
+         Transpilers will NEVER use this method and will NEVER call this method.
+         This method should only be used by interactive executors.
+
+         Args:
+             node (Node): The node to execute
+             map_variable (dict, optional): If the node is of a map state, this corresponds to the value of the iterable.
+                 Defaults to None.
+         """
+         step_log = self._context.run_log_store.create_step_log(node.name, node._get_step_log_name(map_variable))
+
+         self.add_code_identities(node=node, step_log=step_log)
+
+         step_log.step_type = node.node_type
+         step_log.status = defaults.PROCESSING
+
+         # Add the step log to the database as per the situation.
+         # If it's a terminal node, complete it now
+         if node.node_type in ["success", "fail"]:
+             self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+             self._execute_node(node, map_variable=map_variable, **kwargs)
+             return
+
+         # TODO: This needs to go away
+         # In single step
+         if (self._single_step and not node.name == self._single_step) or not self._is_step_eligible_for_rerun(
+             node, map_variable=map_variable
+         ):
+             # If the node name does not match, we move on to the next node.
+             # If the previous run was successful, move on to the next step.
+             step_log.mock = True
+             step_log.status = defaults.SUCCESS
+             self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+             return
+         # We call an internal function to iterate the sub graphs and execute them
+         if node.is_composite:
+             self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+             node.execute_as_graph(map_variable=map_variable, **kwargs)
+             return
+
+         # Executor specific way to trigger a job
+         self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+         self.trigger_job(node=node, map_variable=map_variable, **kwargs)
+
+     def trigger_job(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
+         """
+         Call this method only if we are responsible for traversing the graph via
+         execute_from_graph().
+
+         We are not prepared to execute the node as of now.
+
+         Args:
+             node (BaseNode): The node to execute
+             map_variable (dict, optional): If the node is of a map state, this corresponds to the value of the iterable.
+                 Defaults to None.
+
+         NOTE: We do not raise an exception as this method is not required by many extensions
+         """
+         pass
+
+     def _get_status_and_next_node_name(self, current_node: BaseNode, dag: Graph, map_variable: TypeMapVariable = None):
+         """
+         Given the current node and the graph, returns the name of the next node to execute.
+
+         The name is always relative to the graph that the node resides in.
+
+         If the current node succeeded, we return the next node as per the graph.
+         If the current node failed, we return the on-failure node of the node (if provided) or the global one.
+
+         This method is only used by interactive executors, i.e. local and local-container.
+
+         Args:
+             current_node (BaseNode): The current node.
+             dag (Graph): The dag we are traversing.
+             map_variable (dict): If the node belongs to a map branch.
+
+         """
+
+         step_log = self._context.run_log_store.get_step_log(
+             current_node._get_step_log_name(map_variable), self._context.run_id
+         )
+         logger.info(f"Finished executing the node {current_node} with status {step_log.status}")
+
+         try:
+             next_node_name = current_node._get_next_node()
+         except exceptions.TerminalNodeError:
+             next_node_name = ""
+
+         if step_log.status == defaults.FAIL:
+             next_node_name = dag.get_fail_node().name
+             if current_node._get_on_failure_node():
+                 next_node_name = current_node._get_on_failure_node()
+
+         return step_log.status, next_node_name
+
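The routing rule above reduces to a small lookup: on success follow the node's next pointer, on failure prefer the node-local on-failure handler over the graph's global fail node. A toy check (the Node dataclass is an illustrative stand-in, not runnable's BaseNode):

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class Node:
        name: str
        next_node: Optional[str] = None   # empty for terminal nodes
        on_failure: Optional[str] = None  # node-local failure handler

    def next_node_name(node, status, global_fail_node="fail"):
        if status == "FAIL":
            return node.on_failure or global_fail_node
        return node.next_node or ""

    assert next_node_name(Node("step1", next_node="step2"), "SUCCESS") == "step2"
    assert next_node_name(Node("step1", "step2", on_failure="cleanup"), "FAIL") == "cleanup"
    assert next_node_name(Node("step1", "step2"), "FAIL") == "fail"
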
+     def execute_graph(self, dag: Graph, map_variable: TypeMapVariable = None, **kwargs):
+         """
+         The parallelization is controlled by the nodes and not by this function.
+
+         Transpilers should override this method to translate the dag into the platform-specific format.
+         Interactive methods should use this to traverse and execute the dag.
+             - Use execute_from_graph to handle sub-graphs
+
+         Logically the method should:
+         * Start at the dag.start_at of the dag.
+         * Call self.execute_from_graph(node)
+         * Depending upon the status of the execution, either move to the success node or the failure node.
+
+         Args:
+             dag (Graph): The directed acyclic graph to traverse and execute.
+             map_variable (dict, optional): If the node is of a map state, this corresponds to the value of the iterable.
+                 Defaults to None.
+         """
+         current_node = dag.start_at
+         previous_node = None
+         logger.info(f"Running the execution with {current_node}")
+
+         while True:
+             working_on = dag.get_node_by_name(current_node)
+
+             if previous_node == current_node:
+                 raise Exception("Potentially running in an infinite loop")
+
+             previous_node = current_node
+
+             logger.info(f"Creating execution log for {working_on}")
+             self.execute_from_graph(working_on, map_variable=map_variable, **kwargs)
+
+             status, next_node_name = self._get_status_and_next_node_name(
+                 current_node=working_on, dag=dag, map_variable=map_variable
+             )
+
+             if status == defaults.TRIGGERED:
+                 # Some nodes go into triggered state and self traverse
+                 logger.info(f"Triggered the job to execute the node {current_node}")
+                 break
+
+             if working_on.node_type in ["success", "fail"]:
+                 break
+
+             current_node = next_node_name
+
+         run_log = self._context.run_log_store.get_branch_log(
+             working_on._get_branch_log_name(map_variable), self._context.run_id
+         )
+
+         branch = "graph"
+         if working_on.internal_branch_name:
+             branch = working_on.internal_branch_name
+
+         logger.info(f"Finished execution of the {branch} with status {run_log.status}")
+
+         # Get the final run log
+         if branch == "graph":
+             run_log = self._context.run_log_store.get_run_log_by_id(run_id=self._context.run_id, full=True)
+             print(json.dumps(run_log.model_dump(), indent=4))
+
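The traversal loop in miniature, over a plain dict-based dag (the dag shape and statuses here are illustrative): start at start_at, execute, follow the next pointer on success or jump to the fail node, and stop at a terminal node:

    toy_dag = {
        "start_at": "extract",
        "nodes": {
            "extract":   {"type": "task", "next": "transform"},
            "transform": {"type": "task", "next": "success"},
            "success":   {"type": "success"},
            "fail":      {"type": "fail"},
        },
    }

    def traverse(dag, execute):
        current, previous = dag["start_at"], None
        while True:
            node = dag["nodes"][current]
            if previous == current:
                raise RuntimeError("Potentially running in an infinite loop")
            previous = current
            status = execute(current, node)  # returns "SUCCESS" or "FAIL"
            if node["type"] in ("success", "fail"):
                return node["type"]
            current = node["next"] if status == "SUCCESS" else "fail"

    print(traverse(toy_dag, lambda name, node: "SUCCESS"))  # -> success
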
+     # TODO: This needs to go away
+     def _is_step_eligible_for_rerun(self, node: BaseNode, map_variable: TypeMapVariable = None):
+         """
+         In case of a re-run, this method checks the status of the step in the previous run to determine if a
+         re-run is necessary.
+         * True: If it is not a re-run.
+         * True: If it is a re-run and we failed in the last run or the corresponding logs do not exist.
+         * False: If it is a re-run and we succeeded in the last run.
+
+         In most cases, this logic need not be touched.
+
+         Args:
+             node (Node): The node to check against re-run
+             map_variable (dict, optional): If the node is of a map state, this corresponds to the value of the iterable.
+                 Defaults to None.
+
+         Returns:
+             bool: Eligibility for re-run. True means re-run, False means skip to the next step.
+         """
+         if self._context.use_cached:
+             node_step_log_name = node._get_step_log_name(map_variable=map_variable)
+             logger.info(f"Scanning previous run logs for node logs of: {node_step_log_name}")
+
+             try:
+                 previous_node_log = self._context.run_log_store.get_step_log(
+                     internal_name=node_step_log_name, run_id=self._context.original_run_id
+                 )
+             except exceptions.StepLogNotFoundError:
+                 logger.warning(f"Did not find the node {node.name} in previous run log")
+                 return True  # We should re-run the node.
+
+             logger.info(f"The original step status: {previous_node_log.status}")
+
+             if previous_node_log.status == defaults.SUCCESS:
+                 return False  # We need not run the node
+
+             logger.info(f"The new execution should start executing graph from this node {node.name}")
+             return True
+
+         return True
+
+     def send_return_code(self, stage="traversal"):
+         """
+         Convenience function used by the pipeline to send a return code to the caller of the cli.
+
+         Raises:
+             Exception: If the pipeline execution failed
+         """
+         run_id = self._context.run_id
+
+         run_log = self._context.run_log_store.get_run_log_by_id(run_id=run_id, full=False)
+         if run_log.status == defaults.FAIL:
+             raise exceptions.ExecutionFailedError(run_id=run_id)
+
+     def _resolve_executor_config(self, node: BaseNode):
+         """
+         The overrides section can contain specific overrides to a global executor config.
+         To avoid too much clutter in the dag definition, we allow the configuration file to have an overrides block.
+         The nodes can override the global config by referring to a key in the overrides.
+
+         This function also applies variables to the effective node config.
+
+         For example:
+         # configuration.yaml
+         execution:
+           type: cloud-implementation
+           config:
+             k1: v1
+             k3: v3
+           overrides:
+             custom_config:
+               k1: v11
+               k2: v2  # Could be a mapping internally.
+
+         # in pipeline definition.yaml
+         dag:
+           steps:
+             step1:
+               overrides:
+                 cloud-implementation: custom_config
+
+         This method should resolve the node_config to {'k1': 'v11', 'k2': 'v2', 'k3': 'v3'}
+
+         Args:
+             node (BaseNode): The current node being processed.
+
+         """
+         effective_node_config = copy.deepcopy(self.model_dump())
+         try:
+             ctx_node_config = node._get_executor_config(self.service_name)
+         except exceptions.TerminalNodeError:
+             # Some modes request the effective node config even for success or fail nodes
+             return utils.apply_variables(effective_node_config, self._context.variables)
+
+         if ctx_node_config:
+             if ctx_node_config not in self.overrides:
+                 raise Exception(f"No override of key: {ctx_node_config} found in the overrides section")
+
+             effective_node_config.update(self.overrides[ctx_node_config])
+
+         effective_node_config = utils.apply_variables(effective_node_config, self._context.variables)
+         logger.debug(f"Effective node config: {effective_node_config}")
+
+         return effective_node_config
+
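The documented example resolves via a shallow dict merge of the chosen override block over the global config, followed by variable substitution. A quick standalone check using the values from the docstring:

    import copy

    global_config = {"k1": "v1", "k3": "v3"}
    overrides = {"custom_config": {"k1": "v11", "k2": "v2"}}

    def resolve(node_override_key=None):
        effective = copy.deepcopy(global_config)
        if node_override_key:
            if node_override_key not in overrides:
                raise KeyError(f"No override of key: {node_override_key} found")
            effective.update(overrides[node_override_key])  # shallow merge, override wins
        return effective

    assert resolve("custom_config") == {"k1": "v11", "k2": "v2", "k3": "v3"}
    assert resolve() == global_config
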
+     @abstractmethod
+     def execute_job(self, node: TaskNode):
+         """
+         Executor specific way of executing a job (python function or a notebook).
+
+         Interactive executors should execute the job.
+         Transpilers should write the instructions.
+
+         Args:
+             node (BaseNode): The job node to execute
+
+         Raises:
+             NotImplementedError: Executors should choose to extend this functionality or not.
+         """
+         raise NotImplementedError
+
+     def fan_out(self, node: BaseNode, map_variable: TypeMapVariable = None):
+         """
+         This method is used to appropriately fan-out the execution of a composite node.
+         This is only useful when we want to execute a composite node during 3rd-party orchestration.
+
+         Reason: Transpilers typically try to run the leaf nodes but do not have any capacity to do anything for the
+         step which is composite. By calling this fan-out before calling the leaf nodes, we have an opportunity to
+         do the right set up (creating the step log, exposing the parameters, etc.) for the composite step.
+
+         All 3rd-party orchestrators should use this method to fan-out the execution of a composite node.
+         This ensures:
+         - The dot path notation is preserved: this method creates the step log and calls the node's fan_out to
+           create the branch logs, letting the 3rd party do the actual step execution.
+         - 3rd-party orchestrators get an opportunity to set up what is required for running a composite node.
+
+         Args:
+             node (BaseNode): The node to fan-out
+             map_variable (dict, optional): If the node is of a map state. Defaults to None.
+
+         """
+         step_log = self._context.run_log_store.create_step_log(
+             node.name, node._get_step_log_name(map_variable=map_variable)
+         )
+
+         self.add_code_identities(node=node, step_log=step_log)
+
+         step_log.step_type = node.node_type
+         step_log.status = defaults.PROCESSING
+         self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+
+         node.fan_out(executor=self, map_variable=map_variable)
+
+     def fan_in(self, node: BaseNode, map_variable: TypeMapVariable = None):
+         """
+         This method is used to appropriately fan-in after the execution of a composite node.
+         This is only useful when we want to execute a composite node during 3rd-party orchestration.
+
+         Reason: Transpilers typically try to run the leaf nodes but do not have any capacity to do anything for the
+         step which is composite. By calling this fan-in after calling the leaf nodes, we have an opportunity to
+         act depending upon the status of the individual branches.
+
+         All 3rd-party orchestrators should use this method to fan-in the execution of a composite node.
+         This ensures:
+         - The renderers get control of where to go depending upon the state of the composite node.
+         - The status of the step and its underlying branches are correctly updated.
+
+         Args:
+             node (BaseNode): The node to fan-in
+             map_variable (dict, optional): If the node is of a map state. Defaults to None.
+
+         """
+         node.fan_in(executor=self, map_variable=map_variable)
+
+         step_log = self._context.run_log_store.get_step_log(
+             node._get_step_log_name(map_variable=map_variable), self._context.run_id
+         )
+
+         if step_log.status == defaults.FAIL:
+             raise Exception(f"Step {node.name} failed")
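
Taken together, a 3rd-party orchestrator brackets a composite (e.g., map) step with these hooks: fan_out to create the step and branch logs, one leaf execution per branch, then fan_in to consolidate status. A conceptual sketch, not runnable's actual transpiler code (on a real platform the branch executions run in parallel, e.g., via an Argo withParam template; the "iter" key is illustrative):

    def run_composite_step(executor, node, branch_dag, iterable):
        executor.fan_out(node=node)  # create the composite step log and branch logs
        for value in iterable:       # the platform runs these in parallel
            executor.execute_graph(branch_dag, map_variable={"iter": value})
        executor.fan_in(node=node)   # consolidates branch status; raises if the step failed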