runnable-0.50.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. extensions/README.md +0 -0
  2. extensions/__init__.py +0 -0
  3. extensions/catalog/README.md +0 -0
  4. extensions/catalog/any_path.py +214 -0
  5. extensions/catalog/file_system.py +52 -0
  6. extensions/catalog/minio.py +72 -0
  7. extensions/catalog/pyproject.toml +14 -0
  8. extensions/catalog/s3.py +11 -0
  9. extensions/job_executor/README.md +0 -0
  10. extensions/job_executor/__init__.py +236 -0
  11. extensions/job_executor/emulate.py +70 -0
  12. extensions/job_executor/k8s.py +553 -0
  13. extensions/job_executor/k8s_job_spec.yaml +37 -0
  14. extensions/job_executor/local.py +35 -0
  15. extensions/job_executor/local_container.py +161 -0
  16. extensions/job_executor/pyproject.toml +16 -0
  17. extensions/nodes/README.md +0 -0
  18. extensions/nodes/__init__.py +0 -0
  19. extensions/nodes/conditional.py +301 -0
  20. extensions/nodes/fail.py +78 -0
  21. extensions/nodes/loop.py +394 -0
  22. extensions/nodes/map.py +477 -0
  23. extensions/nodes/parallel.py +281 -0
  24. extensions/nodes/pyproject.toml +15 -0
  25. extensions/nodes/stub.py +93 -0
  26. extensions/nodes/success.py +78 -0
  27. extensions/nodes/task.py +156 -0
  28. extensions/pipeline_executor/README.md +0 -0
  29. extensions/pipeline_executor/__init__.py +871 -0
  30. extensions/pipeline_executor/argo.py +1266 -0
  31. extensions/pipeline_executor/emulate.py +119 -0
  32. extensions/pipeline_executor/local.py +226 -0
  33. extensions/pipeline_executor/local_container.py +369 -0
  34. extensions/pipeline_executor/mocked.py +159 -0
  35. extensions/pipeline_executor/pyproject.toml +16 -0
  36. extensions/run_log_store/README.md +0 -0
  37. extensions/run_log_store/__init__.py +0 -0
  38. extensions/run_log_store/any_path.py +100 -0
  39. extensions/run_log_store/chunked_fs.py +122 -0
  40. extensions/run_log_store/chunked_minio.py +141 -0
  41. extensions/run_log_store/file_system.py +91 -0
  42. extensions/run_log_store/generic_chunked.py +549 -0
  43. extensions/run_log_store/minio.py +114 -0
  44. extensions/run_log_store/pyproject.toml +15 -0
  45. extensions/secrets/README.md +0 -0
  46. extensions/secrets/dotenv.py +62 -0
  47. extensions/secrets/pyproject.toml +15 -0
  48. runnable/__init__.py +108 -0
  49. runnable/catalog.py +141 -0
  50. runnable/cli.py +484 -0
  51. runnable/context.py +730 -0
  52. runnable/datastore.py +1058 -0
  53. runnable/defaults.py +159 -0
  54. runnable/entrypoints.py +390 -0
  55. runnable/exceptions.py +137 -0
  56. runnable/executor.py +561 -0
  57. runnable/gantt.py +1646 -0
  58. runnable/graph.py +501 -0
  59. runnable/names.py +546 -0
  60. runnable/nodes.py +593 -0
  61. runnable/parameters.py +217 -0
  62. runnable/pickler.py +96 -0
  63. runnable/sdk.py +1277 -0
  64. runnable/secrets.py +92 -0
  65. runnable/tasks.py +1268 -0
  66. runnable/telemetry.py +142 -0
  67. runnable/utils.py +423 -0
  68. runnable-0.50.0.dist-info/METADATA +189 -0
  69. runnable-0.50.0.dist-info/RECORD +72 -0
  70. runnable-0.50.0.dist-info/WHEEL +4 -0
  71. runnable-0.50.0.dist-info/entry_points.txt +53 -0
  72. runnable-0.50.0.dist-info/licenses/LICENSE +201 -0
extensions/pipeline_executor/__init__.py
@@ -0,0 +1,871 @@
+ import copy
+ import logging
+ import os
+ from typing import Any, Dict, List, Optional, cast
+
+ from runnable import (
+     console,
+     context,
+     defaults,
+     exceptions,
+     parameters,
+     task_console,
+     utils,
+ )
+ from runnable.datastore import DataCatalog, JsonParameter, RunLog, StepAttempt
+ from runnable.defaults import IterableParameterModel
+ from runnable.executor import BasePipelineExecutor
+ from runnable.graph import Graph
+ from runnable.nodes import BaseNode
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+ class GenericPipelineExecutor(BasePipelineExecutor):
+     """
+     The skeleton of an executor class.
+     Any implementation of an executor should inherit this class and override accordingly.
+
+     This is a loaded base class which has a lot of methods already implemented for "typical" executions.
+     Look at the function docs to understand how to use them appropriately.
+
+     For any implementation:
+     1). Who/when should the run log be set up?
+     2). Who/when should the step log be set up?
+
+     """
+
+     service_name: str = ""
+     service_type: str = "pipeline_executor"
+
+     @property
+     def _context(self):
+         current_context = context.get_run_context()
+         if current_context is None:
+             raise RuntimeError("No run context available")
+         if not isinstance(
+             current_context, (context.PipelineContext, context.AsyncPipelineContext)
+         ):
+             raise TypeError(
+                 f"Expected PipelineContext or AsyncPipelineContext, got {type(current_context).__name__}"
+             )
+         return current_context
+
+     def _get_parameters(self) -> Dict[str, JsonParameter]:
+         """
+         Consolidate the parameters from the environment variables
+         and the parameters file.
+
+         The parameters defined in the environment variables take precedence over the parameters file.
+
+         Returns:
+             Dict[str, JsonParameter]: The consolidated parameters.
+         """
+         params: Dict[str, JsonParameter] = {}
+         if self._context.parameters_file:
+             user_defined = utils.load_yaml(self._context.parameters_file) or {}
+
+             for key, value in user_defined.items():
+                 params[key] = JsonParameter(value=value, kind="json")
+
+         # Override these with any parameters set in the environment variables
+         params.update(parameters.get_user_set_parameters())
+         logger.debug(f"parameters as seen by executor: {params}")
+         return params
+
+     def _get_parameters_for_retry(self) -> Dict[str, JsonParameter]:
+         """
+         Get parameters for execution, handling retry logic.
+
+         For retry runs, loads parameters from original run metadata.
+         For normal runs, uses standard parameter loading logic.
+
+         Returns:
+             Dict[str, JsonParameter]: Parameters for execution
+         """
+         if not self._context.is_retry:
+             return self._get_parameters()
+
+         # Load original run log to get parameters
+         original_run_log = self._context.run_log_store.get_run_log_by_id(
+             run_id=self._context.run_id, full=True
+         )
+
+         # Warn if user provided new parameters file
+         if self._context.parameters_file:
+             console.print(
+                 f"⚠️ [bold yellow]RETRY MODE:[/bold yellow] Ignoring provided parameters file "
+                 f"'{self._context.parameters_file}'. Using parameters from original run.",
+                 style="yellow",
+             )
+
+         # Check for environment variable parameter overrides
+         env_params = {
+             key.replace(defaults.PARAMETER_PREFIX, ""): value
+             for key, value in os.environ.items()
+             if key.startswith(defaults.PARAMETER_PREFIX)
+         }
+
+         if env_params:
+             console.print(
+                 f"⚠️ [bold yellow]RETRY MODE:[/bold yellow] Ignoring {len(env_params)} environment "
+                 f"parameter overrides. Using parameters from original run.",
+                 style="yellow",
+             )
+
+         console.print(
+             f"📋 [bold green]RETRY MODE:[/bold green] Using parameters from original run "
+             f"'{self._context.run_id}' with {len(original_run_log.parameters or {})} parameters.",
+             style="green",
+         )
+
+         return original_run_log.parameters or {}
+
+     def _validate_retry_prerequisites(self):
+         """
+         Validate prerequisites for retry execution.
+
+         Raises:
+             RetryValidationError: If retry cannot proceed due to validation failures
+         """
+         if not self._context.is_retry:
+             return  # Not a retry, skip validation
+
+         try:
+             # Check if original run log exists
+             original_run_log = self._context.run_log_store.get_run_log_by_id(
+                 run_id=self._context.run_id, full=True
+             )
+         except exceptions.RunLogNotFoundError:
+             raise exceptions.RetryValidationError(
+                 f"Original run log not found for run_id: {self._context.run_id}. "
+                 f"Cannot retry a run that doesn't exist.",
+                 run_id=self._context.run_id,
+             )
+
+         # Validate DAG structure hasn't changed
+         if original_run_log.dag_hash != self._context.dag_hash:
+             raise exceptions.RetryValidationError(
+                 f"DAG structure has changed since original run. "
+                 f"Original hash: {original_run_log.dag_hash}, "
+                 f"Current hash: {self._context.dag_hash}. "
+                 f"Retry is not allowed when DAG structure changes.",
+                 run_id=self._context.run_id,
+             )
+
+         logger.info(f"Retry validation passed for run_id: {self._context.run_id}")
+
+     def _should_skip_step_in_retry(
+         self, node: BaseNode, iter_variable: Optional[IterableParameterModel] = None
+     ) -> bool:
+         """
+         Determine if a step should be skipped during retry execution.
+
+         Steps are skipped if:
+         - This is a retry run AND
+         - The step was previously executed AND
+         - The last attempt was successful
+
+         Args:
+             node: The node to check
+             iter_variable: Optional iterable variable if in map context
+
+         Returns:
+             bool: True if step should be skipped, False otherwise
+         """
+         if not self._context.is_retry:
+             return False
+
+         step_log_name = node._get_step_log_name(iter_variable)
+
+         try:
+             # Get step log from original run
+             step_log = self._context.run_log_store.get_step_log(
+                 step_log_name, self._context.run_id
+             )
+
+             # Composite nodes do not have attempts; their status is consolidated from child nodes
+             if node.is_composite and step_log.status == defaults.SUCCESS:
+                 logger.info(
+                     f"Skipping composite step '{node.internal_name}' - already successful in original run"
+                 )
+                 return True
+
+             if node._is_terminal_node():
+                 logger.info(
+                     f"Terminal step '{node.internal_name}' will always execute in retry runs"
+                 )
+                 return False
+
+             # Check if last attempt was successful
+             if step_log.attempts:
+                 last_attempt = step_log.attempts[-1]
+                 is_successful = last_attempt.status == defaults.SUCCESS
+
+                 if is_successful:
+                     logger.info(
+                         f"Skipping step '{node.internal_name}' - already successful in original run"
+                     )
+                     return True
+
+             return False
+
+         except exceptions.StepLogNotFoundError:
+             # Step was never executed, don't skip
+             logger.info(
+                 f"Step '{node.internal_name}' was never executed in original run - will execute"
+             )
+             return False
+
+     def _set_up_run_log(self, exists_ok=False):
+         """
+         Create a run log and put it in the run log store.
+
+         If exists_ok, we allow the run log to already be present in the run log store.
+         Enhanced to support retry execution with validation only.
+         """
+         # For retry runs: validate prerequisites and return early
+         if self._context.is_retry:
+             logger.info(
+                 f"Validating retry prerequisites for run_id: {self._context.run_id}"
+             )
+             self._validate_retry_prerequisites()
+             logger.info(
+                 f"Retry validation passed. Reusing existing run log: {self._context.run_id}"
+             )
+             return  # Don't create a new run log, reuse the existing one
+
+         # Normal run log creation logic (unchanged)
+         try:
+             attempt_run_log = self._context.run_log_store.get_run_log_by_id(
+                 run_id=self._context.run_id, full=False
+             )
+
+             logger.warning(
+                 f"The run log by id: {self._context.run_id} already exists, is this by design?"
+             )
+             raise exceptions.RunLogExistsError(
+                 f"The run log by id: {self._context.run_id} already exists and is {attempt_run_log.status}"
+             )
+         except exceptions.RunLogNotFoundError:
+             pass
+         except exceptions.RunLogExistsError:
+             if exists_ok:
+                 return
+             raise
+
+         # Consolidate and get the parameters
+         params = self._get_parameters()
+
+         self._context.run_log_store.create_run_log(
+             run_id=self._context.run_id,
+             tag=self._context.tag,
+             status=defaults.PROCESSING,
+             dag_hash=self._context.dag_hash,
+         )
+         # Any interaction with run log store attributes should happen via the API, if available.
+         self._context.run_log_store.set_parameters(
+             run_id=self._context.run_id, parameters=params
+         )
+
+         # Update run_config
+         run_config = self._context.model_dump()
+         logger.debug(f"run_config as seen by executor: {run_config}")
+         self._context.run_log_store.set_run_config(
+             run_id=self._context.run_id, run_config=run_config
+         )
+
+     def _sync_catalog(
+         self, stage: str, synced_catalogs=None, allow_file_not_found_exc: bool = False
+     ) -> Optional[List[DataCatalog]]:
+         """
+         1). Identify the catalog settings by over-riding node settings with the global settings.
+         2). For stage = get:
+             Identify the catalog items that are being asked to get from the catalog
+             and copy them to the local compute data folder.
+         3). For stage = put:
+             Identify the catalog items that are being asked to put into the catalog
+             and copy the items from the local compute folder to the catalog.
+         4). Add the items onto the step log according to the stage.
+
+         Args:
+             stage (str): One of get or put
+             synced_catalogs: Catalogs that have already been synced, if any
+             allow_file_not_found_exc (bool): For stage = put, whether a missing file is tolerated
+
+         Raises:
+             Exception: If the stage is not one of get/put
+
+         """
+         assert isinstance(self._context_node, BaseNode)
+         if stage not in ["get", "put"]:
+             msg = (
+                 "Catalog service only accepts get/put as possible actions as part of node execution. "
+                 f"Sync catalog of the executor: {self.service_name} asks for {stage}, which is not accepted"
+             )
+             logger.exception(msg)
+             raise Exception(msg)
+
+         try:
+             node_catalog_settings = self._context_node._get_catalog_settings()
+         except exceptions.TerminalNodeError:
+             return None
+
+         if not (node_catalog_settings and stage in node_catalog_settings):
+             logger.info("No catalog settings found for stage: %s", stage)
+             # Nothing to get/put from the catalog
+             return None
+
+         data_catalogs = []
+         for name_pattern in node_catalog_settings.get(stage) or []:
+             if stage == "get":
+                 data_catalog = self._context.catalog.get(
+                     name=name_pattern,
+                 )
+
+             elif stage == "put":
+                 data_catalog = self._context.catalog.put(
+                     name=name_pattern,
+                     allow_file_not_found_exc=allow_file_not_found_exc,
+                     store_copy=node_catalog_settings.get("store_copy", True),
+                 )
+             else:
+                 raise Exception(f"Stage {stage} not supported")
+
+             logger.debug(f"Added data catalog: {data_catalog} to step log")
+             data_catalogs.extend(data_catalog)
+
+         return data_catalogs
+
+     @property
+     def step_attempt_number(self) -> int:
+         """
+         The attempt number of the current step.
+         Orchestrators should use this to submit multiple attempts of the same job.
+
+         Returns:
+             int: The attempt number of the current step. Defaults to 1.
+         """
+         return int(os.environ.get(defaults.ATTEMPT_NUMBER, 1))
+
+     def add_task_log_to_catalog(
+         self,
+         name: str,
+         iter_variable: Optional[IterableParameterModel] = None,
+     ):
+         log_file_name = utils.make_log_file_name(
+             name=name,
+             iter_variable=iter_variable,
+         )
+         task_console.save_text(log_file_name)
+         task_console.export_text(clear=True)
+         # Put the log file in the catalog
+         self._context.catalog.put(name=log_file_name)
+         os.remove(log_file_name)
+
+     def _calculate_attempt_number(
+         self,
+         node: BaseNode,
+         iter_variable: Optional[IterableParameterModel] = None,
+     ) -> int:
+         """
+         Calculate the attempt number for a node based on existing attempts in the run log.
+
+         Args:
+             node: The node to calculate attempt number for
+             iter_variable: Optional iteration variable if node is in a map state
+
+         Returns:
+             int: The attempt number (starting from 1)
+         """
+         step_log_name = node._get_step_log_name(iter_variable)
+
+         try:
+             existing_step_log = self._context.run_log_store.get_step_log(
+                 step_log_name, self._context.run_id
+             )
+             # If step log exists, increment attempt number based on existing attempts
+             return len(existing_step_log.attempts) + 1
+         except exceptions.StepLogNotFoundError:
+             # This is the first attempt, use attempt number 1
+             return 1
+
+     def _execute_node(
+         self,
+         node: BaseNode,
+         iter_variable: Optional[IterableParameterModel] = None,
+         mock: bool = False,
+     ):
+         """
+         This is the entry point when we do the actual execution of the function.
+         DO NOT override this function.
+
+         While in interactive execution we just compute; in 3rd party orchestrated execution we need
+         to reach this function.
+
+         In most cases,
+         * We get the corresponding step_log of the node and the parameters.
+         * We sync the catalog to GET any data sets that are in the catalog.
+         * We call the execute method of the node for the actual compute and retry it as many times as asked.
+         * If the node succeeds, we get any of the user defined metrics provided by the user.
+         * We sync the catalog to PUT any data sets that are in the catalog.
+
+         Args:
+             node (Node): The node to execute
+             iter_variable: Optional iteration variable if the node is of a map state.
+                 Defaults to None.
+         """
+         # Calculate attempt number based on existing attempts in run log
+         current_attempt_number = self._calculate_attempt_number(node, iter_variable)
+
+         # Set the environment variable for this attempt
+         os.environ[defaults.ATTEMPT_NUMBER] = str(current_attempt_number)
+
+         logger.info(
+             f"Trying to execute node: {node.internal_name}, attempt: {current_attempt_number}"
+         )
+
+         self._context_node = node
+
+         data_catalogs_get: Optional[List[DataCatalog]] = self._sync_catalog(stage="get")
+         logger.debug(f"data_catalogs_get: {data_catalogs_get}")
+
+         step_log = node.execute(
+             iter_variable=iter_variable,
+             attempt_number=current_attempt_number,
+             mock=mock,
+         )
+
+         allow_file_not_found_exc = True
+         if step_log.status == defaults.SUCCESS:
+             # Raise an exception if we succeeded but the file was not found
+             allow_file_not_found_exc = False
+
+         data_catalogs_put: Optional[List[DataCatalog]] = self._sync_catalog(
+             stage="put", allow_file_not_found_exc=allow_file_not_found_exc
+         )
+         logger.debug(f"data_catalogs_put: {data_catalogs_put}")
+         step_log.add_data_catalogs(data_catalogs_put or [])
+
+         # The get catalog should always be added to the step log
+         step_log.add_data_catalogs(data_catalogs_get or [])
+
+         console.print(f"Summary of the step: {step_log.internal_name}")
+         console.print(step_log.get_summary(), style=defaults.info_style)
+
+         self.add_task_log_to_catalog(
+             name=self._context_node.internal_name, iter_variable=iter_variable
+         )
+
+         self._context_node = None
+
+         self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+
+     def add_code_identities(self, node: BaseNode, attempt_log: StepAttempt):
+         """
+         Add code identities specific to the implementation.
+
+         The base class has an implementation of adding git code identities.
+
+         Args:
+             attempt_log (StepAttempt): The step attempt log object
+             node (BaseNode): The node we are adding the code identities for
+         """
+         attempt_log.code_identities.append(utils.get_git_code_identity())
+
+     # ═══════════════════════════════════════════════════════════════
+     # Shared helpers - called by both sync and async execution paths
+     # ═══════════════════════════════════════════════════════════════
+
+     def _prepare_node_for_execution(
+         self,
+         node: BaseNode,
+         iter_variable: Optional[IterableParameterModel] = None,
+     ):
+         """
+         Setup before node execution - shared by sync/async paths.
+
+         Returns None if the node should be skipped (retry logic).
+         """
+         if self._should_skip_step_in_retry(node, iter_variable):
+             logger.info(
+                 f"Skipping execution of '{node.internal_name}' due to retry logic"
+             )
+             console.print(
+                 f":fast_forward: Skipping node {node.internal_name} - already successful",
+                 style="bold yellow",
+             )
+             return None
+
+         # Handle step log creation for retry vs normal runs
+         if self._context.is_retry:
+             try:
+                 step_log = self._context.run_log_store.get_step_log(
+                     node._get_step_log_name(iter_variable), self._context.run_id
+                 )
+                 logger.info(
+                     f"Reusing existing step log for retry: {node.internal_name}"
+                 )
+             except exceptions.StepLogNotFoundError:
+                 step_log = self._context.run_log_store.create_step_log(
+                     node.name, node._get_step_log_name(iter_variable)
+                 )
+                 logger.info(f"Creating new step log for retry: {node.internal_name}")
+         else:
+             step_log = self._context.run_log_store.create_step_log(
+                 node.name, node._get_step_log_name(iter_variable)
+             )
+
+         step_log.step_type = node.node_type
+         step_log.status = defaults.PROCESSING
+         self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+
+         return step_log
+
+     def _finalize_graph_execution(
+         self,
+         node: BaseNode,
+         dag: Graph,
+         iter_variable: Optional[IterableParameterModel] = None,
+     ):
+         """Finalize after graph traversal - shared by sync/async paths."""
+         run_log = self._context.run_log_store.get_branch_log(
+             node._get_branch_log_name(iter_variable), self._context.run_id
+         )
+
+         branch = "graph"
+         if node.internal_branch_name:
+             branch = node.internal_branch_name
+
+         logger.info(f"Finished execution of {branch} with status {run_log.status}")
+
+         if dag == self._context.dag:
+             run_log = cast(RunLog, run_log)
+             console.print("Completed Execution, Summary:", style="bold color(208)")
+             console.print(run_log.get_summary(), style=defaults.info_style)
+
+     def execute_from_graph(
+         self,
+         node: BaseNode,
+         iter_variable: Optional[IterableParameterModel] = None,
+     ):
+         """
+         Sync node execution entry point.
+
+         Uses the _prepare_node_for_execution helper for setup (shared with the async path).
+         """
+         step_log = self._prepare_node_for_execution(node, iter_variable)
+         if step_log is None:
+             return  # Skipped due to retry logic
+
+         logger.info(f"Executing node: {node.get_summary()}")
+
+         # Terminal nodes
+         if node.node_type in ["success", "fail"]:
+             self._execute_node(node, iter_variable=iter_variable)
+             return
+
+         # Composite nodes delegate to their sub-graph
+         if node.is_composite:
+             node.execute_as_graph(iter_variable=iter_variable)
+             return
+
+         # Task nodes
+         task_name = node._resolve_map_placeholders(node.internal_name, iter_variable)
+         console.print(
+             f":runner: Executing the node {task_name} ... ", style="bold color(208)"
+         )
+         self.trigger_node_execution(node=node, iter_variable=iter_variable)
+
+     def trigger_node_execution(
+         self,
+         node: BaseNode,
+         iter_variable: Optional[IterableParameterModel] = None,
+     ):
+         """
+         Call this method only if we are responsible for traversing the graph via
+         execute_from_graph().
+
+         The base implementation does nothing; extensions that execute nodes override it.
+
+         Args:
+             node (BaseNode): The node to execute
+             iter_variable: If the node is of a map state, this corresponds to the value of the iterable.
+                 Defaults to None.
+
+         NOTE: We do not raise an exception as this method is not required by many extensions
+         """
+         pass
+
+     def _get_status_and_next_node_name(
+         self,
+         current_node: BaseNode,
+         dag: Graph,
+         iter_variable: Optional[IterableParameterModel] = None,
+     ) -> tuple[str, str]:
+         """
+         Given the current node and the graph, returns the name of the next node to execute.
+
+         The name is always relative to the graph that the node resides in.
+
+         If the current node succeeded, we return the next node as per the graph.
+         If the current node failed, we return the on-failure node of the node (if provided) or the global one.
+
+         This method is only used by interactive executors, i.e. local and local-container.
+
+         Args:
+             current_node (BaseNode): The current node.
+             dag (Graph): The dag we are traversing.
+             iter_variable: If the node belongs to a map branch.
+
+         """
+
+         step_log = self._context.run_log_store.get_step_log(
+             current_node._get_step_log_name(iter_variable), self._context.run_id
+         )
+         logger.info(
+             f"Finished executing the node {current_node} with status {step_log.status}"
+         )
+
+         try:
+             next_node_name = current_node._get_next_node()
+         except exceptions.TerminalNodeError:
+             next_node_name = ""
+
+         if step_log.status == defaults.FAIL:
+             next_node_name = dag.get_fail_node().name
+             if current_node._get_on_failure_node():
+                 next_node_name = current_node._get_on_failure_node()
+
+         return step_log.status, next_node_name
+
+     def execute_graph(
+         self,
+         dag: Graph,
+         iter_variable: Optional[IterableParameterModel] = None,
+     ):
+         """
+         The parallelization is controlled by the nodes and not by this function.
+
+         Transpilers should override this method to do the translation of the dag to the
+         platform specific way.
+
+         Interactive methods should use this to traverse and execute the dag.
+         - Use execute_from_graph to handle sub-graphs
+
+         Logically the method should:
+         * Start at the dag.start_at of the dag.
+         * Call self.execute_from_graph(node)
+         * Depending upon the status of the execution, either move to the
+           success node or the failure node.
+
+         Args:
+             dag (Graph): The directed acyclic graph to traverse and execute.
+             iter_variable: If the node is of a map state, this
+                 corresponds to the value of the iterable.
+                 Defaults to None.
+         """
+         current_node = dag.start_at
+         previous_node = None
+         logger.info(f"Running the execution with {current_node}")
+         logger.info(
+             "iter_variable: %s",
+             iter_variable.model_dump_json() if iter_variable else None,
+         )
+
+         branch_task_name: str = ""
+         if dag.internal_branch_name:
+             branch_task_name = BaseNode._resolve_map_placeholders(
+                 dag.internal_branch_name or "Graph",
+                 iter_variable,
+             )
+             console.print(
+                 f":runner: Executing the branch {branch_task_name} ... ",
+                 style="bold color(208)",
+             )
+
+         while True:
+             working_on = dag.get_node_by_name(current_node)
+             task_name = working_on._resolve_map_placeholders(
+                 working_on.internal_name, iter_variable
+             )
+
+             if previous_node == current_node:
+                 raise Exception("Potentially running in an infinite loop")
+
+             previous_node = current_node
+
+             try:
+                 self.execute_from_graph(working_on, iter_variable=iter_variable)
+                 status, next_node_name = self._get_status_and_next_node_name(
+                     current_node=working_on, dag=dag, iter_variable=iter_variable
+                 )
+
+                 if status == defaults.SUCCESS:
+                     console.print(
+                         f":white_check_mark: Node {task_name} succeeded",
+                     )
+                 else:
+                     console.print(
+                         f":x: Node {task_name} failed",
+                     )
+             except Exception as e:
+                 console.print(":x: Error during execution", style="bold red")
+                 console.print(e, style=defaults.error_style)
+                 logger.exception(e)
+                 raise
+
+             console.rule(style="[dark orange]")
+
+             if working_on.node_type in ["success", "fail"]:
+                 break
+
+             current_node = next_node_name
+
+         # Use the shared helper for finalization
+         self._finalize_graph_execution(working_on, dag, iter_variable)
+
+     def send_return_code(self, stage="traversal"):
+         """
+         Convenience function used by the pipeline to send the return code to the caller of the cli.
+
+         Raises:
+             Exception: If the pipeline execution failed
+         """
+         run_id = self._context.run_id
+
+         run_log = self._context.run_log_store.get_run_log_by_id(
+             run_id=run_id, full=False
+         )
+         if run_log.status == defaults.FAIL:
+             raise exceptions.ExecutionFailedError(run_id=run_id)
+
+     def _resolve_executor_config(self, node: BaseNode) -> Dict[str, Any]:
+         """
+         The overrides section can contain specific over-rides to a global executor config.
+         To avoid too much clutter in the dag definition, we allow the configuration file to
+         have an overrides block.
+
+         The nodes can over-ride the global config by referring to a key in the overrides.
+
+         This function also applies variables to the effective node config.
+
+         For example:
+         # configuration.yaml
+         execution:
+           type: cloud-implementation
+           config:
+             k1: v1
+             k3: v3
+           overrides:
+             custom_config:
+               k1: v11
+               k2: v2 # Could be a mapping internally.
+
+         # in pipeline definition.yaml
+         dag:
+           steps:
+             step1:
+               overrides:
+                 cloud-implementation: custom_config
+
+         This method should resolve the node_config to {'k1': 'v11', 'k2': 'v2', 'k3': 'v3'}
+
+         Args:
+             node (BaseNode): The current node being processed.
+
+         """
+         effective_node_config = copy.deepcopy(self.model_dump())
+         try:
+             ctx_node_config = node._get_executor_config(self.service_name)
+         except exceptions.TerminalNodeError:
+             # Some modes request the effective node config even for success or fail nodes
+             return utils.apply_variables(effective_node_config, self._context.variables)
+
+         if ctx_node_config:
+             if ctx_node_config not in self.overrides:
+                 raise Exception(
+                     f"No override of key: {ctx_node_config} found in the overrides section"
+                 )
+
+             effective_node_config.update(self.overrides[ctx_node_config])
+
+         effective_node_config = utils.apply_variables(
+             effective_node_config, self._context.variables
+         )
+         logger.debug(f"Effective node config: {effective_node_config}")
+
+         return effective_node_config
+
+     def fan_out(
+         self,
+         node: BaseNode,
+         iter_variable: Optional[IterableParameterModel] = None,
+     ):
+         """
+         This method is used to appropriately fan out the execution of a composite node.
+         This is only useful when we want to execute a composite node under 3rd party orchestration.
+
+         Reason: Transpilers typically try to run the leaf nodes but do not have any capacity
+         to do anything for the step which is composite. By calling this fan-out before calling the
+         leaf nodes, we have an opportunity to do the right set up (creating the step log,
+         exposing the parameters, etc.) for the composite step.
+
+         All 3rd party orchestrators should use this method to fan out the execution of
+         a composite node.
+         This ensures:
+         - The dot path notation is preserved: this method should create the step and
+           call the node's fan out to create the branch logs and let the 3rd party do the
+           actual step execution.
+         - 3rd party orchestrators get an opportunity to set up what is required
+           for running a composite node.
+
+         Args:
+             node (BaseNode): The node to fan out
+             iter_variable: If the node is of a map state. Defaults to None.
+
+         """
+         step_log = self._context.run_log_store.create_step_log(
+             node.name, node._get_step_log_name(iter_variable=iter_variable)
+         )
+
+         step_log.step_type = node.node_type
+         step_log.status = defaults.PROCESSING
+         self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+
+         node.fan_out(iter_variable=iter_variable)
+
+     def fan_in(
+         self,
+         node: BaseNode,
+         iter_variable: Optional[IterableParameterModel] = None,
+     ):
+         """
+         This method is used to appropriately fan in after the execution of a composite node.
+         This is only useful when we want to execute a composite node under 3rd party orchestration.
+
+         Reason: Transpilers typically try to run the leaf nodes but do not have any capacity
+         to do anything for the step which is composite. By calling this fan-in after calling
+         the leaf nodes, we have an opportunity to act depending upon the status of the
+         individual branches.
+
+         All 3rd party orchestrators should use this method to fan in the execution of a
+         composite node.
+         This ensures:
+         - The renderers keep control of where to go depending upon the state of
+           the composite node.
+         - The status of the step and its underlying branches are correctly updated.
+
+         Args:
+             node (BaseNode): The node to fan in
+             iter_variable: If the node is of a map state. Defaults to None.
+
+         """
+         node.fan_in(iter_variable=iter_variable)
+
+         step_log = self._context.run_log_store.get_step_log(
+             node._get_step_log_name(iter_variable=iter_variable), self._context.run_id
+         )
+
+         if step_log.status == defaults.FAIL:
+             raise Exception(f"Step {node.name} failed")
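
GenericPipelineExecutor is a base class: the concrete executors in this package (local.py, local_container.py, emulate.py, mocked.py, argo.py in the file list above) subclass it and override only the hooks they need. As a minimal sketch of that extension pattern, an in-process executor only has to name itself and route trigger_node_execution into _execute_node; run-log setup, catalog sync and retry bookkeeping all come from the base class. The class below is illustrative and not part of the released package:

from typing import Optional

from extensions.pipeline_executor import GenericPipelineExecutor
from runnable.defaults import IterableParameterModel
from runnable.nodes import BaseNode


class InProcessExecutor(GenericPipelineExecutor):
    """Hypothetical executor that runs every task in the local process."""

    service_name: str = "in-process"

    def trigger_node_execution(
        self,
        node: BaseNode,
        iter_variable: Optional[IterableParameterModel] = None,
    ):
        # Interactive executors compute in-process: hand the node straight to
        # _execute_node, which syncs the catalog around the actual compute and
        # records the step attempt in the run log store.
        self._execute_node(node=node, iter_variable=iter_variable)

Graph traversal then comes for free: execute_graph walks the dag, execute_from_graph prepares each step log, and this one override decides where the compute actually happens.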