runnable 0.12.3__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. runnable/__init__.py +0 -11
  2. runnable/catalog.py +27 -5
  3. runnable/cli.py +122 -26
  4. runnable/datastore.py +71 -35
  5. runnable/defaults.py +0 -1
  6. runnable/entrypoints.py +107 -32
  7. runnable/exceptions.py +6 -2
  8. runnable/executor.py +28 -9
  9. runnable/graph.py +37 -12
  10. runnable/integration.py +7 -2
  11. runnable/nodes.py +15 -17
  12. runnable/parameters.py +27 -8
  13. runnable/pickler.py +1 -1
  14. runnable/sdk.py +101 -33
  15. runnable/secrets.py +3 -1
  16. runnable/tasks.py +246 -34
  17. runnable/utils.py +41 -13
  18. {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info}/METADATA +25 -31
  19. runnable-0.14.0.dist-info/RECORD +24 -0
  20. {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info}/WHEEL +1 -1
  21. runnable-0.14.0.dist-info/entry_points.txt +40 -0
  22. runnable/extensions/__init__.py +0 -0
  23. runnable/extensions/catalog/__init__.py +0 -21
  24. runnable/extensions/catalog/file_system/__init__.py +0 -0
  25. runnable/extensions/catalog/file_system/implementation.py +0 -234
  26. runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
  27. runnable/extensions/catalog/k8s_pvc/implementation.py +0 -16
  28. runnable/extensions/catalog/k8s_pvc/integration.py +0 -59
  29. runnable/extensions/executor/__init__.py +0 -649
  30. runnable/extensions/executor/argo/__init__.py +0 -0
  31. runnable/extensions/executor/argo/implementation.py +0 -1194
  32. runnable/extensions/executor/argo/specification.yaml +0 -51
  33. runnable/extensions/executor/k8s_job/__init__.py +0 -0
  34. runnable/extensions/executor/k8s_job/implementation_FF.py +0 -259
  35. runnable/extensions/executor/k8s_job/integration_FF.py +0 -69
  36. runnable/extensions/executor/local/__init__.py +0 -0
  37. runnable/extensions/executor/local/implementation.py +0 -71
  38. runnable/extensions/executor/local_container/__init__.py +0 -0
  39. runnable/extensions/executor/local_container/implementation.py +0 -446
  40. runnable/extensions/executor/mocked/__init__.py +0 -0
  41. runnable/extensions/executor/mocked/implementation.py +0 -154
  42. runnable/extensions/executor/retry/__init__.py +0 -0
  43. runnable/extensions/executor/retry/implementation.py +0 -168
  44. runnable/extensions/nodes.py +0 -855
  45. runnable/extensions/run_log_store/__init__.py +0 -0
  46. runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
  47. runnable/extensions/run_log_store/chunked_file_system/implementation.py +0 -111
  48. runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
  49. runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +0 -21
  50. runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +0 -61
  51. runnable/extensions/run_log_store/db/implementation_FF.py +0 -157
  52. runnable/extensions/run_log_store/db/integration_FF.py +0 -0
  53. runnable/extensions/run_log_store/file_system/__init__.py +0 -0
  54. runnable/extensions/run_log_store/file_system/implementation.py +0 -140
  55. runnable/extensions/run_log_store/generic_chunked.py +0 -557
  56. runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
  57. runnable/extensions/run_log_store/k8s_pvc/implementation.py +0 -21
  58. runnable/extensions/run_log_store/k8s_pvc/integration.py +0 -56
  59. runnable/extensions/secrets/__init__.py +0 -0
  60. runnable/extensions/secrets/dotenv/__init__.py +0 -0
  61. runnable/extensions/secrets/dotenv/implementation.py +0 -100
  62. runnable-0.12.3.dist-info/RECORD +0 -64
  63. runnable-0.12.3.dist-info/entry_points.txt +0 -41
  64. {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info/licenses}/LICENSE +0 -0
@@ -1,855 +0,0 @@
1
- import importlib
2
- import logging
3
- import os
4
- import sys
5
- from collections import OrderedDict
6
- from copy import deepcopy
7
- from datetime import datetime
8
- from typing import Annotated, Any, Callable, Dict, List, Optional, Tuple, Union, cast
9
-
10
- from pydantic import (
11
- ConfigDict,
12
- Field,
13
- ValidationInfo,
14
- field_serializer,
15
- field_validator,
16
- )
17
-
18
- from runnable import console, datastore, defaults, utils
19
- from runnable.datastore import (
20
- JsonParameter,
21
- MetricParameter,
22
- ObjectParameter,
23
- Parameter,
24
- StepLog,
25
- )
26
- from runnable.defaults import TypeMapVariable
27
- from runnable.graph import Graph, create_graph
28
- from runnable.nodes import CompositeNode, ExecutableNode, TerminalNode
29
- from runnable.tasks import BaseTaskType, create_task
30
-
31
- logger = logging.getLogger(defaults.LOGGER_NAME)
32
-
33
-
34
class TaskNode(ExecutableNode):
    """
    Executable node that wraps a task.

    The task object is produced by ``create_task`` from every configuration key
    that is not a declared field of this node, and is invoked from ``execute``.
    """

    # The task built by create_task; excluded from serialization.
    executable: BaseTaskType = Field(exclude=True)
    node_type: str = Field(default="task", serialization_alias="type")

    # It is technically not allowed as parse_from_config filters them.
    # This is just to get the task level configuration to be present during serialization.
    model_config = ConfigDict(extra="allow")

    @classmethod
    def parse_from_config(cls, config: Dict[str, Any]) -> "TaskNode":
        """
        Build a TaskNode from a raw configuration mapping.

        Keys matching the declared model fields form the node configuration; all
        remaining keys form the task configuration handed to ``create_task``.
        Both groups are also passed to the constructor.

        Args:
            config (dict): The raw configuration of the node.

        Returns:
            TaskNode: The constructed node.
        """
        declared_fields = TaskNode.model_fields.keys()

        node_config: Dict[str, Any] = {}
        task_config: Dict[str, Any] = {}
        for key, value in config.items():
            if key in declared_fields:
                node_config[key] = value
            else:
                task_config[key] = value

        executable = create_task(task_config)
        return cls(executable=executable, **node_config, **task_config)

    def get_summary(self) -> Dict[str, Any]:
        """
        Return the name, type, executable summary and catalog settings of the node.
        """
        return {
            "name": self.name,
            "type": self.node_type,
            "executable": self.executable.get_summary(),
            "catalog": self._get_catalog_settings(),
        }

    def execute(
        self,
        mock=False,
        map_variable: TypeMapVariable = None,
        attempt_number: int = 1,
        **kwargs,
    ) -> StepLog:
        """
        Run the wrapped task, or fabricate a successful attempt when mocking.

        Args:
            mock (bool, optional): When truthy the task is not run and a SUCCESS attempt
                is recorded instead. Defaults to False.
            map_variable (dict, optional): If the node is part of internal branch. Defaults to None.
            attempt_number (int, optional): The number recorded on the attempt. Defaults to 1.

        Returns:
            StepLog: The step log with its status set and the attempt appended.
        """
        step_log = self._context.run_log_store.get_step_log(self._get_step_log_name(map_variable), self._context.run_id)

        if mock:
            # Mocked execution: nothing runs, we only record a successful attempt.
            attempt_log = datastore.StepAttempt(
                status=defaults.SUCCESS,
                start_time=str(datetime.now()),
                end_time=str(datetime.now()),
                attempt_number=attempt_number,
            )
        else:
            attempt_log = self.executable.execute_command(map_variable=map_variable)
            attempt_log.attempt_number = attempt_number

        logger.info(f"attempt_log: {attempt_log}")
        logger.info(f"Step {self.name} completed with status: {attempt_log.status}")

        step_log.status = attempt_log.status
        step_log.attempts.append(attempt_log)

        return step_log
106
-
107
-
108
class FailNode(TerminalNode):
    """
    Terminal node of the graph that marks the run or branch as failed.
    """

    node_type: str = Field(default="fail", serialization_alias="type")

    @classmethod
    def parse_from_config(cls, config: Dict[str, Any]) -> "FailNode":
        """
        Delegate parsing to the parent class and narrow the return type.
        """
        parsed = super().parse_from_config(config)
        return cast("FailNode", parsed)

    def get_summary(self) -> Dict[str, Any]:
        """
        Return the name and type of the node.
        """
        return {
            "name": self.name,
            "type": self.node_type,
        }

    def execute(
        self,
        mock=False,
        map_variable: TypeMapVariable = None,
        attempt_number: int = 1,
        **kwargs,
    ) -> StepLog:
        """
        Execute the failure node.

        The enclosing run or branch log is stored with a FAIL status, while the
        attempt of this node itself is recorded as SUCCESS.

        Args:
            mock (bool, optional): Not consulted by this node. Defaults to False.
            map_variable (dict, optional): If the node belongs to internal branches. Defaults to None.
            attempt_number (int, optional): The number recorded on the attempt. Defaults to 1.

        Returns:
            StepLog: The step log with the attempt appended.
        """
        store = self._context.run_log_store
        run_id = self._context.run_id

        step_log = store.get_step_log(self._get_step_log_name(map_variable), run_id)

        attempt_log = datastore.StepAttempt(
            status=defaults.SUCCESS,
            start_time=str(datetime.now()),
            end_time=str(datetime.now()),
            attempt_number=attempt_number,
        )

        # Reaching this node means the enclosing run or branch has failed.
        enclosing_log = store.get_branch_log(self._get_branch_log_name(map_variable), run_id)
        enclosing_log.status = defaults.FAIL
        store.add_branch_log(enclosing_log, run_id)

        step_log.status = attempt_log.status
        step_log.attempts.append(attempt_log)

        return step_log
166
-
167
-
168
class SuccessNode(TerminalNode):
    """
    Terminal node of the graph that marks the run or branch as successful.
    """

    node_type: str = Field(default="success", serialization_alias="type")

    @classmethod
    def parse_from_config(cls, config: Dict[str, Any]) -> "SuccessNode":
        """
        Delegate parsing to the parent class and narrow the return type.
        """
        parsed = super().parse_from_config(config)
        return cast("SuccessNode", parsed)

    def get_summary(self) -> Dict[str, Any]:
        """
        Return the name and type of the node.
        """
        return {
            "name": self.name,
            "type": self.node_type,
        }

    def execute(
        self,
        mock=False,
        map_variable: TypeMapVariable = None,
        attempt_number: int = 1,
        **kwargs,
    ) -> StepLog:
        """
        Execute the success node.

        The enclosing run or branch log is stored with a SUCCESS status and a
        SUCCESS attempt is recorded for this node.

        Args:
            mock (bool, optional): Not consulted by this node. Defaults to False.
            map_variable (dict, optional): If the node belongs to an internal branch. Defaults to None.
            attempt_number (int, optional): The number recorded on the attempt. Defaults to 1.

        Returns:
            StepLog: The step log with the attempt appended.
        """
        store = self._context.run_log_store
        run_id = self._context.run_id

        step_log = store.get_step_log(self._get_step_log_name(map_variable), run_id)

        attempt_log = datastore.StepAttempt(
            status=defaults.SUCCESS,
            start_time=str(datetime.now()),
            end_time=str(datetime.now()),
            attempt_number=attempt_number,
        )

        # Reaching this node means the enclosing run or branch has succeeded.
        enclosing_log = store.get_branch_log(self._get_branch_log_name(map_variable), run_id)
        enclosing_log.status = defaults.SUCCESS
        store.add_branch_log(enclosing_log, run_id)

        step_log.status = attempt_log.status
        step_log.attempts.append(attempt_log)

        return step_log
226
-
227
-
228
class ParallelNode(CompositeNode):
    """
    Composite node that holds several named sub-graphs (branches).

    The structure is generally:
        ParallelNode:
            Branch A:
                Sub graph definition
            Branch B:
                Sub graph definition
            . . .

    Branches are keyed by ``<internal_name>.<branch name>``.
    """

    node_type: str = Field(default="parallel", serialization_alias="type")
    branches: Dict[str, Graph]
    is_composite: bool = Field(default=True, exclude=True)

    def get_summary(self) -> Dict[str, Any]:
        """
        Return the name, type and per-branch summaries of the node.
        """
        branch_summaries = [graph.get_summary() for graph in self.branches.values()]
        return {
            "name": self.name,
            "type": self.node_type,
            "branches": branch_summaries,
        }

    @field_serializer("branches")
    def ser_branches(self, branches: Dict[str, Graph]) -> Dict[str, Graph]:
        """
        Serialize branches keyed by the last dot-separated segment of their name.
        """
        return {full_name.split(".")[-1]: graph for full_name, graph in branches.items()}

    @classmethod
    def parse_from_config(cls, config: Dict[str, Any]) -> "ParallelNode":
        """
        Build a ParallelNode from configuration, creating one graph per branch.

        The "branches" key is popped from ``config``; the remaining keys are passed
        to the constructor.

        Args:
            config (dict): The raw configuration of the node.

        Raises:
            Exception: If the configuration has no branches.

        Returns:
            ParallelNode: The constructed node.
        """
        internal_name = cast(str, config.get("internal_name"))
        branch_configs = config.pop("branches", {})

        branches = {}
        for short_name, branch_config in branch_configs.items():
            qualified_name = internal_name + "." + short_name
            # Each graph gets its own copy of the branch configuration.
            branches[qualified_name] = create_graph(
                deepcopy(branch_config),
                internal_branch_name=qualified_name,
            )

        if branches:
            return cls(branches=branches, **config)

        raise Exception("A parallel node should have branches")

    def _get_branch_by_name(self, branch_name: str) -> Graph:
        """
        Return the branch stored under ``branch_name``.

        Raises:
            Exception: If no branch of that name exists.
        """
        if branch_name not in self.branches:
            raise Exception(f"Branch {branch_name} does not exist")

        return self.branches[branch_name]

    def fan_out(self, map_variable: TypeMapVariable = None, **kwargs):
        """
        The general fan out method for a node of type Parallel.
        This method assumes that the step log has already been created.

        3rd party orchestrators should create the step log and use this method to create the branch logs.

        A branch log in PROCESSING status is created and stored for every branch.

        Args:
            map_variable (dict, optional): If the node is part of a map node. Defaults to None.
        """
        store = self._context.run_log_store

        for internal_branch_name in self.branches:
            resolved_name = self._resolve_map_placeholders(internal_branch_name, map_variable=map_variable)

            branch_log = store.create_branch_log(resolved_name)
            branch_log.status = defaults.PROCESSING
            store.add_branch_log(branch_log, self._context.run_id)

    def execute_as_graph(self, map_variable: TypeMapVariable = None, **kwargs):
        """
        Execute every branch of the parallel node through the configured executor.

        From a design perspective, this function should not be called if the execution is 3rd party orchestrated.

        The modes that render the job specifications, do not need to interact with this node at all as they have their
        own internal mechanisms of handing parallel states.
        If they do not, you can find a way using as-is nodes as hack nodes.

        The sequence is: fan out, execute each branch graph in turn, fan in.

        Args:
            map_variable (dict, optional): The map variables the node belongs to. Defaults to None.
            **kwargs: Optional kwargs passed around
        """
        self.fan_out(map_variable=map_variable, **kwargs)

        for graph in self.branches.values():
            self._context.executor.execute_graph(graph, map_variable=map_variable, **kwargs)

        self.fan_in(map_variable=map_variable, **kwargs)

    def fan_in(self, map_variable: TypeMapVariable = None, **kwargs):
        """
        The general fan in method for a node of type Parallel.

        3rd party orchestrators should use this method to find the status of the composite step.

        The step is stored as SUCCESS only when every branch log is SUCCESS, otherwise as FAIL.

        Args:
            map_variable (dict, optional): If the node is part of a map. Defaults to None.
        """
        store = self._context.run_log_store
        resolved_step_name = self._resolve_map_placeholders(self.internal_name, map_variable=map_variable)

        # Every branch log is read; there is deliberately no short-circuit.
        branch_statuses = []
        for internal_branch_name in self.branches:
            resolved_branch_name = self._resolve_map_placeholders(internal_branch_name, map_variable=map_variable)
            branch_log = store.get_branch_log(resolved_branch_name, self._context.run_id)
            branch_statuses.append(branch_log.status)

        all_succeeded = all(status == defaults.SUCCESS for status in branch_statuses)

        # Collate all the results and update the status of the step
        step_log = store.get_step_log(resolved_step_name, self._context.run_id)
        step_log.status = defaults.SUCCESS if all_succeeded else defaults.FAIL

        store.add_step_log(step_log, self._context.run_id)
363
-
364
-
365
class MapNode(CompositeNode):
    """
    A composite node that contains ONE graph object within itself that has to be executed with an iterable.

    The structure is generally:
        MapNode:
            branch

    The config is expected to have a variable 'iterate_on' and iterate_as which are looked for in the parameters.
        for iter_variable in parameters['iterate_on']:
            Execute the Branch by sending {'iterate_as': iter_variable}

    The internal naming convention creates branches dynamically based on the iteration value
    """

    # TODO: Should it be one function or a dict of functions indexed by the return name

    node_type: str = Field(default="map", serialization_alias="type")
    iterate_on: str
    iterate_as: str
    iterate_index: bool = Field(default=False)  # TODO: Need to design this
    reducer: Optional[str] = Field(default=None)
    branch: Graph
    is_composite: bool = True

    def get_summary(self) -> Dict[str, Any]:
        """
        Return the name, type, branch summary and iteration settings of the node.
        """
        summary = {
            "name": self.name,
            "type": self.node_type,
            "branch": self.branch.get_summary(),
            "iterate_on": self.iterate_on,
            "iterate_as": self.iterate_as,
            "iterate_index": self.iterate_index,
            "reducer": self.reducer,
        }

        return summary

    def get_reducer_function(self):
        """
        Resolve the configured reducer into a callable.

        Resolution order:
            * No reducer configured: a function that returns its arguments as a list.
            * The reducer string evaluates to a callable (for example a lambda): that callable.
            * Otherwise the string is treated as a dotted path and imported, with the
              current working directory placed at the front of ``sys.path``.

        Returns:
            Callable: The reducer function.
        """
        if not self.reducer:
            return lambda *x: list(x)  # returns a list of the args

        # NOTE(security): eval runs arbitrary code taken from the pipeline definition;
        # the reducer string must come from a trusted source.
        try:
            candidate = eval(self.reducer)
        except (SyntaxError, NameError, AttributeError):
            # A dotted path such as "my_module.reduce" is syntactically valid, so it
            # fails with NameError/AttributeError rather than SyntaxError.
            logger.info(f"{self.reducer} is not a lambda function")
        else:
            if callable(candidate):
                return candidate

        # Load the reducer function from dotted path
        mod, func = utils.get_module_and_attr_names(self.reducer)
        cwd = os.getcwd()
        if not sys.path or sys.path[0] != cwd:
            # Keep the current directory first without growing sys.path on every call.
            sys.path.insert(0, cwd)
        imported_module = importlib.import_module(mod)

        return getattr(imported_module, func)

    @classmethod
    def parse_from_config(cls, config: Dict[str, Any]) -> "MapNode":
        """
        Build a MapNode from configuration, creating the graph of its single branch.

        The "branch" key is popped from ``config``; the remaining keys are passed to
        the constructor.

        Args:
            config (dict): The raw configuration of the node.

        Raises:
            Exception: If the configuration has no branch.

        Returns:
            MapNode: The constructed node.
        """
        internal_name = cast(str, config.get("internal_name"))

        config_branch = config.pop("branch", {})
        if not config_branch:
            raise Exception("A map node should have a branch")

        branch = create_graph(
            deepcopy(config_branch),
            internal_branch_name=internal_name + "." + defaults.MAP_PLACEHOLDER,
        )
        return cls(branch=branch, **config)

    @property
    def branch_returns(self):
        """
        Placeholder parameters, in ``reduced=False`` state, for every return of every
        TaskNode of the branch.

        Raises:
            Exception: If a return has a kind other than json, object or metric.

        Returns:
            List[Tuple[str, Parameter]]: Pairs of return name and placeholder parameter.
        """
        branch_returns: List[Tuple[str, Union[ObjectParameter, MetricParameter, JsonParameter]]] = []
        for node in self.branch.nodes.values():
            if not isinstance(node, TaskNode):
                continue

            for task_return in node.executable.returns:
                if task_return.kind == "json":
                    placeholder = JsonParameter(kind="json", value="", reduced=False)
                elif task_return.kind == "object":
                    placeholder = ObjectParameter(kind="object", value="Will be reduced", reduced=False)
                elif task_return.kind == "metric":
                    placeholder = MetricParameter(kind="metric", value="", reduced=False)
                else:
                    raise Exception(f"kind should be one of json, object or metric, got {task_return.kind}")

                branch_returns.append((task_return.name, placeholder))

        return branch_returns

    def _get_branch_by_name(self, branch_name: str) -> Graph:
        """
        Retrieve a branch by name.

        In the case of a Map Object, the branch naming is dynamic as it is parameterized on iterable.
        This method takes no responsibility in checking the validity of the naming and always
        returns the single branch of the node.

        Args:
            branch_name (str): The name of the branch to retrieve (not inspected).

        Returns:
            Graph: The branch of the map node.
        """
        return self.branch

    def fan_out(self, map_variable: TypeMapVariable = None, **kwargs):
        """
        The general method to fan out for a node of type map.
        This method assumes that the step log has already been created.

        3rd party orchestrators should call this method to create the individual branch logs.

        A branch log in PROCESSING status is stored per iteration value, and placeholder
        parameters for the returns of the branch are stored in the run log.

        Args:
            map_variable (dict, optional): If the node is part of map. Defaults to None.
        """
        iterate_on = self._context.run_log_store.get_parameters(self._context.run_id)[self.iterate_on].get_value()

        # Prepare the branch logs
        for iter_variable in iterate_on:
            effective_branch_name = self._resolve_map_placeholders(
                self.internal_name + "." + str(iter_variable), map_variable=map_variable
            )
            branch_log = self._context.run_log_store.create_branch_log(effective_branch_name)

            console.print(f"Branch log created for {effective_branch_name}: {branch_log}")
            branch_log.status = defaults.PROCESSING
            self._context.run_log_store.add_branch_log(branch_log, self._context.run_id)

        # Gather all the returns of the task nodes and create parameters in reduced=False state.
        raw_parameters = {}
        if map_variable:
            # If we are in a map state already, the param should have an index of the map variable.
            for v in map_variable.values():
                for param_name, param_type in self.branch_returns:
                    raw_parameters[f"{v}_{param_name}"] = param_type.copy()
        else:
            for param_name, param_type in self.branch_returns:
                raw_parameters[f"{param_name}"] = param_type.copy()

        self._context.run_log_store.set_parameters(parameters=raw_parameters, run_id=self._context.run_id)

    def execute_as_graph(self, map_variable: TypeMapVariable = None, **kwargs):
        """
        This function does the actual execution of the branch of the map node.

        From a design perspective, this function should not be called if the execution is 3rd party orchestrated.

        The modes that render the job specifications, do not need to interact with this node at all as
        they have their own internal mechanisms of handing map states or dynamic parallel states.
        If they do not, you can find a way using as-is nodes as hack nodes.

        The actual logic is :
            * We iterate over the iterable as mentioned in the config
            * For every value in the iterable we call the executor.execute_graph(branch, iterate_as: iter_variable)

        Args:
            map_variable (dict): The map variables the graph belongs to
            **kwargs: Optional kwargs passed around

        Raises:
            Exception: If the parameter to iterate on is missing or is not a list.
        """
        try:
            iterate_on = self._context.run_log_store.get_parameters(self._context.run_id)[self.iterate_on].get_value()
        except KeyError as e:
            raise Exception(
                f"Expected parameter {self.iterate_on} "
                "not present in Run Log parameters, "
                "was it ever set before?"
            ) from e

        if not isinstance(iterate_on, list):
            raise Exception("Only list is allowed as a valid iterator type")

        self.fan_out(map_variable=map_variable, **kwargs)

        for iter_variable in iterate_on:
            # Work on a copy: mutating the caller's map_variable would leak this node's
            # iteration key into the fan_in below and into the enclosing map.
            effective_map_variable = OrderedDict(map_variable) if map_variable else OrderedDict()
            effective_map_variable[self.iterate_as] = iter_variable

            self._context.executor.execute_graph(self.branch, map_variable=effective_map_variable, **kwargs)

        self.fan_in(map_variable=map_variable, **kwargs)

    def fan_in(self, map_variable: TypeMapVariable = None, **kwargs):
        """
        The general method to fan in for a node of type map.

        3rd party orchestrators should call this method to find the status of the step log.

        The step is stored as SUCCESS only when every branch log is SUCCESS, otherwise as FAIL.
        The returns of the branch are then reduced with the reducer function and stored back
        into the run log parameters.

        Args:
            map_variable (dict, optional): If the node is part of map node. Defaults to None.

        Raises:
            Exception: If a per-iteration parameter is missing and the executor is not mocked.
        """
        params = self._context.run_log_store.get_parameters(self._context.run_id)
        iterate_on = params[self.iterate_on].get_value()

        # Find status of the branches
        step_success_bool = True
        effective_internal_name = self._resolve_map_placeholders(self.internal_name, map_variable=map_variable)

        for iter_variable in iterate_on:
            effective_branch_name = self._resolve_map_placeholders(
                self.internal_name + "." + str(iter_variable), map_variable=map_variable
            )
            branch_log = self._context.run_log_store.get_branch_log(effective_branch_name, self._context.run_id)

            if branch_log.status != defaults.SUCCESS:
                step_success_bool = False

        # Collate all the results and update the status of the step
        step_log = self._context.run_log_store.get_step_log(effective_internal_name, self._context.run_id)
        step_log.status = defaults.SUCCESS if step_success_bool else defaults.FAIL

        self._context.run_log_store.add_step_log(step_log, self._context.run_id)

        # Apply the reduce function and reduce the returns of the task nodes.
        # The final value of the parameter is the result of the reduce function.
        reducer_f = self.get_reducer_function()

        def update_param(params: Dict[str, Parameter], reducer_f: Callable, map_prefix: str = ""):
            # Imported here rather than at module level, as in the original code.
            from runnable.extensions.executor.mocked.implementation import (
                MockedExecutor,
            )

            for param_name, _ in self.branch_returns:
                to_reduce = []
                for iter_variable in iterate_on:
                    try:
                        to_reduce.append(params[f"{iter_variable}_{param_name}"].get_value())
                    except KeyError as e:
                        # A mocked execution never produced the per-iteration values.
                        if isinstance(self._context.executor, MockedExecutor):
                            continue
                        raise Exception(
                            f"Expected parameter {iter_variable}_{param_name} "
                            "not present in Run Log parameters, "
                            "was it ever set before?"
                        ) from e

                reduced_name = f"{map_prefix}{param_name}"
                params[reduced_name].value = reducer_f(*to_reduce) if to_reduce else ""
                params[reduced_name].reduced = True

        if map_variable:
            # If we are in a map state already, the param should have an index of the map variable.
            for v in map_variable.values():
                update_param(params, reducer_f, map_prefix=f"{v}_")
        else:
            update_param(params, reducer_f)

        self._context.run_log_store.set_parameters(parameters=params, run_id=self._context.run_id)
652
-
653
-
654
class DagNode(CompositeNode):
    """
    Composite node whose single branch is a dag loaded from a YAML file.

    The structure is generally:
        DagNode:
            dag_definition: A YAML file that holds the dag in 'dag' block

    The config is expected to have a variable 'dag_definition'.
    """

    node_type: str = Field(default="dag", serialization_alias="type")
    dag_definition: str
    branch: Graph
    is_composite: bool = True
    # Always derived from internal_name by the validator below, even when defaulted.
    internal_branch_name: Annotated[str, Field(validate_default=True)] = ""

    def get_summary(self) -> Dict[str, Any]:
        """
        Return the name and type of the node.
        """
        return {
            "name": self.name,
            "type": self.node_type,
        }

    @field_validator("internal_branch_name")
    @classmethod
    def validate_internal_branch_name(cls, internal_branch_name: str, info: ValidationInfo):
        """
        Replace the supplied value with ``<internal_name>.<DAG_BRANCH_NAME>``.
        """
        parent_name = info.data["internal_name"]
        return parent_name + "." + defaults.DAG_BRANCH_NAME

    @field_validator("dag_definition")
    @classmethod
    def validate_dag_definition(cls, value):
        """
        Accept only values ending in ".yaml".

        Raises:
            ValueError: If the value does not end in ".yaml".
        """
        if value.endswith(".yaml"):  # TODO: Might have a problem with the SDK
            return value

        raise ValueError("dag_definition must be a YAML file")

    @classmethod
    def parse_from_config(cls, config: Dict[str, Any]) -> "DagNode":
        """
        Build a DagNode by loading the YAML file named in "dag_definition" and
        creating a graph from its "dag" block.

        Args:
            config (dict): The raw configuration of the node.

        Raises:
            Exception: If "dag_definition" is missing from the config or the loaded
                file has no "dag" block.

        Returns:
            DagNode: The constructed node.
        """
        internal_name = cast(str, config.get("internal_name"))

        if "dag_definition" not in config:
            raise Exception(f"No dag definition found in {config}")

        loaded_definition = utils.load_yaml(config["dag_definition"])
        if "dag" not in loaded_definition:
            raise Exception("No DAG found in dag_definition, please provide it in dag block")

        branch_name = internal_name + "." + defaults.DAG_BRANCH_NAME
        branch = create_graph(loaded_definition["dag"], internal_branch_name=branch_name)

        return cls(branch=branch, **config)

    def _get_branch_by_name(self, branch_name: str):
        """
        Retrieve the branch of the dag node.

        Args:
            branch_name (str): The name of the branch to retrieve

        Raises:
            Exception: If the branch_name differs from the internal branch name of the node

        Returns:
            Graph: The branch of the node.
        """
        if branch_name == self.internal_branch_name:
            return self.branch

        raise Exception(f"Node of type {self.node_type} only allows a branch of name {defaults.DAG_BRANCH_NAME}")

    def fan_out(self, map_variable: TypeMapVariable = None, **kwargs):
        """
        The general method to fan out for a node of type dag.
        The method assumes that the step log has already been created.

        A branch log in PROCESSING status is created and stored for the branch.

        Args:
            map_variable (dict, optional): Used to resolve map placeholders in the branch name.
                Defaults to None.
        """
        store = self._context.run_log_store
        resolved_branch_name = self._resolve_map_placeholders(self.internal_branch_name, map_variable=map_variable)

        branch_log = store.create_branch_log(resolved_branch_name)
        branch_log.status = defaults.PROCESSING
        store.add_branch_log(branch_log, self._context.run_id)

    def execute_as_graph(self, map_variable: TypeMapVariable = None, **kwargs):
        """
        This function does the actual execution of the branch of the dag node.

        From a design perspective, this function should not be called if the execution is 3rd party orchestrated.

        The modes that render the job specifications, do not need to interact with this node at all
        as they have their own internal mechanisms of handling sub dags.
        If they do not, you can find a way using as-is nodes as hack nodes.

        The sequence is: fan out, execute the branch graph, fan in.

        Args:
            map_variable (dict, optional): The map variables the node belongs to. Defaults to None.
            **kwargs: Optional kwargs passed around
        """
        self.fan_out(map_variable=map_variable, **kwargs)
        self._context.executor.execute_graph(self.branch, map_variable=map_variable, **kwargs)
        self.fan_in(map_variable=map_variable, **kwargs)

    def fan_in(self, map_variable: TypeMapVariable = None, **kwargs):
        """
        The general method to fan in for a node of type dag.

        3rd party orchestrators should call this method to find the status of the step log.

        The step is stored as SUCCESS when the branch log is SUCCESS, otherwise as FAIL.

        Args:
            map_variable (dict, optional): If the node is part of type dag. Defaults to None.
        """
        store = self._context.run_log_store
        run_id = self._context.run_id

        resolved_branch_name = self._resolve_map_placeholders(self.internal_branch_name, map_variable=map_variable)
        resolved_step_name = self._resolve_map_placeholders(self.internal_name, map_variable=map_variable)

        branch_log = store.get_branch_log(resolved_branch_name, run_id)
        branch_succeeded = branch_log.status == defaults.SUCCESS

        step_log = store.get_step_log(resolved_step_name, run_id)
        # Intermediate status; it is always overwritten on the next statement.
        step_log.status = defaults.PROCESSING
        step_log.status = defaults.SUCCESS if branch_succeeded else defaults.FAIL

        store.add_step_log(step_log, run_id)
795
-
796
-
797
class StubNode(ExecutableNode):
    """
    Stub is a convenience design node.

    It records a successful attempt and does nothing else.

    This node is very similar to pass state in Step functions.

    This node type could be handy when designing the pipeline and stubbing functions
    """

    node_type: str = Field(default="stub", serialization_alias="type")
    # Unknown configuration keys are ignored by the model.
    model_config = ConfigDict(extra="ignore")

    def get_summary(self) -> Dict[str, Any]:
        """
        Return the name and type of the node.
        """
        return {
            "name": self.name,
            "type": self.node_type,
        }

    @classmethod
    def parse_from_config(cls, config: Dict[str, Any]) -> "StubNode":
        """
        Build a StubNode directly from the configuration mapping.
        """
        return cls(**config)

    def execute(
        self,
        mock=False,
        map_variable: TypeMapVariable = None,
        attempt_number: int = 1,
        **kwargs,
    ) -> StepLog:
        """
        Do Nothing node.
        We just send a success attempt log back to the caller

        Args:
            mock (bool, optional): Not consulted by this node. Defaults to False.
            map_variable (dict, optional): Used to resolve the step log name. Defaults to None.
            attempt_number (int, optional): The number recorded on the attempt. Defaults to 1.

        Returns:
            StepLog: The step log with a SUCCESS status and the attempt appended.
        """
        step_log_name = self._get_step_log_name(map_variable)
        step_log = self._context.run_log_store.get_step_log(step_log_name, self._context.run_id)

        attempt_log = datastore.StepAttempt(
            status=defaults.SUCCESS,
            start_time=str(datetime.now()),
            end_time=str(datetime.now()),
            attempt_number=attempt_number,
        )

        step_log.status = attempt_log.status
        step_log.attempts.append(attempt_log)

        return step_log