runnable 0.12.3__py3-none-any.whl → 0.14.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. runnable/__init__.py +0 -11
  2. runnable/catalog.py +27 -5
  3. runnable/cli.py +122 -26
  4. runnable/datastore.py +71 -35
  5. runnable/defaults.py +0 -1
  6. runnable/entrypoints.py +107 -32
  7. runnable/exceptions.py +6 -2
  8. runnable/executor.py +28 -9
  9. runnable/graph.py +37 -12
  10. runnable/integration.py +7 -2
  11. runnable/nodes.py +15 -17
  12. runnable/parameters.py +27 -8
  13. runnable/pickler.py +1 -1
  14. runnable/sdk.py +101 -33
  15. runnable/secrets.py +3 -1
  16. runnable/tasks.py +246 -34
  17. runnable/utils.py +41 -13
  18. {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info}/METADATA +25 -31
  19. runnable-0.14.0.dist-info/RECORD +24 -0
  20. {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info}/WHEEL +1 -1
  21. runnable-0.14.0.dist-info/entry_points.txt +40 -0
  22. runnable/extensions/__init__.py +0 -0
  23. runnable/extensions/catalog/__init__.py +0 -21
  24. runnable/extensions/catalog/file_system/__init__.py +0 -0
  25. runnable/extensions/catalog/file_system/implementation.py +0 -234
  26. runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
  27. runnable/extensions/catalog/k8s_pvc/implementation.py +0 -16
  28. runnable/extensions/catalog/k8s_pvc/integration.py +0 -59
  29. runnable/extensions/executor/__init__.py +0 -649
  30. runnable/extensions/executor/argo/__init__.py +0 -0
  31. runnable/extensions/executor/argo/implementation.py +0 -1194
  32. runnable/extensions/executor/argo/specification.yaml +0 -51
  33. runnable/extensions/executor/k8s_job/__init__.py +0 -0
  34. runnable/extensions/executor/k8s_job/implementation_FF.py +0 -259
  35. runnable/extensions/executor/k8s_job/integration_FF.py +0 -69
  36. runnable/extensions/executor/local/__init__.py +0 -0
  37. runnable/extensions/executor/local/implementation.py +0 -71
  38. runnable/extensions/executor/local_container/__init__.py +0 -0
  39. runnable/extensions/executor/local_container/implementation.py +0 -446
  40. runnable/extensions/executor/mocked/__init__.py +0 -0
  41. runnable/extensions/executor/mocked/implementation.py +0 -154
  42. runnable/extensions/executor/retry/__init__.py +0 -0
  43. runnable/extensions/executor/retry/implementation.py +0 -168
  44. runnable/extensions/nodes.py +0 -855
  45. runnable/extensions/run_log_store/__init__.py +0 -0
  46. runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
  47. runnable/extensions/run_log_store/chunked_file_system/implementation.py +0 -111
  48. runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
  49. runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +0 -21
  50. runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +0 -61
  51. runnable/extensions/run_log_store/db/implementation_FF.py +0 -157
  52. runnable/extensions/run_log_store/db/integration_FF.py +0 -0
  53. runnable/extensions/run_log_store/file_system/__init__.py +0 -0
  54. runnable/extensions/run_log_store/file_system/implementation.py +0 -140
  55. runnable/extensions/run_log_store/generic_chunked.py +0 -557
  56. runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
  57. runnable/extensions/run_log_store/k8s_pvc/implementation.py +0 -21
  58. runnable/extensions/run_log_store/k8s_pvc/integration.py +0 -56
  59. runnable/extensions/secrets/__init__.py +0 -0
  60. runnable/extensions/secrets/dotenv/__init__.py +0 -0
  61. runnable/extensions/secrets/dotenv/implementation.py +0 -100
  62. runnable-0.12.3.dist-info/RECORD +0 -64
  63. runnable-0.12.3.dist-info/entry_points.txt +0 -41
  64. {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info/licenses}/LICENSE +0 -0
@@ -1,855 +0,0 @@
1
- import importlib
2
- import logging
3
- import os
4
- import sys
5
- from collections import OrderedDict
6
- from copy import deepcopy
7
- from datetime import datetime
8
- from typing import Annotated, Any, Callable, Dict, List, Optional, Tuple, Union, cast
9
-
10
- from pydantic import (
11
- ConfigDict,
12
- Field,
13
- ValidationInfo,
14
- field_serializer,
15
- field_validator,
16
- )
17
-
18
- from runnable import console, datastore, defaults, utils
19
- from runnable.datastore import (
20
- JsonParameter,
21
- MetricParameter,
22
- ObjectParameter,
23
- Parameter,
24
- StepLog,
25
- )
26
- from runnable.defaults import TypeMapVariable
27
- from runnable.graph import Graph, create_graph
28
- from runnable.nodes import CompositeNode, ExecutableNode, TerminalNode
29
- from runnable.tasks import BaseTaskType, create_task
30
-
31
- logger = logging.getLogger(defaults.LOGGER_NAME)
32
-
33
-
34
class TaskNode(ExecutableNode):
    """
    A node of type Task.

    This node does the actual function execution of the graph in all cases.
    """

    executable: BaseTaskType = Field(exclude=True)
    node_type: str = Field(default="task", serialization_alias="type")

    # Extras are technically not allowed as parse_from_config filters them.
    # Allowing them here keeps task-level configuration present during serialization.
    model_config = ConfigDict(extra="allow")

    @classmethod
    def parse_from_config(cls, config: Dict[str, Any]) -> "TaskNode":
        """Split the raw config into node-level and task-level keys and build the node."""
        known_fields = set(TaskNode.model_fields.keys())

        task_config: Dict[str, Any] = {}
        node_config: Dict[str, Any] = {}
        for key, value in config.items():
            if key in known_fields:
                node_config[key] = value
            else:
                task_config[key] = value

        return cls(executable=create_task(task_config), **node_config, **task_config)

    def get_summary(self) -> Dict[str, Any]:
        """Return a serializable summary of this task node."""
        return {
            "name": self.name,
            "type": self.node_type,
            "executable": self.executable.get_summary(),
            "catalog": self._get_catalog_settings(),
        }

    def execute(
        self,
        mock=False,
        map_variable: TypeMapVariable = None,
        attempt_number: int = 1,
        **kwargs,
    ) -> StepLog:
        """
        Execute the wrapped command and record the attempt on the step log.

        Args:
            mock (bool, optional): If True, skip execution and record a successful
                attempt — useful for caching and dry runs. Defaults to False.
            map_variable (dict, optional): Map variables when the node is part of an
                internal branch. Defaults to None.
            attempt_number (int, optional): The attempt being recorded. Defaults to 1.

        Returns:
            StepLog: The step log updated with this attempt.
        """
        step_log = self._context.run_log_store.get_step_log(self._get_step_log_name(map_variable), self._context.run_id)

        if mock:
            # Fabricate a successful attempt without running anything.
            attempt_log = datastore.StepAttempt(
                status=defaults.SUCCESS,
                start_time=str(datetime.now()),
                end_time=str(datetime.now()),
                attempt_number=attempt_number,
            )
        else:
            attempt_log = self.executable.execute_command(map_variable=map_variable)
            attempt_log.attempt_number = attempt_number

        logger.info(f"attempt_log: {attempt_log}")
        logger.info(f"Step {self.name} completed with status: {attempt_log.status}")

        step_log.status = attempt_log.status
        step_log.attempts.append(attempt_log)

        return step_log
class FailNode(TerminalNode):
    """
    A leaf node of the graph that represents a failure node
    """

    node_type: str = Field(default="fail", serialization_alias="type")

    @classmethod
    def parse_from_config(cls, config: Dict[str, Any]) -> "FailNode":
        return cast("FailNode", super().parse_from_config(config))

    def get_summary(self) -> Dict[str, Any]:
        """Return a serializable summary of this node."""
        return {
            "name": self.name,
            "type": self.node_type,
        }

    def execute(
        self,
        mock=False,
        map_variable: TypeMapVariable = None,
        attempt_number: int = 1,
        **kwargs,
    ) -> StepLog:
        """
        Execute the failure node: flag the run (or branch) log as failed.

        The node's own attempt is recorded as SUCCESS because the node itself
        executed without error; only the run/branch status is set to FAIL.

        Args:
            mock (bool, optional): Unused; kept for interface compatibility.
            map_variable (dict, optional): Map variables when inside an internal branch.
            attempt_number (int, optional): The attempt being recorded.

        Returns:
            StepLog: The step log updated with this attempt.
        """
        context = self._context
        store = context.run_log_store

        step_log = store.get_step_log(self._get_step_log_name(map_variable), context.run_id)

        attempt_log = datastore.StepAttempt(
            status=defaults.SUCCESS,
            start_time=str(datetime.now()),
            end_time=str(datetime.now()),
            attempt_number=attempt_number,
        )

        # Mark the enclosing run or branch as failed.
        run_or_branch_log = store.get_branch_log(self._get_branch_log_name(map_variable), context.run_id)
        run_or_branch_log.status = defaults.FAIL
        store.add_branch_log(run_or_branch_log, context.run_id)

        step_log.status = attempt_log.status
        step_log.attempts.append(attempt_log)

        return step_log
class SuccessNode(TerminalNode):
    """
    A leaf node of the graph that represents a success node
    """

    node_type: str = Field(default="success", serialization_alias="type")

    @classmethod
    def parse_from_config(cls, config: Dict[str, Any]) -> "SuccessNode":
        return cast("SuccessNode", super().parse_from_config(config))

    def get_summary(self) -> Dict[str, Any]:
        """Return a serializable summary of this node."""
        return {
            "name": self.name,
            "type": self.node_type,
        }

    def execute(
        self,
        mock=False,
        map_variable: TypeMapVariable = None,
        attempt_number: int = 1,
        **kwargs,
    ) -> StepLog:
        """
        Execute the success node: flag the run (or branch) log as successful.

        Args:
            mock (bool, optional): Unused; kept for interface compatibility.
            map_variable (dict, optional): Map variables when inside an internal branch.
            attempt_number (int, optional): The attempt being recorded.

        Returns:
            StepLog: The step log updated with this attempt.
        """
        context = self._context
        store = context.run_log_store

        step_log = store.get_step_log(self._get_step_log_name(map_variable), context.run_id)

        attempt_log = datastore.StepAttempt(
            status=defaults.SUCCESS,
            start_time=str(datetime.now()),
            end_time=str(datetime.now()),
            attempt_number=attempt_number,
        )

        # Mark the enclosing run or branch as successful.
        run_or_branch_log = store.get_branch_log(self._get_branch_log_name(map_variable), context.run_id)
        run_or_branch_log.status = defaults.SUCCESS
        store.add_branch_log(run_or_branch_log, context.run_id)

        step_log.status = attempt_log.status
        step_log.attempts.append(attempt_log)

        return step_log
class ParallelNode(CompositeNode):
    """
    A composite node containing many graph objects within itself.

    The structure is generally:
        ParallelNode:
            Branch A:
                Sub graph definition
            Branch B:
                Sub graph definition
            . . .

    """

    node_type: str = Field(default="parallel", serialization_alias="type")
    branches: Dict[str, Graph]
    is_composite: bool = Field(default=True, exclude=True)

    def get_summary(self) -> Dict[str, Any]:
        """Return a serializable summary of this node, including every branch."""
        return {
            "name": self.name,
            "type": self.node_type,
            "branches": [graph.get_summary() for graph in self.branches.values()],
        }

    @field_serializer("branches")
    def ser_branches(self, branches: Dict[str, Graph]) -> Dict[str, Graph]:
        # Serialize with only the leaf segment of the dotted internal branch name.
        return {name.split(".")[-1]: graph for name, graph in branches.items()}

    @classmethod
    def parse_from_config(cls, config: Dict[str, Any]) -> "ParallelNode":
        """Build the node and its sub-graphs from the raw configuration."""
        internal_name = cast(str, config.get("internal_name"))

        config_branches = config.pop("branches", {})
        branches = {}
        for branch_name, branch_config in config_branches.items():
            qualified_name = internal_name + "." + branch_name
            branches[qualified_name] = create_graph(
                deepcopy(branch_config),
                internal_branch_name=qualified_name,
            )

        if not branches:
            raise Exception("A parallel node should have branches")
        return cls(branches=branches, **config)

    def _get_branch_by_name(self, branch_name: str) -> Graph:
        """Return the branch graph for the given fully-qualified name."""
        if branch_name in self.branches:
            return self.branches[branch_name]

        raise Exception(f"Branch {branch_name} does not exist")

    def fan_out(self, map_variable: TypeMapVariable = None, **kwargs):
        """
        The general fan out method for a node of type Parallel.
        This method assumes that the step log has already been created.

        3rd party orchestrators should create the step log and use this method
        to create the branch logs.

        Args:
            map_variable (dict, optional): If the node is part of a map node. Defaults to None.
        """
        # Prepare one branch log per branch, in PROCESSING state.
        for internal_branch_name in self.branches:
            resolved_name = self._resolve_map_placeholders(internal_branch_name, map_variable=map_variable)

            branch_log = self._context.run_log_store.create_branch_log(resolved_name)
            branch_log.status = defaults.PROCESSING
            self._context.run_log_store.add_branch_log(branch_log, self._context.run_id)

    def execute_as_graph(self, map_variable: TypeMapVariable = None, **kwargs):
        """
        Execute every sub-branch of the parallel node, fanning out first and
        fanning in afterwards to collate the statuses.

        From a design perspective, this function should not be called if the
        execution is 3rd party orchestrated — those modes have their own
        mechanisms for handling parallel states.

        The execution of a dag could result in:
            * a definite (fail, success) state for local or local-container execution
            * a PROCESSING state for deferred executors

        Only fail state is considered failure during this phase of execution.

        Args:
            map_variable (dict, optional): Map variables the node belongs to.
            **kwargs: Optional kwargs passed around
        """
        self.fan_out(map_variable=map_variable, **kwargs)

        for branch_graph in self.branches.values():
            self._context.executor.execute_graph(branch_graph, map_variable=map_variable, **kwargs)

        self.fan_in(map_variable=map_variable, **kwargs)

    def fan_in(self, map_variable: TypeMapVariable = None, **kwargs):
        """
        The general fan in method for a node of type Parallel.

        3rd party orchestrators should use this method to find the status of
        the composite step.

        Args:
            map_variable (dict, optional): If the node is part of a map. Defaults to None.
        """
        effective_internal_name = self._resolve_map_placeholders(self.internal_name, map_variable=map_variable)

        all_branches_succeeded = True
        for internal_branch_name in self.branches:
            resolved_name = self._resolve_map_placeholders(internal_branch_name, map_variable=map_variable)
            branch_log = self._context.run_log_store.get_branch_log(resolved_name, self._context.run_id)

            if branch_log.status != defaults.SUCCESS:
                all_branches_succeeded = False

        # Collate all the results and update the status of the step.
        step_log = self._context.run_log_store.get_step_log(effective_internal_name, self._context.run_id)
        step_log.status = defaults.SUCCESS if all_branches_succeeded else defaults.FAIL

        self._context.run_log_store.add_step_log(step_log, self._context.run_id)
class MapNode(CompositeNode):
    """
    A composite node that contains ONE graph object within itself that has to be executed with an iterable.

    The structure is generally:
        MapNode:
            branch

    The config is expected to have a variable 'iterate_on' and iterate_as which are looked for in the parameters.
    for iter_variable in parameters['iterate_on']:
        Execute the Branch by sending {'iterate_as': iter_variable}

    The internal naming convention creates branches dynamically based on the iteration value
    """

    # TODO: Should it be one function or a dict of functions indexed by the return name

    node_type: str = Field(default="map", serialization_alias="type")
    iterate_on: str
    iterate_as: str
    iterate_index: bool = Field(default=False)  # TODO: Need to design this
    reducer: Optional[str] = Field(default=None)
    branch: Graph
    is_composite: bool = True

    def get_summary(self) -> Dict[str, Any]:
        """Return a serializable summary of this map node."""
        summary = {
            "name": self.name,
            "type": self.node_type,
            "branch": self.branch.get_summary(),
            "iterate_on": self.iterate_on,
            "iterate_as": self.iterate_as,
            "iterate_index": self.iterate_index,
            "reducer": self.reducer,
        }

        return summary

    def get_reducer_function(self):
        """
        Resolve the reducer for the map results.

        Resolution order:
            * No reducer configured: a function collecting all arguments into a list.
            * A lambda expression, e.g. "lambda *x: max(x)".
            * A dotted import path, e.g. "mypackage.reducers.mean".
        """
        if not self.reducer:
            return lambda *x: list(x)  # returns a list of the args

        # Try to interpret the reducer as a lambda expression.
        # NOTE: eval of a user-provided string; the pipeline definition is trusted input.
        # A dotted path like "a.b.c" is valid syntax but raises NameError here, so
        # NameError must also fall through to the import-path resolution below.
        try:
            f = eval(self.reducer)
            if callable(f):
                return f
        except (SyntaxError, NameError):
            logger.info(f"{self.reducer} is not a lambda function")

        # Load the reducer function from dotted path
        mod, func = utils.get_module_and_attr_names(self.reducer)
        sys.path.insert(0, os.getcwd())  # Need to add the current directory to path
        imported_module = importlib.import_module(mod)
        f = getattr(imported_module, func)

        return f

    @classmethod
    def parse_from_config(cls, config: Dict[str, Any]) -> "MapNode":
        """Build the node and its single iterated branch from the raw configuration."""
        internal_name = cast(str, config.get("internal_name"))

        config_branch = config.pop("branch", {})
        if not config_branch:
            raise Exception("A map node should have a branch")

        branch = create_graph(
            deepcopy(config_branch),
            internal_branch_name=internal_name + "." + defaults.MAP_PLACEHOLDER,
        )
        return cls(branch=branch, **config)

    @property
    def branch_returns(self):
        """(name, placeholder parameter) pairs for every return declared by the branch's task nodes."""
        branch_returns: List[Tuple[str, Union[ObjectParameter, MetricParameter, JsonParameter]]] = []
        for _, node in self.branch.nodes.items():
            if isinstance(node, TaskNode):
                for task_return in node.executable.returns:
                    if task_return.kind == "json":
                        branch_returns.append((task_return.name, JsonParameter(kind="json", value="", reduced=False)))
                    elif task_return.kind == "object":
                        branch_returns.append(
                            (
                                task_return.name,
                                ObjectParameter(
                                    kind="object",
                                    value="Will be reduced",
                                    reduced=False,
                                ),
                            )
                        )
                    elif task_return.kind == "metric":
                        branch_returns.append(
                            (task_return.name, MetricParameter(kind="metric", value="", reduced=False))
                        )
                    else:
                        raise Exception("kind should be either json or object")

        return branch_returns

    def _get_branch_by_name(self, branch_name: str) -> Graph:
        """
        Retrieve a branch by name.

        In the case of a Map Object, the branch naming is dynamic as it is parameterized on iterable.
        This method takes no responsibility in checking the validity of the naming.

        Returns a Graph Object

        Args:
            branch_name (str): The name of the branch to retrieve

        Raises:
            Exception: If the branch by that name does not exist
        """
        return self.branch

    def fan_out(self, map_variable: TypeMapVariable = None, **kwargs):
        """
        The general method to fan out for a node of type map.
        This method assumes that the step log has already been created.

        3rd party orchestrators should call this method to create the individual branch logs.

        Args:
            map_variable (dict, optional): If the node is part of map. Defaults to None.
        """
        iterate_on = self._context.run_log_store.get_parameters(self._context.run_id)[self.iterate_on].get_value()

        # Prepare the branch logs
        for iter_variable in iterate_on:
            effective_branch_name = self._resolve_map_placeholders(
                self.internal_name + "." + str(iter_variable), map_variable=map_variable
            )
            branch_log = self._context.run_log_store.create_branch_log(effective_branch_name)

            console.print(f"Branch log created for {effective_branch_name}: {branch_log}")
            branch_log.status = defaults.PROCESSING
            self._context.run_log_store.add_branch_log(branch_log, self._context.run_id)

        # Gather all the returns of the task nodes and create parameters in reduced=False state.
        raw_parameters = {}
        if map_variable:
            # If we are in a map state already, the param should have an index of the map variable.
            for _, v in map_variable.items():
                for branch_return in self.branch_returns:
                    param_name, param_type = branch_return
                    raw_parameters[f"{v}_{param_name}"] = param_type.copy()
        else:
            for branch_return in self.branch_returns:
                param_name, param_type = branch_return
                raw_parameters[f"{param_name}"] = param_type.copy()

        self._context.run_log_store.set_parameters(parameters=raw_parameters, run_id=self._context.run_id)

    def execute_as_graph(self, map_variable: TypeMapVariable = None, **kwargs):
        """
        This function does the actual execution of the branch of the map node.

        From a design perspective, this function should not be called if the execution
        is 3rd party orchestrated — those modes have their own mechanisms for handling
        map states or dynamic parallel states.

        The actual logic is:
            * We iterate over the iterable as mentioned in the config
            * For every value in the iterable we call executor.execute_graph(branch, iterate_as: iter_variable)

        The execution of a dag could result in:
            * a definite (fail, success) state for local or local-container execution
            * a PROCESSING state for deferred executors

        Only fail state is considered failure during this phase of execution.

        Args:
            map_variable (dict): The map variables the graph belongs to
            **kwargs: Optional kwargs passed around
        """

        iterate_on = None
        try:
            iterate_on = self._context.run_log_store.get_parameters(self._context.run_id)[self.iterate_on].get_value()
        except KeyError as e:
            # A single readable message (the original passed a tuple to Exception,
            # which renders as a tuple repr).
            raise Exception(
                f"Expected parameter {self.iterate_on} not present in Run Log parameters, was it ever set before?"
            ) from e

        if not isinstance(iterate_on, list):
            raise Exception("Only list is allowed as a valid iterator type")

        self.fan_out(map_variable=map_variable, **kwargs)

        for iter_variable in iterate_on:
            effective_map_variable = map_variable or OrderedDict()
            effective_map_variable[self.iterate_as] = iter_variable

            self._context.executor.execute_graph(self.branch, map_variable=effective_map_variable, **kwargs)

        self.fan_in(map_variable=map_variable, **kwargs)

    def fan_in(self, map_variable: TypeMapVariable = None, **kwargs):
        """
        The general method to fan in for a node of type map.

        3rd party orchestrators should call this method to find the status of the step log.

        Args:
            map_variable (dict, optional): If the node is part of map node. Defaults to None.
        """
        params = self._context.run_log_store.get_parameters(self._context.run_id)
        iterate_on = params[self.iterate_on].get_value()

        # Find status of the branches
        step_success_bool = True
        effective_internal_name = self._resolve_map_placeholders(self.internal_name, map_variable=map_variable)

        for iter_variable in iterate_on:
            effective_branch_name = self._resolve_map_placeholders(
                self.internal_name + "." + str(iter_variable), map_variable=map_variable
            )
            branch_log = self._context.run_log_store.get_branch_log(effective_branch_name, self._context.run_id)

            if branch_log.status != defaults.SUCCESS:
                step_success_bool = False

        # Collate all the results and update the status of the step
        step_log = self._context.run_log_store.get_step_log(effective_internal_name, self._context.run_id)

        if step_success_bool:  # If none failed and nothing is waiting
            step_log.status = defaults.SUCCESS
        else:
            step_log.status = defaults.FAIL

        self._context.run_log_store.add_step_log(step_log, self._context.run_id)

        # Apply the reduce function and reduce the returns of the task nodes.
        # The final value of the parameter is the result of the reduce function.
        reducer_f = self.get_reducer_function()

        def update_param(params: Dict[str, Parameter], reducer_f: Callable, map_prefix: str = ""):
            # Reduce every declared branch return across all iterations.
            from runnable.extensions.executor.mocked.implementation import (
                MockedExecutor,
            )

            for branch_return in self.branch_returns:
                param_name, _ = branch_return

                to_reduce = []
                for iter_variable in iterate_on:
                    try:
                        to_reduce.append(params[f"{iter_variable}_{param_name}"].get_value())
                    except KeyError as e:
                        # Mocked executions may legitimately never set the parameter.
                        if isinstance(self._context.executor, MockedExecutor):
                            pass
                        else:
                            raise Exception(
                                f"Expected parameter {iter_variable}_{param_name} "
                                "not present in Run Log parameters, was it ever set before?"
                            ) from e

                param_name = f"{map_prefix}{param_name}"
                if to_reduce:
                    params[param_name].value = reducer_f(*to_reduce)
                else:
                    params[param_name].value = ""
                params[param_name].reduced = True

        if map_variable:
            # If we are in a map state already, the param should have an index of the map variable.
            for _, v in map_variable.items():
                update_param(params, reducer_f, map_prefix=f"{v}_")
        else:
            update_param(params, reducer_f)

        self._context.run_log_store.set_parameters(parameters=params, run_id=self._context.run_id)
class DagNode(CompositeNode):
    """
    A composite node that internally holds a dag.

    The structure is generally:
        DagNode:
            dag_definition: A YAML file that holds the dag in 'dag' block

    The config is expected to have a variable 'dag_definition'.
    """

    node_type: str = Field(default="dag", serialization_alias="type")
    dag_definition: str
    branch: Graph
    is_composite: bool = True
    internal_branch_name: Annotated[str, Field(validate_default=True)] = ""

    def get_summary(self) -> Dict[str, Any]:
        """Return a serializable summary of this node."""
        summary = {
            "name": self.name,
            "type": self.node_type,
        }
        return summary

    @field_validator("internal_branch_name")
    @classmethod
    def validate_internal_branch_name(cls, internal_branch_name: str, info: ValidationInfo):
        # The internal branch name is always derived from the node's internal name;
        # any supplied value is ignored.
        internal_name = info.data["internal_name"]
        return internal_name + "." + defaults.DAG_BRANCH_NAME

    @field_validator("dag_definition")
    @classmethod
    def validate_dag_definition(cls, value):
        """Ensure the dag definition points at a YAML file."""
        if not value.endswith(".yaml"):  # TODO: Might have a problem with the SDK
            raise ValueError("dag_definition must be a YAML file")
        return value

    @classmethod
    def parse_from_config(cls, config: Dict[str, Any]) -> "DagNode":
        """Load the sub-dag from the referenced YAML file and build the node."""
        internal_name = cast(str, config.get("internal_name"))

        if "dag_definition" not in config:
            raise Exception(f"No dag definition found in {config}")

        dag_config = utils.load_yaml(config["dag_definition"])
        if "dag" not in dag_config:
            raise Exception("No DAG found in dag_definition, please provide it in dag block")

        branch = create_graph(dag_config["dag"], internal_branch_name=internal_name + "." + defaults.DAG_BRANCH_NAME)

        return cls(branch=branch, **config)

    def _get_branch_by_name(self, branch_name: str):
        """
        Retrieve a branch by name.
        The name is expected to follow a dot path convention.

        Returns a Graph Object

        Args:
            branch_name (str): The name of the branch to retrieve

        Raises:
            Exception: If the branch_name is not 'dag'
        """
        if branch_name != self.internal_branch_name:
            raise Exception(f"Node of type {self.node_type} only allows a branch of name {defaults.DAG_BRANCH_NAME}")

        return self.branch

    def fan_out(self, map_variable: TypeMapVariable = None, **kwargs):
        """
        The general method to fan out for a node of type dag.
        The method assumes that the step log has already been created.

        Args:
            map_variable (dict, optional): Map variables when inside a map branch. Defaults to None.
        """
        effective_branch_name = self._resolve_map_placeholders(self.internal_branch_name, map_variable=map_variable)

        branch_log = self._context.run_log_store.create_branch_log(effective_branch_name)
        branch_log.status = defaults.PROCESSING
        self._context.run_log_store.add_branch_log(branch_log, self._context.run_id)

    def execute_as_graph(self, map_variable: TypeMapVariable = None, **kwargs):
        """
        This function does the actual execution of the branch of the dag node.

        From a design perspective, this function should not be called if the execution
        is 3rd party orchestrated — those modes have their own mechanisms for handling
        sub dags.

        The actual logic is:
            * We just execute the branch as with any other composite nodes
            * The branch name is called 'dag'

        The execution of a dag could result in:
            * a definite (fail, success) state for local or local-container execution
            * a PROCESSING state for deferred executors

        Only fail state is considered failure during this phase of execution.

        Args:
            map_variable (dict, optional): Map variables when inside a map branch.
            **kwargs: Optional kwargs passed around
        """
        self.fan_out(map_variable=map_variable, **kwargs)
        self._context.executor.execute_graph(self.branch, map_variable=map_variable, **kwargs)
        self.fan_in(map_variable=map_variable, **kwargs)

    def fan_in(self, map_variable: TypeMapVariable = None, **kwargs):
        """
        The general method to fan in for a node of type dag.

        3rd party orchestrators should call this method to find the status of the step log.

        Args:
            map_variable (dict, optional): If the node is part of type dag. Defaults to None.
        """
        step_success_bool = True
        effective_branch_name = self._resolve_map_placeholders(self.internal_branch_name, map_variable=map_variable)
        effective_internal_name = self._resolve_map_placeholders(self.internal_name, map_variable=map_variable)

        branch_log = self._context.run_log_store.get_branch_log(effective_branch_name, self._context.run_id)
        if branch_log.status != defaults.SUCCESS:
            step_success_bool = False

        step_log = self._context.run_log_store.get_step_log(effective_internal_name, self._context.run_id)
        # (The original assigned PROCESSING here, immediately overwritten below — dead store removed.)

        if step_success_bool:  # If none failed and nothing is waiting
            step_log.status = defaults.SUCCESS
        else:
            step_log.status = defaults.FAIL

        self._context.run_log_store.add_step_log(step_log, self._context.run_id)
class StubNode(ExecutableNode):
    """
    Stub is a convenience design node.

    It always returns success in the attempt log and does nothing.

    This node is very similar to pass state in Step functions.

    This node type could be handy when designing the pipeline and stubbing functions
    """

    node_type: str = Field(default="stub", serialization_alias="type")
    model_config = ConfigDict(extra="ignore")

    def get_summary(self) -> Dict[str, Any]:
        """Return a serializable summary of this node."""
        return {
            "name": self.name,
            "type": self.node_type,
        }

    @classmethod
    def parse_from_config(cls, config: Dict[str, Any]) -> "StubNode":
        return cls(**config)

    def execute(
        self,
        mock=False,
        map_variable: TypeMapVariable = None,
        attempt_number: int = 1,
        **kwargs,
    ) -> StepLog:
        """
        Do-nothing execution: record a successful attempt and return the step log.

        Args:
            mock (bool, optional): Ignored; the node never does real work.
            map_variable (dict, optional): Map variables when inside an internal branch.
            attempt_number (int, optional): The attempt being recorded.

        Returns:
            StepLog: The step log updated with a successful attempt.
        """
        context = self._context
        step_log = context.run_log_store.get_step_log(self._get_step_log_name(map_variable), context.run_id)

        attempt_log = datastore.StepAttempt(
            status=defaults.SUCCESS,
            start_time=str(datetime.now()),
            end_time=str(datetime.now()),
            attempt_number=attempt_number,
        )

        step_log.status = attempt_log.status
        step_log.attempts.append(attempt_log)

        return step_log