runnable-0.50.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. extensions/README.md +0 -0
  2. extensions/__init__.py +0 -0
  3. extensions/catalog/README.md +0 -0
  4. extensions/catalog/any_path.py +214 -0
  5. extensions/catalog/file_system.py +52 -0
  6. extensions/catalog/minio.py +72 -0
  7. extensions/catalog/pyproject.toml +14 -0
  8. extensions/catalog/s3.py +11 -0
  9. extensions/job_executor/README.md +0 -0
  10. extensions/job_executor/__init__.py +236 -0
  11. extensions/job_executor/emulate.py +70 -0
  12. extensions/job_executor/k8s.py +553 -0
  13. extensions/job_executor/k8s_job_spec.yaml +37 -0
  14. extensions/job_executor/local.py +35 -0
  15. extensions/job_executor/local_container.py +161 -0
  16. extensions/job_executor/pyproject.toml +16 -0
  17. extensions/nodes/README.md +0 -0
  18. extensions/nodes/__init__.py +0 -0
  19. extensions/nodes/conditional.py +301 -0
  20. extensions/nodes/fail.py +78 -0
  21. extensions/nodes/loop.py +394 -0
  22. extensions/nodes/map.py +477 -0
  23. extensions/nodes/parallel.py +281 -0
  24. extensions/nodes/pyproject.toml +15 -0
  25. extensions/nodes/stub.py +93 -0
  26. extensions/nodes/success.py +78 -0
  27. extensions/nodes/task.py +156 -0
  28. extensions/pipeline_executor/README.md +0 -0
  29. extensions/pipeline_executor/__init__.py +871 -0
  30. extensions/pipeline_executor/argo.py +1266 -0
  31. extensions/pipeline_executor/emulate.py +119 -0
  32. extensions/pipeline_executor/local.py +226 -0
  33. extensions/pipeline_executor/local_container.py +369 -0
  34. extensions/pipeline_executor/mocked.py +159 -0
  35. extensions/pipeline_executor/pyproject.toml +16 -0
  36. extensions/run_log_store/README.md +0 -0
  37. extensions/run_log_store/__init__.py +0 -0
  38. extensions/run_log_store/any_path.py +100 -0
  39. extensions/run_log_store/chunked_fs.py +122 -0
  40. extensions/run_log_store/chunked_minio.py +141 -0
  41. extensions/run_log_store/file_system.py +91 -0
  42. extensions/run_log_store/generic_chunked.py +549 -0
  43. extensions/run_log_store/minio.py +114 -0
  44. extensions/run_log_store/pyproject.toml +15 -0
  45. extensions/secrets/README.md +0 -0
  46. extensions/secrets/dotenv.py +62 -0
  47. extensions/secrets/pyproject.toml +15 -0
  48. runnable/__init__.py +108 -0
  49. runnable/catalog.py +141 -0
  50. runnable/cli.py +484 -0
  51. runnable/context.py +730 -0
  52. runnable/datastore.py +1058 -0
  53. runnable/defaults.py +159 -0
  54. runnable/entrypoints.py +390 -0
  55. runnable/exceptions.py +137 -0
  56. runnable/executor.py +561 -0
  57. runnable/gantt.py +1646 -0
  58. runnable/graph.py +501 -0
  59. runnable/names.py +546 -0
  60. runnable/nodes.py +593 -0
  61. runnable/parameters.py +217 -0
  62. runnable/pickler.py +96 -0
  63. runnable/sdk.py +1277 -0
  64. runnable/secrets.py +92 -0
  65. runnable/tasks.py +1268 -0
  66. runnable/telemetry.py +142 -0
  67. runnable/utils.py +423 -0
  68. runnable-0.50.0.dist-info/METADATA +189 -0
  69. runnable-0.50.0.dist-info/RECORD +72 -0
  70. runnable-0.50.0.dist-info/WHEEL +4 -0
  71. runnable-0.50.0.dist-info/entry_points.txt +53 -0
  72. runnable-0.50.0.dist-info/licenses/LICENSE +201 -0
runnable/nodes.py ADDED
@@ -0,0 +1,593 @@
1
+ import logging
2
+ from abc import ABC, abstractmethod
3
+ from typing import Any, Dict, List, Optional
4
+
5
+ from pydantic import BaseModel, ConfigDict, Field, field_validator
6
+
7
+ import runnable.context as context
8
+ from runnable import defaults, exceptions
9
+ from runnable.datastore import StepLog
10
+ from runnable.defaults import IterableParameterModel
11
+ from runnable.graph import Graph
12
+
13
+ logger = logging.getLogger(defaults.LOGGER_NAME)
14
+
15
+ # --8<-- [start:docs]
16
+
17
+
18
+ class BaseNode(ABC, BaseModel):
19
+ """
20
+ Base class with common functionality provided for a Node of a graph.
21
+
22
+ A node of a graph can be:
23
+ * a single execution node, such as task, success, or fail,
24
+ * a graph in itself, such as parallel, dag, or map,
25
+ * or a convenience node, such as as-is.
26
+
27
+ The name is relative to the DAG.
28
+ The internal name of the node is its absolute name in dot path convention;
29
+ it maps one to one to the name in the run log.
30
+ The internal name of a node should always have an odd number of dot-separated segments.
31
+
32
+ The internal branch name applies only to branched nodes and is the name of the branch the node belongs to.
33
+ It should always have an even number of dot-separated segments.
34
+ """
35
+
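A quick illustration of the dot path convention above, using made-up names:

    # A node nested inside a branch: odd number of dot-separated segments.
    internal_name = "train_models.placeholder.fit"        # 3 segments
    # The branch that node belongs to: even number of segments.
    internal_branch_name = "train_models.placeholder"     # 2 segments
    assert len(internal_name.split(".")) % 2 == 1
    assert len(internal_branch_name.split(".")) % 2 == 0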
36
+ node_type: str = Field(serialization_alias="type")
37
+ name: str
38
+ internal_name: str = Field(exclude=True)
39
+ internal_branch_name: str = Field(default="", exclude=True)
40
+
41
+ is_composite: bool = Field(default=False, exclude=True)
42
+
43
+ @property
44
+ def _context(self):
45
+ current_context = context.get_run_context()
46
+ if current_context is None:
47
+ raise RuntimeError("No run context available")
48
+ if not isinstance(
49
+ current_context, (context.PipelineContext, context.AsyncPipelineContext)
50
+ ):
51
+ raise TypeError(
52
+ f"Expected PipelineContext or AsyncPipelineContext, got {type(current_context).__name__}"
53
+ )
54
+ return current_context
55
+
56
+ model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=False)
57
+
58
+ @field_validator("name")
59
+ @classmethod
60
+ def validate_name(cls, name: str):
61
+ if "." in name or "%" in name:
62
+ raise ValueError("Node names cannot have . or '%' in them")
63
+ return name
64
+
65
+ def _command_friendly_name(
66
+ self, replace_with=defaults.COMMAND_FRIENDLY_CHARACTER
67
+ ) -> str:
68
+ """
69
+ Replace spaces in the node name with the command-friendly character.
70
+ Spaces in node names are convenient for the user but cause issues when the name is used programmatically.
71
+
72
+ Returns:
73
+ str: The command friendly name of the node
74
+ """
75
+ return self.internal_name.replace(" ", replace_with)
76
+
77
+ @classmethod
78
+ def _get_internal_name_from_command_name(cls, command_name: str) -> str:
79
+ """
80
+ Replace runnable specific character (%) with whitespace.
81
+ The opposite of _command_friendly_name.
82
+
83
+ Args:
84
+ command_name (str): The command friendly node name
85
+
86
+ Returns:
87
+ str: The internal name of the step
88
+ """
89
+ return command_name.replace(defaults.COMMAND_FRIENDLY_CHARACTER, " ")
90
+
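A small round-trip sketch of the two helpers above, assuming defaults.COMMAND_FRIENDLY_CHARACTER is "%" as the docstring suggests:

    internal_name = "data prep.clean rows"                   # made-up internal name
    command_name = internal_name.replace(" ", "%")           # what _command_friendly_name() produces
    assert command_name == "data%prep.clean%rows"
    assert command_name.replace("%", " ") == internal_name   # _get_internal_name_from_command_name()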
91
+ @classmethod
92
+ def _resolve_iter_placeholders(
93
+ cls,
94
+ name: str,
95
+ iter_variable: Optional[IterableParameterModel] = None,
96
+ ) -> str:
97
+ """
98
+ Resolve iteration placeholders (map and loop) in node names.
99
+
100
+ Replaces MAP_PLACEHOLDER with map variable values and LOOP_PLACEHOLDER
101
+ with loop iteration indices in order.
102
+
103
+ Args:
104
+ name: The name containing placeholders
105
+ iter_variable: Iteration variables (map and loop)
106
+
107
+ Returns:
108
+ str: Name with placeholders resolved
109
+ """
110
+ if not iter_variable:
111
+ return name
112
+
113
+ resolved_name = name
114
+
115
+ # Resolve map placeholders
116
+ if iter_variable.map_variable:
117
+ for _, value in iter_variable.map_variable.items():
118
+ resolved_name = resolved_name.replace(
119
+ defaults.MAP_PLACEHOLDER, str(value.value), 1
120
+ )
121
+
122
+ # Resolve loop placeholders
123
+ if iter_variable.loop_variable:
124
+ for loop_index in iter_variable.loop_variable:
125
+ resolved_name = resolved_name.replace(
126
+ defaults.LOOP_PLACEHOLDER, str(loop_index.value), 1
127
+ )
128
+
129
+ return resolved_name
130
+
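A minimal sketch of the substitution performed above, using a made-up placeholder token in place of the real defaults.MAP_PLACEHOLDER value; each occurrence is replaced once, in order, with the current iteration value:

    name = "chunks.MAP_PLACEHOLDER.process"
    resolved = name.replace("MAP_PLACEHOLDER", "chunk_7", 1)
    assert resolved == "chunks.chunk_7.process"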
131
+ @classmethod
132
+ def _resolve_map_placeholders(
133
+ cls,
134
+ name: str,
135
+ iter_variable: Optional[IterableParameterModel] = None,
136
+ ) -> str:
137
+ """Deprecated: Use _resolve_iter_placeholders instead."""
138
+ return cls._resolve_iter_placeholders(name, iter_variable)
139
+
140
+ def _get_step_log_name(
141
+ self,
142
+ iter_variable: Optional[IterableParameterModel] = None,
143
+ ) -> str:
144
+ """
145
+ For every step in the dag, there is a corresponding step log name.
146
+ This method returns the step log name in dot path convention.
147
+
148
+ Step log names are "static" and equal to internal_name, except for nodes that belong to a map or loop branch.
149
+ For those nodes, the internal name contains a placeholder that is resolved at runtime.
150
+
151
+ Args:
152
+ iter_variable (IterableParameterModel, optional): The current map/loop iteration state used to resolve
153
+ placeholders in the name.
154
+
155
+ Returns:
156
+ str: The dot path name of the step log name
157
+ """
158
+ return self._resolve_map_placeholders(
159
+ self.internal_name, iter_variable=iter_variable
160
+ )
161
+
162
+ def _get_branch_log_name(
163
+ self,
164
+ iter_variable: Optional[IterableParameterModel] = None,
165
+ ) -> str:
166
+ """
167
+ For nodes that are internally branches, this method returns the branch log name.
168
+ The branch log name is in dot path convention.
169
+
170
+ For nodes that are not map, the internal branch name is equivalent to the branch name.
171
+ For map nodes, the internal branch name has a placeholder that is replaced at runtime.
172
+
173
+ Args:
174
+ iter_variable (IterableParameterModel, optional): The current map/loop iteration state used to resolve
175
+ placeholders in the name.
176
+
177
+ Returns:
178
+ str: The dot path name of the branch log
179
+ """
180
+ return self._resolve_map_placeholders(
181
+ self.internal_branch_name, iter_variable=iter_variable
182
+ )
183
+
184
+ def __str__(self) -> str: # pragma: no cover
185
+ """
186
+ String representation of the node.
187
+
188
+ Returns:
189
+ str: The string representation of the node.
190
+ """
191
+ return f"Node of type {self.node_type} and name {self.internal_name}"
192
+
193
+ @abstractmethod
194
+ def _get_on_failure_node(self) -> str:
195
+ """
196
+ Return the on_failure node defined in the config, or an empty string if none is defined.
197
+
198
+ The name is relative to the dag; the caller is expected to resolve it against the correct graph.
199
+
200
+ Returns:
201
+ str: The on_failure node defined by the dag, or ''
202
+ Abstract; concrete node classes provide the implementation.
203
+ """
204
+
205
+ @abstractmethod
206
+ def _get_next_node(self) -> str:
207
+ """
208
+ Return the next node as defined by the config.
209
+
210
+ Returns:
211
+ str: The node name, relative to the dag, as defined by the config
212
+ """
213
+
214
+ @abstractmethod
215
+ def _is_terminal_node(self) -> bool:
216
+ """
217
+ Returns whether the node is a terminal node, i.e. one with no next node.
218
+
219
+ Returns:
220
+ bool: True if the node is terminal (has no next node), False otherwise.
221
+ """
222
+
223
+ @abstractmethod
224
+ def _get_catalog_settings(self) -> Dict[str, Any]:
225
+ """
226
+ Return the catalog settings defined for the node, if any.
227
+
228
+ Returns:
229
+ dict: The catalog settings for the node; implementations may return an empty dict or raise if not applicable.
230
+ """
231
+
232
+ @abstractmethod
233
+ def _get_branch_by_name(self, branch_name: str) -> Graph:
234
+ """
235
+ Retrieve a branch by name.
236
+
237
+ The name is expected to follow a dot path convention.
238
+
239
+ Args:
240
+ branch_name (str): The dot path name of the branch to retrieve.
241
+
242
+ Raises:
243
+ Exception: If the node has no branches or no branch with the given name exists.
244
+ """
245
+
246
+ def _get_neighbors(self) -> List[str]:
247
+ """
248
+ Gets the connecting neighbor nodes, either the "next" node or "on_failure" node.
249
+
250
+ Returns:
251
+ list: List of connected neighbors for a given node. Empty if terminal node.
252
+ """
253
+ neighbors = []
254
+ try:
255
+ next_node = self._get_next_node()
256
+ neighbors += [next_node]
257
+ except exceptions.TerminalNodeError:
258
+ pass
259
+
260
+ try:
261
+ fail_node = self._get_on_failure_node()
262
+ if fail_node:
263
+ neighbors += [fail_node]
264
+ except exceptions.TerminalNodeError:
265
+ pass
266
+
267
+ return neighbors
268
+
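A hedged sketch of how a caller could walk a dag using _get_neighbors(); get_node_by_name is an assumed helper on Graph, not confirmed by this diff:

    def reachable_nodes(graph, start_name):
        seen, stack = set(), [start_name]
        while stack:
            name = stack.pop()
            if name in seen:
                continue
            seen.add(name)
            node = graph.get_node_by_name(name)   # assumed Graph helper
            stack.extend(node._get_neighbors())
        return seen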
269
+ @abstractmethod
270
+ def _get_executor_config(self, executor_type: str) -> str:
271
+ """
272
+ Return the executor override for this node for the given executor type, or an empty string if none is defined.
273
+
274
+ Args:
275
+ executor_type (str): The executor type that the override applies to.
276
+
277
+ Returns:
278
+ str: The executor override, if defined, or an empty string.
279
+ """
280
+
281
+ @abstractmethod
282
+ def _get_max_attempts(self) -> int:
283
+ """
284
+ The number of max attempts as defined by the config or 1.
285
+
286
+ Returns:
287
+ int: The number of maximum retries as defined by the config or 1.
288
+ """
289
+
290
+ @abstractmethod
291
+ def execute(
292
+ self,
293
+ mock=False,
294
+ iter_variable: Optional[IterableParameterModel] = None,
295
+ attempt_number: int = 1,
296
+ ) -> StepLog:
297
+ """
298
+ The actual function that does the execution of the command in the config.
299
+
300
+ Should only be implemented for task, success, fail and as-is and never for
301
+ composite nodes.
302
+
303
+ Args:
304
+ mock (bool, optional): Don't run, just pretend. Defaults to False.
305
+ iter_variable (IterableParameterModel, optional): The current map/loop iteration state,
306
+ if the node is part of a map or loop branch.
307
+ attempt_number (int, optional): The attempt number of this execution. Defaults to 1.
308
+
309
+ Returns:
310
+ StepLog: The step log of the execution.
311
+ """
312
+
313
+ async def execute_async(
314
+ self,
315
+ iter_variable: Optional[IterableParameterModel] = None,
316
+ attempt_number: int = 1,
317
+ mock: bool = False,
318
+ ) -> StepLog:
319
+ """
320
+ Async execution - default delegates to sync execute().
321
+
322
+ Override in subclasses that support true async execution (TaskNode).
323
+ Terminal nodes (SuccessNode, FailNode) use this default.
324
+ """
325
+ return self.execute(
326
+ iter_variable=iter_variable,
327
+ attempt_number=attempt_number,
328
+ mock=mock,
329
+ )
330
+
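A brief usage sketch of the default delegation above: a concrete, non-composite node that only implements execute() can still be awaited, since execute_async() simply calls it synchronously (node below is any such instance):

    import asyncio

    step_log = asyncio.run(node.execute_async(attempt_number=1))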
331
+ @abstractmethod
332
+ def execute_as_graph(
333
+ self,
334
+ iter_variable: Optional[IterableParameterModel] = None,
335
+ ):
336
+ """
337
+ This function would be called to set up the execution of the individual
338
+ branches of a composite node.
339
+
340
+ Function should only be implemented for composite nodes like dag, map, parallel.
341
+
342
+ Args:
343
+ iter_variable (IterableParameterModel, optional): The current map/loop iteration state, if any.
344
+
345
+ Raises:
346
+ NotImplementedError: Base class, hence not implemented.
347
+ """
348
+
349
+ @abstractmethod
350
+ def fan_out(
351
+ self,
352
+ iter_variable: Optional[IterableParameterModel] = None,
353
+ ):
354
+ """
355
+ This function would be called to set up the execution of the individual
356
+ branches of a composite node.
357
+
358
+ Function should only be implemented for composite nodes like dag, map, parallel.
359
+
360
+ Args:
361
+ iter_variable (IterableParameterModel, optional): The current map/loop iteration state,
362
+ if the node is part of a map or loop branch.
363
+
364
+ Raises:
365
+ Exception: If the node is not a composite node.
366
+ """
367
+
368
+ @abstractmethod
369
+ def fan_in(
370
+ self,
371
+ iter_variable: Optional[IterableParameterModel] = None,
372
+ ):
373
+ """
374
+ This function would be called to tear down the execution of the individual
375
+ branches of a composite node.
376
+
377
+ Function should only be implemented for composite nodes like dag, map, parallel.
378
+
379
+ Args:
380
+ iter_variable (IterableParameterModel, optional): The current map/loop iteration state,
381
+ if the node is part of a map or loop branch.
382
+
383
+ Raises:
384
+ Exception: If the node is not a composite node.
385
+ """
386
+
387
+ @classmethod
388
+ @abstractmethod
389
+ def parse_from_config(cls, config: Dict[str, Any]) -> "BaseNode":
390
+ """
391
+ Parse the config from the user and create the corresponding node.
392
+
393
+ Args:
394
+ config (Dict[str, Any]): The config of the node from the yaml or from the sdk.
395
+
396
+ Returns:
397
+ BaseNode: The corresponding node.
398
+ """
399
+
400
+ @abstractmethod
401
+ def get_summary(self) -> Dict[str, Any]:
402
+ """
403
+ Return the summary of the node
404
+
405
+ Returns:
406
+ Dict[str, Any]: A serializable summary of the node.
407
+ """
408
+
409
+
410
+ # --8<-- [end:docs]
411
+ class TraversalNode(BaseNode):
412
+ next_node: str = Field(serialization_alias="next")
413
+ on_failure: str = Field(default="")
414
+ overrides: Dict[str, str] = Field(default_factory=dict)
415
+
416
+ def _get_on_failure_node(self) -> str:
417
+ """
418
+ Return the on_failure node defined in the config, or an empty string if none is defined.
419
+
420
+ The name is relative to the dag; the caller is expected to resolve it against the correct graph.
421
+
422
+ Returns:
423
+ str: The on_failure node defined by the dag, or ''
424
+
425
+ """
426
+ return self.on_failure
427
+
428
+ def _get_next_node(self) -> str:
429
+ """
430
+ Return the next node as defined by the config.
431
+
432
+ Returns:
433
+ str: The node name, relative to the dag, as defined by the config
434
+ """
435
+
436
+ return self.next_node
437
+
438
+ def _is_terminal_node(self) -> bool:
439
+ """
440
+ Returns whether the node is a terminal node, i.e. one with no next node.
441
+
442
+ Returns:
443
+ bool: Always False; traversal nodes always define a next node.
444
+ """
445
+ return False
446
+
447
+ def _get_executor_config(self, executor_type) -> str:
448
+ return self.overrides.get(executor_type) or ""
449
+
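A small sketch mirroring the override lookup above with made-up override names (a plain dict stands in for a node's overrides field):

    overrides = {"argo": "gpu-override"}
    assert (overrides.get("argo") or "") == "gpu-override"
    assert (overrides.get("local") or "") == ""   # undefined executor type falls back to ""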
450
+
451
+ # Note: this model is defined in two places; see also runnable/sdk.py and keep the two in sync.
452
+ class CatalogStructure(BaseModel):
453
+ model_config = ConfigDict(extra="forbid")  # forbid unknown keys in catalog configuration
454
+
455
+ get: List[str] = Field(default_factory=list)
456
+ put: List[str] = Field(default_factory=list)
457
+ store_copy: bool = Field(default=True, alias="store_copy")
458
+
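A usage sketch of the CatalogStructure model defined above, with hypothetical glob patterns:

    catalog = CatalogStructure(get=["data/*.csv"], put=["model/*.pkl"])
    assert catalog.model_dump() == {
        "get": ["data/*.csv"],
        "put": ["model/*.pkl"],
        "store_copy": True,
    }
    # extra="forbid" means an unknown key such as fetch=[...] raises a pydantic ValidationError.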
459
+
460
+ class ExecutableNode(TraversalNode):
461
+ catalog: Optional[CatalogStructure] = Field(default=None)
462
+ max_attempts: int = Field(default=1, ge=1)
463
+
464
+ def _get_catalog_settings(self) -> Dict[str, Any]:
465
+ """
466
+ Return the catalog settings defined for the node, or an empty dict if none are defined.
467
+
468
+ Returns:
469
+ dict: The catalog settings as a dict, or an empty dict when no catalog is configured.
470
+ """
471
+ if self.catalog:
472
+ return self.catalog.model_dump()
473
+ return {}
474
+
475
+ def _get_max_attempts(self) -> int:
476
+ return self.max_attempts
477
+
478
+ def _get_branch_by_name(self, branch_name: str):
479
+ raise exceptions.NodeMethodCallError(
480
+ "This is an executable node and does not have branches"
481
+ )
482
+
483
+ def execute_as_graph(
484
+ self,
485
+ iter_variable: Optional[IterableParameterModel] = None,
486
+ ):
487
+ raise exceptions.NodeMethodCallError(
488
+ "This is an executable node and does not have a graph"
489
+ )
490
+
491
+ def fan_in(
492
+ self,
493
+ iter_variable: Optional[IterableParameterModel] = None,
494
+ ):
495
+ raise exceptions.NodeMethodCallError(
496
+ "This is an executable node and does not have a fan in"
497
+ )
498
+
499
+ def fan_out(
500
+ self,
501
+ iter_variable: Optional[IterableParameterModel] = None,
502
+ ):
503
+ raise exceptions.NodeMethodCallError(
504
+ "This is an executable node and does not have a fan out"
505
+ )
506
+
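A hedged sketch of what an executor can rely on for any executable (leaf) node, based on the fields and methods above; step is a hypothetical ExecutableNode subclass instance:

    attempts = step._get_max_attempts()        # always >= 1, enforced by Field(ge=1)
    settings = step._get_catalog_settings()    # {} when no catalog block is configured
    step_log = step.execute(mock=False, attempt_number=1)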
507
+
508
+ class CompositeNode(TraversalNode):
509
+ is_composite: bool = True
510
+
511
+ def _get_catalog_settings(self) -> Dict[str, Any]:
512
+ """
513
+ Composite nodes do not carry catalog settings of their own.
514
+
515
+ Raises:
516
+ NodeMethodCallError: Always, since catalog settings apply only to executable nodes.
517
+ """
518
+ raise exceptions.NodeMethodCallError(
519
+ "This is a composite node and does not have a catalog settings"
520
+ )
521
+
522
+ def _get_max_attempts(self) -> int:
523
+ raise Exception("This is a composite node and does not have a max_attempts")
524
+
525
+ def execute(
526
+ self,
527
+ mock=False,
528
+ iter_variable: Optional[IterableParameterModel] = None,
529
+ attempt_number: int = 1,
530
+ ) -> StepLog:
531
+ raise exceptions.NodeMethodCallError(
532
+ "This is a composite node and does not have an execute function"
533
+ )
534
+
535
+ async def execute_as_graph_async(
536
+ self,
537
+ iter_variable: Optional[IterableParameterModel] = None,
538
+ ):
539
+ """
540
+ Async execution of sub-graph.
541
+
542
+ Default raises NotImplementedError - override in subclasses
543
+ that support async execution (ParallelNode, MapNode, DagNode).
544
+ """
545
+ raise NotImplementedError(
546
+ f"{self.__class__.__name__} must implement execute_as_graph_async() "
547
+ f"for async execution support."
548
+ )
549
+
550
+
551
+ class TerminalNode(BaseNode):
552
+ def _get_on_failure_node(self) -> str:
553
+ return ""
554
+
555
+ def _get_next_node(self) -> str:
556
+ raise exceptions.TerminalNodeError()
557
+
558
+ def _is_terminal_node(self) -> bool:
559
+ return True
560
+
561
+ def _get_catalog_settings(self) -> Dict[str, Any]:
562
+ raise exceptions.TerminalNodeError()
563
+
564
+ def _get_branch_by_name(self, branch_name: str):
565
+ raise exceptions.TerminalNodeError()
566
+
567
+ def _get_executor_config(self, executor_type) -> str:
568
+ raise exceptions.TerminalNodeError()
569
+
570
+ def _get_max_attempts(self) -> int:
571
+ return 1
572
+
573
+ def execute_as_graph(
574
+ self,
575
+ iter_variable: Optional[IterableParameterModel] = None,
576
+ ):
577
+ raise exceptions.TerminalNodeError()
578
+
579
+ def fan_in(
580
+ self,
581
+ iter_variable: Optional[IterableParameterModel] = None,
582
+ ):
583
+ raise exceptions.TerminalNodeError()
584
+
585
+ def fan_out(
586
+ self,
587
+ iter_variable: Optional[IterableParameterModel] = None,
588
+ ):
589
+ raise exceptions.TerminalNodeError()
590
+
591
+ @classmethod
592
+ def parse_from_config(cls, config: Dict[str, Any]) -> "TerminalNode":
593
+ return cls(**config)
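A hedged sketch of parse_from_config with a concrete terminal node; DemoSuccessNode is invented here for illustration (the shipped success/fail nodes live under extensions/nodes/):

    class DemoSuccessNode(TerminalNode):
        node_type: str = "success"

        def execute(self, mock=False, iter_variable=None, attempt_number=1) -> StepLog:
            # A real implementation would build and return a StepLog for this attempt.
            raise NotImplementedError

        def get_summary(self):
            return {"name": self.name, "type": self.node_type}

    node = DemoSuccessNode.parse_from_config(
        {"name": "success", "internal_name": "success"}
    )
    assert node._is_terminal_node()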