runnable 0.50.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. extensions/README.md +0 -0
  2. extensions/__init__.py +0 -0
  3. extensions/catalog/README.md +0 -0
  4. extensions/catalog/any_path.py +214 -0
  5. extensions/catalog/file_system.py +52 -0
  6. extensions/catalog/minio.py +72 -0
  7. extensions/catalog/pyproject.toml +14 -0
  8. extensions/catalog/s3.py +11 -0
  9. extensions/job_executor/README.md +0 -0
  10. extensions/job_executor/__init__.py +236 -0
  11. extensions/job_executor/emulate.py +70 -0
  12. extensions/job_executor/k8s.py +553 -0
  13. extensions/job_executor/k8s_job_spec.yaml +37 -0
  14. extensions/job_executor/local.py +35 -0
  15. extensions/job_executor/local_container.py +161 -0
  16. extensions/job_executor/pyproject.toml +16 -0
  17. extensions/nodes/README.md +0 -0
  18. extensions/nodes/__init__.py +0 -0
  19. extensions/nodes/conditional.py +301 -0
  20. extensions/nodes/fail.py +78 -0
  21. extensions/nodes/loop.py +394 -0
  22. extensions/nodes/map.py +477 -0
  23. extensions/nodes/parallel.py +281 -0
  24. extensions/nodes/pyproject.toml +15 -0
  25. extensions/nodes/stub.py +93 -0
  26. extensions/nodes/success.py +78 -0
  27. extensions/nodes/task.py +156 -0
  28. extensions/pipeline_executor/README.md +0 -0
  29. extensions/pipeline_executor/__init__.py +871 -0
  30. extensions/pipeline_executor/argo.py +1266 -0
  31. extensions/pipeline_executor/emulate.py +119 -0
  32. extensions/pipeline_executor/local.py +226 -0
  33. extensions/pipeline_executor/local_container.py +369 -0
  34. extensions/pipeline_executor/mocked.py +159 -0
  35. extensions/pipeline_executor/pyproject.toml +16 -0
  36. extensions/run_log_store/README.md +0 -0
  37. extensions/run_log_store/__init__.py +0 -0
  38. extensions/run_log_store/any_path.py +100 -0
  39. extensions/run_log_store/chunked_fs.py +122 -0
  40. extensions/run_log_store/chunked_minio.py +141 -0
  41. extensions/run_log_store/file_system.py +91 -0
  42. extensions/run_log_store/generic_chunked.py +549 -0
  43. extensions/run_log_store/minio.py +114 -0
  44. extensions/run_log_store/pyproject.toml +15 -0
  45. extensions/secrets/README.md +0 -0
  46. extensions/secrets/dotenv.py +62 -0
  47. extensions/secrets/pyproject.toml +15 -0
  48. runnable/__init__.py +108 -0
  49. runnable/catalog.py +141 -0
  50. runnable/cli.py +484 -0
  51. runnable/context.py +730 -0
  52. runnable/datastore.py +1058 -0
  53. runnable/defaults.py +159 -0
  54. runnable/entrypoints.py +390 -0
  55. runnable/exceptions.py +137 -0
  56. runnable/executor.py +561 -0
  57. runnable/gantt.py +1646 -0
  58. runnable/graph.py +501 -0
  59. runnable/names.py +546 -0
  60. runnable/nodes.py +593 -0
  61. runnable/parameters.py +217 -0
  62. runnable/pickler.py +96 -0
  63. runnable/sdk.py +1277 -0
  64. runnable/secrets.py +92 -0
  65. runnable/tasks.py +1268 -0
  66. runnable/telemetry.py +142 -0
  67. runnable/utils.py +423 -0
  68. runnable-0.50.0.dist-info/METADATA +189 -0
  69. runnable-0.50.0.dist-info/RECORD +72 -0
  70. runnable-0.50.0.dist-info/WHEEL +4 -0
  71. runnable-0.50.0.dist-info/entry_points.txt +53 -0
  72. runnable-0.50.0.dist-info/licenses/LICENSE +201 -0
runnable/graph.py ADDED
@@ -0,0 +1,501 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from typing import Any, Dict, List, Optional, cast
5
+
6
+ from pydantic import BaseModel, Field, SerializeAsAny
7
+ from stevedore import driver
8
+
9
+ from runnable import defaults, exceptions
10
+
11
# Module-level logger, obtained under the logger name configured in runnable.defaults.
logger = logging.getLogger(defaults.LOGGER_NAME)
# Raise the "stevedore" logger's threshold to CRITICAL so that only its
# critical records are emitted.
logging.getLogger("stevedore").setLevel(logging.CRITICAL)
13
+
14
+
15
class Graph(BaseModel):
    """
    A class representing a graph.

    The representation is similar to AWS step functions.
    We have nodes and traversal is based on start_at and on_failure definition of individual nodes of the graph
    """

    # Name of the node where traversal starts; relative to this graph.
    start_at: str
    name: str = ""
    description: Optional[str] = ""
    # Name of the branch this graph belongs to; excluded from serialization.
    internal_branch_name: str = Field(default="", exclude=True)
    # Nodes keyed by node name (see add_node); serialized under the alias "steps".
    nodes: SerializeAsAny[Dict[str, "BaseNode"]] = Field(
        default_factory=dict, serialization_alias="steps"
    )

    def get_summary(self) -> Dict[str, Any]:
        """
        Return a summary of the graph

        Returns:
            dict: The name, description and start_at of the graph along with
                the summary of every node, in insertion order.
        """
        return {
            "name": self.name,
            "description": self.description,
            "start_at": self.start_at,
            "nodes": [node.get_summary() for node in self.nodes.values()],
        }

    def get_node_by_name(self, name: str) -> "BaseNode":
        """
        Return the Node object by the name
        The name is always relative to the graph

        Args:
            name (str): Name of the node

        Raises:
            NodeNotFoundError: If the node of name is not found in the graph

        Returns:
            Node: The Node object by name
        """
        # nodes is keyed by node name, so a direct lookup replaces a scan of all items.
        try:
            return self.nodes[name]
        except KeyError:
            # Callers only expect NodeNotFoundError; do not leak the KeyError as context.
            raise exceptions.NodeNotFoundError(name) from None

    def get_node_by_internal_name(self, internal_name: str) -> "BaseNode":
        """
        Return the node by the internal name of the node.
        The internal name uses dot path convention.
        This method is only relative to the nodes of the graph and does not perform graph search of sub-graphs

        Args:
            internal_name (str): The internal name of the node, follows a dot path convention

        Raises:
            NodeNotFoundError: If the node of the internal name is not found in the graph

        Returns:
            Node: The Node object by the name
        """
        # nodes is keyed by name, not internal name, so this has to be a scan.
        for node in self.nodes.values():
            if node.internal_name == internal_name:
                return node
        raise exceptions.NodeNotFoundError(internal_name)

    def __str__(self):  # pragma: no cover
        """
        Return a string representation of the graph
        """
        node_str = ", ".join(node.name for node in self.nodes.values())
        return f"Starts at: {self.start_at} and {node_str}"

    def add_node(self, node: "BaseNode"):
        """
        Add a node to the nodes of the graph

        A node already stored under the same name is replaced.

        Args:
            node (object): The node to add
        """
        self.nodes[node.name] = node

    def check_graph(self):
        """
        Validate the graph to make sure,
            1). All the neighbors of nodes are present.
            2). Detection of cycles.
            3). Confirming that the start_at is actually a node present in graph
            4). Detection of one and only one success node.
            5). Detection of one and only one fail node.
            6). Missing nodes if referred by next or on_failure

        Every failed check is logged and collected, so a single exception
        reports all the problems found.

        Raises:
            Exception: If any of the checks fail; the message is the
                comma separated list of all the failures.
        """
        messages = []

        missing_nodes = self.missing_neighbors()
        if missing_nodes:
            message = "The graph has references to nodes (next, on_failure), these nodes are missing from the DAG:\n"
            message += ", ".join(missing_nodes)
            logger.error(message)
            messages.append(message)
        elif not self.is_dag():
            # The cycle check resolves every neighbor by name, so it is only
            # attempted when no neighbor is missing.
            message = "The DAG is cyclic or does not reach an end state"
            logger.error(message)
            messages.append(message)

        if not self.is_start_node_present():
            message = "The start node is not part of the graph"
            logger.error(message)
            messages.append(message)

        if not self.success_node_validation():
            message = "There should be exactly one success node"
            logger.error(message)
            messages.append(message)

        if not self.fail_node_validation():
            message = "There should be exactly one fail node"
            logger.error(message)
            messages.append(message)

        if messages:
            raise Exception(", ".join(messages))

    def get_success_node(self) -> "BaseNode":
        """
        Return the success node of the graph

        Raises:
            Exception: If no success node is present in the graph

        Returns:
            object: The success node
        """
        for node in self.nodes.values():
            if node.node_type == "success":
                return node
        raise Exception("No success node defined")

    def get_fail_node(self) -> "BaseNode":
        """
        Returns the fail node of the graph

        Raises:
            Exception: If no fail node is present in the graph

        Returns:
            object: The fail node of the graph
        """
        for node in self.nodes.values():
            if node.node_type == "fail":
                return node
        raise Exception("No fail node defined")

    def is_start_node_present(self) -> bool:
        """
        A check to ensure the start_at is part of the graph

        Returns:
            bool: True if start_at is one of the nodes, false otherwise
        """
        try:
            self.get_node_by_name(self.start_at)
            return True
        except exceptions.NodeNotFoundError:
            logger.exception("Could not find the node")
            return False

    def success_node_validation(self) -> bool:
        """
        Check to ensure there is one and only one success node in the graph

        Returns:
            bool: True if there is only one, false otherwise
        """
        return (
            sum(1 for node in self.nodes.values() if node.node_type == "success") == 1
        )

    def fail_node_validation(self) -> bool:
        """
        Check to make sure there is one and only one fail node in the graph

        Returns:
            bool: true if there is one and only one fail node, false otherwise
        """
        return sum(1 for node in self.nodes.values() if node.node_type == "fail") == 1

    def is_dag(self) -> bool:
        """
        Determines whether the graph is acyclic and directed

        Runs a depth first traversal from every node that has not been
        visited yet and looks for an edge leading back into the current
        recursion path.

        Returns:
            bool: Returns True if it is directed and acyclic.
        """
        visited = dict.fromkeys(self.nodes, False)
        recstack = dict.fromkeys(self.nodes, False)

        for name, node in self.nodes.items():
            if not visited[name]:
                if self.is_cyclic_util(node, visited, recstack):
                    return False
        return True

    def is_cyclic_util(
        self, node: "BaseNode", visited: Dict[str, bool], recstack: Dict[str, bool]
    ) -> bool:
        """
        Recursive utility that determines if a node and neighbors has a cycle. Is used in is_dag method.

        Args:
            node (BaseNode): The node to check
            visited (dict): Dictionary storing which nodes have been checked
            recstack (dict): Stores what nodes have been visited recursively

        Raises:
            NodeNotFoundError: If a neighbor of the node is not part of the graph

        Returns:
            bool: True if cyclic.
        """
        visited[node.name] = True
        recstack[node.name] = True

        for neighbor in node._get_neighbors():
            # Resolve the neighbor first so that a missing one surfaces as
            # NodeNotFoundError rather than a KeyError from the bookkeeping dicts.
            neighbor_node = self.get_node_by_name(neighbor)
            if not visited[neighbor]:
                if self.is_cyclic_util(neighbor_node, visited, recstack):
                    return True
            elif recstack[neighbor]:
                # The neighbor is on the current recursion path: a back edge.
                return True

        # Done with this path; the node may legitimately be reached again via another path.
        recstack[node.name] = False
        return False

    def missing_neighbors(self) -> List[str]:
        """
        Iterates through nodes and gets their connecting neighbors and checks if they exist in the graph.

        Returns:
            list: List of the missing nodes. Empty list if all neighbors are in the graph.
        """
        missing_nodes: List[str] = []
        for node in self.nodes.values():
            for neighbor in node._get_neighbors():
                try:
                    self.get_node_by_name(neighbor)
                except exceptions.NodeNotFoundError:
                    logger.exception(f"Could not find the node {neighbor}")
                    # Report every missing name once, in the order first seen.
                    if neighbor not in missing_nodes:
                        missing_nodes.append(neighbor)
        return missing_nodes

    def add_terminal_nodes(
        self,
        success_node_name: str = "success",
        failure_node_name: str = "fail",
        internal_branch_name: str = "",
    ):
        """
        Add the success and fail nodes to the graph

        Nodes already present under these names are replaced (see add_node).

        Args:
            success_node_name (str, optional): The name of the success node. Defaults to 'success'.
            failure_node_name (str, optional): The name of the failure node. Defaults to 'fail'.
            internal_branch_name (str, optional): The branch name handed to create_node
                for both the terminal nodes. Defaults to ''.
        """
        success_node = create_node(
            success_node_name,
            step_config={"type": "success"},
            internal_branch_name=internal_branch_name,
        )
        fail_node = create_node(
            failure_node_name,
            step_config={"type": "fail"},
            internal_branch_name=internal_branch_name,
        )
        self.add_node(success_node)
        self.add_node(fail_node)
308
+
309
+
310
# NOTE(review): BaseNode is imported only after Graph is defined (hence the
# E402 suppression), presumably to avoid a circular import between
# runnable.graph and runnable.nodes - confirm.
from runnable.nodes import BaseNode  # noqa: E402

# Graph annotates its nodes with the forward reference "BaseNode"; rebuild the
# model now that the name is available in this module.
Graph.model_rebuild()
313
+
314
+
315
def create_graph(dag_config: Dict[str, Any], internal_branch_name: str = "") -> Graph:
    """
    Build and validate a Graph from its dictionary definition.

    Every entry under "steps" becomes a node, the terminal success and fail
    nodes are added afterwards and the resulting graph is validated.

    Composite nodes can hold sub-branches which are graphs themselves; pass
    internal_branch_name for those so that the nodes get the right dot path name.

    Args:
        dag_config (dict): The dag definition; "start_at", "description" and "steps" are read.
        internal_branch_name (str, optional): In case of sub-graph, the name of the branch.
            Defaults to "".

    Raises:
        Exception: If the node or graph validation fails.

    Returns:
        Graph: The created graph object
    """
    # start_at is kept relative to the graph being built.
    start_at: str = cast(str, dag_config.get("start_at"))
    description: str | None = dag_config.get("description")

    graph = Graph(
        start_at=start_at,
        description=description,
        internal_branch_name=internal_branch_name,
    )
    logger.info(f"Initialized a graph object that starts at {start_at}")

    steps = dag_config.get("steps", {})
    for name, step_config in steps.items():
        logger.info(f"Adding node {name} with :{step_config}")
        graph.add_node(
            create_node(
                name,
                step_config=step_config,
                internal_branch_name=internal_branch_name,
            )
        )

    graph.add_terminal_nodes(internal_branch_name=internal_branch_name)
    graph.check_graph()

    return graph
357
+
358
+
359
def create_node(name: str, step_config: dict, internal_branch_name: Optional[str] = ""):
    """
    Creates a node object from the step configuration.

    The node class is looked up by the "type" of the step in the "nodes"
    plugin namespace and is asked to parse the remaining configuration.
    The "next" key of the configuration is handed over as "next_node".

    The step configuration passed in is left untouched, so the same
    configuration can be used to create nodes more than once.

    Args:
        name (str): The name of the node
        step_config (dict): The configuration of the node
        internal_branch_name (str, optional): If the node belongs to a internal branch.
            Defaults to "".

    Raises:
        Exception: If the configuration has no 'type', if the node type is not
            supported or if the node could not be created from the configuration.

    Returns:
        BaseNode: The created node object
    """
    internal_name = name
    if internal_branch_name:
        internal_name = internal_branch_name + "." + name

    # Keys are popped/added below; work on a shallow copy so the caller's
    # configuration is not modified.
    config = dict(step_config)

    # Only the lookup of 'type' is guarded by KeyError, so a KeyError raised
    # while loading or parsing the node is not misreported as a missing 'type'.
    try:
        node_type = config.pop(
            "type"
        )  # Remove the type as it is not used in node creation.
    except KeyError:
        msg = "The node configuration does not contain the required key 'type'."
        logger.exception(step_config)
        raise Exception(msg)

    try:
        node_mgr: BaseNode = cast(
            BaseNode, driver.DriverManager(namespace="nodes", name=node_type).driver
        )

        next_node = config.pop("next", None)

        if next_node:
            config["next_node"] = next_node

        invoke_kwds = {
            "name": name,
            "internal_name": internal_name,
            "internal_branch_name": internal_branch_name,
            **config,
        }
        return node_mgr.parse_from_config(config=invoke_kwds)
    except Exception as _e:
        msg = (
            f"Could not find the node type {node_type}. Please ensure you have installed "
            "the extension that provides the node type."
            "\nCore supports: task, success, fail, parallel, dag, map, stub"
        )
        raise Exception(msg) from _e
410
+
411
+
412
def search_node_by_internal_name(dag: Graph, internal_name: str):
    """
    Find a node, and the graph holding it, by the internal name of the node.

    The internal name follows the dot path convention Step.Branch.Step...;
    the path is walked from the given dag, alternating between resolving a
    step in the current graph and a branch of the current step.

    Args:
        dag (Graph): The graph to start the search from
        internal_name (str): The internal name of the node.

    Raises:
        NodeNotFoundError: If the walk ends without both a node and a branch.

    Returns:
        tuple: The node found and the graph it was found in.
    """
    parts = internal_name.split(".")

    # No dots: the node is not part of any branch and sits on the given graph.
    if len(parts) == 1:
        return dag.get_node_by_internal_name(internal_name), dag

    # A node's internal name always has an odd number of parts:
    # Step.Branch.Step.Branch ... Step
    node = None
    branch = dag

    for depth in range(1, len(parts) + 1):
        prefix = ".".join(parts[:depth])
        if depth % 2 == 0:
            # Even depth: the prefix names a branch of the step found just before.
            branch = node._get_branch_by_name(prefix)  # type: ignore
            logger.debug(f"Finding step for {internal_name} in branch: {branch}")
        else:
            # Odd depth: the prefix names a step in the current branch; the walk starts here.
            node = branch.get_node_by_internal_name(prefix)
            logger.debug(f"Finding {internal_name} in node: {node}")

    logger.debug(f"current branch : {branch}, current step {node}")
    if not (branch and node):
        raise exceptions.NodeNotFoundError(internal_name)

    return node, branch
456
+
457
+
458
def search_branch_by_internal_name(dag: Graph, internal_name: str):
    """
    Find a branch (a graph) by the internal name of the branch.

    The internal name follows the dot path convention Step.Branch.Step...;
    the path is walked from the given dag, alternating between resolving a
    step in the current graph and a branch of the current step.

    Args:
        dag (Graph): The graph to start the search from
        internal_name (str): The internal name of the branch.

    Raises:
        BranchNotFoundError: If the walk ends without both a node and a branch.

    Returns:
        Graph: The given dag when the name has no dots, else the last branch resolved.
    """
    parts = internal_name.split(".")

    # No dots: there is no branch to descend into, the given graph is the answer.
    if len(parts) == 1:
        return dag

    # A branch's internal name always has an even number of parts:
    # Step.Branch.Step.Branch
    node = None
    branch = dag

    for depth in range(1, len(parts) + 1):
        prefix = ".".join(parts[:depth])
        if depth % 2 == 0:
            # Even depth: the prefix names a branch of the step found just before.
            branch = node._get_branch_by_name(prefix)  # type: ignore
            logger.debug(f"Finding step for {internal_name} in branch: {branch}")
        else:
            # Odd depth: the prefix names a step in the current branch; the walk starts here.
            node = branch.get_node_by_internal_name(prefix)
            logger.debug(f"Finding {internal_name} in node: {node}")

    logger.debug(f"current branch : {branch}, current step {node}")
    if not (branch and node):
        raise exceptions.BranchNotFoundError(internal_name)

    return branch