runnable-0.50.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. extensions/README.md +0 -0
  2. extensions/__init__.py +0 -0
  3. extensions/catalog/README.md +0 -0
  4. extensions/catalog/any_path.py +214 -0
  5. extensions/catalog/file_system.py +52 -0
  6. extensions/catalog/minio.py +72 -0
  7. extensions/catalog/pyproject.toml +14 -0
  8. extensions/catalog/s3.py +11 -0
  9. extensions/job_executor/README.md +0 -0
  10. extensions/job_executor/__init__.py +236 -0
  11. extensions/job_executor/emulate.py +70 -0
  12. extensions/job_executor/k8s.py +553 -0
  13. extensions/job_executor/k8s_job_spec.yaml +37 -0
  14. extensions/job_executor/local.py +35 -0
  15. extensions/job_executor/local_container.py +161 -0
  16. extensions/job_executor/pyproject.toml +16 -0
  17. extensions/nodes/README.md +0 -0
  18. extensions/nodes/__init__.py +0 -0
  19. extensions/nodes/conditional.py +301 -0
  20. extensions/nodes/fail.py +78 -0
  21. extensions/nodes/loop.py +394 -0
  22. extensions/nodes/map.py +477 -0
  23. extensions/nodes/parallel.py +281 -0
  24. extensions/nodes/pyproject.toml +15 -0
  25. extensions/nodes/stub.py +93 -0
  26. extensions/nodes/success.py +78 -0
  27. extensions/nodes/task.py +156 -0
  28. extensions/pipeline_executor/README.md +0 -0
  29. extensions/pipeline_executor/__init__.py +871 -0
  30. extensions/pipeline_executor/argo.py +1266 -0
  31. extensions/pipeline_executor/emulate.py +119 -0
  32. extensions/pipeline_executor/local.py +226 -0
  33. extensions/pipeline_executor/local_container.py +369 -0
  34. extensions/pipeline_executor/mocked.py +159 -0
  35. extensions/pipeline_executor/pyproject.toml +16 -0
  36. extensions/run_log_store/README.md +0 -0
  37. extensions/run_log_store/__init__.py +0 -0
  38. extensions/run_log_store/any_path.py +100 -0
  39. extensions/run_log_store/chunked_fs.py +122 -0
  40. extensions/run_log_store/chunked_minio.py +141 -0
  41. extensions/run_log_store/file_system.py +91 -0
  42. extensions/run_log_store/generic_chunked.py +549 -0
  43. extensions/run_log_store/minio.py +114 -0
  44. extensions/run_log_store/pyproject.toml +15 -0
  45. extensions/secrets/README.md +0 -0
  46. extensions/secrets/dotenv.py +62 -0
  47. extensions/secrets/pyproject.toml +15 -0
  48. runnable/__init__.py +108 -0
  49. runnable/catalog.py +141 -0
  50. runnable/cli.py +484 -0
  51. runnable/context.py +730 -0
  52. runnable/datastore.py +1058 -0
  53. runnable/defaults.py +159 -0
  54. runnable/entrypoints.py +390 -0
  55. runnable/exceptions.py +137 -0
  56. runnable/executor.py +561 -0
  57. runnable/gantt.py +1646 -0
  58. runnable/graph.py +501 -0
  59. runnable/names.py +546 -0
  60. runnable/nodes.py +593 -0
  61. runnable/parameters.py +217 -0
  62. runnable/pickler.py +96 -0
  63. runnable/sdk.py +1277 -0
  64. runnable/secrets.py +92 -0
  65. runnable/tasks.py +1268 -0
  66. runnable/telemetry.py +142 -0
  67. runnable/utils.py +423 -0
  68. runnable-0.50.0.dist-info/METADATA +189 -0
  69. runnable-0.50.0.dist-info/RECORD +72 -0
  70. runnable-0.50.0.dist-info/WHEEL +4 -0
  71. runnable-0.50.0.dist-info/entry_points.txt +53 -0
  72. runnable-0.50.0.dist-info/licenses/LICENSE +201 -0
runnable/executor.py ADDED
@@ -0,0 +1,561 @@
+ from __future__ import annotations
+
+ import logging
+ import os
+ from abc import ABC, abstractmethod
+ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional
+
+ from pydantic import BaseModel, ConfigDict, PrivateAttr
+
+ import runnable.context as context
+ from runnable import defaults
+ from runnable.datastore import DataCatalog, JobLog, StepAttempt
+ from runnable.defaults import IterableParameterModel
+
+ if TYPE_CHECKING:  # pragma: no cover
+     from runnable.graph import Graph
+     from runnable.nodes import BaseNode
+     from runnable.tasks import BaseTaskType
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+ class BaseExecutor(ABC, BaseModel):
+     """
+     The skeleton of an executor class.
+     Any implementation of an executor should inherit this class and override accordingly.
+
+     There is an extension available in runnable/extensions/executor/__init__.py
+     which implements the most common functionality and is easier to
+     extend/override in most scenarios.
+
+     """
+
+     service_name: str = ""
+     service_type: str = "executor"
+
+     # _should_setup_run_log_at_traversal is True for the local, local_container and
+     # emulator executors, and False for everything else.
+     _should_setup_run_log_at_traversal: bool = PrivateAttr(default=True)
+
+     model_config = ConfigDict(extra="forbid")
+
+     @property
+     def _context(self):
+         current_context = context.get_run_context()
+         if current_context is None:
+             raise RuntimeError("No run context available in current execution context")
+         return current_context
+
+     @abstractmethod
+     def _get_parameters(self) -> Dict[str, Any]:
+         """
+         Get the parameters for the execution.
+         The parameters can be defined in the parameters file and can be overridden by environment variables.
+
+         Returns:
+             Dict[str, Any]: The parameters for the execution.
+         """
+         ...
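
A subclass might satisfy this contract by loading the file and then letting environment variables win. A minimal sketch, assuming a YAML parameters file; the `RUNNABLE_PRM_` prefix and the helper name are illustrative, not confirmed by this diff:

import json
import os
from typing import Any, Dict, Optional

import yaml  # assumption: parameter files are YAML, as elsewhere in this package


def resolve_parameters(parameters_file: Optional[str] = None) -> Dict[str, Any]:
    """Load file-defined parameters, then apply environment overrides."""
    params: Dict[str, Any] = {}
    if parameters_file and os.path.exists(parameters_file):
        with open(parameters_file) as f:
            params = yaml.safe_load(f) or {}

    prefix = "RUNNABLE_PRM_"  # illustrative prefix, not confirmed by this diff
    for key, value in os.environ.items():
        if key.startswith(prefix):
            name = key[len(prefix):].lower()
            try:
                # JSON-decode so numbers and lists survive the round trip.
                params[name] = json.loads(value)
            except json.JSONDecodeError:
                params[name] = value
    return params
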
+
+     @abstractmethod
+     def _set_up_run_log(self, exists_ok=False):
+         """
+         Create a run log and put it in the run log store.
+
+         If exists_ok, we allow the run log to already be present in the run log store.
+         """
+         ...
+
+     @abstractmethod
+     def _calculate_attempt_number(
+         self,
+         node: "BaseNode",
+         iter_variable: IterableParameterModel | None = None,
+     ) -> int:
+         """
+         Calculate the attempt number for a node based on existing attempts in the run log.
+
+         Args:
+             node: The node to calculate the attempt number for
+             iter_variable: Optional iterable variable if the node is in a map state
+
+         Returns:
+             int: The attempt number (starting from 1)
+         """
+         ...
+
+     @abstractmethod
+     def send_return_code(self, stage="traversal"):
+         """
+         Convenience function used by the pipeline to send a return code to the caller of the CLI.
+
+         Raises:
+             Exception: If the pipeline execution failed
+         """
+         ...
+
+     @abstractmethod
+     def add_task_log_to_catalog(
+         self,
+         name: str,
+         iter_variable: Optional[IterableParameterModel] = None,
+     ): ...
+
+
+ class BaseJobExecutor(BaseExecutor):
+     service_type: str = "job_executor"
+
+     @abstractmethod
+     def submit_job(self, job: BaseTaskType, catalog_settings: Optional[List[str]]):
+         """
+         Local executors should:
+         - create the run log
+         - and call execute_job
+
+         Non-local executors should:
+         - transpile the job to the platform-specific job spec
+         - submit the job, which in turn calls execute_job
+         """
+         ...
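
For a local flavour, submit_job can be little more than run-log setup followed by an in-process call. A minimal sketch using only methods defined in this file; the other abstract methods are omitted, so the class is illustrative rather than complete:

from typing import List, Optional


class SketchLocalJobExecutor(BaseJobExecutor):
    """Illustrative local executor: create the run log, then execute in-process."""

    service_name: str = "local"

    def submit_job(self, job: "BaseTaskType", catalog_settings: Optional[List[str]]):
        # Local execution owns run-log creation...
        self._set_up_run_log(exists_ok=False)
        # ...and then runs the job directly in this process.
        self.execute_job(job, catalog_settings)
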
+
+     def _calculate_attempt_number(
+         self,
+         node: "BaseNode",
+         iter_variable: Optional[IterableParameterModel] = None,
+     ) -> int:
+         """
+         Calculate the attempt number for a node.
+
+         Job executors typically get attempt numbers from the environment variable
+         set by the pipeline executor that launched them.
+
+         Args:
+             node: The node to calculate the attempt number for
+             iter_variable: Optional iterable variable if the node is in a map state
+
+         Returns:
+             int: The attempt number (starting from 1)
+         """
+         return int(os.environ.get(defaults.ATTEMPT_NUMBER, 1))
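
The launching side, then, is expected to export that variable into the job's environment. A sketch with a hypothetical subprocess-based launcher (only `defaults.ATTEMPT_NUMBER`, used in the method above, comes from this diff):

import os
import subprocess
from typing import List

from runnable import defaults


def launch_job(command: List[str], attempt: int) -> int:
    # Export the attempt number under the key the job-side executor reads
    # in _calculate_attempt_number above.
    env = {**os.environ, defaults.ATTEMPT_NUMBER: str(attempt)}
    return subprocess.run(command, env=env).returncode
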
+
+     @abstractmethod
+     def add_code_identities(self, job_log: JobLog):
+         """
+         Add code identities specific to the implementation.
+
+         The Base class has an implementation of adding git code identities.
+
+         Args:
+             job_log (JobLog): The job log to add the code identities to
+         """
+         ...
+
+     @abstractmethod
+     def _sync_catalog(
+         self,
+         catalog_settings: Optional[List[str]],
+         allow_file_not_found_exc: bool = False,
+     ) -> Optional[List[DataCatalog]]:
+         """
+         1). Identify the catalog settings by overriding node settings with the global settings.
+         2). For stage = get:
+             Identify the catalog items that we are asked to get from the catalog
+             and copy them to the local compute data folder.
+         3). For stage = put:
+             Identify the catalog items that we are asked to put into the catalog
+             and copy them from the local compute folder to the catalog.
+         4). Add the items onto the step log according to the stage.
+
+         Args:
+             catalog_settings (Optional[List[str]]): The catalog settings to sync
+             allow_file_not_found_exc (bool): Whether a missing file is tolerated during the sync
+
+         Raises:
+             Exception: If the stage is not one of get/put
+
+         """
+         ...
+
+     @abstractmethod
+     def execute_job(self, job: BaseTaskType, catalog_settings: Optional[List[str]]):
+         """
+         Focuses only on the execution of the job.
+         """
+         ...
+
+
+ class BasePipelineExecutor(BaseExecutor):
+     service_type: str = "pipeline_executor"
+     overrides: dict[str, Any] = {}
+
+     _context_node: Optional[BaseNode] = PrivateAttr(default=None)
+     _event_callback: Optional[Callable[[dict], None]] = PrivateAttr(default=None)
+
+     @abstractmethod
+     def add_code_identities(self, node: BaseNode, attempt_log: StepAttempt):
+         """
+         Add code identities specific to the implementation.
+
+         The Base class has an implementation of adding git code identities.
+
+         Args:
+             attempt_log (StepAttempt): The step attempt log object
+             node (BaseNode): The node we are adding the code identities for
+         """
+         ...
+
+     @abstractmethod
+     def _sync_catalog(
+         self,
+         stage: str,
+         synced_catalogs=None,
+         allow_file_not_found_exc: bool = False,
+     ) -> Optional[List[DataCatalog]]:
+         """
+         1). Identify the catalog settings by overriding node settings with the global settings.
+         2). For stage = get:
+             Identify the catalog items that we are asked to get from the catalog
+             and copy them to the local compute data folder.
+         3). For stage = put:
+             Identify the catalog items that we are asked to put into the catalog
+             and copy them from the local compute folder to the catalog.
+         4). Add the items onto the step log according to the stage.
+
+         Args:
+             stage (str): One of get or put
+             synced_catalogs: Catalogs that were already synced, if any
+             allow_file_not_found_exc (bool): Whether a missing file is tolerated during the sync
+
+         Raises:
+             Exception: If the stage is not one of get/put
+
+         """
+         ...
+
+     @abstractmethod
+     def _calculate_attempt_number(
+         self,
+         node: "BaseNode",
+         iter_variable: Optional[IterableParameterModel] = None,
+     ) -> int:
+         """
+         Calculate the attempt number for a node based on existing attempts in the run log.
+
+         Pipeline executors should implement logic to check existing step logs
+         and determine the correct attempt number.
+
+         Args:
+             node: The node to calculate the attempt number for
+             iter_variable: Optional iterable variable if the node is in a map state
+
+         Returns:
+             int: The attempt number (starting from 1)
+         """
+         ...
+
+     @abstractmethod
+     def _execute_node(
+         self,
+         node: BaseNode,
+         iter_variable: Optional[IterableParameterModel] = None,
+         mock: bool = False,
+     ):
+         """
+         This is the entry point when we do the actual execution of the function.
+
+         While in interactive execution we just compute, in 3rd-party orchestrated execution we need
+         to reach this function.
+
+         In most cases:
+         * We get the corresponding step_log of the node and the parameters.
+         * We sync the catalog to GET any data sets that are in the catalog.
+         * We call the execute method of the node for the actual compute and retry it as many times as asked.
+         * If the node succeeds, we get any user-defined metrics provided by the user.
+         * We sync the catalog to PUT any data sets that are in the catalog.
+
+         Args:
+             node (Node): The node to execute
+             iter_variable (IterableParameterModel, optional): If the node is of a map state, the value of the
+                 iterable. Defaults to None.
+         """
+         ...
+
+     @abstractmethod
+     def execute_node(
+         self,
+         node: BaseNode,
+         iter_variable: Optional[IterableParameterModel] = None,
+     ):
+         """
+         The entry point for all executors apart from local.
+         We have already prepared for node execution.
+
+         Args:
+             node (BaseNode): The node to execute
+             iter_variable (IterableParameterModel, optional): If the node is part of a map, the value of the
+                 iterable. Defaults to None.
+
+         Raises:
+             NotImplementedError: If the executor does not implement node execution
+         """
+         ...
+
+     @abstractmethod
+     def execute_from_graph(
+         self,
+         node: BaseNode,
+         iter_variable: Optional[IterableParameterModel] = None,
+     ):
+         """
+         This is the entry point for graph execution.
+
+         While self.execute_graph is responsible for traversing the graph, this function is responsible for
+         the actual execution of the node.
+
+         If the node type is:
+         * task: We can delegate to _execute_node after checking the eligibility, in case of a re-run
+         * success: We can delegate to _execute_node
+         * fail: We can delegate to _execute_node
+
+         For nodes that are internally graphs:
+         * parallel: Delegate the responsibility of execution to node.execute_as_graph()
+         * dag: Delegate the responsibility of execution to node.execute_as_graph()
+         * map: Delegate the responsibility of execution to node.execute_as_graph()
+
+         Transpilers will NEVER use this method and will NEVER call this method.
+         This method should only be used by interactive executors.
+
+         Args:
+             node (Node): The node to execute
+             iter_variable (IterableParameterModel, optional): If the node is of a map state, this corresponds
+                 to the value of the iterable. Defaults to None.
+         """
+         ...
+
+     @abstractmethod
+     def _get_status_and_next_node_name(
+         self,
+         current_node: BaseNode,
+         dag: Graph,
+         iter_variable: Optional[IterableParameterModel] = None,
+     ) -> tuple[str, str]:
+         """
+         Given the current node and the graph, returns the name of the next node to execute.
+
+         The name is always relative to the graph that the node resides in.
+
+         If the current node succeeded, we return the next node as per the graph.
+         If the current node failed, we return the on-failure node of the node (if provided) or the global one.
+
+         Args:
+             current_node (BaseNode): The current node.
+             dag (Graph): The dag we are traversing.
+             iter_variable (IterableParameterModel, optional): If the node belongs to a map branch.
+         """
+         ...
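
A concrete implementation typically reads the recorded step status and then picks the branch accordingly. A sketch in which `get_step_status`, `_get_next_node`, `_get_on_failure_node` and `get_fail_node` are assumed names, not APIs confirmed by this diff:

def _get_status_and_next_node_name(self, current_node, dag, iter_variable=None):
    status = get_step_status(current_node, iter_variable)  # assumed helper

    if status == "SUCCESS":
        # Happy path: follow the edge defined in the graph.
        return status, current_node._get_next_node()  # assumed accessor

    # Failure: prefer the node's own on-failure handler, falling back to
    # the graph-level failure node.
    on_failure = current_node._get_on_failure_node()  # assumed accessor
    return status, on_failure or dag.get_fail_node().name  # assumed Graph API
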
+
+     @abstractmethod
+     def execute_graph(
+         self,
+         dag: Graph,
+         iter_variable: Optional[IterableParameterModel] = None,
+     ):
+         """
+         The parallelization is controlled by the nodes and not by this function.
+
+         Transpilers should override this method to translate the dag to the platform-specific way.
+         Interactive methods should use this to traverse and execute the dag.
+         - Use execute_from_graph to handle sub-graphs
+
+         Logically the method should:
+         * Start at the dag.start_at of the dag.
+         * Call self.execute_from_graph(node).
+         * Depending upon the status of the execution, either move to the success node or the failure node.
+
+         Args:
+             dag (Graph): The directed acyclic graph to traverse and execute.
+             iter_variable (IterableParameterModel, optional): If the node is of a map state, this corresponds
+                 to the value of the iterable. Defaults to None.
+         """
+         ...
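
For interactive executors, the traversal described above is a simple loop. The sketch below reuses the assumed accessors from the previous example; `get_node_by_name` and `node_type` are also assumptions:

def execute_graph(self, dag, iter_variable=None):
    current_node = dag.get_node_by_name(dag.start_at)  # assumed Graph API

    while True:
        # Composite nodes recurse internally via execute_as_graph().
        self.execute_from_graph(current_node, iter_variable=iter_variable)

        # Decide where to go next based on the recorded status.
        _, next_node_name = self._get_status_and_next_node_name(
            current_node, dag, iter_variable=iter_variable
        )

        # Terminal nodes (success/fail) end the traversal.
        if current_node.node_type in ("success", "fail"):  # assumed attribute
            break
        current_node = dag.get_node_by_name(next_node_name)
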
+
+     @abstractmethod
+     def _resolve_executor_config(self, node: BaseNode) -> Dict[str, Any]:
+         """
+         The overrides section can contain specific overrides to a global executor config.
+         To avoid too much clutter in the dag definition, we allow the configuration file to have an overrides block.
+         The nodes can override the global config by referring to a key in the overrides.
+
+         For example:
+         # configuration.yaml
+         execution:
+           type: cloud-implementation
+           config:
+             k1: v1
+             k3: v3
+           overrides:
+             k2: v2  # Could be a mapping internally.
+
+         # in pipeline definition.yaml
+         dag:
+           steps:
+             step1:
+               overrides:
+                 cloud-implementation:
+                   k1: value_specific_to_node
+                   k2:
+
+         This method should resolve the node_config to {'k1': 'value_specific_to_node', 'k2': 'v2', 'k3': 'v3'}
+
+         Args:
+             node (BaseNode): The current node being processed.
+
+         """
+         ...
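
Resolution, then, is a layered dictionary merge: the global config first, then values pulled from the overrides block for keys the node names without a value, then the node's own explicit values. A minimal sketch of that logic; the function name and input shapes are assumptions based on the docstring above:

from typing import Any, Dict


def resolve_executor_config(
    global_config: Dict[str, Any],
    global_overrides: Dict[str, Any],
    node_overrides: Dict[str, Any],
) -> Dict[str, Any]:
    # Start with the global executor config (k1, k3 in the docstring example).
    resolved = dict(global_config)

    for key, value in node_overrides.items():
        if value is None:
            # An empty value means "take this key from the overrides block"
            # (k2 resolves to v2 in the example).
            resolved[key] = global_overrides.get(key)
        else:
            # An explicit value wins outright (k1 -> value_specific_to_node).
            resolved[key] = value
    return resolved


# With the docstring's example inputs:
assert resolve_executor_config(
    {"k1": "v1", "k3": "v3"},
    {"k2": "v2"},
    {"k1": "value_specific_to_node", "k2": None},
) == {"k1": "value_specific_to_node", "k2": "v2", "k3": "v3"}
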
+
+     @abstractmethod
+     def fan_out(
+         self,
+         node: BaseNode,
+         iter_variable: Optional[IterableParameterModel] = None,
+     ):
+         """
+         This method is used to appropriately fan out the execution of a composite node.
+         This is only useful when we want to execute a composite node on 3rd-party orchestrators.
+
+         Reason: Transpilers typically try to run the leaf nodes but do not have any capacity to do anything for the
+         step which is composite. By calling this fan-out before calling the leaf nodes, we have an opportunity to
+         do the right set up (creating the step log, exposing the parameters, etc.) for the composite step.
+
+         All 3rd-party orchestrators should use this method to fan out the execution of a composite node.
+         This ensures:
+         - The dot path notation is preserved: this method should create the step and call the node's fan-out to
+           create the branch logs, and let the 3rd party do the actual step execution.
+         - 3rd-party orchestrators get an opportunity to set up what is required for running a composite node.
+
+         Args:
+             node (BaseNode): The node to fan out
+             iter_variable (IterableParameterModel, optional): If the node is of a map state. Defaults to None.
+
+         """
+         ...
+
+     @abstractmethod
+     def fan_in(
+         self,
+         node: BaseNode,
+         iter_variable: Optional[IterableParameterModel] = None,
+     ):
+         """
+         This method is used to appropriately fan in after the execution of a composite node.
+         This is only useful when we want to execute a composite node on 3rd-party orchestrators.
+
+         Reason: Transpilers typically try to run the leaf nodes but do not have any capacity to do anything for the
+         step which is composite. By calling this fan-in after calling the leaf nodes, we have an opportunity to
+         act depending upon the status of the individual branches.
+
+         All 3rd-party orchestrators should use this method to fan in the execution of a composite node.
+         This ensures:
+         - The renderers keep control of where to go depending upon the state of the composite node.
+         - The status of the step and its underlying branches are correctly updated.
+
+         Args:
+             node (BaseNode): The node to fan in
+             iter_variable (IterableParameterModel, optional): If the node is of a map state. Defaults to None.
+
+         """
+         ...
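
Put together, a transpiled composite step expands into three platform steps: fan-out, the branches, fan-in. A sketch of the call order an orchestrator-side entrypoint would follow; branch launching is platform-specific, so `launch_branch` is a stand-in:

def run_composite_step(executor, node, branches, iter_variable=None):
    # 1. Fan out: create the step log and branch logs before any branch runs.
    executor.fan_out(node, iter_variable=iter_variable)

    # 2. The platform runs each branch as its own step; a hypothetical
    #    launcher stands in for that here.
    for branch in branches:
        launch_branch(branch, iter_variable=iter_variable)  # platform-specific

    # 3. Fan in: reconcile branch statuses into the composite step's status.
    executor.fan_in(node, iter_variable=iter_variable)
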
+
+     @abstractmethod
+     def trigger_node_execution(
+         self,
+         node: BaseNode,
+         iter_variable: Optional[IterableParameterModel] = None,
+     ):
+         """
+         Executor-specific way of triggering jobs when runnable does both traversal and execution.
+
+         Transpilers will NEVER use this method and will NEVER call it.
+         Only interactive executors who need execute_from_graph will ever implement it.
+
+         Args:
+             node (BaseNode): The node to execute
+             iter_variable (IterableParameterModel, optional): If the node is of a map state, this corresponds
+                 to the value of the iterable. Defaults to None.
+
+         NOTE: We do not raise an exception as this method is not required by many extensions.
+         """
+         ...
+
+     # ═══════════════════════════════════════════════════════════════
+     # Async Path - Default implementations that raise NotImplementedError
+     # Only interactive executors (local) override these
+     # ═══════════════════════════════════════════════════════════════
+
+     async def execute_graph_async(
+         self,
+         dag: "Graph",
+         iter_variable: Optional[IterableParameterModel] = None,
+     ):
+         """
+         Async graph traversal.
+
+         Only implemented by interactive executors that support async execution.
+         Transpilers (Argo, etc.) do not implement this.
+
+         Raises:
+             NotImplementedError: If the executor does not support async execution.
+         """
+         raise NotImplementedError(
+             f"{self.__class__.__name__} does not support async execution. "
+             f"Use a local executor for async pipelines."
+         )
+
+     async def execute_from_graph_async(
+         self,
+         node: "BaseNode",
+         iter_variable: Optional[IterableParameterModel] = None,
+     ):
+         """
+         Async node execution entry point.
+
+         Raises:
+             NotImplementedError: If the executor does not support async execution.
+         """
+         raise NotImplementedError(
+             f"{self.__class__.__name__} does not support async execution."
+         )
+
+     async def trigger_node_execution_async(
+         self,
+         node: "BaseNode",
+         iter_variable: Optional[IterableParameterModel] = None,
+     ):
+         """
+         Async trigger for node execution.
+
+         Raises:
+             NotImplementedError: If the executor does not support async execution.
+         """
+         raise NotImplementedError(
+             f"{self.__class__.__name__} does not support async execution."
+         )
+
+     async def _execute_node_async(
+         self,
+         node: "BaseNode",
+         iter_variable: Optional[IterableParameterModel] = None,
+         mock: bool = False,
+     ):
+         """
+         Async node execution wrapper.
+
+         Raises:
+             NotImplementedError: If the executor does not support async execution.
+         """
+         raise NotImplementedError(
+             f"{self.__class__.__name__} does not support async execution."
+         )
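
Callers that want the async path drive it from an event loop and rely on these defaults to fail loudly on executors that cannot support it. A minimal usage sketch, using only the API defined in this file:

import asyncio


async def run_pipeline(executor: BasePipelineExecutor, dag: "Graph"):
    # Raises NotImplementedError unless the executor overrides the async path.
    await executor.execute_graph_async(dag)


# asyncio.run(run_pipeline(executor, dag))  # executor/dag constructed elsewhere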