runnable 0.50.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. extensions/README.md +0 -0
  2. extensions/__init__.py +0 -0
  3. extensions/catalog/README.md +0 -0
  4. extensions/catalog/any_path.py +214 -0
  5. extensions/catalog/file_system.py +52 -0
  6. extensions/catalog/minio.py +72 -0
  7. extensions/catalog/pyproject.toml +14 -0
  8. extensions/catalog/s3.py +11 -0
  9. extensions/job_executor/README.md +0 -0
  10. extensions/job_executor/__init__.py +236 -0
  11. extensions/job_executor/emulate.py +70 -0
  12. extensions/job_executor/k8s.py +553 -0
  13. extensions/job_executor/k8s_job_spec.yaml +37 -0
  14. extensions/job_executor/local.py +35 -0
  15. extensions/job_executor/local_container.py +161 -0
  16. extensions/job_executor/pyproject.toml +16 -0
  17. extensions/nodes/README.md +0 -0
  18. extensions/nodes/__init__.py +0 -0
  19. extensions/nodes/conditional.py +301 -0
  20. extensions/nodes/fail.py +78 -0
  21. extensions/nodes/loop.py +394 -0
  22. extensions/nodes/map.py +477 -0
  23. extensions/nodes/parallel.py +281 -0
  24. extensions/nodes/pyproject.toml +15 -0
  25. extensions/nodes/stub.py +93 -0
  26. extensions/nodes/success.py +78 -0
  27. extensions/nodes/task.py +156 -0
  28. extensions/pipeline_executor/README.md +0 -0
  29. extensions/pipeline_executor/__init__.py +871 -0
  30. extensions/pipeline_executor/argo.py +1266 -0
  31. extensions/pipeline_executor/emulate.py +119 -0
  32. extensions/pipeline_executor/local.py +226 -0
  33. extensions/pipeline_executor/local_container.py +369 -0
  34. extensions/pipeline_executor/mocked.py +159 -0
  35. extensions/pipeline_executor/pyproject.toml +16 -0
  36. extensions/run_log_store/README.md +0 -0
  37. extensions/run_log_store/__init__.py +0 -0
  38. extensions/run_log_store/any_path.py +100 -0
  39. extensions/run_log_store/chunked_fs.py +122 -0
  40. extensions/run_log_store/chunked_minio.py +141 -0
  41. extensions/run_log_store/file_system.py +91 -0
  42. extensions/run_log_store/generic_chunked.py +549 -0
  43. extensions/run_log_store/minio.py +114 -0
  44. extensions/run_log_store/pyproject.toml +15 -0
  45. extensions/secrets/README.md +0 -0
  46. extensions/secrets/dotenv.py +62 -0
  47. extensions/secrets/pyproject.toml +15 -0
  48. runnable/__init__.py +108 -0
  49. runnable/catalog.py +141 -0
  50. runnable/cli.py +484 -0
  51. runnable/context.py +730 -0
  52. runnable/datastore.py +1058 -0
  53. runnable/defaults.py +159 -0
  54. runnable/entrypoints.py +390 -0
  55. runnable/exceptions.py +137 -0
  56. runnable/executor.py +561 -0
  57. runnable/gantt.py +1646 -0
  58. runnable/graph.py +501 -0
  59. runnable/names.py +546 -0
  60. runnable/nodes.py +593 -0
  61. runnable/parameters.py +217 -0
  62. runnable/pickler.py +96 -0
  63. runnable/sdk.py +1277 -0
  64. runnable/secrets.py +92 -0
  65. runnable/tasks.py +1268 -0
  66. runnable/telemetry.py +142 -0
  67. runnable/utils.py +423 -0
  68. runnable-0.50.0.dist-info/METADATA +189 -0
  69. runnable-0.50.0.dist-info/RECORD +72 -0
  70. runnable-0.50.0.dist-info/WHEEL +4 -0
  71. runnable-0.50.0.dist-info/entry_points.txt +53 -0
  72. runnable-0.50.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,477 @@
1
+ import importlib
2
+ import logging
3
+ import os
4
+ import sys
5
+ from collections import OrderedDict
6
+ from copy import deepcopy
7
+ from multiprocessing import Pool
8
+ from typing import Any, Dict, List, Optional, Tuple, Union, cast
9
+
10
+ from pydantic import Field
11
+
12
+ from extensions.nodes.task import TaskNode
13
+ from runnable import console, defaults, exceptions, utils
14
+ from runnable.datastore import JsonParameter, MetricParameter, ObjectParameter
15
+ from runnable.defaults import IterableParameterModel, MapVariableModel
16
+ from runnable.graph import Graph, create_graph
17
+ from runnable.nodes import CompositeNode
18
+
19
+ logger = logging.getLogger(defaults.LOGGER_NAME)
20
+
21
+
22
class MapNode(CompositeNode):
    """
    A composite node that contains ONE graph object within itself that has to be executed with an iterable.

    The structure is generally:
        MapNode:
            branch

    The config is expected to have the variables 'iterate_on' and 'iterate_as'.
    'iterate_on' names the parameter that is looked up in the run log parameters:
        for iter_variable in parameters['iterate_on']:
            Execute the Branch by sending {'iterate_as': iter_variable}

    The internal naming convention creates branches dynamically based on the iteration value
    """

    # TODO: Should it be one function or a dict of functions indexed by the return name

    # Serialized under the key "type"; defaults to "map".
    node_type: str = Field(default="map", serialization_alias="type")
    # Name of the run log parameter whose value (a list) is iterated over.
    iterate_on: str
    # Key under which the current iteration value is placed in the map variable.
    iterate_as: str
    # Optional callable expression or dotted path; resolved by get_reducer_function.
    # When unset, the per-iteration values are collected into a list.
    reducer: Optional[str] = Field(default=None)
    # The single graph that is executed once per iteration value.
    branch: Graph
44
+
45
def get_summary(self) -> Dict[str, Any]:
    """
    Describe this map node as a plain dictionary.

    Returns:
        Dict[str, Any]: the node name and type, the summary of the branch graph
            and the iteration settings (iterate_on, iterate_as, reducer).
    """
    branch_summary = self.branch.get_summary()

    return dict(
        [
            ("name", self.name),
            ("type", self.node_type),
            ("branch", branch_summary),
            ("iterate_on", self.iterate_on),
            ("iterate_as", self.iterate_as),
            ("reducer", self.reducer),
        ]
    )
56
+
57
def get_reducer_function(self):
    """
    Resolve the configured reducer into a callable.

    Resolution order:
        1. No reducer configured: return a function that collects its
           positional arguments into a list.
        2. ``self.reducer`` evaluates to a callable (for example a lambda
           expression): return that callable.
        3. Otherwise treat ``self.reducer`` as a dotted path ``module.attr``,
           import the module and return the attribute.

    Returns:
        Callable: the function applied to the per-iteration values of a return.

    Raises:
        ImportError: if the module of a dotted path cannot be imported.
        AttributeError: if the imported module does not have the attribute.
    """
    if not self.reducer:
        return lambda *x: list(x)  # returns a list of the args

    # NOTE(security): eval executes arbitrary code taken from the pipeline
    # definition; the reducer must only ever come from a trusted source.
    try:
        f = eval(self.reducer)
        if callable(f):
            return f
    except (SyntaxError, NameError, AttributeError):
        # A dotted path such as "my_module.my_func" is syntactically valid, so
        # eval fails with NameError (module not in scope) or AttributeError
        # rather than SyntaxError. All three mean "not an inline expression".
        logger.info(f"{self.reducer} is not a lambda function")

    # Load the reducer function from dotted path
    mod, func = utils.get_module_and_attr_names(self.reducer)

    # The current directory must be importable and take priority; only insert
    # it when it is not already first so repeated calls do not grow sys.path.
    cwd = os.getcwd()
    if not sys.path or sys.path[0] != cwd:
        sys.path.insert(0, cwd)

    imported_module = importlib.import_module(mod)
    return getattr(imported_module, func)
76
+
77
@classmethod
def parse_from_config(cls, config: Dict[str, Any]) -> "MapNode":
    """
    Build a MapNode from its configuration dictionary.

    The "branch" entry is removed from ``config`` and turned into a graph whose
    internal branch name is the node's internal name followed by the map
    placeholder. The remaining entries are passed to the constructor as-is.

    Args:
        config (Dict[str, Any]): The node configuration; it is modified in
            place because the "branch" key is popped.

    Raises:
        Exception: If the configuration has no (or an empty) "branch".
    """
    internal_name = cast(str, config.get("internal_name"))

    branch_definition = config.pop("branch", {})
    if not branch_definition:
        raise Exception("A map node should have a branch")

    branch_definition_copy = deepcopy(branch_definition)
    branch_scope = internal_name + "." + defaults.MAP_PLACEHOLDER
    branch = create_graph(branch_definition_copy, internal_branch_name=branch_scope)

    return cls(branch=branch, **config)
90
+
91
@property
def branch_returns(self):
    """
    List the returns declared by the task nodes of the branch.

    Every return of every TaskNode in the branch contributes one
    ``(name, placeholder)`` tuple, where the placeholder is an empty-valued
    parameter object matching the declared kind of the return.

    Returns:
        List[Tuple[str, Union[ObjectParameter, MetricParameter, JsonParameter]]]:
            one entry per declared return, in branch node order.

    Raises:
        Exception: If a return declares a kind other than json, object or metric.
    """
    # Parameter class used as placeholder for each supported return kind.
    placeholder_by_kind = {
        "json": JsonParameter,
        "object": ObjectParameter,
        "metric": MetricParameter,
    }

    branch_returns: List[
        Tuple[str, Union[ObjectParameter, MetricParameter, JsonParameter]]
    ] = []

    for node in self.branch.nodes.values():
        if not isinstance(node, TaskNode):
            continue

        for task_return in node.executable.returns:
            parameter_cls = placeholder_by_kind.get(task_return.kind)
            if parameter_cls is None:
                # The previous message omitted "metric" although it is accepted.
                raise Exception("kind should be one of json, object or metric")

            branch_returns.append(
                (task_return.name, parameter_cls(kind=task_return.kind, value=""))
            )

    return branch_returns
127
+
128
+ def _get_branch_by_name(self, branch_name: str) -> Graph:
129
+ """
130
+ Retrieve a branch by name.
131
+
132
+ In the case of a Map Object, the branch naming is dynamic as it is parameterized on iterable.
133
+ This method takes no responsibility in checking the validity of the naming.
134
+
135
+ Returns a Graph Object
136
+
137
+ Args:
138
+ branch_name (str): The name of the branch to retrieve
139
+
140
+ Raises:
141
+ Exception: If the branch by that name does not exist
142
+ """
143
+ return self.branch
144
+
145
def fan_out(
    self,
    iter_variable: Optional[IterableParameterModel] = None,
):
    """
    Prepare one branch log per iteration value of the map node.

    This method assumes that the step log has already been created.
    3rd party orchestrators should call this method to create the individual branch logs.

    For every value of the list stored under ``self.iterate_on`` the branch log
    is fetched (or created when missing), marked as processing and written back
    to the run log store.

    Args:
        iter_variable (IterableParameterModel, optional): Forwarded to
            ``_resolve_map_placeholders`` when computing the branch names.
            Defaults to None.
    """
    run_log_store = self._context.run_log_store
    run_id = self._context.run_id

    all_parameters = run_log_store.get_parameters(run_id)
    iterate_on = all_parameters[self.iterate_on].get_value()

    assert iterate_on
    assert isinstance(iterate_on, list)

    # Prepare the branch logs
    for iteration_variable in iterate_on:
        unresolved_name = self.internal_name + "." + str(iteration_variable)
        effective_branch_name = self._resolve_map_placeholders(
            unresolved_name,
            iter_variable=iter_variable,
        )

        try:
            branch_log = run_log_store.get_branch_log(effective_branch_name, run_id)
        except exceptions.BranchLogNotFoundError:
            branch_log = run_log_store.create_branch_log(effective_branch_name)
            console.print(f"Branch log created for {effective_branch_name}")
        else:
            console.print(f"Branch log already exists for {effective_branch_name}")

        branch_log.status = defaults.PROCESSING
        run_log_store.add_branch_log(branch_log, run_id)
185
+
186
def execute_as_graph(
    self,
    iter_variable: Optional[IterableParameterModel] = None,
):
    """
    Execute the branch of the map node once per value of the iterable.

    From a design perspective, this function should not be called if the execution is 3rd party orchestrated.
    The modes that render the job specifications do not need to interact with this node at all as
    they have their own internal mechanisms of handling map states or dynamic parallel states.

    The actual logic is:
        * Read the list stored under ``self.iterate_on`` from the run log parameters.
        * ``fan_out`` to prepare the branch logs.
        * Execute the branch for every value. Iterations run in parallel only when the
          pipeline executor has ``enable_parallel`` set, its ``service_name`` is ``local``
          or ``local-container`` and the run log store has ``supports_parallel_writes``
          set; in every other case they run sequentially.
        * ``fan_in`` to collate the branch results.

    Args:
        iter_variable (IterableParameterModel, optional): The iteration context this
            node is executed in; passed on to fan_out, the iterations and fan_in.
            Defaults to None.

    Raises:
        Exception: If the parameter named by ``self.iterate_on`` is missing from the
            run log parameters or its value is not a list.
    """
    try:
        iterate_on = self._context.run_log_store.get_parameters(
            self._context.run_id
        )[self.iterate_on].get_value()
    except KeyError as e:
        # Adjacent string literals are concatenated into ONE message. The
        # previous comma-separated version passed a tuple to Exception.
        raise Exception(
            f"Expected parameter {self.iterate_on} "
            "not present in Run Log parameters, "
            "was it ever set before?"
        ) from e

    if not isinstance(iterate_on, list):
        raise Exception("Only list is allowed as a valid iterator type")

    self.fan_out(iter_variable=iter_variable)

    # Check if parallel execution is enabled and supported
    enable_parallel = getattr(
        self._context.pipeline_executor, "enable_parallel", False
    )
    supports_parallel_writes = getattr(
        self._context.run_log_store, "supports_parallel_writes", False
    )

    # Check if we're using a local executor (local or local-container)
    executor_service_name = getattr(
        self._context.pipeline_executor, "service_name", ""
    )
    is_local_executor = executor_service_name in ["local", "local-container"]

    if enable_parallel and is_local_executor:
        if not supports_parallel_writes:
            logger.warning(
                "Parallel execution was requested but the run log store does not support parallel writes. "
                "Falling back to sequential execution. Consider using a run log store with "
                "supports_parallel_writes=True for parallel execution."
            )
            self._execute_map_sequentially(iterate_on, iter_variable)
        else:
            logger.info("Executing map iterations in parallel")
            self._execute_map_in_parallel(iterate_on, iter_variable)
    else:
        self._execute_map_sequentially(iterate_on, iter_variable)

    self.fan_in(iter_variable=iter_variable)
264
+
265
+ def _execute_map_sequentially(
266
+ self,
267
+ iterate_on: List,
268
+ iter_variable: Optional[IterableParameterModel] = None,
269
+ ):
270
+ """Execute map iterations sequentially (original behavior)."""
271
+ for iteration_variable in iterate_on:
272
+ # Build effective map variable from existing iter_variable
273
+ effective_map_variable = OrderedDict()
274
+ if iter_variable and iter_variable.map_variable:
275
+ effective_map_variable.update(
276
+ {k: v.value for k, v in iter_variable.map_variable.items()}
277
+ )
278
+ effective_map_variable[self.iterate_as] = iteration_variable
279
+
280
+ converted_map: OrderedDict = OrderedDict(
281
+ (k, MapVariableModel(value=v))
282
+ for k, v in effective_map_variable.items()
283
+ )
284
+ effective_iter_variable = IterableParameterModel(map_variable=converted_map)
285
+
286
+ self._context.pipeline_executor.execute_graph(
287
+ self.branch, iter_variable=effective_iter_variable
288
+ )
289
+
290
def _execute_map_in_parallel(
    self,
    iterate_on: List,
    iter_variable: Optional[IterableParameterModel] = None,
):
    """
    Run the branch for all iteration values using a multiprocessing Pool.

    One argument tuple (branch name, branch, run context, JSON of the
    iteration variable) is prepared per value and handed to
    ``execute_single_branch`` via ``Pool.starmap``. Failed iterations are only
    logged here.

    Args:
        iterate_on (List): The values to iterate over.
        iter_variable (IterableParameterModel, optional): The iteration
            context that is deep-copied for every iteration. Defaults to None.

    Raises:
        Exception: If ``iterate_on`` is not a list.
    """
    from runnable.entrypoints import execute_single_branch

    if not isinstance(iterate_on, list):
        raise Exception("Only list is allowed as a valid iterator type")

    # Prepare arguments for each iteration
    iteration_args = []
    for iteration_variable in iterate_on:
        if iter_variable:
            branch_iter_variable = iter_variable.model_copy(deep=True)
        else:
            branch_iter_variable = IterableParameterModel()

        map_variable = branch_iter_variable.map_variable or OrderedDict()
        map_variable[self.iterate_as] = MapVariableModel(value=iteration_variable)
        branch_iter_variable.map_variable = map_variable

        iteration_args.append(
            (
                f"{self.internal_name}.{iteration_variable}",
                self.branch,
                self._context,
                branch_iter_variable.model_dump_json(),
            )
        )

    # Keep track of iteration variables for failure reporting
    iteration_variables_map = list(iterate_on)

    # Use multiprocessing Pool to execute iterations in parallel
    with Pool() as pool:
        results = pool.starmap(execute_single_branch, iteration_args)

    # Check if any iteration failed
    failed_iterations = [
        iteration_var
        for iteration_var, result in zip(iteration_variables_map, results)
        if not result
    ]
    if failed_iterations:
        logger.error(f"The following map iterations failed: {failed_iterations}")
        # Note: The actual failure handling and status update will be done in fan_in()
344
+
345
def fan_in(
    self,
    iter_variable: Optional[IterableParameterModel] = None,
):
    """
    Collate the branch results of the map node into its step log and parameters.

    3rd party orchestrators should call this method to find the status of the step log.

    The step log is marked successful only when every branch log is successful,
    otherwise it is marked failed and nothing is collected. On success, every
    return of the task nodes in the branch is gathered across the iterations,
    passed through the reducer and stored in the parameters of the scope given
    by ``self.internal_branch_name``.

    Args:
        iter_variable (IterableParameterModel, optional): Forwarded to
            ``_resolve_map_placeholders`` when resolving the step and branch
            names. Defaults to None.
    """
    run_log_store = self._context.run_log_store
    run_id = self._context.run_id

    iterate_on = run_log_store.get_parameters(run_id)[self.iterate_on].get_value()

    assert iterate_on
    assert isinstance(iterate_on, list)

    effective_internal_name = self._resolve_map_placeholders(
        self.internal_name, iter_variable=iter_variable
    )

    # Find status of the branches
    every_branch_succeeded = True
    for iteration_variable in iterate_on:
        branch_name = self._resolve_map_placeholders(
            self.internal_name + "." + str(iteration_variable),
            iter_variable=iter_variable,
        )
        branch_log = run_log_store.get_branch_log(branch_name, run_id)

        if branch_log.status != defaults.SUCCESS:
            every_branch_succeeded = False

    # Collate all the results and update the status of the step
    step_log = run_log_store.get_step_log(effective_internal_name, run_id)
    step_log.status = defaults.SUCCESS if every_branch_succeeded else defaults.FAIL
    run_log_store.add_step_log(step_log, run_id)

    # If we failed, we return without any collection
    if step_log.status != defaults.SUCCESS:
        return

    # Apply the reduce function and reduce the returns of the task nodes.
    # The final value of the parameter is the result of the reduce function.
    reducer_f = self.get_reducer_function()

    # The parent scope is where the map node itself lives (no placeholder resolution needed)
    parent_params = run_log_store.get_parameters(
        run_id, internal_branch_name=self.internal_branch_name
    )

    for param_name, _ in self.branch_returns:
        to_reduce = []
        for iteration_variable in iterate_on:
            branch_name = self._resolve_map_placeholders(
                self.internal_name + "." + str(iteration_variable),
                iter_variable=iter_variable,
            )
            branch_params = run_log_store.get_parameters(
                run_id, internal_branch_name=branch_name
            )

            # A branch that took the failure path might not have set every return
            if param_name in branch_params:
                to_reduce.append(branch_params[param_name].get_value())

        value = reducer_f(*to_reduce) if to_reduce else ""

        # Update the parameter in the parent scope, creating it when missing
        if param_name in parent_params:
            parent_params[param_name].value = value
        else:
            parent_params[param_name] = JsonParameter(kind="json", value=value)

    run_log_store.set_parameters(
        parameters=parent_params,
        run_id=run_id,
        internal_branch_name=self.internal_branch_name,
    )
443
+
444
async def execute_as_graph_async(
    self,
    iter_variable: Optional[IterableParameterModel] = None,
):
    """
    Async map execution.

    Prepares the branch logs with ``fan_out``, awaits the execution of the
    branch for every iteration value one after the other and finally collates
    the results with ``fan_in``. Both fan_out and fan_in are synchronous calls.

    Args:
        iter_variable (IterableParameterModel, optional): The iteration
            context whose map variables are inherited by every iteration.
            Defaults to None.
    """
    self.fan_out(iter_variable=iter_variable)  # sync

    all_parameters = self._context.run_log_store.get_parameters(self._context.run_id)
    iterate_on = all_parameters[self.iterate_on].get_value()
    assert iterate_on
    assert isinstance(iterate_on, list)

    for current_value in iterate_on:
        # Plain values first: inherited map variables, then this iteration.
        plain_values: OrderedDict = OrderedDict()
        if iter_variable and iter_variable.map_variable:
            for key, model in iter_variable.map_variable.items():
                plain_values[key] = model.value
        plain_values[self.iterate_as] = current_value

        # Convert to IterableParameterModel
        wrapped_values: OrderedDict = OrderedDict()
        for key, plain in plain_values.items():
            wrapped_values[key] = MapVariableModel(value=plain)
        branch_iter_variable = IterableParameterModel(map_variable=wrapped_values)

        await self._context.pipeline_executor.execute_graph_async(
            self.branch, iter_variable=branch_iter_variable
        )

    self.fan_in(iter_variable=iter_variable)  # sync