runnable-0.17.1-py3-none-any.whl → runnable-0.18.0-py3-none-any.whl
- extensions/README.md +0 -0
- extensions/__init__.py +0 -0
- extensions/catalog/README.md +0 -0
- extensions/catalog/file_system.py +253 -0
- extensions/catalog/pyproject.toml +14 -0
- extensions/job_executor/README.md +0 -0
- extensions/job_executor/__init__.py +160 -0
- extensions/job_executor/k8s.py +362 -0
- extensions/job_executor/k8s_job_spec.yaml +37 -0
- extensions/job_executor/local.py +61 -0
- extensions/job_executor/local_container.py +192 -0
- extensions/job_executor/pyproject.toml +16 -0
- extensions/nodes/README.md +0 -0
- extensions/nodes/nodes.py +954 -0
- extensions/nodes/pyproject.toml +15 -0
- extensions/pipeline_executor/README.md +0 -0
- extensions/pipeline_executor/__init__.py +644 -0
- extensions/pipeline_executor/argo.py +1307 -0
- extensions/pipeline_executor/argo_specification.yaml +51 -0
- extensions/pipeline_executor/local.py +62 -0
- extensions/pipeline_executor/local_container.py +363 -0
- extensions/pipeline_executor/mocked.py +161 -0
- extensions/pipeline_executor/pyproject.toml +16 -0
- extensions/pipeline_executor/retry.py +180 -0
- extensions/run_log_store/README.md +0 -0
- extensions/run_log_store/__init__.py +0 -0
- extensions/run_log_store/chunked_fs.py +113 -0
- extensions/run_log_store/db/implementation_FF.py +163 -0
- extensions/run_log_store/db/integration_FF.py +0 -0
- extensions/run_log_store/file_system.py +145 -0
- extensions/run_log_store/generic_chunked.py +599 -0
- extensions/run_log_store/pyproject.toml +15 -0
- extensions/secrets/README.md +0 -0
- extensions/secrets/dotenv.py +62 -0
- extensions/secrets/pyproject.toml +15 -0
- {runnable-0.17.1.dist-info → runnable-0.18.0.dist-info}/METADATA +1 -7
- runnable-0.18.0.dist-info/RECORD +58 -0
- runnable-0.17.1.dist-info/RECORD +0 -23
- {runnable-0.17.1.dist-info → runnable-0.18.0.dist-info}/WHEEL +0 -0
- {runnable-0.17.1.dist-info → runnable-0.18.0.dist-info}/entry_points.txt +0 -0
- {runnable-0.17.1.dist-info → runnable-0.18.0.dist-info}/licenses/LICENSE +0 -0
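The headline change is structural: the extension implementations (catalog, job and pipeline executors, nodes, run log stores, secrets) now ship as a top-level `extensions` package alongside `runnable`, which is why the new RECORD lists 58 entries against 23 in 0.17.1. A quick, illustrative way to confirm the new layout after upgrading (a sketch, not part of the release):

```python
# Hypothetical post-upgrade check: the package names below are taken from
# the file list above; find_spec returns None if a package is not importable.
import importlib.util

for pkg in ("extensions.job_executor", "extensions.pipeline_executor"):
    spec = importlib.util.find_spec(pkg)
    print(pkg, "->", "found" if spec else "missing")
```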
```diff
--- /dev/null
+++ b/extensions/nodes/pyproject.toml
@@ -0,0 +1,15 @@
+[project]
+name = "nodes"
+version = "0.0.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = []
+
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.hatch.build.targets.wheel]
+packages = ["."]
```
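This stub manifest is repeated almost verbatim for the other extension sub-projects in the file list (the catalog, job_executor, pipeline_executor, run_log_store, and secrets directories each gain a 14-16 line pyproject.toml). A small sketch of reading it back, assuming Python 3.11+ for the standard-library `tomllib` (the manifest itself only requires >=3.10):

```python
# Illustrative only: parse the manifest from the hunk above and confirm
# it declares a hatchling-built project named "nodes".
import tomllib  # standard library on Python 3.11+; use the tomli backport on 3.10
from pathlib import Path

data = tomllib.loads(Path("extensions/nodes/pyproject.toml").read_text())
assert data["project"]["name"] == "nodes"
assert data["build-system"]["build-backend"] == "hatchling.build"
print(data["project"]["version"])  # -> 0.0.0
```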
@@ -0,0 +1,644 @@
|
|
1
|
+
import copy
|
2
|
+
import logging
|
3
|
+
import os
|
4
|
+
from typing import Any, Dict, List, Optional, cast
|
5
|
+
|
6
|
+
from runnable import (
|
7
|
+
console,
|
8
|
+
context,
|
9
|
+
defaults,
|
10
|
+
exceptions,
|
11
|
+
parameters,
|
12
|
+
task_console,
|
13
|
+
utils,
|
14
|
+
)
|
15
|
+
from runnable.datastore import DataCatalog, JsonParameter, RunLog, StepLog
|
16
|
+
from runnable.defaults import TypeMapVariable
|
17
|
+
from runnable.executor import BasePipelineExecutor
|
18
|
+
from runnable.graph import Graph
|
19
|
+
from runnable.nodes import BaseNode
|
20
|
+
|
21
|
+
logger = logging.getLogger(defaults.LOGGER_NAME)
|
22
|
+
|
23
|
+
|
24
|
+
class GenericPipelineExecutor(BasePipelineExecutor):
|
25
|
+
"""
|
26
|
+
The skeleton of an executor class.
|
27
|
+
Any implementation of an executor should inherit this class and over-ride accordingly.
|
28
|
+
|
29
|
+
This is a loaded base class which has a lot of methods already implemented for "typical" executions.
|
30
|
+
Look at the function docs to understand how to use them appropriately.
|
31
|
+
|
32
|
+
For any implementation:
|
33
|
+
1). Who/when should the run log be set up?
|
34
|
+
2). Who/When should the step log be set up?
|
35
|
+
|
36
|
+
"""
|
37
|
+
|
38
|
+
service_name: str = ""
|
39
|
+
service_type: str = "pipeline_executor"
|
40
|
+
|
41
|
+
@property
|
42
|
+
def _context(self):
|
43
|
+
assert context.run_context
|
44
|
+
return context.run_context
|
45
|
+
|
46
|
+
def _get_parameters(self) -> Dict[str, JsonParameter]:
|
47
|
+
"""
|
48
|
+
Consolidate the parameters from the environment variables
|
49
|
+
and the parameters file.
|
50
|
+
|
51
|
+
The parameters defined in the environment variables take precedence over the parameters file.
|
52
|
+
|
53
|
+
Returns:
|
54
|
+
_type_: _description_
|
55
|
+
"""
|
56
|
+
params: Dict[str, JsonParameter] = {}
|
57
|
+
if self._context.parameters_file:
|
58
|
+
user_defined = utils.load_yaml(self._context.parameters_file) or {}
|
59
|
+
|
60
|
+
for key, value in user_defined.items():
|
61
|
+
params[key] = JsonParameter(value=value, kind="json")
|
62
|
+
|
63
|
+
# Update these with some from the environment variables
|
64
|
+
params.update(parameters.get_user_set_parameters())
|
65
|
+
logger.debug(f"parameters as seen by executor: {params}")
|
66
|
+
return params
|
67
|
+
|
68
|
+
def _set_up_run_log(self, exists_ok=False):
|
69
|
+
"""
|
70
|
+
Create a run log and put that in the run log store
|
71
|
+
|
72
|
+
If exists_ok, we allow the run log to be already present in the run log store.
|
73
|
+
"""
|
74
|
+
try:
|
75
|
+
attempt_run_log = self._context.run_log_store.get_run_log_by_id(
|
76
|
+
run_id=self._context.run_id, full=False
|
77
|
+
)
|
78
|
+
|
79
|
+
logger.warning(
|
80
|
+
f"The run log by id: {self._context.run_id} already exists, is this designed?"
|
81
|
+
)
|
82
|
+
raise exceptions.RunLogExistsError(
|
83
|
+
f"The run log by id: {self._context.run_id} already exists and is {attempt_run_log.status}"
|
84
|
+
)
|
85
|
+
except exceptions.RunLogNotFoundError:
|
86
|
+
pass
|
87
|
+
except exceptions.RunLogExistsError:
|
88
|
+
if exists_ok:
|
89
|
+
return
|
90
|
+
raise
|
91
|
+
|
92
|
+
# Consolidate and get the parameters
|
93
|
+
params = self._get_parameters()
|
94
|
+
|
95
|
+
self._context.run_log_store.create_run_log(
|
96
|
+
run_id=self._context.run_id,
|
97
|
+
tag=self._context.tag,
|
98
|
+
status=defaults.PROCESSING,
|
99
|
+
dag_hash=self._context.dag_hash,
|
100
|
+
)
|
101
|
+
# Any interaction with run log store attributes should happen via API if available.
|
102
|
+
self._context.run_log_store.set_parameters(
|
103
|
+
run_id=self._context.run_id, parameters=params
|
104
|
+
)
|
105
|
+
|
106
|
+
# Update run_config
|
107
|
+
run_config = utils.get_run_config()
|
108
|
+
logger.debug(f"run_config as seen by executor: {run_config}")
|
109
|
+
self._context.run_log_store.set_run_config(
|
110
|
+
run_id=self._context.run_id, run_config=run_config
|
111
|
+
)
|
112
|
+
|
113
|
+
def _sync_catalog(
|
114
|
+
self, stage: str, synced_catalogs=None
|
115
|
+
) -> Optional[List[DataCatalog]]:
|
116
|
+
"""
|
117
|
+
1). Identify the catalog settings by over-riding node settings with the global settings.
|
118
|
+
2). For stage = get:
|
119
|
+
Identify the catalog items that are being asked to get from the catalog
|
120
|
+
And copy them to the local compute data folder
|
121
|
+
3). For stage = put:
|
122
|
+
Identify the catalog items that are being asked to put into the catalog
|
123
|
+
Copy the items from local compute folder to the catalog
|
124
|
+
4). Add the items onto the step log according to the stage
|
125
|
+
|
126
|
+
Args:
|
127
|
+
node (Node): The current node being processed
|
128
|
+
step_log (StepLog): The step log corresponding to that node
|
129
|
+
stage (str): One of get or put
|
130
|
+
|
131
|
+
Raises:
|
132
|
+
Exception: If the stage is not in one of get/put
|
133
|
+
|
134
|
+
"""
|
135
|
+
assert isinstance(self._context_node, BaseNode)
|
136
|
+
if stage not in ["get", "put"]:
|
137
|
+
msg = (
|
138
|
+
"Catalog service only accepts get/put possible actions as part of node execution."
|
139
|
+
f"Sync catalog of the executor: {self.service_name} asks for {stage} which is not accepted"
|
140
|
+
)
|
141
|
+
logger.exception(msg)
|
142
|
+
raise Exception(msg)
|
143
|
+
|
144
|
+
try:
|
145
|
+
node_catalog_settings = self._context_node._get_catalog_settings()
|
146
|
+
except exceptions.TerminalNodeError:
|
147
|
+
return None
|
148
|
+
|
149
|
+
if not (node_catalog_settings and stage in node_catalog_settings):
|
150
|
+
logger.info("No catalog settings found for stage: %s", stage)
|
151
|
+
# Nothing to get/put from the catalog
|
152
|
+
return None
|
153
|
+
|
154
|
+
compute_data_folder = self.get_effective_compute_data_folder()
|
155
|
+
|
156
|
+
data_catalogs = []
|
157
|
+
for name_pattern in node_catalog_settings.get(stage) or []:
|
158
|
+
if stage == "get":
|
159
|
+
data_catalog = self._context.catalog_handler.get(
|
160
|
+
name=name_pattern,
|
161
|
+
run_id=self._context.run_id,
|
162
|
+
compute_data_folder=compute_data_folder,
|
163
|
+
)
|
164
|
+
|
165
|
+
elif stage == "put":
|
166
|
+
data_catalog = self._context.catalog_handler.put(
|
167
|
+
name=name_pattern,
|
168
|
+
run_id=self._context.run_id,
|
169
|
+
compute_data_folder=compute_data_folder,
|
170
|
+
synced_catalogs=synced_catalogs,
|
171
|
+
)
|
172
|
+
|
173
|
+
logger.debug(f"Added data catalog: {data_catalog} to step log")
|
174
|
+
data_catalogs.extend(data_catalog)
|
175
|
+
|
176
|
+
return data_catalogs
|
177
|
+
|
178
|
+
def get_effective_compute_data_folder(self) -> str:
|
179
|
+
"""
|
180
|
+
Get the effective compute data folder for the given stage.
|
181
|
+
If there is nothing to catalog, we return None.
|
182
|
+
|
183
|
+
The default is the compute data folder of the catalog but this can be over-ridden by the node.
|
184
|
+
|
185
|
+
Args:
|
186
|
+
stage (str): The stage we are in the process of cataloging
|
187
|
+
|
188
|
+
|
189
|
+
Returns:
|
190
|
+
str: The compute data folder as defined by the node defaulting to catalog handler
|
191
|
+
"""
|
192
|
+
assert isinstance(self._context_node, BaseNode)
|
193
|
+
compute_data_folder = self._context.catalog_handler.compute_data_folder
|
194
|
+
|
195
|
+
catalog_settings = self._context_node._get_catalog_settings()
|
196
|
+
effective_compute_data_folder = (
|
197
|
+
catalog_settings.get("compute_data_folder", "") or compute_data_folder
|
198
|
+
)
|
199
|
+
|
200
|
+
return effective_compute_data_folder
|
201
|
+
|
202
|
+
@property
|
203
|
+
def step_attempt_number(self) -> int:
|
204
|
+
"""
|
205
|
+
The attempt number of the current step.
|
206
|
+
Orchestrators should use this step to submit multiple attempts of the job.
|
207
|
+
|
208
|
+
Returns:
|
209
|
+
int: The attempt number of the current step. Defaults to 1.
|
210
|
+
"""
|
211
|
+
return int(os.environ.get(defaults.ATTEMPT_NUMBER, 1))
|
212
|
+
|
213
|
+
def _execute_node(
|
214
|
+
self,
|
215
|
+
node: BaseNode,
|
216
|
+
map_variable: TypeMapVariable = None,
|
217
|
+
mock: bool = False,
|
218
|
+
**kwargs,
|
219
|
+
):
|
220
|
+
"""
|
221
|
+
This is the entry point when we do the actual execution of the function.
|
222
|
+
DO NOT Over-ride this function.
|
223
|
+
|
224
|
+
While in interactive execution, we just compute, in 3rd party interactive execution, we need to reach
|
225
|
+
this function.
|
226
|
+
|
227
|
+
In most cases,
|
228
|
+
* We get the corresponding step_log of the node and the parameters.
|
229
|
+
* We sync the catalog to GET any data sets that are in the catalog
|
230
|
+
* We call the execute method of the node for the actual compute and retry it as many times as asked.
|
231
|
+
* If the node succeeds, we get any of the user defined metrics provided by the user.
|
232
|
+
* We sync the catalog to PUT any data sets that are in the catalog.
|
233
|
+
|
234
|
+
Args:
|
235
|
+
node (Node): The node to execute
|
236
|
+
map_variable (dict, optional): If the node is of a map state, map_variable is the value of the iterable.
|
237
|
+
Defaults to None.
|
238
|
+
"""
|
239
|
+
logger.info(
|
240
|
+
f"Trying to execute node: {node.internal_name}, attempt : {self.step_attempt_number}"
|
241
|
+
)
|
242
|
+
|
243
|
+
self._context_node = node
|
244
|
+
|
245
|
+
data_catalogs_get: Optional[List[DataCatalog]] = self._sync_catalog(stage="get")
|
246
|
+
logger.debug(f"data_catalogs_get: {data_catalogs_get}")
|
247
|
+
|
248
|
+
step_log = node.execute(
|
249
|
+
map_variable=map_variable,
|
250
|
+
attempt_number=self.step_attempt_number,
|
251
|
+
mock=mock,
|
252
|
+
**kwargs,
|
253
|
+
)
|
254
|
+
|
255
|
+
data_catalogs_put: Optional[List[DataCatalog]] = self._sync_catalog(stage="put")
|
256
|
+
logger.debug(f"data_catalogs_put: {data_catalogs_put}")
|
257
|
+
|
258
|
+
step_log.add_data_catalogs(data_catalogs_get or [])
|
259
|
+
step_log.add_data_catalogs(data_catalogs_put or [])
|
260
|
+
|
261
|
+
console.print(f"Summary of the step: {step_log.internal_name}")
|
262
|
+
console.print(step_log.get_summary(), style=defaults.info_style)
|
263
|
+
|
264
|
+
self._context_node = None
|
265
|
+
|
266
|
+
self._context.run_log_store.add_step_log(step_log, self._context.run_id)
|
267
|
+
|
268
|
+
def add_code_identities(self, node: BaseNode, step_log: StepLog, **kwargs):
|
269
|
+
"""
|
270
|
+
Add code identities specific to the implementation.
|
271
|
+
|
272
|
+
The Base class has an implementation of adding git code identities.
|
273
|
+
|
274
|
+
Args:
|
275
|
+
step_log (object): The step log object
|
276
|
+
node (BaseNode): The node we are adding the step log for
|
277
|
+
"""
|
278
|
+
step_log.code_identities.append(utils.get_git_code_identity())
|
279
|
+
|
280
|
+
def execute_from_graph(
|
281
|
+
self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
|
282
|
+
):
|
283
|
+
"""
|
284
|
+
This is the entry point to from the graph execution.
|
285
|
+
|
286
|
+
While the self.execute_graph is responsible for traversing the graph, this function is responsible for
|
287
|
+
actual execution of the node.
|
288
|
+
|
289
|
+
If the node type is:
|
290
|
+
* task : We can delegate to _execute_node after checking the eligibility for re-run in cases of a re-run
|
291
|
+
* success: We can delegate to _execute_node
|
292
|
+
* fail: We can delegate to _execute_node
|
293
|
+
|
294
|
+
For nodes that are internally graphs:
|
295
|
+
* parallel: Delegate the responsibility of execution to the node.execute_as_graph()
|
296
|
+
* dag: Delegate the responsibility of execution to the node.execute_as_graph()
|
297
|
+
* map: Delegate the responsibility of execution to the node.execute_as_graph()
|
298
|
+
|
299
|
+
Transpilers will NEVER use this method and will NEVER call ths method.
|
300
|
+
This method should only be used by interactive executors.
|
301
|
+
|
302
|
+
Args:
|
303
|
+
node (Node): The node to execute
|
304
|
+
map_variable (dict, optional): If the node if of a map state, this corresponds to the value of iterable.
|
305
|
+
Defaults to None.
|
306
|
+
"""
|
307
|
+
step_log = self._context.run_log_store.create_step_log(
|
308
|
+
node.name, node._get_step_log_name(map_variable)
|
309
|
+
)
|
310
|
+
|
311
|
+
self.add_code_identities(node=node, step_log=step_log)
|
312
|
+
|
313
|
+
step_log.step_type = node.node_type
|
314
|
+
step_log.status = defaults.PROCESSING
|
315
|
+
|
316
|
+
self._context.run_log_store.add_step_log(step_log, self._context.run_id)
|
317
|
+
|
318
|
+
logger.info(f"Executing node: {node.get_summary()}")
|
319
|
+
|
320
|
+
# Add the step log to the database as per the situation.
|
321
|
+
# If its a terminal node, complete it now
|
322
|
+
if node.node_type in ["success", "fail"]:
|
323
|
+
self._execute_node(node, map_variable=map_variable, **kwargs)
|
324
|
+
return
|
325
|
+
|
326
|
+
# We call an internal function to iterate the sub graphs and execute them
|
327
|
+
if node.is_composite:
|
328
|
+
node.execute_as_graph(map_variable=map_variable, **kwargs)
|
329
|
+
return
|
330
|
+
|
331
|
+
task_console.export_text(clear=True)
|
332
|
+
|
333
|
+
task_name = node._resolve_map_placeholders(node.internal_name, map_variable)
|
334
|
+
console.print(
|
335
|
+
f":runner: Executing the node {task_name} ... ", style="bold color(208)"
|
336
|
+
)
|
337
|
+
self.trigger_node_execution(node=node, map_variable=map_variable, **kwargs)
|
338
|
+
|
339
|
+
log_file_name = utils.make_log_file_name(node=node, map_variable=map_variable)
|
340
|
+
task_console.save_text(log_file_name, clear=True)
|
341
|
+
|
342
|
+
self._context.catalog_handler.put(
|
343
|
+
name=log_file_name, run_id=self._context.run_id
|
344
|
+
)
|
345
|
+
os.remove(log_file_name)
|
346
|
+
|
347
|
+
def trigger_node_execution(
|
348
|
+
self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
|
349
|
+
):
|
350
|
+
"""
|
351
|
+
Call this method only if we are responsible for traversing the graph via
|
352
|
+
execute_from_graph().
|
353
|
+
|
354
|
+
We are not prepared to execute node as of now.
|
355
|
+
|
356
|
+
Args:
|
357
|
+
node (BaseNode): The node to execute
|
358
|
+
map_variable (str, optional): If the node if of a map state, this corresponds to the value of iterable.
|
359
|
+
Defaults to ''.
|
360
|
+
|
361
|
+
NOTE: We do not raise an exception as this method is not required by many extensions
|
362
|
+
"""
|
363
|
+
pass
|
364
|
+
|
365
|
+
def _get_status_and_next_node_name(
|
366
|
+
self, current_node: BaseNode, dag: Graph, map_variable: TypeMapVariable = None
|
367
|
+
) -> tuple[str, str]:
|
368
|
+
"""
|
369
|
+
Given the current node and the graph, returns the name of the next node to execute.
|
370
|
+
|
371
|
+
The name is always relative the graph that the node resides in.
|
372
|
+
|
373
|
+
If the current node succeeded, we return the next node as per the graph.
|
374
|
+
If the current node failed, we return the on failure node of the node (if provided) or the global one.
|
375
|
+
|
376
|
+
This method is only used by interactive executors i.e local and local-container
|
377
|
+
|
378
|
+
Args:
|
379
|
+
current_node (BaseNode): The current node.
|
380
|
+
dag (Graph): The dag we are traversing.
|
381
|
+
map_variable (dict): If the node belongs to a map branch.
|
382
|
+
|
383
|
+
"""
|
384
|
+
|
385
|
+
step_log = self._context.run_log_store.get_step_log(
|
386
|
+
current_node._get_step_log_name(map_variable), self._context.run_id
|
387
|
+
)
|
388
|
+
logger.info(
|
389
|
+
f"Finished executing the node {current_node} with status {step_log.status}"
|
390
|
+
)
|
391
|
+
|
392
|
+
try:
|
393
|
+
next_node_name = current_node._get_next_node()
|
394
|
+
except exceptions.TerminalNodeError:
|
395
|
+
next_node_name = ""
|
396
|
+
|
397
|
+
if step_log.status == defaults.FAIL:
|
398
|
+
next_node_name = dag.get_fail_node().name
|
399
|
+
if current_node._get_on_failure_node():
|
400
|
+
next_node_name = current_node._get_on_failure_node()
|
401
|
+
|
402
|
+
return step_log.status, next_node_name
|
403
|
+
|
404
|
+
def execute_graph(self, dag: Graph, map_variable: TypeMapVariable = None, **kwargs):
|
405
|
+
"""
|
406
|
+
The parallelization is controlled by the nodes and not by this function.
|
407
|
+
|
408
|
+
Transpilers should over ride this method to do the translation of dag to the platform specific way.
|
409
|
+
Interactive methods should use this to traverse and execute the dag.
|
410
|
+
- Use execute_from_graph to handle sub-graphs
|
411
|
+
|
412
|
+
Logically the method should:
|
413
|
+
* Start at the dag.start_at of the dag.
|
414
|
+
* Call the self.execute_from_graph(node)
|
415
|
+
* depending upon the status of the execution, either move to the success node or failure node.
|
416
|
+
|
417
|
+
Args:
|
418
|
+
dag (Graph): The directed acyclic graph to traverse and execute.
|
419
|
+
map_variable (dict, optional): If the node if of a map state, this corresponds to the value of the iterable.
|
420
|
+
Defaults to None.
|
421
|
+
"""
|
422
|
+
current_node = dag.start_at
|
423
|
+
previous_node = None
|
424
|
+
logger.info(f"Running the execution with {current_node}")
|
425
|
+
|
426
|
+
branch_execution_task = None
|
427
|
+
branch_task_name: str = ""
|
428
|
+
if dag.internal_branch_name:
|
429
|
+
branch_task_name = BaseNode._resolve_map_placeholders(
|
430
|
+
dag.internal_branch_name or "Graph",
|
431
|
+
map_variable,
|
432
|
+
)
|
433
|
+
branch_execution_task = self._context.progress.add_task(
|
434
|
+
f"[dark_orange]Executing {branch_task_name}",
|
435
|
+
total=1,
|
436
|
+
)
|
437
|
+
|
438
|
+
while True:
|
439
|
+
working_on = dag.get_node_by_name(current_node)
|
440
|
+
task_name = working_on._resolve_map_placeholders(
|
441
|
+
working_on.internal_name, map_variable
|
442
|
+
)
|
443
|
+
|
444
|
+
if previous_node == current_node:
|
445
|
+
raise Exception("Potentially running in a infinite loop")
|
446
|
+
|
447
|
+
previous_node = current_node
|
448
|
+
|
449
|
+
logger.debug(f"Creating execution log for {working_on}")
|
450
|
+
|
451
|
+
depth = " " * ((task_name.count(".")) or 1 - 1)
|
452
|
+
|
453
|
+
task_execution = self._context.progress.add_task(
|
454
|
+
f"{depth}Executing {task_name}", total=1
|
455
|
+
)
|
456
|
+
|
457
|
+
try:
|
458
|
+
self.execute_from_graph(working_on, map_variable=map_variable, **kwargs)
|
459
|
+
status, next_node_name = self._get_status_and_next_node_name(
|
460
|
+
current_node=working_on, dag=dag, map_variable=map_variable
|
461
|
+
)
|
462
|
+
|
463
|
+
if status == defaults.SUCCESS:
|
464
|
+
self._context.progress.update(
|
465
|
+
task_execution,
|
466
|
+
description=f"{depth}[green] {task_name} Completed",
|
467
|
+
completed=True,
|
468
|
+
overflow="fold",
|
469
|
+
)
|
470
|
+
else:
|
471
|
+
self._context.progress.update(
|
472
|
+
task_execution,
|
473
|
+
description=f"{depth}[red] {task_name} Failed",
|
474
|
+
completed=True,
|
475
|
+
) # type ignore
|
476
|
+
except Exception as e: # noqa: E722
|
477
|
+
self._context.progress.update(
|
478
|
+
task_execution,
|
479
|
+
description=f"{depth}[red] {task_name} Errored",
|
480
|
+
completed=True,
|
481
|
+
)
|
482
|
+
console.print(e, style=defaults.error_style)
|
483
|
+
logger.exception(e)
|
484
|
+
raise
|
485
|
+
|
486
|
+
console.rule(style="[dark orange]")
|
487
|
+
|
488
|
+
if working_on.node_type in ["success", "fail"]:
|
489
|
+
break
|
490
|
+
|
491
|
+
current_node = next_node_name
|
492
|
+
|
493
|
+
if branch_execution_task:
|
494
|
+
self._context.progress.update(
|
495
|
+
branch_execution_task,
|
496
|
+
description=f"[green3] {branch_task_name} completed",
|
497
|
+
completed=True,
|
498
|
+
)
|
499
|
+
|
500
|
+
run_log = self._context.run_log_store.get_branch_log(
|
501
|
+
working_on._get_branch_log_name(map_variable), self._context.run_id
|
502
|
+
)
|
503
|
+
|
504
|
+
branch = "graph"
|
505
|
+
if working_on.internal_branch_name:
|
506
|
+
branch = working_on.internal_branch_name
|
507
|
+
|
508
|
+
logger.info(f"Finished execution of the {branch} with status {run_log.status}")
|
509
|
+
|
510
|
+
# We are in the root dag
|
511
|
+
if dag == self._context.dag:
|
512
|
+
run_log = cast(RunLog, run_log)
|
513
|
+
console.print("Completed Execution, Summary:", style="bold color(208)")
|
514
|
+
console.print(run_log.get_summary(), style=defaults.info_style)
|
515
|
+
|
516
|
+
def send_return_code(self, stage="traversal"):
|
517
|
+
"""
|
518
|
+
Convenience function used by pipeline to send return code to the caller of the cli
|
519
|
+
|
520
|
+
Raises:
|
521
|
+
Exception: If the pipeline execution failed
|
522
|
+
"""
|
523
|
+
run_id = self._context.run_id
|
524
|
+
|
525
|
+
run_log = self._context.run_log_store.get_run_log_by_id(
|
526
|
+
run_id=run_id, full=False
|
527
|
+
)
|
528
|
+
if run_log.status == defaults.FAIL:
|
529
|
+
raise exceptions.ExecutionFailedError(run_id=run_id)
|
530
|
+
|
531
|
+
def _resolve_executor_config(self, node: BaseNode) -> Dict[str, Any]:
|
532
|
+
"""
|
533
|
+
The overrides section can contain specific over-rides to an global executor config.
|
534
|
+
To avoid too much clutter in the dag definition, we allow the configuration file to have overrides block.
|
535
|
+
The nodes can over-ride the global config by referring to key in the overrides.
|
536
|
+
|
537
|
+
This function also applies variables to the effective node config.
|
538
|
+
|
539
|
+
For example:
|
540
|
+
# configuration.yaml
|
541
|
+
execution:
|
542
|
+
type: cloud-implementation
|
543
|
+
config:
|
544
|
+
k1: v1
|
545
|
+
k3: v3
|
546
|
+
overrides:
|
547
|
+
custom_config:
|
548
|
+
k1: v11
|
549
|
+
k2: v2 # Could be a mapping internally.
|
550
|
+
|
551
|
+
# in pipeline definition.yaml
|
552
|
+
dag:
|
553
|
+
steps:
|
554
|
+
step1:
|
555
|
+
overrides:
|
556
|
+
cloud-implementation: custom_config
|
557
|
+
|
558
|
+
This method should resolve the node_config to {'k1': 'v11', 'k2': 'v2', 'k3': 'v3'}
|
559
|
+
|
560
|
+
Args:
|
561
|
+
node (BaseNode): The current node being processed.
|
562
|
+
|
563
|
+
"""
|
564
|
+
effective_node_config = copy.deepcopy(self.model_dump())
|
565
|
+
try:
|
566
|
+
ctx_node_config = node._get_executor_config(self.service_name)
|
567
|
+
except exceptions.TerminalNodeError:
|
568
|
+
# Some modes request for effective node config even for success or fail nodes
|
569
|
+
return utils.apply_variables(effective_node_config, self._context.variables)
|
570
|
+
|
571
|
+
if ctx_node_config:
|
572
|
+
if ctx_node_config not in self.overrides:
|
573
|
+
raise Exception(
|
574
|
+
f"No override of key: {ctx_node_config} found in the overrides section"
|
575
|
+
)
|
576
|
+
|
577
|
+
effective_node_config.update(self.overrides[ctx_node_config])
|
578
|
+
|
579
|
+
effective_node_config = utils.apply_variables(
|
580
|
+
effective_node_config, self._context.variables
|
581
|
+
)
|
582
|
+
logger.debug(f"Effective node config: {effective_node_config}")
|
583
|
+
|
584
|
+
return effective_node_config
|
585
|
+
|
586
|
+
def fan_out(self, node: BaseNode, map_variable: TypeMapVariable = None):
|
587
|
+
"""
|
588
|
+
This method is used to appropriately fan-out the execution of a composite node.
|
589
|
+
This is only useful when we want to execute a composite node during 3rd party orchestrators.
|
590
|
+
|
591
|
+
Reason: Transpilers typically try to run the leaf nodes but do not have any capacity to do anything for the
|
592
|
+
step which is composite. By calling this fan-out before calling the leaf nodes, we have an opportunity to
|
593
|
+
do the right set up (creating the step log, exposing the parameters, etc.) for the composite step.
|
594
|
+
|
595
|
+
All 3rd party orchestrators should use this method to fan-out the execution of a composite node.
|
596
|
+
This ensures:
|
597
|
+
- The dot path notation is preserved, this method should create the step and call the node's fan out to
|
598
|
+
create the branch logs and let the 3rd party do the actual step execution.
|
599
|
+
- Gives 3rd party orchestrators an opportunity to set out the required for running a composite node.
|
600
|
+
|
601
|
+
Args:
|
602
|
+
node (BaseNode): The node to fan-out
|
603
|
+
map_variable (dict, optional): If the node if of a map state,.Defaults to None.
|
604
|
+
|
605
|
+
"""
|
606
|
+
step_log = self._context.run_log_store.create_step_log(
|
607
|
+
node.name, node._get_step_log_name(map_variable=map_variable)
|
608
|
+
)
|
609
|
+
|
610
|
+
self.add_code_identities(node=node, step_log=step_log)
|
611
|
+
|
612
|
+
step_log.step_type = node.node_type
|
613
|
+
step_log.status = defaults.PROCESSING
|
614
|
+
self._context.run_log_store.add_step_log(step_log, self._context.run_id)
|
615
|
+
|
616
|
+
node.fan_out(executor=self, map_variable=map_variable)
|
617
|
+
|
618
|
+
def fan_in(self, node: BaseNode, map_variable: TypeMapVariable = None):
|
619
|
+
"""
|
620
|
+
This method is used to appropriately fan-in after the execution of a composite node.
|
621
|
+
This is only useful when we want to execute a composite node during 3rd party orchestrators.
|
622
|
+
|
623
|
+
Reason: Transpilers typically try to run the leaf nodes but do not have any capacity to do anything for the
|
624
|
+
step which is composite. By calling this fan-in after calling the leaf nodes, we have an opportunity to
|
625
|
+
act depending upon the status of the individual branches.
|
626
|
+
|
627
|
+
All 3rd party orchestrators should use this method to fan-in the execution of a composite node.
|
628
|
+
This ensures:
|
629
|
+
- Gives the renderer's the control on where to go depending upon the state of the composite node.
|
630
|
+
- The status of the step and its underlying branches are correctly updated.
|
631
|
+
|
632
|
+
Args:
|
633
|
+
node (BaseNode): The node to fan-in
|
634
|
+
map_variable (dict, optional): If the node if of a map state,.Defaults to None.
|
635
|
+
|
636
|
+
"""
|
637
|
+
node.fan_in(executor=self, map_variable=map_variable)
|
638
|
+
|
639
|
+
step_log = self._context.run_log_store.get_step_log(
|
640
|
+
node._get_step_log_name(map_variable=map_variable), self._context.run_id
|
641
|
+
)
|
642
|
+
|
643
|
+
if step_log.status == defaults.FAIL:
|
644
|
+
raise Exception(f"Step {node.name} failed")
|