runnable 0.14.0-py3-none-any.whl → 0.16.0-py3-none-any.whl
- runnable/__init__.py +1 -1
- runnable/catalog.py +2 -0
- runnable/cli.py +264 -307
- runnable/context.py +10 -3
- runnable/datastore.py +145 -22
- runnable/defaults.py +13 -54
- runnable/entrypoints.py +197 -185
- runnable/exceptions.py +22 -0
- runnable/executor.py +114 -86
- runnable/graph.py +0 -1
- runnable/nodes.py +36 -6
- runnable/sdk.py +132 -36
- runnable/tasks.py +6 -15
- runnable/utils.py +22 -30
- {runnable-0.14.0.dist-info → runnable-0.16.0.dist-info}/METADATA +6 -3
- runnable-0.16.0.dist-info/RECORD +23 -0
- {runnable-0.14.0.dist-info → runnable-0.16.0.dist-info}/entry_points.txt +12 -7
- runnable/integration.py +0 -197
- runnable-0.14.0.dist-info/RECORD +0 -24
- {runnable-0.14.0.dist-info → runnable-0.16.0.dist-info}/WHEEL +0 -0
- {runnable-0.14.0.dist-info → runnable-0.16.0.dist-info}/licenses/LICENSE +0 -0
runnable/executor.py
CHANGED
@@ -5,17 +5,17 @@ import os
 from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING, Any, Dict, List, Optional
 
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, PrivateAttr
 
 import runnable.context as context
 from runnable import defaults
-from runnable.datastore import DataCatalog, StepLog
+from runnable.datastore import DataCatalog, JobLog, StepLog
 from runnable.defaults import TypeMapVariable
 from runnable.graph import Graph
 
 if TYPE_CHECKING:  # pragma: no cover
-    from extensions.nodes.nodes import TaskNode
     from runnable.nodes import BaseNode
+    from runnable.tasks import BaseTaskType
 
 logger = logging.getLogger(defaults.LOGGER_NAME)
 
@@ -34,14 +34,10 @@ class BaseExecutor(ABC, BaseModel):
     service_name: str = ""
     service_type: str = "executor"
 
-
-
-    _local: bool = (
+    _is_local: bool = (
         False  # This is a flag to indicate whether the executor is local or not.
     )
 
-    # TODO: Change this to _is_local
-    _context_node: Optional[BaseNode] = None
     model_config = ConfigDict(extra="forbid")
 
     @property
@@ -68,33 +64,62 @@ class BaseExecutor(ABC, BaseModel):
         """
         ...
 
+    # TODO: Make this attempt number
+    @property
+    def step_attempt_number(self) -> int:
+        """
+        The attempt number of the current step.
+        Orchestrators should use this step to submit multiple attempts of the job.
+
+        Returns:
+            int: The attempt number of the current step. Defaults to 1.
+        """
+        return int(os.environ.get(defaults.ATTEMPT_NUMBER, 1))
+
     @abstractmethod
-    def
+    def send_return_code(self, stage="traversal"):
+        """
+        Convenience function used by pipeline to send return code to the caller of the cli
+
+        Raises:
+            Exception: If the pipeline execution failed
         """
-
-
+        ...
+
+
+class BaseJobExecutor(BaseExecutor):
+    service_type: str = "job_executor"
 
-
+    @abstractmethod
+    def submit_job(self, job: BaseTaskType, catalog_settings: Optional[List[str]]):
+        """
+        Local executors should
+        - create the run log
+        - and call an execute_job
 
-
-
+        Non local executors should
+        - transpile the job to the platform specific job spec
+        - submit the job to call execute_job
         """
         ...
 
     @abstractmethod
-    def
+    def add_code_identities(self, job_log: JobLog, **kwargs):
         """
-
+        Add code identities specific to the implementation.
+
+        The Base class has an implementation of adding git code identities.
 
         Args:
-
-
+            step_log (object): The step log object
+            node (BaseNode): The node we are adding the step log for
         """
         ...
 
     @abstractmethod
     def _sync_catalog(
-        self,
+        self,
+        catalog_settings: Optional[List[str]],
     ) -> Optional[List[DataCatalog]]:
         """
         1). Identify the catalog settings by over-riding node settings with the global settings.
@@ -117,6 +142,34 @@ class BaseExecutor(ABC, BaseModel):
         """
         ...
 
+    @abstractmethod
+    def execute_job(self, job: BaseTaskType, catalog_settings: Optional[List[str]]):
+        """
+        Focusses only on execution of the job.
+        """
+        ...
+
+
+# TODO: Consolidate execute_node, trigger_node_execution, _execute_node
+class BasePipelineExecutor(BaseExecutor):
+    service_type: str = "pipeline_executor"
+    overrides: dict = {}
+
+    _context_node: Optional[BaseNode] = PrivateAttr(default=None)
+
+    @abstractmethod
+    def add_code_identities(self, node: BaseNode, step_log: StepLog, **kwargs):
+        """
+        Add code identities specific to the implementation.
+
+        The Base class has an implementation of adding git code identities.
+
+        Args:
+            step_log (object): The step log object
+            node (BaseNode): The node we are adding the step log for
+        """
+        ...
+
     @abstractmethod
     def get_effective_compute_data_folder(self) -> Optional[str]:
         """
@@ -134,16 +187,30 @@ class BaseExecutor(ABC, BaseModel):
         """
         ...
 
-    @
-    def
+    @abstractmethod
+    def _sync_catalog(
+        self, stage: str, synced_catalogs=None
+    ) -> Optional[List[DataCatalog]]:
         """
-
-
+        1). Identify the catalog settings by over-riding node settings with the global settings.
+        2). For stage = get:
+                Identify the catalog items that are being asked to get from the catalog
+                And copy them to the local compute data folder
+        3). For stage = put:
+                Identify the catalog items that are being asked to put into the catalog
+                Copy the items from local compute folder to the catalog
+        4). Add the items onto the step log according to the stage
+
+        Args:
+            node (Node): The current node being processed
+            step_log (StepLog): The step log corresponding to that node
+            stage (str): One of get or put
+
+        Raises:
+            Exception: If the stage is not in one of get/put
 
-        Returns:
-            int: The attempt number of the current step. Defaults to 1.
         """
-
+        ...
 
     @abstractmethod
     def _execute_node(
@@ -190,19 +257,6 @@ class BaseExecutor(ABC, BaseModel):
         """
         ...
 
-    @abstractmethod
-    def add_code_identities(self, node: BaseNode, step_log: StepLog, **kwargs):
-        """
-        Add code identities specific to the implementation.
-
-        The Base class has an implementation of adding git code identities.
-
-        Args:
-            step_log (object): The step log object
-            node (BaseNode): The node we are adding the step log for
-        """
-        ...
-
     @abstractmethod
     def execute_from_graph(
         self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
@@ -233,29 +287,10 @@ class BaseExecutor(ABC, BaseModel):
         """
         ...
 
-    @abstractmethod
-    def trigger_job(
-        self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
-    ):
-        """
-        Executor specific way of triggering jobs when runnable does both traversal and execution
-
-        Transpilers will NEVER use this method and will NEVER call them.
-        Only interactive executors who need execute_from_graph will ever implement it.
-
-        Args:
-            node (BaseNode): The node to execute
-            map_variable (str, optional): If the node if of a map state, this corresponds to the value of iterable.
-                    Defaults to ''.
-
-        NOTE: We do not raise an exception as this method is not required by many extensions
-        """
-        ...
-
     @abstractmethod
     def _get_status_and_next_node_name(
         self, current_node: BaseNode, dag: Graph, map_variable: TypeMapVariable = None
-    ):
+    ) -> tuple[str, str]:
         """
         Given the current node and the graph, returns the name of the next node to execute.
 
@@ -294,17 +329,7 @@ class BaseExecutor(ABC, BaseModel):
         ...
 
     @abstractmethod
-    def
-        """
-        Convenience function used by pipeline to send return code to the caller of the cli
-
-        Raises:
-            Exception: If the pipeline execution failed
-        """
-        ...
-
-    @abstractmethod
-    def _resolve_executor_config(self, node: BaseNode):
+    def _resolve_executor_config(self, node: BaseNode) -> Dict[str, Any]:
         """
         The overrides section can contain specific over-rides to an global executor config.
         To avoid too much clutter in the dag definition, we allow the configuration file to have overrides block.
@@ -337,22 +362,6 @@ class BaseExecutor(ABC, BaseModel):
         """
         ...
 
-    @abstractmethod
-    def execute_job(self, node: TaskNode):
-        """
-        Executor specific way of executing a job (python function or a notebook).
-
-        Interactive executors should execute the job.
-        Transpilers should write the instructions.
-
-        Args:
-            node (BaseNode): The job node to execute
-
-        Raises:
-            NotImplementedError: Executors should choose to extend this functionality or not.
-        """
-        ...
-
     @abstractmethod
     def fan_out(self, node: BaseNode, map_variable: TypeMapVariable = None):
         """
@@ -397,3 +406,22 @@ class BaseExecutor(ABC, BaseModel):
 
         """
         ...
+
+    @abstractmethod
+    def trigger_node_execution(
+        self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
+    ):
+        """
+        Executor specific way of triggering jobs when runnable does both traversal and execution
+
+        Transpilers will NEVER use this method and will NEVER call them.
+        Only interactive executors who need execute_from_graph will ever implement it.
+
+        Args:
+            node (BaseNode): The node to execute
+            map_variable (str, optional): If the node if of a map state, this corresponds to the value of iterable.
+                    Defaults to ''.
+
+        NOTE: We do not raise an exception as this method is not required by many extensions
+        """
+        ...
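The headline change here: the single BaseExecutor is split into BaseJobExecutor (one-shot jobs) and BasePipelineExecutor (dag traversal), _local is renamed to _is_local, and trigger_job returns as trigger_node_execution. Below is a minimal sketch of a concrete job executor against the new surface; the class name and method bodies are illustrative assumptions, execute_command is assumed to exist on BaseTaskType, and the base classes may declare further abstract members not shown in this diff.

from typing import List, Optional

from runnable.datastore import DataCatalog, JobLog
from runnable.executor import BaseJobExecutor
from runnable.tasks import BaseTaskType


class InProcessJobExecutor(BaseJobExecutor):  # hypothetical name
    service_name: str = "in-process"
    _is_local: bool = True  # local executors create the run log and execute in place

    def submit_job(self, job: BaseTaskType, catalog_settings: Optional[List[str]]):
        # Local path from the submit_job docstring: set up, then call execute_job.
        self.execute_job(job, catalog_settings=catalog_settings)

    def execute_job(self, job: BaseTaskType, catalog_settings: Optional[List[str]]):
        job.execute_command()  # assumption: BaseTaskType exposes execute_command
        self._sync_catalog(catalog_settings)

    def add_code_identities(self, job_log: JobLog, **kwargs):
        pass  # e.g. attach a git SHA, as the base implementation is said to do

    def _sync_catalog(
        self, catalog_settings: Optional[List[str]]
    ) -> Optional[List[DataCatalog]]:
        return None  # no catalog movement in this sketch

    def send_return_code(self, stage="traversal"):
        pass  # a real executor raises if the execution failed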
runnable/graph.py
CHANGED
@@ -398,7 +398,6 @@ def create_node(name: str, step_config: dict, internal_branch_name: Optional[str
         node = node_mgr.parse_from_config(config=invoke_kwds)
         return node
     except KeyError:
-        # type is missing!!
         msg = "The node configuration does not contain the required key 'type'."
         logger.exception(step_config)
         raise Exception(msg)
runnable/nodes.py
CHANGED
@@ -435,16 +435,34 @@ class ExecutableNode(TraversalNode):
         return self.max_attempts
 
     def _get_branch_by_name(self, branch_name: str):
-        raise
+        raise exceptions.NodeMethodCallError(
+            "This is an executable node and does not have branches"
+        )
 
     def execute_as_graph(self, map_variable: TypeMapVariable = None, **kwargs):
-        raise
+        raise exceptions.NodeMethodCallError(
+            "This is an executable node and does not have a graph"
+        )
 
     def fan_in(self, map_variable: TypeMapVariable = None, **kwargs):
-        raise
+        raise exceptions.NodeMethodCallError(
+            "This is an executable node and does not have a fan in"
+        )
 
     def fan_out(self, map_variable: TypeMapVariable = None, **kwargs):
-        raise
+        raise exceptions.NodeMethodCallError(
+            "This is an executable node and does not have a fan out"
+        )
+
+    def prepare_for_job_execution(self):
+        raise exceptions.NodeMethodCallError(
+            "This is an executable node and does not have a prepare_for_job_execution"
+        )
+
+    def tear_down_after_job_execution(self):
+        raise exceptions.NodeMethodCallError(
+            "This is an executable node and does not have a tear_down_after_job_execution",
+        )
 
 
 class CompositeNode(TraversalNode):
@@ -455,7 +473,9 @@ class CompositeNode(TraversalNode):
         Returns:
             dict: catalog settings defined as per the node or None
         """
-        raise
+        raise exceptions.NodeMethodCallError(
+            "This is a composite node and does not have a catalog settings"
+        )
 
     def _get_max_attempts(self) -> int:
         raise Exception("This is a composite node and does not have a max_attempts")
@@ -467,10 +487,20 @@ class CompositeNode(TraversalNode):
         attempt_number: int = 1,
         **kwargs,
     ) -> StepLog:
-        raise
+        raise exceptions.NodeMethodCallError(
             "This is a composite node and does not have an execute function"
         )
 
+    def prepare_for_job_execution(self):
+        raise exceptions.NodeMethodCallError(
+            "This is an executable node and does not have a prepare_for_job_execution"
+        )
+
+    def tear_down_after_job_execution(self):
+        raise exceptions.NodeMethodCallError(
+            "This is an executable node and does not have a tear_down_after_job_execution"
+        )
+
 
 class TerminalNode(BaseNode):
     def _get_on_failure_node(self) -> str:
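All the bare raise statements in these stub methods now raise exceptions.NodeMethodCallError with a message; the +22 -0 change to runnable/exceptions.py in the file list above presumably adds that class. A hedged sketch of what the typed error enables for callers; the helper below is illustrative, not part of the package.

from runnable import exceptions


def branch_names(node) -> list:
    """Illustrative helper: treat 'no branches' as an empty list."""
    try:
        return [node._get_branch_by_name("map")]
    except exceptions.NodeMethodCallError:
        # ExecutableNode and friends now raise this typed error instead of a bare raise.
        return []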
runnable/sdk.py
CHANGED
@@ -35,7 +35,9 @@ from extensions.nodes.nodes import (
     TaskNode,
 )
 from runnable import console, defaults, entrypoints, exceptions, graph, utils
+from runnable.executor import BaseJobExecutor, BasePipelineExecutor
 from runnable.nodes import TraversalNode
+from runnable.tasks import BaseTaskType as RunnableTask
 from runnable.tasks import TaskReturns
 
 # TODO: This might have to be an extension
@@ -190,6 +192,11 @@ class BaseTask(BaseTraversal):
             self.model_dump(exclude_none=True, by_alias=True)
         )
 
+    def create_job(self) -> RunnableTask:
+        raise NotImplementedError(
+            "This method should be implemented in the child class"
+        )
+
 
 class PythonTask(BaseTask):
     """
@@ -273,6 +280,11 @@ class PythonTask(BaseTask):
 
         return f"{module}.{name}"
 
+    def create_job(self) -> RunnableTask:
+        self.terminate_with_success = True
+        node = self.create_node()
+        return node.executable
+
 
 class NotebookTask(BaseTask):
     """
@@ -353,6 +365,11 @@ class NotebookTask(BaseTask):
     def command_type(self) -> str:
         return "notebook"
 
+    def create_job(self) -> RunnableTask:
+        self.terminate_with_success = True
+        node = self.create_node()
+        return node.executable
+
 
 class ShellTask(BaseTask):
     """
@@ -621,6 +638,7 @@ class Pipeline(BaseModel):
     model_config = ConfigDict(extra="forbid")
 
     def _validate_path(self, path: List[StepType], failure_path: bool = False) -> None:
+        # TODO: Drastically simplify this
         # Check if one and only one step terminates with success
         # Check no more than one step terminates with failure
 
@@ -734,6 +752,16 @@ class Pipeline(BaseModel):
         dag_definition = self._dag.model_dump(by_alias=True, exclude_none=True)
         return graph.create_graph(dag_definition)
 
+    def _is_called_for_definition(self) -> bool:
+        """
+        If the run context is set, we are coming in only to get the pipeline definition.
+        """
+        from runnable.context import run_context
+
+        if run_context is None:
+            return False
+        return True
+
     def execute(
         self,
         configuration_file: str = "",
@@ -743,33 +771,12 @@ class Pipeline(BaseModel):
         log_level: str = defaults.LOG_LEVEL,
     ):
         """
-
-
-
-
-        Traverse and execute all the steps of the pipeline, eg. [local execution](configurations/executors/local.md).
-
-        Or create the representation of the pipeline for other executors.
-
-        Please refer to [concepts](concepts/executor.md) for more information.
-
-        Args:
-            configuration_file (str, optional): The path to the configuration file. Defaults to "".
-                The configuration file can be overridden by the environment variable RUNNABLE_CONFIGURATION_FILE.
-
-            run_id (str, optional): The ID of the run. Defaults to "".
-            tag (str, optional): The tag of the run. Defaults to "".
-                Use to group multiple runs.
-
-            parameters_file (str, optional): The path to the parameters file. Defaults to "".
-
-            log_level (str, optional): The log level. Defaults to defaults.LOG_LEVEL.
+        Overloaded method:
+        - Could be called by the user when executing the pipeline via SDK
+        - Could be called by the system itself when getting the pipeline definition
         """
-
-
-        py_to_yaml = os.environ.get("RUNNABLE_PY_TO_YAML", "false")
-
-        if py_to_yaml == "true":
+        if self._is_called_for_definition():
+            # Immediately return as this call is only for getting the pipeline definition
             return {}
 
         logger.setLevel(log_level)
@@ -785,21 +792,22 @@ class Pipeline(BaseModel):
             parameters_file=parameters_file,
         )
 
-        run_context.
+        assert isinstance(run_context.executor, BasePipelineExecutor)
+
         utils.set_runnable_environment_variables(
             run_id=run_id, configuration_file=configuration_file, tag=tag
         )
 
         dag_definition = self._dag.model_dump(by_alias=True, exclude_none=True)
-
+        run_context.from_sdk = True
         run_context.dag = graph.create_graph(dag_definition)
 
         console.print("Working with context:")
         console.print(run_context)
         console.rule(style="[dark orange]")
 
-        if not run_context.executor._local:
-            # We are not working with
+        if not run_context.executor._is_local:
+            # We are not working with executor that does not work in local environment
            import inspect
 
             caller_stack = inspect.stack()[1]
@@ -809,9 +817,10 @@ class Pipeline(BaseModel):
             module_to_call = f"{module_name}.{caller_stack.function}"
 
             run_context.pipeline_file = f"{module_to_call}.py"
+            run_context.from_sdk = True
 
         # Prepare for graph execution
-        run_context.executor.
+        run_context.executor._set_up_run_log(exists_ok=False)
 
         with Progress(
             SpinnerColumn(spinner_name="runner"),
@@ -823,14 +832,16 @@ class Pipeline(BaseModel):
             console=console,
             expand=True,
         ) as progress:
+            pipeline_execution_task = progress.add_task(
+                "[dark_orange] Starting execution .. ", total=1
+            )
             try:
                 run_context.progress = progress
-                pipeline_execution_task = progress.add_task(
-                    "[dark_orange] Starting execution .. ", total=1
-                )
+
                 run_context.executor.execute_graph(dag=run_context.dag)
 
-                if not run_context.executor._local:
+                if not run_context.executor._is_local:
+                    # non local executors just traverse the graph and do nothing
                     return {}
 
                 run_log = run_context.run_log_store.get_run_log_by_id(
@@ -859,7 +870,92 @@ class Pipeline(BaseModel):
             )
             raise
 
-        if run_context.executor._local:
+        if run_context.executor._is_local:
+            return run_context.run_log_store.get_run_log_by_id(
+                run_id=run_context.run_id
+            )
+
+
+class Job(BaseModel):
+    name: str
+    task: BaseTask
+
+    def return_task(self) -> RunnableTask:
+        return self.task.create_job()
+
+    def return_catalog_settings(self) -> Optional[List[str]]:
+        if self.task.catalog is None:
+            return []
+        return self.task.catalog.put
+
+    def _is_called_for_definition(self) -> bool:
+        """
+        If the run context is set, we are coming in only to get the pipeline definition.
+        """
+        from runnable.context import run_context
+
+        if run_context is None:
+            return False
+        return True
+
+    def execute(
+        self,
+        configuration_file: str = "",
+        job_id: str = "",
+        tag: str = "",
+        parameters_file: str = "",
+        log_level: str = defaults.LOG_LEVEL,
+    ):
+        if self._is_called_for_definition():
+            # Immediately return as this call is only for getting the job definition
+            return {}
+        logger.setLevel(log_level)
+
+        run_id = utils.generate_run_id(run_id=job_id)
+        configuration_file = os.environ.get(
+            "RUNNABLE_CONFIGURATION_FILE", configuration_file
+        )
+        run_context = entrypoints.prepare_configurations(
+            configuration_file=configuration_file,
+            run_id=run_id,
+            tag=tag,
+            parameters_file=parameters_file,
+            is_job=True,
+        )
+
+        assert isinstance(run_context.executor, BaseJobExecutor)
+        run_context.from_sdk = True
+
+        utils.set_runnable_environment_variables(
+            run_id=run_id, configuration_file=configuration_file, tag=tag
+        )
+
+        console.print("Working with context:")
+        console.print(run_context)
+        console.rule(style="[dark orange]")
+
+        if not run_context.executor._is_local:
+            # We are not working with executor that does not work in local environment
+            import inspect
+
+            caller_stack = inspect.stack()[1]
+            relative_to_root = str(Path(caller_stack.filename).relative_to(Path.cwd()))
+
+            module_name = re.sub(r"\b.py\b", "", relative_to_root.replace("/", "."))
+            module_to_call = f"{module_name}.{caller_stack.function}"
+
+            run_context.job_definition_file = f"{module_to_call}.py"
+
+        job = self.task.create_job()
+        catalog_settings = self.return_catalog_settings()
+
+        run_context.executor.submit_job(job, catalog_settings=catalog_settings)
+
+        logger.info(
+            "Executing the job from the user. We are still in the caller's compute environment"
+        )
+
+        if run_context.executor._is_local:
             return run_context.run_log_store.get_run_log_by_id(
                 run_id=run_context.run_id
             )
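The new Job model is the job-mode counterpart of Pipeline.execute: it builds a RunnableTask via create_job() and hands it to a BaseJobExecutor.submit_job. A usage sketch based only on the fields and signature added above; the task function and argument values are illustrative:

from runnable.sdk import Job, PythonTask


def say_hello():
    print("hello from a job")


job = Job(
    name="hello-job",
    task=PythonTask(name="hello", function=say_hello),
)
job.execute(job_id="hello-001", parameters_file="parameters.yaml")  # illustrative arguments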
runnable/tasks.py
CHANGED
@@ -156,19 +156,6 @@ class BaseTaskType(BaseModel):
         params = self.resolve_unreduced_parameters(map_variable=map_variable)
         logger.info(f"Parameters available for the execution: {params}")
 
-        for param_name, param in params.items():
-            # Any access to unreduced param should be replaced.
-            # The replacement is the context param
-            # It is possible that the unreduced param is not created as no upstream step
-            # has created it yet.
-            if param.reduced is False:
-                context_param = param_name
-                for _, v in map_variable.items():  # type: ignore
-                    context_param = f"{v}_{context_param}"
-
-                if context_param in params:
-                    params[param_name].value = params[context_param].value
-
         task_console.log("Parameters available for the execution:")
         task_console.log(params)
 
@@ -440,8 +427,12 @@ class NotebookTaskType(BaseTaskType):
 
     @property
     def notebook_output_path(self) -> str:
-        node_name = self._context.executor._context_node.internal_name
-        sane_name = "".join(x for x in node_name if x.isalnum())
+        # This is to accommodate jobs which does not have a context_node
+        if self._context.executor._context_node:
+            node_name = self._context.executor._context_node.internal_name
+            sane_name = "".join(x for x in node_name if x.isalnum())
+        else:
+            sane_name = ""
 
         output_path = Path(".", self.command)
         file_name = output_path.parent / (output_path.stem + f"{sane_name}_out.ipynb")
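The notebook_output_path fallback means a notebook executed as a job (no _context_node on the executor) simply drops the node-name infix from the output file. The path arithmetic from the two unchanged lines above, run standalone with an illustrative notebook name:

from pathlib import Path

command = "notebooks/eda.ipynb"  # illustrative notebook command
sane_name = ""  # the job case: no _context_node, per the new else branch

output_path = Path(".", command)
file_name = output_path.parent / (output_path.stem + f"{sane_name}_out.ipynb")
print(file_name)  # notebooks/eda_out.ipynb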
|