runnable-0.14.0-py3-none-any.whl → runnable-0.17.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runnable/__init__.py +1 -1
- runnable/catalog.py +2 -0
- runnable/cli.py +264 -307
- runnable/context.py +12 -3
- runnable/datastore.py +159 -25
- runnable/defaults.py +13 -54
- runnable/entrypoints.py +197 -185
- runnable/exceptions.py +22 -0
- runnable/executor.py +114 -88
- runnable/graph.py +0 -1
- runnable/nodes.py +36 -6
- runnable/sdk.py +132 -36
- runnable/tasks.py +6 -15
- runnable/utils.py +22 -30
- {runnable-0.14.0.dist-info → runnable-0.17.0.dist-info}/METADATA +6 -3
- runnable-0.17.0.dist-info/RECORD +23 -0
- {runnable-0.14.0.dist-info → runnable-0.17.0.dist-info}/entry_points.txt +12 -7
- runnable/integration.py +0 -197
- runnable-0.14.0.dist-info/RECORD +0 -24
- {runnable-0.14.0.dist-info → runnable-0.17.0.dist-info}/WHEEL +0 -0
- {runnable-0.14.0.dist-info → runnable-0.17.0.dist-info}/licenses/LICENSE +0 -0
runnable/executor.py
CHANGED
@@ -5,17 +5,17 @@ import os
 from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING, Any, Dict, List, Optional

-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, PrivateAttr

 import runnable.context as context
 from runnable import defaults
-from runnable.datastore import DataCatalog, StepLog
+from runnable.datastore import DataCatalog, JobLog, StepLog
 from runnable.defaults import TypeMapVariable
 from runnable.graph import Graph

 if TYPE_CHECKING:  # pragma: no cover
-    from extensions.nodes.nodes import TaskNode
     from runnable.nodes import BaseNode
+    from runnable.tasks import BaseTaskType

 logger = logging.getLogger(defaults.LOGGER_NAME)

@@ -34,14 +34,8 @@ class BaseExecutor(ABC, BaseModel):
     service_name: str = ""
     service_type: str = "executor"

-
-
-    _local: bool = (
-        False  # This is a flag to indicate whether the executor is local or not.
-    )
+    _is_local: bool = PrivateAttr(default=False)

-    # TODO: Change this to _is_local
-    _context_node: Optional[BaseNode] = None
     model_config = ConfigDict(extra="forbid")

     @property
@@ -68,33 +62,62 @@ class BaseExecutor(ABC, BaseModel):
         """
         ...

+    # TODO: Make this attempt number
+    @property
+    def step_attempt_number(self) -> int:
+        """
+        The attempt number of the current step.
+        Orchestrators should use this step to submit multiple attempts of the job.
+
+        Returns:
+            int: The attempt number of the current step. Defaults to 1.
+        """
+        return int(os.environ.get(defaults.ATTEMPT_NUMBER, 1))
+
     @abstractmethod
-    def
+    def send_return_code(self, stage="traversal"):
+        """
+        Convenience function used by pipeline to send return code to the caller of the cli
+
+        Raises:
+            Exception: If the pipeline execution failed
         """
-
-
+        ...
+
+
+class BaseJobExecutor(BaseExecutor):
+    service_type: str = "job_executor"

-
+    @abstractmethod
+    def submit_job(self, job: BaseTaskType, catalog_settings: Optional[List[str]]):
+        """
+        Local executors should
+        - create the run log
+        - and call an execute_job

-
-
+        Non local executors should
+        - transpile the job to the platform specific job spec
+        - submit the job to call execute_job
         """
         ...

     @abstractmethod
-    def
+    def add_code_identities(self, job_log: JobLog, **kwargs):
         """
-
+        Add code identities specific to the implementation.
+
+        The Base class has an implementation of adding git code identities.

         Args:
-
-
+            step_log (object): The step log object
+            node (BaseNode): The node we are adding the step log for
         """
         ...

     @abstractmethod
     def _sync_catalog(
-        self,
+        self,
+        catalog_settings: Optional[List[str]],
     ) -> Optional[List[DataCatalog]]:
         """
         1). Identify the catalog settings by over-riding node settings with the global settings.
@@ -117,6 +140,34 @@ class BaseExecutor(ABC, BaseModel):
         """
         ...

+    @abstractmethod
+    def execute_job(self, job: BaseTaskType, catalog_settings: Optional[List[str]]):
+        """
+        Focusses only on execution of the job.
+        """
+        ...
+
+
+# TODO: Consolidate execute_node, trigger_node_execution, _execute_node
+class BasePipelineExecutor(BaseExecutor):
+    service_type: str = "pipeline_executor"
+    overrides: dict = {}
+
+    _context_node: Optional[BaseNode] = PrivateAttr(default=None)
+
+    @abstractmethod
+    def add_code_identities(self, node: BaseNode, step_log: StepLog, **kwargs):
+        """
+        Add code identities specific to the implementation.
+
+        The Base class has an implementation of adding git code identities.
+
+        Args:
+            step_log (object): The step log object
+            node (BaseNode): The node we are adding the step log for
+        """
+        ...
+
     @abstractmethod
     def get_effective_compute_data_folder(self) -> Optional[str]:
         """
@@ -134,16 +185,30 @@ class BaseExecutor(ABC, BaseModel):
         """
         ...

-    @
-    def
+    @abstractmethod
+    def _sync_catalog(
+        self, stage: str, synced_catalogs=None
+    ) -> Optional[List[DataCatalog]]:
         """
-
-
+        1). Identify the catalog settings by over-riding node settings with the global settings.
+        2). For stage = get:
+                Identify the catalog items that are being asked to get from the catalog
+                And copy them to the local compute data folder
+        3). For stage = put:
+                Identify the catalog items that are being asked to put into the catalog
+                Copy the items from local compute folder to the catalog
+        4). Add the items onto the step log according to the stage
+
+        Args:
+            node (Node): The current node being processed
+            step_log (StepLog): The step log corresponding to that node
+            stage (str): One of get or put
+
+        Raises:
+            Exception: If the stage is not in one of get/put

-        Returns:
-            int: The attempt number of the current step. Defaults to 1.
         """
-
+        ...

     @abstractmethod
     def _execute_node(
@@ -190,19 +255,6 @@ class BaseExecutor(ABC, BaseModel):
         """
         ...

-    @abstractmethod
-    def add_code_identities(self, node: BaseNode, step_log: StepLog, **kwargs):
-        """
-        Add code identities specific to the implementation.
-
-        The Base class has an implementation of adding git code identities.
-
-        Args:
-            step_log (object): The step log object
-            node (BaseNode): The node we are adding the step log for
-        """
-        ...
-
     @abstractmethod
     def execute_from_graph(
         self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
@@ -233,29 +285,10 @@ class BaseExecutor(ABC, BaseModel):
         """
         ...

-    @abstractmethod
-    def trigger_job(
-        self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
-    ):
-        """
-        Executor specific way of triggering jobs when runnable does both traversal and execution
-
-        Transpilers will NEVER use this method and will NEVER call them.
-        Only interactive executors who need execute_from_graph will ever implement it.
-
-        Args:
-            node (BaseNode): The node to execute
-            map_variable (str, optional): If the node if of a map state, this corresponds to the value of iterable.
-                    Defaults to ''.
-
-        NOTE: We do not raise an exception as this method is not required by many extensions
-        """
-        ...
-
     @abstractmethod
     def _get_status_and_next_node_name(
         self, current_node: BaseNode, dag: Graph, map_variable: TypeMapVariable = None
-    ):
+    ) -> tuple[str, str]:
         """
         Given the current node and the graph, returns the name of the next node to execute.

@@ -294,17 +327,7 @@ class BaseExecutor(ABC, BaseModel):
         ...

     @abstractmethod
-    def
-        """
-        Convenience function used by pipeline to send return code to the caller of the cli
-
-        Raises:
-            Exception: If the pipeline execution failed
-        """
-        ...
-
-    @abstractmethod
-    def _resolve_executor_config(self, node: BaseNode):
+    def _resolve_executor_config(self, node: BaseNode) -> Dict[str, Any]:
         """
         The overrides section can contain specific over-rides to an global executor config.
         To avoid too much clutter in the dag definition, we allow the configuration file to have overrides block.
@@ -337,22 +360,6 @@ class BaseExecutor(ABC, BaseModel):
         """
         ...

-    @abstractmethod
-    def execute_job(self, node: TaskNode):
-        """
-        Executor specific way of executing a job (python function or a notebook).
-
-        Interactive executors should execute the job.
-        Transpilers should write the instructions.
-
-        Args:
-            node (BaseNode): The job node to execute
-
-        Raises:
-            NotImplementedError: Executors should choose to extend this functionality or not.
-        """
-        ...
-
     @abstractmethod
     def fan_out(self, node: BaseNode, map_variable: TypeMapVariable = None):
         """
@@ -397,3 +404,22 @@ class BaseExecutor(ABC, BaseModel):

         """
         ...
+
+    @abstractmethod
+    def trigger_node_execution(
+        self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
+    ):
+        """
+        Executor specific way of triggering jobs when runnable does both traversal and execution
+
+        Transpilers will NEVER use this method and will NEVER call them.
+        Only interactive executors who need execute_from_graph will ever implement it.
+
+        Args:
+            node (BaseNode): The node to execute
+            map_variable (str, optional): If the node if of a map state, this corresponds to the value of iterable.
+                    Defaults to ''.
+
+        NOTE: We do not raise an exception as this method is not required by many extensions
+        """
+        ...
runnable/graph.py
CHANGED
@@ -398,7 +398,6 @@ def create_node(name: str, step_config: dict, internal_branch_name: Optional[str
         node = node_mgr.parse_from_config(config=invoke_kwds)
         return node
     except KeyError:
-        # type is missing!!
         msg = "The node configuration does not contain the required key 'type'."
         logger.exception(step_config)
         raise Exception(msg)
runnable/nodes.py
CHANGED
@@ -435,16 +435,34 @@ class ExecutableNode(TraversalNode):
         return self.max_attempts

     def _get_branch_by_name(self, branch_name: str):
-        raise
+        raise exceptions.NodeMethodCallError(
+            "This is an executable node and does not have branches"
+        )

     def execute_as_graph(self, map_variable: TypeMapVariable = None, **kwargs):
-        raise
+        raise exceptions.NodeMethodCallError(
+            "This is an executable node and does not have a graph"
+        )

     def fan_in(self, map_variable: TypeMapVariable = None, **kwargs):
-        raise
+        raise exceptions.NodeMethodCallError(
+            "This is an executable node and does not have a fan in"
+        )

     def fan_out(self, map_variable: TypeMapVariable = None, **kwargs):
-        raise
+        raise exceptions.NodeMethodCallError(
+            "This is an executable node and does not have a fan out"
+        )
+
+    def prepare_for_job_execution(self):
+        raise exceptions.NodeMethodCallError(
+            "This is an executable node and does not have a prepare_for_job_execution"
+        )
+
+    def tear_down_after_job_execution(self):
+        raise exceptions.NodeMethodCallError(
+            "This is an executable node and does not have a tear_down_after_job_execution",
+        )


 class CompositeNode(TraversalNode):
@@ -455,7 +473,9 @@ class CompositeNode(TraversalNode):
         Returns:
             dict: catalog settings defined as per the node or None
         """
-        raise
+        raise exceptions.NodeMethodCallError(
+            "This is a composite node and does not have a catalog settings"
+        )

     def _get_max_attempts(self) -> int:
         raise Exception("This is a composite node and does not have a max_attempts")
@@ -467,10 +487,20 @@ class CompositeNode(TraversalNode):
         attempt_number: int = 1,
         **kwargs,
     ) -> StepLog:
-        raise
+        raise exceptions.NodeMethodCallError(
             "This is a composite node and does not have an execute function"
         )

+    def prepare_for_job_execution(self):
+        raise exceptions.NodeMethodCallError(
+            "This is an executable node and does not have a prepare_for_job_execution"
+        )
+
+    def tear_down_after_job_execution(self):
+        raise exceptions.NodeMethodCallError(
+            "This is an executable node and does not have a tear_down_after_job_execution"
+        )
+

 class TerminalNode(BaseNode):
     def _get_on_failure_node(self) -> str:
runnable/sdk.py
CHANGED
@@ -35,7 +35,9 @@ from extensions.nodes.nodes import (
     TaskNode,
 )
 from runnable import console, defaults, entrypoints, exceptions, graph, utils
+from runnable.executor import BaseJobExecutor, BasePipelineExecutor
 from runnable.nodes import TraversalNode
+from runnable.tasks import BaseTaskType as RunnableTask
 from runnable.tasks import TaskReturns

 # TODO: This might have to be an extension
@@ -190,6 +192,11 @@ class BaseTask(BaseTraversal):
             self.model_dump(exclude_none=True, by_alias=True)
         )

+    def create_job(self) -> RunnableTask:
+        raise NotImplementedError(
+            "This method should be implemented in the child class"
+        )
+

 class PythonTask(BaseTask):
     """
@@ -273,6 +280,11 @@ class PythonTask(BaseTask):

         return f"{module}.{name}"

+    def create_job(self) -> RunnableTask:
+        self.terminate_with_success = True
+        node = self.create_node()
+        return node.executable
+

 class NotebookTask(BaseTask):
     """
@@ -353,6 +365,11 @@ class NotebookTask(BaseTask):
     def command_type(self) -> str:
         return "notebook"

+    def create_job(self) -> RunnableTask:
+        self.terminate_with_success = True
+        node = self.create_node()
+        return node.executable
+

 class ShellTask(BaseTask):
     """
@@ -621,6 +638,7 @@ class Pipeline(BaseModel):
     model_config = ConfigDict(extra="forbid")

     def _validate_path(self, path: List[StepType], failure_path: bool = False) -> None:
+        # TODO: Drastically simplify this
        # Check if one and only one step terminates with success
        # Check no more than one step terminates with failure

@@ -734,6 +752,16 @@
         dag_definition = self._dag.model_dump(by_alias=True, exclude_none=True)
         return graph.create_graph(dag_definition)

+    def _is_called_for_definition(self) -> bool:
+        """
+        If the run context is set, we are coming in only to get the pipeline definition.
+        """
+        from runnable.context import run_context
+
+        if run_context is None:
+            return False
+        return True
+
     def execute(
         self,
         configuration_file: str = "",
@@ -743,33 +771,12 @@
         log_level: str = defaults.LOG_LEVEL,
     ):
         """
-
-
-
-
-        Traverse and execute all the steps of the pipeline, eg. [local execution](configurations/executors/local.md).
-
-        Or create the representation of the pipeline for other executors.
-
-        Please refer to [concepts](concepts/executor.md) for more information.
-
-        Args:
-            configuration_file (str, optional): The path to the configuration file. Defaults to "".
-                The configuration file can be overridden by the environment variable RUNNABLE_CONFIGURATION_FILE.
-
-            run_id (str, optional): The ID of the run. Defaults to "".
-            tag (str, optional): The tag of the run. Defaults to "".
-                Use to group multiple runs.
-
-            parameters_file (str, optional): The path to the parameters file. Defaults to "".
-
-            log_level (str, optional): The log level. Defaults to defaults.LOG_LEVEL.
+        Overloaded method:
+        - Could be called by the user when executing the pipeline via SDK
+        - Could be called by the system itself when getting the pipeline definition
         """
-
-
-        py_to_yaml = os.environ.get("RUNNABLE_PY_TO_YAML", "false")
-
-        if py_to_yaml == "true":
+        if self._is_called_for_definition():
+            # Immediately return as this call is only for getting the pipeline definition
             return {}

         logger.setLevel(log_level)
@@ -785,21 +792,22 @@
             parameters_file=parameters_file,
         )

-        run_context.
+        assert isinstance(run_context.executor, BasePipelineExecutor)
+
         utils.set_runnable_environment_variables(
             run_id=run_id, configuration_file=configuration_file, tag=tag
         )

         dag_definition = self._dag.model_dump(by_alias=True, exclude_none=True)
-
+        run_context.from_sdk = True
         run_context.dag = graph.create_graph(dag_definition)

         console.print("Working with context:")
         console.print(run_context)
         console.rule(style="[dark orange]")

-        if not run_context.executor.
-            # We are not working with
+        if not run_context.executor._is_local:
+            # We are not working with executor that does not work in local environment
             import inspect

             caller_stack = inspect.stack()[1]
@@ -809,9 +817,10 @@
             module_to_call = f"{module_name}.{caller_stack.function}"

             run_context.pipeline_file = f"{module_to_call}.py"
+            run_context.from_sdk = True

         # Prepare for graph execution
-        run_context.executor.
+        run_context.executor._set_up_run_log(exists_ok=False)

         with Progress(
             SpinnerColumn(spinner_name="runner"),
@@ -823,14 +832,16 @@
             console=console,
             expand=True,
         ) as progress:
+            pipeline_execution_task = progress.add_task(
+                "[dark_orange] Starting execution .. ", total=1
+            )
             try:
                 run_context.progress = progress
-
-                    "[dark_orange] Starting execution .. ", total=1
-                )
+
                 run_context.executor.execute_graph(dag=run_context.dag)

-                if not run_context.executor.
+                if not run_context.executor._is_local:
+                    # non local executors just traverse the graph and do nothing
                     return {}

                 run_log = run_context.run_log_store.get_run_log_by_id(
@@ -859,7 +870,92 @@
                 )
                 raise

-        if run_context.executor.
+        if run_context.executor._is_local:
+            return run_context.run_log_store.get_run_log_by_id(
+                run_id=run_context.run_id
+            )
+
+
+class Job(BaseModel):
+    name: str
+    task: BaseTask
+
+    def return_task(self) -> RunnableTask:
+        return self.task.create_job()
+
+    def return_catalog_settings(self) -> Optional[List[str]]:
+        if self.task.catalog is None:
+            return []
+        return self.task.catalog.put
+
+    def _is_called_for_definition(self) -> bool:
+        """
+        If the run context is set, we are coming in only to get the pipeline definition.
+        """
+        from runnable.context import run_context
+
+        if run_context is None:
+            return False
+        return True
+
+    def execute(
+        self,
+        configuration_file: str = "",
+        job_id: str = "",
+        tag: str = "",
+        parameters_file: str = "",
+        log_level: str = defaults.LOG_LEVEL,
+    ):
+        if self._is_called_for_definition():
+            # Immediately return as this call is only for getting the job definition
+            return {}
+        logger.setLevel(log_level)
+
+        run_id = utils.generate_run_id(run_id=job_id)
+        configuration_file = os.environ.get(
+            "RUNNABLE_CONFIGURATION_FILE", configuration_file
+        )
+        run_context = entrypoints.prepare_configurations(
+            configuration_file=configuration_file,
+            run_id=run_id,
+            tag=tag,
+            parameters_file=parameters_file,
+            is_job=True,
+        )
+
+        assert isinstance(run_context.executor, BaseJobExecutor)
+        run_context.from_sdk = True
+
+        utils.set_runnable_environment_variables(
+            run_id=run_id, configuration_file=configuration_file, tag=tag
+        )
+
+        console.print("Working with context:")
+        console.print(run_context)
+        console.rule(style="[dark orange]")
+
+        if not run_context.executor._is_local:
+            # We are not working with executor that does not work in local environment
+            import inspect
+
+            caller_stack = inspect.stack()[1]
+            relative_to_root = str(Path(caller_stack.filename).relative_to(Path.cwd()))
+
+            module_name = re.sub(r"\b.py\b", "", relative_to_root.replace("/", "."))
+            module_to_call = f"{module_name}.{caller_stack.function}"
+
+            run_context.job_definition_file = f"{module_to_call}.py"
+
+        job = self.task.create_job()
+        catalog_settings = self.return_catalog_settings()
+
+        run_context.executor.submit_job(job, catalog_settings=catalog_settings)
+
+        logger.info(
+            "Executing the job from the user. We are still in the caller's compute environment"
+        )
+
+        if run_context.executor._is_local:
             return run_context.run_log_store.get_run_log_by_id(
                 run_id=run_context.run_id
             )
runnable/tasks.py
CHANGED
@@ -156,19 +156,6 @@ class BaseTaskType(BaseModel):
         params = self.resolve_unreduced_parameters(map_variable=map_variable)
         logger.info(f"Parameters available for the execution: {params}")

-        for param_name, param in params.items():
-            # Any access to unreduced param should be replaced.
-            # The replacement is the context param
-            # It is possible that the unreduced param is not created as no upstream step
-            # has created it yet.
-            if param.reduced is False:
-                context_param = param_name
-                for _, v in map_variable.items():  # type: ignore
-                    context_param = f"{v}_{context_param}"
-
-                if context_param in params:
-                    params[param_name].value = params[context_param].value
-
         task_console.log("Parameters available for the execution:")
         task_console.log(params)

@@ -440,8 +427,12 @@ class NotebookTaskType(BaseTaskType):

     @property
     def notebook_output_path(self) -> str:
-
-
+        # This is to accommodate jobs which does not have a context_node
+        if self._context.executor._context_node:
+            node_name = self._context.executor._context_node.internal_name
+            sane_name = "".join(x for x in node_name if x.isalnum())
+        else:
+            sane_name = ""

         output_path = Path(".", self.command)
         file_name = output_path.parent / (output_path.stem + f"{sane_name}_out.ipynb")