runnable-0.13.0-py3-none-any.whl → runnable-0.16.0-py3-none-any.whl
- runnable/__init__.py +1 -12
- runnable/catalog.py +29 -5
- runnable/cli.py +268 -215
- runnable/context.py +10 -3
- runnable/datastore.py +212 -53
- runnable/defaults.py +13 -55
- runnable/entrypoints.py +270 -183
- runnable/exceptions.py +28 -2
- runnable/executor.py +133 -86
- runnable/graph.py +37 -13
- runnable/nodes.py +50 -22
- runnable/parameters.py +27 -8
- runnable/pickler.py +1 -1
- runnable/sdk.py +230 -66
- runnable/secrets.py +3 -1
- runnable/tasks.py +99 -41
- runnable/utils.py +59 -39
- {runnable-0.13.0.dist-info → runnable-0.16.0.dist-info}/METADATA +28 -31
- runnable-0.16.0.dist-info/RECORD +23 -0
- {runnable-0.13.0.dist-info → runnable-0.16.0.dist-info}/WHEEL +1 -1
- runnable-0.16.0.dist-info/entry_points.txt +45 -0
- runnable/extensions/__init__.py +0 -0
- runnable/extensions/catalog/__init__.py +0 -21
- runnable/extensions/catalog/file_system/__init__.py +0 -0
- runnable/extensions/catalog/file_system/implementation.py +0 -234
- runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
- runnable/extensions/catalog/k8s_pvc/implementation.py +0 -16
- runnable/extensions/catalog/k8s_pvc/integration.py +0 -59
- runnable/extensions/executor/__init__.py +0 -649
- runnable/extensions/executor/argo/__init__.py +0 -0
- runnable/extensions/executor/argo/implementation.py +0 -1194
- runnable/extensions/executor/argo/specification.yaml +0 -51
- runnable/extensions/executor/k8s_job/__init__.py +0 -0
- runnable/extensions/executor/k8s_job/implementation_FF.py +0 -259
- runnable/extensions/executor/k8s_job/integration_FF.py +0 -69
- runnable/extensions/executor/local.py +0 -69
- runnable/extensions/executor/local_container/__init__.py +0 -0
- runnable/extensions/executor/local_container/implementation.py +0 -446
- runnable/extensions/executor/mocked/__init__.py +0 -0
- runnable/extensions/executor/mocked/implementation.py +0 -154
- runnable/extensions/executor/retry/__init__.py +0 -0
- runnable/extensions/executor/retry/implementation.py +0 -168
- runnable/extensions/nodes.py +0 -870
- runnable/extensions/run_log_store/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_file_system/implementation.py +0 -111
- runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +0 -21
- runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +0 -61
- runnable/extensions/run_log_store/db/implementation_FF.py +0 -157
- runnable/extensions/run_log_store/db/integration_FF.py +0 -0
- runnable/extensions/run_log_store/file_system/__init__.py +0 -0
- runnable/extensions/run_log_store/file_system/implementation.py +0 -140
- runnable/extensions/run_log_store/generic_chunked.py +0 -557
- runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
- runnable/extensions/run_log_store/k8s_pvc/implementation.py +0 -21
- runnable/extensions/run_log_store/k8s_pvc/integration.py +0 -56
- runnable/extensions/secrets/__init__.py +0 -0
- runnable/extensions/secrets/dotenv/__init__.py +0 -0
- runnable/extensions/secrets/dotenv/implementation.py +0 -100
- runnable/integration.py +0 -192
- runnable-0.13.0.dist-info/RECORD +0 -63
- runnable-0.13.0.dist-info/entry_points.txt +0 -41
- {runnable-0.13.0.dist-info → runnable-0.16.0.dist-info/licenses}/LICENSE +0 -0
runnable/entrypoints.py
CHANGED
@@ -9,12 +9,16 @@ from rich.progress import BarColumn, Progress, TextColumn, TimeElapsedColumn
 from rich.table import Column
 
 import runnable.context as context
-from runnable import console, defaults, graph, task_console, utils
+from runnable import console, defaults, graph, task_console, tasks, utils
 from runnable.defaults import RunnableConfig, ServiceConfig
+from runnable.executor import BaseJobExecutor, BasePipelineExecutor
 
 logger = logging.getLogger(defaults.LOGGER_NAME)
 
 
+print("")  # removes the buffer print
+
+
 def get_default_configs() -> RunnableConfig:
     """
     User can provide extensions as part of their code base, runnable-config.yaml provides the place to put them.
@@ -29,19 +33,18 @@ def get_default_configs() -> RunnableConfig:
 def prepare_configurations(
     run_id: str,
     configuration_file: str = "",
-    pipeline_file: str = "",
     tag: str = "",
     parameters_file: str = "",
-
+    is_job: bool = False,
 ) -> context.Context:
     """
+    Sets up everything needed
     Replace the placeholders in the dag/config against the variables file.
 
     Attach the secrets_handler, run_log_store, catalog_handler to the executor and return it.
 
     Args:
         variables_file (str): The variables file, if used or None
-        pipeline_file (str): The config/dag file
         run_id (str): The run id of the run.
         tag (str): If a tag is provided at the run time
 
@@ -53,45 +56,87 @@ def prepare_configurations(
     variables = utils.gather_variables()
 
     templated_configuration = {}
-    configuration_file = os.environ.get(
+    configuration_file = os.environ.get(
+        "RUNNABLE_CONFIGURATION_FILE", configuration_file
+    )
 
     if configuration_file:
-        templated_configuration = utils.load_yaml(configuration_file)
+        templated_configuration = utils.load_yaml(configuration_file)
 
-
+    # apply variables
+    configuration = cast(
+        RunnableConfig, utils.apply_variables(templated_configuration, variables)
+    )
+
+    # Since all the services (run_log_store, catalog, secrets, executor) are
+    # dynamically loaded via stevedore, we cannot validate the configuration
+    # before they are passed to the service.
 
     logger.info(f"Resolved configurations: {configuration}")
 
     # Run log settings, configuration over-rides everything
-
+    # The user config has run-log-store while internally we use run_log_store
+    run_log_config: Optional[ServiceConfig] = configuration.get("run-log-store", None)  # type: ignore
     if not run_log_config:
-        run_log_config = cast(
+        run_log_config = cast(
+            ServiceConfig,
+            runnable_defaults.get("run-log-store", defaults.DEFAULT_RUN_LOG_STORE),
+        )
     run_log_store = utils.get_provider_by_name_and_type("run_log_store", run_log_config)
 
     # Catalog handler settings, configuration over-rides everything
     catalog_config: Optional[ServiceConfig] = configuration.get("catalog", None)
     if not catalog_config:
-        catalog_config = cast(
+        catalog_config = cast(
+            ServiceConfig, runnable_defaults.get("catalog", defaults.DEFAULT_CATALOG)
+        )
     catalog_handler = utils.get_provider_by_name_and_type("catalog", catalog_config)
 
     # Secret handler settings, configuration over-rides everything
     secrets_config: Optional[ServiceConfig] = configuration.get("secrets", None)
     if not secrets_config:
-        secrets_config = cast(
+        secrets_config = cast(
+            ServiceConfig, runnable_defaults.get("secrets", defaults.DEFAULT_SECRETS)
+        )
     secrets_handler = utils.get_provider_by_name_and_type("secrets", secrets_config)
 
     # pickler
-    pickler_config = cast(
+    pickler_config = cast(
+        ServiceConfig, runnable_defaults.get("pickler", defaults.DEFAULT_PICKLER)
+    )
     pickler_handler = utils.get_provider_by_name_and_type("pickler", pickler_config)
 
-
-
-
-
-
-
-
-
+    if not is_job:
+        # executor configurations, configuration over rides everything
+        executor_config: Optional[ServiceConfig] = configuration.get(
+            "pipeline-executor", None
+        )  # type: ignore
+        # as pipeline-executor is not a valid key
+        if not executor_config:
+            executor_config = cast(
+                ServiceConfig,
+                runnable_defaults.get(
+                    "pipeline-executor", defaults.DEFAULT_PIPELINE_EXECUTOR
+                ),
+            )
+        configured_executor = utils.get_provider_by_name_and_type(
+            "pipeline_executor", executor_config
+        )
+    else:
+        # executor configurations, configuration over rides everything
+        job_executor_config: Optional[ServiceConfig] = configuration.get(
+            "job-executor", None
+        )  # type: ignore
+        if not job_executor_config:
+            executor_config = cast(
+                ServiceConfig,
+                runnable_defaults.get("job-executor", defaults.DEFAULT_JOB_EXECUTOR),
+            )
+
+        assert job_executor_config, "Job executor is not provided"
+        configured_executor = utils.get_provider_by_name_and_type(
+            "job_executor", job_executor_config
+        )
 
     # Construct the context
     run_context = context.Context(
@@ -107,38 +152,45 @@ def prepare_configurations(
         parameters_file=parameters_file,
     )
 
-
-    if pipeline_file.endswith(".py"):
-        # converting a pipeline defined in python to a dag in yaml
-        module_file = pipeline_file.strip(".py")
-        module, func = utils.get_module_and_attr_names(module_file)
-        sys.path.insert(0, os.getcwd())  # Need to add the current directory to path
-        imported_module = importlib.import_module(module)
+    context.run_context = run_context
 
-
-        dag = getattr(imported_module, func)().return_dag()
+    return run_context
 
-    else:
-        pipeline_config = utils.load_yaml(pipeline_file)
 
-
-
+def set_pipeline_spec_from_yaml(run_context: context.Context, pipeline_file: str):
+    """
+    Reads the pipeline file from a YAML file and sets the pipeline spec in the run context
+    """
+    pipeline_config = utils.load_yaml(pipeline_file)
+    logger.info("The input pipeline:")
+    logger.info(json.dumps(pipeline_config, indent=4))
 
-
+    dag_config = pipeline_config["dag"]
 
-
-
-
+    dag_hash = utils.get_dag_hash(dag_config)
+    dag = graph.create_graph(dag_config)
+    run_context.dag_hash = dag_hash
 
-
-
+    run_context.pipeline_file = pipeline_file
+    run_context.dag = dag
 
-    context.run_context = run_context
 
-
+def set_pipeline_spec_from_python(run_context: context.Context, python_module: str):
+    # Call the SDK to get the dag
+    # Import the module and call the function to get the dag
+    module_file = python_module.strip(".py")
+    module, func = utils.get_module_and_attr_names(module_file)
+    sys.path.insert(0, os.getcwd())  # Need to add the current directory to path
+    imported_module = importlib.import_module(module)
 
+    run_context.from_sdk = True
+    dag = getattr(imported_module, func)().return_dag()
 
-
+    run_context.pipeline_file = python_module
+    run_context.dag = dag
+
+
+def execute_pipeline_yaml_spec(
     pipeline_file: str,
     configuration_file: str = "",
     tag: str = "",
@@ -147,66 +199,82 @@ def execute(
 ):
     # pylint: disable=R0914,R0913
     """
-    The entry point to runnable execution
-
-
-
-        pipeline_file (str): The config/dag file
-        run_id (str): The run id of the run.
-        tag (str): If a tag is provided at the run time
-        parameters_file (str): The parameters being sent in to the application
+    The entry point to runnable execution for any YAML based spec.
+    The result could:
+    - Execution of the pipeline if its local executor
+    - Rendering of the spec in the case of non local executor
     """
     run_id = utils.generate_run_id(run_id=run_id)
 
     run_context = prepare_configurations(
         configuration_file=configuration_file,
-        pipeline_file=pipeline_file,
         run_id=run_id,
         tag=tag,
         parameters_file=parameters_file,
     )
 
-
-    console.print(run_context)
-    console.rule(style="[dark orange]")
+    assert isinstance(run_context.executor, BasePipelineExecutor)
 
+    set_pipeline_spec_from_yaml(run_context, pipeline_file)
     executor = run_context.executor
 
-
-
-
+    utils.set_runnable_environment_variables(
+        run_id=run_id, configuration_file=configuration_file, tag=tag
+    )
 
     # Prepare for graph execution
-    executor.
+    executor._set_up_run_log(exists_ok=False)
+
+    console.print("Working with context:")
+    console.print(run_context)
+    console.rule(style="[dark orange]")
 
     logger.info(f"Executing the graph: {run_context.dag}")
     with Progress(
-        TextColumn(
+        TextColumn(
+            "[progress.description]{task.description}", table_column=Column(ratio=2)
+        ),
         BarColumn(table_column=Column(ratio=1), style="dark_orange"),
         TimeElapsedColumn(table_column=Column(ratio=1)),
         console=console,
         expand=True,
     ) as progress:
-        pipeline_execution_task = progress.add_task(
+        pipeline_execution_task = progress.add_task(
+            "[dark_orange] Starting execution .. ", total=1
+        )
         try:
             run_context.progress = progress
             executor.execute_graph(dag=run_context.dag)  # type: ignore
 
-
-
+            if not executor._is_local:
+                # Non local executors only traverse the graph and do not execute the nodes
                 executor.send_return_code(stage="traversal")
                 return
 
-            run_log = run_context.run_log_store.get_run_log_by_id(
+            run_log = run_context.run_log_store.get_run_log_by_id(
+                run_id=run_context.run_id, full=False
+            )
 
             if run_log.status == defaults.SUCCESS:
-                progress.update(
+                progress.update(
+                    pipeline_execution_task,
+                    description="[green] Success",
+                    completed=True,
+                )
             else:
-                progress.update(
+                progress.update(
+                    pipeline_execution_task, description="[red] Failed", completed=True
+                )
         except Exception as e:  # noqa: E722
             console.print(e, style=defaults.error_style)
-            progress.update(
-
+            progress.update(
+                pipeline_execution_task,
+                description="[red] Errored execution",
+                completed=True,
+            )
+            run_log = run_context.run_log_store.get_run_log_by_id(
+                run_id=run_context.run_id, full=False
+            )
             run_log.status = defaults.FAIL
             run_context.run_log_store.add_branch_log(run_log, run_context.run_id)
             raise e
@@ -219,62 +287,64 @@ def execute_single_node(
     pipeline_file: str,
     step_name: str,
     map_variable: str,
+    mode: str,
     run_id: str,
     tag: str = "",
     parameters_file: str = "",
 ):
     """
-
-
-
-    It should have similar set up of configurations to execute because orchestrator modes can initiate the execution.
-
-    Args:
-        variables_file (str): The variables file, if used or None
-        step_name : The name of the step to execute in dot path convention
-        pipeline_file (str): The config/dag file
-        run_id (str): The run id of the run.
-        tag (str): If a tag is provided at the run time
-        parameters_file (str): The parameters being sent in to the application
+    This entry point is triggered during the execution of the pipeline
+    - non local execution environments
 
+    The mode defines how the pipeline spec is provided to the runnable
+    - yaml
+    - python
     """
     from runnable import nodes
 
-    task_console.print(
+    task_console.print(
+        f"Executing the single node: {step_name} with map variable: {map_variable}"
+    )
 
-    configuration_file = os.environ.get(
+    configuration_file = os.environ.get(
+        "RUNNABLE_CONFIGURATION_FILE", configuration_file
+    )
 
     run_context = prepare_configurations(
         configuration_file=configuration_file,
-        pipeline_file=pipeline_file,
         run_id=run_id,
         tag=tag,
         parameters_file=parameters_file,
     )
+    assert isinstance(run_context.executor, BasePipelineExecutor)
+
+    if mode == "yaml":
+        # Load the yaml file
+        set_pipeline_spec_from_yaml(run_context, pipeline_file)
+    elif mode == "python":
+        # Call the SDK to get the dag
+        set_pipeline_spec_from_python(run_context, pipeline_file)
+
+    assert run_context.dag
+
     task_console.print("Working with context:")
     task_console.print(run_context)
     task_console.rule(style="[dark orange]")
 
     executor = run_context.executor
-
-
-
-    executor.prepare_for_node_execution()
-
-    # TODO: may be make its own entry point
-    # if not run_context.dag:
-    #     # There are a few entry points that make graph dynamically and do not have a dag defined statically.
-    #     run_log = run_context.run_log_store.get_run_log_by_id(run_id=run_id, full=False)
-    #     run_context.dag = graph.create_graph(run_log.run_config["pipeline"])
-    assert run_context.dag
+    utils.set_runnable_environment_variables(
+        run_id=run_id, configuration_file=configuration_file, tag=tag
+    )
 
     map_variable_dict = utils.json_to_ordered_dict(map_variable)
 
     step_internal_name = nodes.BaseNode._get_internal_name_from_command_name(step_name)
-    node_to_execute, _ = graph.search_node_by_internal_name(
+    node_to_execute, _ = graph.search_node_by_internal_name(
+        run_context.dag, step_internal_name
+    )
 
     logger.info("Executing the single node of : %s", node_to_execute)
-    ## This step is where we save the
+    ## This step is where we save output of the function/shell command
     try:
         executor.execute_node(node=node_to_execute, map_variable=map_variable_dict)
     finally:
@@ -288,23 +358,15 @@ def execute_single_node(
         run_context.catalog_handler.put(name=log_file_name, run_id=run_context.run_id)
         os.remove(log_file_name)
 
-    # executor.send_return_code(stage="execution")
-
 
-def
-
-
-    catalog_config: dict,
-    configuration_file: str,
-    notebook_output_path: str = "",
+def execute_job_yaml_spec(
+    job_definition_file: str,
+    configuration_file: str = "",
     tag: str = "",
     run_id: str = "",
     parameters_file: str = "",
 ):
-
-    The entry point to runnable execution of a notebook. This method would prepare the configurations and
-    delegates traversal to the executor
-    """
+    # A job and task are internally the same.
     run_id = utils.generate_run_id(run_id=run_id)
 
     run_context = prepare_configurations(
@@ -312,61 +374,87 @@ def execute_notebook(
         run_id=run_id,
         tag=tag,
         parameters_file=parameters_file,
+        is_job=True,
     )
 
+    assert isinstance(run_context.executor, BaseJobExecutor)
+
     executor = run_context.executor
-
-
+    utils.set_runnable_environment_variables(
+        run_id=run_id, configuration_file=configuration_file, tag=tag
+    )
+
+    run_context.job_definition_file = job_definition_file
+
+    job_config = utils.load_yaml(job_definition_file)
+    logger.info(
+        "Executing the job from the user."
+        f"job definition: {job_definition_file}, config: {job_config}"
+    )
+    assert job_config.get("type"), "Job type is not provided"
 
     console.print("Working with context:")
     console.print(run_context)
     console.rule(style="[dark orange]")
 
-
-
-        "command_type": "notebook",
-        "notebook_output_path": notebook_output_path,
-        "type": "task",
-        "next": "success",
-        "catalog": catalog_config,
-    }
-    node = graph.create_node(name="executing job", step_config=step_config)
+    # A hack where we create a task node and get our job/catalog settings
+    catalog_config: list[str] = job_config.pop("catalog", {})
 
-
-
-
+    # rename the type to command_type of task
+    job_config["command_type"] = job_config.pop("type")
+    job = tasks.create_task(job_config)
 
-
-
+    logger.info(
+        "Executing the job from the user. We are still in the caller's compute environment"
+    )
 
-
-
-    logger.info("Executing the job from the system. We are in the config's compute environment")
-    executor.execute_node(node=node)
+    assert isinstance(executor, BaseJobExecutor)
+    executor.submit_job(job, catalog_settings=catalog_config)
 
-
-    step_log = run_context.run_log_store.get_step_log(node._get_step_log_name(), run_id)
-    run_context.run_log_store.update_run_log_status(run_id=run_id, status=step_log.status)
+    executor.send_return_code()
 
-    else:
-        raise ValueError(f"Invalid entrypoint {entrypoint}")
 
-
+def set_job_spec_from_yaml(run_context: context.Context, job_definition_file: str):
+    """
+    Reads the pipeline file from a YAML file and sets the pipeline spec in the run context
+    """
+    job_config = utils.load_yaml(job_definition_file)
+    logger.info("The input job definition file:")
+    logger.info(json.dumps(job_config, indent=4))
 
+    catalog_config: list[str] = job_config.pop("catalog", {})
 
-
-
-
-
-
+    job_config["command_type"] = job_config.pop("type")
+
+    run_context.job_definition_file = job_definition_file
+    run_context.job = tasks.create_task(job_config)
+    run_context.job_catalog_settings = catalog_config
+
+
+def set_job_spec_from_python(run_context: context.Context, python_module: str):
+    # Import the module and call the function to get the task
+    module_file = python_module.strip(".py")
+    module, func = utils.get_module_and_attr_names(module_file)
+    sys.path.insert(0, os.getcwd())  # Need to add the current directory to path
+    imported_module = importlib.import_module(module)
+
+    run_context.from_sdk = True
+    task = getattr(imported_module, func)().return_task()
+    catalog_settings = getattr(imported_module, func)().return_catalog_settings()
+
+    run_context.job_definition_file = python_module
+    run_context.job = task
+    run_context.job_catalog_settings = catalog_settings
+
+
+def execute_job_non_local(
+    job_definition_file: str,
+    configuration_file: str = "",
+    mode: str = "yaml",
     tag: str = "",
     run_id: str = "",
     parameters_file: str = "",
 ):
-    """
-    The entry point to runnable execution of a function. This method would prepare the configurations and
-    delegates traversal to the executor
-    """
     run_id = utils.generate_run_id(run_id=run_id)
 
     run_context = prepare_configurations(
@@ -374,47 +462,33 @@ def execute_function(
         run_id=run_id,
         tag=tag,
         parameters_file=parameters_file,
+        is_job=True,
     )
 
-
+    assert isinstance(run_context.executor, BaseJobExecutor)
 
-
-
+    if mode == "yaml":
+        # Load the yaml file
+        set_job_spec_from_yaml(run_context, job_definition_file)
+    elif mode == "python":
+        # Call the SDK to get the task
+        set_job_spec_from_python(run_context, job_definition_file)
+
+    assert run_context.job
 
     console.print("Working with context:")
     console.print(run_context)
     console.rule(style="[dark orange]")
 
-
-
-
-        "command_type": "python",
-        "type": "task",
-        "next": "success",
-        "catalog": catalog_config,
-    }
-    node = graph.create_node(name="executing job", step_config=step_config)
-
-    if entrypoint == defaults.ENTRYPOINT.USER.value:
-        # Prepare for graph execution
-        executor.prepare_for_graph_execution()
-
-        logger.info("Executing the job from the user. We are still in the caller's compute environment")
-        executor.execute_job(node=node)
-
-    elif entrypoint == defaults.ENTRYPOINT.SYSTEM.value:
-        executor.prepare_for_node_execution()
-        logger.info("Executing the job from the system. We are in the config's compute environment")
-        executor.execute_node(node=node)
-
-    # Update the status of the run log
-    step_log = run_context.run_log_store.get_step_log(node._get_step_log_name(), run_id)
-    run_context.run_log_store.update_run_log_status(run_id=run_id, status=step_log.status)
+    logger.info(
+        "Executing the job from the user. We are still in the caller's compute environment"
+    )
 
-
-
+    run_context.executor.execute_job(
+        run_context.job, catalog_settings=run_context.job_catalog_settings
+    )
 
-    executor.send_return_code()
+    run_context.executor.send_return_code()
 
 
 def fan(
@@ -444,27 +518,40 @@ def fan(
     """
     from runnable import nodes
 
-    configuration_file = os.environ.get(
+    configuration_file = os.environ.get(
+        "RUNNABLE_CONFIGURATION_FILE", configuration_file
+    )
 
     run_context = prepare_configurations(
         configuration_file=configuration_file,
-        pipeline_file=pipeline_file,
         run_id=run_id,
        tag=tag,
         parameters_file=parameters_file,
     )
+
+    assert isinstance(run_context.executor, BasePipelineExecutor)
+
+    if mode == "yaml":
+        # Load the yaml file
+        set_pipeline_spec_from_yaml(run_context, pipeline_file)
+    elif mode == "python":
+        # Call the SDK to get the dag
+        set_pipeline_spec_from_python(run_context, pipeline_file)
+
     console.print("Working with context:")
     console.print(run_context)
     console.rule(style="[dark orange]")
 
     executor = run_context.executor
-
-
-
-    executor.prepare_for_node_execution()
+    utils.set_runnable_environment_variables(
+        run_id=run_id, configuration_file=configuration_file, tag=tag
+    )
 
     step_internal_name = nodes.BaseNode._get_internal_name_from_command_name(step_name)
-    node_to_execute, _ = graph.search_node_by_internal_name(
+    node_to_execute, _ = graph.search_node_by_internal_name(
+        run_context.dag,  # type: ignore
+        step_internal_name,
+    )
 
     map_variable_dict = utils.json_to_ordered_dict(map_variable)
 
@@ -478,6 +565,6 @@ def fan(
         raise ValueError(f"Invalid mode {mode}")
 
 
-if __name__ == "__main__":
-
-
+# if __name__ == "__main__":
+#     # This is only for perf testing purposes.
+#     prepare_configurations(run_id="abc", pipeline_file="examples/mocking.yaml")