runnable 0.13.0-py3-none-any.whl → 0.16.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runnable/__init__.py +1 -12
- runnable/catalog.py +29 -5
- runnable/cli.py +268 -215
- runnable/context.py +10 -3
- runnable/datastore.py +212 -53
- runnable/defaults.py +13 -55
- runnable/entrypoints.py +270 -183
- runnable/exceptions.py +28 -2
- runnable/executor.py +133 -86
- runnable/graph.py +37 -13
- runnable/nodes.py +50 -22
- runnable/parameters.py +27 -8
- runnable/pickler.py +1 -1
- runnable/sdk.py +230 -66
- runnable/secrets.py +3 -1
- runnable/tasks.py +99 -41
- runnable/utils.py +59 -39
- {runnable-0.13.0.dist-info → runnable-0.16.0.dist-info}/METADATA +28 -31
- runnable-0.16.0.dist-info/RECORD +23 -0
- {runnable-0.13.0.dist-info → runnable-0.16.0.dist-info}/WHEEL +1 -1
- runnable-0.16.0.dist-info/entry_points.txt +45 -0
- runnable/extensions/__init__.py +0 -0
- runnable/extensions/catalog/__init__.py +0 -21
- runnable/extensions/catalog/file_system/__init__.py +0 -0
- runnable/extensions/catalog/file_system/implementation.py +0 -234
- runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
- runnable/extensions/catalog/k8s_pvc/implementation.py +0 -16
- runnable/extensions/catalog/k8s_pvc/integration.py +0 -59
- runnable/extensions/executor/__init__.py +0 -649
- runnable/extensions/executor/argo/__init__.py +0 -0
- runnable/extensions/executor/argo/implementation.py +0 -1194
- runnable/extensions/executor/argo/specification.yaml +0 -51
- runnable/extensions/executor/k8s_job/__init__.py +0 -0
- runnable/extensions/executor/k8s_job/implementation_FF.py +0 -259
- runnable/extensions/executor/k8s_job/integration_FF.py +0 -69
- runnable/extensions/executor/local.py +0 -69
- runnable/extensions/executor/local_container/__init__.py +0 -0
- runnable/extensions/executor/local_container/implementation.py +0 -446
- runnable/extensions/executor/mocked/__init__.py +0 -0
- runnable/extensions/executor/mocked/implementation.py +0 -154
- runnable/extensions/executor/retry/__init__.py +0 -0
- runnable/extensions/executor/retry/implementation.py +0 -168
- runnable/extensions/nodes.py +0 -870
- runnable/extensions/run_log_store/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_file_system/implementation.py +0 -111
- runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +0 -21
- runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +0 -61
- runnable/extensions/run_log_store/db/implementation_FF.py +0 -157
- runnable/extensions/run_log_store/db/integration_FF.py +0 -0
- runnable/extensions/run_log_store/file_system/__init__.py +0 -0
- runnable/extensions/run_log_store/file_system/implementation.py +0 -140
- runnable/extensions/run_log_store/generic_chunked.py +0 -557
- runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
- runnable/extensions/run_log_store/k8s_pvc/implementation.py +0 -21
- runnable/extensions/run_log_store/k8s_pvc/integration.py +0 -56
- runnable/extensions/secrets/__init__.py +0 -0
- runnable/extensions/secrets/dotenv/__init__.py +0 -0
- runnable/extensions/secrets/dotenv/implementation.py +0 -100
- runnable/integration.py +0 -192
- runnable-0.13.0.dist-info/RECORD +0 -63
- runnable-0.13.0.dist-info/entry_points.txt +0 -41
- {runnable-0.13.0.dist-info → runnable-0.16.0.dist-info/licenses}/LICENSE +0 -0
runnable/entrypoints.py
CHANGED
@@ -9,12 +9,16 @@ from rich.progress import BarColumn, Progress, TextColumn, TimeElapsedColumn
 from rich.table import Column

 import runnable.context as context
-from runnable import console, defaults, graph, task_console, utils
+from runnable import console, defaults, graph, task_console, tasks, utils
 from runnable.defaults import RunnableConfig, ServiceConfig
+from runnable.executor import BaseJobExecutor, BasePipelineExecutor

 logger = logging.getLogger(defaults.LOGGER_NAME)


+print("")  # removes the buffer print
+
+
 def get_default_configs() -> RunnableConfig:
     """
     User can provide extensions as part of their code base, runnable-config.yaml provides the place to put them.
@@ -29,19 +33,18 @@ def get_default_configs() -> RunnableConfig:
 def prepare_configurations(
     run_id: str,
     configuration_file: str = "",
-    pipeline_file: str = "",
     tag: str = "",
     parameters_file: str = "",
-
+    is_job: bool = False,
 ) -> context.Context:
     """
+    Sets up everything needed
     Replace the placeholders in the dag/config against the variables file.

     Attach the secrets_handler, run_log_store, catalog_handler to the executor and return it.

     Args:
         variables_file (str): The variables file, if used or None
-        pipeline_file (str): The config/dag file
         run_id (str): The run id of the run.
         tag (str): If a tag is provided at the run time

@@ -53,45 +56,87 @@ def prepare_configurations(
     variables = utils.gather_variables()

     templated_configuration = {}
-    configuration_file = os.environ.get(
+    configuration_file = os.environ.get(
+        "RUNNABLE_CONFIGURATION_FILE", configuration_file
+    )

     if configuration_file:
-        templated_configuration = utils.load_yaml(configuration_file)
+        templated_configuration = utils.load_yaml(configuration_file)

-
+    # apply variables
+    configuration = cast(
+        RunnableConfig, utils.apply_variables(templated_configuration, variables)
+    )
+
+    # Since all the services (run_log_store, catalog, secrets, executor) are
+    # dynamically loaded via stevedore, we cannot validate the configuration
+    # before they are passed to the service.

     logger.info(f"Resolved configurations: {configuration}")

     # Run log settings, configuration over-rides everything
-
+    # The user config has run-log-store while internally we use run_log_store
+    run_log_config: Optional[ServiceConfig] = configuration.get("run-log-store", None)  # type: ignore
     if not run_log_config:
-        run_log_config = cast(
+        run_log_config = cast(
+            ServiceConfig,
+            runnable_defaults.get("run-log-store", defaults.DEFAULT_RUN_LOG_STORE),
+        )
     run_log_store = utils.get_provider_by_name_and_type("run_log_store", run_log_config)

     # Catalog handler settings, configuration over-rides everything
     catalog_config: Optional[ServiceConfig] = configuration.get("catalog", None)
     if not catalog_config:
-        catalog_config = cast(
+        catalog_config = cast(
+            ServiceConfig, runnable_defaults.get("catalog", defaults.DEFAULT_CATALOG)
+        )
     catalog_handler = utils.get_provider_by_name_and_type("catalog", catalog_config)

     # Secret handler settings, configuration over-rides everything
     secrets_config: Optional[ServiceConfig] = configuration.get("secrets", None)
     if not secrets_config:
-        secrets_config = cast(
+        secrets_config = cast(
+            ServiceConfig, runnable_defaults.get("secrets", defaults.DEFAULT_SECRETS)
+        )
     secrets_handler = utils.get_provider_by_name_and_type("secrets", secrets_config)

     # pickler
-    pickler_config = cast(
+    pickler_config = cast(
+        ServiceConfig, runnable_defaults.get("pickler", defaults.DEFAULT_PICKLER)
+    )
     pickler_handler = utils.get_provider_by_name_and_type("pickler", pickler_config)

-
-
-
-
-
-
-
-
+    if not is_job:
+        # executor configurations, configuration over rides everything
+        executor_config: Optional[ServiceConfig] = configuration.get(
+            "pipeline-executor", None
+        )  # type: ignore
+        # as pipeline-executor is not a valid key
+        if not executor_config:
+            executor_config = cast(
+                ServiceConfig,
+                runnable_defaults.get(
+                    "pipeline-executor", defaults.DEFAULT_PIPELINE_EXECUTOR
+                ),
+            )
+        configured_executor = utils.get_provider_by_name_and_type(
+            "pipeline_executor", executor_config
+        )
+    else:
+        # executor configurations, configuration over rides everything
+        job_executor_config: Optional[ServiceConfig] = configuration.get(
+            "job-executor", None
+        )  # type: ignore
+        if not job_executor_config:
+            executor_config = cast(
+                ServiceConfig,
+                runnable_defaults.get("job-executor", defaults.DEFAULT_JOB_EXECUTOR),
+            )
+
+        assert job_executor_config, "Job executor is not provided"
+        configured_executor = utils.get_provider_by_name_and_type(
+            "job_executor", job_executor_config
+        )

     # Construct the context
     run_context = context.Context(
@@ -107,38 +152,45 @@ def prepare_configurations(
         parameters_file=parameters_file,
     )

-
-    if pipeline_file.endswith(".py"):
-        # converting a pipeline defined in python to a dag in yaml
-        module_file = pipeline_file.strip(".py")
-        module, func = utils.get_module_and_attr_names(module_file)
-        sys.path.insert(0, os.getcwd())  # Need to add the current directory to path
-        imported_module = importlib.import_module(module)
+    context.run_context = run_context

-
-        dag = getattr(imported_module, func)().return_dag()
+    return run_context

-    else:
-        pipeline_config = utils.load_yaml(pipeline_file)

-
-
+def set_pipeline_spec_from_yaml(run_context: context.Context, pipeline_file: str):
+    """
+    Reads the pipeline file from a YAML file and sets the pipeline spec in the run context
+    """
+    pipeline_config = utils.load_yaml(pipeline_file)
+    logger.info("The input pipeline:")
+    logger.info(json.dumps(pipeline_config, indent=4))

-
+    dag_config = pipeline_config["dag"]

-
-
-
+    dag_hash = utils.get_dag_hash(dag_config)
+    dag = graph.create_graph(dag_config)
+    run_context.dag_hash = dag_hash

-
-
+    run_context.pipeline_file = pipeline_file
+    run_context.dag = dag

-    context.run_context = run_context

-
+def set_pipeline_spec_from_python(run_context: context.Context, python_module: str):
+    # Call the SDK to get the dag
+    # Import the module and call the function to get the dag
+    module_file = python_module.strip(".py")
+    module, func = utils.get_module_and_attr_names(module_file)
+    sys.path.insert(0, os.getcwd())  # Need to add the current directory to path
+    imported_module = importlib.import_module(module)

+    run_context.from_sdk = True
+    dag = getattr(imported_module, func)().return_dag()

-
+    run_context.pipeline_file = python_module
+    run_context.dag = dag
+
+
+def execute_pipeline_yaml_spec(
     pipeline_file: str,
     configuration_file: str = "",
     tag: str = "",
@@ -147,66 +199,82 @@ def execute(
 ):
     # pylint: disable=R0914,R0913
     """
-    The entry point to runnable execution
-
-
-
-        pipeline_file (str): The config/dag file
-        run_id (str): The run id of the run.
-        tag (str): If a tag is provided at the run time
-        parameters_file (str): The parameters being sent in to the application
+    The entry point to runnable execution for any YAML based spec.
+    The result could:
+    - Execution of the pipeline if its local executor
+    - Rendering of the spec in the case of non local executor
     """
     run_id = utils.generate_run_id(run_id=run_id)

     run_context = prepare_configurations(
         configuration_file=configuration_file,
-        pipeline_file=pipeline_file,
         run_id=run_id,
         tag=tag,
         parameters_file=parameters_file,
     )

-
-    console.print(run_context)
-    console.rule(style="[dark orange]")
+    assert isinstance(run_context.executor, BasePipelineExecutor)

+    set_pipeline_spec_from_yaml(run_context, pipeline_file)
     executor = run_context.executor

-
-
-
+    utils.set_runnable_environment_variables(
+        run_id=run_id, configuration_file=configuration_file, tag=tag
+    )

     # Prepare for graph execution
-    executor.
+    executor._set_up_run_log(exists_ok=False)
+
+    console.print("Working with context:")
+    console.print(run_context)
+    console.rule(style="[dark orange]")

     logger.info(f"Executing the graph: {run_context.dag}")
     with Progress(
-        TextColumn(
+        TextColumn(
+            "[progress.description]{task.description}", table_column=Column(ratio=2)
+        ),
         BarColumn(table_column=Column(ratio=1), style="dark_orange"),
         TimeElapsedColumn(table_column=Column(ratio=1)),
         console=console,
         expand=True,
     ) as progress:
-        pipeline_execution_task = progress.add_task(
+        pipeline_execution_task = progress.add_task(
+            "[dark_orange] Starting execution .. ", total=1
+        )
         try:
             run_context.progress = progress
             executor.execute_graph(dag=run_context.dag)  # type: ignore

-
-
+            if not executor._is_local:
+                # Non local executors only traverse the graph and do not execute the nodes
                 executor.send_return_code(stage="traversal")
                 return

-            run_log = run_context.run_log_store.get_run_log_by_id(
+            run_log = run_context.run_log_store.get_run_log_by_id(
+                run_id=run_context.run_id, full=False
+            )

             if run_log.status == defaults.SUCCESS:
-                progress.update(
+                progress.update(
+                    pipeline_execution_task,
+                    description="[green] Success",
+                    completed=True,
+                )
             else:
-                progress.update(
+                progress.update(
+                    pipeline_execution_task, description="[red] Failed", completed=True
+                )
         except Exception as e:  # noqa: E722
             console.print(e, style=defaults.error_style)
-            progress.update(
-
+            progress.update(
+                pipeline_execution_task,
+                description="[red] Errored execution",
+                completed=True,
+            )
+            run_log = run_context.run_log_store.get_run_log_by_id(
+                run_id=run_context.run_id, full=False
+            )
             run_log.status = defaults.FAIL
             run_context.run_log_store.add_branch_log(run_log, run_context.run_id)
             raise e
@@ -219,62 +287,64 @@ def execute_single_node(
     pipeline_file: str,
     step_name: str,
     map_variable: str,
+    mode: str,
     run_id: str,
     tag: str = "",
     parameters_file: str = "",
 ):
     """
-
-
-
-    It should have similar set up of configurations to execute because orchestrator modes can initiate the execution.
-
-    Args:
-        variables_file (str): The variables file, if used or None
-        step_name : The name of the step to execute in dot path convention
-        pipeline_file (str): The config/dag file
-        run_id (str): The run id of the run.
-        tag (str): If a tag is provided at the run time
-        parameters_file (str): The parameters being sent in to the application
+    This entry point is triggered during the execution of the pipeline
+    - non local execution environments

+    The mode defines how the pipeline spec is provided to the runnable
+    - yaml
+    - python
     """
     from runnable import nodes

-    task_console.print(
+    task_console.print(
+        f"Executing the single node: {step_name} with map variable: {map_variable}"
+    )

-    configuration_file = os.environ.get(
+    configuration_file = os.environ.get(
+        "RUNNABLE_CONFIGURATION_FILE", configuration_file
+    )

     run_context = prepare_configurations(
         configuration_file=configuration_file,
-        pipeline_file=pipeline_file,
         run_id=run_id,
         tag=tag,
         parameters_file=parameters_file,
     )
+    assert isinstance(run_context.executor, BasePipelineExecutor)
+
+    if mode == "yaml":
+        # Load the yaml file
+        set_pipeline_spec_from_yaml(run_context, pipeline_file)
+    elif mode == "python":
+        # Call the SDK to get the dag
+        set_pipeline_spec_from_python(run_context, pipeline_file)
+
+    assert run_context.dag
+
     task_console.print("Working with context:")
     task_console.print(run_context)
     task_console.rule(style="[dark orange]")

     executor = run_context.executor
-
-
-
-    executor.prepare_for_node_execution()
-
-    # TODO: may be make its own entry point
-    # if not run_context.dag:
-    # # There are a few entry points that make graph dynamically and do not have a dag defined statically.
-    # run_log = run_context.run_log_store.get_run_log_by_id(run_id=run_id, full=False)
-    # run_context.dag = graph.create_graph(run_log.run_config["pipeline"])
-    assert run_context.dag
+    utils.set_runnable_environment_variables(
+        run_id=run_id, configuration_file=configuration_file, tag=tag
+    )

     map_variable_dict = utils.json_to_ordered_dict(map_variable)

     step_internal_name = nodes.BaseNode._get_internal_name_from_command_name(step_name)
-    node_to_execute, _ = graph.search_node_by_internal_name(
+    node_to_execute, _ = graph.search_node_by_internal_name(
+        run_context.dag, step_internal_name
+    )

     logger.info("Executing the single node of : %s", node_to_execute)
-    ## This step is where we save the
+    ## This step is where we save output of the function/shell command
     try:
         executor.execute_node(node=node_to_execute, map_variable=map_variable_dict)
     finally:
@@ -288,23 +358,15 @@ def execute_single_node(
         run_context.catalog_handler.put(name=log_file_name, run_id=run_context.run_id)
         os.remove(log_file_name)

-    # executor.send_return_code(stage="execution")
-

-def
-
-
-    catalog_config: dict,
-    configuration_file: str,
-    notebook_output_path: str = "",
+def execute_job_yaml_spec(
+    job_definition_file: str,
+    configuration_file: str = "",
     tag: str = "",
     run_id: str = "",
     parameters_file: str = "",
 ):
-
-    The entry point to runnable execution of a notebook. This method would prepare the configurations and
-    delegates traversal to the executor
-    """
+    # A job and task are internally the same.
     run_id = utils.generate_run_id(run_id=run_id)

     run_context = prepare_configurations(
@@ -312,61 +374,87 @@ def execute_notebook(
         run_id=run_id,
         tag=tag,
         parameters_file=parameters_file,
+        is_job=True,
     )

+    assert isinstance(run_context.executor, BaseJobExecutor)
+
     executor = run_context.executor
-
-
+    utils.set_runnable_environment_variables(
+        run_id=run_id, configuration_file=configuration_file, tag=tag
+    )
+
+    run_context.job_definition_file = job_definition_file
+
+    job_config = utils.load_yaml(job_definition_file)
+    logger.info(
+        "Executing the job from the user."
+        f"job definition: {job_definition_file}, config: {job_config}"
+    )
+    assert job_config.get("type"), "Job type is not provided"

     console.print("Working with context:")
     console.print(run_context)
     console.rule(style="[dark orange]")

-
-
-        "command_type": "notebook",
-        "notebook_output_path": notebook_output_path,
-        "type": "task",
-        "next": "success",
-        "catalog": catalog_config,
-    }
-    node = graph.create_node(name="executing job", step_config=step_config)
+    # A hack where we create a task node and get our job/catalog settings
+    catalog_config: list[str] = job_config.pop("catalog", {})

-
-
-
+    # rename the type to command_type of task
+    job_config["command_type"] = job_config.pop("type")
+    job = tasks.create_task(job_config)

-
-
+    logger.info(
+        "Executing the job from the user. We are still in the caller's compute environment"
+    )

-
-
-    logger.info("Executing the job from the system. We are in the config's compute environment")
-    executor.execute_node(node=node)
+    assert isinstance(executor, BaseJobExecutor)
+    executor.submit_job(job, catalog_settings=catalog_config)

-
-    step_log = run_context.run_log_store.get_step_log(node._get_step_log_name(), run_id)
-    run_context.run_log_store.update_run_log_status(run_id=run_id, status=step_log.status)
+    executor.send_return_code()

-    else:
-        raise ValueError(f"Invalid entrypoint {entrypoint}")

-
+def set_job_spec_from_yaml(run_context: context.Context, job_definition_file: str):
+    """
+    Reads the pipeline file from a YAML file and sets the pipeline spec in the run context
+    """
+    job_config = utils.load_yaml(job_definition_file)
+    logger.info("The input job definition file:")
+    logger.info(json.dumps(job_config, indent=4))

+    catalog_config: list[str] = job_config.pop("catalog", {})

-
-
-
-
-
+    job_config["command_type"] = job_config.pop("type")
+
+    run_context.job_definition_file = job_definition_file
+    run_context.job = tasks.create_task(job_config)
+    run_context.job_catalog_settings = catalog_config
+
+
+def set_job_spec_from_python(run_context: context.Context, python_module: str):
+    # Import the module and call the function to get the task
+    module_file = python_module.strip(".py")
+    module, func = utils.get_module_and_attr_names(module_file)
+    sys.path.insert(0, os.getcwd())  # Need to add the current directory to path
+    imported_module = importlib.import_module(module)
+
+    run_context.from_sdk = True
+    task = getattr(imported_module, func)().return_task()
+    catalog_settings = getattr(imported_module, func)().return_catalog_settings()
+
+    run_context.job_definition_file = python_module
+    run_context.job = task
+    run_context.job_catalog_settings = catalog_settings
+
+
+def execute_job_non_local(
+    job_definition_file: str,
+    configuration_file: str = "",
+    mode: str = "yaml",
     tag: str = "",
     run_id: str = "",
     parameters_file: str = "",
 ):
-    """
-    The entry point to runnable execution of a function. This method would prepare the configurations and
-    delegates traversal to the executor
-    """
     run_id = utils.generate_run_id(run_id=run_id)

     run_context = prepare_configurations(
@@ -374,47 +462,33 @@ def execute_function(
         run_id=run_id,
         tag=tag,
         parameters_file=parameters_file,
+        is_job=True,
     )

-
+    assert isinstance(run_context.executor, BaseJobExecutor)

-
-
+    if mode == "yaml":
+        # Load the yaml file
+        set_job_spec_from_yaml(run_context, job_definition_file)
+    elif mode == "python":
+        # Call the SDK to get the task
+        set_job_spec_from_python(run_context, job_definition_file)
+
+    assert run_context.job

     console.print("Working with context:")
     console.print(run_context)
     console.rule(style="[dark orange]")

-
-
-
-        "command_type": "python",
-        "type": "task",
-        "next": "success",
-        "catalog": catalog_config,
-    }
-    node = graph.create_node(name="executing job", step_config=step_config)
-
-    if entrypoint == defaults.ENTRYPOINT.USER.value:
-        # Prepare for graph execution
-        executor.prepare_for_graph_execution()
-
-        logger.info("Executing the job from the user. We are still in the caller's compute environment")
-        executor.execute_job(node=node)
-
-    elif entrypoint == defaults.ENTRYPOINT.SYSTEM.value:
-        executor.prepare_for_node_execution()
-        logger.info("Executing the job from the system. We are in the config's compute environment")
-        executor.execute_node(node=node)
-
-    # Update the status of the run log
-    step_log = run_context.run_log_store.get_step_log(node._get_step_log_name(), run_id)
-    run_context.run_log_store.update_run_log_status(run_id=run_id, status=step_log.status)
+    logger.info(
+        "Executing the job from the user. We are still in the caller's compute environment"
+    )

-
-
+    run_context.executor.execute_job(
+        run_context.job, catalog_settings=run_context.job_catalog_settings
+    )

-    executor.send_return_code()
+    run_context.executor.send_return_code()


 def fan(
@@ -444,27 +518,40 @@ def fan(
     """
     from runnable import nodes

-    configuration_file = os.environ.get(
+    configuration_file = os.environ.get(
+        "RUNNABLE_CONFIGURATION_FILE", configuration_file
+    )

     run_context = prepare_configurations(
         configuration_file=configuration_file,
-        pipeline_file=pipeline_file,
         run_id=run_id,
         tag=tag,
         parameters_file=parameters_file,
     )
+
+    assert isinstance(run_context.executor, BasePipelineExecutor)
+
+    if mode == "yaml":
+        # Load the yaml file
+        set_pipeline_spec_from_yaml(run_context, pipeline_file)
+    elif mode == "python":
+        # Call the SDK to get the dag
+        set_pipeline_spec_from_python(run_context, pipeline_file)
+
     console.print("Working with context:")
     console.print(run_context)
     console.rule(style="[dark orange]")

     executor = run_context.executor
-
-
-
-    executor.prepare_for_node_execution()
+    utils.set_runnable_environment_variables(
+        run_id=run_id, configuration_file=configuration_file, tag=tag
+    )

     step_internal_name = nodes.BaseNode._get_internal_name_from_command_name(step_name)
-    node_to_execute, _ = graph.search_node_by_internal_name(
+    node_to_execute, _ = graph.search_node_by_internal_name(
+        run_context.dag,  # type: ignore
+        step_internal_name,
+    )

     map_variable_dict = utils.json_to_ordered_dict(map_variable)

@@ -478,6 +565,6 @@ def fan(
         raise ValueError(f"Invalid mode {mode}")


-if __name__ == "__main__":
-
-
+# if __name__ == "__main__":
+# # This is only for perf testing purposes.
+# prepare_configurations(run_id="abc", pipeline_file="examples/mocking.yaml")