runnable 0.12.3__py3-none-any.whl → 0.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runnable/__init__.py +0 -11
- runnable/catalog.py +27 -5
- runnable/cli.py +122 -26
- runnable/datastore.py +71 -35
- runnable/defaults.py +0 -1
- runnable/entrypoints.py +107 -32
- runnable/exceptions.py +6 -2
- runnable/executor.py +28 -9
- runnable/graph.py +37 -12
- runnable/integration.py +7 -2
- runnable/nodes.py +15 -17
- runnable/parameters.py +27 -8
- runnable/pickler.py +1 -1
- runnable/sdk.py +101 -33
- runnable/secrets.py +3 -1
- runnable/tasks.py +246 -34
- runnable/utils.py +41 -13
- {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info}/METADATA +25 -31
- runnable-0.14.0.dist-info/RECORD +24 -0
- {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info}/WHEEL +1 -1
- runnable-0.14.0.dist-info/entry_points.txt +40 -0
- runnable/extensions/__init__.py +0 -0
- runnable/extensions/catalog/__init__.py +0 -21
- runnable/extensions/catalog/file_system/__init__.py +0 -0
- runnable/extensions/catalog/file_system/implementation.py +0 -234
- runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
- runnable/extensions/catalog/k8s_pvc/implementation.py +0 -16
- runnable/extensions/catalog/k8s_pvc/integration.py +0 -59
- runnable/extensions/executor/__init__.py +0 -649
- runnable/extensions/executor/argo/__init__.py +0 -0
- runnable/extensions/executor/argo/implementation.py +0 -1194
- runnable/extensions/executor/argo/specification.yaml +0 -51
- runnable/extensions/executor/k8s_job/__init__.py +0 -0
- runnable/extensions/executor/k8s_job/implementation_FF.py +0 -259
- runnable/extensions/executor/k8s_job/integration_FF.py +0 -69
- runnable/extensions/executor/local/__init__.py +0 -0
- runnable/extensions/executor/local/implementation.py +0 -71
- runnable/extensions/executor/local_container/__init__.py +0 -0
- runnable/extensions/executor/local_container/implementation.py +0 -446
- runnable/extensions/executor/mocked/__init__.py +0 -0
- runnable/extensions/executor/mocked/implementation.py +0 -154
- runnable/extensions/executor/retry/__init__.py +0 -0
- runnable/extensions/executor/retry/implementation.py +0 -168
- runnable/extensions/nodes.py +0 -855
- runnable/extensions/run_log_store/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_file_system/implementation.py +0 -111
- runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +0 -21
- runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +0 -61
- runnable/extensions/run_log_store/db/implementation_FF.py +0 -157
- runnable/extensions/run_log_store/db/integration_FF.py +0 -0
- runnable/extensions/run_log_store/file_system/__init__.py +0 -0
- runnable/extensions/run_log_store/file_system/implementation.py +0 -140
- runnable/extensions/run_log_store/generic_chunked.py +0 -557
- runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
- runnable/extensions/run_log_store/k8s_pvc/implementation.py +0 -21
- runnable/extensions/run_log_store/k8s_pvc/integration.py +0 -56
- runnable/extensions/secrets/__init__.py +0 -0
- runnable/extensions/secrets/dotenv/__init__.py +0 -0
- runnable/extensions/secrets/dotenv/implementation.py +0 -100
- runnable-0.12.3.dist-info/RECORD +0 -64
- runnable-0.12.3.dist-info/entry_points.txt +0 -41
- {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info/licenses}/LICENSE +0 -0
runnable/tasks.py
CHANGED
@@ -31,7 +31,7 @@ logger = logging.getLogger(defaults.LOGGER_NAME)
|
|
31
31
|
logging.getLogger("stevedore").setLevel(logging.CRITICAL)
|
32
32
|
|
33
33
|
|
34
|
-
# TODO:
|
34
|
+
# TODO: This has to be an extension
|
35
35
|
|
36
36
|
|
37
37
|
class TaskReturns(BaseModel):
|
@@ -43,8 +43,12 @@ class BaseTaskType(BaseModel):
|
|
43
43
|
"""A base task class which does the execution of command defined by the user."""
|
44
44
|
|
45
45
|
task_type: str = Field(serialization_alias="command_type")
|
46
|
-
secrets: List[str] = Field(
|
47
|
-
|
46
|
+
secrets: List[str] = Field(
|
47
|
+
default_factory=list
|
48
|
+
) # A list of secrets to expose by secrets manager
|
49
|
+
returns: List[TaskReturns] = Field(
|
50
|
+
default_factory=list, alias="returns"
|
51
|
+
) # The return values of the task
|
48
52
|
|
49
53
|
model_config = ConfigDict(extra="forbid")
|
50
54
|
|
@@ -70,11 +74,13 @@ class BaseTaskType(BaseModel):
|
|
70
74
|
raise NotImplementedError()
|
71
75
|
|
72
76
|
def set_secrets_as_env_variables(self):
|
77
|
+
# Preparing the environment for the task execution
|
73
78
|
for key in self.secrets:
|
74
79
|
secret_value = context.run_context.secrets_handler.get(key)
|
75
80
|
os.environ[key] = secret_value
|
76
81
|
|
77
82
|
def delete_secrets_from_env_variables(self):
|
83
|
+
# Cleaning up the environment after the task execution
|
78
84
|
for key in self.secrets:
|
79
85
|
if key in os.environ:
|
80
86
|
del os.environ[key]
|
@@ -99,6 +105,7 @@ class BaseTaskType(BaseModel):
|
|
99
105
|
def _diff_parameters(
|
100
106
|
self, parameters_in: Dict[str, Parameter], context_params: Dict[str, Parameter]
|
101
107
|
) -> Dict[str, Parameter]:
|
108
|
+
# If the parameter is different from existing parameters, then it is updated
|
102
109
|
diff: Dict[str, Parameter] = {}
|
103
110
|
for param_name, param in context_params.items():
|
104
111
|
if param_name in parameters_in:
|
@@ -112,12 +119,7 @@ class BaseTaskType(BaseModel):
|
|
112
119
|
|
113
120
|
@contextlib.contextmanager
|
114
121
|
def expose_secrets(self):
|
115
|
-
"""Context manager to expose secrets to the execution.
|
116
|
-
|
117
|
-
Args:
|
118
|
-
map_variable (dict, optional): If the command is part of map node, the value of map. Defaults to None.
|
119
|
-
|
120
|
-
"""
|
122
|
+
"""Context manager to expose secrets to the execution."""
|
121
123
|
self.set_secrets_as_env_variables()
|
122
124
|
try:
|
123
125
|
yield
|
@@ -126,9 +128,32 @@ class BaseTaskType(BaseModel):
|
|
126
128
|
finally:
|
127
129
|
self.delete_secrets_from_env_variables()
|
128
130
|
|
131
|
+
def resolve_unreduced_parameters(self, map_variable: TypeMapVariable = None):
|
132
|
+
"""Resolve the unreduced parameters."""
|
133
|
+
params = self._context.run_log_store.get_parameters(
|
134
|
+
run_id=self._context.run_id
|
135
|
+
).copy()
|
136
|
+
|
137
|
+
for param_name, param in params.items():
|
138
|
+
if param.reduced is False:
|
139
|
+
assert (
|
140
|
+
map_variable is not None
|
141
|
+
), "Parameters in non-map node should always be reduced"
|
142
|
+
|
143
|
+
context_param = param_name
|
144
|
+
for _, v in map_variable.items():
|
145
|
+
context_param = f"{v}_{context_param}"
|
146
|
+
|
147
|
+
if context_param in params: # Is this if required?
|
148
|
+
params[param_name].value = params[context_param].value
|
149
|
+
|
150
|
+
return params
|
151
|
+
|
129
152
|
@contextlib.contextmanager
|
130
|
-
def execution_context(
|
131
|
-
|
153
|
+
def execution_context(
|
154
|
+
self, map_variable: TypeMapVariable = None, allow_complex: bool = True
|
155
|
+
):
|
156
|
+
params = self.resolve_unreduced_parameters(map_variable=map_variable)
|
132
157
|
logger.info(f"Parameters available for the execution: {params}")
|
133
158
|
|
134
159
|
for param_name, param in params.items():
|
@@ -150,7 +175,11 @@ class BaseTaskType(BaseModel):
|
|
150
175
|
logger.debug(f"Resolved parameters: {params}")
|
151
176
|
|
152
177
|
if not allow_complex:
|
153
|
-
params = {
|
178
|
+
params = {
|
179
|
+
key: value
|
180
|
+
for key, value in params.items()
|
181
|
+
if isinstance(value, JsonParameter)
|
182
|
+
}
|
154
183
|
|
155
184
|
parameters_in = copy.deepcopy(params)
|
156
185
|
try:
|
@@ -161,8 +190,12 @@ class BaseTaskType(BaseModel):
|
|
161
190
|
finally:
|
162
191
|
# Update parameters
|
163
192
|
# This should only update the parameters that are changed at the root level.
|
164
|
-
diff_parameters = self._diff_parameters(
|
165
|
-
|
193
|
+
diff_parameters = self._diff_parameters(
|
194
|
+
parameters_in=parameters_in, context_params=params
|
195
|
+
)
|
196
|
+
self._context.run_log_store.set_parameters(
|
197
|
+
parameters=diff_parameters, run_id=self._context.run_id
|
198
|
+
)
|
166
199
|
|
167
200
|
|
168
201
|
def task_return_to_parameter(task_return: TaskReturns, value: Any) -> Parameter:
|
@@ -188,7 +221,56 @@ def task_return_to_parameter(task_return: TaskReturns, value: Any) -> Parameter:
|
|
188
221
|
|
189
222
|
|
190
223
|
class PythonTaskType(BaseTaskType): # pylint: disable=too-few-public-methods
|
191
|
-
"""
|
224
|
+
"""
|
225
|
+
--8<-- [start:python_reference]
|
226
|
+
An execution node of the pipeline of python functions.
|
227
|
+
Please refer to define pipeline/tasks/python for more information.
|
228
|
+
|
229
|
+
As part of the dag definition, a python task is defined as follows:
|
230
|
+
|
231
|
+
dag:
|
232
|
+
steps:
|
233
|
+
python_task: # The name of the node
|
234
|
+
type: task
|
235
|
+
command_type: python # this is default
|
236
|
+
command: my_module.my_function # the dotted path to the function. Please refer to the yaml section of
|
237
|
+
define pipeline/tasks/python for concrete details.
|
238
|
+
returns:
|
239
|
+
- name: # The name to assign the return value
|
240
|
+
kind: json # the default value is json,
|
241
|
+
can be object for python objects and metric for metrics
|
242
|
+
secrets:
|
243
|
+
- my_secret_key # A list of secrets to expose by secrets manager
|
244
|
+
catalog:
|
245
|
+
get:
|
246
|
+
- A list of glob patterns to get from the catalog to the local file system
|
247
|
+
put:
|
248
|
+
- A list of glob patterns to put to the catalog from the local file system
|
249
|
+
on_failure: The name of the step to traverse in case of failure
|
250
|
+
overrides:
|
251
|
+
Individual tasks can override the global configuration config by referring to the
|
252
|
+
specific override.
|
253
|
+
|
254
|
+
For example,
|
255
|
+
#Global configuration
|
256
|
+
executor:
|
257
|
+
type: local-container
|
258
|
+
config:
|
259
|
+
docker_image: "runnable/runnable:latest"
|
260
|
+
overrides:
|
261
|
+
custom_docker_image:
|
262
|
+
docker_image: "runnable/runnable:custom"
|
263
|
+
|
264
|
+
## In the node definition
|
265
|
+
overrides:
|
266
|
+
local-container:
|
267
|
+
docker_image: "runnable/runnable:custom"
|
268
|
+
|
269
|
+
This instruction will override the docker image for the local-container executor.
|
270
|
+
next: The next node to execute after this task, use "success" to terminate the pipeline successfully
|
271
|
+
or "fail" to terminate the pipeline with an error.
|
272
|
+
--8<-- [end:python_reference]
|
273
|
+
"""
|
192
274
|
|
193
275
|
task_type: str = Field(default="python", serialization_alias="command_type")
|
194
276
|
command: str
|
@@ -209,7 +291,10 @@ class PythonTaskType(BaseTaskType): # pylint: disable=too-few-public-methods
|
|
209
291
|
"""Execute the notebook as defined by the command."""
|
210
292
|
attempt_log = StepAttempt(status=defaults.FAIL, start_time=str(datetime.now()))
|
211
293
|
|
212
|
-
with
|
294
|
+
with (
|
295
|
+
self.execution_context(map_variable=map_variable) as params,
|
296
|
+
self.expose_secrets() as _,
|
297
|
+
):
|
213
298
|
module, func = utils.get_module_and_attr_names(self.command)
|
214
299
|
sys.path.insert(0, os.getcwd()) # Need to add the current directory to path
|
215
300
|
imported_module = importlib.import_module(module)
|
@@ -217,21 +302,32 @@ class PythonTaskType(BaseTaskType): # pylint: disable=too-few-public-methods
|
|
217
302
|
|
218
303
|
try:
|
219
304
|
try:
|
220
|
-
filtered_parameters = parameters.filter_arguments_for_func(
|
221
|
-
|
305
|
+
filtered_parameters = parameters.filter_arguments_for_func(
|
306
|
+
f, params.copy(), map_variable
|
307
|
+
)
|
308
|
+
logger.info(
|
309
|
+
f"Calling {func} from {module} with {filtered_parameters}"
|
310
|
+
)
|
222
311
|
|
223
312
|
out_file = io.StringIO()
|
224
313
|
with contextlib.redirect_stdout(out_file):
|
225
|
-
user_set_parameters = f(
|
314
|
+
user_set_parameters = f(
|
315
|
+
**filtered_parameters
|
316
|
+
) # This is a tuple or single value
|
226
317
|
task_console.print(out_file.getvalue())
|
227
318
|
except Exception as e:
|
228
|
-
raise exceptions.CommandCallError(
|
319
|
+
raise exceptions.CommandCallError(
|
320
|
+
f"Function call: {self.command} did not succeed.\n"
|
321
|
+
) from e
|
229
322
|
|
230
323
|
attempt_log.input_parameters = params.copy()
|
231
324
|
|
232
325
|
if map_variable:
|
233
326
|
attempt_log.input_parameters.update(
|
234
|
-
{
|
327
|
+
{
|
328
|
+
k: JsonParameter(value=v, kind="json")
|
329
|
+
for k, v in map_variable.items()
|
330
|
+
}
|
235
331
|
)
|
236
332
|
|
237
333
|
if self.returns:
|
@@ -239,7 +335,9 @@ class PythonTaskType(BaseTaskType): # pylint: disable=too-few-public-methods
|
|
239
335
|
user_set_parameters = (user_set_parameters,)
|
240
336
|
|
241
337
|
if len(user_set_parameters) != len(self.returns):
|
242
|
-
raise ValueError(
|
338
|
+
raise ValueError(
|
339
|
+
"Returns task signature does not match the function returns"
|
340
|
+
)
|
243
341
|
|
244
342
|
output_parameters: Dict[str, Parameter] = {}
|
245
343
|
metrics: Dict[str, Parameter] = {}
|
@@ -277,7 +375,56 @@ class PythonTaskType(BaseTaskType): # pylint: disable=too-few-public-methods
|
|
277
375
|
|
278
376
|
|
279
377
|
class NotebookTaskType(BaseTaskType):
|
280
|
-
"""
|
378
|
+
"""
|
379
|
+
--8<-- [start:notebook_reference]
|
380
|
+
An execution node of the pipeline of notebook execution.
|
381
|
+
Please refer to define pipeline/tasks/notebook for more information.
|
382
|
+
|
383
|
+
As part of the dag definition, a notebook task is defined as follows:
|
384
|
+
|
385
|
+
dag:
|
386
|
+
steps:
|
387
|
+
notebook_task: # The name of the node
|
388
|
+
type: task
|
389
|
+
command_type: notebook
|
390
|
+
command: the path to the notebook relative to project root.
|
391
|
+
optional_ploomber_args: a dictionary of arguments to be passed to ploomber engine
|
392
|
+
returns:
|
393
|
+
- name: # The name to assign the return value
|
394
|
+
kind: json # the default value is json,
|
395
|
+
can be object for python objects and metric for metrics
|
396
|
+
secrets:
|
397
|
+
- my_secret_key # A list of secrets to expose by secrets manager
|
398
|
+
catalog:
|
399
|
+
get:
|
400
|
+
- A list of glob patterns to get from the catalog to the local file system
|
401
|
+
put:
|
402
|
+
- A list of glob patterns to put to the catalog from the local file system
|
403
|
+
on_failure: The name of the step to traverse in case of failure
|
404
|
+
overrides:
|
405
|
+
Individual tasks can override the global configuration config by referring to the
|
406
|
+
specific override.
|
407
|
+
|
408
|
+
For example,
|
409
|
+
#Global configuration
|
410
|
+
executor:
|
411
|
+
type: local-container
|
412
|
+
config:
|
413
|
+
docker_image: "runnable/runnable:latest"
|
414
|
+
overrides:
|
415
|
+
custom_docker_image:
|
416
|
+
docker_image: "runnable/runnable:custom"
|
417
|
+
|
418
|
+
## In the node definition
|
419
|
+
overrides:
|
420
|
+
local-container:
|
421
|
+
docker_image: "runnable/runnable:custom"
|
422
|
+
|
423
|
+
This instruction will override the docker image for the local-container executor.
|
424
|
+
next: The next node to execute after this task, use "success" to terminate the pipeline successfully
|
425
|
+
or "fail" to terminate the pipeline with an error.
|
426
|
+
--8<-- [end:notebook_reference]
|
427
|
+
"""
|
281
428
|
|
282
429
|
task_type: str = Field(default="notebook", serialization_alias="command_type")
|
283
430
|
command: str
|
@@ -302,7 +449,10 @@ class NotebookTaskType(BaseTaskType):
|
|
302
449
|
return str(file_name)
|
303
450
|
|
304
451
|
def get_cli_options(self) -> Tuple[str, dict]:
|
305
|
-
return "notebook", {
|
452
|
+
return "notebook", {
|
453
|
+
"command": self.command,
|
454
|
+
"notebook-output-path": self.notebook_output_path,
|
455
|
+
}
|
306
456
|
|
307
457
|
def execute_command(
|
308
458
|
self,
|
@@ -325,9 +475,12 @@ class NotebookTaskType(BaseTaskType):
|
|
325
475
|
|
326
476
|
notebook_output_path = self.notebook_output_path
|
327
477
|
|
328
|
-
with
|
329
|
-
|
330
|
-
|
478
|
+
with (
|
479
|
+
self.execution_context(
|
480
|
+
map_variable=map_variable, allow_complex=False
|
481
|
+
) as params,
|
482
|
+
self.expose_secrets() as _,
|
483
|
+
):
|
331
484
|
copy_params = copy.deepcopy(params)
|
332
485
|
|
333
486
|
if map_variable:
|
@@ -336,7 +489,9 @@ class NotebookTaskType(BaseTaskType):
|
|
336
489
|
copy_params[key] = JsonParameter(kind="json", value=value)
|
337
490
|
|
338
491
|
# Remove any {v}_unreduced parameters from the parameters
|
339
|
-
unprocessed_params = [
|
492
|
+
unprocessed_params = [
|
493
|
+
k for k, v in copy_params.items() if not v.reduced
|
494
|
+
]
|
340
495
|
|
341
496
|
for key in list(copy_params.keys()):
|
342
497
|
if any(key.endswith(f"_{k}") for k in unprocessed_params):
|
@@ -360,7 +515,9 @@ class NotebookTaskType(BaseTaskType):
|
|
360
515
|
pm.execute_notebook(**kwds)
|
361
516
|
task_console.print(out_file.getvalue())
|
362
517
|
|
363
|
-
context.run_context.catalog_handler.put(
|
518
|
+
context.run_context.catalog_handler.put(
|
519
|
+
name=notebook_output_path, run_id=context.run_context.run_id
|
520
|
+
)
|
364
521
|
|
365
522
|
client = PloomberClient.from_path(path=notebook_output_path)
|
366
523
|
namespace = client.get_namespace()
|
@@ -368,7 +525,9 @@ class NotebookTaskType(BaseTaskType):
|
|
368
525
|
output_parameters: Dict[str, Parameter] = {}
|
369
526
|
try:
|
370
527
|
for task_return in self.returns:
|
371
|
-
param_name = Template(task_return.name).safe_substitute(
|
528
|
+
param_name = Template(task_return.name).safe_substitute(
|
529
|
+
map_variable # type: ignore
|
530
|
+
)
|
372
531
|
|
373
532
|
if map_variable:
|
374
533
|
for _, v in map_variable.items():
|
@@ -410,7 +569,54 @@ class NotebookTaskType(BaseTaskType):
|
|
410
569
|
|
411
570
|
class ShellTaskType(BaseTaskType):
|
412
571
|
"""
|
413
|
-
|
572
|
+
--8<-- [start:shell_reference]
|
573
|
+
An execution node of the pipeline of shell execution.
|
574
|
+
Please refer to define pipeline/tasks/shell for more information.
|
575
|
+
|
576
|
+
As part of the dag definition, a shell task is defined as follows:
|
577
|
+
|
578
|
+
dag:
|
579
|
+
steps:
|
580
|
+
shell_task: # The name of the node
|
581
|
+
type: task
|
582
|
+
command_type: shell
|
583
|
+
command: The command to execute, it could be multiline
|
584
|
+
optional_ploomber_args: a dictionary of arguments to be passed to ploomber engine
|
585
|
+
returns:
|
586
|
+
- name: # The name to assign the return value
|
587
|
+
kind: json # the default value is json,
|
588
|
+
can be object for python objects and metric for metrics
|
589
|
+
secrets:
|
590
|
+
- my_secret_key # A list of secrets to expose by secrets manager
|
591
|
+
catalog:
|
592
|
+
get:
|
593
|
+
- A list of glob patterns to get from the catalog to the local file system
|
594
|
+
put:
|
595
|
+
- A list of glob patterns to put to the catalog from the local file system
|
596
|
+
on_failure: The name of the step to traverse in case of failure
|
597
|
+
overrides:
|
598
|
+
Individual tasks can override the global configuration config by referring to the
|
599
|
+
specific override.
|
600
|
+
|
601
|
+
For example,
|
602
|
+
#Global configuration
|
603
|
+
executor:
|
604
|
+
type: local-container
|
605
|
+
config:
|
606
|
+
docker_image: "runnable/runnable:latest"
|
607
|
+
overrides:
|
608
|
+
custom_docker_image:
|
609
|
+
docker_image: "runnable/runnable:custom"
|
610
|
+
|
611
|
+
## In the node definition
|
612
|
+
overrides:
|
613
|
+
local-container:
|
614
|
+
docker_image: "runnable/runnable:custom"
|
615
|
+
|
616
|
+
This instruction will override the docker image for the local-container executor.
|
617
|
+
next: The next node to execute after this task, use "success" to terminate the pipeline successfully
|
618
|
+
or "fail" to terminate the pipeline with an error.
|
619
|
+
--8<-- [end:shell_reference]
|
414
620
|
"""
|
415
621
|
|
416
622
|
task_type: str = Field(default="shell", serialization_alias="command_type")
|
@@ -421,7 +627,9 @@ class ShellTaskType(BaseTaskType):
|
|
421
627
|
def returns_should_be_json(cls, returns: List[TaskReturns]):
|
422
628
|
for task_return in returns:
|
423
629
|
if task_return.kind == "object" or task_return.kind == "pydantic":
|
424
|
-
raise ValueError(
|
630
|
+
raise ValueError(
|
631
|
+
"Pydantic models or Objects are not allowed in returns"
|
632
|
+
)
|
425
633
|
|
426
634
|
return returns
|
427
635
|
|
@@ -456,7 +664,9 @@ class ShellTaskType(BaseTaskType):
|
|
456
664
|
subprocess_env[key] = secret_value
|
457
665
|
|
458
666
|
try:
|
459
|
-
with self.execution_context(
|
667
|
+
with self.execution_context(
|
668
|
+
map_variable=map_variable, allow_complex=False
|
669
|
+
) as params:
|
460
670
|
subprocess_env.update({k: v.get_value() for k, v in params.items()})
|
461
671
|
|
462
672
|
# Json dumps all runnable environment variables
|
@@ -467,7 +677,9 @@ class ShellTaskType(BaseTaskType):
|
|
467
677
|
|
468
678
|
collect_delimiter = "=== COLLECT ==="
|
469
679
|
|
470
|
-
command =
|
680
|
+
command = (
|
681
|
+
self.command.strip() + f" && echo '{collect_delimiter}' && env"
|
682
|
+
)
|
471
683
|
logger.info(f"Executing shell command: {command}")
|
472
684
|
|
473
685
|
capture = False
|
runnable/utils.py
CHANGED
@@ -21,7 +21,7 @@ from runnable import defaults, names
|
|
21
21
|
from runnable.defaults import TypeMapVariable
|
22
22
|
|
23
23
|
if TYPE_CHECKING: # pragma: no cover
|
24
|
-
from
|
24
|
+
from extensions.nodes.nodes import TaskNode
|
25
25
|
from runnable.nodes import BaseNode
|
26
26
|
|
27
27
|
|
@@ -86,7 +86,9 @@ def generate_run_id(run_id: str = "") -> str:
|
|
86
86
|
return run_id
|
87
87
|
|
88
88
|
|
89
|
-
def apply_variables(
|
89
|
+
def apply_variables(
|
90
|
+
apply_to: Dict[str, Any], variables: Dict[str, str]
|
91
|
+
) -> Dict[str, Any]:
|
90
92
|
"""Safely applies the variables to a config.
|
91
93
|
|
92
94
|
For example: For config:
|
@@ -272,7 +274,9 @@ def get_local_docker_image_id(image_name: str) -> str:
|
|
272
274
|
image = client.images.get(image_name)
|
273
275
|
return image.attrs["Id"]
|
274
276
|
except ImportError: # pragma: no cover
|
275
|
-
logger.warning(
|
277
|
+
logger.warning(
|
278
|
+
"Did not find docker installed, some functionality might be affected"
|
279
|
+
)
|
276
280
|
except BaseException:
|
277
281
|
logger.exception(f"Could not find the image by name {image_name}")
|
278
282
|
|
@@ -295,7 +299,9 @@ def get_git_code_identity():
|
|
295
299
|
code_identity.code_identifier_dependable, changed = is_git_clean()
|
296
300
|
code_identity.code_identifier_url = get_git_remote()
|
297
301
|
if changed:
|
298
|
-
code_identity.code_identifier_message = "changes found in " + ", ".join(
|
302
|
+
code_identity.code_identifier_message = "changes found in " + ", ".join(
|
303
|
+
changed.split("\n")
|
304
|
+
)
|
299
305
|
except BaseException:
|
300
306
|
logger.exception("Git code versioning problems")
|
301
307
|
|
@@ -331,7 +337,9 @@ def get_tracked_data() -> Dict[str, str]:
|
|
331
337
|
try:
|
332
338
|
tracked_data[key.lower()] = json.loads(value)
|
333
339
|
except json.decoder.JSONDecodeError:
|
334
|
-
logger.warning(
|
340
|
+
logger.warning(
|
341
|
+
f"Tracker {key} could not be JSON decoded, adding the literal value"
|
342
|
+
)
|
335
343
|
tracked_data[key.lower()] = value
|
336
344
|
|
337
345
|
del os.environ[env_var]
|
@@ -389,9 +397,13 @@ def get_data_hash(file_name: str):
|
|
389
397
|
str: The SHA ID of the file contents
|
390
398
|
"""
|
391
399
|
# https://stackoverflow.com/questions/3431825/generating-an-md5-checksum-of-a-file
|
392
|
-
|
400
|
+
# TODO: For a big file, we should only hash the first few bytes
|
401
|
+
return hash_bytestr_iter(
|
402
|
+
file_as_blockiter(open(file_name, "rb")), hashlib.sha256()
|
403
|
+
) # pragma: no cover
|
393
404
|
|
394
405
|
|
406
|
+
# TODO: This is not the right place for this.
|
395
407
|
def get_node_execution_command(
|
396
408
|
node: BaseNode,
|
397
409
|
map_variable: TypeMapVariable = None,
|
@@ -415,7 +427,11 @@ def get_node_execution_command(
|
|
415
427
|
|
416
428
|
log_level = log_level or logging.getLevelName(logger.getEffectiveLevel())
|
417
429
|
|
418
|
-
action =
|
430
|
+
action = (
|
431
|
+
f"runnable execute_single_node {run_id} "
|
432
|
+
f"{node._command_friendly_name()}"
|
433
|
+
f" --log-level {log_level}"
|
434
|
+
)
|
419
435
|
|
420
436
|
if context.run_context.pipeline_file:
|
421
437
|
action = action + f" --file {context.run_context.pipeline_file}"
|
@@ -435,6 +451,7 @@ def get_node_execution_command(
|
|
435
451
|
return action
|
436
452
|
|
437
453
|
|
454
|
+
# TODO: This is not the right place for this.
|
438
455
|
def get_fan_command(
|
439
456
|
mode: str,
|
440
457
|
node: BaseNode,
|
@@ -478,6 +495,7 @@ def get_fan_command(
|
|
478
495
|
return action
|
479
496
|
|
480
497
|
|
498
|
+
# TODO: This is not the right place for this.
|
481
499
|
def get_job_execution_command(node: TaskNode, over_write_run_id: str = "") -> str:
|
482
500
|
"""Get the execution command to run a job via command line.
|
483
501
|
|
@@ -520,7 +538,9 @@ def get_job_execution_command(node: TaskNode, over_write_run_id: str = "") -> st
|
|
520
538
|
return action
|
521
539
|
|
522
540
|
|
523
|
-
def get_provider_by_name_and_type(
|
541
|
+
def get_provider_by_name_and_type(
|
542
|
+
service_type: str, service_details: defaults.ServiceConfig
|
543
|
+
):
|
524
544
|
"""Given a service type, one of executor, run_log_store, catalog, secrets and the config
|
525
545
|
return the exact child class implementing the service.
|
526
546
|
We use stevedore to do the work for us.
|
@@ -542,7 +562,9 @@ def get_provider_by_name_and_type(service_type: str, service_details: defaults.S
|
|
542
562
|
if "config" in service_details:
|
543
563
|
service_config = service_details.get("config", {})
|
544
564
|
|
545
|
-
logger.debug(
|
565
|
+
logger.debug(
|
566
|
+
f"Trying to get a service of {service_type} of the name {service_name} with config: {service_config}"
|
567
|
+
)
|
546
568
|
try:
|
547
569
|
mgr = driver.DriverManager(
|
548
570
|
namespace=namespace,
|
@@ -552,8 +574,12 @@ def get_provider_by_name_and_type(service_type: str, service_details: defaults.S
|
|
552
574
|
)
|
553
575
|
return mgr.driver
|
554
576
|
except Exception as _e:
|
555
|
-
logger.exception(
|
556
|
-
|
577
|
+
logger.exception(
|
578
|
+
f"Could not find the service of type: {service_type} with config: {service_details}"
|
579
|
+
)
|
580
|
+
raise Exception(
|
581
|
+
f"Could not find the service of type: {service_type} with config: {service_details}"
|
582
|
+
) from _e
|
557
583
|
|
558
584
|
|
559
585
|
def get_run_config() -> dict:
|
@@ -585,7 +611,9 @@ def json_to_ordered_dict(json_str: str) -> TypeMapVariable:
|
|
585
611
|
return OrderedDict()
|
586
612
|
|
587
613
|
|
588
|
-
def set_runnable_environment_variables(
|
614
|
+
def set_runnable_environment_variables(
|
615
|
+
run_id: str = "", configuration_file: str = "", tag: str = ""
|
616
|
+
) -> None:
|
589
617
|
"""Set the environment variables used by runnable. This function should be called during the prepare configurations
|
590
618
|
by all executors.
|
591
619
|
|
@@ -604,7 +632,7 @@ def set_runnable_environment_variables(run_id: str = "", configuration_file: str
|
|
604
632
|
os.environ[defaults.RUNNABLE_RUN_TAG] = tag
|
605
633
|
|
606
634
|
|
607
|
-
def gather_variables() ->
|
635
|
+
def gather_variables() -> Dict[str, str]:
|
608
636
|
"""Gather all the environment variables used by runnable. All the variables start with runnable_VAR_.
|
609
637
|
|
610
638
|
Returns:
|
@@ -1,36 +1,31 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: runnable
|
3
|
-
Version: 0.
|
4
|
-
Summary:
|
5
|
-
|
6
|
-
License:
|
7
|
-
|
8
|
-
|
9
|
-
Requires-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
3
|
+
Version: 0.14.0
|
4
|
+
Summary: Add your description here
|
5
|
+
Author-email: "Vammi, Vijay" <vijay.vammi@astrazeneca.com>
|
6
|
+
License-File: LICENSE
|
7
|
+
Requires-Python: >=3.9
|
8
|
+
Requires-Dist: catalog
|
9
|
+
Requires-Dist: click-plugins>=1.1.1
|
10
|
+
Requires-Dist: click<=8.1.3
|
11
|
+
Requires-Dist: dill>=0.3.9
|
12
|
+
Requires-Dist: executor
|
13
|
+
Requires-Dist: nodes
|
14
|
+
Requires-Dist: pydantic>=2.10.3
|
15
|
+
Requires-Dist: python-dotenv>=1.0.1
|
16
|
+
Requires-Dist: rich>=13.9.4
|
17
|
+
Requires-Dist: ruamel-yaml>=0.18.6
|
18
|
+
Requires-Dist: run-log-store
|
19
|
+
Requires-Dist: secrets
|
20
|
+
Requires-Dist: setuptools>=75.6.0
|
21
|
+
Requires-Dist: stevedore>=5.4.0
|
22
|
+
Requires-Dist: typer>=0.15.1
|
17
23
|
Provides-Extra: docker
|
24
|
+
Requires-Dist: docker>=7.1.0; extra == 'docker'
|
25
|
+
Provides-Extra: examples
|
26
|
+
Requires-Dist: pandas>=2.2.3; extra == 'examples'
|
18
27
|
Provides-Extra: notebook
|
19
|
-
Requires-Dist:
|
20
|
-
Requires-Dist: click-plugins (>=1.1.1,<2.0.0)
|
21
|
-
Requires-Dist: dill (>=0.3.8,<0.4.0)
|
22
|
-
Requires-Dist: docker ; extra == "docker"
|
23
|
-
Requires-Dist: mlflow-skinny
|
24
|
-
Requires-Dist: ploomber-engine (>=0.0.31,<0.0.32) ; extra == "notebook"
|
25
|
-
Requires-Dist: pydantic (>=2.5,<3.0)
|
26
|
-
Requires-Dist: rich (>=13.5.2,<14.0.0)
|
27
|
-
Requires-Dist: ruamel.yaml
|
28
|
-
Requires-Dist: ruamel.yaml.clib
|
29
|
-
Requires-Dist: sqlalchemy ; extra == "database"
|
30
|
-
Requires-Dist: stevedore (>=3.5.0,<4.0.0)
|
31
|
-
Requires-Dist: typing-extensions ; python_version < "3.8"
|
32
|
-
Project-URL: Documentation, https://github.com/vijayvammi/runnable
|
33
|
-
Project-URL: Repository, https://github.com/vijayvammi/runnable
|
28
|
+
Requires-Dist: ploomber-engine>=0.0.33; extra == 'notebook'
|
34
29
|
Description-Content-Type: text/markdown
|
35
30
|
|
36
31
|
|
@@ -267,4 +262,3 @@ Execute a pipeline over an iterable parameter.
|
|
267
262
|
|
268
263
|
### [Arbitrary nesting](https://astrazeneca.github.io/runnable-core/concepts/nesting/)
|
269
264
|
Any nesting of parallel within map and so on.
|
270
|
-
|