runnable 0.12.3-py3-none-any.whl → 0.14.0-py3-none-any.whl
- runnable/__init__.py +0 -11
- runnable/catalog.py +27 -5
- runnable/cli.py +122 -26
- runnable/datastore.py +71 -35
- runnable/defaults.py +0 -1
- runnable/entrypoints.py +107 -32
- runnable/exceptions.py +6 -2
- runnable/executor.py +28 -9
- runnable/graph.py +37 -12
- runnable/integration.py +7 -2
- runnable/nodes.py +15 -17
- runnable/parameters.py +27 -8
- runnable/pickler.py +1 -1
- runnable/sdk.py +101 -33
- runnable/secrets.py +3 -1
- runnable/tasks.py +246 -34
- runnable/utils.py +41 -13
- {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info}/METADATA +25 -31
- runnable-0.14.0.dist-info/RECORD +24 -0
- {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info}/WHEEL +1 -1
- runnable-0.14.0.dist-info/entry_points.txt +40 -0
- runnable/extensions/__init__.py +0 -0
- runnable/extensions/catalog/__init__.py +0 -21
- runnable/extensions/catalog/file_system/__init__.py +0 -0
- runnable/extensions/catalog/file_system/implementation.py +0 -234
- runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
- runnable/extensions/catalog/k8s_pvc/implementation.py +0 -16
- runnable/extensions/catalog/k8s_pvc/integration.py +0 -59
- runnable/extensions/executor/__init__.py +0 -649
- runnable/extensions/executor/argo/__init__.py +0 -0
- runnable/extensions/executor/argo/implementation.py +0 -1194
- runnable/extensions/executor/argo/specification.yaml +0 -51
- runnable/extensions/executor/k8s_job/__init__.py +0 -0
- runnable/extensions/executor/k8s_job/implementation_FF.py +0 -259
- runnable/extensions/executor/k8s_job/integration_FF.py +0 -69
- runnable/extensions/executor/local/__init__.py +0 -0
- runnable/extensions/executor/local/implementation.py +0 -71
- runnable/extensions/executor/local_container/__init__.py +0 -0
- runnable/extensions/executor/local_container/implementation.py +0 -446
- runnable/extensions/executor/mocked/__init__.py +0 -0
- runnable/extensions/executor/mocked/implementation.py +0 -154
- runnable/extensions/executor/retry/__init__.py +0 -0
- runnable/extensions/executor/retry/implementation.py +0 -168
- runnable/extensions/nodes.py +0 -855
- runnable/extensions/run_log_store/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_file_system/implementation.py +0 -111
- runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +0 -21
- runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +0 -61
- runnable/extensions/run_log_store/db/implementation_FF.py +0 -157
- runnable/extensions/run_log_store/db/integration_FF.py +0 -0
- runnable/extensions/run_log_store/file_system/__init__.py +0 -0
- runnable/extensions/run_log_store/file_system/implementation.py +0 -140
- runnable/extensions/run_log_store/generic_chunked.py +0 -557
- runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
- runnable/extensions/run_log_store/k8s_pvc/implementation.py +0 -21
- runnable/extensions/run_log_store/k8s_pvc/integration.py +0 -56
- runnable/extensions/secrets/__init__.py +0 -0
- runnable/extensions/secrets/dotenv/__init__.py +0 -0
- runnable/extensions/secrets/dotenv/implementation.py +0 -100
- runnable-0.12.3.dist-info/RECORD +0 -64
- runnable-0.12.3.dist-info/entry_points.txt +0 -41
- {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info/licenses}/LICENSE +0 -0
runnable/tasks.py
CHANGED
@@ -31,7 +31,7 @@ logger = logging.getLogger(defaults.LOGGER_NAME)
 logging.getLogger("stevedore").setLevel(logging.CRITICAL)


-# TODO:
+# TODO: This has to be an extension


 class TaskReturns(BaseModel):
@@ -43,8 +43,12 @@ class BaseTaskType(BaseModel):
     """A base task class which does the execution of command defined by the user."""

     task_type: str = Field(serialization_alias="command_type")
-    secrets: List[str] = Field(
-
+    secrets: List[str] = Field(
+        default_factory=list
+    )  # A list of secrets to expose by secrets manager
+    returns: List[TaskReturns] = Field(
+        default_factory=list, alias="returns"
+    )  # The return values of the task

     model_config = ConfigDict(extra="forbid")

@@ -70,11 +74,13 @@ class BaseTaskType(BaseModel):
         raise NotImplementedError()

     def set_secrets_as_env_variables(self):
+        # Preparing the environment for the task execution
         for key in self.secrets:
             secret_value = context.run_context.secrets_handler.get(key)
             os.environ[key] = secret_value

     def delete_secrets_from_env_variables(self):
+        # Cleaning up the environment after the task execution
         for key in self.secrets:
             if key in os.environ:
                 del os.environ[key]
@@ -99,6 +105,7 @@ class BaseTaskType(BaseModel):
     def _diff_parameters(
         self, parameters_in: Dict[str, Parameter], context_params: Dict[str, Parameter]
     ) -> Dict[str, Parameter]:
+        # If the parameter is different from existing parameters, then it is updated
         diff: Dict[str, Parameter] = {}
         for param_name, param in context_params.items():
             if param_name in parameters_in:
@@ -112,12 +119,7 @@ class BaseTaskType(BaseModel):

     @contextlib.contextmanager
     def expose_secrets(self):
-        """Context manager to expose secrets to the execution.
-
-        Args:
-            map_variable (dict, optional): If the command is part of map node, the value of map. Defaults to None.
-
-        """
+        """Context manager to expose secrets to the execution."""
         self.set_secrets_as_env_variables()
         try:
             yield
@@ -126,9 +128,32 @@ class BaseTaskType(BaseModel):
         finally:
             self.delete_secrets_from_env_variables()

+    def resolve_unreduced_parameters(self, map_variable: TypeMapVariable = None):
+        """Resolve the unreduced parameters."""
+        params = self._context.run_log_store.get_parameters(
+            run_id=self._context.run_id
+        ).copy()
+
+        for param_name, param in params.items():
+            if param.reduced is False:
+                assert (
+                    map_variable is not None
+                ), "Parameters in non-map node should always be reduced"
+
+                context_param = param_name
+                for _, v in map_variable.items():
+                    context_param = f"{v}_{context_param}"
+
+                if context_param in params:  # Is this if required?
+                    params[param_name].value = params[context_param].value
+
+        return params
+
     @contextlib.contextmanager
-    def execution_context(
-
+    def execution_context(
+        self, map_variable: TypeMapVariable = None, allow_complex: bool = True
+    ):
+        params = self.resolve_unreduced_parameters(map_variable=map_variable)
         logger.info(f"Parameters available for the execution: {params}")

         for param_name, param in params.items():
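The `resolve_unreduced_parameters` helper introduced above folds a map-prefixed copy of a parameter (for example `a_score`, written by the branch that iterated over `a`) back into the base name before a task executes. A minimal standalone sketch of that prefix-resolution logic, using an illustrative `Param` dataclass rather than runnable's own `Parameter` models:

```python
from collections import OrderedDict
from dataclasses import dataclass
from typing import Any, Dict, Optional


@dataclass
class Param:
    """Stand-in for a parameter record: a value plus a 'reduced' flag."""
    value: Any
    reduced: bool = True


def resolve_unreduced(params: Dict[str, Param], map_variable: Optional[OrderedDict]) -> Dict[str, Param]:
    """Copy the value of '<iterate_value>_<name>' into '<name>' for unreduced params."""
    resolved = dict(params)
    for name, param in list(resolved.items()):
        if not param.reduced:
            assert map_variable is not None, "Parameters in non-map node should always be reduced"
            prefixed = name
            for _, v in map_variable.items():
                prefixed = f"{v}_{prefixed}"
            if prefixed in resolved:
                resolved[name].value = resolved[prefixed].value
    return resolved


if __name__ == "__main__":
    # A map node iterating over chunk "a" stored its result as "a_score".
    params = {
        "score": Param(value=None, reduced=False),
        "a_score": Param(value=0.92),
    }
    resolved = resolve_unreduced(params, OrderedDict(chunk="a"))
    print(resolved["score"].value)  # 0.92
```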
@@ -150,7 +175,11 @@ class BaseTaskType(BaseModel):
         logger.debug(f"Resolved parameters: {params}")

         if not allow_complex:
-            params = {
+            params = {
+                key: value
+                for key, value in params.items()
+                if isinstance(value, JsonParameter)
+            }

         parameters_in = copy.deepcopy(params)
         try:
@@ -161,8 +190,12 @@ class BaseTaskType(BaseModel):
         finally:
             # Update parameters
             # This should only update the parameters that are changed at the root level.
-            diff_parameters = self._diff_parameters(
-
+            diff_parameters = self._diff_parameters(
+                parameters_in=parameters_in, context_params=params
+            )
+            self._context.run_log_store.set_parameters(
+                parameters=diff_parameters, run_id=self._context.run_id
+            )


 def task_return_to_parameter(task_return: TaskReturns, value: Any) -> Parameter:
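In the reworked `execution_context` above, the parameters are deep-copied before the task runs and, in the `finally` block, only the entries that changed are written back through `_diff_parameters` and the run log store. A rough sketch of that diffing step, reduced to plain values (runnable compares its `Parameter` models instead):

```python
import copy
from typing import Any, Dict


def diff_parameters(parameters_in: Dict[str, Any], context_params: Dict[str, Any]) -> Dict[str, Any]:
    """Return only the entries that are new or whose value changed during execution."""
    diff: Dict[str, Any] = {}
    for name, value in context_params.items():
        if name in parameters_in and parameters_in[name] == value:
            continue  # unchanged, no need to persist it again
        diff[name] = value
    return diff


if __name__ == "__main__":
    before = {"lr": 0.01, "epochs": 10}
    after = copy.deepcopy(before)
    after["epochs"] = 20          # mutated by the task
    after["accuracy"] = 0.87      # newly returned by the task
    print(diff_parameters(before, after))  # {'epochs': 20, 'accuracy': 0.87}
```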
@@ -188,7 +221,56 @@ def task_return_to_parameter(task_return: TaskReturns, value: Any) -> Parameter:


 class PythonTaskType(BaseTaskType):  # pylint: disable=too-few-public-methods
-    """
+    """
+    --8<-- [start:python_reference]
+    An execution node of the pipeline of python functions.
+    Please refer to define pipeline/tasks/python for more information.
+
+    As part of the dag definition, a python task is defined as follows:
+
+    dag:
+      steps:
+        python_task: # The name of the node
+          type: task
+          command_type: python # this is default
+          command: my_module.my_function # the dotted path to the function. Please refer to the yaml section of
+            define pipeline/tasks/python for concrete details.
+          returns:
+            - name: # The name to assign the return value
+              kind: json # the default value is json,
+                can be object for python objects and metric for metrics
+          secrets:
+            - my_secret_key # A list of secrets to expose by secrets manager
+          catalog:
+            get:
+              - A list of glob patterns to get from the catalog to the local file system
+            put:
+              - A list of glob patterns to put to the catalog from the local file system
+          on_failure: The name of the step to traverse in case of failure
+          overrides:
+            Individual tasks can override the global configuration config by referring to the
+            specific override.
+
+            For example,
+            #Global configuration
+            executor:
+              type: local-container
+              config:
+                docker_image: "runnable/runnable:latest"
+              overrides:
+                custom_docker_image:
+                  docker_image: "runnable/runnable:custom"
+
+            ## In the node definition
+            overrides:
+              local-container:
+                docker_image: "runnable/runnable:custom"
+
+            This instruction will override the docker image for the local-container executor.
+          next: The next node to execute after this task, use "success" to terminate the pipeline successfully
+            or "fail" to terminate the pipeline with an error.
+    --8<-- [end:python_reference]
+    """

     task_type: str = Field(default="python", serialization_alias="command_type")
     command: str
@@ -209,7 +291,10 @@ class PythonTaskType(BaseTaskType):  # pylint: disable=too-few-public-methods
         """Execute the notebook as defined by the command."""
         attempt_log = StepAttempt(status=defaults.FAIL, start_time=str(datetime.now()))

-        with
+        with (
+            self.execution_context(map_variable=map_variable) as params,
+            self.expose_secrets() as _,
+        ):
             module, func = utils.get_module_and_attr_names(self.command)
             sys.path.insert(0, os.getcwd())  # Need to add the current directory to path
             imported_module = importlib.import_module(module)
@@ -217,21 +302,32 @@ class PythonTaskType(BaseTaskType):  # pylint: disable=too-few-public-methods

             try:
                 try:
-                    filtered_parameters = parameters.filter_arguments_for_func(
-
+                    filtered_parameters = parameters.filter_arguments_for_func(
+                        f, params.copy(), map_variable
+                    )
+                    logger.info(
+                        f"Calling {func} from {module} with {filtered_parameters}"
+                    )

                     out_file = io.StringIO()
                     with contextlib.redirect_stdout(out_file):
-                        user_set_parameters = f(
+                        user_set_parameters = f(
+                            **filtered_parameters
+                        )  # This is a tuple or single value
                     task_console.print(out_file.getvalue())
                 except Exception as e:
-                    raise exceptions.CommandCallError(
+                    raise exceptions.CommandCallError(
+                        f"Function call: {self.command} did not succeed.\n"
+                    ) from e

                 attempt_log.input_parameters = params.copy()

                 if map_variable:
                     attempt_log.input_parameters.update(
-                        {
+                        {
+                            k: JsonParameter(value=v, kind="json")
+                            for k, v in map_variable.items()
+                        }
                     )

                 if self.returns:
@@ -239,7 +335,9 @@ class PythonTaskType(BaseTaskType):  # pylint: disable=too-few-public-methods
                         user_set_parameters = (user_set_parameters,)

                     if len(user_set_parameters) != len(self.returns):
-                        raise ValueError(
+                        raise ValueError(
+                            "Returns task signature does not match the function returns"
+                        )

                     output_parameters: Dict[str, Parameter] = {}
                     metrics: Dict[str, Parameter] = {}
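When a Python task declares `returns`, the function's result is normalised to a tuple and matched positionally against the declared names, with the length check above guarding against mismatches. A small self-contained sketch of that pairing (`pair_returns` is an illustrative name, not runnable's API, and it skips the JSON/object/metric distinction):

```python
from typing import Any, Dict, List, Tuple


def pair_returns(declared: List[str], result: Any) -> Dict[str, Any]:
    """Match a function's return value positionally against the declared return names."""
    values: Tuple[Any, ...] = result if isinstance(result, tuple) else (result,)
    if len(values) != len(declared):
        raise ValueError("Returns task signature does not match the function returns")
    return dict(zip(declared, values))


if __name__ == "__main__":
    def train():
        return 0.91, {"lr": 0.01}

    print(pair_returns(["accuracy", "config"], train()))
    # {'accuracy': 0.91, 'config': {'lr': 0.01}}
```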
@@ -277,7 +375,56 @@ class PythonTaskType(BaseTaskType):  # pylint: disable=too-few-public-methods


 class NotebookTaskType(BaseTaskType):
-    """
+    """
+    --8<-- [start:notebook_reference]
+    An execution node of the pipeline of notebook execution.
+    Please refer to define pipeline/tasks/notebook for more information.
+
+    As part of the dag definition, a notebook task is defined as follows:
+
+    dag:
+      steps:
+        notebook_task: # The name of the node
+          type: task
+          command_type: notebook
+          command: the path to the notebook relative to project root.
+          optional_ploomber_args: a dictionary of arguments to be passed to ploomber engine
+          returns:
+            - name: # The name to assign the return value
+              kind: json # the default value is json,
+                can be object for python objects and metric for metrics
+          secrets:
+            - my_secret_key # A list of secrets to expose by secrets manager
+          catalog:
+            get:
+              - A list of glob patterns to get from the catalog to the local file system
+            put:
+              - A list of glob patterns to put to the catalog from the local file system
+          on_failure: The name of the step to traverse in case of failure
+          overrides:
+            Individual tasks can override the global configuration config by referring to the
+            specific override.
+
+            For example,
+            #Global configuration
+            executor:
+              type: local-container
+              config:
+                docker_image: "runnable/runnable:latest"
+              overrides:
+                custom_docker_image:
+                  docker_image: "runnable/runnable:custom"
+
+            ## In the node definition
+            overrides:
+              local-container:
+                docker_image: "runnable/runnable:custom"
+
+            This instruction will override the docker image for the local-container executor.
+          next: The next node to execute after this task, use "success" to terminate the pipeline successfully
+            or "fail" to terminate the pipeline with an error.
+    --8<-- [end:notebook_reference]
+    """

     task_type: str = Field(default="notebook", serialization_alias="command_type")
     command: str
@@ -302,7 +449,10 @@ class NotebookTaskType(BaseTaskType):
         return str(file_name)

     def get_cli_options(self) -> Tuple[str, dict]:
-        return "notebook", {
+        return "notebook", {
+            "command": self.command,
+            "notebook-output-path": self.notebook_output_path,
+        }

     def execute_command(
         self,
@@ -325,9 +475,12 @@ class NotebookTaskType(BaseTaskType):

             notebook_output_path = self.notebook_output_path

-            with
-
-
+            with (
+                self.execution_context(
+                    map_variable=map_variable, allow_complex=False
+                ) as params,
+                self.expose_secrets() as _,
+            ):
                 copy_params = copy.deepcopy(params)

                 if map_variable:
@@ -336,7 +489,9 @@ class NotebookTaskType(BaseTaskType):
                         copy_params[key] = JsonParameter(kind="json", value=value)

                 # Remove any {v}_unreduced parameters from the parameters
-                unprocessed_params = [
+                unprocessed_params = [
+                    k for k, v in copy_params.items() if not v.reduced
+                ]

                 for key in list(copy_params.keys()):
                     if any(key.endswith(f"_{k}") for k in unprocessed_params):
@@ -360,7 +515,9 @@ class NotebookTaskType(BaseTaskType):
                     pm.execute_notebook(**kwds)
                     task_console.print(out_file.getvalue())

-                context.run_context.catalog_handler.put(
+                context.run_context.catalog_handler.put(
+                    name=notebook_output_path, run_id=context.run_context.run_id
+                )

                 client = PloomberClient.from_path(path=notebook_output_path)
                 namespace = client.get_namespace()
@@ -368,7 +525,9 @@ class NotebookTaskType(BaseTaskType):
                 output_parameters: Dict[str, Parameter] = {}
                 try:
                     for task_return in self.returns:
-                        param_name = Template(task_return.name).safe_substitute(
+                        param_name = Template(task_return.name).safe_substitute(
+                            map_variable  # type: ignore
+                        )

                         if map_variable:
                             for _, v in map_variable.items():
@@ -410,7 +569,54 @@
 
 class ShellTaskType(BaseTaskType):
     """
-
+    --8<-- [start:shell_reference]
+    An execution node of the pipeline of shell execution.
+    Please refer to define pipeline/tasks/shell for more information.
+
+    As part of the dag definition, a shell task is defined as follows:
+
+    dag:
+      steps:
+        shell_task: # The name of the node
+          type: task
+          command_type: shell
+          command: The command to execute, it could be multiline
+          optional_ploomber_args: a dictionary of arguments to be passed to ploomber engine
+          returns:
+            - name: # The name to assign the return value
+              kind: json # the default value is json,
+                can be object for python objects and metric for metrics
+          secrets:
+            - my_secret_key # A list of secrets to expose by secrets manager
+          catalog:
+            get:
+              - A list of glob patterns to get from the catalog to the local file system
+            put:
+              - A list of glob patterns to put to the catalog from the local file system
+          on_failure: The name of the step to traverse in case of failure
+          overrides:
+            Individual tasks can override the global configuration config by referring to the
+            specific override.
+
+            For example,
+            #Global configuration
+            executor:
+              type: local-container
+              config:
+                docker_image: "runnable/runnable:latest"
+              overrides:
+                custom_docker_image:
+                  docker_image: "runnable/runnable:custom"
+
+            ## In the node definition
+            overrides:
+              local-container:
+                docker_image: "runnable/runnable:custom"
+
+            This instruction will override the docker image for the local-container executor.
+          next: The next node to execute after this task, use "success" to terminate the pipeline successfully
+            or "fail" to terminate the pipeline with an error.
+    --8<-- [end:shell_reference]
     """

     task_type: str = Field(default="shell", serialization_alias="command_type")
@@ -421,7 +627,9 @@ class ShellTaskType(BaseTaskType):
     def returns_should_be_json(cls, returns: List[TaskReturns]):
         for task_return in returns:
             if task_return.kind == "object" or task_return.kind == "pydantic":
-                raise ValueError(
+                raise ValueError(
+                    "Pydantic models or Objects are not allowed in returns"
+                )

         return returns

@@ -456,7 +664,9 @@ class ShellTaskType(BaseTaskType):
             subprocess_env[key] = secret_value

         try:
-            with self.execution_context(
+            with self.execution_context(
+                map_variable=map_variable, allow_complex=False
+            ) as params:
                 subprocess_env.update({k: v.get_value() for k, v in params.items()})

                 # Json dumps all runnable environment variables
@@ -467,7 +677,9 @@ class ShellTaskType(BaseTaskType):

                 collect_delimiter = "=== COLLECT ==="

-                command =
+                command = (
+                    self.command.strip() + f" && echo '{collect_delimiter}' && env"
+                )
                 logger.info(f"Executing shell command: {command}")

                 capture = False
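The shell task hunks show how returns are harvested: the user command is suffixed with `&& echo '=== COLLECT ===' && env`, and the environment printed after the delimiter is scanned for the declared names. A simplified, standalone sketch of that trick (the helper name and the blocking `subprocess.run` call are assumptions for illustration; runnable streams the process output and builds `Parameter` objects instead):

```python
import subprocess
from typing import Dict, List

COLLECT_DELIMITER = "=== COLLECT ==="


def run_and_collect(command: str, wanted: List[str]) -> Dict[str, str]:
    """Run a shell command, then capture selected environment variables it exported."""
    full_command = command.strip() + f" && echo '{COLLECT_DELIMITER}' && env"
    completed = subprocess.run(
        full_command, shell=True, check=True, capture_output=True, text=True
    )

    capture = False
    collected: Dict[str, str] = {}
    for line in completed.stdout.splitlines():
        if line.strip() == COLLECT_DELIMITER:
            capture = True  # everything after the delimiter is `env` output
            continue
        if capture and "=" in line:
            key, _, value = line.partition("=")
            if key in wanted:
                collected[key] = value
    return collected


if __name__ == "__main__":
    print(run_and_collect("export answer=42", wanted=["answer"]))  # {'answer': '42'}
```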
runnable/utils.py
CHANGED
@@ -21,7 +21,7 @@ from runnable import defaults, names
 from runnable.defaults import TypeMapVariable

 if TYPE_CHECKING:  # pragma: no cover
-    from
+    from extensions.nodes.nodes import TaskNode
     from runnable.nodes import BaseNode


@@ -86,7 +86,9 @@ def generate_run_id(run_id: str = "") -> str:
     return run_id


-def apply_variables(
+def apply_variables(
+    apply_to: Dict[str, Any], variables: Dict[str, str]
+) -> Dict[str, Any]:
     """Safely applies the variables to a config.

     For example: For config:
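The reflowed `apply_variables` signature takes a config mapping plus a dictionary of variables and "safely applies" them. As one plausible reading of that behaviour — an assumption, not a copy of runnable's implementation — here is a sketch that substitutes `${placeholders}` over the JSON-serialised config and leaves unknown placeholders untouched:

```python
import json
from string import Template
from typing import Any, Dict


def apply_variables_sketch(apply_to: Dict[str, Any], variables: Dict[str, str]) -> Dict[str, Any]:
    """Substitute ${placeholders} in a config dict, leaving unknown ones as-is."""
    templated = Template(json.dumps(apply_to)).safe_substitute(variables)
    return json.loads(templated)


if __name__ == "__main__":
    config = {"image": "runnable/runnable:${tag}", "cpu": "${cpu}"}
    print(apply_variables_sketch(config, {"tag": "latest"}))
    # {'image': 'runnable/runnable:latest', 'cpu': '${cpu}'}
```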
@@ -272,7 +274,9 @@ def get_local_docker_image_id(image_name: str) -> str:
         image = client.images.get(image_name)
         return image.attrs["Id"]
     except ImportError:  # pragma: no cover
-        logger.warning(
+        logger.warning(
+            "Did not find docker installed, some functionality might be affected"
+        )
     except BaseException:
         logger.exception(f"Could not find the image by name {image_name}")

@@ -295,7 +299,9 @@ def get_git_code_identity():
         code_identity.code_identifier_dependable, changed = is_git_clean()
         code_identity.code_identifier_url = get_git_remote()
         if changed:
-            code_identity.code_identifier_message = "changes found in " + ", ".join(
+            code_identity.code_identifier_message = "changes found in " + ", ".join(
+                changed.split("\n")
+            )
     except BaseException:
         logger.exception("Git code versioning problems")

@@ -331,7 +337,9 @@ def get_tracked_data() -> Dict[str, str]:
         try:
             tracked_data[key.lower()] = json.loads(value)
         except json.decoder.JSONDecodeError:
-            logger.warning(
+            logger.warning(
+                f"Tracker {key} could not be JSON decoded, adding the literal value"
+            )
             tracked_data[key.lower()] = value

         del os.environ[env_var]
@@ -389,9 +397,13 @@ def get_data_hash(file_name: str):
         str: The SHA ID of the file contents
     """
     # https://stackoverflow.com/questions/3431825/generating-an-md5-checksum-of-a-file
-
+    # TODO: For a big file, we should only hash the first few bytes
+    return hash_bytestr_iter(
+        file_as_blockiter(open(file_name, "rb")), hashlib.sha256()
+    )  # pragma: no cover


+# TODO: This is not the right place for this.
 def get_node_execution_command(
     node: BaseNode,
     map_variable: TypeMapVariable = None,
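`get_data_hash` above feeds the file through `hash_bytestr_iter`/`file_as_blockiter` into SHA-256. A self-contained equivalent using only `hashlib`, with an arbitrary block size chosen for the sketch:

```python
import hashlib


def sha256_of_file(file_name: str, block_size: int = 65536) -> str:
    """Return the SHA-256 hex digest of a file, reading it in fixed-size blocks."""
    hasher = hashlib.sha256()
    with open(file_name, "rb") as handle:
        for block in iter(lambda: handle.read(block_size), b""):
            hasher.update(block)
    return hasher.hexdigest()


if __name__ == "__main__":
    print(sha256_of_file(__file__))
```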
@@ -415,7 +427,11 @@

     log_level = log_level or logging.getLevelName(logger.getEffectiveLevel())

-    action =
+    action = (
+        f"runnable execute_single_node {run_id} "
+        f"{node._command_friendly_name()}"
+        f" --log-level {log_level}"
+    )

     if context.run_context.pipeline_file:
         action = action + f" --file {context.run_context.pipeline_file}"
@@ -435,6 +451,7 @@
     return action


+# TODO: This is not the right place for this.
 def get_fan_command(
     mode: str,
     node: BaseNode,
@@ -478,6 +495,7 @@
     return action


+# TODO: This is not the right place for this.
 def get_job_execution_command(node: TaskNode, over_write_run_id: str = "") -> str:
     """Get the execution command to run a job via command line.

@@ -520,7 +538,9 @@ def get_job_execution_command(node: TaskNode, over_write_run_id: str = "") -> str:
     return action


-def get_provider_by_name_and_type(
+def get_provider_by_name_and_type(
+    service_type: str, service_details: defaults.ServiceConfig
+):
     """Given a service type, one of executor, run_log_store, catalog, secrets and the config
     return the exact child class implementing the service.
     We use stevedore to do the work for us.
@@ -542,7 +562,9 @@ def get_provider_by_name_and_type(service_type: str, service_details: defaults.ServiceConfig):
     if "config" in service_details:
         service_config = service_details.get("config", {})

-    logger.debug(
+    logger.debug(
+        f"Trying to get a service of {service_type} of the name {service_name} with config: {service_config}"
+    )
     try:
         mgr = driver.DriverManager(
             namespace=namespace,
@@ -552,8 +574,12 @@ def get_provider_by_name_and_type(service_type: str, service_details: defaults.ServiceConfig):
         )
         return mgr.driver
     except Exception as _e:
-        logger.exception(
-
+        logger.exception(
+            f"Could not find the service of type: {service_type} with config: {service_details}"
+        )
+        raise Exception(
+            f"Could not find the service of type: {service_type} with config: {service_details}"
+        ) from _e


 def get_run_config() -> dict:
@@ -585,7 +611,9 @@ def json_to_ordered_dict(json_str: str) -> TypeMapVariable:
     return OrderedDict()


-def set_runnable_environment_variables(
+def set_runnable_environment_variables(
+    run_id: str = "", configuration_file: str = "", tag: str = ""
+) -> None:
     """Set the environment variables used by runnable. This function should be called during the prepare configurations
     by all executors.

@@ -604,7 +632,7 @@ def set_runnable_environment_variables(run_id: str = "", configuration_file: str = "", tag: str = ""):
     os.environ[defaults.RUNNABLE_RUN_TAG] = tag


-def gather_variables() ->
+def gather_variables() -> Dict[str, str]:
     """Gather all the environment variables used by runnable. All the variables start with runnable_VAR_.

     Returns:
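Per its docstring, `gather_variables` collects every environment variable that starts with `runnable_VAR_`. A minimal sketch of that scan, assuming the prefix is stripped from the returned keys (the stripping is an assumption; the prefix itself comes from the docstring):

```python
import os
from typing import Dict

VARIABLE_PREFIX = "runnable_VAR_"


def gather_variables_sketch() -> Dict[str, str]:
    """Collect runnable_VAR_* environment variables, keyed by the name after the prefix."""
    return {
        key[len(VARIABLE_PREFIX):]: value
        for key, value in os.environ.items()
        if key.startswith(VARIABLE_PREFIX)
    }


if __name__ == "__main__":
    os.environ["runnable_VAR_region"] = "eu-west-1"
    print(gather_variables_sketch())  # {'region': 'eu-west-1'}
```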
{runnable-0.12.3.dist-info → runnable-0.14.0.dist-info}/METADATA
CHANGED
@@ -1,36 +1,31 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: runnable
-Version: 0.
-Summary:
-
-License:
-
-
-Requires-
-
-
-
-
-
-
-
+Version: 0.14.0
+Summary: Add your description here
+Author-email: "Vammi, Vijay" <vijay.vammi@astrazeneca.com>
+License-File: LICENSE
+Requires-Python: >=3.9
+Requires-Dist: catalog
+Requires-Dist: click-plugins>=1.1.1
+Requires-Dist: click<=8.1.3
+Requires-Dist: dill>=0.3.9
+Requires-Dist: executor
+Requires-Dist: nodes
+Requires-Dist: pydantic>=2.10.3
+Requires-Dist: python-dotenv>=1.0.1
+Requires-Dist: rich>=13.9.4
+Requires-Dist: ruamel-yaml>=0.18.6
+Requires-Dist: run-log-store
+Requires-Dist: secrets
+Requires-Dist: setuptools>=75.6.0
+Requires-Dist: stevedore>=5.4.0
+Requires-Dist: typer>=0.15.1
 Provides-Extra: docker
+Requires-Dist: docker>=7.1.0; extra == 'docker'
+Provides-Extra: examples
+Requires-Dist: pandas>=2.2.3; extra == 'examples'
 Provides-Extra: notebook
-Requires-Dist:
-Requires-Dist: click-plugins (>=1.1.1,<2.0.0)
-Requires-Dist: dill (>=0.3.8,<0.4.0)
-Requires-Dist: docker ; extra == "docker"
-Requires-Dist: mlflow-skinny
-Requires-Dist: ploomber-engine (>=0.0.31,<0.0.32) ; extra == "notebook"
-Requires-Dist: pydantic (>=2.5,<3.0)
-Requires-Dist: rich (>=13.5.2,<14.0.0)
-Requires-Dist: ruamel.yaml
-Requires-Dist: ruamel.yaml.clib
-Requires-Dist: sqlalchemy ; extra == "database"
-Requires-Dist: stevedore (>=3.5.0,<4.0.0)
-Requires-Dist: typing-extensions ; python_version < "3.8"
-Project-URL: Documentation, https://github.com/vijayvammi/runnable
-Project-URL: Repository, https://github.com/vijayvammi/runnable
+Requires-Dist: ploomber-engine>=0.0.33; extra == 'notebook'
 Description-Content-Type: text/markdown


@@ -267,4 +262,3 @@ Execute a pipeline over an iterable parameter.

 ### [Arbitrary nesting](https://astrazeneca.github.io/runnable-core/concepts/nesting/)
 Any nesting of parallel within map and so on.
-