runnable 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runnable/__init__.py +12 -1
- runnable/catalog.py +2 -2
- runnable/cli.py +5 -5
- runnable/datastore.py +3 -2
- runnable/defaults.py +21 -18
- runnable/entrypoints.py +41 -77
- runnable/executor.py +6 -16
- runnable/extensions/catalog/file_system/implementation.py +2 -1
- runnable/extensions/executor/__init__.py +20 -9
- runnable/extensions/executor/argo/implementation.py +6 -5
- runnable/extensions/executor/argo/specification.yaml +1 -1
- runnable/extensions/executor/k8s_job/implementation_FF.py +4 -4
- runnable/extensions/executor/local/implementation.py +1 -0
- runnable/extensions/executor/local_container/implementation.py +4 -10
- runnable/extensions/executor/mocked/implementation.py +2 -33
- runnable/extensions/nodes.py +40 -60
- runnable/integration.py +2 -2
- runnable/interaction.py +9 -4
- runnable/nodes.py +19 -7
- runnable/parameters.py +1 -1
- runnable/sdk.py +181 -59
- runnable/tasks.py +124 -121
- runnable/utils.py +11 -11
- {runnable-0.2.0.dist-info → runnable-0.4.0.dist-info}/METADATA +53 -53
- {runnable-0.2.0.dist-info → runnable-0.4.0.dist-info}/RECORD +28 -28
- {runnable-0.2.0.dist-info → runnable-0.4.0.dist-info}/WHEEL +1 -1
- {runnable-0.2.0.dist-info → runnable-0.4.0.dist-info}/LICENSE +0 -0
- {runnable-0.2.0.dist-info → runnable-0.4.0.dist-info}/entry_points.txt +0 -0
runnable/sdk.py
CHANGED
@@ -3,11 +3,10 @@ from __future__ import annotations
|
|
3
3
|
import logging
|
4
4
|
import os
|
5
5
|
from abc import ABC, abstractmethod
|
6
|
-
from typing import Any, Dict, List, Optional, Union
|
6
|
+
from typing import Any, Callable, Dict, List, Optional, Union
|
7
7
|
|
8
|
-
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, computed_field,
|
8
|
+
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, computed_field, model_validator
|
9
9
|
from rich import print
|
10
|
-
from ruamel.yaml import YAML
|
11
10
|
from typing_extensions import Self
|
12
11
|
|
13
12
|
from runnable import defaults, entrypoints, graph, utils
|
@@ -16,11 +15,8 @@ from runnable.nodes import TraversalNode
|
|
16
15
|
|
17
16
|
logger = logging.getLogger(defaults.LOGGER_NAME)
|
18
17
|
|
19
|
-
StepType = Union["Stub", "
|
20
|
-
TraversalTypes = Union["Stub", "
|
21
|
-
|
22
|
-
|
23
|
-
ALLOWED_COMMAND_TYPES = ["shell", "python", "notebook"]
|
18
|
+
StepType = Union["Stub", "PythonTask", "NotebookTask", "ShellTask", "Success", "Fail", "Parallel", "Map"]
|
19
|
+
TraversalTypes = Union["Stub", "PythonTask", "NotebookTask", "ShellTask", "Parallel", "Map"]
|
24
20
|
|
25
21
|
|
26
22
|
class Catalog(BaseModel):
|
@@ -33,7 +29,7 @@ class Catalog(BaseModel):
|
|
33
29
|
put (List[str]): List of glob patterns to put into central catalog from the compute data folder.
|
34
30
|
|
35
31
|
Examples:
|
36
|
-
>>> from
|
32
|
+
>>> from runnable import Catalog, Task
|
37
33
|
>>> catalog = Catalog(compute_data_folder="/path/to/data", get=["*.csv"], put=["*.csv"])
|
38
34
|
|
39
35
|
>>> task = Task(name="task", catalog=catalog, command="echo 'hello'")
|
@@ -107,7 +103,7 @@ class BaseTraversal(ABC, BaseModel):
|
|
107
103
|
...
|
108
104
|
|
109
105
|
|
110
|
-
class
|
106
|
+
class BaseTask(BaseTraversal):
|
111
107
|
"""
|
112
108
|
An execution node of the pipeline.
|
113
109
|
Please refer to [concepts](concepts/task.md) for more information.
|
@@ -133,10 +129,10 @@ class Task(BaseTraversal):
|
|
133
129
|
executor:
|
134
130
|
type: local-container
|
135
131
|
config:
|
136
|
-
docker_image: "
|
132
|
+
docker_image: "runnable/runnable:latest"
|
137
133
|
overrides:
|
138
134
|
custom_docker_image:
|
139
|
-
docker_image: "
|
135
|
+
docker_image: "runnable/runnable:custom"
|
140
136
|
```
|
141
137
|
### Task specific configuration
|
142
138
|
```python
|
@@ -148,48 +144,173 @@ class Task(BaseTraversal):
|
|
148
144
|
optional_ploomber_args (Optional[Dict[str, Any]]): Any optional ploomber args.
|
149
145
|
Only used when command_type is 'notebook', defaults to {}
|
150
146
|
output_cell_tag (Optional[str]): The tag of the output cell.
|
151
|
-
Only used when command_type is 'notebook', defaults to "
|
147
|
+
Only used when command_type is 'notebook', defaults to "runnable_output"
|
152
148
|
terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
|
153
149
|
terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
|
154
150
|
on_failure (str): The name of the node to execute if the step fails.
|
155
151
|
|
156
152
|
"""
|
157
153
|
|
158
|
-
command: str = Field(alias="command")
|
159
|
-
command_type: str = Field(default="python")
|
160
154
|
catalog: Optional[Catalog] = Field(default=None, alias="catalog")
|
161
155
|
overrides: Dict[str, Any] = Field(default_factory=dict, alias="overrides")
|
162
156
|
|
157
|
+
def create_node(self) -> TaskNode:
|
158
|
+
if not self.next_node:
|
159
|
+
if not (self.terminate_with_failure or self.terminate_with_success):
|
160
|
+
raise AssertionError("A node not being terminated must have a user defined next node")
|
161
|
+
|
162
|
+
print(self.model_dump(exclude_none=True))
|
163
|
+
return TaskNode.parse_from_config(self.model_dump(exclude_none=True))
|
164
|
+
|
165
|
+
|
166
|
+
class PythonTask(BaseTask):
|
167
|
+
"""
|
168
|
+
An execution node of the pipeline of python functions.
|
169
|
+
Please refer to [concepts](concepts/task.md) for more information.
|
170
|
+
|
171
|
+
Attributes:
|
172
|
+
name (str): The name of the node.
|
173
|
+
function (callable): The function to execute.
|
174
|
+
catalog (Optional[Catalog]): The catalog to sync data from/to.
|
175
|
+
Please see Catalog about the structure of the catalog.
|
176
|
+
overrides (Dict[str, Any]): Any overrides to the command.
|
177
|
+
Individual tasks can override the global configuration by referring to the
|
178
|
+
specific override.
|
179
|
+
|
180
|
+
For example,
|
181
|
+
### Global configuration
|
182
|
+
```yaml
|
183
|
+
executor:
|
184
|
+
type: local-container
|
185
|
+
config:
|
186
|
+
docker_image: "runnable/runnable:latest"
|
187
|
+
overrides:
|
188
|
+
custom_docker_image:
|
189
|
+
docker_image: "runnable/runnable:custom"
|
190
|
+
```
|
191
|
+
### Task specific configuration
|
192
|
+
```python
|
193
|
+
task = PythonTask(name="task", function=function,
|
194
|
+
overrides={'local-container': custom_docker_image})
|
195
|
+
```
|
196
|
+
|
197
|
+
terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
|
198
|
+
terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
|
199
|
+
on_failure (str): The name of the node to execute if the step fails.
|
200
|
+
|
201
|
+
"""
|
202
|
+
|
203
|
+
function: Callable = Field(exclude=True)
|
204
|
+
|
205
|
+
@computed_field
|
206
|
+
def command_type(self) -> str:
|
207
|
+
return "python"
|
208
|
+
|
209
|
+
@computed_field
|
210
|
+
def command(self) -> str:
|
211
|
+
module = self.function.__module__
|
212
|
+
name = self.function.__name__
|
213
|
+
|
214
|
+
return f"{module}.{name}"
|
215
|
+
|
216
|
+
|
217
|
+
class NotebookTask(BaseTask):
|
218
|
+
"""
|
219
|
+
An execution node of the pipeline of type notebook.
|
220
|
+
Please refer to [concepts](concepts/task.md) for more information.
|
221
|
+
|
222
|
+
Attributes:
|
223
|
+
name (str): The name of the node.
|
224
|
+
notebook: The path to the notebook
|
225
|
+
catalog (Optional[Catalog]): The catalog to sync data from/to.
|
226
|
+
Please see Catalog about the structure of the catalog.
|
227
|
+
returns: A list of the names of variables to return from the notebook.
|
228
|
+
overrides (Dict[str, Any]): Any overrides to the command.
|
229
|
+
Individual tasks can override the global configuration by referring to the
|
230
|
+
specific override.
|
231
|
+
|
232
|
+
For example,
|
233
|
+
### Global configuration
|
234
|
+
```yaml
|
235
|
+
executor:
|
236
|
+
type: local-container
|
237
|
+
config:
|
238
|
+
docker_image: "runnable/runnable:latest"
|
239
|
+
overrides:
|
240
|
+
custom_docker_image:
|
241
|
+
docker_image: "runnable/runnable:custom"
|
242
|
+
```
|
243
|
+
### Task specific configuration
|
244
|
+
```python
|
245
|
+
task = NotebookTask(name="task", notebook="evaluation.ipynb",
|
246
|
+
overrides={'local-container': custom_docker_image})
|
247
|
+
```
|
248
|
+
notebook_output_path (Optional[str]): The path to save the notebook output.
|
249
|
+
Only used when command_type is 'notebook', defaults to command+_out.ipynb
|
250
|
+
optional_ploomber_args (Optional[Dict[str, Any]]): Any optional ploomber args.
|
251
|
+
Only used when command_type is 'notebook', defaults to {}
|
252
|
+
|
253
|
+
terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
|
254
|
+
terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
|
255
|
+
on_failure (str): The name of the node to execute if the step fails.
|
256
|
+
|
257
|
+
"""
|
258
|
+
|
259
|
+
notebook: str = Field(alias="command")
|
260
|
+
|
163
261
|
notebook_output_path: Optional[str] = Field(default=None, alias="notebook_output_path")
|
164
262
|
optional_ploomber_args: Optional[Dict[str, Any]] = Field(default=None, alias="optional_ploomber_args")
|
165
|
-
|
263
|
+
returns: List[str] = Field(default_factory=list, alias="returns")
|
166
264
|
|
167
|
-
@
|
168
|
-
|
169
|
-
|
170
|
-
if value not in ALLOWED_COMMAND_TYPES:
|
171
|
-
raise ValueError(f"Invalid command_type: {value}")
|
172
|
-
return value
|
265
|
+
@computed_field
|
266
|
+
def command_type(self) -> str:
|
267
|
+
return "notebook"
|
173
268
|
|
174
|
-
@model_validator(mode="after")
|
175
|
-
def check_notebook_args(self) -> "Task":
|
176
|
-
if self.command_type != "notebook":
|
177
|
-
assert (
|
178
|
-
self.notebook_output_path is None
|
179
|
-
), "Only command_types of 'notebook' can be used with notebook_output_path"
|
180
269
|
|
181
|
-
|
182
|
-
|
183
|
-
|
270
|
+
class ShellTask(BaseTask):
|
271
|
+
"""
|
272
|
+
An execution node of the pipeline of type shell.
|
273
|
+
Please refer to [concepts](concepts/task.md) for more information.
|
184
274
|
|
185
|
-
|
186
|
-
|
275
|
+
Attributes:
|
276
|
+
name (str): The name of the node.
|
277
|
+
command: The shell command to execute.
|
278
|
+
catalog (Optional[Catalog]): The catalog to sync data from/to.
|
279
|
+
Please see Catalog about the structure of the catalog.
|
280
|
+
returns: A list of the names of variables to capture from environment variables of shell.
|
281
|
+
overrides (Dict[str, Any]): Any overrides to the command.
|
282
|
+
Individual tasks can override the global configuration by referring to the
|
283
|
+
specific override.
|
187
284
|
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
285
|
+
For example,
|
286
|
+
### Global configuration
|
287
|
+
```yaml
|
288
|
+
executor:
|
289
|
+
type: local-container
|
290
|
+
config:
|
291
|
+
docker_image: "runnable/runnable:latest"
|
292
|
+
overrides:
|
293
|
+
custom_docker_image:
|
294
|
+
docker_image: "runnable/runnable:custom"
|
295
|
+
```
|
296
|
+
### Task specific configuration
|
297
|
+
```python
|
298
|
+
task = ShellTask(name="task", command="exit 0",
|
299
|
+
overrides={'local-container': custom_docker_image})
|
300
|
+
```
|
301
|
+
|
302
|
+
terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
|
303
|
+
terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
|
304
|
+
on_failure (str): The name of the node to execute if the step fails.
|
305
|
+
|
306
|
+
"""
|
307
|
+
|
308
|
+
command: str = Field(alias="command")
|
309
|
+
returns: List[str] = Field(default_factory=list, alias="returns")
|
310
|
+
|
311
|
+
@computed_field
|
312
|
+
def command_type(self) -> str:
|
313
|
+
return "shell"
|
193
314
|
|
194
315
|
|
195
316
|
class Stub(BaseTraversal):
|
@@ -341,7 +462,8 @@ class Pipeline(BaseModel):
|
|
341
462
|
A Pipeline is a directed acyclic graph of Steps that define a workflow.
|
342
463
|
|
343
464
|
Attributes:
|
344
|
-
steps (List[Stub |
|
465
|
+
steps (List[Stub | PythonTask | NotebookTask | ShellTask | Parallel | Map | Success | Fail]):
|
466
|
+
A list of Steps that make up the Pipeline.
|
345
467
|
start_at (Stub | PythonTask | NotebookTask | ShellTask | Parallel | Map): The name of the first Step in the Pipeline.
|
346
468
|
name (str, optional): The name of the Pipeline. Defaults to "".
|
347
469
|
description (str, optional): A description of the Pipeline. Defaults to "".
|
@@ -385,6 +507,9 @@ class Pipeline(BaseModel):
|
|
385
507
|
|
386
508
|
self._dag.check_graph()
|
387
509
|
|
510
|
+
def return_dag(self) -> graph.Graph:
|
511
|
+
return self._dag
|
512
|
+
|
388
513
|
def execute(
|
389
514
|
self,
|
390
515
|
configuration_file: str = "",
|
@@ -393,7 +518,6 @@ class Pipeline(BaseModel):
|
|
393
518
|
parameters_file: str = "",
|
394
519
|
use_cached: str = "",
|
395
520
|
log_level: str = defaults.LOG_LEVEL,
|
396
|
-
output_pipeline_definition: str = "magnus-pipeline.yaml",
|
397
521
|
):
|
398
522
|
"""
|
399
523
|
*Execute* the Pipeline.
|
@@ -408,7 +532,7 @@ class Pipeline(BaseModel):
|
|
408
532
|
|
409
533
|
Args:
|
410
534
|
configuration_file (str, optional): The path to the configuration file. Defaults to "".
|
411
|
-
The configuration file can be overridden by the environment variable
|
535
|
+
The configuration file can be overridden by the environment variable RUNNABLE_CONFIGURATION_FILE.
|
412
536
|
|
413
537
|
run_id (str, optional): The ID of the run. Defaults to "".
|
414
538
|
tag (str, optional): The tag of the run. Defaults to "".
|
@@ -419,18 +543,18 @@ class Pipeline(BaseModel):
|
|
419
543
|
Provide the run_id of the older execution to recover.
|
420
544
|
|
421
545
|
log_level (str, optional): The log level. Defaults to defaults.LOG_LEVEL.
|
422
|
-
output_pipeline_definition (str, optional): The path to the output pipeline definition file.
|
423
|
-
Defaults to "magnus-pipeline.yaml".
|
424
|
-
|
425
|
-
Only applicable for the execution via SDK for non ```local``` executors.
|
426
546
|
"""
|
427
|
-
|
428
|
-
|
547
|
+
|
548
|
+
# py_to_yaml is used by non local executors to generate the yaml representation of the pipeline.
|
549
|
+
py_to_yaml = os.environ.get("RUNNABLE_PY_TO_YAML", "false")
|
550
|
+
|
551
|
+
if py_to_yaml == "true":
|
552
|
+
return
|
429
553
|
|
430
554
|
logger.setLevel(log_level)
|
431
555
|
|
432
556
|
run_id = utils.generate_run_id(run_id=run_id)
|
433
|
-
configuration_file = os.environ.get("
|
557
|
+
configuration_file = os.environ.get("RUNNABLE_CONFIGURATION_FILE", configuration_file)
|
434
558
|
run_context = entrypoints.prepare_configurations(
|
435
559
|
configuration_file=configuration_file,
|
436
560
|
run_id=run_id,
|
@@ -440,7 +564,7 @@ class Pipeline(BaseModel):
|
|
440
564
|
)
|
441
565
|
|
442
566
|
run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
|
443
|
-
utils.
|
567
|
+
utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
|
444
568
|
|
445
569
|
dag_definition = self._dag.model_dump(by_alias=True, exclude_none=True)
|
446
570
|
|
@@ -449,17 +573,14 @@ class Pipeline(BaseModel):
|
|
449
573
|
print("Working with context:")
|
450
574
|
print(run_context)
|
451
575
|
|
452
|
-
if not
|
453
|
-
|
454
|
-
|
576
|
+
if not run_context.executor._local:
|
577
|
+
# We are working with non local executor
|
578
|
+
import inspect
|
455
579
|
|
456
|
-
|
457
|
-
|
458
|
-
{"dag": run_context.dag.model_dump(by_alias=True, exclude_none=True)},
|
459
|
-
f,
|
460
|
-
)
|
580
|
+
caller_stack = inspect.stack()[1]
|
581
|
+
module_to_call = f"{caller_stack.filename.replace('/', '.').replace('.py', '')}.{caller_stack.function}"
|
461
582
|
|
462
|
-
|
583
|
+
run_context.pipeline_file = f"{module_to_call}.py"
|
463
584
|
|
464
585
|
# Prepare for graph execution
|
465
586
|
run_context.executor.prepare_for_graph_execution()
|
@@ -467,4 +588,5 @@ class Pipeline(BaseModel):
|
|
467
588
|
logger.info("Executing the graph")
|
468
589
|
run_context.executor.execute_graph(dag=run_context.dag)
|
469
590
|
|
470
|
-
|
591
|
+
if run_context.executor._local:
|
592
|
+
return run_context.run_log_store.get_run_log_by_id(run_id=run_context.run_id)
|