runnable 0.1.0-py3-none-any.whl → 0.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runnable/__init__.py +34 -0
- runnable/catalog.py +141 -0
- runnable/cli.py +272 -0
- runnable/context.py +34 -0
- runnable/datastore.py +687 -0
- runnable/defaults.py +182 -0
- runnable/entrypoints.py +448 -0
- runnable/exceptions.py +94 -0
- runnable/executor.py +421 -0
- runnable/experiment_tracker.py +139 -0
- runnable/extensions/catalog/__init__.py +21 -0
- runnable/extensions/catalog/file_system/__init__.py +0 -0
- runnable/extensions/catalog/file_system/implementation.py +227 -0
- runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
- runnable/extensions/catalog/k8s_pvc/implementation.py +16 -0
- runnable/extensions/catalog/k8s_pvc/integration.py +59 -0
- runnable/extensions/executor/__init__.py +725 -0
- runnable/extensions/executor/argo/__init__.py +0 -0
- runnable/extensions/executor/argo/implementation.py +1183 -0
- runnable/extensions/executor/argo/specification.yaml +51 -0
- runnable/extensions/executor/k8s_job/__init__.py +0 -0
- runnable/extensions/executor/k8s_job/implementation_FF.py +259 -0
- runnable/extensions/executor/k8s_job/integration_FF.py +69 -0
- runnable/extensions/executor/local/__init__.py +0 -0
- runnable/extensions/executor/local/implementation.py +70 -0
- runnable/extensions/executor/local_container/__init__.py +0 -0
- runnable/extensions/executor/local_container/implementation.py +361 -0
- runnable/extensions/executor/mocked/__init__.py +0 -0
- runnable/extensions/executor/mocked/implementation.py +189 -0
- runnable/extensions/experiment_tracker/__init__.py +0 -0
- runnable/extensions/experiment_tracker/mlflow/__init__.py +0 -0
- runnable/extensions/experiment_tracker/mlflow/implementation.py +94 -0
- runnable/extensions/nodes.py +655 -0
- runnable/extensions/run_log_store/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_file_system/implementation.py +106 -0
- runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +21 -0
- runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +61 -0
- runnable/extensions/run_log_store/db/implementation_FF.py +157 -0
- runnable/extensions/run_log_store/db/integration_FF.py +0 -0
- runnable/extensions/run_log_store/file_system/__init__.py +0 -0
- runnable/extensions/run_log_store/file_system/implementation.py +136 -0
- runnable/extensions/run_log_store/generic_chunked.py +541 -0
- runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
- runnable/extensions/run_log_store/k8s_pvc/implementation.py +21 -0
- runnable/extensions/run_log_store/k8s_pvc/integration.py +56 -0
- runnable/extensions/secrets/__init__.py +0 -0
- runnable/extensions/secrets/dotenv/__init__.py +0 -0
- runnable/extensions/secrets/dotenv/implementation.py +100 -0
- runnable/extensions/secrets/env_secrets/__init__.py +0 -0
- runnable/extensions/secrets/env_secrets/implementation.py +42 -0
- runnable/graph.py +464 -0
- runnable/integration.py +205 -0
- runnable/interaction.py +404 -0
- runnable/names.py +546 -0
- runnable/nodes.py +501 -0
- runnable/parameters.py +183 -0
- runnable/pickler.py +102 -0
- runnable/sdk.py +472 -0
- runnable/secrets.py +95 -0
- runnable/tasks.py +395 -0
- runnable/utils.py +630 -0
- runnable-0.3.0.dist-info/METADATA +437 -0
- runnable-0.3.0.dist-info/RECORD +69 -0
- {runnable-0.1.0.dist-info → runnable-0.3.0.dist-info}/WHEEL +1 -1
- runnable-0.3.0.dist-info/entry_points.txt +44 -0
- runnable-0.1.0.dist-info/METADATA +0 -16
- runnable-0.1.0.dist-info/RECORD +0 -6
- /runnable/{.gitkeep → extensions/__init__.py} +0 -0
- {runnable-0.1.0.dist-info → runnable-0.3.0.dist-info}/LICENSE +0 -0
runnable/pickler.py
ADDED
@@ -0,0 +1,102 @@
+import pickle
+from abc import ABC, abstractmethod
+from typing import Any
+
+from pydantic import BaseModel, ConfigDict
+
+import runnable.context as context
+
+
+class BasePickler(ABC, BaseModel):
+    """
+    The base class for all picklers.
+
+    We are still in the process of hardening the design of this class.
+    For now, we are just going to use pickle.
+    """
+
+    extension: str = ""
+    service_name: str = ""
+    service_type: str = "pickler"
+    model_config = ConfigDict(extra="forbid")
+
+    @property
+    def _context(self):
+        return context.run_context
+
+    @abstractmethod
+    def dump(self, data: Any, path: str):
+        """
+        Dump an object to the specified path.
+        The path is the full path.
+
+        To correctly identify the pickler from possible implementations, we use the extension.
+        An extension is added automatically, if not provided.
+
+        Args:
+            data (Any): The object to pickle
+            path (str): The path to save the pickle file
+
+        Raises:
+            NotImplementedError: Base class has no implementation
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def load(self, path: str) -> Any:
+        """
+        Load the object from the specified path.
+
+        To correctly identify the pickler from possible implementations, we use the extension.
+        An extension is added automatically, if not provided.
+
+        Args:
+            path (str): The path to load the pickled file from.
+
+        Raises:
+            NotImplementedError: Base class has no implementation.
+        """
+        raise NotImplementedError
+
+
+class NativePickler(BasePickler):
+    """
+    Uses native python pickle to load and dump files
+    """
+
+    extension: str = ".pickle"
+    service_name: str = "pickle"
+
+    def dump(self, data: Any, path: str):
+        """
+        Dump an object to the specified path.
+        The path is the full path.
+
+        Args:
+            data (Any): The data to pickle
+            path (str): The path to save the pickle file
+        """
+        if not path.endswith(self.extension):
+            path = path + self.extension
+
+        with open(path, "wb") as f:
+            pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)
+
+    def load(self, path: str) -> Any:
+        """
+        Load the object from the specified path.
+
+        Args:
+            path (str): The path to load the object from.
+
+        Returns:
+            Any: The data loaded from the file.
+        """
+        if not path.endswith(self.extension):
+            path = path + self.extension
+
+        data = None
+        with open(path, "rb") as f:
+            data = pickle.load(f)
+
+        return data
runnable/sdk.py
ADDED
@@ -0,0 +1,472 @@
+from __future__ import annotations
+
+import logging
+import os
+from abc import ABC, abstractmethod
+from typing import Any, Dict, List, Optional, Union
+
+from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, computed_field, field_validator, model_validator
+from rich import print
+from typing_extensions import Self
+
+from runnable import defaults, entrypoints, graph, utils
+from runnable.extensions.nodes import FailNode, MapNode, ParallelNode, StubNode, SuccessNode, TaskNode
+from runnable.nodes import TraversalNode
+
+logger = logging.getLogger(defaults.LOGGER_NAME)
+
+StepType = Union["Stub", "Task", "Success", "Fail", "Parallel", "Map"]
+TraversalTypes = Union["Stub", "Task", "Parallel", "Map"]
+
+
+ALLOWED_COMMAND_TYPES = ["shell", "python", "notebook"]
+
+
+class Catalog(BaseModel):
+    """
+    Use to instruct a task to sync data from/to the central catalog.
+    Please refer to [concepts](concepts/catalog.md) for more information.
+
+    Attributes:
+        get (List[str]): List of glob patterns to get from central catalog to the compute data folder.
+        put (List[str]): List of glob patterns to put into central catalog from the compute data folder.
+
+    Examples:
+        >>> from runnable import Catalog, Task
+        >>> catalog = Catalog(compute_data_folder="/path/to/data", get=["*.csv"], put=["*.csv"])
+
+        >>> task = Task(name="task", catalog=catalog, command="echo 'hello'")
+
+    """
+
+    model_config = ConfigDict(extra="forbid")  # Need to be for command, would be validated later
+    # Note: compute_data_folder was confusing to explain, might be introduced later.
+    # compute_data_folder: str = Field(default="", alias="compute_data_folder")
+    get: List[str] = Field(default_factory=list, alias="get")
+    put: List[str] = Field(default_factory=list, alias="put")
+
+
+class BaseTraversal(ABC, BaseModel):
+    name: str
+    next_node: str = Field(default="", alias="next")
+    terminate_with_success: bool = Field(default=False, exclude=True)
+    terminate_with_failure: bool = Field(default=False, exclude=True)
+    on_failure: str = Field(default="", alias="on_failure")
+
+    model_config = ConfigDict(extra="forbid")
+
+    @computed_field  # type: ignore
+    @property
+    def internal_name(self) -> str:
+        return self.name
+
+    def __rshift__(self, other: StepType) -> StepType:
+        if self.next_node:
+            raise Exception(f"The node {self} already has a next node: {self.next_node}")
+        self.next_node = other.name
+
+        return other
+
+    def __lshift__(self, other: TraversalNode) -> TraversalNode:
+        if other.next_node:
+            raise Exception(f"The {other} node already has a next node: {other.next_node}")
+        other.next_node = self.name
+
+        return other
+
+    def depends_on(self, node: StepType) -> Self:
+        assert not isinstance(node, Success)
+        assert not isinstance(node, Fail)
+
+        if node.next_node:
+            raise Exception(f"The {node} node already has a next node: {node.next_node}")
+
+        node.next_node = self.name
+        return self
+
+    @model_validator(mode="after")
+    def validate_terminations(self) -> Self:
+        if self.terminate_with_failure and self.terminate_with_success:
+            raise AssertionError("A node cannot terminate with success and failure")
+
+        if self.terminate_with_failure or self.terminate_with_success:
+            if self.next_node and self.next_node not in ["success", "fail"]:
+                raise AssertionError("A node being terminated cannot have a user defined next node")
+
+        if self.terminate_with_failure:
+            self.next_node = "fail"
+
+        if self.terminate_with_success:
+            self.next_node = "success"
+
+        return self
+
+    @abstractmethod
+    def create_node(self) -> TraversalNode:
+        ...
+
+
+## TODO: Add python task, shell task, and notebook task.
+
+
+class Task(BaseTraversal):
+    """
+    An execution node of the pipeline.
+    Please refer to [concepts](concepts/task.md) for more information.
+
+    Attributes:
+        name (str): The name of the node.
+        command (str): The command to execute.
+
+            - For python functions, [dotted path](concepts/task.md/#python_functions) to the function.
+            - For shell commands: command to execute in the shell.
+            - For notebooks: path to the notebook.
+        command_type (str): The type of command to execute.
+            Can be one of "shell", "python", or "notebook".
+        catalog (Optional[Catalog]): The catalog to sync data from/to.
+            Please see Catalog about the structure of the catalog.
+        overrides (Dict[str, Any]): Any overrides to the command.
+            Individual tasks can override the global configuration config by referring to the
+            specific override.
+
+            For example,
+            ### Global configuration
+            ```yaml
+            executor:
+              type: local-container
+              config:
+                docker_image: "runnable/runnable:latest"
+                overrides:
+                  custom_docker_image:
+                    docker_image: "runnable/runnable:custom"
+            ```
+            ### Task specific configuration
+            ```python
+            task = Task(name="task", command="echo 'hello'", command_type="shell",
+                    overrides={'local-container': custom_docker_image})
+            ```
+        notebook_output_path (Optional[str]): The path to save the notebook output.
+            Only used when command_type is 'notebook', defaults to command+_out.ipynb
+        optional_ploomber_args (Optional[Dict[str, Any]]): Any optional ploomber args.
+            Only used when command_type is 'notebook', defaults to {}
+        output_cell_tag (Optional[str]): The tag of the output cell.
+            Only used when command_type is 'notebook', defaults to "runnable_output"
+        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+        terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
+        on_failure (str): The name of the node to execute if the step fails.
+
+    """
+
+    command: str = Field(alias="command")
+    command_type: str = Field(default="python")
+    catalog: Optional[Catalog] = Field(default=None, alias="catalog")
+    overrides: Dict[str, Any] = Field(default_factory=dict, alias="overrides")
+
+    notebook_output_path: Optional[str] = Field(default=None, alias="notebook_output_path")
+    optional_ploomber_args: Optional[Dict[str, Any]] = Field(default=None, alias="optional_ploomber_args")
+    output_cell_tag: Optional[str] = Field(default=None, alias="output_cell_tag")
+
+    @field_validator("command_type", mode="before")
+    @classmethod
+    def validate_command_type(cls, value: str) -> str:
+        if value not in ALLOWED_COMMAND_TYPES:
+            raise ValueError(f"Invalid command_type: {value}")
+        return value
+
+    @model_validator(mode="after")
+    def check_notebook_args(self) -> "Task":
+        if self.command_type != "notebook":
+            assert (
+                self.notebook_output_path is None
+            ), "Only command_types of 'notebook' can be used with notebook_output_path"
+
+            assert (
+                self.optional_ploomber_args is None
+            ), "Only command_types of 'notebook' can be used with optional_ploomber_args"
+
+            assert self.output_cell_tag is None, "Only command_types of 'notebook' can be used with output_cell_tag"
+        return self
+
+    def create_node(self) -> TaskNode:
+        if not self.next_node:
+            if not (self.terminate_with_failure or self.terminate_with_success):
+                raise AssertionError("A node not being terminated must have a user defined next node")
+        return TaskNode.parse_from_config(self.model_dump(exclude_none=True))
+
+
+class Stub(BaseTraversal):
+    """
+    A node that does nothing.
+
+    A stub node can take an arbitrary number of arguments.
+    Please refer to [concepts](concepts/stub.md) for more information.
+
+    Attributes:
+        name (str): The name of the node.
+        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+        terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
+
+    """
+
+    model_config = ConfigDict(extra="allow")
+    catalog: Optional[Catalog] = Field(default=None, alias="catalog")
+
+    def create_node(self) -> StubNode:
+        if not self.next_node:
+            if not (self.terminate_with_failure or self.terminate_with_success):
+                raise AssertionError("A node not being terminated must have a user defined next node")
+
+        return StubNode.parse_from_config(self.model_dump(exclude_none=True))
+
+
+class Parallel(BaseTraversal):
+    """
+    A node that executes multiple branches in parallel.
+    Please refer to [concepts](concepts/parallel.md) for more information.
+
+    Attributes:
+        name (str): The name of the node.
+        branches (Dict[str, Pipeline]): A dictionary of branches to execute in parallel.
+        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+        terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
+        on_failure (str): The name of the node to execute if any of the branches fail.
+    """
+
+    branches: Dict[str, "Pipeline"]
+
+    @computed_field  # type: ignore
+    @property
+    def graph_branches(self) -> Dict[str, graph.Graph]:
+        return {name: pipeline._dag.model_copy() for name, pipeline in self.branches.items()}
+
+    def create_node(self) -> ParallelNode:
+        if not self.next_node:
+            if not (self.terminate_with_failure or self.terminate_with_success):
+                raise AssertionError("A node not being terminated must have a user defined next node")
+
+        node = ParallelNode(name=self.name, branches=self.graph_branches, internal_name="", next_node=self.next_node)
+        return node
+
+
+class Map(BaseTraversal):
+    """
+    A node that iterates over a list of items and executes a pipeline for each item.
+    Please refer to [concepts](concepts/map.md) for more information.
+
+    Attributes:
+        branch: The pipeline to execute for each item.
+
+        iterate_on: The name of the parameter to iterate over.
+            The parameter should be defined either by previous steps or statically at the start of execution.
+
+        iterate_as: The name of the iterable to be passed to functions.
+
+
+        overrides (Dict[str, Any]): Any overrides to the command.
+
+    """
+
+    branch: "Pipeline"
+    iterate_on: str
+    iterate_as: str
+    overrides: Dict[str, Any] = Field(default_factory=dict)
+
+    @computed_field  # type: ignore
+    @property
+    def graph_branch(self) -> graph.Graph:
+        return self.branch._dag.model_copy()
+
+    def create_node(self) -> MapNode:
+        if not self.next_node:
+            if not (self.terminate_with_failure or self.terminate_with_success):
+                raise AssertionError("A node not being terminated must have a user defined next node")
+
+        node = MapNode(
+            name=self.name,
+            branch=self.graph_branch,
+            internal_name="",
+            next_node=self.next_node,
+            iterate_on=self.iterate_on,
+            iterate_as=self.iterate_as,
+            overrides=self.overrides,
+        )
+
+        return node
+
+
+class Success(BaseModel):
+    """
+    A node that represents a successful execution of the pipeline.
+
+    Most often, there is no need to use this node as nodes can be instructed to
+    terminate_with_success and pipeline with add_terminal_nodes=True.
+
+    Attributes:
+        name (str): The name of the node.
+    """
+
+    name: str = "success"
+
+    @computed_field  # type: ignore
+    @property
+    def internal_name(self) -> str:
+        return self.name
+
+    def create_node(self) -> SuccessNode:
+        return SuccessNode.parse_from_config(self.model_dump())
+
+
+class Fail(BaseModel):
+    """
+    A node that represents a failed execution of the pipeline.
+
+    Most often, there is no need to use this node as nodes can be instructed to
+    terminate_with_failure and pipeline with add_terminal_nodes=True.
+
+    Attributes:
+        name (str): The name of the node.
+    """
+
+    name: str = "fail"
+
+    @computed_field  # type: ignore
+    @property
+    def internal_name(self) -> str:
+        return self.name
+
+    def create_node(self) -> FailNode:
+        return FailNode.parse_from_config(self.model_dump())
+
+
+class Pipeline(BaseModel):
+    """
+    A Pipeline is a directed acyclic graph of Steps that define a workflow.
+
+    Attributes:
+        steps (List[Stub | Task | Parallel | Map | Success | Fail]): A list of Steps that make up the Pipeline.
+        start_at (Stub | Task | Parallel | Map): The name of the first Step in the Pipeline.
+        name (str, optional): The name of the Pipeline. Defaults to "".
+        description (str, optional): A description of the Pipeline. Defaults to "".
+        add_terminal_nodes (bool, optional): Whether to add terminal nodes to the Pipeline. Defaults to True.
+
+        The default behavior is to add "success" and "fail" nodes to the Pipeline.
+        To add custom success and fail nodes, set add_terminal_nodes=False and create success
+        and fail nodes manually.
+
+    """
+
+    steps: List[StepType]
+    start_at: TraversalTypes
+    name: str = ""
+    description: str = ""
+    add_terminal_nodes: bool = True  # Adds "success" and "fail" nodes
+
+    internal_branch_name: str = ""
+
+    _dag: graph.Graph = PrivateAttr()
+    model_config = ConfigDict(extra="forbid")
+
+    def model_post_init(self, __context: Any) -> None:
+        self.steps = [model.model_copy(deep=True) for model in self.steps]
+
+        self._dag = graph.Graph(
+            start_at=self.start_at.name,
+            description=self.description,
+            internal_branch_name=self.internal_branch_name,
+        )
+
+        for step in self.steps:
+            if step.name == self.start_at.name:
+                if isinstance(step, Success) or isinstance(step, Fail):
+                    raise Exception("A success or fail node cannot be the start_at of the graph")
+                assert step.next_node
+            self._dag.add_node(step.create_node())
+
+        if self.add_terminal_nodes:
+            self._dag.add_terminal_nodes()
+
+        self._dag.check_graph()
+
+    def return_dag(self) -> graph.Graph:
+        return self._dag
+
+    def execute(
+        self,
+        configuration_file: str = "",
+        run_id: str = "",
+        tag: str = "",
+        parameters_file: str = "",
+        use_cached: str = "",
+        log_level: str = defaults.LOG_LEVEL,
+    ):
+        """
+        *Execute* the Pipeline.
+
+        Execution of pipeline could either be:
+
+        Traverse and execute all the steps of the pipeline, eg. [local execution](configurations/executors/local.md).
+
+        Or create the ```yaml``` representation of the pipeline for other executors.
+
+        Please refer to [concepts](concepts/executor.md) for more information.
+
+        Args:
+            configuration_file (str, optional): The path to the configuration file. Defaults to "".
+                The configuration file can be overridden by the environment variable runnable_CONFIGURATION_FILE.
+
+            run_id (str, optional): The ID of the run. Defaults to "".
+            tag (str, optional): The tag of the run. Defaults to "".
+                Use to group multiple runs.
+
+            parameters_file (str, optional): The path to the parameters file. Defaults to "".
+            use_cached (str, optional): Whether to use cached results. Defaults to "".
+                Provide the run_id of the older execution to recover.
+
+            log_level (str, optional): The log level. Defaults to defaults.LOG_LEVEL.
+        """
+
+        # py_to_yaml is used by non local executors to generate the yaml representation of the pipeline.
+        py_to_yaml = os.environ.get("RUNNABLE_PY_TO_YAML", "false")
+
+        if py_to_yaml == "true":
+            return
+
+        logger.setLevel(log_level)
+
+        run_id = utils.generate_run_id(run_id=run_id)
+        configuration_file = os.environ.get("RUNNABLE_CONFIGURATION_FILE", configuration_file)
+        run_context = entrypoints.prepare_configurations(
+            configuration_file=configuration_file,
+            run_id=run_id,
+            tag=tag,
+            parameters_file=parameters_file,
+            use_cached=use_cached,
+        )
+
+        run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
+        utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
+
+        dag_definition = self._dag.model_dump(by_alias=True, exclude_none=True)
+
+        run_context.dag = graph.create_graph(dag_definition)
+
+        print("Working with context:")
+        print(run_context)
+
+        if not run_context.executor._local:
+            # We are working with non local executor
+            import inspect
+
+            caller_stack = inspect.stack()[1]
+            module_to_call = f"{caller_stack.filename.replace('/', '.').replace('.py', '')}.{caller_stack.function}"
+
+            run_context.pipeline_file = f"{module_to_call}.py"
+
+        # Prepare for graph execution
+        run_context.executor.prepare_for_graph_execution()
+
+        logger.info("Executing the graph")
+        run_context.executor.execute_graph(dag=run_context.dag)
+
+        if run_context.executor._local:
+            return run_context.run_log_store.get_run_log_by_id(run_id=run_context.run_id)
runnable/secrets.py
ADDED
@@ -0,0 +1,95 @@
+import logging
+import os
+from abc import ABC, abstractmethod
+
+from pydantic import BaseModel, ConfigDict
+
+import runnable.context as context
+from runnable import defaults, exceptions
+
+logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+# --8<-- [start:docs]
+class BaseSecrets(ABC, BaseModel):
+    """
+    A base class for Secrets Handler.
+    All implementations should extend this class.
+
+    Raises:
+        NotImplementedError: Base class and not implemented
+    """
+
+    service_name: str = ""
+    service_type: str = "secrets"
+    model_config = ConfigDict(extra="forbid")
+
+    @property
+    def _context(self):
+        return context.run_context
+
+    @abstractmethod
+    def get(self, name: str, **kwargs) -> str:
+        """
+        Return the secret by name.
+
+        Args:
+            name (str): The name of the secret to return.
+
+        Raises:
+            NotImplementedError: Base class and hence not implemented.
+            exceptions.SecretNotFoundError: Secret not found in the secrets manager.
+        """
+        raise NotImplementedError
+
+
+# --8<-- [end:docs]
+
+
+class DoNothingSecretManager(BaseSecrets):
+    """
+    A secret manager that does nothing.
+    """
+
+    service_name: str = "do-nothing"
+
+    def get(self, name: str, **kwargs) -> str:
+        """
+        Return an empty string, regardless of the name provided.
+
+        Args:
+            name (str): The name of the secret to retrieve
+
+        Raises:
+            exceptions.SecretNotFoundError: Secret not found in the secrets manager.
+
+        Returns:
+            [str]: The value of the secret
+        """
+        return ""
+
+
+class EnvSecretsManager(BaseSecrets):
+    """
+    A secret manager which uses environment variables for secrets.
+    """
+
+    service_name: str = "env-secrets"
+
+    def get(self, name: str, **kwargs) -> str:
+        """
+        Return the secret by reading the environment variable of the same name.
+
+        Args:
+            name (str): The name of the secret to retrieve
+
+        Raises:
+            exceptions.SecretNotFoundError: Secret not found in the secrets manager.
+
+        Returns:
+            [str]: The value of the secret
+        """
+        try:
+            return os.environ[name]
+        except KeyError:
+            raise exceptions.SecretNotFoundError(secret_name=name, secret_setting="environment variables")