runnable 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runnable/__init__.py +34 -0
- runnable/catalog.py +141 -0
- runnable/cli.py +272 -0
- runnable/context.py +34 -0
- runnable/datastore.py +686 -0
- runnable/defaults.py +179 -0
- runnable/entrypoints.py +484 -0
- runnable/exceptions.py +94 -0
- runnable/executor.py +431 -0
- runnable/experiment_tracker.py +139 -0
- runnable/extensions/catalog/__init__.py +21 -0
- runnable/extensions/catalog/file_system/__init__.py +0 -0
- runnable/extensions/catalog/file_system/implementation.py +226 -0
- runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
- runnable/extensions/catalog/k8s_pvc/implementation.py +16 -0
- runnable/extensions/catalog/k8s_pvc/integration.py +59 -0
- runnable/extensions/executor/__init__.py +714 -0
- runnable/extensions/executor/argo/__init__.py +0 -0
- runnable/extensions/executor/argo/implementation.py +1182 -0
- runnable/extensions/executor/argo/specification.yaml +51 -0
- runnable/extensions/executor/k8s_job/__init__.py +0 -0
- runnable/extensions/executor/k8s_job/implementation_FF.py +259 -0
- runnable/extensions/executor/k8s_job/integration_FF.py +69 -0
- runnable/extensions/executor/local/__init__.py +0 -0
- runnable/extensions/executor/local/implementation.py +69 -0
- runnable/extensions/executor/local_container/__init__.py +0 -0
- runnable/extensions/executor/local_container/implementation.py +367 -0
- runnable/extensions/executor/mocked/__init__.py +0 -0
- runnable/extensions/executor/mocked/implementation.py +220 -0
- runnable/extensions/experiment_tracker/__init__.py +0 -0
- runnable/extensions/experiment_tracker/mlflow/__init__.py +0 -0
- runnable/extensions/experiment_tracker/mlflow/implementation.py +94 -0
- runnable/extensions/nodes.py +675 -0
- runnable/extensions/run_log_store/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_file_system/implementation.py +106 -0
- runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +21 -0
- runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +61 -0
- runnable/extensions/run_log_store/db/implementation_FF.py +157 -0
- runnable/extensions/run_log_store/db/integration_FF.py +0 -0
- runnable/extensions/run_log_store/file_system/__init__.py +0 -0
- runnable/extensions/run_log_store/file_system/implementation.py +136 -0
- runnable/extensions/run_log_store/generic_chunked.py +541 -0
- runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
- runnable/extensions/run_log_store/k8s_pvc/implementation.py +21 -0
- runnable/extensions/run_log_store/k8s_pvc/integration.py +56 -0
- runnable/extensions/secrets/__init__.py +0 -0
- runnable/extensions/secrets/dotenv/__init__.py +0 -0
- runnable/extensions/secrets/dotenv/implementation.py +100 -0
- runnable/extensions/secrets/env_secrets/__init__.py +0 -0
- runnable/extensions/secrets/env_secrets/implementation.py +42 -0
- runnable/graph.py +464 -0
- runnable/integration.py +205 -0
- runnable/interaction.py +399 -0
- runnable/names.py +546 -0
- runnable/nodes.py +489 -0
- runnable/parameters.py +183 -0
- runnable/pickler.py +102 -0
- runnable/sdk.py +470 -0
- runnable/secrets.py +95 -0
- runnable/tasks.py +392 -0
- runnable/utils.py +630 -0
- runnable-0.2.0.dist-info/METADATA +437 -0
- runnable-0.2.0.dist-info/RECORD +69 -0
- runnable-0.2.0.dist-info/entry_points.txt +44 -0
- runnable-0.1.0.dist-info/METADATA +0 -16
- runnable-0.1.0.dist-info/RECORD +0 -6
- /runnable/{.gitkeep → extensions/__init__.py} +0 -0
- {runnable-0.1.0.dist-info → runnable-0.2.0.dist-info}/LICENSE +0 -0
- {runnable-0.1.0.dist-info → runnable-0.2.0.dist-info}/WHEEL +0 -0
runnable/tasks.py
ADDED
@@ -0,0 +1,392 @@
|
|
1
|
+
import ast
|
2
|
+
import contextlib
|
3
|
+
import importlib
|
4
|
+
import io
|
5
|
+
import json
|
6
|
+
import logging
|
7
|
+
import os
|
8
|
+
import subprocess
|
9
|
+
import sys
|
10
|
+
from typing import Any, Dict, Tuple
|
11
|
+
|
12
|
+
from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, field_validator
|
13
|
+
from pydantic._internal._model_construction import ModelMetaclass
|
14
|
+
from stevedore import driver
|
15
|
+
|
16
|
+
import runnable.context as context
|
17
|
+
from runnable import defaults, parameters, utils
|
18
|
+
from runnable.defaults import TypeMapVariable
|
19
|
+
|
20
|
+
logger = logging.getLogger(defaults.LOGGER_NAME)
|
21
|
+
logging.getLogger("stevedore").setLevel(logging.CRITICAL)
|
22
|
+
|
23
|
+
|
24
|
+
# TODO: Can we add memory peak, cpu usage, etc. to the metrics?
|
25
|
+
|
26
|
+
|
27
|
+
class BaseTaskType(BaseModel):
    """A base task class which does the execution of command defined by the user."""

    task_type: str = Field(serialization_alias="command_type")
    node_name: str = Field(exclude=True)

    model_config = ConfigDict(extra="forbid")

    @property
    def _context(self):
        """Return the ``run_context`` object held by the ``runnable.context`` module."""
        return context.run_context

    def get_cli_options(self) -> Tuple[str, dict]:
        """
        Key is the name of the cli option and value is the value of the cli option.
        This should always be in sync with the cli options defined in execute_*.

        Returns:
            str: The name of the cli option.
            dict: The dict of cli options for the task.

        Raises:
            NotImplementedError: Base class, not implemented
        """
        raise NotImplementedError()

    def _get_parameters(self, map_variable: TypeMapVariable = None, **kwargs) -> Dict[str, Any]:
        """
        By this step, all the parameters are present as environment variables as json strings.
        Return the parameters in scope for the execution.

        Args:
            map_variable (dict, optional): If the command is part of map node, the value of map. Defaults to None.
                Accepted for interface compatibility; this implementation does not use it.

        Returns:
            dict: The parameters dictionary in-scope for the task execution
        """
        return parameters.get_user_set_parameters(remove=False)

    def execute_command(self, map_variable: TypeMapVariable = None, **kwargs):
        """The function to execute the command.

        And map_variable is sent in as an argument into the function.

        Args:
            map_variable (dict, optional): If the command is part of map node, the value of map. Defaults to None.

        Raises:
            NotImplementedError: Base class, not implemented
        """
        raise NotImplementedError()

    def _set_parameters(self, params: BaseModel, **kwargs):
        """Set the parameters back to the environment variables.

        Args:
            params (BaseModel): The pydantic model instance whose fields are dumped
                (by alias) and set back as env variables. A falsy value is a no-op.

        Raises:
            ValueError: If ``params`` is not a pydantic model instance.
        """
        # Nothing to do
        if not params:
            return

        if not isinstance(params, BaseModel) or isinstance(params, ModelMetaclass):
            raise ValueError("Output variable of a function can only be a pydantic model or dynamic model.")

        parameters.set_user_defined_params_as_environment_variables(params.model_dump(by_alias=True))

    @contextlib.contextmanager
    def output_to_file(self, map_variable: TypeMapVariable = None):
        """Context manager to put the output of a function execution to catalog.

        Everything written to stdout inside the ``with`` body is buffered. On exit
        (normal or via an exception) the buffer is echoed to the console, written
        to a temporary log file named after the node, and that file is put in the
        catalog. The temporary file is always closed and removed afterwards.

        Args:
            map_variable (dict, optional): If the command is part of map node, the value of map. Defaults to None.
                Each value is appended to the log file name.
        """
        from runnable import put_in_catalog  # Causing cyclic imports

        log_file_name = self.node_name.replace(" ", "_") + ".execution.log"
        if map_variable:
            for _, value in map_variable.items():
                log_file_name += "_" + str(value)

        log_file = open(log_file_name, "w")
        buffer = io.StringIO()
        try:
            with contextlib.redirect_stdout(buffer):
                yield
        finally:
            try:
                captured = buffer.getvalue()
                buffer.close()

                print(captured)  # print to console
                log_file.write(captured)  # Print to file

                # The file must be flushed and closed before it is catalogued.
                log_file.close()
                put_in_catalog(log_file.name)
            finally:
                # close() is idempotent; this guarantees the handle is released and
                # the scratch file is removed even when writing or cataloguing fails.
                buffer.close()
                log_file.close()
                os.remove(log_file.name)
|
123
|
+
|
124
|
+
|
125
|
+
class EasyModel(BaseModel):
    """Schema-less pydantic model: any keyword passed at construction is kept as an extra field."""

    # extra="allow" lets arbitrary, undeclared keys through instead of rejecting them.
    model_config = ConfigDict(extra="allow")
|
127
|
+
|
128
|
+
|
129
|
+
def make_pydantic_model(
    variables: Dict[str, Any],
    prefix: str = "",
) -> BaseModel:
    """Wrap a plain dictionary into an ``EasyModel`` instance.

    Args:
        variables (dict): The key/value pairs to expose as model fields.
        prefix (str, optional): Passed with every key to ``utils.remove_prefix``
            before the model is built. Defaults to "".

    Returns:
        BaseModel: An ``EasyModel`` carrying the (re-keyed) values.
    """
    renamed: Dict[str, Any] = {}
    for name, value in variables.items():
        renamed[utils.remove_prefix(name, prefix)] = value

    return EasyModel(**renamed)
|
135
|
+
|
136
|
+
|
137
|
+
class PythonTaskType(BaseTaskType):  # pylint: disable=too-few-public-methods
    """The task class for python command."""

    task_type: str = Field(default="python", serialization_alias="command_type")
    command: str

    @field_validator("command")
    @classmethod
    def validate_command(cls, command: str):
        """Reject an empty command.

        Raises:
            Exception: If ``command`` is empty.
        """
        if not command:
            # The message used to say "shell task", copied from ShellTaskType.
            raise Exception("Command cannot be empty for python task")

        return command

    def get_cli_options(self) -> Tuple[str, dict]:
        """Return the cli options for the task.

        Returns:
            str: The name of the cli option, "function".
            dict: The cli options for the task
        """
        return "function", {"command": self.command}

    def execute_command(self, map_variable: TypeMapVariable = None, **kwargs):
        """Import and call the python function named by the command.

        The command is resolved into a module name and an attribute name via
        ``utils.get_module_and_attr_names``; the module is imported with the
        current working directory on ``sys.path``. The function's return value
        is handed to ``_set_parameters``.

        Args:
            map_variable (dict, optional): If the command is part of map node, the value of map. Defaults to None.
                When given, it is exposed as json in the ``defaults.MAP_VARIABLE``
                environment variable for the duration of the call.

        Raises:
            Exception: Whatever the user function raises is logged and re-raised.
        """
        module, func = utils.get_module_and_attr_names(self.command)
        sys.path.insert(0, os.getcwd())  # Need to add the current directory to path
        imported_module = importlib.import_module(module)
        f = getattr(imported_module, func)

        params = self._get_parameters()
        filtered_parameters = parameters.filter_arguments_for_func(f, params, map_variable)

        if map_variable:
            os.environ[defaults.MAP_VARIABLE] = json.dumps(map_variable)

        logger.info(f"Calling {func} from {module} with {filtered_parameters}")

        with self.output_to_file(map_variable=map_variable) as _:
            try:
                user_set_parameters = f(**filtered_parameters)
            except Exception as _e:
                msg = f"Call to the function {self.command} with {filtered_parameters} did not succeed.\n"
                logger.exception(msg)
                logger.exception(_e)
                raise
            finally:
                # Clean up on failure too, so a failed task does not leak its
                # map variable into whatever runs next in this process.
                if map_variable:
                    os.environ.pop(defaults.MAP_VARIABLE, None)

            self._set_parameters(user_set_parameters)
|
187
|
+
|
188
|
+
|
189
|
+
class NotebookTaskType(BaseTaskType):
    """The task class for Notebook based execution."""

    task_type: str = Field(default="notebook", serialization_alias="command_type")
    command: str
    notebook_output_path: str = Field(default="", validate_default=True)
    output_cell_tag: str = Field(default="magnus_output", validate_default=True)
    optional_ploomber_args: dict = {}

    _output_tag: str = "magnus_output"

    @field_validator("command")
    @classmethod
    def notebook_should_end_with_ipynb(cls, command: str):
        """Ensure the command is a non-empty path ending in ``.ipynb``.

        Raises:
            Exception: If the command is empty or does not end with ``.ipynb``.
        """
        if not command:
            raise Exception("Command should point to the ipynb file")

        if not command.endswith(".ipynb"):
            raise Exception("Notebook task should point to a ipynb file")

        return command

    @field_validator("notebook_output_path")
    @classmethod
    def correct_notebook_output_path(cls, notebook_output_path: str, info: ValidationInfo):
        """Default the output path to ``<command without extension>_out.ipynb``.

        An explicitly supplied path is returned untouched.
        """
        if notebook_output_path:
            return notebook_output_path

        command = info.data["command"]
        # Strip only the trailing extension. Splitting on every "." used to drop
        # dots elsewhere in the path, e.g. "./nb.ipynb" became "/nb_out.ipynb".
        root, _ = os.path.splitext(command)
        return root + "_out.ipynb"

    def get_cli_options(self) -> Tuple[str, dict]:
        """Return the cli options for the task.

        Returns:
            str: The name of the cli option, "notebook".
            dict: The command and the notebook output path.
        """
        return "notebook", {"command": self.command, "notebook-output-path": self.notebook_output_path}

    def _parse_notebook_for_output(self, notebook: Any):
        """Collect parameters emitted by cells tagged with ``output_cell_tag``.

        The ``text`` of every output of a tagged cell is evaluated with
        ``ast.literal_eval`` and merged into one dictionary; later outputs
        override earlier ones on key clashes.

        Args:
            notebook: The executed notebook object; must expose ``cells`` whose
                items provide ``dict()``.

        Returns:
            dict: The merged parameters.
        """
        collected_params: Dict[str, Any] = {}

        for cell in notebook.cells:
            d = cell.dict()
            # identify the tags attached to the cell.
            tags = d.get("metadata", {}).get("tags", {})
            if self.output_cell_tag in tags:
                # There is a tag that has output
                outputs = d["outputs"]

                for out in outputs:
                    params = out.get("text", "{}")
                    collected_params.update(ast.literal_eval(params))

        return collected_params

    def execute_command(self, map_variable: TypeMapVariable = None, **kwargs):
        """Execute the python notebook as defined by the command.

        Args:
            map_variable (dict, optional): If the node is part of internal branch. Defaults to None.
                When given, it is exposed as json in the ``defaults.MAP_VARIABLE``
                environment variable and each value is appended to the output path.

        Raises:
            Exception: If ploomber engine is not installed
            Exception: If anything else fails
        """
        # Guard only the optional import, so an ImportError raised while
        # running the notebook is not misreported as "ploomber missing".
        try:
            import ploomber_engine as pm
        except ImportError as e:
            msg = (
                "Task type of notebook requires ploomber engine to be installed. Please install via optional: notebook"
            )
            raise Exception(msg) from e

        from runnable import put_in_catalog  # Causes issues with cyclic import

        filtered_parameters = self._get_parameters()
        notebook_output_path = self.notebook_output_path

        if map_variable:
            os.environ[defaults.MAP_VARIABLE] = json.dumps(map_variable)

            for _, value in map_variable.items():
                notebook_output_path += "_" + str(value)

        try:
            kwds = {
                "input_path": self.command,
                "output_path": notebook_output_path,
                "parameters": filtered_parameters,
                "log_output": True,
                "progress_bar": False,
            }
            # User supplied arguments win over the defaults above.
            kwds.update(self.optional_ploomber_args)

            collected_params: Dict[str, Any] = {}
            with self.output_to_file(map_variable=map_variable) as _:
                out = pm.execute_notebook(**kwds)
                collected_params = self._parse_notebook_for_output(out)

                collected_params_model = make_pydantic_model(collected_params)
                self._set_parameters(collected_params_model)

            put_in_catalog(notebook_output_path)
        finally:
            # Always undo the environment change, even when the notebook fails.
            if map_variable:
                os.environ.pop(defaults.MAP_VARIABLE, None)
|
294
|
+
|
295
|
+
|
296
|
+
class ShellTaskType(BaseTaskType):
    """
    The task class for shell based commands.
    """

    task_type: str = Field(default="shell", serialization_alias="command_type")
    command: str

    @field_validator("command")
    @classmethod
    def validate_command(cls, command: str):
        """Reject an empty command.

        Raises:
            Exception: If ``command`` is empty.
        """
        if not command:
            raise Exception("Command cannot be empty for shell task")

        return command

    def execute_command(self, map_variable: TypeMapVariable = None, **kwargs):
        """Execute the shell command as defined by the command.

        ``env | grep MAGNUS`` is chained after the user command so that the
        resulting environment is echoed on stdout. Lines starting with
        ``defaults.PARAMETER_PREFIX`` are collected as output parameters; lines
        starting with ``defaults.TRACK_PREFIX`` are copied into this process's
        environment.

        Args:
            map_variable (dict, optional): If the node is part of an internal branch. Defaults to None.
                When given, it is passed as json to the subprocess in the
                ``defaults.MAP_VARIABLE`` environment variable.

        Raises:
            Exception: If the command exits with a non-zero return code.
        """
        subprocess_env = os.environ.copy()

        if map_variable:
            subprocess_env[defaults.MAP_VARIABLE] = json.dumps(map_variable)

        command = self.command.strip() + " && env | grep MAGNUS"
        logger.info(f"Executing shell command: {command}")

        output_parameters = {}

        # Using shell=True as we want to have chained commands to be executed in the same shell.
        # stderr is merged into stdout: a separate stderr pipe that is never read
        # blocks the child once the OS pipe buffer fills up, dead-locking this loop.
        with subprocess.Popen(
            command,
            shell=True,
            env=subprocess_env,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        ) as proc, self.output_to_file(map_variable=map_variable) as _:
            for line in proc.stdout:  # type: ignore
                logger.info(line)
                print(line)

                if line.startswith(defaults.PARAMETER_PREFIX):
                    key, separator, value = line.strip().partition("=")
                    # Ignore prefixed lines that are not KEY=VALUE pairs.
                    if separator:
                        try:
                            output_parameters[key] = json.loads(value)
                        except json.JSONDecodeError:
                            output_parameters[key] = value  # simple data types

                if line.startswith(defaults.TRACK_PREFIX):
                    key, separator, value = line.partition("=")
                    if separator:
                        os.environ[key] = value.strip()

            proc.wait()
            if proc.returncode != 0:
                raise Exception("Command failed")

        self._set_parameters(
            params=make_pydantic_model(
                output_parameters,
                defaults.PARAMETER_PREFIX,
            )
        )
|
362
|
+
|
363
|
+
|
364
|
+
def create_task(kwargs_for_init) -> BaseTaskType:
    """
    Build the task object described by a command configuration.

    The ``command_type`` key (falling back to ``defaults.COMMAND_TYPE``) picks
    the plugin registered under the "tasks" namespace; every remaining key is
    forwarded to that plugin's constructor.

    Args:
        kwargs_for_init: A dictionary of keyword arguments that are sent by the user to the task.
            It is copied first, so the caller's dictionary is not modified.

    Returns:
        tasks.BaseTaskType: The command object

    Raises:
        Exception: If the plugin cannot be loaded or instantiated; the
            underlying error is chained as the cause.
    """
    # The dictionary cannot be modified
    init_kwargs = kwargs_for_init.copy()
    command_type = init_kwargs.pop("command_type", defaults.COMMAND_TYPE)

    try:
        return driver.DriverManager(
            namespace="tasks",
            name=command_type,
            invoke_on_load=True,
            invoke_kwds=init_kwargs,
        ).driver
    except Exception as _e:
        raise Exception(
            f"Could not find the task type {command_type}. Please ensure you have installed "
            "the extension that provides the node type."
        ) from _e
|