runnable 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. runnable/__init__.py +34 -0
  2. runnable/catalog.py +141 -0
  3. runnable/cli.py +272 -0
  4. runnable/context.py +34 -0
  5. runnable/datastore.py +686 -0
  6. runnable/defaults.py +179 -0
  7. runnable/entrypoints.py +484 -0
  8. runnable/exceptions.py +94 -0
  9. runnable/executor.py +431 -0
  10. runnable/experiment_tracker.py +139 -0
  11. runnable/extensions/catalog/__init__.py +21 -0
  12. runnable/extensions/catalog/file_system/__init__.py +0 -0
  13. runnable/extensions/catalog/file_system/implementation.py +226 -0
  14. runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
  15. runnable/extensions/catalog/k8s_pvc/implementation.py +16 -0
  16. runnable/extensions/catalog/k8s_pvc/integration.py +59 -0
  17. runnable/extensions/executor/__init__.py +714 -0
  18. runnable/extensions/executor/argo/__init__.py +0 -0
  19. runnable/extensions/executor/argo/implementation.py +1182 -0
  20. runnable/extensions/executor/argo/specification.yaml +51 -0
  21. runnable/extensions/executor/k8s_job/__init__.py +0 -0
  22. runnable/extensions/executor/k8s_job/implementation_FF.py +259 -0
  23. runnable/extensions/executor/k8s_job/integration_FF.py +69 -0
  24. runnable/extensions/executor/local/__init__.py +0 -0
  25. runnable/extensions/executor/local/implementation.py +69 -0
  26. runnable/extensions/executor/local_container/__init__.py +0 -0
  27. runnable/extensions/executor/local_container/implementation.py +367 -0
  28. runnable/extensions/executor/mocked/__init__.py +0 -0
  29. runnable/extensions/executor/mocked/implementation.py +220 -0
  30. runnable/extensions/experiment_tracker/__init__.py +0 -0
  31. runnable/extensions/experiment_tracker/mlflow/__init__.py +0 -0
  32. runnable/extensions/experiment_tracker/mlflow/implementation.py +94 -0
  33. runnable/extensions/nodes.py +675 -0
  34. runnable/extensions/run_log_store/__init__.py +0 -0
  35. runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
  36. runnable/extensions/run_log_store/chunked_file_system/implementation.py +106 -0
  37. runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
  38. runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +21 -0
  39. runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +61 -0
  40. runnable/extensions/run_log_store/db/implementation_FF.py +157 -0
  41. runnable/extensions/run_log_store/db/integration_FF.py +0 -0
  42. runnable/extensions/run_log_store/file_system/__init__.py +0 -0
  43. runnable/extensions/run_log_store/file_system/implementation.py +136 -0
  44. runnable/extensions/run_log_store/generic_chunked.py +541 -0
  45. runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
  46. runnable/extensions/run_log_store/k8s_pvc/implementation.py +21 -0
  47. runnable/extensions/run_log_store/k8s_pvc/integration.py +56 -0
  48. runnable/extensions/secrets/__init__.py +0 -0
  49. runnable/extensions/secrets/dotenv/__init__.py +0 -0
  50. runnable/extensions/secrets/dotenv/implementation.py +100 -0
  51. runnable/extensions/secrets/env_secrets/__init__.py +0 -0
  52. runnable/extensions/secrets/env_secrets/implementation.py +42 -0
  53. runnable/graph.py +464 -0
  54. runnable/integration.py +205 -0
  55. runnable/interaction.py +399 -0
  56. runnable/names.py +546 -0
  57. runnable/nodes.py +489 -0
  58. runnable/parameters.py +183 -0
  59. runnable/pickler.py +102 -0
  60. runnable/sdk.py +470 -0
  61. runnable/secrets.py +95 -0
  62. runnable/tasks.py +392 -0
  63. runnable/utils.py +630 -0
  64. runnable-0.2.0.dist-info/METADATA +437 -0
  65. runnable-0.2.0.dist-info/RECORD +69 -0
  66. runnable-0.2.0.dist-info/entry_points.txt +44 -0
  67. runnable-0.1.0.dist-info/METADATA +0 -16
  68. runnable-0.1.0.dist-info/RECORD +0 -6
  69. /runnable/{.gitkeep → extensions/__init__.py} +0 -0
  70. {runnable-0.1.0.dist-info → runnable-0.2.0.dist-info}/LICENSE +0 -0
  71. {runnable-0.1.0.dist-info → runnable-0.2.0.dist-info}/WHEEL +0 -0
runnable/tasks.py ADDED
@@ -0,0 +1,392 @@
1
+ import ast
2
+ import contextlib
3
+ import importlib
4
+ import io
5
+ import json
6
+ import logging
7
+ import os
8
+ import subprocess
9
+ import sys
10
+ from typing import Any, Dict, Tuple
11
+
12
+ from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, field_validator
13
+ from pydantic._internal._model_construction import ModelMetaclass
14
+ from stevedore import driver
15
+
16
+ import runnable.context as context
17
+ from runnable import defaults, parameters, utils
18
+ from runnable.defaults import TypeMapVariable
19
+
20
+ logger = logging.getLogger(defaults.LOGGER_NAME)
21
+ logging.getLogger("stevedore").setLevel(logging.CRITICAL)
22
+
23
+
24
+ # TODO: Can we add memory peak, cpu usage, etc. to the metrics?
25
+
26
+
27
class BaseTaskType(BaseModel):
    """A base task class which does the execution of command defined by the user."""

    # Serialized under the key "command_type".
    task_type: str = Field(serialization_alias="command_type")
    # Excluded from serialization; also used to name the execution log file.
    node_name: str = Field(exclude=True)

    # Unknown configuration keys are rejected.
    model_config = ConfigDict(extra="forbid")

    @property
    def _context(self):
        """Return ``runnable.context.run_context``."""
        return context.run_context

    def get_cli_options(self) -> Tuple[str, dict]:
        """
        Key is the name of the cli option and value is the value of the cli option.
        This should always be in sync with the cli options defined in execute_*.

        Returns:
            str: The name of the cli option.
            dict: The dict of cli options for the task.

        Raises:
            NotImplementedError: Base class, not implemented
        """
        raise NotImplementedError()

    def _get_parameters(self, map_variable: TypeMapVariable = None, **kwargs) -> Dict[str, Any]:
        """
        By this step, all the parameters are present as environment variables as json strings.
        Return the parameters in scope for the execution.

        Args:
            map_variable (dict, optional): If the command is part of map node, the value of map. Defaults to None.
                Not used by this base implementation.

        Returns:
            dict: The parameters dictionary in-scope for the task execution
        """
        return parameters.get_user_set_parameters(remove=False)

    def execute_command(self, map_variable: TypeMapVariable = None, **kwargs):
        """The function to execute the command.

        And map_variable is sent in as an argument into the function.

        Args:
            map_variable (dict, optional): If the command is part of map node, the value of map. Defaults to None.

        Raises:
            NotImplementedError: Base class, not implemented
        """
        raise NotImplementedError()

    def _set_parameters(self, params: BaseModel, **kwargs):
        """Set the parameters back to the environment variables.

        Args:
            params (BaseModel): A pydantic model instance holding the parameters to set back as
                environment variables. A falsy value means there is nothing to set.

        Raises:
            ValueError: If ``params`` is not a pydantic model instance (for example a model class).
        """
        # Nothing to do
        if not params:
            return

        if not isinstance(params, BaseModel) or isinstance(params, ModelMetaclass):
            raise ValueError("Output variable of a function can only be a pydantic model or dynamic model.")

        parameters.set_user_defined_params_as_environment_variables(params.model_dump(by_alias=True))

    @contextlib.contextmanager
    def output_to_file(self, map_variable: TypeMapVariable = None):
        """Context manager to put the output of a function execution to catalog.

        Everything written to stdout inside the ``with`` body is captured, echoed to the
        console once the body finishes (or raises), written to a log file named after the
        node, stored via ``put_in_catalog`` and finally removed from the working directory.

        Args:
            map_variable (dict, optional): If the command is part of map node, the value of map. Defaults to None.
                Each value is appended to the log file name.
        """
        from runnable import put_in_catalog  # Causing cyclic imports

        log_file_name = self.node_name.replace(" ", "_") + ".execution.log"
        if map_variable:
            for value in map_variable.values():
                log_file_name += "_" + str(value)

        log_file = open(log_file_name, "w")
        captured = io.StringIO()
        try:
            with contextlib.redirect_stdout(captured):
                yield
        finally:
            try:
                output = captured.getvalue()
                print(output)  # print to console
                log_file.write(output)  # Print to file
            finally:
                # Release both handles even when echoing/writing the output fails.
                captured.close()
                log_file.close()

            try:
                put_in_catalog(log_file.name)
            finally:
                # The local copy is temporary; remove it even if the catalog rejects it.
                os.remove(log_file.name)
123
+
124
+
125
# Permissive pydantic model: extra="allow" lets it be constructed with arbitrary
# keyword arguments instead of a fixed set of declared fields.
class EasyModel(BaseModel):
    model_config = ConfigDict(extra="allow")
127
+
128
+
129
def make_pydantic_model(
    variables: Dict[str, Any],
    prefix: str = "",
) -> BaseModel:
    """Build an ``EasyModel`` from a mapping of variables.

    Args:
        variables (dict): Mapping of variable name to value.
        prefix (str, optional): Prefix passed to ``utils.remove_prefix`` for every key. Defaults to "".

    Returns:
        BaseModel: An ``EasyModel`` whose attributes are the de-prefixed variables.
    """
    stripped: Dict[str, Any] = {}
    for name, value in variables.items():
        stripped[utils.remove_prefix(name, prefix)] = value

    return EasyModel(**stripped)
135
+
136
+
137
class PythonTaskType(BaseTaskType):  # pylint: disable=too-few-public-methods
    """The task class for python command."""

    task_type: str = Field(default="python", serialization_alias="command_type")
    # Reference to the function to call; split into module and attribute names by
    # utils.get_module_and_attr_names at execution time.
    command: str

    @field_validator("command")
    @classmethod
    def validate_command(cls, command: str):
        """Reject an empty command.

        Raises:
            Exception: If the command is empty.
        """
        if not command:
            raise Exception("Command cannot be empty for python task")

        return command

    def get_cli_options(self) -> Tuple[str, dict]:
        """Return the cli options for the task.

        Returns:
            str: The name of the cli option, "function".
            dict: The cli options for the task
        """
        return "function", {"command": self.command}

    def execute_command(self, map_variable: TypeMapVariable = None, **kwargs):
        """Execute the python function as defined by the command.

        The function is imported, called with the in-scope parameters filtered to its
        arguments, and its return value is set back as parameters.

        Args:
            map_variable (dict, optional): If the command is part of map node, the value of map. Defaults to None.

        Raises:
            Exception: Whatever the user function raises is logged and re-raised.
        """
        module, func = utils.get_module_and_attr_names(self.command)
        sys.path.insert(0, os.getcwd())  # Need to add the current directory to path
        imported_module = importlib.import_module(module)
        f = getattr(imported_module, func)

        params = self._get_parameters()
        filtered_parameters = parameters.filter_arguments_for_func(f, params, map_variable)

        if map_variable:
            os.environ[defaults.MAP_VARIABLE] = json.dumps(map_variable)

        logger.info(f"Calling {func} from {module} with {filtered_parameters}")

        try:
            with self.output_to_file(map_variable=map_variable) as _:
                try:
                    user_set_parameters = f(**filtered_parameters)
                except Exception as _e:
                    msg = f"Call to the function {self.command} with {filtered_parameters} did not succeed.\n"
                    logger.exception(msg)
                    logger.exception(_e)
                    raise
        finally:
            # Do not leak the map variable into the process environment, even when the call fails.
            if map_variable:
                os.environ.pop(defaults.MAP_VARIABLE, None)

        self._set_parameters(user_set_parameters)
187
+
188
+
189
class NotebookTaskType(BaseTaskType):
    """The task class for Notebook based execution."""

    task_type: str = Field(default="notebook", serialization_alias="command_type")
    # Path to the notebook to execute; must end with ".ipynb".
    command: str
    # Where the executed notebook is written; derived from ``command`` when empty.
    notebook_output_path: str = Field(default="", validate_default=True)
    # Cells carrying this tag have their text outputs parsed as returned parameters.
    output_cell_tag: str = Field(default="magnus_output", validate_default=True)
    # Extra keyword arguments merged over the defaults passed to ploomber's execute_notebook.
    optional_ploomber_args: dict = {}

    _output_tag: str = "magnus_output"

    @field_validator("command")
    @classmethod
    def notebook_should_end_with_ipynb(cls, command: str):
        """Ensure the command is a non-empty path to an ``.ipynb`` file.

        Raises:
            Exception: If the command is empty or does not end with ".ipynb".
        """
        if not command:
            raise Exception("Command should point to the ipynb file")

        if not command.endswith(".ipynb"):
            raise Exception("Notebook task should point to a ipynb file")

        return command

    @field_validator("notebook_output_path")
    @classmethod
    def correct_notebook_output_path(cls, notebook_output_path: str, info: ValidationInfo):
        """Default the output path to ``<command without extension>_out.ipynb``.

        An explicitly provided output path is returned unchanged.
        """
        if notebook_output_path:
            return notebook_output_path

        command = info.data.get("command")
        if not command:
            # ``command`` is absent from the validated data (it was missing or failed its own
            # validation); leave the value alone so that error is the one reported.
            return notebook_output_path

        # Strip only the extension; dots elsewhere in the path (e.g. "./nb.ipynb") must survive.
        return os.path.splitext(command)[0] + "_out.ipynb"

    def get_cli_options(self) -> Tuple[str, dict]:
        """Return the cli option name, "notebook", and the options for the task."""
        return "notebook", {"command": self.command, "notebook-output-path": self.notebook_output_path}

    def _parse_notebook_for_output(self, notebook: Any):
        """Collect parameters from the outputs of cells tagged with ``output_cell_tag``.

        Args:
            notebook: The executed notebook; its ``cells`` are inspected.

        Returns:
            dict: The merged result of evaluating the ``text`` of every output of tagged cells
                as a python literal.
        """
        collected_params = {}

        for cell in notebook.cells:
            d = cell.dict()
            # identify the tags attached to the cell.
            tags = d.get("metadata", {}).get("tags", {})
            if self.output_cell_tag in tags:
                # There is a tag that has output
                outputs = d["outputs"]

                for out in outputs:
                    params = out.get("text", "{}")
                    collected_params.update(ast.literal_eval(params))

        return collected_params

    def execute_command(self, map_variable: TypeMapVariable = None, **kwargs):
        """Execute the python notebook as defined by the command.

        Args:
            map_variable (dict, optional): If the node is part of internal branch. Defaults to None.

        Raises:
            Exception: If ploomber engine is not installed
            Exception: If anything else fails
        """
        # Only the missing optional dependency is translated into the installation hint;
        # import errors raised while running the notebook propagate untouched.
        try:
            import ploomber_engine as pm
        except ImportError as e:
            msg = (
                "Task type of notebook requires ploomber engine to be installed. Please install via optional: notebook"
            )
            raise Exception(msg) from e

        from runnable import put_in_catalog  # Causes issues with cyclic import

        notebook_parameters = self._get_parameters()
        notebook_output_path = self.notebook_output_path

        if map_variable:
            os.environ[defaults.MAP_VARIABLE] = json.dumps(map_variable)

            for value in map_variable.values():
                notebook_output_path += "_" + str(value)

        kwds = {
            "input_path": self.command,
            "output_path": notebook_output_path,
            "parameters": notebook_parameters,
            "log_output": True,
            "progress_bar": False,
        }
        kwds.update(self.optional_ploomber_args)

        try:
            collected_params: Dict[str, Any] = {}
            with self.output_to_file(map_variable=map_variable) as _:
                out = pm.execute_notebook(**kwds)
                collected_params = self._parse_notebook_for_output(out)

            collected_params_model = make_pydantic_model(collected_params)
            self._set_parameters(collected_params_model)

            put_in_catalog(notebook_output_path)
        finally:
            # Do not leak the map variable into the process environment, even on failure.
            if map_variable:
                os.environ.pop(defaults.MAP_VARIABLE, None)
294
+
295
+
296
class ShellTaskType(BaseTaskType):
    """
    The task class for shell based commands.
    """

    task_type: str = Field(default="shell", serialization_alias="command_type")
    # The shell command line to run; it is executed with shell=True.
    command: str

    @field_validator("command")
    @classmethod
    def validate_command(cls, command: str):
        """Reject an empty command.

        Raises:
            Exception: If the command is empty.
        """
        if not command:
            raise Exception("Command cannot be empty for shell task")

        return command

    def execute_command(self, map_variable: TypeMapVariable = None, **kwargs):
        # Using shell=True as we want to have chained commands to be executed in the same shell.
        """Execute the shell command as defined by the command.

        The command's environment is dumped after it runs; lines starting with
        ``defaults.PARAMETER_PREFIX`` are collected as output parameters and lines starting
        with ``defaults.TRACK_PREFIX`` are copied into this process's environment.

        Args:
            map_variable (dict, optional): If the node is part of an internal branch. Defaults to None.

        Raises:
            Exception: If the command exits with a non-zero return code.
        """
        subprocess_env = os.environ.copy()

        if map_variable:
            subprocess_env[defaults.MAP_VARIABLE] = json.dumps(map_variable)

        # NOTE(review): grep exits non-zero when nothing matches, which would be reported as a
        # failed command; presumably at least one MAGNUS variable is always exported - confirm.
        command = self.command.strip() + " && env | grep MAGNUS"
        logger.info(f"Executing shell command: {command}")

        output_parameters = {}

        with subprocess.Popen(
            command,
            shell=True,
            env=subprocess_env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        ) as proc, self.output_to_file(map_variable=map_variable) as _:
            # communicate() drains stdout and stderr together. Reading only stdout while stderr
            # is also a pipe blocks forever once the child fills the stderr pipe buffer.
            # The output is therefore processed after the command has finished.
            stdout, stderr = proc.communicate()

            for line in io.StringIO(stdout):
                logger.info(line)
                print(line)

                if line.startswith(defaults.PARAMETER_PREFIX):
                    key, value = line.strip().split("=", 1)
                    try:
                        output_parameters[key] = json.loads(value)
                    except json.JSONDecodeError:
                        output_parameters[key] = value  # simple data types

                if line.startswith(defaults.TRACK_PREFIX):
                    key, value = line.split("=", 1)
                    os.environ[key] = value.strip()

            if stderr:
                # Surface what the command wrote to stderr instead of discarding it.
                logger.info(stderr)

            if proc.returncode != 0:
                raise Exception("Command failed")

        self._set_parameters(
            params=make_pydantic_model(
                output_parameters,
                defaults.PARAMETER_PREFIX,
            )
        )
362
+
363
+
364
def create_task(kwargs_for_init) -> BaseTaskType:
    """
    Build the task object described by a command configuration.

    The "command_type" entry (falling back to ``defaults.COMMAND_TYPE``) selects the driver
    in the "tasks" namespace; the remaining entries are passed to it on load.

    Args:
        kwargs_for_init: A dictionary of keyword arguments that are sent by the user to the task.
            It is copied, never modified.

    Returns:
        tasks.BaseTaskType: The command object

    Raises:
        Exception: If loading the driver for the command type fails.
    """
    # Work on a copy: the caller's dictionary cannot be modified
    init_kwargs = kwargs_for_init.copy()
    command_type = init_kwargs.pop("command_type", defaults.COMMAND_TYPE)

    try:
        return driver.DriverManager(
            namespace="tasks",
            name=command_type,
            invoke_on_load=True,
            invoke_kwds=init_kwargs,
        ).driver
    except Exception as _e:
        raise Exception(
            f"Could not find the task type {command_type}. Please ensure you have installed "
            "the extension that provides the node type."
        ) from _e