runnable 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. runnable/__init__.py +34 -0
  2. runnable/catalog.py +141 -0
  3. runnable/cli.py +272 -0
  4. runnable/context.py +34 -0
  5. runnable/datastore.py +687 -0
  6. runnable/defaults.py +182 -0
  7. runnable/entrypoints.py +448 -0
  8. runnable/exceptions.py +94 -0
  9. runnable/executor.py +421 -0
  10. runnable/experiment_tracker.py +139 -0
  11. runnable/extensions/catalog/__init__.py +21 -0
  12. runnable/extensions/catalog/file_system/__init__.py +0 -0
  13. runnable/extensions/catalog/file_system/implementation.py +227 -0
  14. runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
  15. runnable/extensions/catalog/k8s_pvc/implementation.py +16 -0
  16. runnable/extensions/catalog/k8s_pvc/integration.py +59 -0
  17. runnable/extensions/executor/__init__.py +725 -0
  18. runnable/extensions/executor/argo/__init__.py +0 -0
  19. runnable/extensions/executor/argo/implementation.py +1183 -0
  20. runnable/extensions/executor/argo/specification.yaml +51 -0
  21. runnable/extensions/executor/k8s_job/__init__.py +0 -0
  22. runnable/extensions/executor/k8s_job/implementation_FF.py +259 -0
  23. runnable/extensions/executor/k8s_job/integration_FF.py +69 -0
  24. runnable/extensions/executor/local/__init__.py +0 -0
  25. runnable/extensions/executor/local/implementation.py +70 -0
  26. runnable/extensions/executor/local_container/__init__.py +0 -0
  27. runnable/extensions/executor/local_container/implementation.py +361 -0
  28. runnable/extensions/executor/mocked/__init__.py +0 -0
  29. runnable/extensions/executor/mocked/implementation.py +189 -0
  30. runnable/extensions/experiment_tracker/__init__.py +0 -0
  31. runnable/extensions/experiment_tracker/mlflow/__init__.py +0 -0
  32. runnable/extensions/experiment_tracker/mlflow/implementation.py +94 -0
  33. runnable/extensions/nodes.py +655 -0
  34. runnable/extensions/run_log_store/__init__.py +0 -0
  35. runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
  36. runnable/extensions/run_log_store/chunked_file_system/implementation.py +106 -0
  37. runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
  38. runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +21 -0
  39. runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +61 -0
  40. runnable/extensions/run_log_store/db/implementation_FF.py +157 -0
  41. runnable/extensions/run_log_store/db/integration_FF.py +0 -0
  42. runnable/extensions/run_log_store/file_system/__init__.py +0 -0
  43. runnable/extensions/run_log_store/file_system/implementation.py +136 -0
  44. runnable/extensions/run_log_store/generic_chunked.py +541 -0
  45. runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
  46. runnable/extensions/run_log_store/k8s_pvc/implementation.py +21 -0
  47. runnable/extensions/run_log_store/k8s_pvc/integration.py +56 -0
  48. runnable/extensions/secrets/__init__.py +0 -0
  49. runnable/extensions/secrets/dotenv/__init__.py +0 -0
  50. runnable/extensions/secrets/dotenv/implementation.py +100 -0
  51. runnable/extensions/secrets/env_secrets/__init__.py +0 -0
  52. runnable/extensions/secrets/env_secrets/implementation.py +42 -0
  53. runnable/graph.py +464 -0
  54. runnable/integration.py +205 -0
  55. runnable/interaction.py +404 -0
  56. runnable/names.py +546 -0
  57. runnable/nodes.py +501 -0
  58. runnable/parameters.py +183 -0
  59. runnable/pickler.py +102 -0
  60. runnable/sdk.py +472 -0
  61. runnable/secrets.py +95 -0
  62. runnable/tasks.py +395 -0
  63. runnable/utils.py +630 -0
  64. runnable-0.3.0.dist-info/METADATA +437 -0
  65. runnable-0.3.0.dist-info/RECORD +69 -0
  66. {runnable-0.1.0.dist-info → runnable-0.3.0.dist-info}/WHEEL +1 -1
  67. runnable-0.3.0.dist-info/entry_points.txt +44 -0
  68. runnable-0.1.0.dist-info/METADATA +0 -16
  69. runnable-0.1.0.dist-info/RECORD +0 -6
  70. /runnable/{.gitkeep → extensions/__init__.py} +0 -0
  71. {runnable-0.1.0.dist-info → runnable-0.3.0.dist-info}/LICENSE +0 -0
runnable/tasks.py ADDED
@@ -0,0 +1,395 @@
1
+ import contextlib
2
+ import importlib
3
+ import io
4
+ import json
5
+ import logging
6
+ import os
7
+ import subprocess
8
+ import sys
9
+ from typing import Any, Dict, List, Optional, Tuple
10
+
11
+ from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, field_validator
12
+ from pydantic._internal._model_construction import ModelMetaclass
13
+ from stevedore import driver
14
+
15
+ import runnable.context as context
16
+ from runnable import defaults, parameters, utils
17
+ from runnable.defaults import TypeMapVariable
18
+
19
+ logger = logging.getLogger(defaults.LOGGER_NAME)  # module logger, named by defaults.LOGGER_NAME
20
+ logging.getLogger("stevedore").setLevel(logging.CRITICAL)  # only CRITICAL records from the "stevedore" logger get through
21
+
22
+
23
+ # TODO: Can we add memory peak, cpu usage, etc. to the metrics?
24
+
25
+
26
class BaseTaskType(BaseModel):
    """A base task class which does the execution of command defined by the user.

    Concrete task types override ``get_cli_options`` and ``execute_command``;
    this base class provides the shared helpers for exposing secrets and for
    capturing standard output of an execution.
    """

    # Serialized under the key "command_type".
    task_type: str = Field(serialization_alias="command_type")
    # Name of the node running this task; excluded from serialization.
    node_name: str = Field(exclude=True)
    # Mapping of {name asked of the secrets handler: environment variable name}.
    secrets: Dict[str, str] = Field(default_factory=dict)

    model_config = ConfigDict(extra="forbid")

    @property
    def _context(self):
        """Return the module-level run context (``runnable.context.run_context``)."""
        return context.run_context

    def get_cli_options(self) -> Tuple[str, dict]:
        """
        Key is the name of the cli option and value is the value of the cli option.
        This should always be in sync with the cli options defined in execute_*.

        Returns:
            str: The name of the cli option.
            dict: The dict of cli options for the task.

        Raises:
            NotImplementedError: Base class, not implemented
        """
        raise NotImplementedError()

    def set_secrets_as_env_variables(self):
        """Fetch every configured secret and export it as an environment variable.

        For each ``key -> value`` pair of ``self.secrets`` the secret named
        ``key`` is requested from the secrets handler of the run context and
        stored in ``os.environ[value]``.

        ``self.secrets`` itself is deliberately left untouched: writing to it
        while iterating raises ``RuntimeError`` when the dictionary grows,
        corrupts the name mapping when it does not, and would place secret
        values on a model field that gets serialized.
        """
        for key, value in self.secrets.items():
            os.environ[value] = context.run_context.secrets_handler.get(key)

    def delete_secrets_from_env_variables(self):
        """Remove the environment variables named by the values of ``self.secrets``."""
        for env_name in self.secrets.values():
            # pop with a default: the variable may never have been set.
            os.environ.pop(env_name, None)

    def execute_command(
        self,
        params: Optional[Dict[str, Any]] = None,
        map_variable: TypeMapVariable = None,
        **kwargs,
    ) -> Optional[Dict[str, Any]]:
        """The function to execute the command.

        And map_variable is sent in as an argument into the function.

        Args:
            params (dict, optional): The parameters available to the command. Defaults to None.
            map_variable (dict, optional): If the command is part of map node, the value of map. Defaults to None.

        Raises:
            NotImplementedError: Base class, not implemented
        """
        raise NotImplementedError()

    @contextlib.contextmanager
    def expose_secrets(self, map_variable: TypeMapVariable = None):
        """Context manager to expose secrets to the execution.

        The secrets are exported on entry and removed again on exit, even if
        the body (or the export itself) raises.

        Args:
            map_variable (dict, optional): If the command is part of map node, the value of map. Defaults to None.
                Accepted for interface symmetry; not used here.
        """
        try:
            # Inside the try so that a failure half-way through the export
            # still cleans up the variables that were already set.
            self.set_secrets_as_env_variables()
            yield
        finally:
            self.delete_secrets_from_env_variables()

    @contextlib.contextmanager
    def output_to_file(self, map_variable: TypeMapVariable = None):
        """Context manager to put the output of a function execution to catalog.

        Standard output produced inside the ``with`` body is captured in
        memory. On exit it is printed to the real standard output, written to
        ``<node_name with spaces replaced by "_">.execution.log`` (followed by
        ``_<value>`` for every map variable value), passed to
        ``put_in_catalog`` and finally the local log file is deleted.

        Args:
            map_variable (dict, optional): If the command is part of map node, the value of map. Defaults to None.
        """
        from runnable import put_in_catalog  # Causing cyclic imports

        log_file_name = self.node_name.replace(" ", "_") + ".execution.log"
        if map_variable:
            for value in map_variable.values():
                log_file_name += "_" + str(value)

        log_file = open(log_file_name, "w")
        buffer = io.StringIO()
        try:
            with contextlib.redirect_stdout(buffer):
                yield
        finally:
            captured = buffer.getvalue()
            buffer.close()
            try:
                print(captured)  # print to console
                log_file.write(captured)  # Print to file
            finally:
                # Always release the handle, even if the write failed.
                log_file.close()

            try:
                put_in_catalog(log_file_name)
            finally:
                # The local copy is temporary; remove it even when cataloguing fails.
                os.remove(log_file_name)
125
+
126
+
127
class PythonTaskType(BaseTaskType):  # pylint: disable=too-few-public-methods
    """The task class for python command."""

    task_type: str = Field(default="python", serialization_alias="command_type")
    # Dotted reference to the function; split into module and attribute
    # by utils.get_module_and_attr_names at execution time.
    command: str

    def get_cli_options(self) -> Tuple[str, dict]:
        """Return the cli options for the task.

        Returns:
            str: The name of the cli option, "function".
            dict: The cli options for the task
        """
        return "function", {"command": self.command}

    def execute_command(
        self,
        params: Optional[Dict[str, Any]] = None,
        map_variable: TypeMapVariable = None,
        **kwargs,
    ) -> Optional[Dict[str, Any]]:
        """Execute the python function as defined by the command.

        The module is imported with the current working directory at the
        front of ``sys.path``, the arguments are selected by
        ``parameters.filter_arguments_for_func`` and the function is called
        with standard output captured and secrets exposed.

        Args:
            params (dict, optional): The parameters available to the function. Defaults to None.
            map_variable (dict, optional): If the node is part of a map node, the value of map. Defaults to None.

        Returns:
            dict: Empty if the function returned None, otherwise the
                ``model_dump(by_alias=True)`` of the returned pydantic model.

        Raises:
            ValueError: If the function returns anything other than None or a pydantic model instance.
            Exception: Whatever the user function raised is logged and re-raised.
        """
        params = params or {}

        module, func = utils.get_module_and_attr_names(self.command)

        # Need the current directory first on the path to import user modules.
        # Only insert when it is not already first, so that repeated
        # executions do not keep growing sys.path.
        cwd = os.getcwd()
        if sys.path[:1] != [cwd]:
            sys.path.insert(0, cwd)

        imported_module = importlib.import_module(module)
        f = getattr(imported_module, func)

        filtered_parameters = parameters.filter_arguments_for_func(f, params, map_variable)

        logger.info(f"Calling {func} from {module} with {filtered_parameters}")

        with self.output_to_file(map_variable=map_variable), self.expose_secrets():
            try:
                user_set_parameters = f(**filtered_parameters)
            except Exception:
                msg = f"Call to the function {self.command} with {filtered_parameters} did not succeed.\n"
                # logger.exception already records the active traceback.
                logger.exception(msg)
                raise

        if user_set_parameters is None:
            return {}

        # A model *class* is not an instance of BaseModel, so this single check
        # rejects classes as well as every non-pydantic return value.
        if not isinstance(user_set_parameters, BaseModel):
            raise ValueError("Output variable of a function can only be a pydantic model or dynamic model.")

        return user_set_parameters.model_dump(by_alias=True)
176
+
177
+
178
class NotebookTaskType(BaseTaskType):
    """The task class for Notebook based execution."""

    task_type: str = Field(default="notebook", serialization_alias="command_type")
    # Path of the notebook to execute; must end with ".ipynb".
    command: str
    # Where the executed notebook is written; derived from ``command`` when empty.
    notebook_output_path: str = Field(default="", validate_default=True)
    # Names of notebook variables to hand back as parameters.
    returns: Optional[List[str]] = Field(default_factory=list)
    output_cell_tag: str = Field(default="runnable_output", validate_default=True)
    # Extra keyword arguments merged into the ploomber ``execute_notebook`` call.
    optional_ploomber_args: dict = {}

    @field_validator("command")
    @classmethod
    def notebook_should_end_with_ipynb(cls, command: str):
        """Reject commands that do not point to an ``.ipynb`` file."""
        if not command.endswith(".ipynb"):
            raise Exception("Notebook task should point to a ipynb file")

        return command

    @field_validator("notebook_output_path")
    @classmethod
    def correct_notebook_output_path(cls, notebook_output_path: str, info: ValidationInfo):
        """Default the output path to ``<command without extension>_out.ipynb``.

        Only the final extension is stripped, so dots elsewhere in the path
        (``./nb.ipynb``, ``../dir/nb.ipynb``, ``my.notebook.ipynb``) survive.
        """
        if notebook_output_path:
            return notebook_output_path

        command = info.data.get("command")
        if command is None:
            # ``command`` is absent from the validated data, so there is nothing to
            # derive from; avoid a KeyError that would hide the real validation error.
            return notebook_output_path

        return os.path.splitext(command)[0] + "_out.ipynb"

    def get_cli_options(self) -> Tuple[str, dict]:
        """Return the cli option name "notebook" and the options for the task."""
        return "notebook", {"command": self.command, "notebook-output-path": self.notebook_output_path}

    def execute_command(
        self,
        params: Optional[Dict[str, Any]] = None,
        map_variable: TypeMapVariable = None,
        **kwargs,
    ) -> Optional[Dict[str, Any]]:
        """Execute the python notebook as defined by the command.

        Args:
            params (dict, optional): The parameters sent to the notebook. Defaults to None.
                The dictionary passed in is not modified.
            map_variable (dict, optional): If the node is part of internal branch. Defaults to None.
                Every value is appended to the output path and every item is added to the parameters.

        Returns:
            dict: The notebook variables named in ``returns``; pydantic models
                are converted with ``model_dump(by_alias=True)``.

        Raises:
            Exception: If ploomber engine is not installed, or if anything else fails
        """
        # Only the optional dependency is guarded: an ImportError raised while
        # running the notebook must not be reported as a missing install.
        try:
            import ploomber_engine as pm
            from ploomber_engine.ipython import PloomberClient
        except ImportError as e:
            msg = (
                "Task type of notebook requires ploomber engine to be installed. Please install via optional: notebook"
            )
            raise Exception(msg) from e

        from runnable import put_in_catalog  # Causes issues with cyclic import

        # Work on a copy so the caller's dictionary is not polluted with map variables.
        params = dict(params) if params else {}

        notebook_output_path = self.notebook_output_path

        if map_variable:
            for key, value in map_variable.items():
                notebook_output_path += "_" + str(value)
                params[key] = value

        kwds = {
            "input_path": self.command,
            "output_path": notebook_output_path,
            "parameters": params,
            "log_output": True,
            "progress_bar": False,
        }
        # User supplied arguments take precedence over the defaults above.
        kwds.update(self.optional_ploomber_args)

        with self.output_to_file(map_variable=map_variable), self.expose_secrets():
            pm.execute_notebook(**kwds)

        put_in_catalog(notebook_output_path)

        client = PloomberClient.from_path(path=notebook_output_path)
        namespace = client.get_namespace()

        wanted = set(self.returns or [])
        collected_params: Dict[str, Any] = {}
        for key, value in namespace.items():
            if key not in wanted:
                continue
            if isinstance(value, BaseModel):
                collected_params[key] = value.model_dump(by_alias=True)
            else:
                collected_params[key] = value

        return collected_params
274
+
275
+
276
class ShellTaskType(BaseTaskType):
    """
    The task class for shell based commands.
    """

    task_type: str = Field(default="shell", serialization_alias="command_type")
    # Names of environment variables to read back from the shell as parameters.
    returns: Optional[List[str]] = Field(default_factory=list)
    # The shell command line to run.
    command: str

    def execute_command(
        self,
        params: Optional[Dict[str, Any]] = None,
        map_variable: TypeMapVariable = None,
        **kwargs,
    ) -> Optional[Dict[str, Any]]:
        """Execute the shell command as defined by the command.

        The command runs with an environment made of the parameters, the
        ``RUNNABLE_*`` variables of this process (except ``RUNNABLE_PRM_*``),
        the map variables and the configured secrets, every value being JSON
        encoded. After the command, ``env`` is run behind a delimiter line so
        the variables named in ``returns`` can be read back.

        Args:
            params (dict, optional): The parameters exposed to the command. Defaults to None.
            map_variable (dict, optional): If the node is part of an internal branch. Defaults to None.

        Returns:
            dict: The variables named in ``returns`` found in the environment
                dump, JSON decoded when possible and as raw strings otherwise.

        Raises:
            Exception: If the command exits with a non-zero return code.
        """
        params = params or {}

        # Expose RUNNABLE environment variables, ignoring the parameters, to be passed to the subprocess.
        runnable_env_vars: Dict[str, Any] = {
            key: value
            for key, value in os.environ.items()
            if key.startswith("RUNNABLE_") and not key.startswith("RUNNABLE_PRM_")
        }

        subprocess_env = {**params, **runnable_env_vars}

        # Expose map variable as environment variables
        if map_variable:
            for key, value in map_variable.items():  # type: ignore
                subprocess_env[key] = str(value)

        # Expose secrets as environment variables
        for key, value in self.secrets.items():
            subprocess_env[value] = context.run_context.secrets_handler.get(key)

        # Json dumps all runnable environment variables
        subprocess_env = {key: json.dumps(value) for key, value in subprocess_env.items()}

        collect_delimiter = "=== COLLECT ==="

        command = self.command.strip() + f" && echo '{collect_delimiter}' && env"
        logger.info(f"Executing shell command: {command}")

        wanted = set(self.returns or [])
        output_parameters: Dict[str, Any] = {}
        capture = False

        # Using shell=True as we want to have chained commands to be executed in the same shell.
        # stderr is merged into stdout: a separate pipe that is never read
        # blocks the child once the pipe buffer fills up, hanging this loop.
        with subprocess.Popen(
            command,
            shell=True,
            env=subprocess_env,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        ) as proc, self.output_to_file(map_variable=map_variable):
            for line in proc.stdout:  # type: ignore
                stripped = line.strip()

                if capture:
                    # Environment dump: it contains the injected secrets, so it
                    # is parsed but intentionally not echoed to the logs.
                    key, separator, value = stripped.partition("=")
                    # Lines without "=" (e.g. continuation of a multi-line
                    # value) cannot be a returned variable; skip them.
                    if separator and key in wanted:
                        try:
                            output_parameters[key] = json.loads(value)
                        except json.JSONDecodeError:
                            output_parameters[key] = value  # simple data types
                    continue

                logger.info(line)
                print(line)

                if stripped == collect_delimiter:
                    capture = True

            proc.wait()
            if proc.returncode != 0:
                raise Exception("Command failed")

        return output_parameters
365
+
366
+
367
def create_task(kwargs_for_init) -> BaseTaskType:
    """
    Build the task object described by a command configuration.

    The task implementation is looked up by name in the "tasks" stevedore
    namespace and instantiated with the remaining configuration.

    Args:
        kwargs_for_init: A dictionary of keyword arguments that are sent by the user to the task.
            The optional "command_type" entry selects the task type and falls back to
            defaults.COMMAND_TYPE; every other entry is passed to the task on creation.

    Returns:
        tasks.BaseTaskType: The command object

    Raises:
        Exception: If loading or instantiating the task fails for any reason.
    """
    # Operate on a copy: the caller's dictionary must stay as it was given.
    init_options = kwargs_for_init.copy()
    requested_type = init_options.pop("command_type", defaults.COMMAND_TYPE)

    try:
        return driver.DriverManager(
            namespace="tasks",
            name=requested_type,
            invoke_on_load=True,
            invoke_kwds=init_options,
        ).driver
    except Exception as load_error:
        raise Exception(
            f"Could not find the task type {requested_type}. Please ensure you have installed "
            "the extension that provides the node type."
        ) from load_error