runnable 0.12.3__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. runnable/__init__.py +0 -11
  2. runnable/catalog.py +27 -5
  3. runnable/cli.py +122 -26
  4. runnable/datastore.py +71 -35
  5. runnable/defaults.py +0 -1
  6. runnable/entrypoints.py +107 -32
  7. runnable/exceptions.py +6 -2
  8. runnable/executor.py +28 -9
  9. runnable/graph.py +37 -12
  10. runnable/integration.py +7 -2
  11. runnable/nodes.py +15 -17
  12. runnable/parameters.py +27 -8
  13. runnable/pickler.py +1 -1
  14. runnable/sdk.py +101 -33
  15. runnable/secrets.py +3 -1
  16. runnable/tasks.py +246 -34
  17. runnable/utils.py +41 -13
  18. {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info}/METADATA +25 -31
  19. runnable-0.14.0.dist-info/RECORD +24 -0
  20. {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info}/WHEEL +1 -1
  21. runnable-0.14.0.dist-info/entry_points.txt +40 -0
  22. runnable/extensions/__init__.py +0 -0
  23. runnable/extensions/catalog/__init__.py +0 -21
  24. runnable/extensions/catalog/file_system/__init__.py +0 -0
  25. runnable/extensions/catalog/file_system/implementation.py +0 -234
  26. runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
  27. runnable/extensions/catalog/k8s_pvc/implementation.py +0 -16
  28. runnable/extensions/catalog/k8s_pvc/integration.py +0 -59
  29. runnable/extensions/executor/__init__.py +0 -649
  30. runnable/extensions/executor/argo/__init__.py +0 -0
  31. runnable/extensions/executor/argo/implementation.py +0 -1194
  32. runnable/extensions/executor/argo/specification.yaml +0 -51
  33. runnable/extensions/executor/k8s_job/__init__.py +0 -0
  34. runnable/extensions/executor/k8s_job/implementation_FF.py +0 -259
  35. runnable/extensions/executor/k8s_job/integration_FF.py +0 -69
  36. runnable/extensions/executor/local/__init__.py +0 -0
  37. runnable/extensions/executor/local/implementation.py +0 -71
  38. runnable/extensions/executor/local_container/__init__.py +0 -0
  39. runnable/extensions/executor/local_container/implementation.py +0 -446
  40. runnable/extensions/executor/mocked/__init__.py +0 -0
  41. runnable/extensions/executor/mocked/implementation.py +0 -154
  42. runnable/extensions/executor/retry/__init__.py +0 -0
  43. runnable/extensions/executor/retry/implementation.py +0 -168
  44. runnable/extensions/nodes.py +0 -855
  45. runnable/extensions/run_log_store/__init__.py +0 -0
  46. runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
  47. runnable/extensions/run_log_store/chunked_file_system/implementation.py +0 -111
  48. runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
  49. runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +0 -21
  50. runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +0 -61
  51. runnable/extensions/run_log_store/db/implementation_FF.py +0 -157
  52. runnable/extensions/run_log_store/db/integration_FF.py +0 -0
  53. runnable/extensions/run_log_store/file_system/__init__.py +0 -0
  54. runnable/extensions/run_log_store/file_system/implementation.py +0 -140
  55. runnable/extensions/run_log_store/generic_chunked.py +0 -557
  56. runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
  57. runnable/extensions/run_log_store/k8s_pvc/implementation.py +0 -21
  58. runnable/extensions/run_log_store/k8s_pvc/integration.py +0 -56
  59. runnable/extensions/secrets/__init__.py +0 -0
  60. runnable/extensions/secrets/dotenv/__init__.py +0 -0
  61. runnable/extensions/secrets/dotenv/implementation.py +0 -100
  62. runnable-0.12.3.dist-info/RECORD +0 -64
  63. runnable-0.12.3.dist-info/entry_points.txt +0 -41
  64. {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info/licenses}/LICENSE +0 -0
runnable/tasks.py CHANGED
@@ -31,7 +31,7 @@ logger = logging.getLogger(defaults.LOGGER_NAME)
31
31
  logging.getLogger("stevedore").setLevel(logging.CRITICAL)
32
32
 
33
33
 
34
- # TODO: Can we add memory peak, cpu usage, etc. to the metrics?
34
+ # TODO: This has to be an extension
35
35
 
36
36
 
37
37
  class TaskReturns(BaseModel):
@@ -43,8 +43,12 @@ class BaseTaskType(BaseModel):
43
43
  """A base task class which does the execution of command defined by the user."""
44
44
 
45
45
  task_type: str = Field(serialization_alias="command_type")
46
- secrets: List[str] = Field(default_factory=list)
47
- returns: List[TaskReturns] = Field(default_factory=list, alias="returns")
46
+ secrets: List[str] = Field(
47
+ default_factory=list
48
+ ) # A list of secrets to expose by secrets manager
49
+ returns: List[TaskReturns] = Field(
50
+ default_factory=list, alias="returns"
51
+ ) # The return values of the task
48
52
 
49
53
  model_config = ConfigDict(extra="forbid")
50
54
 
@@ -70,11 +74,13 @@ class BaseTaskType(BaseModel):
70
74
  raise NotImplementedError()
71
75
 
72
76
  def set_secrets_as_env_variables(self):
77
+ # Preparing the environment for the task execution
73
78
  for key in self.secrets:
74
79
  secret_value = context.run_context.secrets_handler.get(key)
75
80
  os.environ[key] = secret_value
76
81
 
77
82
  def delete_secrets_from_env_variables(self):
83
+ # Cleaning up the environment after the task execution
78
84
  for key in self.secrets:
79
85
  if key in os.environ:
80
86
  del os.environ[key]
@@ -99,6 +105,7 @@ class BaseTaskType(BaseModel):
99
105
  def _diff_parameters(
100
106
  self, parameters_in: Dict[str, Parameter], context_params: Dict[str, Parameter]
101
107
  ) -> Dict[str, Parameter]:
108
+ # If the parameter is different from existing parameters, then it is updated
102
109
  diff: Dict[str, Parameter] = {}
103
110
  for param_name, param in context_params.items():
104
111
  if param_name in parameters_in:
@@ -112,12 +119,7 @@ class BaseTaskType(BaseModel):
112
119
 
113
120
  @contextlib.contextmanager
114
121
  def expose_secrets(self):
115
- """Context manager to expose secrets to the execution.
116
-
117
- Args:
118
- map_variable (dict, optional): If the command is part of map node, the value of map. Defaults to None.
119
-
120
- """
122
+ """Context manager to expose secrets to the execution."""
121
123
  self.set_secrets_as_env_variables()
122
124
  try:
123
125
  yield
@@ -126,9 +128,32 @@ class BaseTaskType(BaseModel):
126
128
  finally:
127
129
  self.delete_secrets_from_env_variables()
128
130
 
131
+ def resolve_unreduced_parameters(self, map_variable: TypeMapVariable = None):
132
+ """Resolve the unreduced parameters."""
133
+ params = self._context.run_log_store.get_parameters(
134
+ run_id=self._context.run_id
135
+ ).copy()
136
+
137
+ for param_name, param in params.items():
138
+ if param.reduced is False:
139
+ assert (
140
+ map_variable is not None
141
+ ), "Parameters in non-map node should always be reduced"
142
+
143
+ context_param = param_name
144
+ for _, v in map_variable.items():
145
+ context_param = f"{v}_{context_param}"
146
+
147
+ if context_param in params: # Is this if required?
148
+ params[param_name].value = params[context_param].value
149
+
150
+ return params
151
+
129
152
  @contextlib.contextmanager
130
- def execution_context(self, map_variable: TypeMapVariable = None, allow_complex: bool = True):
131
- params = self._context.run_log_store.get_parameters(run_id=self._context.run_id).copy()
153
+ def execution_context(
154
+ self, map_variable: TypeMapVariable = None, allow_complex: bool = True
155
+ ):
156
+ params = self.resolve_unreduced_parameters(map_variable=map_variable)
132
157
  logger.info(f"Parameters available for the execution: {params}")
133
158
 
134
159
  for param_name, param in params.items():
@@ -150,7 +175,11 @@ class BaseTaskType(BaseModel):
150
175
  logger.debug(f"Resolved parameters: {params}")
151
176
 
152
177
  if not allow_complex:
153
- params = {key: value for key, value in params.items() if isinstance(value, JsonParameter)}
178
+ params = {
179
+ key: value
180
+ for key, value in params.items()
181
+ if isinstance(value, JsonParameter)
182
+ }
154
183
 
155
184
  parameters_in = copy.deepcopy(params)
156
185
  try:
@@ -161,8 +190,12 @@ class BaseTaskType(BaseModel):
161
190
  finally:
162
191
  # Update parameters
163
192
  # This should only update the parameters that are changed at the root level.
164
- diff_parameters = self._diff_parameters(parameters_in=parameters_in, context_params=params)
165
- self._context.run_log_store.set_parameters(parameters=diff_parameters, run_id=self._context.run_id)
193
+ diff_parameters = self._diff_parameters(
194
+ parameters_in=parameters_in, context_params=params
195
+ )
196
+ self._context.run_log_store.set_parameters(
197
+ parameters=diff_parameters, run_id=self._context.run_id
198
+ )
166
199
 
167
200
 
168
201
  def task_return_to_parameter(task_return: TaskReturns, value: Any) -> Parameter:
@@ -188,7 +221,56 @@ def task_return_to_parameter(task_return: TaskReturns, value: Any) -> Parameter:
188
221
 
189
222
 
190
223
  class PythonTaskType(BaseTaskType): # pylint: disable=too-few-public-methods
191
- """The task class for python command."""
224
+ """
225
+ --8<-- [start:python_reference]
226
+ An execution node of the pipeline of python functions.
227
+ Please refer to define pipeline/tasks/python for more information.
228
+
229
+ As part of the dag definition, a python task is defined as follows:
230
+
231
+ dag:
232
+ steps:
233
+ python_task: # The name of the node
234
+ type: task
235
+ command_type: python # this is default
236
+ command: my_module.my_function # the dotted path to the function. Please refer to the yaml section of
237
+ define pipeline/tasks/python for concrete details.
238
+ returns:
239
+ - name: # The name to assign the return value
240
+ kind: json # the default value is json,
241
+ can be object for python objects and metric for metrics
242
+ secrets:
243
+ - my_secret_key # A list of secrets to expose by secrets manager
244
+ catalog:
245
+ get:
246
+ - A list of glob patterns to get from the catalog to the local file system
247
+ put:
248
+ - A list of glob patterns to put to the catalog from the local file system
249
+ on_failure: The name of the step to traverse in case of failure
250
+ overrides:
251
+ Individual tasks can override the global configuration by referring to the
252
+ specific override.
253
+
254
+ For example,
255
+ #Global configuration
256
+ executor:
257
+ type: local-container
258
+ config:
259
+ docker_image: "runnable/runnable:latest"
260
+ overrides:
261
+ custom_docker_image:
262
+ docker_image: "runnable/runnable:custom"
263
+
264
+ ## In the node definition
265
+ overrides:
266
+ local-container:
267
+ docker_image: "runnable/runnable:custom"
268
+
269
+ This instruction will override the docker image for the local-container executor.
270
+ next: The next node to execute after this task, use "success" to terminate the pipeline successfully
271
+ or "fail" to terminate the pipeline with an error.
272
+ --8<-- [end:python_reference]
273
+ """
192
274
 
193
275
  task_type: str = Field(default="python", serialization_alias="command_type")
194
276
  command: str
@@ -209,7 +291,10 @@ class PythonTaskType(BaseTaskType): # pylint: disable=too-few-public-methods
209
291
  """Execute the notebook as defined by the command."""
210
292
  attempt_log = StepAttempt(status=defaults.FAIL, start_time=str(datetime.now()))
211
293
 
212
- with self.execution_context(map_variable=map_variable) as params, self.expose_secrets() as _:
294
+ with (
295
+ self.execution_context(map_variable=map_variable) as params,
296
+ self.expose_secrets() as _,
297
+ ):
213
298
  module, func = utils.get_module_and_attr_names(self.command)
214
299
  sys.path.insert(0, os.getcwd()) # Need to add the current directory to path
215
300
  imported_module = importlib.import_module(module)
@@ -217,21 +302,32 @@ class PythonTaskType(BaseTaskType): # pylint: disable=too-few-public-methods
217
302
 
218
303
  try:
219
304
  try:
220
- filtered_parameters = parameters.filter_arguments_for_func(f, params.copy(), map_variable)
221
- logger.info(f"Calling {func} from {module} with {filtered_parameters}")
305
+ filtered_parameters = parameters.filter_arguments_for_func(
306
+ f, params.copy(), map_variable
307
+ )
308
+ logger.info(
309
+ f"Calling {func} from {module} with {filtered_parameters}"
310
+ )
222
311
 
223
312
  out_file = io.StringIO()
224
313
  with contextlib.redirect_stdout(out_file):
225
- user_set_parameters = f(**filtered_parameters) # This is a tuple or single value
314
+ user_set_parameters = f(
315
+ **filtered_parameters
316
+ ) # This is a tuple or single value
226
317
  task_console.print(out_file.getvalue())
227
318
  except Exception as e:
228
- raise exceptions.CommandCallError(f"Function call: {self.command} did not succeed.\n") from e
319
+ raise exceptions.CommandCallError(
320
+ f"Function call: {self.command} did not succeed.\n"
321
+ ) from e
229
322
 
230
323
  attempt_log.input_parameters = params.copy()
231
324
 
232
325
  if map_variable:
233
326
  attempt_log.input_parameters.update(
234
- {k: JsonParameter(value=v, kind="json") for k, v in map_variable.items()}
327
+ {
328
+ k: JsonParameter(value=v, kind="json")
329
+ for k, v in map_variable.items()
330
+ }
235
331
  )
236
332
 
237
333
  if self.returns:
@@ -239,7 +335,9 @@ class PythonTaskType(BaseTaskType): # pylint: disable=too-few-public-methods
239
335
  user_set_parameters = (user_set_parameters,)
240
336
 
241
337
  if len(user_set_parameters) != len(self.returns):
242
- raise ValueError("Returns task signature does not match the function returns")
338
+ raise ValueError(
339
+ "Returns task signature does not match the function returns"
340
+ )
243
341
 
244
342
  output_parameters: Dict[str, Parameter] = {}
245
343
  metrics: Dict[str, Parameter] = {}
@@ -277,7 +375,56 @@ class PythonTaskType(BaseTaskType): # pylint: disable=too-few-public-methods
277
375
 
278
376
 
279
377
  class NotebookTaskType(BaseTaskType):
280
- """The task class for Notebook based execution."""
378
+ """
379
+ --8<-- [start:notebook_reference]
380
+ An execution node of the pipeline of notebook execution.
381
+ Please refer to define pipeline/tasks/notebook for more information.
382
+
383
+ As part of the dag definition, a notebook task is defined as follows:
384
+
385
+ dag:
386
+ steps:
387
+ notebook_task: # The name of the node
388
+ type: task
389
+ command_type: notebook
390
+ command: the path to the notebook relative to project root.
391
+ optional_ploomber_args: a dictionary of arguments to be passed to ploomber engine
392
+ returns:
393
+ - name: # The name to assign the return value
394
+ kind: json # the default value is json,
395
+ can be object for python objects and metric for metrics
396
+ secrets:
397
+ - my_secret_key # A list of secrets to expose by secrets manager
398
+ catalog:
399
+ get:
400
+ - A list of glob patterns to get from the catalog to the local file system
401
+ put:
402
+ - A list of glob patterns to put to the catalog from the local file system
403
+ on_failure: The name of the step to traverse in case of failure
404
+ overrides:
405
+ Individual tasks can override the global configuration by referring to the
406
+ specific override.
407
+
408
+ For example,
409
+ #Global configuration
410
+ executor:
411
+ type: local-container
412
+ config:
413
+ docker_image: "runnable/runnable:latest"
414
+ overrides:
415
+ custom_docker_image:
416
+ docker_image: "runnable/runnable:custom"
417
+
418
+ ## In the node definition
419
+ overrides:
420
+ local-container:
421
+ docker_image: "runnable/runnable:custom"
422
+
423
+ This instruction will override the docker image for the local-container executor.
424
+ next: The next node to execute after this task, use "success" to terminate the pipeline successfully
425
+ or "fail" to terminate the pipeline with an error.
426
+ --8<-- [end:notebook_reference]
427
+ """
281
428
 
282
429
  task_type: str = Field(default="notebook", serialization_alias="command_type")
283
430
  command: str
@@ -302,7 +449,10 @@ class NotebookTaskType(BaseTaskType):
302
449
  return str(file_name)
303
450
 
304
451
  def get_cli_options(self) -> Tuple[str, dict]:
305
- return "notebook", {"command": self.command, "notebook-output-path": self.notebook_output_path}
452
+ return "notebook", {
453
+ "command": self.command,
454
+ "notebook-output-path": self.notebook_output_path,
455
+ }
306
456
 
307
457
  def execute_command(
308
458
  self,
@@ -325,9 +475,12 @@ class NotebookTaskType(BaseTaskType):
325
475
 
326
476
  notebook_output_path = self.notebook_output_path
327
477
 
328
- with self.execution_context(
329
- map_variable=map_variable, allow_complex=False
330
- ) as params, self.expose_secrets() as _:
478
+ with (
479
+ self.execution_context(
480
+ map_variable=map_variable, allow_complex=False
481
+ ) as params,
482
+ self.expose_secrets() as _,
483
+ ):
331
484
  copy_params = copy.deepcopy(params)
332
485
 
333
486
  if map_variable:
@@ -336,7 +489,9 @@ class NotebookTaskType(BaseTaskType):
336
489
  copy_params[key] = JsonParameter(kind="json", value=value)
337
490
 
338
491
  # Remove any {v}_unreduced parameters from the parameters
339
- unprocessed_params = [k for k, v in copy_params.items() if not v.reduced]
492
+ unprocessed_params = [
493
+ k for k, v in copy_params.items() if not v.reduced
494
+ ]
340
495
 
341
496
  for key in list(copy_params.keys()):
342
497
  if any(key.endswith(f"_{k}") for k in unprocessed_params):
@@ -360,7 +515,9 @@ class NotebookTaskType(BaseTaskType):
360
515
  pm.execute_notebook(**kwds)
361
516
  task_console.print(out_file.getvalue())
362
517
 
363
- context.run_context.catalog_handler.put(name=notebook_output_path, run_id=context.run_context.run_id)
518
+ context.run_context.catalog_handler.put(
519
+ name=notebook_output_path, run_id=context.run_context.run_id
520
+ )
364
521
 
365
522
  client = PloomberClient.from_path(path=notebook_output_path)
366
523
  namespace = client.get_namespace()
@@ -368,7 +525,9 @@ class NotebookTaskType(BaseTaskType):
368
525
  output_parameters: Dict[str, Parameter] = {}
369
526
  try:
370
527
  for task_return in self.returns:
371
- param_name = Template(task_return.name).safe_substitute(map_variable) # type: ignore
528
+ param_name = Template(task_return.name).safe_substitute(
529
+ map_variable # type: ignore
530
+ )
372
531
 
373
532
  if map_variable:
374
533
  for _, v in map_variable.items():
@@ -410,7 +569,54 @@ class NotebookTaskType(BaseTaskType):
410
569
 
411
570
  class ShellTaskType(BaseTaskType):
412
571
  """
413
- The task class for shell based commands.
572
+ --8<-- [start:shell_reference]
573
+ An execution node of the pipeline of shell execution.
574
+ Please refer to define pipeline/tasks/shell for more information.
575
+
576
+ As part of the dag definition, a shell task is defined as follows:
577
+
578
+ dag:
579
+ steps:
580
+ shell_task: # The name of the node
581
+ type: task
582
+ command_type: shell
583
+ command: The command to execute, it could be multiline
584
+ optional_ploomber_args: a dictionary of arguments to be passed to ploomber engine
585
+ returns:
586
+ - name: # The name to assign the return value
587
+ kind: json # the default value is json,
588
+ can be object for python objects and metric for metrics
589
+ secrets:
590
+ - my_secret_key # A list of secrets to expose by secrets manager
591
+ catalog:
592
+ get:
593
+ - A list of glob patterns to get from the catalog to the local file system
594
+ put:
595
+ - A list of glob patterns to put to the catalog from the local file system
596
+ on_failure: The name of the step to traverse in case of failure
597
+ overrides:
598
+ Individual tasks can override the global configuration by referring to the
599
+ specific override.
600
+
601
+ For example,
602
+ #Global configuration
603
+ executor:
604
+ type: local-container
605
+ config:
606
+ docker_image: "runnable/runnable:latest"
607
+ overrides:
608
+ custom_docker_image:
609
+ docker_image: "runnable/runnable:custom"
610
+
611
+ ## In the node definition
612
+ overrides:
613
+ local-container:
614
+ docker_image: "runnable/runnable:custom"
615
+
616
+ This instruction will override the docker image for the local-container executor.
617
+ next: The next node to execute after this task, use "success" to terminate the pipeline successfully
618
+ or "fail" to terminate the pipeline with an error.
619
+ --8<-- [end:shell_reference]
414
620
  """
415
621
 
416
622
  task_type: str = Field(default="shell", serialization_alias="command_type")
@@ -421,7 +627,9 @@ class ShellTaskType(BaseTaskType):
421
627
  def returns_should_be_json(cls, returns: List[TaskReturns]):
422
628
  for task_return in returns:
423
629
  if task_return.kind == "object" or task_return.kind == "pydantic":
424
- raise ValueError("Pydantic models or Objects are not allowed in returns")
630
+ raise ValueError(
631
+ "Pydantic models or Objects are not allowed in returns"
632
+ )
425
633
 
426
634
  return returns
427
635
 
@@ -456,7 +664,9 @@ class ShellTaskType(BaseTaskType):
456
664
  subprocess_env[key] = secret_value
457
665
 
458
666
  try:
459
- with self.execution_context(map_variable=map_variable, allow_complex=False) as params:
667
+ with self.execution_context(
668
+ map_variable=map_variable, allow_complex=False
669
+ ) as params:
460
670
  subprocess_env.update({k: v.get_value() for k, v in params.items()})
461
671
 
462
672
  # Json dumps all runnable environment variables
@@ -467,7 +677,9 @@ class ShellTaskType(BaseTaskType):
467
677
 
468
678
  collect_delimiter = "=== COLLECT ==="
469
679
 
470
- command = self.command.strip() + f" && echo '{collect_delimiter}' && env"
680
+ command = (
681
+ self.command.strip() + f" && echo '{collect_delimiter}' && env"
682
+ )
471
683
  logger.info(f"Executing shell command: {command}")
472
684
 
473
685
  capture = False
runnable/utils.py CHANGED
@@ -21,7 +21,7 @@ from runnable import defaults, names
21
21
  from runnable.defaults import TypeMapVariable
22
22
 
23
23
  if TYPE_CHECKING: # pragma: no cover
24
- from runnable.extensions.nodes import TaskNode
24
+ from extensions.nodes.nodes import TaskNode
25
25
  from runnable.nodes import BaseNode
26
26
 
27
27
 
@@ -86,7 +86,9 @@ def generate_run_id(run_id: str = "") -> str:
86
86
  return run_id
87
87
 
88
88
 
89
- def apply_variables(apply_to: Dict[str, Any], variables: Dict[str, str]) -> Dict[str, Any]:
89
+ def apply_variables(
90
+ apply_to: Dict[str, Any], variables: Dict[str, str]
91
+ ) -> Dict[str, Any]:
90
92
  """Safely applies the variables to a config.
91
93
 
92
94
  For example: For config:
@@ -272,7 +274,9 @@ def get_local_docker_image_id(image_name: str) -> str:
272
274
  image = client.images.get(image_name)
273
275
  return image.attrs["Id"]
274
276
  except ImportError: # pragma: no cover
275
- logger.warning("Did not find docker installed, some functionality might be affected")
277
+ logger.warning(
278
+ "Did not find docker installed, some functionality might be affected"
279
+ )
276
280
  except BaseException:
277
281
  logger.exception(f"Could not find the image by name {image_name}")
278
282
 
@@ -295,7 +299,9 @@ def get_git_code_identity():
295
299
  code_identity.code_identifier_dependable, changed = is_git_clean()
296
300
  code_identity.code_identifier_url = get_git_remote()
297
301
  if changed:
298
- code_identity.code_identifier_message = "changes found in " + ", ".join(changed.split("\n"))
302
+ code_identity.code_identifier_message = "changes found in " + ", ".join(
303
+ changed.split("\n")
304
+ )
299
305
  except BaseException:
300
306
  logger.exception("Git code versioning problems")
301
307
 
@@ -331,7 +337,9 @@ def get_tracked_data() -> Dict[str, str]:
331
337
  try:
332
338
  tracked_data[key.lower()] = json.loads(value)
333
339
  except json.decoder.JSONDecodeError:
334
- logger.warning(f"Tracker {key} could not be JSON decoded, adding the literal value")
340
+ logger.warning(
341
+ f"Tracker {key} could not be JSON decoded, adding the literal value"
342
+ )
335
343
  tracked_data[key.lower()] = value
336
344
 
337
345
  del os.environ[env_var]
@@ -389,9 +397,13 @@ def get_data_hash(file_name: str):
389
397
  str: The SHA ID of the file contents
390
398
  """
391
399
  # https://stackoverflow.com/questions/3431825/generating-an-md5-checksum-of-a-file
392
- return hash_bytestr_iter(file_as_blockiter(open(file_name, "rb")), hashlib.sha256()) # pragma: no cover
400
+ # TODO: For a big file, we should only hash the first few bytes
401
+ return hash_bytestr_iter(
402
+ file_as_blockiter(open(file_name, "rb")), hashlib.sha256()
403
+ ) # pragma: no cover
393
404
 
394
405
 
406
+ # TODO: This is not the right place for this.
395
407
  def get_node_execution_command(
396
408
  node: BaseNode,
397
409
  map_variable: TypeMapVariable = None,
@@ -415,7 +427,11 @@ def get_node_execution_command(
415
427
 
416
428
  log_level = log_level or logging.getLevelName(logger.getEffectiveLevel())
417
429
 
418
- action = f"runnable execute_single_node {run_id} " f"{node._command_friendly_name()}" f" --log-level {log_level}"
430
+ action = (
431
+ f"runnable execute_single_node {run_id} "
432
+ f"{node._command_friendly_name()}"
433
+ f" --log-level {log_level}"
434
+ )
419
435
 
420
436
  if context.run_context.pipeline_file:
421
437
  action = action + f" --file {context.run_context.pipeline_file}"
@@ -435,6 +451,7 @@ def get_node_execution_command(
435
451
  return action
436
452
 
437
453
 
454
+ # TODO: This is not the right place for this.
438
455
  def get_fan_command(
439
456
  mode: str,
440
457
  node: BaseNode,
@@ -478,6 +495,7 @@ def get_fan_command(
478
495
  return action
479
496
 
480
497
 
498
+ # TODO: This is not the right place for this.
481
499
  def get_job_execution_command(node: TaskNode, over_write_run_id: str = "") -> str:
482
500
  """Get the execution command to run a job via command line.
483
501
 
@@ -520,7 +538,9 @@ def get_job_execution_command(node: TaskNode, over_write_run_id: str = "") -> st
520
538
  return action
521
539
 
522
540
 
523
- def get_provider_by_name_and_type(service_type: str, service_details: defaults.ServiceConfig):
541
+ def get_provider_by_name_and_type(
542
+ service_type: str, service_details: defaults.ServiceConfig
543
+ ):
524
544
  """Given a service type, one of executor, run_log_store, catalog, secrets and the config
525
545
  return the exact child class implementing the service.
526
546
  We use stevedore to do the work for us.
@@ -542,7 +562,9 @@ def get_provider_by_name_and_type(service_type: str, service_details: defaults.S
542
562
  if "config" in service_details:
543
563
  service_config = service_details.get("config", {})
544
564
 
545
- logger.debug(f"Trying to get a service of {service_type} of the name {service_name} with config: {service_config}")
565
+ logger.debug(
566
+ f"Trying to get a service of {service_type} of the name {service_name} with config: {service_config}"
567
+ )
546
568
  try:
547
569
  mgr = driver.DriverManager(
548
570
  namespace=namespace,
@@ -552,8 +574,12 @@ def get_provider_by_name_and_type(service_type: str, service_details: defaults.S
552
574
  )
553
575
  return mgr.driver
554
576
  except Exception as _e:
555
- logger.exception(f"Could not find the service of type: {service_type} with config: {service_details}")
556
- raise Exception(f"Could not find the service of type: {service_type} with config: {service_details}") from _e
577
+ logger.exception(
578
+ f"Could not find the service of type: {service_type} with config: {service_details}"
579
+ )
580
+ raise Exception(
581
+ f"Could not find the service of type: {service_type} with config: {service_details}"
582
+ ) from _e
557
583
 
558
584
 
559
585
  def get_run_config() -> dict:
@@ -585,7 +611,9 @@ def json_to_ordered_dict(json_str: str) -> TypeMapVariable:
585
611
  return OrderedDict()
586
612
 
587
613
 
588
- def set_runnable_environment_variables(run_id: str = "", configuration_file: str = "", tag: str = "") -> None:
614
+ def set_runnable_environment_variables(
615
+ run_id: str = "", configuration_file: str = "", tag: str = ""
616
+ ) -> None:
589
617
  """Set the environment variables used by runnable. This function should be called during the prepare configurations
590
618
  by all executors.
591
619
 
@@ -604,7 +632,7 @@ def set_runnable_environment_variables(run_id: str = "", configuration_file: str
604
632
  os.environ[defaults.RUNNABLE_RUN_TAG] = tag
605
633
 
606
634
 
607
- def gather_variables() -> dict:
635
+ def gather_variables() -> Dict[str, str]:
608
636
  """Gather all the environment variables used by runnable. All the variables start with runnable_VAR_.
609
637
 
610
638
  Returns:
@@ -1,36 +1,31 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: runnable
3
- Version: 0.12.3
4
- Summary: A Compute agnostic pipelining software
5
- Home-page: https://github.com/vijayvammi/runnable
6
- License: Apache-2.0
7
- Author: Vijay Vammi
8
- Author-email: mesanthu@gmail.com
9
- Requires-Python: >=3.9,<3.13
10
- Classifier: License :: OSI Approved :: Apache Software License
11
- Classifier: Programming Language :: Python :: 3
12
- Classifier: Programming Language :: Python :: 3.9
13
- Classifier: Programming Language :: Python :: 3.10
14
- Classifier: Programming Language :: Python :: 3.11
15
- Classifier: Programming Language :: Python :: 3.12
16
- Provides-Extra: database
3
+ Version: 0.14.0
4
+ Summary: Add your description here
5
+ Author-email: "Vammi, Vijay" <vijay.vammi@astrazeneca.com>
6
+ License-File: LICENSE
7
+ Requires-Python: >=3.9
8
+ Requires-Dist: catalog
9
+ Requires-Dist: click-plugins>=1.1.1
10
+ Requires-Dist: click<=8.1.3
11
+ Requires-Dist: dill>=0.3.9
12
+ Requires-Dist: executor
13
+ Requires-Dist: nodes
14
+ Requires-Dist: pydantic>=2.10.3
15
+ Requires-Dist: python-dotenv>=1.0.1
16
+ Requires-Dist: rich>=13.9.4
17
+ Requires-Dist: ruamel-yaml>=0.18.6
18
+ Requires-Dist: run-log-store
19
+ Requires-Dist: secrets
20
+ Requires-Dist: setuptools>=75.6.0
21
+ Requires-Dist: stevedore>=5.4.0
22
+ Requires-Dist: typer>=0.15.1
17
23
  Provides-Extra: docker
24
+ Requires-Dist: docker>=7.1.0; extra == 'docker'
25
+ Provides-Extra: examples
26
+ Requires-Dist: pandas>=2.2.3; extra == 'examples'
18
27
  Provides-Extra: notebook
19
- Requires-Dist: click
20
- Requires-Dist: click-plugins (>=1.1.1,<2.0.0)
21
- Requires-Dist: dill (>=0.3.8,<0.4.0)
22
- Requires-Dist: docker ; extra == "docker"
23
- Requires-Dist: mlflow-skinny
24
- Requires-Dist: ploomber-engine (>=0.0.31,<0.0.32) ; extra == "notebook"
25
- Requires-Dist: pydantic (>=2.5,<3.0)
26
- Requires-Dist: rich (>=13.5.2,<14.0.0)
27
- Requires-Dist: ruamel.yaml
28
- Requires-Dist: ruamel.yaml.clib
29
- Requires-Dist: sqlalchemy ; extra == "database"
30
- Requires-Dist: stevedore (>=3.5.0,<4.0.0)
31
- Requires-Dist: typing-extensions ; python_version < "3.8"
32
- Project-URL: Documentation, https://github.com/vijayvammi/runnable
33
- Project-URL: Repository, https://github.com/vijayvammi/runnable
28
+ Requires-Dist: ploomber-engine>=0.0.33; extra == 'notebook'
34
29
  Description-Content-Type: text/markdown
35
30
 
36
31
 
@@ -267,4 +262,3 @@ Execute a pipeline over an iterable parameter.
267
262
 
268
263
  ### [Arbitrary nesting](https://astrazeneca.github.io/runnable-core/concepts/nesting/)
269
264
  Any nesting of parallel within map and so on.
270
-