runnable 0.12.3__py3-none-any.whl → 0.14.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. runnable/__init__.py +0 -11
  2. runnable/catalog.py +27 -5
  3. runnable/cli.py +122 -26
  4. runnable/datastore.py +71 -35
  5. runnable/defaults.py +0 -1
  6. runnable/entrypoints.py +107 -32
  7. runnable/exceptions.py +6 -2
  8. runnable/executor.py +28 -9
  9. runnable/graph.py +37 -12
  10. runnable/integration.py +7 -2
  11. runnable/nodes.py +15 -17
  12. runnable/parameters.py +27 -8
  13. runnable/pickler.py +1 -1
  14. runnable/sdk.py +101 -33
  15. runnable/secrets.py +3 -1
  16. runnable/tasks.py +246 -34
  17. runnable/utils.py +41 -13
  18. {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info}/METADATA +25 -31
  19. runnable-0.14.0.dist-info/RECORD +24 -0
  20. {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info}/WHEEL +1 -1
  21. runnable-0.14.0.dist-info/entry_points.txt +40 -0
  22. runnable/extensions/__init__.py +0 -0
  23. runnable/extensions/catalog/__init__.py +0 -21
  24. runnable/extensions/catalog/file_system/__init__.py +0 -0
  25. runnable/extensions/catalog/file_system/implementation.py +0 -234
  26. runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
  27. runnable/extensions/catalog/k8s_pvc/implementation.py +0 -16
  28. runnable/extensions/catalog/k8s_pvc/integration.py +0 -59
  29. runnable/extensions/executor/__init__.py +0 -649
  30. runnable/extensions/executor/argo/__init__.py +0 -0
  31. runnable/extensions/executor/argo/implementation.py +0 -1194
  32. runnable/extensions/executor/argo/specification.yaml +0 -51
  33. runnable/extensions/executor/k8s_job/__init__.py +0 -0
  34. runnable/extensions/executor/k8s_job/implementation_FF.py +0 -259
  35. runnable/extensions/executor/k8s_job/integration_FF.py +0 -69
  36. runnable/extensions/executor/local/__init__.py +0 -0
  37. runnable/extensions/executor/local/implementation.py +0 -71
  38. runnable/extensions/executor/local_container/__init__.py +0 -0
  39. runnable/extensions/executor/local_container/implementation.py +0 -446
  40. runnable/extensions/executor/mocked/__init__.py +0 -0
  41. runnable/extensions/executor/mocked/implementation.py +0 -154
  42. runnable/extensions/executor/retry/__init__.py +0 -0
  43. runnable/extensions/executor/retry/implementation.py +0 -168
  44. runnable/extensions/nodes.py +0 -855
  45. runnable/extensions/run_log_store/__init__.py +0 -0
  46. runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
  47. runnable/extensions/run_log_store/chunked_file_system/implementation.py +0 -111
  48. runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
  49. runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +0 -21
  50. runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +0 -61
  51. runnable/extensions/run_log_store/db/implementation_FF.py +0 -157
  52. runnable/extensions/run_log_store/db/integration_FF.py +0 -0
  53. runnable/extensions/run_log_store/file_system/__init__.py +0 -0
  54. runnable/extensions/run_log_store/file_system/implementation.py +0 -140
  55. runnable/extensions/run_log_store/generic_chunked.py +0 -557
  56. runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
  57. runnable/extensions/run_log_store/k8s_pvc/implementation.py +0 -21
  58. runnable/extensions/run_log_store/k8s_pvc/integration.py +0 -56
  59. runnable/extensions/secrets/__init__.py +0 -0
  60. runnable/extensions/secrets/dotenv/__init__.py +0 -0
  61. runnable/extensions/secrets/dotenv/implementation.py +0 -100
  62. runnable-0.12.3.dist-info/RECORD +0 -64
  63. runnable-0.12.3.dist-info/entry_points.txt +0 -41
  64. {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info/licenses}/LICENSE +0 -0
runnable/tasks.py CHANGED
@@ -31,7 +31,7 @@ logger = logging.getLogger(defaults.LOGGER_NAME)
31
31
  logging.getLogger("stevedore").setLevel(logging.CRITICAL)
32
32
 
33
33
 
34
- # TODO: Can we add memory peak, cpu usage, etc. to the metrics?
34
+ # TODO: This has to be an extension
35
35
 
36
36
 
37
37
  class TaskReturns(BaseModel):
@@ -43,8 +43,12 @@ class BaseTaskType(BaseModel):
43
43
  """A base task class which does the execution of command defined by the user."""
44
44
 
45
45
  task_type: str = Field(serialization_alias="command_type")
46
- secrets: List[str] = Field(default_factory=list)
47
- returns: List[TaskReturns] = Field(default_factory=list, alias="returns")
46
+ secrets: List[str] = Field(
47
+ default_factory=list
48
+ ) # A list of secrets to expose by secrets manager
49
+ returns: List[TaskReturns] = Field(
50
+ default_factory=list, alias="returns"
51
+ ) # The return values of the task
48
52
 
49
53
  model_config = ConfigDict(extra="forbid")
50
54
 
@@ -70,11 +74,13 @@ class BaseTaskType(BaseModel):
70
74
  raise NotImplementedError()
71
75
 
72
76
  def set_secrets_as_env_variables(self):
77
+ # Preparing the environment for the task execution
73
78
  for key in self.secrets:
74
79
  secret_value = context.run_context.secrets_handler.get(key)
75
80
  os.environ[key] = secret_value
76
81
 
77
82
  def delete_secrets_from_env_variables(self):
83
+ # Cleaning up the environment after the task execution
78
84
  for key in self.secrets:
79
85
  if key in os.environ:
80
86
  del os.environ[key]
@@ -99,6 +105,7 @@ class BaseTaskType(BaseModel):
99
105
  def _diff_parameters(
100
106
  self, parameters_in: Dict[str, Parameter], context_params: Dict[str, Parameter]
101
107
  ) -> Dict[str, Parameter]:
108
+ # If the parameter is different from existing parameters, then it is updated
102
109
  diff: Dict[str, Parameter] = {}
103
110
  for param_name, param in context_params.items():
104
111
  if param_name in parameters_in:
@@ -112,12 +119,7 @@ class BaseTaskType(BaseModel):
112
119
 
113
120
  @contextlib.contextmanager
114
121
  def expose_secrets(self):
115
- """Context manager to expose secrets to the execution.
116
-
117
- Args:
118
- map_variable (dict, optional): If the command is part of map node, the value of map. Defaults to None.
119
-
120
- """
122
+ """Context manager to expose secrets to the execution."""
121
123
  self.set_secrets_as_env_variables()
122
124
  try:
123
125
  yield
@@ -126,9 +128,32 @@ class BaseTaskType(BaseModel):
126
128
  finally:
127
129
  self.delete_secrets_from_env_variables()
128
130
 
131
+ def resolve_unreduced_parameters(self, map_variable: TypeMapVariable = None):
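132
+ """Return the run's parameters, with each unreduced parameter's value replaced by the value stored under its map-variable-prefixed name, when present."""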
133
+ params = self._context.run_log_store.get_parameters(
134
+ run_id=self._context.run_id
135
+ ).copy()
136
+
137
+ for param_name, param in params.items():
138
+ if param.reduced is False:
139
+ assert (
140
+ map_variable is not None
141
+ ), "Parameters in non-map node should always be reduced"
142
+
143
+ context_param = param_name
144
+ for _, v in map_variable.items():
145
+ context_param = f"{v}_{context_param}"
146
+
147
+ if context_param in params: # Is this if required?
148
+ params[param_name].value = params[context_param].value
149
+
150
+ return params
151
+
129
152
  @contextlib.contextmanager
130
- def execution_context(self, map_variable: TypeMapVariable = None, allow_complex: bool = True):
131
- params = self._context.run_log_store.get_parameters(run_id=self._context.run_id).copy()
153
+ def execution_context(
154
+ self, map_variable: TypeMapVariable = None, allow_complex: bool = True
155
+ ):
156
+ params = self.resolve_unreduced_parameters(map_variable=map_variable)
132
157
  logger.info(f"Parameters available for the execution: {params}")
133
158
 
134
159
  for param_name, param in params.items():
@@ -150,7 +175,11 @@ class BaseTaskType(BaseModel):
150
175
  logger.debug(f"Resolved parameters: {params}")
151
176
 
152
177
  if not allow_complex:
153
- params = {key: value for key, value in params.items() if isinstance(value, JsonParameter)}
178
+ params = {
179
+ key: value
180
+ for key, value in params.items()
181
+ if isinstance(value, JsonParameter)
182
+ }
154
183
 
155
184
  parameters_in = copy.deepcopy(params)
156
185
  try:
@@ -161,8 +190,12 @@ class BaseTaskType(BaseModel):
161
190
  finally:
162
191
  # Update parameters
163
192
  # This should only update the parameters that are changed at the root level.
164
- diff_parameters = self._diff_parameters(parameters_in=parameters_in, context_params=params)
165
- self._context.run_log_store.set_parameters(parameters=diff_parameters, run_id=self._context.run_id)
193
+ diff_parameters = self._diff_parameters(
194
+ parameters_in=parameters_in, context_params=params
195
+ )
196
+ self._context.run_log_store.set_parameters(
197
+ parameters=diff_parameters, run_id=self._context.run_id
198
+ )
166
199
 
167
200
 
168
201
  def task_return_to_parameter(task_return: TaskReturns, value: Any) -> Parameter:
@@ -188,7 +221,56 @@ def task_return_to_parameter(task_return: TaskReturns, value: Any) -> Parameter:
188
221
 
189
222
 
190
223
  class PythonTaskType(BaseTaskType): # pylint: disable=too-few-public-methods
191
- """The task class for python command."""
224
+ """
225
+ --8<-- [start:python_reference]
226
+ An execution node of the pipeline of python functions.
227
+ Please refer to define pipeline/tasks/python for more information.
228
+
229
+ As part of the dag definition, a python task is defined as follows:
230
+
231
+ dag:
232
+ steps:
233
+ python_task: # The name of the node
234
+ type: task
235
+ command_type: python # this is default
236
+ command: my_module.my_function # the dotted path to the function. Please refer to the yaml section of
237
+ define pipeline/tasks/python for concrete details.
238
+ returns:
239
+ - name: # The name to assign the return value
240
+ kind: json # the default value is json,
241
+ can be object for python objects and metric for metrics
242
+ secrets:
243
+ - my_secret_key # A list of secrets to expose by secrets manager
244
+ catalog:
245
+ get:
246
+ - A list of glob patterns to get from the catalog to the local file system
247
+ put:
248
+ - A list of glob patterns to put to the catalog from the local file system
249
+ on_failure: The name of the step to traverse in case of failure
250
+ overrides:
251
+ Individual tasks can override the global configuration by referring to the
252
+ specific override.
253
+
254
+ For example,
255
+ #Global configuration
256
+ executor:
257
+ type: local-container
258
+ config:
259
+ docker_image: "runnable/runnable:latest"
260
+ overrides:
261
+ custom_docker_image:
262
+ docker_image: "runnable/runnable:custom"
263
+
264
+ ## In the node definition
265
+ overrides:
266
+ local-container:
267
+ docker_image: "runnable/runnable:custom"
268
+
269
+ This instruction will override the docker image for the local-container executor.
270
+ next: The next node to execute after this task, use "success" to terminate the pipeline successfully
271
+ or "fail" to terminate the pipeline with an error.
272
+ --8<-- [end:python_reference]
273
+ """
192
274
 
193
275
  task_type: str = Field(default="python", serialization_alias="command_type")
194
276
  command: str
@@ -209,7 +291,10 @@ class PythonTaskType(BaseTaskType): # pylint: disable=too-few-public-methods
209
291
  """Execute the python function as defined by the command."""
210
292
  attempt_log = StepAttempt(status=defaults.FAIL, start_time=str(datetime.now()))
211
293
 
212
- with self.execution_context(map_variable=map_variable) as params, self.expose_secrets() as _:
294
+ with (
295
+ self.execution_context(map_variable=map_variable) as params,
296
+ self.expose_secrets() as _,
297
+ ):
213
298
  module, func = utils.get_module_and_attr_names(self.command)
214
299
  sys.path.insert(0, os.getcwd()) # Need to add the current directory to path
215
300
  imported_module = importlib.import_module(module)
@@ -217,21 +302,32 @@ class PythonTaskType(BaseTaskType): # pylint: disable=too-few-public-methods
217
302
 
218
303
  try:
219
304
  try:
220
- filtered_parameters = parameters.filter_arguments_for_func(f, params.copy(), map_variable)
221
- logger.info(f"Calling {func} from {module} with {filtered_parameters}")
305
+ filtered_parameters = parameters.filter_arguments_for_func(
306
+ f, params.copy(), map_variable
307
+ )
308
+ logger.info(
309
+ f"Calling {func} from {module} with {filtered_parameters}"
310
+ )
222
311
 
223
312
  out_file = io.StringIO()
224
313
  with contextlib.redirect_stdout(out_file):
225
- user_set_parameters = f(**filtered_parameters) # This is a tuple or single value
314
+ user_set_parameters = f(
315
+ **filtered_parameters
316
+ ) # This is a tuple or single value
226
317
  task_console.print(out_file.getvalue())
227
318
  except Exception as e:
228
- raise exceptions.CommandCallError(f"Function call: {self.command} did not succeed.\n") from e
319
+ raise exceptions.CommandCallError(
320
+ f"Function call: {self.command} did not succeed.\n"
321
+ ) from e
229
322
 
230
323
  attempt_log.input_parameters = params.copy()
231
324
 
232
325
  if map_variable:
233
326
  attempt_log.input_parameters.update(
234
- {k: JsonParameter(value=v, kind="json") for k, v in map_variable.items()}
327
+ {
328
+ k: JsonParameter(value=v, kind="json")
329
+ for k, v in map_variable.items()
330
+ }
235
331
  )
236
332
 
237
333
  if self.returns:
@@ -239,7 +335,9 @@ class PythonTaskType(BaseTaskType): # pylint: disable=too-few-public-methods
239
335
  user_set_parameters = (user_set_parameters,)
240
336
 
241
337
  if len(user_set_parameters) != len(self.returns):
242
- raise ValueError("Returns task signature does not match the function returns")
338
+ raise ValueError(
339
+ "Returns task signature does not match the function returns"
340
+ )
243
341
 
244
342
  output_parameters: Dict[str, Parameter] = {}
245
343
  metrics: Dict[str, Parameter] = {}
@@ -277,7 +375,56 @@ class PythonTaskType(BaseTaskType): # pylint: disable=too-few-public-methods
277
375
 
278
376
 
279
377
  class NotebookTaskType(BaseTaskType):
280
- """The task class for Notebook based execution."""
378
+ """
379
+ --8<-- [start:notebook_reference]
380
+ An execution node of the pipeline of notebook execution.
381
+ Please refer to define pipeline/tasks/notebook for more information.
382
+
383
+ As part of the dag definition, a notebook task is defined as follows:
384
+
385
+ dag:
386
+ steps:
387
+ notebook_task: # The name of the node
388
+ type: task
389
+ command_type: notebook
390
+ command: the path to the notebook relative to project root.
391
+ optional_ploomber_args: a dictionary of arguments to be passed to ploomber engine
392
+ returns:
393
+ - name: # The name to assign the return value
394
+ kind: json # the default value is json,
395
+ can be object for python objects and metric for metrics
396
+ secrets:
397
+ - my_secret_key # A list of secrets to expose by secrets manager
398
+ catalog:
399
+ get:
400
+ - A list of glob patterns to get from the catalog to the local file system
401
+ put:
402
+ - A list of glob patterns to put to the catalog from the local file system
403
+ on_failure: The name of the step to traverse in case of failure
404
+ overrides:
405
+ Individual tasks can override the global configuration by referring to the
406
+ specific override.
407
+
408
+ For example,
409
+ #Global configuration
410
+ executor:
411
+ type: local-container
412
+ config:
413
+ docker_image: "runnable/runnable:latest"
414
+ overrides:
415
+ custom_docker_image:
416
+ docker_image: "runnable/runnable:custom"
417
+
418
+ ## In the node definition
419
+ overrides:
420
+ local-container:
421
+ docker_image: "runnable/runnable:custom"
422
+
423
+ This instruction will override the docker image for the local-container executor.
424
+ next: The next node to execute after this task, use "success" to terminate the pipeline successfully
425
+ or "fail" to terminate the pipeline with an error.
426
+ --8<-- [end:notebook_reference]
427
+ """
281
428
 
282
429
  task_type: str = Field(default="notebook", serialization_alias="command_type")
283
430
  command: str
@@ -302,7 +449,10 @@ class NotebookTaskType(BaseTaskType):
302
449
  return str(file_name)
303
450
 
304
451
  def get_cli_options(self) -> Tuple[str, dict]:
305
- return "notebook", {"command": self.command, "notebook-output-path": self.notebook_output_path}
452
+ return "notebook", {
453
+ "command": self.command,
454
+ "notebook-output-path": self.notebook_output_path,
455
+ }
306
456
 
307
457
  def execute_command(
308
458
  self,
@@ -325,9 +475,12 @@ class NotebookTaskType(BaseTaskType):
325
475
 
326
476
  notebook_output_path = self.notebook_output_path
327
477
 
328
- with self.execution_context(
329
- map_variable=map_variable, allow_complex=False
330
- ) as params, self.expose_secrets() as _:
478
+ with (
479
+ self.execution_context(
480
+ map_variable=map_variable, allow_complex=False
481
+ ) as params,
482
+ self.expose_secrets() as _,
483
+ ):
331
484
  copy_params = copy.deepcopy(params)
332
485
 
333
486
  if map_variable:
@@ -336,7 +489,9 @@ class NotebookTaskType(BaseTaskType):
336
489
  copy_params[key] = JsonParameter(kind="json", value=value)
337
490
 
338
491
  # Remove any {v}_unreduced parameters from the parameters
339
- unprocessed_params = [k for k, v in copy_params.items() if not v.reduced]
492
+ unprocessed_params = [
493
+ k for k, v in copy_params.items() if not v.reduced
494
+ ]
340
495
 
341
496
  for key in list(copy_params.keys()):
342
497
  if any(key.endswith(f"_{k}") for k in unprocessed_params):
@@ -360,7 +515,9 @@ class NotebookTaskType(BaseTaskType):
360
515
  pm.execute_notebook(**kwds)
361
516
  task_console.print(out_file.getvalue())
362
517
 
363
- context.run_context.catalog_handler.put(name=notebook_output_path, run_id=context.run_context.run_id)
518
+ context.run_context.catalog_handler.put(
519
+ name=notebook_output_path, run_id=context.run_context.run_id
520
+ )
364
521
 
365
522
  client = PloomberClient.from_path(path=notebook_output_path)
366
523
  namespace = client.get_namespace()
@@ -368,7 +525,9 @@ class NotebookTaskType(BaseTaskType):
368
525
  output_parameters: Dict[str, Parameter] = {}
369
526
  try:
370
527
  for task_return in self.returns:
371
- param_name = Template(task_return.name).safe_substitute(map_variable) # type: ignore
528
+ param_name = Template(task_return.name).safe_substitute(
529
+ map_variable # type: ignore
530
+ )
372
531
 
373
532
  if map_variable:
374
533
  for _, v in map_variable.items():
@@ -410,7 +569,54 @@ class NotebookTaskType(BaseTaskType):
410
569
 
411
570
  class ShellTaskType(BaseTaskType):
412
571
  """
413
- The task class for shell based commands.
572
+ --8<-- [start:shell_reference]
573
+ An execution node of the pipeline of shell execution.
574
+ Please refer to define pipeline/tasks/shell for more information.
575
+
576
+ As part of the dag definition, a shell task is defined as follows:
577
+
578
+ dag:
579
+ steps:
580
+ shell_task: # The name of the node
581
+ type: task
582
+ command_type: shell
583
+ command: The command to execute, it could be multiline
584
+ optional_ploomber_args: a dictionary of arguments to be passed to ploomber engine
585
+ returns:
586
+ - name: # The name to assign the return value
587
+ kind: json # the default value is json,
588
+ can be metric for metrics; object and pydantic kinds are not allowed for shell tasks
589
+ secrets:
590
+ - my_secret_key # A list of secrets to expose by secrets manager
591
+ catalog:
592
+ get:
593
+ - A list of glob patterns to get from the catalog to the local file system
594
+ put:
595
+ - A list of glob patterns to put to the catalog from the local file system
596
+ on_failure: The name of the step to traverse in case of failure
597
+ overrides:
598
+ Individual tasks can override the global configuration by referring to the
599
+ specific override.
600
+
601
+ For example,
602
+ #Global configuration
603
+ executor:
604
+ type: local-container
605
+ config:
606
+ docker_image: "runnable/runnable:latest"
607
+ overrides:
608
+ custom_docker_image:
609
+ docker_image: "runnable/runnable:custom"
610
+
611
+ ## In the node definition
612
+ overrides:
613
+ local-container:
614
+ docker_image: "runnable/runnable:custom"
615
+
616
+ This instruction will override the docker image for the local-container executor.
617
+ next: The next node to execute after this task, use "success" to terminate the pipeline successfully
618
+ or "fail" to terminate the pipeline with an error.
619
+ --8<-- [end:shell_reference]
414
620
  """
415
621
 
416
622
  task_type: str = Field(default="shell", serialization_alias="command_type")
@@ -421,7 +627,9 @@ class ShellTaskType(BaseTaskType):
421
627
  def returns_should_be_json(cls, returns: List[TaskReturns]):
422
628
  for task_return in returns:
423
629
  if task_return.kind == "object" or task_return.kind == "pydantic":
424
- raise ValueError("Pydantic models or Objects are not allowed in returns")
630
+ raise ValueError(
631
+ "Pydantic models or Objects are not allowed in returns"
632
+ )
425
633
 
426
634
  return returns
427
635
 
@@ -456,7 +664,9 @@ class ShellTaskType(BaseTaskType):
456
664
  subprocess_env[key] = secret_value
457
665
 
458
666
  try:
459
- with self.execution_context(map_variable=map_variable, allow_complex=False) as params:
667
+ with self.execution_context(
668
+ map_variable=map_variable, allow_complex=False
669
+ ) as params:
460
670
  subprocess_env.update({k: v.get_value() for k, v in params.items()})
461
671
 
462
672
  # Json dumps all runnable environment variables
@@ -467,7 +677,9 @@ class ShellTaskType(BaseTaskType):
467
677
 
468
678
  collect_delimiter = "=== COLLECT ==="
469
679
 
470
- command = self.command.strip() + f" && echo '{collect_delimiter}' && env"
680
+ command = (
681
+ self.command.strip() + f" && echo '{collect_delimiter}' && env"
682
+ )
471
683
  logger.info(f"Executing shell command: {command}")
472
684
 
473
685
  capture = False
runnable/utils.py CHANGED
@@ -21,7 +21,7 @@ from runnable import defaults, names
21
21
  from runnable.defaults import TypeMapVariable
22
22
 
23
23
  if TYPE_CHECKING: # pragma: no cover
24
- from runnable.extensions.nodes import TaskNode
24
+ from extensions.nodes.nodes import TaskNode
25
25
  from runnable.nodes import BaseNode
26
26
 
27
27
 
@@ -86,7 +86,9 @@ def generate_run_id(run_id: str = "") -> str:
86
86
  return run_id
87
87
 
88
88
 
89
- def apply_variables(apply_to: Dict[str, Any], variables: Dict[str, str]) -> Dict[str, Any]:
89
+ def apply_variables(
90
+ apply_to: Dict[str, Any], variables: Dict[str, str]
91
+ ) -> Dict[str, Any]:
90
92
  """Safely applies the variables to a config.
91
93
 
92
94
  For example: For config:
@@ -272,7 +274,9 @@ def get_local_docker_image_id(image_name: str) -> str:
272
274
  image = client.images.get(image_name)
273
275
  return image.attrs["Id"]
274
276
  except ImportError: # pragma: no cover
275
- logger.warning("Did not find docker installed, some functionality might be affected")
277
+ logger.warning(
278
+ "Did not find docker installed, some functionality might be affected"
279
+ )
276
280
  except BaseException:
277
281
  logger.exception(f"Could not find the image by name {image_name}")
278
282
 
@@ -295,7 +299,9 @@ def get_git_code_identity():
295
299
  code_identity.code_identifier_dependable, changed = is_git_clean()
296
300
  code_identity.code_identifier_url = get_git_remote()
297
301
  if changed:
298
- code_identity.code_identifier_message = "changes found in " + ", ".join(changed.split("\n"))
302
+ code_identity.code_identifier_message = "changes found in " + ", ".join(
303
+ changed.split("\n")
304
+ )
299
305
  except BaseException:
300
306
  logger.exception("Git code versioning problems")
301
307
 
@@ -331,7 +337,9 @@ def get_tracked_data() -> Dict[str, str]:
331
337
  try:
332
338
  tracked_data[key.lower()] = json.loads(value)
333
339
  except json.decoder.JSONDecodeError:
334
- logger.warning(f"Tracker {key} could not be JSON decoded, adding the literal value")
340
+ logger.warning(
341
+ f"Tracker {key} could not be JSON decoded, adding the literal value"
342
+ )
335
343
  tracked_data[key.lower()] = value
336
344
 
337
345
  del os.environ[env_var]
@@ -389,9 +397,13 @@ def get_data_hash(file_name: str):
389
397
  str: The SHA ID of the file contents
390
398
  """
391
399
  # https://stackoverflow.com/questions/3431825/generating-an-md5-checksum-of-a-file
392
- return hash_bytestr_iter(file_as_blockiter(open(file_name, "rb")), hashlib.sha256()) # pragma: no cover
400
+ # TODO: For a big file, we should only hash the first few bytes
401
+ return hash_bytestr_iter(
402
+ file_as_blockiter(open(file_name, "rb")), hashlib.sha256()
403
+ ) # pragma: no cover
393
404
 
394
405
 
406
+ # TODO: This is not the right place for this.
395
407
  def get_node_execution_command(
396
408
  node: BaseNode,
397
409
  map_variable: TypeMapVariable = None,
@@ -415,7 +427,11 @@ def get_node_execution_command(
415
427
 
416
428
  log_level = log_level or logging.getLevelName(logger.getEffectiveLevel())
417
429
 
418
- action = f"runnable execute_single_node {run_id} " f"{node._command_friendly_name()}" f" --log-level {log_level}"
430
+ action = (
431
+ f"runnable execute_single_node {run_id} "
432
+ f"{node._command_friendly_name()}"
433
+ f" --log-level {log_level}"
434
+ )
419
435
 
420
436
  if context.run_context.pipeline_file:
421
437
  action = action + f" --file {context.run_context.pipeline_file}"
@@ -435,6 +451,7 @@ def get_node_execution_command(
435
451
  return action
436
452
 
437
453
 
454
+ # TODO: This is not the right place for this.
438
455
  def get_fan_command(
439
456
  mode: str,
440
457
  node: BaseNode,
@@ -478,6 +495,7 @@ def get_fan_command(
478
495
  return action
479
496
 
480
497
 
498
+ # TODO: This is not the right place for this.
481
499
  def get_job_execution_command(node: TaskNode, over_write_run_id: str = "") -> str:
482
500
  """Get the execution command to run a job via command line.
483
501
 
@@ -520,7 +538,9 @@ def get_job_execution_command(node: TaskNode, over_write_run_id: str = "") -> st
520
538
  return action
521
539
 
522
540
 
523
- def get_provider_by_name_and_type(service_type: str, service_details: defaults.ServiceConfig):
541
+ def get_provider_by_name_and_type(
542
+ service_type: str, service_details: defaults.ServiceConfig
543
+ ):
524
544
  """Given a service type, one of executor, run_log_store, catalog, secrets and the config
525
545
  return the exact child class implementing the service.
526
546
  We use stevedore to do the work for us.
@@ -542,7 +562,9 @@ def get_provider_by_name_and_type(service_type: str, service_details: defaults.S
542
562
  if "config" in service_details:
543
563
  service_config = service_details.get("config", {})
544
564
 
545
- logger.debug(f"Trying to get a service of {service_type} of the name {service_name} with config: {service_config}")
565
+ logger.debug(
566
+ f"Trying to get a service of {service_type} of the name {service_name} with config: {service_config}"
567
+ )
546
568
  try:
547
569
  mgr = driver.DriverManager(
548
570
  namespace=namespace,
@@ -552,8 +574,12 @@ def get_provider_by_name_and_type(service_type: str, service_details: defaults.S
552
574
  )
553
575
  return mgr.driver
554
576
  except Exception as _e:
555
- logger.exception(f"Could not find the service of type: {service_type} with config: {service_details}")
556
- raise Exception(f"Could not find the service of type: {service_type} with config: {service_details}") from _e
577
+ logger.exception(
578
+ f"Could not find the service of type: {service_type} with config: {service_details}"
579
+ )
580
+ raise Exception(
581
+ f"Could not find the service of type: {service_type} with config: {service_details}"
582
+ ) from _e
557
583
 
558
584
 
559
585
  def get_run_config() -> dict:
@@ -585,7 +611,9 @@ def json_to_ordered_dict(json_str: str) -> TypeMapVariable:
585
611
  return OrderedDict()
586
612
 
587
613
 
588
- def set_runnable_environment_variables(run_id: str = "", configuration_file: str = "", tag: str = "") -> None:
614
+ def set_runnable_environment_variables(
615
+ run_id: str = "", configuration_file: str = "", tag: str = ""
616
+ ) -> None:
589
617
  """Set the environment variables used by runnable. This function should be called during the prepare configurations
590
618
  by all executors.
591
619
 
@@ -604,7 +632,7 @@ def set_runnable_environment_variables(run_id: str = "", configuration_file: str
604
632
  os.environ[defaults.RUNNABLE_RUN_TAG] = tag
605
633
 
606
634
 
607
- def gather_variables() -> dict:
635
+ def gather_variables() -> Dict[str, str]:
608
636
  """Gather all the environment variables used by runnable. All the variables start with runnable_VAR_.
609
637
 
610
638
  Returns:
@@ -1,36 +1,31 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: runnable
3
- Version: 0.12.3
4
- Summary: A Compute agnostic pipelining software
5
- Home-page: https://github.com/vijayvammi/runnable
6
- License: Apache-2.0
7
- Author: Vijay Vammi
8
- Author-email: mesanthu@gmail.com
9
- Requires-Python: >=3.9,<3.13
10
- Classifier: License :: OSI Approved :: Apache Software License
11
- Classifier: Programming Language :: Python :: 3
12
- Classifier: Programming Language :: Python :: 3.9
13
- Classifier: Programming Language :: Python :: 3.10
14
- Classifier: Programming Language :: Python :: 3.11
15
- Classifier: Programming Language :: Python :: 3.12
16
- Provides-Extra: database
3
+ Version: 0.14.0
4
+ Summary: Add your description here
5
+ Author-email: "Vammi, Vijay" <vijay.vammi@astrazeneca.com>
6
+ License-File: LICENSE
7
+ Requires-Python: >=3.9
8
+ Requires-Dist: catalog
9
+ Requires-Dist: click-plugins>=1.1.1
10
+ Requires-Dist: click<=8.1.3
11
+ Requires-Dist: dill>=0.3.9
12
+ Requires-Dist: executor
13
+ Requires-Dist: nodes
14
+ Requires-Dist: pydantic>=2.10.3
15
+ Requires-Dist: python-dotenv>=1.0.1
16
+ Requires-Dist: rich>=13.9.4
17
+ Requires-Dist: ruamel-yaml>=0.18.6
18
+ Requires-Dist: run-log-store
19
+ Requires-Dist: secrets
20
+ Requires-Dist: setuptools>=75.6.0
21
+ Requires-Dist: stevedore>=5.4.0
22
+ Requires-Dist: typer>=0.15.1
17
23
  Provides-Extra: docker
24
+ Requires-Dist: docker>=7.1.0; extra == 'docker'
25
+ Provides-Extra: examples
26
+ Requires-Dist: pandas>=2.2.3; extra == 'examples'
18
27
  Provides-Extra: notebook
19
- Requires-Dist: click
20
- Requires-Dist: click-plugins (>=1.1.1,<2.0.0)
21
- Requires-Dist: dill (>=0.3.8,<0.4.0)
22
- Requires-Dist: docker ; extra == "docker"
23
- Requires-Dist: mlflow-skinny
24
- Requires-Dist: ploomber-engine (>=0.0.31,<0.0.32) ; extra == "notebook"
25
- Requires-Dist: pydantic (>=2.5,<3.0)
26
- Requires-Dist: rich (>=13.5.2,<14.0.0)
27
- Requires-Dist: ruamel.yaml
28
- Requires-Dist: ruamel.yaml.clib
29
- Requires-Dist: sqlalchemy ; extra == "database"
30
- Requires-Dist: stevedore (>=3.5.0,<4.0.0)
31
- Requires-Dist: typing-extensions ; python_version < "3.8"
32
- Project-URL: Documentation, https://github.com/vijayvammi/runnable
33
- Project-URL: Repository, https://github.com/vijayvammi/runnable
28
+ Requires-Dist: ploomber-engine>=0.0.33; extra == 'notebook'
34
29
  Description-Content-Type: text/markdown
35
30
 
36
31
 
@@ -267,4 +262,3 @@ Execute a pipeline over an iterable parameter.
267
262
 
268
263
  ### [Arbitrary nesting](https://astrazeneca.github.io/runnable-core/concepts/nesting/)
269
264
  Any nesting of parallel within map and so on.
270
-