runnable: 0.11.0.tar.gz → 0.11.2.tar.gz

Files changed (65)
  1. {runnable-0.11.0 → runnable-0.11.2}/PKG-INFO +2 -3
  2. {runnable-0.11.0 → runnable-0.11.2}/pyproject.toml +15 -13
  3. {runnable-0.11.0 → runnable-0.11.2}/runnable/__init__.py +5 -0
  4. {runnable-0.11.0 → runnable-0.11.2}/runnable/cli.py +1 -0
  5. {runnable-0.11.0 → runnable-0.11.2}/runnable/datastore.py +4 -2
  6. {runnable-0.11.0 → runnable-0.11.2}/runnable/entrypoints.py +5 -0
  7. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/catalog/file_system/implementation.py +1 -1
  8. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/executor/__init__.py +2 -0
  9. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/nodes.py +4 -4
  10. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/run_log_store/generic_chunked.py +22 -4
  11. {runnable-0.11.0 → runnable-0.11.2}/runnable/sdk.py +37 -13
  12. {runnable-0.11.0 → runnable-0.11.2}/runnable/tasks.py +116 -46
  13. {runnable-0.11.0 → runnable-0.11.2}/LICENSE +0 -0
  14. {runnable-0.11.0 → runnable-0.11.2}/README.md +0 -0
  15. {runnable-0.11.0 → runnable-0.11.2}/runnable/catalog.py +0 -0
  16. {runnable-0.11.0 → runnable-0.11.2}/runnable/context.py +0 -0
  17. {runnable-0.11.0 → runnable-0.11.2}/runnable/defaults.py +0 -0
  18. {runnable-0.11.0 → runnable-0.11.2}/runnable/exceptions.py +0 -0
  19. {runnable-0.11.0 → runnable-0.11.2}/runnable/executor.py +0 -0
  20. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/__init__.py +0 -0
  21. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/catalog/__init__.py +0 -0
  22. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/catalog/file_system/__init__.py +0 -0
  23. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
  24. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/catalog/k8s_pvc/implementation.py +0 -0
  25. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/catalog/k8s_pvc/integration.py +0 -0
  26. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/executor/argo/__init__.py +0 -0
  27. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/executor/argo/implementation.py +0 -0
  28. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/executor/argo/specification.yaml +0 -0
  29. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/executor/k8s_job/__init__.py +0 -0
  30. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/executor/k8s_job/implementation_FF.py +0 -0
  31. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/executor/k8s_job/integration_FF.py +0 -0
  32. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/executor/local/__init__.py +0 -0
  33. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/executor/local/implementation.py +0 -0
  34. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/executor/local_container/__init__.py +0 -0
  35. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/executor/local_container/implementation.py +0 -0
  36. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/executor/mocked/__init__.py +0 -0
  37. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/executor/mocked/implementation.py +0 -0
  38. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/executor/retry/__init__.py +0 -0
  39. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/executor/retry/implementation.py +0 -0
  40. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/run_log_store/__init__.py +0 -0
  41. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
  42. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/run_log_store/chunked_file_system/implementation.py +1 -1
  43. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
  44. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +0 -0
  45. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +0 -0
  46. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/run_log_store/db/implementation_FF.py +0 -0
  47. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/run_log_store/db/integration_FF.py +0 -0
  48. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/run_log_store/file_system/__init__.py +0 -0
  49. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/run_log_store/file_system/implementation.py +0 -0
  50. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
  51. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/run_log_store/k8s_pvc/implementation.py +0 -0
  52. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/run_log_store/k8s_pvc/integration.py +0 -0
  53. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/secrets/__init__.py +0 -0
  54. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/secrets/dotenv/__init__.py +0 -0
  55. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/secrets/dotenv/implementation.py +0 -0
  56. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/secrets/env_secrets/__init__.py +0 -0
  57. {runnable-0.11.0 → runnable-0.11.2}/runnable/extensions/secrets/env_secrets/implementation.py +0 -0
  58. {runnable-0.11.0 → runnable-0.11.2}/runnable/graph.py +0 -0
  59. {runnable-0.11.0 → runnable-0.11.2}/runnable/integration.py +0 -0
  60. {runnable-0.11.0 → runnable-0.11.2}/runnable/names.py +0 -0
  61. {runnable-0.11.0 → runnable-0.11.2}/runnable/nodes.py +0 -0
  62. {runnable-0.11.0 → runnable-0.11.2}/runnable/parameters.py +0 -0
  63. {runnable-0.11.0 → runnable-0.11.2}/runnable/pickler.py +0 -0
  64. {runnable-0.11.0 → runnable-0.11.2}/runnable/secrets.py +0 -0
  65. {runnable-0.11.0 → runnable-0.11.2}/runnable/utils.py +0 -0
--- runnable-0.11.0/PKG-INFO
+++ runnable-0.11.2/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: runnable
-Version: 0.11.0
+Version: 0.11.2
 Summary: A Compute agnostic pipelining software
 Home-page: https://github.com/vijayvammi/runnable
 License: Apache-2.0
@@ -15,13 +15,12 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Provides-Extra: database
 Provides-Extra: docker
-Provides-Extra: mlflow
 Provides-Extra: notebook
 Requires-Dist: click
 Requires-Dist: click-plugins (>=1.1.1,<2.0.0)
 Requires-Dist: dill (>=0.3.8,<0.4.0)
 Requires-Dist: docker ; extra == "docker"
-Requires-Dist: mlflow-skinny ; extra == "mlflow"
+Requires-Dist: mlflow-skinny
 Requires-Dist: ploomber-engine (>=0.0.31,<0.0.32) ; extra == "notebook"
 Requires-Dist: pydantic (>=2.5,<3.0)
 Requires-Dist: rich (>=13.5.2,<14.0.0)
--- runnable-0.11.0/pyproject.toml
+++ runnable-0.11.2/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "runnable"
-version = "0.11.0"
+version = "0.11.2"
 description = "A Compute agnostic pipelining software"
 authors = ["Vijay Vammi <mesanthu@gmail.com>"]
 license = "Apache-2.0"
@@ -49,11 +49,18 @@ scikit-learn = "^1.4.1.post1"
 en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1.tar.gz" }
 matplotlib = "^3.8.3"

+
+[tool.poetry.group.release.dependencies]
+python-semantic-release = "^9.4.2"
+
+
+[tool.poetry.group.examples.dependencies]
+pandas = "^2.2.2"
+
 [tool.poetry.extras]
 docker = ['docker']
 notebook = ['ploomber-engine']
 database = ["sqlalchemy"]
-mlflow = ["mlflow-skinny"]

 [tool.poetry.group.dev.dependencies]
 pytest = "*"
@@ -66,6 +73,7 @@ ruff = "^0.0.259"
 commit-linter = "^1.0.2"
 black = "^23.3.0"
 gitlint = "^0.19.1"
+pandas = "^2.2.2"


 [tool.poetry.scripts]
@@ -191,22 +199,16 @@ requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"

 [tool.semantic_release]
-tag_commit = false
-major_on_zero = true
-commit_version_number = false
-upload_to_pypi = false
-upload_to_repository = false
-upload_to_release = false
+allow_zero_version = true
+major_on_zero = false
 tag_format = "{version}"

+[tool.semantic_release.remote.token]
+env = "GH_TOKEN"
+
 [tool.semantic_release.branches.main]
 match = "main"

-[tool.semantic_release.branches."rc"]
-match = "rc"
-prerelease = true
-prerelease_token = "rc"
-
 [tool.semantic_release.remote]
 ignore_token_for_push = true
--- runnable-0.11.0/runnable/__init__.py
+++ runnable-0.11.2/runnable/__init__.py
@@ -29,8 +29,13 @@ from runnable.sdk import (  # noqa
     pickled,
 )

+## TODO: Summary should be a bit better for catalog.
+## If the execution fails, hint them about the retry executor.
+# Make the retry executor loose!
+
 # TODO: Think of model registry as a central place to store models.
 # TODO: Implement Sagemaker pipelines as a executor.


 # TODO: Think of way of generating dag hash without executor configuration
+# TODO: Add examples of map parameters and task types
--- runnable-0.11.0/runnable/cli.py
+++ runnable-0.11.2/runnable/cli.py
@@ -1,3 +1,4 @@
+# A dummy to trigger the PR
 import logging

 import click
--- runnable-0.11.0/runnable/datastore.py
+++ runnable-0.11.2/runnable/datastore.py
@@ -312,8 +312,10 @@ class RunLog(BaseModel):
         summary["Catalog Location"] = _context.catalog_handler.get_summary()
         summary["Full Run log present at: "] = _context.run_log_store.get_summary()

-        summary["Final Parameters"] = {p: v.description for p, v in self.parameters.items()}
-        summary["Collected metrics"] = {p: v.description for p, v in self.parameters.items() if v.kind == "metric"}
+        run_log = _context.run_log_store.get_run_log_by_id(run_id=_context.run_id, full=True)
+
+        summary["Final Parameters"] = {p: v.description for p, v in run_log.parameters.items()}
+        summary["Collected metrics"] = {p: v.description for p, v in run_log.parameters.items() if v.kind == "metric"}

         return summary

--- runnable-0.11.0/runnable/entrypoints.py
+++ runnable-0.11.2/runnable/entrypoints.py
@@ -172,6 +172,7 @@ def execute(
     )
     console.print("Working with context:")
     console.print(run_context)
+    console.rule(style="[dark orange]")

     executor = run_context.executor

@@ -243,6 +244,7 @@ def execute_single_node(
     )
     console.print("Working with context:")
     console.print(run_context)
+    console.rule(style="[dark orange]")

     executor = run_context.executor
     run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
@@ -296,6 +298,7 @@ def execute_notebook(

     console.print("Working with context:")
     console.print(run_context)
+    console.rule(style="[dark orange]")

     step_config = {
         "command": notebook_file,
@@ -358,6 +361,7 @@ def execute_function(

     console.print("Working with context:")
     console.print(run_context)
+    console.rule(style="[dark orange]")

     # Prepare the graph with a single node
     step_config = {
@@ -427,6 +431,7 @@ def fan(
     )
     console.print("Working with context:")
     console.print(run_context)
+    console.rule(style="[dark orange]")

     executor = run_context.executor
     run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
--- runnable-0.11.0/runnable/extensions/catalog/file_system/implementation.py
+++ runnable-0.11.2/runnable/extensions/catalog/file_system/implementation.py
@@ -226,7 +226,7 @@ class FileSystemCatalog(BaseCatalog):
         for cataloged_file in cataloged_files:
             if str(cataloged_file).endswith("execution.log"):
                 continue
-            print(cataloged_file.name)
+
             if cataloged_file.is_file():
                 shutil.copy(cataloged_file, run_catalog / cataloged_file.name)
             else:
--- runnable-0.11.0/runnable/extensions/executor/__init__.py
+++ runnable-0.11.2/runnable/extensions/executor/__init__.py
@@ -476,6 +476,8 @@ class GenericExecutor(BaseExecutor):
             logger.exception(e)
             raise

+        console.rule(style="[dark orange]")
+
         if working_on.node_type in ["success", "fail"]:
             break

--- runnable-0.11.0/runnable/extensions/nodes.py
+++ runnable-0.11.2/runnable/extensions/nodes.py
@@ -505,7 +505,7 @@ class MapNode(CompositeNode):
             for _, v in map_variable.items():
                 for branch_return in self.branch_returns:
                     param_name, param_type = branch_return
-                    raw_parameters[f"{param_name}_{v}"] = param_type.copy()
+                    raw_parameters[f"{v}_{param_name}"] = param_type.copy()
         else:
             for branch_return in self.branch_returns:
                 param_name, param_type = branch_return
@@ -606,9 +606,9 @@ class MapNode(CompositeNode):
                 param_name, _ = branch_return
                 to_reduce = []
                 for iter_variable in iterate_on:
-                    to_reduce.append(params[f"{param_name}_{iter_variable}"].get_value())
+                    to_reduce.append(params[f"{iter_variable}_{param_name}"].get_value())

-                param_name = f"{param_name}_{v}"
+                param_name = f"{v}_{param_name}"
                 params[param_name].value = reducer_f(to_reduce)
                 params[param_name].reduced = True
         else:
@@ -617,7 +617,7 @@ class MapNode(CompositeNode):

                 to_reduce = []
                 for iter_variable in iterate_on:
-                    to_reduce.append(params[f"{param_name}_{iter_variable}"].get_value())
+                    to_reduce.append(params[f"{iter_variable}_{param_name}"].get_value())

                 params[param_name].value = reducer_f(*to_reduce)
                 params[param_name].reduced = True
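
Note: this completes the flip of map-branch parameter naming in nodes.py, from param-then-iteration to iteration-then-param (tasks.py below gets the matching change). A minimal sketch of the new convention, with illustrative names:

    # Reduce step under the 0.11.2 naming; a map over ["a", "b"] where
    # each branch returns a parameter called "count" (names illustrative).
    params = {"a_count": 1, "b_count": 2}  # 0.11.0 stored count_a, count_b

    iterate_on, param_name = ["a", "b"], "count"
    to_reduce = [params[f"{v}_{param_name}"] for v in iterate_on]
    assert to_reduce == [1, 2]  # this list is handed to the configured reducer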
--- runnable-0.11.0/runnable/extensions/run_log_store/generic_chunked.py
+++ runnable-0.11.2/runnable/extensions/run_log_store/generic_chunked.py
@@ -7,7 +7,16 @@ from string import Template
 from typing import Any, Dict, Optional, Sequence, Union

 from runnable import defaults, exceptions
-from runnable.datastore import BaseRunLogStore, BranchLog, RunLog, StepLog
+from runnable.datastore import (
+    BaseRunLogStore,
+    BranchLog,
+    JsonParameter,
+    MetricParameter,
+    ObjectParameter,
+    Parameter,
+    RunLog,
+    StepLog,
+)

 logger = logging.getLogger(defaults.LOGGER_NAME)
@@ -164,7 +173,9 @@ class ChunkedRunLogStore(BaseRunLogStore):
             raise Exception(f"Name is required during retrieval for {log_type}")

         naming_pattern = self.naming_pattern(log_type=log_type, name=name)
+
         matches = self.get_matches(run_id=run_id, name=naming_pattern, multiple_allowed=multiple_allowed)
+
         if matches:
             if not multiple_allowed:
                 contents = self._retrieve(name=matches)  # type: ignore
@@ -370,10 +381,17 @@ class ChunkedRunLogStore(BaseRunLogStore):
         Raises:
             RunLogNotFoundError: If the run log for run_id is not found in the datastore
         """
-        parameters = {}
+        parameters: Dict[str, Parameter] = {}
         try:
             parameters_list = self.retrieve(run_id=run_id, log_type=self.LogTypes.PARAMETER, multiple_allowed=True)
-            parameters = {key: value for param in parameters_list for key, value in param.items()}
+            for param in parameters_list:
+                for key, value in param.items():
+                    if value["kind"] == "json":
+                        parameters[key] = JsonParameter(**value)
+                    if value["kind"] == "metric":
+                        parameters[key] = MetricParameter(**value)
+                    if value["kind"] == "object":
+                        parameters[key] = ObjectParameter(**value)
         except EntityNotFoundError:
             # No parameters are set
             pass
@@ -401,7 +419,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
             self.store(
                 run_id=run_id,
                 log_type=self.LogTypes.PARAMETER,
-                contents={key: value},
+                contents={key: value.model_dump(by_alias=True)},
                 name=key,
             )

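Note: together with the typed rehydration above, set_parameters now persists pydantic dumps rather than raw Parameter objects, keyed by the kind discriminator. A rough sketch of the round trip, assuming the models from runnable.datastore:

    from runnable.datastore import JsonParameter

    # store: the parameter is flattened to a plain dict before writing
    stored = JsonParameter(kind="json", value=42).model_dump(by_alias=True)

    # retrieve: the "kind" field picks the concrete model to rebuild
    if stored["kind"] == "json":
        param = JsonParameter(**stored)
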
--- runnable-0.11.0/runnable/sdk.py
+++ runnable-0.11.2/runnable/sdk.py
@@ -15,8 +15,13 @@ from pydantic import (
     field_validator,
     model_validator,
 )
-from rich import print
-from rich.progress import BarColumn, Progress, TextColumn, TimeElapsedColumn
+from rich.progress import (
+    BarColumn,
+    Progress,
+    SpinnerColumn,
+    TextColumn,
+    TimeElapsedColumn,
+)
 from rich.table import Column
 from typing_extensions import Self

@@ -71,7 +76,7 @@ class Catalog(BaseModel):

 class BaseTraversal(ABC, BaseModel):
     name: str
-    next_node: str = Field(default="", alias="next")
+    next_node: str = Field(default="", serialization_alias="next_node")
     terminate_with_success: bool = Field(default=False, exclude=True)
     terminate_with_failure: bool = Field(default=False, exclude=True)
     on_failure: str = Field(default="", alias="on_failure")
@@ -83,6 +88,12 @@ class BaseTraversal(ABC, BaseModel):
     def internal_name(self) -> str:
         return self.name

+    def __hash__(self):
+        """
+        Needed to Uniqueize DataCatalog objects.
+        """
+        return hash(self.name)
+
     def __rshift__(self, other: StepType) -> StepType:
         if self.next_node:
             raise Exception(f"The node {self} already has a next node: {self.next_node}")
@@ -180,6 +191,7 @@ class BaseTask(BaseTraversal):
     catalog: Optional[Catalog] = Field(default=None, alias="catalog")
     overrides: Dict[str, Any] = Field(default_factory=dict, alias="overrides")
     returns: List[Union[str, TaskReturns]] = Field(default_factory=list, alias="returns")
+    secrets: List[str] = Field(default_factory=list)

     @field_validator("returns", mode="before")
     @classmethod
@@ -201,7 +213,7 @@ class BaseTask(BaseTraversal):
         if not (self.terminate_with_failure or self.terminate_with_success):
             raise AssertionError("A node not being terminated must have a user defined next node")

-        return TaskNode.parse_from_config(self.model_dump(exclude_none=True))
+        return TaskNode.parse_from_config(self.model_dump(exclude_none=True, by_alias=True))


 class PythonTask(BaseTask):
@@ -297,9 +309,9 @@ class NotebookTask(BaseTask):

     """

-    notebook: str = Field(alias="command")
+    notebook: str = Field(serialization_alias="command")

-    notebook_output_path: Optional[str] = Field(default=None, alias="notebook_output_path")
+    notebook_output_path: Optional[str] = Field(default=None, alias="notebook_output_path", validate_default=True)
     optional_ploomber_args: Optional[Dict[str, Any]] = Field(default=None, alias="optional_ploomber_args")

     @computed_field
@@ -526,7 +538,7 @@ class Pipeline(BaseModel):
     _dag: graph.Graph = PrivateAttr()
     model_config = ConfigDict(extra="forbid")

-    def _validate_path(self, path: List[StepType]) -> None:
+    def _validate_path(self, path: List[StepType], failure_path: bool = False) -> None:
         # Check if one and only one step terminates with success
         # Check no more than one step terminates with failure

@@ -544,7 +556,7 @@ class Pipeline(BaseModel):
                 raise Exception("A pipeline cannot have more than one step that terminates with failure")
             reached_failure = True

-        if not reached_success:
+        if not reached_success and not reached_failure:
             raise Exception("A pipeline must have at least one step that terminates with success")

     def _construct_path(self, path: List[StepType]) -> None:
@@ -594,11 +606,21 @@ class Pipeline(BaseModel):

         # Check all paths are valid and construct the path
         paths = [success_path] + on_failure_paths
+        failure_path = False
         for path in paths:
-            self._validate_path(path)
+            self._validate_path(path, failure_path)
             self._construct_path(path)

-        all_steps: List[StepType] = [step for step in success_path + on_failure_paths]  # type: ignore
+            failure_path = True
+
+        all_steps: List[StepType] = []
+
+        for path in paths:
+            for step in path:
+                all_steps.append(step)
+
+        seen = set()
+        unique = [x for x in all_steps if not (x in seen or seen.add(x))]  # type: ignore

         self._dag = graph.Graph(
             start_at=all_steps[0].name,
@@ -606,7 +628,7 @@ class Pipeline(BaseModel):
             internal_branch_name=self.internal_branch_name,
         )

-        for step in all_steps:
+        for step in unique:
             self._dag.add_node(step.create_node())

         if self.add_terminal_nodes:
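
Note: unique is built with an order-preserving dedup idiom: set.add returns None (falsy), so the condition holds only on a step's first occurrence. This is also why BaseTraversal gained __hash__ earlier in this file. In isolation:

    all_steps = ["fetch", "train", "fetch", "report"]  # illustrative step names
    seen = set()
    # seen.add(x) returns None, so later duplicates fail the condition
    unique = [x for x in all_steps if not (x in seen or seen.add(x))]
    assert unique == ["fetch", "train", "report"]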
@@ -675,8 +697,9 @@ class Pipeline(BaseModel):

         run_context.dag = graph.create_graph(dag_definition)

-        print("Working with context:")
-        print(run_context)
+        console.print("Working with context:")
+        console.print(run_context)
+        console.rule(style="[dark orange]")

         if not run_context.executor._local:
             # We are not working with non local executor
@@ -693,6 +716,7 @@ class Pipeline(BaseModel):
         run_context.executor.prepare_for_graph_execution()

         with Progress(
+            SpinnerColumn(spinner_name="runner"),
             TextColumn("[progress.description]{task.description}", table_column=Column(ratio=2)),
             BarColumn(table_column=Column(ratio=1), style="dark_orange"),
             TimeElapsedColumn(table_column=Column(ratio=1)),
--- runnable-0.11.0/runnable/tasks.py
+++ runnable-0.11.2/runnable/tasks.py
@@ -1,4 +1,5 @@
 import contextlib
+import copy
 import importlib
 import io
 import json
@@ -9,13 +10,14 @@ import sys
 from datetime import datetime
 from pickle import PicklingError
 from string import Template
-from typing import Any, Dict, List, Literal, Tuple
+from typing import Any, Dict, List, Literal, Optional, Tuple

 from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, field_validator
+from rich.console import Console
 from stevedore import driver

 import runnable.context as context
-from runnable import console, defaults, exceptions, parameters, utils
+from runnable import defaults, exceptions, parameters, utils
 from runnable.datastore import (
     JsonParameter,
     MetricParameter,
@@ -32,6 +34,9 @@ logging.getLogger("stevedore").setLevel(logging.CRITICAL)
 # TODO: Can we add memory peak, cpu usage, etc. to the metrics?


+console = Console(file=io.StringIO())
+
+
 class TaskReturns(BaseModel):
     name: str
     kind: Literal["json", "object", "metric"] = Field(default="json")
@@ -42,7 +47,7 @@ class BaseTaskType(BaseModel):

     task_type: str = Field(serialization_alias="command_type")
     node_name: str = Field(exclude=True)
-    secrets: Dict[str, str] = Field(default_factory=dict)
+    secrets: List[str] = Field(default_factory=list)
     returns: List[TaskReturns] = Field(default_factory=list, alias="returns")

     model_config = ConfigDict(extra="forbid")
@@ -69,15 +74,14 @@ class BaseTaskType(BaseModel):
         raise NotImplementedError()

     def set_secrets_as_env_variables(self):
-        for key, value in self.secrets.items():
+        for key in self.secrets:
             secret_value = context.run_context.secrets_handler.get(key)
-            self.secrets[value] = secret_value
-            os.environ[value] = secret_value
+            os.environ[key] = secret_value

     def delete_secrets_from_env_variables(self):
-        for _, value in self.secrets.items():
-            if value in os.environ:
-                del os.environ[value]
+        for key in self.secrets:
+            if key in os.environ:
+                del os.environ[key]

     def execute_command(
         self,
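
Note: secrets changes shape from a mapping of secret key to environment-variable name into a plain list; each listed secret is now exposed under its own name. A sketch of the new usage, assuming a configured secrets handler that knows DB_PASSWORD (the secret name and user function are illustrative):

    from runnable import PythonTask

    task = PythonTask(
        name="train",
        function=train,            # hypothetical user function
        secrets=["DB_PASSWORD"],   # 0.11.0 took a dict mapping key -> env var
        terminate_with_success=True,
    )
    # During execution, DB_PASSWORD is set in os.environ and removed afterwards.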
@@ -96,6 +100,20 @@ class BaseTaskType(BaseModel):
         """
         raise NotImplementedError()

+    def _diff_parameters(
+        self, parameters_in: Dict[str, Parameter], context_params: Dict[str, Parameter]
+    ) -> Dict[str, Parameter]:
+        diff: Dict[str, Parameter] = {}
+        for param_name, param in context_params.items():
+            if param_name in parameters_in:
+                if parameters_in[param_name] != param:
+                    diff[param_name] = param
+                continue
+
+            diff[param_name] = param
+
+        return diff
+
     @contextlib.contextmanager
     def expose_secrets(self):
         """Context manager to expose secrets to the execution.
@@ -125,7 +143,7 @@ class BaseTaskType(BaseModel):
             if param.reduced is False:
                 context_param = param_name
                 for _, v in map_variable.items():  # type: ignore
-                    context_param = f"{context_param}_{v}"
+                    context_param = f"{v}_{context_param}"

                 if context_param in params:
                     params[param_name].value = params[context_param].value
@@ -135,17 +153,23 @@ class BaseTaskType(BaseModel):
         if not allow_complex:
            params = {key: value for key, value in params.items() if isinstance(value, JsonParameter)}

-        log_file_name = self.node_name.replace(" ", "_") + ".execution.log"
+        log_file_name = self.node_name  # + ".execution.log"
         if map_variable:
             for _, value in map_variable.items():
                 log_file_name += "_" + str(value)

+        log_file_name = "".join(x for x in log_file_name if x.isalnum()) + ".execution.log"
+
         log_file = open(log_file_name, "w")

+        parameters_in = copy.deepcopy(params)
+
         f = io.StringIO()
         try:
             with contextlib.redirect_stdout(f):
+                # with contextlib.nullcontext():
                 yield params
+                print(console.file.getvalue())  # type: ignore
         except Exception as e:  # pylint: disable=broad-except
             logger.exception(e)
         finally:
@@ -156,11 +180,13 @@ class BaseTaskType(BaseModel):
             log_file.close()

             # Put the log file in the catalog
-            # self._context.catalog_handler.put(name=log_file.name, run_id=context.run_context.run_id)
+            self._context.catalog_handler.put(name=log_file.name, run_id=context.run_context.run_id)
             os.remove(log_file.name)

             # Update parameters
-            self._context.run_log_store.set_parameters(parameters=params, run_id=self._context.run_id)
+            # This should only update the parameters that are changed at the root level.
+            diff_parameters = self._diff_parameters(parameters_in=parameters_in, context_params=params)
+            self._context.run_log_store.set_parameters(parameters=diff_parameters, run_id=self._context.run_id)


 def task_return_to_parameter(task_return: TaskReturns, value: Any) -> Parameter:
@@ -219,8 +245,7 @@ class PythonTaskType(BaseTaskType):  # pylint: disable=too-few-public-methods
                 logger.info(f"Calling {func} from {module} with {filtered_parameters}")
                 user_set_parameters = f(**filtered_parameters)  # This is a tuple or single value
             except Exception as e:
-                logger.exception(e)
-                console.print(e, style=defaults.error_style)
+                console.log(e, style=defaults.error_style, markup=False)
                 raise exceptions.CommandCallError(f"Function call: {self.command} did not succeed.\n") from e

             attempt_log.input_parameters = params.copy()
@@ -252,7 +277,7 @@ class PythonTaskType(BaseTaskType):  # pylint: disable=too-few-public-methods
                     param_name = task_return.name
                     if map_variable:
                         for _, v in map_variable.items():
-                            param_name = f"{param_name}_{v}"
+                            param_name = f"{v}_{param_name}"

                     output_parameters[param_name] = output_parameter

@@ -263,9 +288,9 @@ class PythonTaskType(BaseTaskType):  # pylint: disable=too-few-public-methods
                 attempt_log.status = defaults.SUCCESS
             except Exception as _e:
                 msg = f"Call to the function {self.command} did not succeed.\n"
-                logger.exception(_e)
                 attempt_log.message = msg
-                console.print(_e, style=defaults.error_style)
+                console.print_exception(show_locals=False)
+                console.log(_e, style=defaults.error_style)

             attempt_log.end_time = str(datetime.now())

@@ -277,7 +302,7 @@ class NotebookTaskType(BaseTaskType):

     task_type: str = Field(default="notebook", serialization_alias="command_type")
     command: str
-    notebook_output_path: str = Field(default="", validate_default=True)
+    notebook_output_path: Optional[str] = Field(default=None, validate_default=True)
     optional_ploomber_args: dict = {}

     @field_validator("command")
@@ -319,7 +344,7 @@ class NotebookTaskType(BaseTaskType):
         import ploomber_engine as pm
         from ploomber_engine.ipython import PloomberClient

-        notebook_output_path = self.notebook_output_path
+        notebook_output_path = self.notebook_output_path or ""

         with self.execution_context(
             map_variable=map_variable, allow_complex=False
@@ -424,15 +449,17 @@ class ShellTaskType(BaseTaskType):

         # Expose secrets as environment variables
         if self.secrets:
-            for key, value in self.secrets.items():
+            for key in self.secrets:
                 secret_value = context.run_context.secrets_handler.get(key)
-                subprocess_env[value] = secret_value
+                subprocess_env[key] = secret_value

         with self.execution_context(map_variable=map_variable, allow_complex=False) as params:
             subprocess_env.update({k: v.get_value() for k, v in params.items()})

             # Json dumps all runnable environment variables
             for key, value in subprocess_env.items():
+                if isinstance(value, str):
+                    continue
                 subprocess_env[key] = json.dumps(value)

             collect_delimiter = "=== COLLECT ==="
@@ -441,37 +468,80 @@ class ShellTaskType(BaseTaskType):
             logger.info(f"Executing shell command: {command}")

             capture = False
-            return_keys = [x.name for x in self.returns]
+            return_keys = {x.name: x for x in self.returns}

-            with subprocess.Popen(
+            proc = subprocess.Popen(
                 command,
                 shell=True,
                 env=subprocess_env,
                 stdout=subprocess.PIPE,
                 stderr=subprocess.PIPE,
                 text=True,
-            ) as proc:
-                for line in proc.stdout:  # type: ignore
-                    logger.info(line)
-                    print(line)
-
-                    if line.strip() == collect_delimiter:
-                        # The lines from now on should be captured
-                        capture = True
-                        continue
-
-                    if capture:
-                        key, value = line.strip().split("=", 1)
-                        if key in (return_keys or []):
-                            param_name = Template(key).safe_substitute(map_variable)  # type: ignore
-                            try:
-                                params[param_name] = JsonParameter(kind="json", value=json.loads(value))
-                            except json.JSONDecodeError:
-                                params[param_name] = JsonParameter(kind="json", value=value)
-
-                proc.wait()
-                if proc.returncode == 0:
-                    attempt_log.status = defaults.SUCCESS
+            )
+            result = proc.communicate()
+            logger.debug(result)
+            logger.info(proc.returncode)
+
+            if proc.returncode != 0:
+                msg = ",".join(result[1].split("\n"))
+                attempt_log.status = defaults.FAIL
+                attempt_log.end_time = str(datetime.now())
+                attempt_log.message = msg
+                console.print(msg, style=defaults.error_style)
+                return attempt_log
+
+            # for stderr
+            for line in result[1].split("\n"):
+                if line.strip() == "":
+                    continue
+                console.print(line, style=defaults.warning_style)
+
+            output_parameters: Dict[str, Parameter] = {}
+            metrics: Dict[str, Parameter] = {}
+
+            # only from stdout
+            for line in result[0].split("\n"):
+                if line.strip() == "":
+                    continue
+
+                logger.info(line)
+                console.print(line)
+
+                if line.strip() == collect_delimiter:
+                    # The lines from now on should be captured
+                    capture = True
+                    continue
+
+                if capture:
+                    key, value = line.strip().split("=", 1)
+                    if key in return_keys:
+                        task_return = return_keys[key]
+
+                        try:
+                            value = json.loads(value)
+                        except json.JSONDecodeError:
+                            value = value
+
+                        output_parameter = task_return_to_parameter(
+                            task_return=task_return,
+                            value=value,
+                        )
+
+                        if task_return.kind == "metric":
+                            metrics[task_return.name] = output_parameter
+
+                        param_name = task_return.name
+                        if map_variable:
+                            for _, v in map_variable.items():
+                                param_name = f"{param_name}_{v}"
+
+                        output_parameters[param_name] = output_parameter
+
+            attempt_log.output_parameters = output_parameters
+            attempt_log.user_defined_metrics = metrics
+            params.update(output_parameters)
+
+            attempt_log.status = defaults.SUCCESS

         attempt_log.end_time = str(datetime.now())
         return attempt_log
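
Note: the collect contract itself is unchanged by this rework: stdout lines after the === COLLECT === delimiter are parsed as key=value pairs and matched against the declared returns, with json.loads attempted on each value. A sketch of the parsing, with illustrative output:

    stdout = "training...\n=== COLLECT ===\naccuracy=0.92\n"

    capture, collected = False, {}
    for line in stdout.split("\n"):
        if line.strip() == "=== COLLECT ===":
            capture = True
            continue
        if capture and "=" in line:
            key, value = line.strip().split("=", 1)
            collected[key] = value  # json.loads(value) is attempted first

    assert collected == {"accuracy": "0.92"}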
--- runnable-0.11.0/runnable/extensions/run_log_store/chunked_file_system/implementation.py
+++ runnable-0.11.2/runnable/extensions/run_log_store/chunked_file_system/implementation.py
@@ -35,10 +35,10 @@ class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
             name (str): The suffix of the file name to check in the run log store.
         """
         log_folder = self.log_folder_with_run_id(run_id=run_id)
-
         sub_name = Template(name).safe_substitute({"creation_time": ""})

         matches = list(log_folder.glob(f"{sub_name}*"))
+
         if matches:
             if not multiple_allowed:
                 if len(matches) > 1: