runnable 0.3.0__tar.gz → 0.5.0__tar.gz

Files changed (70)
  1. {runnable-0.3.0 → runnable-0.5.0}/PKG-INFO +1 -1
  2. {runnable-0.3.0 → runnable-0.5.0}/pyproject.toml +2 -1
  3. {runnable-0.3.0 → runnable-0.5.0}/runnable/__init__.py +12 -1
  4. {runnable-0.3.0 → runnable-0.5.0}/runnable/cli.py +1 -4
  5. {runnable-0.3.0 → runnable-0.5.0}/runnable/context.py +0 -2
  6. {runnable-0.3.0 → runnable-0.5.0}/runnable/datastore.py +0 -4
  7. {runnable-0.3.0 → runnable-0.5.0}/runnable/defaults.py +1 -1
  8. {runnable-0.3.0 → runnable-0.5.0}/runnable/entrypoints.py +3 -16
  9. {runnable-0.3.0 → runnable-0.5.0}/runnable/executor.py +1 -41
  10. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/__init__.py +4 -98
  11. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/mocked/implementation.py +1 -26
  12. runnable-0.5.0/runnable/extensions/executor/retry/implementation.py +305 -0
  13. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/file_system/implementation.py +0 -2
  14. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/generic_chunked.py +0 -2
  15. runnable-0.5.0/runnable/extensions/secrets/env_secrets/__init__.py +0 -0
  16. {runnable-0.3.0 → runnable-0.5.0}/runnable/sdk.py +157 -38
  17. {runnable-0.3.0 → runnable-0.5.0}/LICENSE +0 -0
  18. {runnable-0.3.0 → runnable-0.5.0}/README.md +0 -0
  19. {runnable-0.3.0 → runnable-0.5.0}/runnable/catalog.py +0 -0
  20. {runnable-0.3.0 → runnable-0.5.0}/runnable/exceptions.py +0 -0
  21. {runnable-0.3.0 → runnable-0.5.0}/runnable/experiment_tracker.py +0 -0
  22. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/__init__.py +0 -0
  23. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/catalog/__init__.py +0 -0
  24. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/catalog/file_system/__init__.py +0 -0
  25. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/catalog/file_system/implementation.py +0 -0
  26. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
  27. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/catalog/k8s_pvc/implementation.py +0 -0
  28. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/catalog/k8s_pvc/integration.py +0 -0
  29. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/argo/__init__.py +0 -0
  30. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/argo/implementation.py +0 -0
  31. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/argo/specification.yaml +0 -0
  32. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/k8s_job/__init__.py +0 -0
  33. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/k8s_job/implementation_FF.py +0 -0
  34. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/k8s_job/integration_FF.py +0 -0
  35. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/local/__init__.py +0 -0
  36. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/local/implementation.py +0 -0
  37. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/local_container/__init__.py +0 -0
  38. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/local_container/implementation.py +0 -0
  39. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/mocked/__init__.py +0 -0
  40. {runnable-0.3.0/runnable/extensions/experiment_tracker → runnable-0.5.0/runnable/extensions/executor/retry}/__init__.py +0 -0
  41. {runnable-0.3.0/runnable/extensions/experiment_tracker/mlflow → runnable-0.5.0/runnable/extensions/experiment_tracker}/__init__.py +0 -0
  42. {runnable-0.3.0/runnable/extensions/run_log_store → runnable-0.5.0/runnable/extensions/experiment_tracker/mlflow}/__init__.py +0 -0
  43. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/experiment_tracker/mlflow/implementation.py +0 -0
  44. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/nodes.py +0 -0
  45. {runnable-0.3.0/runnable/extensions/run_log_store/chunked_file_system → runnable-0.5.0/runnable/extensions/run_log_store}/__init__.py +0 -0
  46. {runnable-0.3.0/runnable/extensions/run_log_store/chunked_k8s_pvc → runnable-0.5.0/runnable/extensions/run_log_store/chunked_file_system}/__init__.py +0 -0
  47. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/chunked_file_system/implementation.py +0 -0
  48. {runnable-0.3.0/runnable/extensions/run_log_store/file_system → runnable-0.5.0/runnable/extensions/run_log_store/chunked_k8s_pvc}/__init__.py +0 -0
  49. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +0 -0
  50. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +0 -0
  51. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/db/implementation_FF.py +0 -0
  52. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/db/integration_FF.py +0 -0
  53. {runnable-0.3.0/runnable/extensions/run_log_store/k8s_pvc → runnable-0.5.0/runnable/extensions/run_log_store/file_system}/__init__.py +0 -0
  54. {runnable-0.3.0/runnable/extensions/secrets → runnable-0.5.0/runnable/extensions/run_log_store/k8s_pvc}/__init__.py +0 -0
  55. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/k8s_pvc/implementation.py +0 -0
  56. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/k8s_pvc/integration.py +0 -0
  57. {runnable-0.3.0/runnable/extensions/secrets/dotenv → runnable-0.5.0/runnable/extensions/secrets}/__init__.py +0 -0
  58. {runnable-0.3.0/runnable/extensions/secrets/env_secrets → runnable-0.5.0/runnable/extensions/secrets/dotenv}/__init__.py +0 -0
  59. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/secrets/dotenv/implementation.py +0 -0
  60. {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/secrets/env_secrets/implementation.py +0 -0
  61. {runnable-0.3.0 → runnable-0.5.0}/runnable/graph.py +0 -0
  62. {runnable-0.3.0 → runnable-0.5.0}/runnable/integration.py +0 -0
  63. {runnable-0.3.0 → runnable-0.5.0}/runnable/interaction.py +0 -0
  64. {runnable-0.3.0 → runnable-0.5.0}/runnable/names.py +0 -0
  65. {runnable-0.3.0 → runnable-0.5.0}/runnable/nodes.py +0 -0
  66. {runnable-0.3.0 → runnable-0.5.0}/runnable/parameters.py +0 -0
  67. {runnable-0.3.0 → runnable-0.5.0}/runnable/pickler.py +0 -0
  68. {runnable-0.3.0 → runnable-0.5.0}/runnable/secrets.py +0 -0
  69. {runnable-0.3.0 → runnable-0.5.0}/runnable/tasks.py +0 -0
  70. {runnable-0.3.0 → runnable-0.5.0}/runnable/utils.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: runnable
- Version: 0.3.0
+ Version: 0.5.0
  Summary: A Compute agnostic pipelining software
  Home-page: https://github.com/vijayvammi/runnable
  License: Apache-2.0
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "runnable"
- version = "0.3.0"
+ version = "0.5.0"
  description = "A Compute agnostic pipelining software"
  authors = ["Vijay Vammi <mesanthu@gmail.com>"]
  license = "Apache-2.0"
@@ -68,6 +68,7 @@ runnable= 'runnable.cli:cli'
  "local-container" = "runnable.extensions.executor.local_container.implementation:LocalContainerExecutor"
  "argo" = "runnable.extensions.executor.argo.implementation:ArgoExecutor"
  "mocked" = "runnable.extensions.executor.mocked.implementation:MockedExecutor"
+ "retry" = "runnable.extensions.executor.retry.implementation:RetryExecutor"

  # Plugins for Catalog
  [tool.poetry.plugins."catalog"]
@@ -24,7 +24,18 @@ from runnable.interaction import (
  set_parameter,
  track_this,
  ) # noqa
- from runnable.sdk import Stub, Pipeline, Task, Parallel, Map, Catalog, Success, Fail # noqa
+ from runnable.sdk import (
+ Stub,
+ Pipeline,
+ Parallel,
+ Map,
+ Catalog,
+ Success,
+ Fail,
+ PythonTask,
+ NotebookTask,
+ ShellTask,
+ ) # noqa


  # TODO: Think of model registry as a central place to store models.
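In 0.5.0 the single `Task` export is replaced by task-type specific classes (`PythonTask`, `NotebookTask`, `ShellTask`). A minimal sketch of the new SDK surface is below; the field names come from the sdk.py docstrings later in this diff, but the exact call pattern (in particular `start_at` and `Pipeline.execute()`) is an assumption rather than verified usage.

```python
# Hedged sketch of the 0.5.0 SDK exports; signatures are assumed from the
# docstrings in this diff, not verified against the released package.
from runnable import Pipeline, PythonTask


def train_model():
    print("training...")


# PythonTask now takes the callable itself; the command string is computed
# from the function's module and name (see sdk.py further down).
train = PythonTask(name="train", function=train_model, terminate_with_success=True)

pipeline = Pipeline(steps=[train], start_at=train)
pipeline.execute()  # assumed entry point; see the Pipeline changes at the end of this diff
```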
@@ -41,8 +41,7 @@ def cli():
  )
  @click.option("--tag", default="", help="A tag attached to the run")
  @click.option("--run-id", help="An optional run_id, one would be generated if not provided")
- @click.option("--use-cached", help="Provide the previous run_id to re-run.", show_default=True)
- def execute(file, config_file, parameters_file, log_level, tag, run_id, use_cached): # pragma: no cover
+ def execute(file, config_file, parameters_file, log_level, tag, run_id): # pragma: no cover
  """
  Execute a pipeline

@@ -59,7 +58,6 @@ def execute(file, config_file, parameters_file, log_level, tag, run_id, use_cach
  [default: ]
  --run-id TEXT An optional run_id, one would be generated if not
  provided
- --use-cached TEXT Provide the previous run_id to re-run.
  """
  logger.setLevel(log_level)
  entrypoints.execute(
@@ -67,7 +65,6 @@ def execute(file, config_file, parameters_file, log_level, tag, run_id, use_cach
  pipeline_file=file,
  tag=tag,
  run_id=run_id,
- use_cached=use_cached,
  parameters_file=parameters_file,
  )

@@ -24,8 +24,6 @@ class Context(BaseModel):
  tag: str = ""
  run_id: str = ""
  variables: Dict[str, str] = {}
- use_cached: bool = False
- original_run_id: str = ""
  dag: Optional[Graph] = None
  dag_hash: str = ""
  execution_plan: str = ""
@@ -169,9 +169,7 @@ class RunLog(BaseModel):

  run_id: str
  dag_hash: Optional[str] = None
- use_cached: bool = False
  tag: Optional[str] = ""
- original_run_id: Optional[str] = ""
  status: str = defaults.FAIL
  steps: OrderedDict[str, StepLog] = Field(default_factory=OrderedDict)
  parameters: Dict[str, Any] = Field(default_factory=dict)
@@ -659,9 +657,7 @@ class BufferRunLogstore(BaseRunLogStore):
  self.run_log = RunLog(
  run_id=run_id,
  dag_hash=dag_hash,
- use_cached=use_cached,
  tag=tag,
- original_run_id=original_run_id,
  status=status,
  )
  return self.run_log
@@ -35,7 +35,7 @@ class ServiceConfig(TypedDict):
  config: Mapping[str, Any]


- class runnableConfig(TypedDict, total=False):
+ class RunnableConfig(TypedDict, total=False):
  run_log_store: Optional[ServiceConfig]
  secrets: Optional[ServiceConfig]
  catalog: Optional[ServiceConfig]
@@ -9,12 +9,12 @@ from rich import print

  import runnable.context as context
  from runnable import defaults, graph, utils
- from runnable.defaults import ServiceConfig, runnableConfig
+ from runnable.defaults import RunnableConfig, ServiceConfig

  logger = logging.getLogger(defaults.LOGGER_NAME)


- def get_default_configs() -> runnableConfig:
+ def get_default_configs() -> RunnableConfig:
  """
  User can provide extensions as part of their code base, runnable-config.yaml provides the place to put them.
  """
@@ -37,7 +37,6 @@ def prepare_configurations(
  configuration_file: str = "",
  pipeline_file: str = "",
  tag: str = "",
- use_cached: str = "",
  parameters_file: str = "",
  force_local_executor: bool = False,
  ) -> context.Context:
@@ -51,7 +50,6 @@
  pipeline_file (str): The config/dag file
  run_id (str): The run id of the run.
  tag (str): If a tag is provided at the run time
- use_cached (str): Provide the run_id of the older run

  Returns:
  executor.BaseExecutor : A prepared executor as per the dag/config
@@ -64,7 +62,7 @@ def prepare_configurations(
  if configuration_file:
  templated_configuration = utils.load_yaml(configuration_file) or {}

- configuration: runnableConfig = cast(runnableConfig, templated_configuration)
+ configuration: RunnableConfig = cast(RunnableConfig, templated_configuration)

  # Run log settings, configuration over-rides everything
  run_log_config: Optional[ServiceConfig] = configuration.get("run_log_store", None)
@@ -141,11 +139,6 @@ def prepare_configurations(
  run_context.pipeline_file = pipeline_file
  run_context.dag = dag

- run_context.use_cached = False
- if use_cached:
- run_context.use_cached = True
- run_context.original_run_id = use_cached
-
  context.run_context = run_context

  return run_context
@@ -156,7 +149,6 @@ def execute(
  pipeline_file: str,
  tag: str = "",
  run_id: str = "",
- use_cached: str = "",
  parameters_file: str = "",
  ):
  # pylint: disable=R0914,R0913
@@ -168,10 +160,8 @@
  pipeline_file (str): The config/dag file
  run_id (str): The run id of the run.
  tag (str): If a tag is provided at the run time
- use_cached (str): The previous run_id to use.
  parameters_file (str): The parameters being sent in to the application
  """
- # Re run settings
  run_id = utils.generate_run_id(run_id=run_id)

  run_context = prepare_configurations(
@@ -179,7 +169,6 @@
  pipeline_file=pipeline_file,
  run_id=run_id,
  tag=tag,
- use_cached=use_cached,
  parameters_file=parameters_file,
  )
  print("Working with context:")
@@ -231,7 +220,6 @@ def execute_single_node(
  pipeline_file=pipeline_file,
  run_id=run_id,
  tag=tag,
- use_cached="",
  parameters_file=parameters_file,
  )
  print("Working with context:")
@@ -416,7 +404,6 @@ def fan(
  pipeline_file=pipeline_file,
  run_id=run_id,
  tag=tag,
- use_cached="",
  parameters_file=parameters_file,
  )
  print("Working with context:")
@@ -9,7 +9,7 @@ from pydantic import BaseModel, ConfigDict

  import runnable.context as context
  from runnable import defaults
- from runnable.datastore import DataCatalog, RunLog, StepLog
+ from runnable.datastore import DataCatalog, StepLog
  from runnable.defaults import TypeMapVariable
  from runnable.graph import Graph

@@ -36,9 +36,6 @@ class BaseExecutor(ABC, BaseModel):

  overrides: dict = {}

- # TODO: This needs to go away
- _previous_run_log: Optional[RunLog] = None
- _single_step: str = ""
  _local: bool = False # This is a flag to indicate whether the executor is local or not.

  _context_step_log = None # type : StepLog
@@ -60,21 +57,6 @@
  """
  ...

- # TODO: This needs to go away
- @abstractmethod
- def _set_up_for_re_run(self, parameters: Dict[str, Any]) -> None:
- """
- Set up the executor for using a previous execution.
-
- Retrieve the older run log, error out if it does not exist.
- Sync the catalogs from the previous run log with the current one.
-
- Update the parameters of this execution with the previous one. The previous one take precedence.
-
- Args:
- parameters (Dict[str, Any]): The parameters for the current execution.
- """
-
  @abstractmethod
  def _set_up_run_log(self, exists_ok=False):
  """
@@ -293,28 +275,6 @@ class BaseExecutor(ABC, BaseModel):
  """
  ...

- # TODO: This needs to go away
- @abstractmethod
- def _is_step_eligible_for_rerun(self, node: BaseNode, map_variable: TypeMapVariable = None):
- """
- In case of a re-run, this method checks to see if the previous run step status to determine if a re-run is
- necessary.
- * True: If its not a re-run.
- * True: If its a re-run and we failed in the last run or the corresponding logs do not exist.
- * False: If its a re-run and we succeeded in the last run.
-
- Most cases, this logic need not be touched
-
- Args:
- node (Node): The node to check against re-run
- map_variable (dict, optional): If the node if of a map state, this corresponds to the value of iterable..
- Defaults to None.
-
- Returns:
- bool: Eligibility for re-run. True means re-run, False means skip to the next step.
- """
- ...
-
  @abstractmethod
  def send_return_code(self, stage="traversal"):
  """
@@ -3,12 +3,12 @@ import json
  import logging
  import os
  from abc import abstractmethod
- from typing import Any, Dict, List, Optional, cast
+ from typing import Any, Dict, List, Optional

  from rich import print

  from runnable import context, defaults, exceptions, integration, parameters, utils
- from runnable.datastore import DataCatalog, RunLog, StepLog
+ from runnable.datastore import DataCatalog, StepLog
  from runnable.defaults import TypeMapVariable
  from runnable.executor import BaseExecutor
  from runnable.experiment_tracker import get_tracked_data
@@ -40,20 +40,6 @@ class GenericExecutor(BaseExecutor):
  def _context(self):
  return context.run_context

- @property
- def step_decorator_run_id(self):
- """
- TODO: Experimental feature, design is not mature yet.
-
- This function is used by the decorator function.
- The design idea is we can over-ride this method in different implementations to retrieve the run_id.
- But is it really intrusive to ask to set the environmental variable runnable_RUN_ID?
-
- Returns:
- _type_: _description_
- """
- return os.environ.get("runnable_RUN_ID", None)
-
  def _get_parameters(self) -> Dict[str, Any]:
  """
  Consolidate the parameters from the environment variables
@@ -72,28 +58,6 @@ class GenericExecutor(BaseExecutor):
  params.update(parameters.get_user_set_parameters())
  return params

- def _set_up_for_re_run(self, parameters: Dict[str, Any]) -> None:
- try:
- attempt_run_log = self._context.run_log_store.get_run_log_by_id(
- run_id=self._context.original_run_id, full=False
- )
- except exceptions.RunLogNotFoundError as e:
- msg = (
- f"Expected a run log with id: {self._context.original_run_id} "
- "but it does not exist in the run log store. "
- "If the original execution was in a different environment, ensure that it is available in the current "
- "environment."
- )
- logger.exception(msg)
- raise Exception(msg) from e
-
- # Sync the previous run log catalog to this one.
- self._context.catalog_handler.sync_between_runs(
- previous_run_id=self._context.original_run_id, run_id=self._context.run_id
- )
-
- parameters.update(cast(RunLog, attempt_run_log).parameters)
-
  def _set_up_run_log(self, exists_ok=False):
  """
  Create a run log and put that in the run log store
@@ -115,22 +79,16 @@ class GenericExecutor(BaseExecutor):
  raise

  # Consolidate and get the parameters
- parameters = self._get_parameters()
-
- # TODO: This needs to go away
- if self._context.use_cached:
- self._set_up_for_re_run(parameters=parameters)
+ params = self._get_parameters()

  self._context.run_log_store.create_run_log(
  run_id=self._context.run_id,
  tag=self._context.tag,
  status=defaults.PROCESSING,
  dag_hash=self._context.dag_hash,
- use_cached=self._context.use_cached,
- original_run_id=self._context.original_run_id,
  )
  # Any interaction with run log store attributes should happen via API if available.
- self._context.run_log_store.set_parameters(run_id=self._context.run_id, parameters=parameters)
+ self._context.run_log_store.set_parameters(run_id=self._context.run_id, parameters=params)

  # Update run_config
  run_config = utils.get_run_config()
@@ -409,17 +367,6 @@ class GenericExecutor(BaseExecutor):
  self._execute_node(node, map_variable=map_variable, **kwargs)
  return

- # TODO: This needs to go away
- # In single step
- if (self._single_step and not node.name == self._single_step) or not self._is_step_eligible_for_rerun(
- node, map_variable=map_variable
- ):
- # If the node name does not match, we move on to the next node.
- # If previous run was successful, move on to the next step
- step_log.mock = True
- step_log.status = defaults.SUCCESS
- self._context.run_log_store.add_step_log(step_log, self._context.run_id)
- return
  # We call an internal function to iterate the sub graphs and execute them
  if node.is_composite:
  self._context.run_log_store.add_step_log(step_log, self._context.run_id)
@@ -543,47 +490,6 @@ class GenericExecutor(BaseExecutor):
  run_log = self._context.run_log_store.get_run_log_by_id(run_id=self._context.run_id, full=True)
  print(json.dumps(run_log.model_dump(), indent=4))

- # TODO: This needs to go away
- def _is_step_eligible_for_rerun(self, node: BaseNode, map_variable: TypeMapVariable = None):
- """
- In case of a re-run, this method checks to see if the previous run step status to determine if a re-run is
- necessary.
- * True: If its not a re-run.
- * True: If its a re-run and we failed in the last run or the corresponding logs do not exist.
- * False: If its a re-run and we succeeded in the last run.
-
- Most cases, this logic need not be touched
-
- Args:
- node (Node): The node to check against re-run
- map_variable (dict, optional): If the node if of a map state, this corresponds to the value of iterable..
- Defaults to None.
-
- Returns:
- bool: Eligibility for re-run. True means re-run, False means skip to the next step.
- """
- if self._context.use_cached:
- node_step_log_name = node._get_step_log_name(map_variable=map_variable)
- logger.info(f"Scanning previous run logs for node logs of: {node_step_log_name}")
-
- try:
- previous_node_log = self._context.run_log_store.get_step_log(
- internal_name=node_step_log_name, run_id=self._context.original_run_id
- )
- except exceptions.StepLogNotFoundError:
- logger.warning(f"Did not find the node {node.name} in previous run log")
- return True # We should re-run the node.
-
- logger.info(f"The original step status: {previous_node_log.status}")
-
- if previous_node_log.status == defaults.SUCCESS:
- return False # We need not run the node
-
- logger.info(f"The new execution should start executing graph from this node {node.name}")
- return True
-
- return True
-
  def send_return_code(self, stage="traversal"):
  """
  Convenience function used by pipeline to send return code to the caller of the cli
@@ -32,9 +32,6 @@ class MockedExecutor(GenericExecutor):
  def _context(self):
  return context.run_context

- def _set_up_for_re_run(self, parameters: Dict[str, Any]) -> None:
- raise Exception("MockedExecutor does not support re-run")
-
  def execute_from_graph(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
  """
  This is the entry point to from the graph execution.
@@ -85,7 +82,7 @@ class MockedExecutor(GenericExecutor):
  # node is not patched, so mock it
  step_log.mock = True
  else:
- # node is mocked, change the executable to python with the
+ # node is patched
  # command as the patch value
  executable_type = node_to_send.executable.__class__
  executable = create_executable(
@@ -94,7 +91,6 @@
  node_name=node.name,
  )
  node_to_send.executable = executable
- pass

  # Executor specific way to trigger a job
  self._context.run_log_store.add_step_log(step_log, self._context.run_id)
@@ -117,27 +113,6 @@
  self.prepare_for_node_execution()
  self.execute_node(node=node, map_variable=map_variable, **kwargs)

- # TODO: This needs to go away
- def _is_step_eligible_for_rerun(self, node: BaseNode, map_variable: TypeMapVariable = None):
- """
- In case of a re-run, this method checks to see if the previous run step status to determine if a re-run is
- necessary.
- * True: If its not a re-run.
- * True: If its a re-run and we failed in the last run or the corresponding logs do not exist.
- * False: If its a re-run and we succeeded in the last run.
-
- Most cases, this logic need not be touched
-
- Args:
- node (Node): The node to check against re-run
- map_variable (dict, optional): If the node if of a map state, this corresponds to the value of iterable..
- Defaults to None.
-
- Returns:
- bool: Eligibility for re-run. True means re-run, False means skip to the next step.
- """
- return True
-
  def _resolve_executor_config(self, node: BaseNode):
  """
  The overrides section can contain specific over-rides to an global executor config.
@@ -0,0 +1,305 @@
+ import copy
+ import json
+ import logging
+ from functools import cached_property
+ from typing import Any, Dict, List, Optional
+
+ from rich import print
+
+ from runnable import context, defaults, exceptions, parameters, utils
+ from runnable.datastore import DataCatalog, RunLog
+ from runnable.defaults import TypeMapVariable
+ from runnable.experiment_tracker import get_tracked_data
+ from runnable.extensions.executor import GenericExecutor
+ from runnable.graph import Graph
+ from runnable.nodes import BaseNode
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+ class RetryExecutor(GenericExecutor):
+ """
+ The skeleton of an executor class.
+ Any implementation of an executor should inherit this class and over-ride accordingly.
+
+ This is a loaded base class which has a lot of methods already implemented for "typical" executions.
+ Look at the function docs to understand how to use them appropriately.
+
+ For any implementation:
+ 1). Who/when should the run log be set up?
+ 2). Who/When should the step log be set up?
+
+ """
+
+ service_name: str = "retry"
+ service_type: str = "executor"
+ run_id: str
+
+ _local: bool = True
+ _original_run_log: Optional[RunLog] = None
+
+ @property
+ def _context(self):
+ return context.run_context
+
+ @cached_property
+ def original_run_log(self):
+ self.original_run_log = self._context.run_log_store.get_run_log_by_id(
+ run_id=self.run_id,
+ full=True,
+ )
+
+ def _set_up_for_re_run(self, params: Dict[str, Any]) -> None:
+ # Sync the previous run log catalog to this one.
+ self._context.catalog_handler.sync_between_runs(previous_run_id=self.run_id, run_id=self._context.run_id)
+
+ params.update(self.original_run_log.parameters)
+
+ def _set_up_run_log(self, exists_ok=False):
+ """
+ Create a run log and put that in the run log store
+
+ If exists_ok, we allow the run log to be already present in the run log store.
+ """
+ super()._set_up_run_log(exists_ok=exists_ok)
+
+ params = self._get_parameters()
+
+ self._set_up_for_re_run(params=params)
+
+ def _execute_node(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
+ """
+ This is the entry point when we do the actual execution of the function.
+ DO NOT Over-ride this function.
+
+ While in interactive execution, we just compute, in 3rd party interactive execution, we need to reach
+ this function.
+
+ In most cases,
+ * We get the corresponding step_log of the node and the parameters.
+ * We sync the catalog to GET any data sets that are in the catalog
+ * We call the execute method of the node for the actual compute and retry it as many times as asked.
+ * If the node succeeds, we get any of the user defined metrics provided by the user.
+ * We sync the catalog to PUT any data sets that are in the catalog.
+
+ Args:
+ node (Node): The node to execute
+ map_variable (dict, optional): If the node is of a map state, map_variable is the value of the iterable.
+ Defaults to None.
+ """
+ step_log = self._context.run_log_store.get_step_log(node._get_step_log_name(map_variable), self._context.run_id)
+ """
+ By now, all the parameters are part of the run log as a dictionary.
+ We set them as environment variables, serialized as json strings.
+ """
+ params = self._context.run_log_store.get_parameters(run_id=self._context.run_id)
+ params_copy = copy.deepcopy(params)
+ # This is only for the API to work.
+ parameters.set_user_defined_params_as_environment_variables(params)
+
+ attempt = self.step_attempt_number
+ logger.info(f"Trying to execute node: {node.internal_name}, attempt : {attempt}")
+
+ attempt_log = self._context.run_log_store.create_attempt_log()
+ self._context_step_log = step_log
+ self._context_node = node
+
+ data_catalogs_get: Optional[List[DataCatalog]] = self._sync_catalog(step_log, stage="get")
+ try:
+ attempt_log = node.execute(
+ executor=self,
+ mock=step_log.mock,
+ map_variable=map_variable,
+ params=params,
+ **kwargs,
+ )
+ except Exception as e:
+ # Any exception here is a runnable exception as node suppresses exceptions.
+ msg = "This is clearly runnable fault, please report a bug and the logs"
+ logger.exception(msg)
+ raise Exception(msg) from e
+ finally:
+ attempt_log.attempt_number = attempt
+ step_log.attempts.append(attempt_log)
+
+ tracked_data = get_tracked_data()
+
+ self._context.experiment_tracker.publish_data(tracked_data)
+ parameters_out = attempt_log.output_parameters
+
+ if attempt_log.status == defaults.FAIL:
+ logger.exception(f"Node: {node} failed")
+ step_log.status = defaults.FAIL
+ else:
+ # Mock is always set to False, bad design??
+ # TODO: Stub nodes should not sync back data
+ # TODO: Errors in catalog syncing should point to Fail step
+ # TODO: Even for a failed execution, the catalog can happen
+ step_log.status = defaults.SUCCESS
+ self._sync_catalog(step_log, stage="put", synced_catalogs=data_catalogs_get)
+ step_log.user_defined_metrics = tracked_data
+
+ diff_parameters = utils.diff_dict(params_copy, parameters_out)
+ self._context.run_log_store.set_parameters(self._context.run_id, diff_parameters)
+
+ # Remove the step context
+ parameters.get_user_set_parameters(remove=True)
+ self._context_step_log = None
+ self._context_node = None # type: ignore
+ self._context_metrics = {}
+
+ self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+
+ def execute_from_graph(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
+ """
+ This is the entry point to from the graph execution.
+
+ While the self.execute_graph is responsible for traversing the graph, this function is responsible for
+ actual execution of the node.
+
+ If the node type is:
+ * task : We can delegate to _execute_node after checking the eligibility for re-run in cases of a re-run
+ * success: We can delegate to _execute_node
+ * fail: We can delegate to _execute_node
+
+ For nodes that are internally graphs:
+ * parallel: Delegate the responsibility of execution to the node.execute_as_graph()
+ * dag: Delegate the responsibility of execution to the node.execute_as_graph()
+ * map: Delegate the responsibility of execution to the node.execute_as_graph()
+
+ Transpilers will NEVER use this method and will NEVER call ths method.
+ This method should only be used by interactive executors.
+
+ Args:
+ node (Node): The node to execute
+ map_variable (dict, optional): If the node if of a map state, this corresponds to the value of iterable.
+ Defaults to None.
+ """
+ step_log = self._context.run_log_store.create_step_log(node.name, node._get_step_log_name(map_variable))
+
+ self.add_code_identities(node=node, step_log=step_log)
+
+ step_log.step_type = node.node_type
+ step_log.status = defaults.PROCESSING
+
+ # Add the step log to the database as per the situation.
+ # If its a terminal node, complete it now
+ if node.node_type in ["success", "fail"]:
+ self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+ self._execute_node(node, map_variable=map_variable, **kwargs)
+ return
+
+ # In single step
+ if not self._is_step_eligible_for_rerun(node, map_variable=map_variable):
+ # If the node name does not match, we move on to the next node.
+ # If previous run was successful, move on to the next step
+ step_log.mock = True
+ step_log.status = defaults.SUCCESS
+ self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+ return
+
+ # We call an internal function to iterate the sub graphs and execute them
+ if node.is_composite:
+ self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+ node.execute_as_graph(map_variable=map_variable, **kwargs)
+ return
+
+ # Executor specific way to trigger a job
+ self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+ self.execute_node(node=node, map_variable=map_variable, **kwargs)
+
+ def execute_graph(self, dag: Graph, map_variable: TypeMapVariable = None, **kwargs):
+ """
+ The parallelization is controlled by the nodes and not by this function.
+
+ Transpilers should over ride this method to do the translation of dag to the platform specific way.
+ Interactive methods should use this to traverse and execute the dag.
+ - Use execute_from_graph to handle sub-graphs
+
+ Logically the method should:
+ * Start at the dag.start_at of the dag.
+ * Call the self.execute_from_graph(node)
+ * depending upon the status of the execution, either move to the success node or failure node.
+
+ Args:
+ dag (Graph): The directed acyclic graph to traverse and execute.
+ map_variable (dict, optional): If the node if of a map state, this corresponds to the value of the iterable.
+ Defaults to None.
+ """
+ current_node = dag.start_at
+ previous_node = None
+ logger.info(f"Running the execution with {current_node}")
+
+ while True:
+ working_on = dag.get_node_by_name(current_node)
+
+ if previous_node == current_node:
+ raise Exception("Potentially running in a infinite loop")
+
+ previous_node = current_node
+
+ logger.info(f"Creating execution log for {working_on}")
+ self.execute_from_graph(working_on, map_variable=map_variable, **kwargs)
+
+ _, next_node_name = self._get_status_and_next_node_name(
+ current_node=working_on, dag=dag, map_variable=map_variable
+ )
+
+ if working_on.node_type in ["success", "fail"]:
+ break
+
+ current_node = next_node_name
+
+ run_log = self._context.run_log_store.get_branch_log(
+ working_on._get_branch_log_name(map_variable), self._context.run_id
+ )
+
+ branch = "graph"
+ if working_on.internal_branch_name:
+ branch = working_on.internal_branch_name
+
+ logger.info(f"Finished execution of the {branch} with status {run_log.status}")
+
+ # get the final run log
+ if branch == "graph":
+ run_log = self._context.run_log_store.get_run_log_by_id(run_id=self._context.run_id, full=True)
+ print(json.dumps(run_log.model_dump(), indent=4))
+
+ def _is_step_eligible_for_rerun(self, node: BaseNode, map_variable: TypeMapVariable = None):
+ """
+ In case of a re-run, this method checks to see if the previous run step status to determine if a re-run is
+ necessary.
+ * True: If its not a re-run.
+ * True: If its a re-run and we failed in the last run or the corresponding logs do not exist.
+ * False: If its a re-run and we succeeded in the last run.
+
+ Most cases, this logic need not be touched
+
+ Args:
+ node (Node): The node to check against re-run
+ map_variable (dict, optional): If the node if of a map state, this corresponds to the value of iterable..
+ Defaults to None.
+
+ Returns:
+ bool: Eligibility for re-run. True means re-run, False means skip to the next step.
+ """
+
+ node_step_log_name = node._get_step_log_name(map_variable=map_variable)
+ logger.info(f"Scanning previous run logs for node logs of: {node_step_log_name}")
+
+ try:
+ previous_attempt_log, _ = self.original_run_log.search_step_by_internal_name(node_step_log_name)
+ except exceptions.StepLogNotFoundError:
+ logger.warning(f"Did not find the node {node.name} in previous run log")
+ return True # We should re-run the node.
+
+ logger.info(f"The original step status: {previous_attempt_log.status}")
+
+ if previous_attempt_log.status == defaults.SUCCESS:
+ return False # We need not run the node
+
+ logger.info(f"The new execution should start executing graph from this node {node.name}")
+ return True
+
+ def execute_node(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
+ self._execute_node(node, map_variable=map_variable, **kwargs)
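The retry executor replaces the removed `--use-cached` flow: instead of a CLI flag, the original run's id is supplied as the executor's `run_id` field, and the plugin is registered as `"retry"` in pyproject.toml above. A hedged sketch of a configuration that `prepare_configurations` might load is shown below; the `executor`/`type`/`config` key names are assumptions modelled on the `ServiceConfig` and `RunnableConfig` shapes shown earlier in this diff, not something this diff confirms.

```python
# Hypothetical configuration selecting the retry executor. Only the plugin
# name "retry" and the RetryExecutor.run_id field come from this diff; the
# surrounding key names are assumptions.
retry_configuration = {
    "executor": {
        "type": "retry",
        "config": {"run_id": "previous-run-id"},  # the earlier run to retry
    }
}
```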
@@ -108,9 +108,7 @@ class FileSystemRunLogstore(BaseRunLogStore):
  run_log = RunLog(
  run_id=run_id,
  dag_hash=dag_hash,
- use_cached=use_cached,
  tag=tag,
- original_run_id=original_run_id,
  status=status,
  )
  self.write_to_folder(run_log)
@@ -305,9 +305,7 @@ class ChunkedRunLogStore(BaseRunLogStore):
  run_log = RunLog(
  run_id=run_id,
  dag_hash=dag_hash,
- use_cached=use_cached,
  tag=tag,
- original_run_id=original_run_id,
  status=status,
  )

@@ -3,9 +3,9 @@ from __future__ import annotations
  import logging
  import os
  from abc import ABC, abstractmethod
- from typing import Any, Dict, List, Optional, Union
+ from typing import Any, Callable, Dict, List, Optional, Union

- from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, computed_field, field_validator, model_validator
+ from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, computed_field, model_validator
  from rich import print
  from typing_extensions import Self

@@ -15,11 +15,8 @@ from runnable.nodes import TraversalNode

  logger = logging.getLogger(defaults.LOGGER_NAME)

- StepType = Union["Stub", "Task", "Success", "Fail", "Parallel", "Map"]
- TraversalTypes = Union["Stub", "Task", "Parallel", "Map"]
-
-
- ALLOWED_COMMAND_TYPES = ["shell", "python", "notebook"]
+ StepType = Union["Stub", "PythonTask", "NotebookTask", "ShellTask", "Success", "Fail", "Parallel", "Map"]
+ TraversalTypes = Union["Stub", "PythonTask", "NotebookTask", "ShellTask", "Parallel", "Map"]


  class Catalog(BaseModel):
@@ -106,10 +103,7 @@ class BaseTraversal(ABC, BaseModel):
  ...


- ## TODO: Add python task, shell task, and notebook task.
-
-
- class Task(BaseTraversal):
+ class BaseTask(BaseTraversal):
  """
  An execution node of the pipeline.
  Please refer to [concepts](concepts/task.md) for more information.
@@ -157,41 +151,166 @@ class Task(BaseTraversal):

  """

- command: str = Field(alias="command")
- command_type: str = Field(default="python")
  catalog: Optional[Catalog] = Field(default=None, alias="catalog")
  overrides: Dict[str, Any] = Field(default_factory=dict, alias="overrides")

+ def create_node(self) -> TaskNode:
+ if not self.next_node:
+ if not (self.terminate_with_failure or self.terminate_with_success):
+ raise AssertionError("A node not being terminated must have a user defined next node")
+
+ print(self.model_dump(exclude_none=True))
+ return TaskNode.parse_from_config(self.model_dump(exclude_none=True))
+
+
+ class PythonTask(BaseTask):
+ """
+ An execution node of the pipeline of python functions.
+ Please refer to [concepts](concepts/task.md) for more information.
+
+ Attributes:
+ name (str): The name of the node.
+ function (callable): The function to execute.
+ catalog (Optional[Catalog]): The catalog to sync data from/to.
+ Please see Catalog about the structure of the catalog.
+ overrides (Dict[str, Any]): Any overrides to the command.
+ Individual tasks can override the global configuration config by referring to the
+ specific override.
+
+ For example,
+ ### Global configuration
+ ```yaml
+ executor:
+ type: local-container
+ config:
+ docker_image: "runnable/runnable:latest"
+ overrides:
+ custom_docker_image:
+ docker_image: "runnable/runnable:custom"
+ ```
+ ### Task specific configuration
+ ```python
+ task = PythonTask(name="task", function="function'",
+ overrides={'local-container': custom_docker_image})
+ ```
+
+ terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+ terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
+ on_failure (str): The name of the node to execute if the step fails.
+
+ """
+
+ function: Callable = Field(exclude=True)
+
+ @computed_field
+ def command_type(self) -> str:
+ return "python"
+
+ @computed_field
+ def command(self) -> str:
+ module = self.function.__module__
+ name = self.function.__name__
+
+ return f"{module}.{name}"
+
+
+ class NotebookTask(BaseTask):
+ """
+ An execution node of the pipeline of type notebook.
+ Please refer to [concepts](concepts/task.md) for more information.
+
+ Attributes:
+ name (str): The name of the node.
+ notebook: The path to the notebook
+ catalog (Optional[Catalog]): The catalog to sync data from/to.
+ Please see Catalog about the structure of the catalog.
+ returns: A list of the names of variables to return from the notebook.
+ overrides (Dict[str, Any]): Any overrides to the command.
+ Individual tasks can override the global configuration config by referring to the
+ specific override.
+
+ For example,
+ ### Global configuration
+ ```yaml
+ executor:
+ type: local-container
+ config:
+ docker_image: "runnable/runnable:latest"
+ overrides:
+ custom_docker_image:
+ docker_image: "runnable/runnable:custom"
+ ```
+ ### Task specific configuration
+ ```python
+ task = NotebookTask(name="task", notebook="evaluation.ipynb",
+ overrides={'local-container': custom_docker_image})
+ ```
+ notebook_output_path (Optional[str]): The path to save the notebook output.
+ Only used when command_type is 'notebook', defaults to command+_out.ipynb
+ optional_ploomber_args (Optional[Dict[str, Any]]): Any optional ploomber args.
+ Only used when command_type is 'notebook', defaults to {}
+
+ terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+ terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
+ on_failure (str): The name of the node to execute if the step fails.
+
+ """
+
+ notebook: str = Field(alias="command")
+
  notebook_output_path: Optional[str] = Field(default=None, alias="notebook_output_path")
  optional_ploomber_args: Optional[Dict[str, Any]] = Field(default=None, alias="optional_ploomber_args")
- output_cell_tag: Optional[str] = Field(default=None, alias="output_cell_tag")
+ returns: List[str] = Field(default_factory=list, alias="returns")

- @field_validator("command_type", mode="before")
- @classmethod
- def validate_command_type(cls, value: str) -> str:
- if value not in ALLOWED_COMMAND_TYPES:
- raise ValueError(f"Invalid command_type: {value}")
- return value
+ @computed_field
+ def command_type(self) -> str:
+ return "notebook"

- @model_validator(mode="after")
- def check_notebook_args(self) -> "Task":
- if self.command_type != "notebook":
- assert (
- self.notebook_output_path is None
- ), "Only command_types of 'notebook' can be used with notebook_output_path"

- assert (
- self.optional_ploomber_args is None
- ), "Only command_types of 'notebook' can be used with optional_ploomber_args"
+ class ShellTask(BaseTask):
+ """
+ An execution node of the pipeline of type shell.
+ Please refer to [concepts](concepts/task.md) for more information.

- assert self.output_cell_tag is None, "Only command_types of 'notebook' can be used with output_cell_tag"
- return self
+ Attributes:
+ name (str): The name of the node.
+ command: The shell command to execute.
+ catalog (Optional[Catalog]): The catalog to sync data from/to.
+ Please see Catalog about the structure of the catalog.
+ returns: A list of the names of variables to capture from environment variables of shell.
+ overrides (Dict[str, Any]): Any overrides to the command.
+ Individual tasks can override the global configuration config by referring to the
+ specific override.

- def create_node(self) -> TaskNode:
- if not self.next_node:
- if not (self.terminate_with_failure or self.terminate_with_success):
- raise AssertionError("A node not being terminated must have a user defined next node")
- return TaskNode.parse_from_config(self.model_dump(exclude_none=True))
+ For example,
+ ### Global configuration
+ ```yaml
+ executor:
+ type: local-container
+ config:
+ docker_image: "runnable/runnable:latest"
+ overrides:
+ custom_docker_image:
+ docker_image: "runnable/runnable:custom"
+ ```
+ ### Task specific configuration
+ ```python
+ task = ShellTask(name="task", command="exit 0",
+ overrides={'local-container': custom_docker_image})
+ ```
+
+ terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+ terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
+ on_failure (str): The name of the node to execute if the step fails.
+
+ """
+
+ command: str = Field(alias="command")
+ returns: List[str] = Field(default_factory=list, alias="returns")
+
+ @computed_field
+ def command_type(self) -> str:
+ return "shell"


  class Stub(BaseTraversal):
@@ -343,7 +462,8 @@ class Pipeline(BaseModel):
  A Pipeline is a directed acyclic graph of Steps that define a workflow.

  Attributes:
- steps (List[Stub | Task | Parallel | Map | Success | Fail]): A list of Steps that make up the Pipeline.
+ steps (List[Stub | PythonTask | NotebookTask | ShellTask | Parallel | Map | Success | Fail]):
+ A list of Steps that make up the Pipeline.
  start_at (Stub | Task | Parallel | Map): The name of the first Step in the Pipeline.
  name (str, optional): The name of the Pipeline. Defaults to "".
  description (str, optional): A description of the Pipeline. Defaults to "".
@@ -440,7 +560,6 @@ class Pipeline(BaseModel):
  run_id=run_id,
  tag=tag,
  parameters_file=parameters_file,
- use_cached=use_cached,
  )

  run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value