runnable 0.3.0.tar.gz → 0.5.0.tar.gz
- {runnable-0.3.0 → runnable-0.5.0}/PKG-INFO +1 -1
- {runnable-0.3.0 → runnable-0.5.0}/pyproject.toml +2 -1
- {runnable-0.3.0 → runnable-0.5.0}/runnable/__init__.py +12 -1
- {runnable-0.3.0 → runnable-0.5.0}/runnable/cli.py +1 -4
- {runnable-0.3.0 → runnable-0.5.0}/runnable/context.py +0 -2
- {runnable-0.3.0 → runnable-0.5.0}/runnable/datastore.py +0 -4
- {runnable-0.3.0 → runnable-0.5.0}/runnable/defaults.py +1 -1
- {runnable-0.3.0 → runnable-0.5.0}/runnable/entrypoints.py +3 -16
- {runnable-0.3.0 → runnable-0.5.0}/runnable/executor.py +1 -41
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/__init__.py +4 -98
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/mocked/implementation.py +1 -26
- runnable-0.5.0/runnable/extensions/executor/retry/implementation.py +305 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/file_system/implementation.py +0 -2
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/generic_chunked.py +0 -2
- runnable-0.5.0/runnable/extensions/secrets/env_secrets/__init__.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/sdk.py +157 -38
- {runnable-0.3.0 → runnable-0.5.0}/LICENSE +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/README.md +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/catalog.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/exceptions.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/experiment_tracker.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/__init__.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/catalog/__init__.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/catalog/file_system/__init__.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/catalog/file_system/implementation.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/catalog/k8s_pvc/implementation.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/catalog/k8s_pvc/integration.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/argo/__init__.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/argo/implementation.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/argo/specification.yaml +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/k8s_job/__init__.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/k8s_job/implementation_FF.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/k8s_job/integration_FF.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/local/__init__.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/local/implementation.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/local_container/__init__.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/local_container/implementation.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/mocked/__init__.py +0 -0
- {runnable-0.3.0/runnable/extensions/experiment_tracker → runnable-0.5.0/runnable/extensions/executor/retry}/__init__.py +0 -0
- {runnable-0.3.0/runnable/extensions/experiment_tracker/mlflow → runnable-0.5.0/runnable/extensions/experiment_tracker}/__init__.py +0 -0
- {runnable-0.3.0/runnable/extensions/run_log_store → runnable-0.5.0/runnable/extensions/experiment_tracker/mlflow}/__init__.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/experiment_tracker/mlflow/implementation.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/nodes.py +0 -0
- {runnable-0.3.0/runnable/extensions/run_log_store/chunked_file_system → runnable-0.5.0/runnable/extensions/run_log_store}/__init__.py +0 -0
- {runnable-0.3.0/runnable/extensions/run_log_store/chunked_k8s_pvc → runnable-0.5.0/runnable/extensions/run_log_store/chunked_file_system}/__init__.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/chunked_file_system/implementation.py +0 -0
- {runnable-0.3.0/runnable/extensions/run_log_store/file_system → runnable-0.5.0/runnable/extensions/run_log_store/chunked_k8s_pvc}/__init__.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/db/implementation_FF.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/db/integration_FF.py +0 -0
- {runnable-0.3.0/runnable/extensions/run_log_store/k8s_pvc → runnable-0.5.0/runnable/extensions/run_log_store/file_system}/__init__.py +0 -0
- {runnable-0.3.0/runnable/extensions/secrets → runnable-0.5.0/runnable/extensions/run_log_store/k8s_pvc}/__init__.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/k8s_pvc/implementation.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/run_log_store/k8s_pvc/integration.py +0 -0
- {runnable-0.3.0/runnable/extensions/secrets/dotenv → runnable-0.5.0/runnable/extensions/secrets}/__init__.py +0 -0
- {runnable-0.3.0/runnable/extensions/secrets/env_secrets → runnable-0.5.0/runnable/extensions/secrets/dotenv}/__init__.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/secrets/dotenv/implementation.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/secrets/env_secrets/implementation.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/graph.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/integration.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/interaction.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/names.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/nodes.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/parameters.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/pickler.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/secrets.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/tasks.py +0 -0
- {runnable-0.3.0 → runnable-0.5.0}/runnable/utils.py +0 -0
{runnable-0.3.0 → runnable-0.5.0}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "runnable"
-version = "0.3.0"
+version = "0.5.0"
 description = "A Compute agnostic pipelining software"
 authors = ["Vijay Vammi <mesanthu@gmail.com>"]
 license = "Apache-2.0"
@@ -68,6 +68,7 @@ runnable= 'runnable.cli:cli'
 "local-container" = "runnable.extensions.executor.local_container.implementation:LocalContainerExecutor"
 "argo" = "runnable.extensions.executor.argo.implementation:ArgoExecutor"
 "mocked" = "runnable.extensions.executor.mocked.implementation:MockedExecutor"
+"retry" = "runnable.extensions.executor.retry.implementation:RetryExecutor"
 
 # Plugins for Catalog
 [tool.poetry.plugins."catalog"]
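The notable addition is the `retry` entry point, registering the new `RetryExecutor` alongside the existing executor plugins. A hedged way to sanity-check the registration after installing 0.5.0 (the plugin group name `executor` is inferred from the surrounding entries, since the hunk does not show the group header itself):

```python
# Hedged sketch: list the executor plugins that runnable 0.5.0 registers.
# The entry-point group name ("executor") is an assumption based on the
# entries above, not something this hunk shows.
from importlib.metadata import entry_points

executors = entry_points(group="executor")  # Python 3.10+ selectable API
print(sorted(ep.name for ep in executors))  # expect "retry" among the names
```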
{runnable-0.3.0 → runnable-0.5.0}/runnable/__init__.py

@@ -24,7 +24,18 @@ from runnable.interaction import (
     set_parameter,
     track_this,
 )  # noqa
-from runnable.sdk import
+from runnable.sdk import (
+    Stub,
+    Pipeline,
+    Parallel,
+    Map,
+    Catalog,
+    Success,
+    Fail,
+    PythonTask,
+    NotebookTask,
+    ShellTask,
+)  # noqa
 
 
 # TODO: Think of model registry as a central place to store models.
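With the task classes re-exported at the package root, pipeline definitions no longer need to reach into `runnable.sdk`. A minimal sketch, with constructor keywords taken from the sdk.py docstrings further down (treat them as illustrative):

```python
# Minimal sketch using the new top-level exports; argument names follow the
# sdk.py docstrings shown later in this diff, not a verified API reference.
from runnable import Pipeline, PythonTask

def say_hello():
    print("hello from runnable")

hello = PythonTask(name="hello", function=say_hello, terminate_with_success=True)
pipeline = Pipeline(steps=[hello], start_at=hello)
```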
{runnable-0.3.0 → runnable-0.5.0}/runnable/cli.py

@@ -41,8 +41,7 @@ def cli():
 )
 @click.option("--tag", default="", help="A tag attached to the run")
 @click.option("--run-id", help="An optional run_id, one would be generated if not provided")
-
-def execute(file, config_file, parameters_file, log_level, tag, run_id, use_cached):  # pragma: no cover
+def execute(file, config_file, parameters_file, log_level, tag, run_id):  # pragma: no cover
     """
     Execute a pipeline
 
@@ -59,7 +58,6 @@ def execute(file, config_file, parameters_file, log_level, tag, run_id, use_cach
         [default: ]
       --run-id TEXT          An optional run_id, one would be generated if not
                              provided
-      --use-cached TEXT      Provide the previous run_id to re-run.
     """
     logger.setLevel(log_level)
     entrypoints.execute(
@@ -67,7 +65,6 @@ def execute(file, config_file, parameters_file, log_level, tag, run_id, use_cach
         pipeline_file=file,
         tag=tag,
         run_id=run_id,
-        use_cached=use_cached,
         parameters_file=parameters_file,
     )
 
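So `--use-cached` is gone from the CLI entirely; re-running a pipeline is now the job of the retry executor configuration rather than a flag on `execute`. A hedged smoke test of that removal, using click's test runner:

```python
# Hedged check: after 0.5.0 the execute command should no longer advertise
# --use-cached in its help text.
from click.testing import CliRunner
from runnable.cli import cli

result = CliRunner().invoke(cli, ["execute", "--help"])
assert "--use-cached" not in result.output
```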
{runnable-0.3.0 → runnable-0.5.0}/runnable/datastore.py

@@ -169,9 +169,7 @@ class RunLog(BaseModel):
 
     run_id: str
     dag_hash: Optional[str] = None
-    use_cached: bool = False
     tag: Optional[str] = ""
-    original_run_id: Optional[str] = ""
     status: str = defaults.FAIL
     steps: OrderedDict[str, StepLog] = Field(default_factory=OrderedDict)
     parameters: Dict[str, Any] = Field(default_factory=dict)
@@ -659,9 +657,7 @@ class BufferRunLogstore(BaseRunLogStore):
         self.run_log = RunLog(
             run_id=run_id,
             dag_hash=dag_hash,
-            use_cached=use_cached,
             tag=tag,
-            original_run_id=original_run_id,
             status=status,
         )
         return self.run_log
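The re-run bookkeeping (`use_cached`, `original_run_id`) leaves the `RunLog` model itself and moves to the retry executor. Constructing a run log under 0.5.0 therefore takes only the remaining fields, roughly:

```python
# Sketch: the 0.5.0 RunLog no longer accepts use_cached / original_run_id.
from runnable.datastore import RunLog

log = RunLog(run_id="demo-run", dag_hash=None, tag="", status="PROCESSING")
```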
{runnable-0.3.0 → runnable-0.5.0}/runnable/defaults.py

@@ -35,7 +35,7 @@ class ServiceConfig(TypedDict):
     config: Mapping[str, Any]
 
 
-class
+class RunnableConfig(TypedDict, total=False):
     run_log_store: Optional[ServiceConfig]
     secrets: Optional[ServiceConfig]
     catalog: Optional[ServiceConfig]
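The renamed `RunnableConfig` is a `total=False` TypedDict, so every service section is optional and individually nullable. A sketch using only the keys visible in this hunk:

```python
# Sketch: total=False means any subset of the keys type-checks.
from runnable.defaults import RunnableConfig

empty: RunnableConfig = {}                    # valid: no sections configured
partial: RunnableConfig = {"secrets": None}   # valid: values are Optional[ServiceConfig]
```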
{runnable-0.3.0 → runnable-0.5.0}/runnable/entrypoints.py

@@ -9,12 +9,12 @@ from rich import print
 
 import runnable.context as context
 from runnable import defaults, graph, utils
-from runnable.defaults import
+from runnable.defaults import RunnableConfig, ServiceConfig
 
 logger = logging.getLogger(defaults.LOGGER_NAME)
 
 
-def get_default_configs() ->
+def get_default_configs() -> RunnableConfig:
     """
     User can provide extensions as part of their code base, runnable-config.yaml provides the place to put them.
     """
@@ -37,7 +37,6 @@ def prepare_configurations(
     configuration_file: str = "",
     pipeline_file: str = "",
     tag: str = "",
-    use_cached: str = "",
     parameters_file: str = "",
     force_local_executor: bool = False,
 ) -> context.Context:
@@ -51,7 +50,6 @@
         pipeline_file (str): The config/dag file
         run_id (str): The run id of the run.
         tag (str): If a tag is provided at the run time
-        use_cached (str): Provide the run_id of the older run
 
     Returns:
         executor.BaseExecutor : A prepared executor as per the dag/config
@@ -64,7 +62,7 @@
     if configuration_file:
         templated_configuration = utils.load_yaml(configuration_file) or {}
 
-    configuration:
+    configuration: RunnableConfig = cast(RunnableConfig, templated_configuration)
 
     # Run log settings, configuration over-rides everything
     run_log_config: Optional[ServiceConfig] = configuration.get("run_log_store", None)
@@ -141,11 +139,6 @@
     run_context.pipeline_file = pipeline_file
     run_context.dag = dag
 
-    run_context.use_cached = False
-    if use_cached:
-        run_context.use_cached = True
-        run_context.original_run_id = use_cached
-
     context.run_context = run_context
 
     return run_context
@@ -156,7 +149,6 @@ def execute(
     pipeline_file: str,
     tag: str = "",
     run_id: str = "",
-    use_cached: str = "",
     parameters_file: str = "",
 ):
     # pylint: disable=R0914,R0913
@@ -168,10 +160,8 @@ def execute(
         pipeline_file (str): The config/dag file
         run_id (str): The run id of the run.
         tag (str): If a tag is provided at the run time
-        use_cached (str): The previous run_id to use.
         parameters_file (str): The parameters being sent in to the application
     """
-    # Re run settings
    run_id = utils.generate_run_id(run_id=run_id)
 
     run_context = prepare_configurations(
@@ -179,7 +169,6 @@ def execute(
         pipeline_file=pipeline_file,
         run_id=run_id,
         tag=tag,
-        use_cached=use_cached,
         parameters_file=parameters_file,
     )
     print("Working with context:")
@@ -231,7 +220,6 @@ def execute_single_node(
         pipeline_file=pipeline_file,
         run_id=run_id,
         tag=tag,
-        use_cached="",
         parameters_file=parameters_file,
     )
     print("Working with context:")
@@ -416,7 +404,6 @@ def fan(
         pipeline_file=pipeline_file,
         run_id=run_id,
         tag=tag,
-        use_cached="",
         parameters_file=parameters_file,
     )
     print("Working with context:")
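The net effect of these hunks is that `use_cached` is stripped out of the whole entrypoint chain. Calling into `entrypoints.execute` under 0.5.0 would look roughly like the sketch below; the `configuration_file` keyword is an assumption mirrored from `prepare_configurations`, since the `def execute(` hunk does not show its first parameters:

```python
# Hedged sketch of the slimmed-down 0.5.0 call; no use_cached anywhere.
from runnable import entrypoints

entrypoints.execute(
    configuration_file="runnable-config.yaml",  # assumed keyword, mirrors prepare_configurations
    pipeline_file="pipeline.yaml",
    tag="",
    run_id="",
    parameters_file="",
)
```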
{runnable-0.3.0 → runnable-0.5.0}/runnable/executor.py

@@ -9,7 +9,7 @@ from pydantic import BaseModel, ConfigDict
 
 import runnable.context as context
 from runnable import defaults
-from runnable.datastore import DataCatalog,
+from runnable.datastore import DataCatalog, StepLog
 from runnable.defaults import TypeMapVariable
 from runnable.graph import Graph
 
@@ -36,9 +36,6 @@ class BaseExecutor(ABC, BaseModel):
 
     overrides: dict = {}
 
-    # TODO: This needs to go away
-    _previous_run_log: Optional[RunLog] = None
-    _single_step: str = ""
     _local: bool = False  # This is a flag to indicate whether the executor is local or not.
 
     _context_step_log = None  # type : StepLog
@@ -60,21 +57,6 @@ class BaseExecutor(ABC, BaseModel):
         """
         ...
 
-    # TODO: This needs to go away
-    @abstractmethod
-    def _set_up_for_re_run(self, parameters: Dict[str, Any]) -> None:
-        """
-        Set up the executor for using a previous execution.
-
-        Retrieve the older run log, error out if it does not exist.
-        Sync the catalogs from the previous run log with the current one.
-
-        Update the parameters of this execution with the previous one. The previous one take precedence.
-
-        Args:
-            parameters (Dict[str, Any]): The parameters for the current execution.
-        """
-
     @abstractmethod
     def _set_up_run_log(self, exists_ok=False):
         """
@@ -293,28 +275,6 @@ class BaseExecutor(ABC, BaseModel):
         """
         ...
 
-    # TODO: This needs to go away
-    @abstractmethod
-    def _is_step_eligible_for_rerun(self, node: BaseNode, map_variable: TypeMapVariable = None):
-        """
-        In case of a re-run, this method checks to see if the previous run step status to determine if a re-run is
-        necessary.
-            * True: If its not a re-run.
-            * True: If its a re-run and we failed in the last run or the corresponding logs do not exist.
-            * False: If its a re-run and we succeeded in the last run.
-
-        Most cases, this logic need not be touched
-
-        Args:
-            node (Node): The node to check against re-run
-            map_variable (dict, optional): If the node if of a map state, this corresponds to the value of iterable..
-                Defaults to None.
-
-        Returns:
-            bool: Eligibility for re-run. True means re-run, False means skip to the next step.
-        """
-        ...
-
     @abstractmethod
     def send_return_code(self, stage="traversal"):
         """
{runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/__init__.py

@@ -3,12 +3,12 @@ import json
 import logging
 import os
 from abc import abstractmethod
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional
 
 from rich import print
 
 from runnable import context, defaults, exceptions, integration, parameters, utils
-from runnable.datastore import DataCatalog,
+from runnable.datastore import DataCatalog, StepLog
 from runnable.defaults import TypeMapVariable
 from runnable.executor import BaseExecutor
 from runnable.experiment_tracker import get_tracked_data
@@ -40,20 +40,6 @@ class GenericExecutor(BaseExecutor):
     def _context(self):
         return context.run_context
 
-    @property
-    def step_decorator_run_id(self):
-        """
-        TODO: Experimental feature, design is not mature yet.
-
-        This function is used by the decorator function.
-        The design idea is we can over-ride this method in different implementations to retrieve the run_id.
-        But is it really intrusive to ask to set the environmental variable runnable_RUN_ID?
-
-        Returns:
-            _type_: _description_
-        """
-        return os.environ.get("runnable_RUN_ID", None)
-
     def _get_parameters(self) -> Dict[str, Any]:
         """
         Consolidate the parameters from the environment variables
@@ -72,28 +58,6 @@ class GenericExecutor(BaseExecutor):
         params.update(parameters.get_user_set_parameters())
         return params
 
-    def _set_up_for_re_run(self, parameters: Dict[str, Any]) -> None:
-        try:
-            attempt_run_log = self._context.run_log_store.get_run_log_by_id(
-                run_id=self._context.original_run_id, full=False
-            )
-        except exceptions.RunLogNotFoundError as e:
-            msg = (
-                f"Expected a run log with id: {self._context.original_run_id} "
-                "but it does not exist in the run log store. "
-                "If the original execution was in a different environment, ensure that it is available in the current "
-                "environment."
-            )
-            logger.exception(msg)
-            raise Exception(msg) from e
-
-        # Sync the previous run log catalog to this one.
-        self._context.catalog_handler.sync_between_runs(
-            previous_run_id=self._context.original_run_id, run_id=self._context.run_id
-        )
-
-        parameters.update(cast(RunLog, attempt_run_log).parameters)
-
     def _set_up_run_log(self, exists_ok=False):
         """
         Create a run log and put that in the run log store
@@ -115,22 +79,16 @@ class GenericExecutor(BaseExecutor):
             raise
 
         # Consolidate and get the parameters
-
-
-        # TODO: This needs to go away
-        if self._context.use_cached:
-            self._set_up_for_re_run(parameters=parameters)
+        params = self._get_parameters()
 
         self._context.run_log_store.create_run_log(
             run_id=self._context.run_id,
             tag=self._context.tag,
             status=defaults.PROCESSING,
             dag_hash=self._context.dag_hash,
-            use_cached=self._context.use_cached,
-            original_run_id=self._context.original_run_id,
         )
         # Any interaction with run log store attributes should happen via API if available.
-        self._context.run_log_store.set_parameters(run_id=self._context.run_id, parameters=
+        self._context.run_log_store.set_parameters(run_id=self._context.run_id, parameters=params)
 
         # Update run_config
         run_config = utils.get_run_config()
@@ -409,17 +367,6 @@ class GenericExecutor(BaseExecutor):
             self._execute_node(node, map_variable=map_variable, **kwargs)
             return
 
-        # TODO: This needs to go away
-        # In single step
-        if (self._single_step and not node.name == self._single_step) or not self._is_step_eligible_for_rerun(
-            node, map_variable=map_variable
-        ):
-            # If the node name does not match, we move on to the next node.
-            # If previous run was successful, move on to the next step
-            step_log.mock = True
-            step_log.status = defaults.SUCCESS
-            self._context.run_log_store.add_step_log(step_log, self._context.run_id)
-            return
         # We call an internal function to iterate the sub graphs and execute them
         if node.is_composite:
             self._context.run_log_store.add_step_log(step_log, self._context.run_id)
@@ -543,47 +490,6 @@ class GenericExecutor(BaseExecutor):
         run_log = self._context.run_log_store.get_run_log_by_id(run_id=self._context.run_id, full=True)
         print(json.dumps(run_log.model_dump(), indent=4))
 
-    # TODO: This needs to go away
-    def _is_step_eligible_for_rerun(self, node: BaseNode, map_variable: TypeMapVariable = None):
-        """
-        In case of a re-run, this method checks to see if the previous run step status to determine if a re-run is
-        necessary.
-            * True: If its not a re-run.
-            * True: If its a re-run and we failed in the last run or the corresponding logs do not exist.
-            * False: If its a re-run and we succeeded in the last run.
-
-        Most cases, this logic need not be touched
-
-        Args:
-            node (Node): The node to check against re-run
-            map_variable (dict, optional): If the node if of a map state, this corresponds to the value of iterable..
-                Defaults to None.
-
-        Returns:
-            bool: Eligibility for re-run. True means re-run, False means skip to the next step.
-        """
-        if self._context.use_cached:
-            node_step_log_name = node._get_step_log_name(map_variable=map_variable)
-            logger.info(f"Scanning previous run logs for node logs of: {node_step_log_name}")
-
-            try:
-                previous_node_log = self._context.run_log_store.get_step_log(
-                    internal_name=node_step_log_name, run_id=self._context.original_run_id
-                )
-            except exceptions.StepLogNotFoundError:
-                logger.warning(f"Did not find the node {node.name} in previous run log")
-                return True  # We should re-run the node.
-
-            logger.info(f"The original step status: {previous_node_log.status}")
-
-            if previous_node_log.status == defaults.SUCCESS:
-                return False  # We need not run the node
-
-            logger.info(f"The new execution should start executing graph from this node {node.name}")
-            return True
-
-        return True
-
     def send_return_code(self, stage="traversal"):
         """
         Convenience function used by pipeline to send return code to the caller of the cli
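A note on the simplified set-up path: parameters for a fresh run are now consolidated from the environment alone, with no cached-run merge step. Roughly, as a hypothetical illustration (the `runnable_PRM_` prefix below is a guess patterned on the `runnable_RUN_ID` variable removed above; the diff does not show the real prefix):

```python
# Hypothetical illustration only: the env-var prefix is a guess.
import json
import os

os.environ["runnable_PRM_learning_rate"] = json.dumps(0.01)

# GenericExecutor._set_up_run_log() now just does:
#   params = self._get_parameters()        # reads JSON-encoded env vars
#   ...create_run_log(...)                 # no use_cached / original_run_id
#   ...set_parameters(run_id=..., parameters=params)
```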
{runnable-0.3.0 → runnable-0.5.0}/runnable/extensions/executor/mocked/implementation.py

@@ -32,9 +32,6 @@ class MockedExecutor(GenericExecutor):
     def _context(self):
         return context.run_context
 
-    def _set_up_for_re_run(self, parameters: Dict[str, Any]) -> None:
-        raise Exception("MockedExecutor does not support re-run")
-
     def execute_from_graph(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
         """
         This is the entry point to from the graph execution.
@@ -85,7 +82,7 @@ class MockedExecutor(GenericExecutor):
             # node is not patched, so mock it
             step_log.mock = True
         else:
-            # node is
+            # node is patched
             # command as the patch value
             executable_type = node_to_send.executable.__class__
             executable = create_executable(
@@ -94,7 +91,6 @@ class MockedExecutor(GenericExecutor):
                 node_name=node.name,
             )
             node_to_send.executable = executable
-            pass
 
         # Executor specific way to trigger a job
         self._context.run_log_store.add_step_log(step_log, self._context.run_id)
@@ -117,27 +113,6 @@ class MockedExecutor(GenericExecutor):
         self.prepare_for_node_execution()
         self.execute_node(node=node, map_variable=map_variable, **kwargs)
 
-    # TODO: This needs to go away
-    def _is_step_eligible_for_rerun(self, node: BaseNode, map_variable: TypeMapVariable = None):
-        """
-        In case of a re-run, this method checks to see if the previous run step status to determine if a re-run is
-        necessary.
-            * True: If its not a re-run.
-            * True: If its a re-run and we failed in the last run or the corresponding logs do not exist.
-            * False: If its a re-run and we succeeded in the last run.
-
-        Most cases, this logic need not be touched
-
-        Args:
-            node (Node): The node to check against re-run
-            map_variable (dict, optional): If the node if of a map state, this corresponds to the value of iterable..
-                Defaults to None.
-
-        Returns:
-            bool: Eligibility for re-run. True means re-run, False means skip to the next step.
-        """
-        return True
-
     def _resolve_executor_config(self, node: BaseNode):
         """
         The overrides section can contain specific over-rides to an global executor config.
runnable-0.5.0/runnable/extensions/executor/retry/implementation.py (new file)

@@ -0,0 +1,305 @@
+import copy
+import json
+import logging
+from functools import cached_property
+from typing import Any, Dict, List, Optional
+
+from rich import print
+
+from runnable import context, defaults, exceptions, parameters, utils
+from runnable.datastore import DataCatalog, RunLog
+from runnable.defaults import TypeMapVariable
+from runnable.experiment_tracker import get_tracked_data
+from runnable.extensions.executor import GenericExecutor
+from runnable.graph import Graph
+from runnable.nodes import BaseNode
+
+logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+class RetryExecutor(GenericExecutor):
+    """
+    The skeleton of an executor class.
+    Any implementation of an executor should inherit this class and over-ride accordingly.
+
+    This is a loaded base class which has a lot of methods already implemented for "typical" executions.
+    Look at the function docs to understand how to use them appropriately.
+
+    For any implementation:
+    1). Who/when should the run log be set up?
+    2). Who/When should the step log be set up?
+
+    """
+
+    service_name: str = "retry"
+    service_type: str = "executor"
+    run_id: str
+
+    _local: bool = True
+    _original_run_log: Optional[RunLog] = None
+
+    @property
+    def _context(self):
+        return context.run_context
+
+    @cached_property
+    def original_run_log(self):
+        self.original_run_log = self._context.run_log_store.get_run_log_by_id(
+            run_id=self.run_id,
+            full=True,
+        )
+
+    def _set_up_for_re_run(self, params: Dict[str, Any]) -> None:
+        # Sync the previous run log catalog to this one.
+        self._context.catalog_handler.sync_between_runs(previous_run_id=self.run_id, run_id=self._context.run_id)
+
+        params.update(self.original_run_log.parameters)
+
+    def _set_up_run_log(self, exists_ok=False):
+        """
+        Create a run log and put that in the run log store
+
+        If exists_ok, we allow the run log to be already present in the run log store.
+        """
+        super()._set_up_run_log(exists_ok=exists_ok)
+
+        params = self._get_parameters()
+
+        self._set_up_for_re_run(params=params)
+
+    def _execute_node(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
+        """
+        This is the entry point when we do the actual execution of the function.
+        DO NOT Over-ride this function.
+
+        While in interactive execution, we just compute, in 3rd party interactive execution, we need to reach
+        this function.
+
+        In most cases,
+        * We get the corresponding step_log of the node and the parameters.
+        * We sync the catalog to GET any data sets that are in the catalog
+        * We call the execute method of the node for the actual compute and retry it as many times as asked.
+        * If the node succeeds, we get any of the user defined metrics provided by the user.
+        * We sync the catalog to PUT any data sets that are in the catalog.
+
+        Args:
+            node (Node): The node to execute
+            map_variable (dict, optional): If the node is of a map state, map_variable is the value of the iterable.
+                Defaults to None.
+        """
+        step_log = self._context.run_log_store.get_step_log(node._get_step_log_name(map_variable), self._context.run_id)
+        """
+        By now, all the parameters are part of the run log as a dictionary.
+        We set them as environment variables, serialized as json strings.
+        """
+        params = self._context.run_log_store.get_parameters(run_id=self._context.run_id)
+        params_copy = copy.deepcopy(params)
+        # This is only for the API to work.
+        parameters.set_user_defined_params_as_environment_variables(params)
+
+        attempt = self.step_attempt_number
+        logger.info(f"Trying to execute node: {node.internal_name}, attempt : {attempt}")
+
+        attempt_log = self._context.run_log_store.create_attempt_log()
+        self._context_step_log = step_log
+        self._context_node = node
+
+        data_catalogs_get: Optional[List[DataCatalog]] = self._sync_catalog(step_log, stage="get")
+        try:
+            attempt_log = node.execute(
+                executor=self,
+                mock=step_log.mock,
+                map_variable=map_variable,
+                params=params,
+                **kwargs,
+            )
+        except Exception as e:
+            # Any exception here is a runnable exception as node suppresses exceptions.
+            msg = "This is clearly runnable fault, please report a bug and the logs"
+            logger.exception(msg)
+            raise Exception(msg) from e
+        finally:
+            attempt_log.attempt_number = attempt
+            step_log.attempts.append(attempt_log)
+
+            tracked_data = get_tracked_data()
+
+            self._context.experiment_tracker.publish_data(tracked_data)
+            parameters_out = attempt_log.output_parameters
+
+            if attempt_log.status == defaults.FAIL:
+                logger.exception(f"Node: {node} failed")
+                step_log.status = defaults.FAIL
+            else:
+                # Mock is always set to False, bad design??
+                # TODO: Stub nodes should not sync back data
+                # TODO: Errors in catalog syncing should point to Fail step
+                # TODO: Even for a failed execution, the catalog can happen
+                step_log.status = defaults.SUCCESS
+                self._sync_catalog(step_log, stage="put", synced_catalogs=data_catalogs_get)
+                step_log.user_defined_metrics = tracked_data
+
+                diff_parameters = utils.diff_dict(params_copy, parameters_out)
+                self._context.run_log_store.set_parameters(self._context.run_id, diff_parameters)
+
+            # Remove the step context
+            parameters.get_user_set_parameters(remove=True)
+            self._context_step_log = None
+            self._context_node = None  # type: ignore
+            self._context_metrics = {}
+
+            self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+
+    def execute_from_graph(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
+        """
+        This is the entry point to from the graph execution.
+
+        While the self.execute_graph is responsible for traversing the graph, this function is responsible for
+        actual execution of the node.
+
+        If the node type is:
+            * task : We can delegate to _execute_node after checking the eligibility for re-run in cases of a re-run
+            * success: We can delegate to _execute_node
+            * fail: We can delegate to _execute_node
+
+        For nodes that are internally graphs:
+            * parallel: Delegate the responsibility of execution to the node.execute_as_graph()
+            * dag: Delegate the responsibility of execution to the node.execute_as_graph()
+            * map: Delegate the responsibility of execution to the node.execute_as_graph()
+
+        Transpilers will NEVER use this method and will NEVER call ths method.
+        This method should only be used by interactive executors.
+
+        Args:
+            node (Node): The node to execute
+            map_variable (dict, optional): If the node if of a map state, this corresponds to the value of iterable.
+                Defaults to None.
+        """
+        step_log = self._context.run_log_store.create_step_log(node.name, node._get_step_log_name(map_variable))
+
+        self.add_code_identities(node=node, step_log=step_log)
+
+        step_log.step_type = node.node_type
+        step_log.status = defaults.PROCESSING
+
+        # Add the step log to the database as per the situation.
+        # If its a terminal node, complete it now
+        if node.node_type in ["success", "fail"]:
+            self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+            self._execute_node(node, map_variable=map_variable, **kwargs)
+            return
+
+        # In single step
+        if not self._is_step_eligible_for_rerun(node, map_variable=map_variable):
+            # If the node name does not match, we move on to the next node.
+            # If previous run was successful, move on to the next step
+            step_log.mock = True
+            step_log.status = defaults.SUCCESS
+            self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+            return
+
+        # We call an internal function to iterate the sub graphs and execute them
+        if node.is_composite:
+            self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+            node.execute_as_graph(map_variable=map_variable, **kwargs)
+            return
+
+        # Executor specific way to trigger a job
+        self._context.run_log_store.add_step_log(step_log, self._context.run_id)
+        self.execute_node(node=node, map_variable=map_variable, **kwargs)
+
+    def execute_graph(self, dag: Graph, map_variable: TypeMapVariable = None, **kwargs):
+        """
+        The parallelization is controlled by the nodes and not by this function.
+
+        Transpilers should over ride this method to do the translation of dag to the platform specific way.
+        Interactive methods should use this to traverse and execute the dag.
+            - Use execute_from_graph to handle sub-graphs
+
+        Logically the method should:
+            * Start at the dag.start_at of the dag.
+            * Call the self.execute_from_graph(node)
+            * depending upon the status of the execution, either move to the success node or failure node.
+
+        Args:
+            dag (Graph): The directed acyclic graph to traverse and execute.
+            map_variable (dict, optional): If the node if of a map state, this corresponds to the value of the iterable.
+                    Defaults to None.
+        """
+        current_node = dag.start_at
+        previous_node = None
+        logger.info(f"Running the execution with {current_node}")
+
+        while True:
+            working_on = dag.get_node_by_name(current_node)
+
+            if previous_node == current_node:
+                raise Exception("Potentially running in a infinite loop")
+
+            previous_node = current_node
+
+            logger.info(f"Creating execution log for {working_on}")
+            self.execute_from_graph(working_on, map_variable=map_variable, **kwargs)
+
+            _, next_node_name = self._get_status_and_next_node_name(
+                current_node=working_on, dag=dag, map_variable=map_variable
+            )
+
+            if working_on.node_type in ["success", "fail"]:
+                break
+
+            current_node = next_node_name
+
+        run_log = self._context.run_log_store.get_branch_log(
+            working_on._get_branch_log_name(map_variable), self._context.run_id
+        )
+
+        branch = "graph"
+        if working_on.internal_branch_name:
+            branch = working_on.internal_branch_name
+
+        logger.info(f"Finished execution of the {branch} with status {run_log.status}")
+
+        # get the final run log
+        if branch == "graph":
+            run_log = self._context.run_log_store.get_run_log_by_id(run_id=self._context.run_id, full=True)
+        print(json.dumps(run_log.model_dump(), indent=4))
+
+    def _is_step_eligible_for_rerun(self, node: BaseNode, map_variable: TypeMapVariable = None):
+        """
+        In case of a re-run, this method checks to see if the previous run step status to determine if a re-run is
+        necessary.
+            * True: If its not a re-run.
+            * True: If its a re-run and we failed in the last run or the corresponding logs do not exist.
+            * False: If its a re-run and we succeeded in the last run.
+
+        Most cases, this logic need not be touched
+
+        Args:
+            node (Node): The node to check against re-run
+            map_variable (dict, optional): If the node if of a map state, this corresponds to the value of iterable..
+                Defaults to None.
+
+        Returns:
+            bool: Eligibility for re-run. True means re-run, False means skip to the next step.
+        """
+
+        node_step_log_name = node._get_step_log_name(map_variable=map_variable)
+        logger.info(f"Scanning previous run logs for node logs of: {node_step_log_name}")
+
+        try:
+            previous_attempt_log, _ = self.original_run_log.search_step_by_internal_name(node_step_log_name)
+        except exceptions.StepLogNotFoundError:
+            logger.warning(f"Did not find the node {node.name} in previous run log")
+            return True  # We should re-run the node.
+
+        logger.info(f"The original step status: {previous_attempt_log.status}")
+
+        if previous_attempt_log.status == defaults.SUCCESS:
+            return False  # We need not run the node
+
+        logger.info(f"The new execution should start executing graph from this node {node.name}")
+        return True
+
+    def execute_node(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
+        self._execute_node(node, map_variable=map_variable, **kwargs)
{runnable-0.3.0 → runnable-0.5.0}/runnable/sdk.py

@@ -3,9 +3,9 @@ from __future__ import annotations
 import logging
 import os
 from abc import ABC, abstractmethod
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Callable, Dict, List, Optional, Union
 
-from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, computed_field,
+from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, computed_field, model_validator
 from rich import print
 from typing_extensions import Self
 
@@ -15,11 +15,8 @@ from runnable.nodes import TraversalNode
 
 logger = logging.getLogger(defaults.LOGGER_NAME)
 
-StepType = Union["Stub", "
-TraversalTypes = Union["Stub", "
-
-
-ALLOWED_COMMAND_TYPES = ["shell", "python", "notebook"]
+StepType = Union["Stub", "PythonTask", "NotebookTask", "ShellTask", "Success", "Fail", "Parallel", "Map"]
+TraversalTypes = Union["Stub", "PythonTask", "NotebookTask", "ShellTask", "Parallel", "Map"]
 
 
 class Catalog(BaseModel):
@@ -106,10 +103,7 @@ class BaseTraversal(ABC, BaseModel):
         ...
 
 
-
-
-
-class Task(BaseTraversal):
+class BaseTask(BaseTraversal):
     """
     An execution node of the pipeline.
     Please refer to [concepts](concepts/task.md) for more information.
@@ -157,41 +151,166 @@ class Task(BaseTraversal):
 
     """
 
-    command: str = Field(alias="command")
-    command_type: str = Field(default="python")
     catalog: Optional[Catalog] = Field(default=None, alias="catalog")
     overrides: Dict[str, Any] = Field(default_factory=dict, alias="overrides")
 
+    def create_node(self) -> TaskNode:
+        if not self.next_node:
+            if not (self.terminate_with_failure or self.terminate_with_success):
+                raise AssertionError("A node not being terminated must have a user defined next node")
+
+        print(self.model_dump(exclude_none=True))
+        return TaskNode.parse_from_config(self.model_dump(exclude_none=True))
+
+
+class PythonTask(BaseTask):
+    """
+    An execution node of the pipeline of python functions.
+    Please refer to [concepts](concepts/task.md) for more information.
+
+    Attributes:
+        name (str): The name of the node.
+        function (callable): The function to execute.
+        catalog (Optional[Catalog]): The catalog to sync data from/to.
+            Please see Catalog about the structure of the catalog.
+        overrides (Dict[str, Any]): Any overrides to the command.
+            Individual tasks can override the global configuration config by referring to the
+            specific override.
+
+            For example,
+            ### Global configuration
+            ```yaml
+            executor:
+              type: local-container
+              config:
+                docker_image: "runnable/runnable:latest"
+                overrides:
+                  custom_docker_image:
+                    docker_image: "runnable/runnable:custom"
+            ```
+            ### Task specific configuration
+            ```python
+            task = PythonTask(name="task", function="function'",
+                    overrides={'local-container': custom_docker_image})
+            ```
+
+        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+        terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
+        on_failure (str): The name of the node to execute if the step fails.
+
+    """
+
+    function: Callable = Field(exclude=True)
+
+    @computed_field
+    def command_type(self) -> str:
+        return "python"
+
+    @computed_field
+    def command(self) -> str:
+        module = self.function.__module__
+        name = self.function.__name__
+
+        return f"{module}.{name}"
+
+
+class NotebookTask(BaseTask):
+    """
+    An execution node of the pipeline of type notebook.
+    Please refer to [concepts](concepts/task.md) for more information.
+
+    Attributes:
+        name (str): The name of the node.
+        notebook: The path to the notebook
+        catalog (Optional[Catalog]): The catalog to sync data from/to.
+            Please see Catalog about the structure of the catalog.
+        returns: A list of the names of variables to return from the notebook.
+        overrides (Dict[str, Any]): Any overrides to the command.
+            Individual tasks can override the global configuration config by referring to the
+            specific override.
+
+            For example,
+            ### Global configuration
+            ```yaml
+            executor:
+              type: local-container
+              config:
+                docker_image: "runnable/runnable:latest"
+                overrides:
+                  custom_docker_image:
+                    docker_image: "runnable/runnable:custom"
+            ```
+            ### Task specific configuration
+            ```python
+            task = NotebookTask(name="task", notebook="evaluation.ipynb",
+                    overrides={'local-container': custom_docker_image})
+            ```
+        notebook_output_path (Optional[str]): The path to save the notebook output.
+            Only used when command_type is 'notebook', defaults to command+_out.ipynb
+        optional_ploomber_args (Optional[Dict[str, Any]]): Any optional ploomber args.
+            Only used when command_type is 'notebook', defaults to {}
+
+        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+        terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
+        on_failure (str): The name of the node to execute if the step fails.
+
+    """
+
+    notebook: str = Field(alias="command")
+
     notebook_output_path: Optional[str] = Field(default=None, alias="notebook_output_path")
     optional_ploomber_args: Optional[Dict[str, Any]] = Field(default=None, alias="optional_ploomber_args")
-
+    returns: List[str] = Field(default_factory=list, alias="returns")
 
-    @
-
-
-        if value not in ALLOWED_COMMAND_TYPES:
-            raise ValueError(f"Invalid command_type: {value}")
-        return value
+    @computed_field
+    def command_type(self) -> str:
+        return "notebook"
 
-    @model_validator(mode="after")
-    def check_notebook_args(self) -> "Task":
-        if self.command_type != "notebook":
-            assert (
-                self.notebook_output_path is None
-            ), "Only command_types of 'notebook' can be used with notebook_output_path"
 
-
-
+class ShellTask(BaseTask):
+    """
+    An execution node of the pipeline of type shell.
+    Please refer to [concepts](concepts/task.md) for more information.
 
-
-
+    Attributes:
+        name (str): The name of the node.
+        command: The shell command to execute.
+        catalog (Optional[Catalog]): The catalog to sync data from/to.
+            Please see Catalog about the structure of the catalog.
+        returns: A list of the names of variables to capture from environment variables of shell.
+        overrides (Dict[str, Any]): Any overrides to the command.
+            Individual tasks can override the global configuration config by referring to the
+            specific override.
 
-
-
-
-
+            For example,
+            ### Global configuration
+            ```yaml
+            executor:
+              type: local-container
+              config:
+                docker_image: "runnable/runnable:latest"
+                overrides:
+                  custom_docker_image:
+                    docker_image: "runnable/runnable:custom"
+            ```
+            ### Task specific configuration
+            ```python
+            task = ShellTask(name="task", command="exit 0",
+                    overrides={'local-container': custom_docker_image})
+            ```
+
+        terminate_with_failure (bool): Whether to terminate the pipeline with a failure after this node.
+        terminate_with_success (bool): Whether to terminate the pipeline with a success after this node.
+        on_failure (str): The name of the node to execute if the step fails.
+
+    """
+
+    command: str = Field(alias="command")
+    returns: List[str] = Field(default_factory=list, alias="returns")
+
+    @computed_field
+    def command_type(self) -> str:
+        return "shell"
 
 
 class Stub(BaseTraversal):
@@ -343,7 +462,8 @@ class Pipeline(BaseModel):
     A Pipeline is a directed acyclic graph of Steps that define a workflow.
 
     Attributes:
-        steps (List[Stub |
+        steps (List[Stub | PythonTask | NotebookTask | ShellTask | Parallel | Map | Success | Fail]):
+            A list of Steps that make up the Pipeline.
         start_at (Stub | Task | Parallel | Map): The name of the first Step in the Pipeline.
         name (str, optional): The name of the Pipeline. Defaults to "".
         description (str, optional): A description of the Pipeline. Defaults to "".
@@ -440,7 +560,6 @@ class Pipeline(BaseModel):
             run_id=run_id,
             tag=tag,
             parameters_file=parameters_file,
-            use_cached=use_cached,
         )
 
         run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value