runnable-0.2.0-py3-none-any.whl → runnable-0.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runnable/catalog.py +2 -2
- runnable/cli.py +5 -5
- runnable/datastore.py +3 -2
- runnable/defaults.py +21 -18
- runnable/entrypoints.py +41 -77
- runnable/executor.py +6 -16
- runnable/extensions/catalog/file_system/implementation.py +2 -1
- runnable/extensions/executor/__init__.py +20 -9
- runnable/extensions/executor/argo/implementation.py +6 -5
- runnable/extensions/executor/argo/specification.yaml +1 -1
- runnable/extensions/executor/k8s_job/implementation_FF.py +4 -4
- runnable/extensions/executor/local/implementation.py +1 -0
- runnable/extensions/executor/local_container/implementation.py +4 -10
- runnable/extensions/executor/mocked/implementation.py +2 -33
- runnable/extensions/nodes.py +40 -60
- runnable/integration.py +2 -2
- runnable/interaction.py +9 -4
- runnable/nodes.py +19 -7
- runnable/parameters.py +1 -1
- runnable/sdk.py +27 -25
- runnable/tasks.py +124 -121
- runnable/utils.py +11 -11
- {runnable-0.2.0.dist-info → runnable-0.3.0.dist-info}/METADATA +53 -53
- {runnable-0.2.0.dist-info → runnable-0.3.0.dist-info}/RECORD +27 -27
- {runnable-0.2.0.dist-info → runnable-0.3.0.dist-info}/WHEEL +1 -1
- {runnable-0.2.0.dist-info → runnable-0.3.0.dist-info}/LICENSE +0 -0
- {runnable-0.2.0.dist-info → runnable-0.3.0.dist-info}/entry_points.txt +0 -0
runnable/catalog.py
CHANGED
@@ -43,7 +43,7 @@ class BaseCatalog(ABC, BaseModel):
         Args:
             name (str): The name of the catalog item
             run_id (str): The run_id of the run.
-            compute_data_folder (str, optional): The compute data folder. Defaults to magnus default (data/)
+            compute_data_folder (str, optional): The compute data folder. Defaults to runnable default (data/)
 
         Raises:
             NotImplementedError: Base class, hence not implemented
@@ -70,7 +70,7 @@ class BaseCatalog(ABC, BaseModel):
         Args:
             name (str): The name of the catalog item.
             run_id (str): The run_id of the run.
-            compute_data_folder (str, optional): The compute data folder. Defaults to magnus default (data/)
+            compute_data_folder (str, optional): The compute data folder. Defaults to runnable default (data/)
             synced_catalogs (dict, optional): Any previously synced catalogs. Defaults to None.
 
         Raises:
runnable/cli.py
CHANGED
@@ -9,7 +9,7 @@ from runnable import defaults, entrypoints
 logger = logging.getLogger(defaults.LOGGER_NAME)
 
 
-@with_plugins(iter_entry_points("magnus.cli_plugins"))
+@with_plugins(iter_entry_points("runnable.cli_plugins"))
 @click.group()
 @click.version_option()
 def cli():
@@ -46,7 +46,7 @@ def execute(file, config_file, parameters_file, log_level, tag, run_id, use_cach
     """
     Execute a pipeline
 
-    Usage: magnus execute [OPTIONS]
+    Usage: runnable execute [OPTIONS]
 
     Options:
      -f, --file TEXT        The pipeline definition file [default: pipeline.yaml]
@@ -97,9 +97,9 @@ def execute(file, config_file, parameters_file, log_level, tag, run_id, use_cach
 @click.option("--tag", default="", help="A tag attached to the run")
 def execute_single_node(run_id, step_name, map_variable, file, config_file, parameters_file, log_level, tag):
     """
-    Internal entrypoint for magnus to execute a single node.
+    Internal entrypoint for runnable to execute a single node.
 
-    Other than local executor, every other executor uses this entry point to execute a step in the context of magnus.
+    Other than local executor, every other executor uses this entry point to execute a step in the context of runnable.
     Only chained executions should use this method. Unchained executions should use execute_
     """
     logger.setLevel(log_level)
@@ -248,7 +248,7 @@ def execute_function(
 @click.option("--tag", default="", help="A tag attached to the run")
 def fan(run_id, step_name, mode, map_variable, file, config_file, parameters_file, log_level, tag):
     """
-    Internal entrypoint for magnus to fan in or out a composite node.
+    Internal entrypoint for runnable to fan in or out a composite node.
 
     Only 3rd party orchestrators should use this entry point.
     """
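The `cli_plugins` entry-point group is renamed from `magnus.cli_plugins` to `runnable.cli_plugins`, so third-party subcommands must re-register under the new group name. A minimal sketch of such a plugin, assuming the usual click-plugins wiring; the plugin name and module path here are hypothetical:

```python
import click


@click.command()
def hello():
    """A third-party subcommand, surfaced as `runnable hello`."""
    click.echo("hello from a plugin")


# Declared by the plugin package under the renamed group, for example in pyproject.toml:
# [project.entry-points."runnable.cli_plugins"]
# hello = "my_plugin.cli:hello"
```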
runnable/datastore.py
CHANGED
@@ -12,7 +12,7 @@ from runnable import defaults, exceptions
 logger = logging.getLogger(defaults.LOGGER_NAME)
 
 # Once defined these classes are sealed to any additions unless a default is provided
-# Breaking this rule might make magnus backwardly incompatible
+# Breaking this rule might make runnable backwardly incompatible
 
 
 class DataCatalog(BaseModel, extra="allow"):
@@ -53,7 +53,8 @@ class StepAttempt(BaseModel):
     duration: str = ""  # end_time - start_time
     status: str = "FAIL"
     message: str = ""
-    parameters: Dict[str, Any] = Field(default_factory=dict)
+    input_parameters: Dict[str, Any] = Field(default_factory=dict)
+    output_parameters: Dict[str, Any] = Field(default_factory=dict)
 
 
 class CodeIdentity(BaseModel, extra="allow"):
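`StepAttempt` now separates what a step received from what it produced, replacing the single parameters snapshot used before. A small illustration of the new shape; the field values are made up:

```python
from runnable.datastore import StepAttempt

attempt = StepAttempt(
    attempt_number=1,
    status="SUCCESS",
    input_parameters={"x": 1},
    output_parameters={"x": 1, "y": 2},
)

# Only the difference between the two dicts ({"y": 2} here) ends up persisted;
# see the diff_dict call in runnable/extensions/executor/__init__.py below.
print(attempt.model_dump())
```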
runnable/defaults.py
CHANGED
@@ -12,8 +12,8 @@ except ImportError:  # pragma: no cover
     from typing_extensions import TypedDict  # type: ignore[unused-ignore]
 
 
-NAME = "magnus"
-LOGGER_NAME = "magnus"
+NAME = "runnable"
+LOGGER_NAME = "runnable"
 
 # CLI settings
 LOG_LEVEL = "WARNING"
@@ -21,7 +21,7 @@ LOG_LEVEL = "WARNING"
 
 class EXECUTION_PLAN(Enum):
     """
-    The possible execution plans for a magnus job.
+    The possible execution plans for a runnable job.
     """
 
     CHAINED = "chained"  # 121 relationship between run log and the dag.
@@ -35,7 +35,7 @@ class ServiceConfig(TypedDict):
     config: Mapping[str, Any]
 
 
-class MagnusConfig(TypedDict, total=False):
+class runnableConfig(TypedDict, total=False):
     run_log_store: Optional[ServiceConfig]
     secrets: Optional[ServiceConfig]
     catalog: Optional[ServiceConfig]
@@ -47,17 +47,20 @@ TypeMapVariable: TypeAlias = Optional[Dict[str, Union[str, int, float]]]
 
 
 # Config file environment variable
-MAGNUS_CONFIG_FILE = "MAGNUS_CONFIG_FILE"
-MAGNUS_RUN_TAG = "MAGNUS_RUN_TAG"
+RUNNABLE_CONFIG_FILE = "RUNNABLE_CONFIG_FILE"
+RUNNABLE_RUN_TAG = "RUNNABLE_RUN_TAG"
 
 # Interaction settings
-TRACK_PREFIX = "MAGNUS_TRACK_"
+TRACK_PREFIX = "RUNNABLE_TRACK_"
 STEP_INDICATOR = "_STEP_"
-PARAMETER_PREFIX = "MAGNUS_PRM_"
-MAP_VARIABLE = "MAGNUS_MAP_VARIABLE"
-VARIABLE_PREFIX = "MAGNUS_VAR_"
-ENV_RUN_ID = "MAGNUS_RUN_ID"
-ATTEMPT_NUMBER = "MAGNUS_STEP_ATTEMPT"
+PARAMETER_PREFIX = "RUNNABLE_PRM_"
+MAP_VARIABLE = "RUNNABLE_MAP_VARIABLE"
+VARIABLE_PREFIX = "RUNNABLE_VAR_"
+ENV_RUN_ID = "RUNNABLE_RUN_ID"
+ATTEMPT_NUMBER = "RUNNABLE_STEP_ATTEMPT"
+
+## Generated pipeline file
+GENERATED_PIPELINE_FILE = "generated_pipeline.yaml"
 
 # STATUS progression
 # For Branch, CREATED -> PROCESSING -> SUCCESS OR FAIL
@@ -72,7 +75,7 @@ TRIGGERED = "TRIGGERED"
 COMMAND_TYPE = "python"
 NODE_SPEC_FILE = "node_spec.yaml"
 COMMAND_FRIENDLY_CHARACTER = "%"
-DEFAULT_CONTAINER_CONTEXT_PATH = "/opt/magnus/"
+DEFAULT_CONTAINER_CONTEXT_PATH = "/opt/runnable/"
 DEFAULT_CONTAINER_DATA_PATH = "data/"
 DEFAULT_CONTAINER_OUTPUT_PARAMETERS = "parameters.json"
 
@@ -94,7 +97,7 @@ RANDOM_RUN_ID_LEN = 6
 MAX_TIME = 86400  # 1 day in seconds
 
 # User extensions
-USER_CONFIG_FILE = "magnus-config.yaml"
+USER_CONFIG_FILE = "runnable-config.yaml"
 
 # Executor settings
 ENABLE_PARALLEL = False
@@ -155,7 +158,7 @@ LOGGING_CONFIG = {
     "disable_existing_loggers": True,
     "formatters": {
         "standard": {"format": "%(asctime)s [%(levelname)s] %(name)s: %(message)s"},
-        "magnus_formatter": {"format": "%(message)s", "datefmt": "[%X]"},
+        "runnable_formatter": {"format": "%(message)s", "datefmt": "[%X]"},
     },
     "handlers": {
         "default": {
@@ -163,8 +166,8 @@ LOGGING_CONFIG = {
             "class": "logging.StreamHandler",
             "stream": "ext://sys.stdout",  # Default is stderr
         },
-        "magnus_handler": {
-            "formatter": "magnus_formatter",
+        "runnable_handler": {
+            "formatter": "runnable_formatter",
             "class": "rich.logging.RichHandler",
             "rich_tracebacks": True,
         },
@@ -174,6 +177,6 @@ LOGGING_CONFIG = {
             "handlers": ["default"],
             "propagate": True,
         },  # Root logger
-        LOGGER_NAME: {"handlers": ["magnus_handler"], "propagate": False},
+        LOGGER_NAME: {"handlers": ["runnable_handler"], "propagate": False},
     },
 }
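Every interaction variable moves from the `MAGNUS_` to the `RUNNABLE_` prefix, which is a breaking change for anything that reads or writes these variables directly. The sketch below is not runnable's own code; it only illustrates the convention the constants imply, with parameters travelling as JSON-serialized environment variables under `PARAMETER_PREFIX`:

```python
import json
import os

PARAMETER_PREFIX = "RUNNABLE_PRM_"  # from runnable/defaults.py

# A caller exports a parameter...
os.environ[PARAMETER_PREFIX + "learning_rate"] = json.dumps(0.01)


# ...and a step collects every prefixed variable back into a dict.
def read_parameters_from_env() -> dict:
    return {
        key[len(PARAMETER_PREFIX):].lower(): json.loads(value)
        for key, value in os.environ.items()
        if key.startswith(PARAMETER_PREFIX)
    }


print(read_parameters_from_env())  # {'learning_rate': 0.01}
```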
runnable/entrypoints.py
CHANGED
@@ -1,19 +1,22 @@
+import importlib
 import json
 import logging
+import os
+import sys
 from typing import Optional, cast
 
 from rich import print
 
 import runnable.context as context
 from runnable import defaults, graph, utils
-from runnable.defaults import MagnusConfig, ServiceConfig
+from runnable.defaults import ServiceConfig, runnableConfig
 
 logger = logging.getLogger(defaults.LOGGER_NAME)
 
 
-def get_default_configs() -> MagnusConfig:
+def get_default_configs() -> runnableConfig:
     """
-    User can provide extensions as part of their code base, magnus-config.yaml provides the place to put them.
+    User can provide extensions as part of their code base, runnable-config.yaml provides the place to put them.
     """
     user_configs = {}
     if utils.does_file_exist(defaults.USER_CONFIG_FILE):
@@ -53,7 +56,7 @@ def prepare_configurations(
     Returns:
         executor.BaseExecutor : A prepared executor as per the dag/config
     """
-    magnus_defaults = get_default_configs()
+    runnable_defaults = get_default_configs()
 
     variables = utils.gather_variables()
 
@@ -61,31 +64,31 @@ def prepare_configurations(
     if configuration_file:
         templated_configuration = utils.load_yaml(configuration_file) or {}
 
-    configuration: MagnusConfig = cast(MagnusConfig, templated_configuration)
+    configuration: runnableConfig = cast(runnableConfig, templated_configuration)
 
     # Run log settings, configuration over-rides everything
     run_log_config: Optional[ServiceConfig] = configuration.get("run_log_store", None)
     if not run_log_config:
-        run_log_config = cast(ServiceConfig, magnus_defaults.get("run_log_store", defaults.DEFAULT_RUN_LOG_STORE))
+        run_log_config = cast(ServiceConfig, runnable_defaults.get("run_log_store", defaults.DEFAULT_RUN_LOG_STORE))
     run_log_store = utils.get_provider_by_name_and_type("run_log_store", run_log_config)
 
     # Catalog handler settings, configuration over-rides everything
     catalog_config: Optional[ServiceConfig] = configuration.get("catalog", None)
     if not catalog_config:
-        catalog_config = cast(ServiceConfig, magnus_defaults.get("catalog", defaults.DEFAULT_CATALOG))
+        catalog_config = cast(ServiceConfig, runnable_defaults.get("catalog", defaults.DEFAULT_CATALOG))
     catalog_handler = utils.get_provider_by_name_and_type("catalog", catalog_config)
 
     # Secret handler settings, configuration over-rides everything
     secrets_config: Optional[ServiceConfig] = configuration.get("secrets", None)
     if not secrets_config:
-        secrets_config = cast(ServiceConfig, magnus_defaults.get("secrets", defaults.DEFAULT_SECRETS))
+        secrets_config = cast(ServiceConfig, runnable_defaults.get("secrets", defaults.DEFAULT_SECRETS))
     secrets_handler = utils.get_provider_by_name_and_type("secrets", secrets_config)
 
     # experiment tracker settings, configuration over-rides everything
     tracker_config: Optional[ServiceConfig] = configuration.get("experiment_tracker", None)
     if not tracker_config:
         tracker_config = cast(
-            ServiceConfig, magnus_defaults.get("experiment_tracker", defaults.DEFAULT_EXPERIMENT_TRACKER)
+            ServiceConfig, runnable_defaults.get("experiment_tracker", defaults.DEFAULT_EXPERIMENT_TRACKER)
         )
     tracker_handler = utils.get_provider_by_name_and_type("experiment_tracker", tracker_config)
 
@@ -95,7 +98,7 @@ def prepare_configurations(
         executor_config = ServiceConfig(type="local", config={})
 
     if not executor_config:
-        executor_config = cast(ServiceConfig, magnus_defaults.get("executor", defaults.DEFAULT_EXECUTOR))
+        executor_config = cast(ServiceConfig, runnable_defaults.get("executor", defaults.DEFAULT_EXECUTOR))
     configured_executor = utils.get_provider_by_name_and_type("executor", executor_config)
 
     # Construct the context
@@ -113,20 +116,30 @@ def prepare_configurations(
     )
 
     if pipeline_file:
-
-
+        if pipeline_file.endswith(".py"):
+            # converting a pipeline defined in python to a dag in yaml
+            module_file = pipeline_file.strip(".py")
+            module, func = utils.get_module_and_attr_names(module_file)
+            sys.path.insert(0, os.getcwd())  # Need to add the current directory to path
+            imported_module = importlib.import_module(module)
 
-
-
+            os.environ["RUNNABLE_PY_TO_YAML"] = "true"
+            dag = getattr(imported_module, func)().return_dag()
 
-
-
-
-
+        else:
+            pipeline_config = utils.load_yaml(pipeline_file)
+
+            logger.info("The input pipeline:")
+            logger.info(json.dumps(pipeline_config, indent=4))
+
+            dag_config = pipeline_config["dag"]
+
+            dag_hash = utils.get_dag_hash(dag_config)
+            dag = graph.create_graph(dag_config)
+            run_context.dag_hash = dag_hash
 
     run_context.pipeline_file = pipeline_file
     run_context.dag = dag
-    run_context.dag_hash = dag_hash
 
     run_context.use_cached = False
     if use_cached:
@@ -148,7 +161,7 @@ def execute(
 ):
     # pylint: disable=R0914,R0913
     """
-    The entry point to magnus execution. This method would prepare the configurations and delegates traversal to the
+    The entry point to runnable execution. This method would prepare the configurations and delegates traversal to the
     executor
 
     Args:
@@ -176,7 +189,7 @@ def execute(
 
     run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
 
-    utils.set_magnus_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
+    utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
 
     # Prepare for graph execution
     executor.prepare_for_graph_execution()
@@ -197,7 +210,7 @@ def execute_single_node(
     parameters_file: str = "",
 ):
     """
-    The entry point into executing a single node of magnus. Orchestration modes should extensively use this
+    The entry point into executing a single node of runnable. Orchestration modes should extensively use this
     entry point.
 
     It should have similar set up of configurations to execute because orchestrator modes can initiate the execution.
@@ -226,7 +239,7 @@ def execute_single_node(
 
     executor = run_context.executor
     run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
-    utils.set_magnus_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
+    utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
 
     executor.prepare_for_node_execution()
 
@@ -247,55 +260,6 @@ def execute_single_node(
     executor.send_return_code(stage="execution")
 
 
-def execute_single_brach(
-    configuration_file: str,
-    pipeline_file: str,
-    branch_name: str,
-    map_variable: str,
-    run_id: str,
-    tag: str,
-):
-    """
-    The entry point into executing a branch of the graph. Interactive modes in parallel runs use this to execute
-    branches in parallel.
-
-    This entry point is never used by its own but rather from a node. So the arguments sent into this are fewer.
-
-    Args:
-        variables_file (str): The variables file, if used or None
-        branch_name : The name of the branch to execute, in dot.path.convention
-        pipeline_file (str): The config/dag file
-        run_id (str): The run id of the run.
-        tag (str): If a tag is provided at the run time
-    """
-    from runnable import nodes
-
-    run_context = prepare_configurations(
-        configuration_file=configuration_file,
-        pipeline_file=pipeline_file,
-        run_id=run_id,
-        tag=tag,
-        use_cached="",
-    )
-    print("Working with context:")
-    print(run_context)
-
-    executor = run_context.executor
-    run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
-    utils.set_magnus_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
-
-    branch_internal_name = nodes.BaseNode._get_internal_name_from_command_name(branch_name)
-
-    map_variable_dict = utils.json_to_ordered_dict(map_variable)
-
-    branch_to_execute = graph.search_branch_by_internal_name(run_context.dag, branch_internal_name)  # type: ignore
-
-    logger.info("Executing the single branch of %s", branch_to_execute)
-    executor.execute_graph(dag=branch_to_execute, map_variable=map_variable_dict)
-
-    executor.send_return_code()
-
-
 def execute_notebook(
     entrypoint: str,
     notebook_file: str,
@@ -307,7 +271,7 @@ def execute_notebook(
     parameters_file: str = "",
 ):
     """
-    The entry point to magnus execution of a notebook. This method would prepare the configurations and
+    The entry point to runnable execution of a notebook. This method would prepare the configurations and
     delegates traversal to the executor
     """
     run_id = utils.generate_run_id(run_id=run_id)
@@ -321,7 +285,7 @@ def execute_notebook(
 
     executor = run_context.executor
     run_context.execution_plan = defaults.EXECUTION_PLAN.UNCHAINED.value
-    utils.set_magnus_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
+    utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
 
     print("Working with context:")
     print(run_context)
@@ -368,7 +332,7 @@ def execute_function(
     parameters_file: str = "",
 ):
     """
-    The entry point to magnus execution of a function. This method would prepare the configurations and
+    The entry point to runnable execution of a function. This method would prepare the configurations and
    delegates traversal to the executor
     """
     run_id = utils.generate_run_id(run_id=run_id)
@@ -383,7 +347,7 @@ def execute_function(
     executor = run_context.executor
 
     run_context.execution_plan = defaults.EXECUTION_PLAN.UNCHAINED.value
-    utils.set_magnus_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
+    utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
 
     print("Working with context:")
     print(run_context)
@@ -460,7 +424,7 @@ def fan(
 
     executor = run_context.executor
     run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
-    utils.set_magnus_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
+    utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
 
     executor.prepare_for_node_execution()
 
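The notable functional change here is that `prepare_configurations` now accepts a Python file as the pipeline definition: the module is imported, the named function is called with `RUNNABLE_PY_TO_YAML` set, and the returned object's `return_dag()` supplies the graph. A condensed sketch of that branch; the mechanism is read straight off the diff, while the purpose of the environment flag is an assumption:

```python
import importlib
import os
import sys


def load_python_dag(module_name: str, func_name: str):
    """Sketch of the new .py loading path in prepare_configurations."""
    sys.path.insert(0, os.getcwd())  # make the user's project importable
    # Presumably tells the SDK to build the dag rather than execute the pipeline.
    os.environ["RUNNABLE_PY_TO_YAML"] = "true"
    pipeline_module = importlib.import_module(module_name)
    return getattr(pipeline_module, func_name)().return_dag()
```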
runnable/executor.py
CHANGED
@@ -25,7 +25,7 @@ class BaseExecutor(ABC, BaseModel):
     The skeleton of an executor class.
     Any implementation of an executor should inherit this class and over-ride accordingly.
 
-    There is a extension available in magnus/extensions/executor/__init__.py
+    There is a extension available in runnable/extensions/executor/__init__.py
     which implements the most common functionality which is easier to
     extend/override in most scenarios.
 
@@ -34,11 +34,12 @@ class BaseExecutor(ABC, BaseModel):
     service_name: str = ""
     service_type: str = "executor"
 
-    enable_parallel: bool = defaults.ENABLE_PARALLEL
     overrides: dict = {}
 
+    # TODO: This needs to go away
     _previous_run_log: Optional[RunLog] = None
     _single_step: str = ""
+    _local: bool = False  # This is a flag to indicate whether the executor is local or not.
 
     _context_step_log = None  # type : StepLog
     _context_node = None  # type: BaseNode
@@ -48,19 +49,6 @@ class BaseExecutor(ABC, BaseModel):
     def _context(self):
         return context.run_context
 
-    def _is_parallel_execution(self) -> bool:
-        """
-        Controls the parallelization of branches in map and parallel state.
-        Defaults to False and left for the compute modes to decide.
-
-        Interactive executors like local and local-container need decisions.
-        For most transpilers it is inconsequential as its always True and supported by platforms.
-
-        Returns:
-            bool: True if the execution allows parallel execution of branches.
-        """
-        return self.enable_parallel
-
     @abstractmethod
     def _get_parameters(self) -> Dict[str, Any]:
         """
@@ -72,6 +60,7 @@ class BaseExecutor(ABC, BaseModel):
         """
         ...
 
+    # TODO: This needs to go away
     @abstractmethod
     def _set_up_for_re_run(self, parameters: Dict[str, Any]) -> None:
         """
@@ -251,7 +240,7 @@ class BaseExecutor(ABC, BaseModel):
     @abstractmethod
     def trigger_job(self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs):
         """
-        Executor specific way of triggering jobs when magnus does both traversal and execution
+        Executor specific way of triggering jobs when runnable does both traversal and execution
 
         Transpilers will NEVER use this method and will NEVER call them.
         Only interactive executors who need execute_from_graph will ever implement it.
@@ -304,6 +293,7 @@ class BaseExecutor(ABC, BaseModel):
         """
         ...
 
+    # TODO: This needs to go away
    @abstractmethod
     def _is_step_eligible_for_rerun(self, node: BaseNode, map_variable: TypeMapVariable = None):
         """
runnable/extensions/catalog/file_system/implementation.py
CHANGED
@@ -144,7 +144,8 @@ class FileSystemCatalog(BaseCatalog):
         if not utils.does_dir_exist(copy_from):
             msg = (
                 f"Expected compute data folder to be present at: {compute_data_folder} but not found. \n"
-                "Note: magnus does not create the compute data folder for you. Please ensure that the folder exists.\n"
+                "Note: runnable does not create the compute data folder for you. Please ensure that the "
+                "folder exists.\n"
             )
             raise Exception(msg)
 
runnable/extensions/executor/__init__.py
CHANGED
@@ -47,12 +47,12 @@ class GenericExecutor(BaseExecutor):
 
         This function is used by the decorator function.
         The design idea is we can over-ride this method in different implementations to retrieve the run_id.
-        But is it really intrusive to ask to set the environmental variable MAGNUS_RUN_ID?
+        But is it really intrusive to ask to set the environmental variable runnable_RUN_ID?
 
         Returns:
             _type_: _description_
         """
-        return os.environ.get("MAGNUS_RUN_ID", None)
+        return os.environ.get("runnable_RUN_ID", None)
 
     def _get_parameters(self) -> Dict[str, Any]:
         """
@@ -117,6 +117,7 @@ class GenericExecutor(BaseExecutor):
         # Consolidate and get the parameters
         parameters = self._get_parameters()
 
+        # TODO: This needs to go away
         if self._context.use_cached:
             self._set_up_for_re_run(parameters=parameters)
 
@@ -300,6 +301,8 @@ class GenericExecutor(BaseExecutor):
         We set them as environment variables, serialized as json strings.
         """
         params = self._context.run_log_store.get_parameters(run_id=self._context.run_id)
+        params_copy = copy.deepcopy(params)
+        # This is only for the API to work.
         parameters.set_user_defined_params_as_environment_variables(params)
 
         attempt = self.step_attempt_number
@@ -311,22 +314,26 @@ class GenericExecutor(BaseExecutor):
 
         data_catalogs_get: Optional[List[DataCatalog]] = self._sync_catalog(step_log, stage="get")
         try:
-            attempt_log = node.execute(executor=self, mock=step_log.mock, map_variable=map_variable, **kwargs)
+            attempt_log = node.execute(
+                executor=self,
+                mock=step_log.mock,
+                map_variable=map_variable,
+                params=params,
+                **kwargs,
+            )
         except Exception as e:
-            # Any exception here is a magnus exception as node suppresses exceptions.
-            msg = "This is clearly magnus fault, please report a bug and the logs"
+            # Any exception here is a runnable exception as node suppresses exceptions.
+            msg = "This is clearly runnable fault, please report a bug and the logs"
             logger.exception(msg)
             raise Exception(msg) from e
         finally:
             attempt_log.attempt_number = attempt
-            attempt_log.parameters = params.copy()
             step_log.attempts.append(attempt_log)
 
         tracked_data = get_tracked_data()
 
         self._context.experiment_tracker.publish_data(tracked_data)
-
-        parameters_out = parameters.get_user_set_parameters(remove=True)
+        parameters_out = attempt_log.output_parameters
 
         if attempt_log.status == defaults.FAIL:
             logger.exception(f"Node: {node} failed")
@@ -339,10 +346,12 @@ class GenericExecutor(BaseExecutor):
             step_log.status = defaults.SUCCESS
         self._sync_catalog(step_log, stage="put", synced_catalogs=data_catalogs_get)
         step_log.user_defined_metrics = tracked_data
-
+
+        diff_parameters = utils.diff_dict(params_copy, parameters_out)
         self._context.run_log_store.set_parameters(self._context.run_id, diff_parameters)
 
         # Remove the step context
+        parameters.get_user_set_parameters(remove=True)
         self._context_step_log = None
         self._context_node = None  # type: ignore
         self._context_metrics = {}  # type: ignore
@@ -400,6 +409,7 @@ class GenericExecutor(BaseExecutor):
             self._execute_node(node, map_variable=map_variable, **kwargs)
             return
 
+        # TODO: This needs to go away
         # In single step
         if (self._single_step and not node.name == self._single_step) or not self._is_step_eligible_for_rerun(
             node, map_variable=map_variable
@@ -533,6 +543,7 @@ class GenericExecutor(BaseExecutor):
         run_log = self._context.run_log_store.get_run_log_by_id(run_id=self._context.run_id, full=True)
         print(json.dumps(run_log.model_dump(), indent=4))
 
+    # TODO: This needs to go away
     def _is_step_eligible_for_rerun(self, node: BaseNode, map_variable: TypeMapVariable = None):
         """
         In case of a re-run, this method checks to see if the previous run step status to determine if a re-run is
runnable/extensions/executor/argo/implementation.py
CHANGED
@@ -292,7 +292,7 @@ class ContainerTemplate(BaseModel):
 
 class DagTemplate(BaseModel):
     # These are used for parallel, map nodes dag definition
-    name: str = "magnus-dag"
+    name: str = "runnable-dag"
     tasks: List[DagTaskTemplate] = Field(default=[], exclude=True)
     inputs: Optional[List[Parameter]] = Field(default=None, serialization_alias="inputs")
     parallelism: Optional[int] = None
@@ -561,7 +561,7 @@ def get_renderer(node):
 
 
 class MetaData(BaseModel):
-    generate_name: str = Field(default="magnus-dag-", serialization_alias="generateName")
+    generate_name: str = Field(default="runnable-dag-", serialization_alias="generateName")
     annotations: Optional[Dict[str, str]] = Field(default_factory=dict)
     labels: Optional[Dict[str, str]] = Field(default_factory=dict)
     namespace: Optional[str] = Field(default=None)
@@ -569,7 +569,7 @@ class MetaData(BaseModel):
 
 class Spec(BaseModel):
     active_deadline_seconds: int = Field(serialization_alias="activeDeadlineSeconds")
-    entrypoint: str = Field(default="magnus-dag")
+    entrypoint: str = Field(default="runnable-dag")
     node_selector: Optional[Dict[str, str]] = Field(default_factory=dict, serialization_alias="nodeSelector")
     tolerations: Optional[List[Toleration]] = Field(default=None, serialization_alias="tolerations")
     parallelism: Optional[int] = Field(default=None, serialization_alias="parallelism")
@@ -665,6 +665,7 @@ class Override(BaseModel):
 
 class ArgoExecutor(GenericExecutor):
     service_name: str = "argo"
+    _local: bool = False
 
     model_config = ConfigDict(extra="forbid")
 
@@ -674,7 +675,7 @@ class ArgoExecutor(GenericExecutor):
     output_file: str = "argo-pipeline.yaml"
 
     # Metadata related fields
-    name: str = Field(default="magnus-dag-", description="Used as an identifier for the workflow")
+    name: str = Field(default="runnable-dag-", description="Used as an identifier for the workflow")
     annotations: Dict[str, str] = Field(default_factory=dict)
     labels: Dict[str, str] = Field(default_factory=dict)
 
@@ -994,7 +995,7 @@ class ArgoExecutor(GenericExecutor):
         return DagTaskTemplate(name=f"{clean_name}-fan-in", template=f"{clean_name}-fan-in")
 
     def _gather_task_templates_of_dag(
-        self, dag: Graph, dag_name="magnus-dag", list_of_iter_values: Optional[List] = None
+        self, dag: Graph, dag_name="runnable-dag", list_of_iter_values: Optional[List] = None
     ):
         current_node = dag.start_at
         previous_node = None
|