runnable-0.50.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- extensions/README.md +0 -0
- extensions/__init__.py +0 -0
- extensions/catalog/README.md +0 -0
- extensions/catalog/any_path.py +214 -0
- extensions/catalog/file_system.py +52 -0
- extensions/catalog/minio.py +72 -0
- extensions/catalog/pyproject.toml +14 -0
- extensions/catalog/s3.py +11 -0
- extensions/job_executor/README.md +0 -0
- extensions/job_executor/__init__.py +236 -0
- extensions/job_executor/emulate.py +70 -0
- extensions/job_executor/k8s.py +553 -0
- extensions/job_executor/k8s_job_spec.yaml +37 -0
- extensions/job_executor/local.py +35 -0
- extensions/job_executor/local_container.py +161 -0
- extensions/job_executor/pyproject.toml +16 -0
- extensions/nodes/README.md +0 -0
- extensions/nodes/__init__.py +0 -0
- extensions/nodes/conditional.py +301 -0
- extensions/nodes/fail.py +78 -0
- extensions/nodes/loop.py +394 -0
- extensions/nodes/map.py +477 -0
- extensions/nodes/parallel.py +281 -0
- extensions/nodes/pyproject.toml +15 -0
- extensions/nodes/stub.py +93 -0
- extensions/nodes/success.py +78 -0
- extensions/nodes/task.py +156 -0
- extensions/pipeline_executor/README.md +0 -0
- extensions/pipeline_executor/__init__.py +871 -0
- extensions/pipeline_executor/argo.py +1266 -0
- extensions/pipeline_executor/emulate.py +119 -0
- extensions/pipeline_executor/local.py +226 -0
- extensions/pipeline_executor/local_container.py +369 -0
- extensions/pipeline_executor/mocked.py +159 -0
- extensions/pipeline_executor/pyproject.toml +16 -0
- extensions/run_log_store/README.md +0 -0
- extensions/run_log_store/__init__.py +0 -0
- extensions/run_log_store/any_path.py +100 -0
- extensions/run_log_store/chunked_fs.py +122 -0
- extensions/run_log_store/chunked_minio.py +141 -0
- extensions/run_log_store/file_system.py +91 -0
- extensions/run_log_store/generic_chunked.py +549 -0
- extensions/run_log_store/minio.py +114 -0
- extensions/run_log_store/pyproject.toml +15 -0
- extensions/secrets/README.md +0 -0
- extensions/secrets/dotenv.py +62 -0
- extensions/secrets/pyproject.toml +15 -0
- runnable/__init__.py +108 -0
- runnable/catalog.py +141 -0
- runnable/cli.py +484 -0
- runnable/context.py +730 -0
- runnable/datastore.py +1058 -0
- runnable/defaults.py +159 -0
- runnable/entrypoints.py +390 -0
- runnable/exceptions.py +137 -0
- runnable/executor.py +561 -0
- runnable/gantt.py +1646 -0
- runnable/graph.py +501 -0
- runnable/names.py +546 -0
- runnable/nodes.py +593 -0
- runnable/parameters.py +217 -0
- runnable/pickler.py +96 -0
- runnable/sdk.py +1277 -0
- runnable/secrets.py +92 -0
- runnable/tasks.py +1268 -0
- runnable/telemetry.py +142 -0
- runnable/utils.py +423 -0
- runnable-0.50.0.dist-info/METADATA +189 -0
- runnable-0.50.0.dist-info/RECORD +72 -0
- runnable-0.50.0.dist-info/WHEEL +4 -0
- runnable-0.50.0.dist-info/entry_points.txt +53 -0
- runnable-0.50.0.dist-info/licenses/LICENSE +201 -0
runnable/context.py
ADDED
@@ -0,0 +1,730 @@
import contextvars
import hashlib
import importlib
import json
import logging
import os
import sys
from datetime import datetime
from enum import Enum
from functools import cached_property, partial
from typing import TYPE_CHECKING, Annotated, Any, Callable, Dict, Optional

import logfire_api as logfire
from pydantic import (
    BaseModel,
    BeforeValidator,
    ConfigDict,
    Field,
    computed_field,
    field_validator,
)
from stevedore import driver

from runnable import console, defaults, exceptions, names, utils
from runnable.catalog import BaseCatalog
from runnable.datastore import BaseRunLogStore
from runnable.executor import BaseJobExecutor, BasePipelineExecutor
from runnable.graph import Graph, create_graph
from runnable.nodes import BaseNode
from runnable.pickler import BasePickler
from runnable.secrets import BaseSecrets
from runnable.tasks import BaseTaskType


logger = logging.getLogger(defaults.LOGGER_NAME)


def get_pipeline_spec_from_yaml(pipeline_file: str) -> Graph:
    """
    Reads the pipeline file from a YAML file and sets the pipeline spec in the run context
    """
    pipeline_config = utils.load_yaml(pipeline_file)
    logger.info("The input pipeline:")
    logger.info(json.dumps(pipeline_config, indent=4))

    dag_config = pipeline_config["dag"]

    dag = create_graph(dag_config)
    return dag


def get_pipeline_spec_from_python(python_module: str) -> Graph:
    # Call the SDK to get the dag
    # Import the module and call the function to get the dag
    module_file = python_module.rstrip(".py")
    module, func = utils.get_module_and_attr_names(module_file)
    sys.path.insert(0, os.getcwd())  # Need to add the current directory to path
    imported_module = importlib.import_module(module)

    dag = getattr(imported_module, func)().return_dag()

    return dag


def get_job_spec_from_python(
    job_file: str,
) -> tuple[BaseTaskType, list[str]]:
    """
    Reads the job file from a Python file and sets the job spec in the run context
    """
    from runnable.sdk import BaseJob

    # Import the module and call the function to get the job
    module_file = job_file.rstrip(".py")
    module, func = utils.get_module_and_attr_names(module_file)
    sys.path.insert(0, os.getcwd())  # Need to add the current directory to path
    imported_module = importlib.import_module(module)

    job: BaseJob = getattr(imported_module, func)()

    return job.get_task(), job.catalog.put if job.catalog else []


def get_service_by_name(namespace: str, service_config: dict[str, Any], _) -> Any:  # noqa: ANN401, ANN001
    """Get the service by name."""
    service_config = service_config.copy()

    kind = service_config.pop("type", None)

    if "config" in service_config:
        service_config = service_config.get("config", {})

    logger.debug(
        f"Trying to get a service of {namespace} with config: {service_config}"
    )
    try:
        mgr: driver.DriverManager = driver.DriverManager(
            namespace=namespace,  # eg: reader
            name=kind,  # eg: csv, pdf
            invoke_on_load=True,
            invoke_kwds={**service_config},
        )
        return mgr.driver
    except Exception as _e:
        raise Exception(
            f"Could not find the service of kind: {kind} in namespace:{namespace} with config: {service_config}"
        ) from _e


def get_service(service: str) -> Callable:
    """Get the service by name.

    Args:
        service (str): service name.

    Returns:
        Callable: callable function of service.
    """
    return partial(get_service_by_name, service)


InstantiatedCatalog = Annotated[BaseCatalog, BeforeValidator(get_service("catalog"))]
InstantiatedSecrets = Annotated[BaseSecrets, BeforeValidator(get_service("secrets"))]
InstantiatedPickler = Annotated[BasePickler, BeforeValidator(get_service("pickler"))]
InstantiatedRunLogStore = Annotated[
    BaseRunLogStore, BeforeValidator(get_service("run_log_store"))
]
InstantiatedPipelineExecutor = Annotated[
    BasePipelineExecutor, BeforeValidator(get_service("pipeline_executor"))
]
InstantiatedJobExecutor = Annotated[
    BaseJobExecutor, BeforeValidator(get_service("job_executor"))
]


class ExecutionMode(str, Enum):
    YAML = "yaml"
    PYTHON = "python"


class ExecutionContext(str, Enum):
    PIPELINE = "pipeline"
    JOB = "job"


class ServiceConfigurations(BaseModel):
    configuration_file: Optional[str] = Field(
        default=None, exclude=True, description="Path to the configuration file."
    )
    execution_context: ExecutionContext = ExecutionContext.PIPELINE
    variables: dict[str, str] = Field(
        default_factory=utils.gather_variables,
        exclude=True,
        description="Variables to be used.",
    )

    @field_validator("configuration_file", mode="before")
    @classmethod
    def override_configuration_file(cls, configuration_file: str | None) -> str | None:
        """Determine the configuration file to use, following the order of precedence."""
        # 1. Environment variable
        env_config = os.environ.get(defaults.RUNNABLE_CONFIGURATION_FILE)
        if env_config:
            return env_config

        # 2. User-provided at runtime
        if configuration_file:
            return configuration_file

        # 3. Default user config file
        if utils.does_file_exist(defaults.USER_CONFIG_FILE):
            return defaults.USER_CONFIG_FILE

        # 4. No config file
        return None

    @computed_field  # type: ignore
    @property
    def services(self) -> dict[str, Any]:
        """Get the effective services"""
        _services = defaults.DEFAULT_SERVICES.copy()

        if not self.configuration_file:
            return _services

        # Load the configuration file
        templated_config = utils.load_yaml(self.configuration_file)
        config = utils.apply_variables(templated_config, self.variables)

        for key, value in config.items():
            _services[key.replace("-", "_")] = value

        if self.execution_context == ExecutionContext.JOB:
            _services.pop("pipeline_executor", None)
        elif self.execution_context == ExecutionContext.PIPELINE:
            _services.pop("job_executor", None)
        else:
            raise ValueError(
                f"Invalid execution context: {self.execution_context}. Must be 'pipeline' or 'job'."
            )

        return _services


class RunnableContext(BaseModel):
    model_config = ConfigDict(use_enum_values=True, loc_by_alias=True)

    execution_mode: ExecutionMode = ExecutionMode.PYTHON

    parameters_file: Optional[str] = Field(
        default=None, exclude=True, description="Path to the parameters file."
    )
    configuration_file: Optional[str] = Field(
        default=None, exclude=True, description="Path to the configuration file."
    )
    variables: dict[str, str] = Field(
        default_factory=utils.gather_variables,
        exclude=True,
        description="Variables to be used.",
    )
    run_id: str = Field(
        description="The run ID, generated automatically if not provided"
    )
    tag: Optional[str] = Field(default=None, description="Tag to be used for the run.")

    # TODO: Verify the design
    object_serialisation: bool = (
        True  # Should be validated against executor being local
    )
    return_objects: Dict[
        str, Any
    ] = {}  # Should be validated against executor being local, should this be here?

    @field_validator("parameters_file", mode="before")
    @classmethod
    def override_parameters_file(cls, parameters_file: str) -> str:
        """Override the parameters file if provided."""
        if os.environ.get(defaults.RUNNABLE_PARAMETERS_FILE, None):
            return os.environ.get(defaults.RUNNABLE_PARAMETERS_FILE, parameters_file)
        return parameters_file

    @field_validator("configuration_file", mode="before")
    @classmethod
    def override_configuration_file(cls, configuration_file: str) -> str:
        """Override the configuration file if provided."""
        return os.environ.get(defaults.RUNNABLE_CONFIGURATION_FILE, configuration_file)

    @field_validator("run_id", mode="before")
    @classmethod
    def generate_run_id(cls, run_id: str) -> str:
        """Generate a run id if not provided."""
        # Convert None to empty string for consistency
        if run_id is None:
            run_id = ""

        # Check for retry run id first - this takes precedence
        retry_run_id = os.environ.get(defaults.RETRY_RUN_ID, "")
        if retry_run_id:
            return retry_run_id

        if not run_id:
            run_id = os.environ.get(defaults.ENV_RUN_ID, "")

        # If both are not given, generate one
        if not run_id:
            now = datetime.now()
            run_id = f"{names.get_random_name()}-{now.hour:02}{now.minute:02}"

        return run_id

    @computed_field  # type: ignore
    @property
    def retry_indicator(self) -> str:
        """Indicator for retry executions to distinguish attempt logs."""
        return os.environ.get(defaults.RETRY_INDICATOR, "")

    @computed_field  # type: ignore
    @property
    def is_retry(self) -> bool:
        """Flag indicating if this is a retry run based on environment variable."""
        retry_run_id = os.environ.get(defaults.RETRY_RUN_ID, "")
        return bool(retry_run_id)

    def model_post_init(self, __context: Any) -> None:
        os.environ[defaults.ENV_RUN_ID] = self.run_id

        if self.configuration_file:
            os.environ[defaults.RUNNABLE_CONFIGURATION_FILE] = self.configuration_file
        if self.tag:
            os.environ[defaults.RUNNABLE_RUN_TAG] = self.tag

        # Set the context using contextvars for proper isolation
        set_run_context(self)

    def execute(self):
        "Execute the pipeline or the job"
        raise NotImplementedError


class PipelineContext(RunnableContext):
    pipeline_executor: InstantiatedPipelineExecutor
    catalog: InstantiatedCatalog
    secrets: InstantiatedSecrets
    pickler: InstantiatedPickler
    run_log_store: InstantiatedRunLogStore

    pipeline_definition_file: str

    @computed_field  # type: ignore
    @cached_property
    def dag(self) -> Graph | None:
        """Get the dag."""
        if self.execution_mode == ExecutionMode.YAML:
            return get_pipeline_spec_from_yaml(self.pipeline_definition_file)
        elif self.execution_mode == ExecutionMode.PYTHON:
            return get_pipeline_spec_from_python(self.pipeline_definition_file)
        else:
            raise ValueError(
                f"Invalid execution mode: {self.execution_mode}. Must be 'yaml' or 'python'."
            )

    @computed_field  # type: ignore
    @cached_property
    def dag_hash(self) -> str:
        dag = self.dag
        if not dag:
            return ""
        dag_str = json.dumps(dag.model_dump(), sort_keys=True, ensure_ascii=True)
        return hashlib.sha1(dag_str.encode("utf-8")).hexdigest()

    def get_node_callable_command(
        self,
        node: BaseNode,
        iter_variable: defaults.IterableParameterModel | None = None,
        over_write_run_id: str = "",
        log_level: str = "",
    ) -> str:
        run_id = self.run_id

        if over_write_run_id:
            run_id = over_write_run_id

        log_level = log_level or logging.getLevelName(logger.getEffectiveLevel())

        action = (
            f"runnable execute-single-node {run_id} "
            f"{self.pipeline_definition_file} "
            f"{node._command_friendly_name()} "
            f"--log-level {log_level} "
        )

        # yaml is the default mode
        if self.execution_mode == ExecutionMode.PYTHON:
            action = action + "--mode python "

        if iter_variable:
            action = action + f"--iter-variable '{iter_variable.model_dump_json()}' "

        if self.configuration_file:
            action = action + f"--config {self.configuration_file} "

        if self.parameters_file:
            action = action + f"--parameters-file {self.parameters_file} "

        if self.tag:
            action = action + f"--tag {self.tag}"

        console.log(
            f"Generated command for node {node._command_friendly_name()}: {action}"
        )

        return action

    def get_fan_command(
        self,
        node: BaseNode,
        mode: str,
        run_id: str,
        iter_variable: defaults.IterableParameterModel | None = None,
        log_level: str = "",
    ) -> str:
        """
        Return the fan "in or out" command for this pipeline context.

        Args:
            node (BaseNode): The composite node that we are fanning in or out
            mode (str): "in" or "out"
            map_variable (dict, optional): If the node is a map, we have the map variable. Defaults to None.
            log_level (str, optional): Log level. Defaults to "".

        Returns:
            str: The fan in or out command
        """
        log_level = log_level or logging.getLevelName(logger.getEffectiveLevel())
        action = (
            f"runnable fan {run_id} "
            f"{node._command_friendly_name()} "
            f"{self.pipeline_definition_file} "
            f"{mode} "
            f"--log-level {log_level}"
        )
        if self.configuration_file:
            action += f" --config-file {self.configuration_file}"
        if self.parameters_file:
            action += f" --parameters-file {self.parameters_file}"
        if iter_variable:
            action += f" --iter-variable '{iter_variable.model_dump_json()}'"
        if self.execution_mode == ExecutionMode.PYTHON:
            action += " --mode python"
        if self.tag:
            action += f" --tag {self.tag}"

        console.log(
            f"Generated command for fan {mode} for node {node._command_friendly_name()}: {action}"
        )
        return action

    def execute(self):
        assert self.dag is not None

        pipeline_name = getattr(self.dag, "name", "unnamed")

        with logfire.span(
            "pipeline:{pipeline_name}",
            pipeline_name=pipeline_name,
            run_id=self.run_id,
            executor=self.pipeline_executor.__class__.__name__,
        ):
            logfire.info("Pipeline execution started")

            console.print("Working with context:")
            console.print(get_run_context())
            console.rule(style="[dark orange]")

            # Prepare for graph execution
            if self.pipeline_executor._should_setup_run_log_at_traversal:
                self.pipeline_executor._set_up_run_log(exists_ok=False)

            try:
                self.pipeline_executor.execute_graph(dag=self.dag)
                if not self.pipeline_executor._should_setup_run_log_at_traversal:
                    # non local executors just traverse the graph and do nothing
                    logfire.info("Pipeline submitted", status="submitted")
                    return {}

                ctx = get_run_context()
                assert ctx
                assert isinstance(ctx, PipelineContext)
                run_log = ctx.run_log_store.get_run_log_by_id(
                    run_id=ctx.run_id, full=False
                )

                if run_log.status == defaults.SUCCESS:
                    console.print(
                        "Pipeline executed successfully!", style=defaults.success_style
                    )
                    logfire.info("Pipeline completed", status="success")
                else:
                    console.print(
                        "Pipeline execution failed.", style=defaults.error_style
                    )
                    logfire.error("Pipeline failed", status="failed")
                    raise exceptions.ExecutionFailedError(ctx.run_id)
            except Exception as e:  # noqa: E722
                console.print(e, style=defaults.error_style)
                logfire.error("Pipeline failed with exception", error=str(e)[:256])
                raise

        if self.pipeline_executor._should_setup_run_log_at_traversal:
            ctx = get_run_context()
            assert ctx
            assert isinstance(ctx, PipelineContext)
            return ctx.run_log_store.get_run_log_by_id(run_id=ctx.run_id)

    def _handle_completion(self):
        """Handle post-execution - shared by sync/async."""
        ctx = get_run_context()
        assert ctx
        assert isinstance(ctx, PipelineContext)
        run_log = ctx.run_log_store.get_run_log_by_id(run_id=ctx.run_id, full=False)

        if run_log.status == defaults.SUCCESS:
            console.print(
                "Pipeline executed successfully!", style=defaults.success_style
            )
            logfire.info("Pipeline completed", status="success")
        else:
            console.print("Pipeline execution failed.", style=defaults.error_style)
            logfire.error("Pipeline failed", status="failed")
            raise exceptions.ExecutionFailedError(ctx.run_id)

    async def execute_async(self):
        """Async pipeline execution."""
        assert self.dag is not None

        pipeline_name = getattr(self.dag, "name", "unnamed")

        with logfire.span(
            "pipeline:{pipeline_name}",
            pipeline_name=pipeline_name,
            run_id=self.run_id,
            executor=self.pipeline_executor.__class__.__name__,
        ):
            logfire.info("Async pipeline execution started")

            console.print("Working with context:")
            console.print(get_run_context())
            console.rule(style="[dark orange]")

            if self.pipeline_executor._should_setup_run_log_at_traversal:
                self.pipeline_executor._set_up_run_log(exists_ok=False)

            try:
                await self.pipeline_executor.execute_graph_async(dag=self.dag)
                self._handle_completion()

            except Exception as e:
                console.print(e, style=defaults.error_style)
                logfire.error("Pipeline failed with exception", error=str(e)[:256])
                raise

        if self.pipeline_executor._should_setup_run_log_at_traversal:
            ctx = get_run_context()
            assert ctx
            assert isinstance(ctx, PipelineContext)
            return ctx.run_log_store.get_run_log_by_id(run_id=ctx.run_id)


class AsyncPipelineContext(RunnableContext):
    """
    Simplified context for async pipeline execution.

    Unlike PipelineContext, this accepts the DAG directly rather than
    introspecting from a file. This simplifies async execution since
    we only support local executors for async pipelines.
    """

    pipeline_executor: InstantiatedPipelineExecutor
    catalog: InstantiatedCatalog
    secrets: InstantiatedSecrets
    pickler: InstantiatedPickler
    run_log_store: InstantiatedRunLogStore

    # DAG is passed directly, not computed from a file
    dag: Graph

    @computed_field  # type: ignore
    @cached_property
    def dag_hash(self) -> str:
        dag = self.dag
        if not dag:
            return ""
        dag_str = json.dumps(dag.model_dump(), sort_keys=True, ensure_ascii=True)
        return hashlib.sha1(dag_str.encode("utf-8")).hexdigest()

    async def execute_async(self):
        """Async pipeline execution."""
        assert self.dag is not None

        pipeline_name = getattr(self.dag, "name", "unnamed")

        with logfire.span(
            "pipeline:{pipeline_name}",
            pipeline_name=pipeline_name,
            run_id=self.run_id,
            executor=self.pipeline_executor.__class__.__name__,
        ):
            logfire.info("Async pipeline execution started")

            console.print("Working with context:")
            console.print(get_run_context())
            console.rule(style="[dark orange]")

            if self.pipeline_executor._should_setup_run_log_at_traversal:
                self.pipeline_executor._set_up_run_log(exists_ok=False)

            try:
                await self.pipeline_executor.execute_graph_async(dag=self.dag)

                run_log = self.run_log_store.get_run_log_by_id(
                    run_id=self.run_id, full=False
                )

                if run_log.status == defaults.SUCCESS:
                    console.print(
                        "Pipeline executed successfully!", style=defaults.success_style
                    )
                    logfire.info("Pipeline completed", status="success")
                else:
                    console.print(
                        "Pipeline execution failed.", style=defaults.error_style
                    )
                    logfire.error("Pipeline failed", status="failed")
                    raise exceptions.ExecutionFailedError(self.run_id)

            except Exception as e:
                console.print(e, style=defaults.error_style)
                logfire.error("Pipeline failed with exception", error=str(e)[:256])
                raise

        if self.pipeline_executor._should_setup_run_log_at_traversal:
            return self.run_log_store.get_run_log_by_id(run_id=self.run_id)


class JobContext(RunnableContext):
    job_executor: InstantiatedJobExecutor
    catalog: InstantiatedCatalog
    secrets: InstantiatedSecrets
    pickler: InstantiatedPickler
    run_log_store: InstantiatedRunLogStore

    job_definition_file: str
    catalog_store_copy: bool = Field(default=True, alias="catalog_store_copy")

    @computed_field  # type: ignore
    @cached_property
    def job(self) -> BaseTaskType:
        job, _ = get_job_spec_from_python(self.job_definition_file)
        return job

    @computed_field  # type: ignore
    @cached_property
    def catalog_settings(self) -> list[str] | None:
        _, catalog_config = get_job_spec_from_python(self.job_definition_file)
        return catalog_config

    def get_job_callable_command(
        self,
        over_write_run_id: str = "",
    ):
        run_id = self.run_id

        if over_write_run_id:
            run_id = over_write_run_id

        log_level = logging.getLevelName(logger.getEffectiveLevel())

        action = (
            f"runnable execute-job {self.job_definition_file} {run_id} "
            f" --log-level {log_level}"
        )

        if self.configuration_file:
            action = action + f" --config {self.configuration_file}"

        if self.parameters_file:
            action = action + f" --parameters {self.parameters_file}"

        if self.tag:
            action = action + f" --tag {self.tag}"

        return action

    def execute(self):
        with logfire.span(
            "job:{job_name}",
            job_name=self.job_definition_file,
            run_id=self.run_id,
            executor=self.job_executor.__class__.__name__,
        ):
            logfire.info("Job execution started")

            console.print("Working with context:")
            console.print(get_run_context())
            console.rule(style="[dark orange]")

            try:
                self.job_executor.submit_job(
                    job=self.job, catalog_settings=self.catalog_settings
                )
                logfire.info("Job submitted", status="submitted")
            except Exception as e:
                logfire.error("Job failed", error=str(e)[:256])
                raise
            finally:
                console.print(f"Job execution completed for run id: {self.run_id}")

            logger.info(
                "Executing the job from the user. We are still in the caller's compute"
                " environment"
            )

        if self.job_executor._should_setup_run_log_at_traversal:
            ctx = get_run_context()
            assert ctx
            assert isinstance(ctx, JobContext)
            return ctx.run_log_store.get_run_log_by_id(run_id=ctx.run_id)


# Context variable for thread/async-safe run context storage
if TYPE_CHECKING:
    from typing import Union

    RunnableContextType = Union[
        "RunnableContext", "PipelineContext", "JobContext", "AsyncPipelineContext"
    ]
else:
    RunnableContextType = Any

_run_context_var: contextvars.ContextVar[
    Optional["PipelineContext | JobContext | AsyncPipelineContext | RunnableContext"]
] = contextvars.ContextVar("run_context", default=None)


def get_run_context() -> (
    Optional["PipelineContext | JobContext | AsyncPipelineContext | RunnableContext"]
):
    """Get the current run context for this execution context."""
    return _run_context_var.get()


def set_run_context(
    context: Optional[
        "PipelineContext | JobContext | AsyncPipelineContext | RunnableContext"
    ],
) -> None:
    """Set the run context for this execution context."""
    _run_context_var.set(context)


# BREAKING CHANGE: The global run_context variable has been replaced with
# get_run_context() and set_run_context() functions for proper context isolation.
# All code must be updated to use the new API.
#
# Migration guide:
# Before: run_context.run_log_store
# After:  get_run_context().run_log_store
#
# This change was necessary to fix concurrency issues by using contextvars
# for proper thread and async isolation of run contexts.
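
The closing comment of the added file documents the move from a module-level run_context global to the get_run_context()/set_run_context() pair backed by contextvars. A minimal sketch of what a downstream call site might look like after that migration, assuming a context (PipelineContext or JobContext) has already been constructed and registered elsewhere; the report_status helper below is purely illustrative and not part of the package:

    from runnable import context

    def report_status() -> None:
        # Fetch the context bound to the current thread / async task instead of
        # reading a module-level global.
        ctx = context.get_run_context()
        if ctx is None:
            raise RuntimeError("No run context has been set for this execution")
        # Attribute access is unchanged; only the lookup of the context itself
        # moved to the contextvars-backed accessor.
        run_log = ctx.run_log_store.get_run_log_by_id(run_id=ctx.run_id, full=False)
        print(run_log.status)

Because contextvars are copied per thread and per asyncio task, concurrent pipeline or job runs in the same process each see their own context, which is the concurrency issue the comment says motivated the change.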