FlowerPower 0.11.6.20-py3-none-any.whl → 0.20.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
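For readers who want to reproduce a comparison like this locally, the two wheels can be fetched (for example with `pip download flowerpower==0.11.6.20 --no-deps` and `pip download flowerpower==0.20.0 --no-deps`) and diffed with nothing but the standard library. The sketch below is illustrative, not the tool the registry uses; the local filenames are assumptions following the standard wheel naming pattern:

import difflib
import zipfile

# Assumed local paths; actual wheel filenames follow the
# "<name>-<version>-py3-none-any.whl" pattern for the versions compared above.
OLD = "flowerpower-0.11.6.20-py3-none-any.whl"
NEW = "flowerpower-0.20.0-py3-none-any.whl"

def read_members(path: str) -> dict[str, str]:
    """Map each archive member name to its decoded text content."""
    with zipfile.ZipFile(path) as zf:
        return {
            name: zf.read(name).decode("utf-8", errors="replace")
            for name in zf.namelist()
            if not name.endswith("/")
        }

old, new = read_members(OLD), read_members(NEW)
for name in sorted(old.keys() | new.keys()):
    diff = difflib.unified_diff(
        old.get(name, "").splitlines(keepends=True),
        new.get(name, "").splitlines(keepends=True),
        fromfile=f"old/{name}",
        tofile=f"new/{name}",
    )
    print("".join(diff), end="")

A file present in only one of the two archives shows up as fully added or fully deleted, which is how the per-file summary below was produced.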
- flowerpower/cfg/__init__.py +3 -3
- flowerpower/cfg/pipeline/__init__.py +5 -3
- flowerpower/cfg/project/__init__.py +3 -3
- flowerpower/cfg/project/job_queue.py +1 -128
- flowerpower/cli/__init__.py +5 -5
- flowerpower/cli/cfg.py +0 -3
- flowerpower/cli/job_queue.py +400 -132
- flowerpower/cli/pipeline.py +14 -413
- flowerpower/cli/utils.py +0 -1
- flowerpower/flowerpower.py +537 -28
- flowerpower/job_queue/__init__.py +5 -94
- flowerpower/job_queue/base.py +201 -3
- flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -3
- flowerpower/job_queue/rq/manager.py +388 -77
- flowerpower/pipeline/__init__.py +2 -0
- flowerpower/pipeline/base.py +2 -2
- flowerpower/pipeline/io.py +14 -16
- flowerpower/pipeline/manager.py +21 -642
- flowerpower/pipeline/pipeline.py +571 -0
- flowerpower/pipeline/registry.py +242 -10
- flowerpower/pipeline/visualizer.py +1 -2
- flowerpower/plugins/_io/__init__.py +8 -0
- flowerpower/plugins/mqtt/manager.py +6 -6
- flowerpower/settings/backend.py +0 -2
- flowerpower/settings/job_queue.py +1 -57
- flowerpower/utils/misc.py +0 -256
- flowerpower/utils/monkey.py +1 -83
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.20.0.dist-info}/METADATA +308 -152
- flowerpower-0.20.0.dist-info/RECORD +58 -0
- flowerpower/fs/__init__.py +0 -29
- flowerpower/fs/base.py +0 -662
- flowerpower/fs/ext.py +0 -2143
- flowerpower/fs/storage_options.py +0 -1420
- flowerpower/job_queue/apscheduler/__init__.py +0 -11
- flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -110
- flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -93
- flowerpower/job_queue/apscheduler/manager.py +0 -1051
- flowerpower/job_queue/apscheduler/setup.py +0 -554
- flowerpower/job_queue/apscheduler/trigger.py +0 -169
- flowerpower/job_queue/apscheduler/utils.py +0 -311
- flowerpower/pipeline/job_queue.py +0 -583
- flowerpower/pipeline/runner.py +0 -603
- flowerpower/plugins/io/base.py +0 -2520
- flowerpower/plugins/io/helpers/datetime.py +0 -298
- flowerpower/plugins/io/helpers/polars.py +0 -875
- flowerpower/plugins/io/helpers/pyarrow.py +0 -570
- flowerpower/plugins/io/helpers/sql.py +0 -202
- flowerpower/plugins/io/loader/__init__.py +0 -28
- flowerpower/plugins/io/loader/csv.py +0 -37
- flowerpower/plugins/io/loader/deltatable.py +0 -190
- flowerpower/plugins/io/loader/duckdb.py +0 -19
- flowerpower/plugins/io/loader/json.py +0 -37
- flowerpower/plugins/io/loader/mqtt.py +0 -159
- flowerpower/plugins/io/loader/mssql.py +0 -26
- flowerpower/plugins/io/loader/mysql.py +0 -26
- flowerpower/plugins/io/loader/oracle.py +0 -26
- flowerpower/plugins/io/loader/parquet.py +0 -35
- flowerpower/plugins/io/loader/postgres.py +0 -26
- flowerpower/plugins/io/loader/pydala.py +0 -19
- flowerpower/plugins/io/loader/sqlite.py +0 -23
- flowerpower/plugins/io/metadata.py +0 -244
- flowerpower/plugins/io/saver/__init__.py +0 -28
- flowerpower/plugins/io/saver/csv.py +0 -36
- flowerpower/plugins/io/saver/deltatable.py +0 -186
- flowerpower/plugins/io/saver/duckdb.py +0 -19
- flowerpower/plugins/io/saver/json.py +0 -36
- flowerpower/plugins/io/saver/mqtt.py +0 -28
- flowerpower/plugins/io/saver/mssql.py +0 -26
- flowerpower/plugins/io/saver/mysql.py +0 -26
- flowerpower/plugins/io/saver/oracle.py +0 -26
- flowerpower/plugins/io/saver/parquet.py +0 -36
- flowerpower/plugins/io/saver/postgres.py +0 -26
- flowerpower/plugins/io/saver/pydala.py +0 -20
- flowerpower/plugins/io/saver/sqlite.py +0 -24
- flowerpower/utils/scheduler.py +0 -311
- flowerpower-0.11.6.20.dist-info/RECORD +0 -102
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.20.0.dist-info}/WHEEL +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.20.0.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.20.0.dist-info}/licenses/LICENSE +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.20.0.dist-info}/top_level.txt +0 -0
flowerpower/pipeline/runner.py
DELETED
@@ -1,603 +0,0 @@
# -*- coding: utf-8 -*-
"""Pipeline Runner."""

from __future__ import annotations

import datetime as dt
import importlib.util
import random
import time
from typing import Any, Callable

import humanize
from hamilton import driver
from hamilton.execution import executors
from hamilton.registry import disable_autoload
from hamilton.telemetry import disable_telemetry
from hamilton_sdk.api.clients import UnauthorizedException
from requests.exceptions import ConnectionError, HTTPError

from .. import settings

if importlib.util.find_spec("opentelemetry"):
    from hamilton.plugins import h_opentelemetry

    from ..utils.open_telemetry import init_tracer
else:
    h_opentelemetry = None
    init_tracer = None

if importlib.util.find_spec("mlflow"):
    from hamilton.plugins import h_mlflow
else:
    h_mlflow = None

from hamilton.plugins import h_rich
from hamilton.plugins.h_threadpool import FutureAdapter
from hamilton_sdk.adapters import HamiltonTracker
from hamilton_sdk.tracking import constants
from loguru import logger

if importlib.util.find_spec("distributed"):
    from dask import distributed
    from hamilton.plugins import h_dask
else:
    distributed = None


if importlib.util.find_spec("ray"):
    import ray
    from hamilton.plugins import h_ray
else:
    h_ray = None

from ..cfg import PipelineConfig, ProjectConfig
from ..cfg.pipeline.adapter import AdapterConfig as PipelineAdapterConfig
from ..cfg.pipeline.run import ExecutorConfig, WithAdapterConfig
from ..cfg.project.adapter import AdapterConfig as ProjectAdapterConfig
from ..utils.logging import setup_logging
from .base import load_module

setup_logging(level=settings.LOG_LEVEL)

# from .executor import get_executor


class PipelineRunner:
    """PipelineRunner is responsible for executing a specific pipeline run.

    It handles the loading of the pipeline module, configuration, and execution.
    """

    def __init__(
        self,
        project_cfg: ProjectConfig,
        pipeline_cfg: PipelineConfig,
    ):
        self.project_cfg = project_cfg
        self.pipeline_cfg = pipeline_cfg
        self.name = pipeline_cfg.name

        if not settings.HAMILTON_TELEMETRY_ENABLED:
            disable_telemetry()
        if not settings.HAMILTON_AUTOLOAD_EXTENSIONS:
            disable_autoload()

    def __enter__(self):
        """Enable use as a context manager."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """No special cleanup required."""
        pass

    def _get_executor(
        self, executor_cfg: str | dict | ExecutorConfig | None = None
    ) -> tuple[executors.BaseExecutor, Callable | None]:
        """
        Get the executor based on the provided configuration.

        Args:
            executor_cfg (str | dict | ExecutorConfig | None): Executor configuration.

        Returns:
            tuple[executors.BaseExecutor, Callable | None]: A tuple containing the executor and shutdown function.
        """
        logger.debug("Setting up executor...")
        if executor_cfg:
            if isinstance(executor_cfg, str):
                executor_cfg = ExecutorConfig(type=executor_cfg)
            elif isinstance(executor_cfg, dict):
                executor_cfg = ExecutorConfig.from_dict(executor_cfg)
            elif not isinstance(executor_cfg, ExecutorConfig):
                raise TypeError(
                    "Executor must be a string, dictionary, or ExecutorConfig instance."
                )

            executor_cfg = self.pipeline_cfg.run.executor.merge(executor_cfg)
        else:
            executor_cfg = self.pipeline_cfg.run.executor

        if executor_cfg.type is None:
            logger.debug(
                "No executor type specified. Using SynchronousLocalTaskExecutor as default."
            )
            return executors.SynchronousLocalTaskExecutor(), None

        if executor_cfg.type == "threadpool":
            logger.debug(
                f"Using MultiThreadingExecutor with max_workers={executor_cfg.max_workers}"
            )
            return executors.MultiThreadingExecutor(
                max_tasks=executor_cfg.max_workers
            ), None
        elif executor_cfg.type == "processpool":
            logger.debug(
                f"Using MultiProcessingExecutor with max_workers={executor_cfg.max_workers}"
            )
            return executors.MultiProcessingExecutor(
                max_tasks=executor_cfg.max_workers
            ), None
        elif executor_cfg.type == "ray":
            if h_ray:
                logger.debug(
                    f"Using RayTaskExecutor with num_cpus={executor_cfg.num_cpus}"
                )

                return (
                    h_ray.RayTaskExecutor(
                        num_cpus=executor_cfg.num_cpus,
                        ray_init_config=self.project_cfg.adapter.ray.ray_init_config,
                    ),
                    ray.shutdown
                    if self.project_cfg.adapter.ray.shutdown_ray_on_completion
                    else None,
                )
            else:
                logger.warning("Ray is not installed. Using local executor.")
                return executors.SynchronousLocalTaskExecutor(), None
        elif executor_cfg.type == "dask":
            if distributed:
                cluster = distributed.LocalCluster()
                client = distributed.Client(cluster)
                return h_dask.DaskExecutor(client=client), cluster.close
            else:
                logger.warning("Dask is not installed. Using local executor.")
                return executors.SynchronousLocalTaskExecutor(), None
        else:
            logger.warning(
                f"Unknown executor type: {executor_cfg.type}. Using local executor."
            )
            return executors.SynchronousLocalTaskExecutor(), None

    def _get_adapters(
        self,
        with_adapter_cfg: dict | WithAdapterConfig | None = None,
        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
        adapter: dict[str, Any] | None = None,
    ) -> list:
        """
        Set the adapters for the pipeline.

        Args:
            with_adapter_cfg (dict | WithAdapterConfig | None): The adapter configuration.
                Overrides the with_adapter settings in the pipeline config.
            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): The pipeline adapter configuration.
                Overrides the adapter settings in the pipeline config.
            project_adapter_cfg (dict | ProjectAdapterConfig | None): The project adapter configuration.
                Overrides the adapter settings in the project config.
            adapter (dict[str, Any] | None): Any additional Hamilton adapters can be passed here.
        """
        logger.debug("Setting up adapters...")
        if with_adapter_cfg:
            if isinstance(with_adapter_cfg, dict):
                with_adapter_cfg = WithAdapterConfig.from_dict(with_adapter_cfg)
            elif not isinstance(with_adapter_cfg, WithAdapterConfig):
                raise TypeError(
                    "with_adapter must be a dictionary or WithAdapterConfig instance."
                )

            with_adapter_cfg = self.pipeline_cfg.run.with_adapter.merge(
                with_adapter_cfg
            )
        else:
            with_adapter_cfg = self.pipeline_cfg.run.with_adapter

        if pipeline_adapter_cfg:
            if isinstance(pipeline_adapter_cfg, dict):
                pipeline_adapter_cfg = PipelineAdapterConfig.from_dict(
                    pipeline_adapter_cfg
                )
            elif not isinstance(pipeline_adapter_cfg, PipelineAdapterConfig):
                raise TypeError(
                    "pipeline_adapter_cfg must be a dictionary or PipelineAdapterConfig instance."
                )

            pipeline_adapter_cfg = self.pipeline_cfg.adapter.merge(pipeline_adapter_cfg)
        else:
            pipeline_adapter_cfg = self.pipeline_cfg.adapter

        if project_adapter_cfg:
            if isinstance(project_adapter_cfg, dict):
                project_adapter_cfg = ProjectAdapterConfig.from_dict(
                    project_adapter_cfg
                )
            elif not isinstance(project_adapter_cfg, ProjectAdapterConfig):
                raise TypeError(
                    "project_adapter_cfg must be a dictionary or ProjectAdapterConfig instance."
                )

            project_adapter_cfg = self.project_cfg.adapter.merge(project_adapter_cfg)
        else:
            project_adapter_cfg = self.project_cfg.adapter

        adapters = []
        if with_adapter_cfg.hamilton_tracker:
            tracker_kwargs = project_adapter_cfg.hamilton_tracker.to_dict()
            tracker_kwargs.update(pipeline_adapter_cfg.hamilton_tracker.to_dict())
            tracker_kwargs["hamilton_api_url"] = tracker_kwargs.pop("api_url", None)
            tracker_kwargs["hamilton_ui_url"] = tracker_kwargs.pop("ui_url", None)

            constants.MAX_DICT_LENGTH_CAPTURE = (
                tracker_kwargs.pop("max_dict_length_capture", None)
                or settings.HAMILTON_MAX_DICT_LENGTH_CAPTURE
            )
            constants.MAX_LIST_LENGTH_CAPTURE = (
                tracker_kwargs.pop("max_list_length_capture", None)
                or settings.HAMILTON_MAX_LIST_LENGTH_CAPTURE
            )
            constants.CAPTURE_DATA_STATISTICS = (
                tracker_kwargs.pop("capture_data_statistics", None)
                or settings.HAMILTON_CAPTURE_DATA_STATISTICS
            )

            tracker = HamiltonTracker(**tracker_kwargs)

            adapters.append(tracker)

        if with_adapter_cfg.mlflow:
            if h_mlflow is None:
                logger.warning("MLFlow is not installed. Skipping MLFlow adapter.")
            else:
                mlflow_kwargs = project_adapter_cfg.mlflow.to_dict()
                mlflow_kwargs.update(pipeline_adapter_cfg.mlflow.to_dict())
                mlflow_adapter = h_mlflow.MLFlowTracker(**mlflow_kwargs)
                adapters.append(mlflow_adapter)

        if with_adapter_cfg.opentelemetry:
            if h_opentelemetry is None:
                logger.warning(
                    "OpenTelemetry is not installed. Skipping OpenTelemetry adapter."
                )
            else:
                otel_kwargs = project_adapter_cfg.opentelemetry.to_dict()
                otel_kwargs.update(pipeline_adapter_cfg.opentelemetry.to_dict())
                trace = init_tracer(**otel_kwargs, name=self.project_cfg.name)
                tracer = trace.get_tracer(self.name)
                otel_adapter = h_opentelemetry.OpenTelemetryTracer(
                    tracer_name=f"{self.project_cfg.name}.{self.name}",
                    tracer=tracer,
                )
                adapters.append(otel_adapter)

        if with_adapter_cfg.progressbar:
            adapters.append(
                h_rich.RichProgressBar(run_desc=f"{self.project_cfg.name}.{self.name}")
            )

        if with_adapter_cfg.future:
            adapters.append(FutureAdapter())

        if with_adapter_cfg.ray:
            if h_ray is None:
                logger.warning("Ray is not installed. Skipping Ray adapter.")
            else:
                ray_kwargs = project_adapter_cfg.ray.to_dict()
                ray_kwargs.update(pipeline_adapter_cfg.ray.to_dict())
                ray_adapter = h_ray.RayGraphAdapter(**ray_kwargs)
                adapters.append(ray_adapter)

        all_adapters = [
            f"{adp}: ✅" if enabled else f"{adp}: ❌"
            for adp, enabled in with_adapter_cfg.to_dict().items()
        ]

        if adapter:
            adapters += list(adapter.values())
            all_adapters += [f"{adp}: ✅" for adp in adapter.keys()]

        logger.debug(f"Adapters enabled: {' | '.join(all_adapters)}")
        return adapters

    def _get_driver(
        self,
        config: dict | None = None,
        cache: bool | dict = False,
        executor_cfg: str | dict | ExecutorConfig | None = None,
        with_adapter_cfg: dict | WithAdapterConfig | None = None,
        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
        adapter: dict[str, Any] | None = None,
        reload: bool = False,
    ) -> tuple[driver.Driver, Callable | None]:
        """
        Get the driver and shutdown function for a given pipeline.

        Args:
            config (dict | None): The configuration for the pipeline.
            cache (bool | dict): Whether to use the cache.
                To fine-tune the cache settings, pass a dictionary with the cache settings
                or adjust the pipeline config.
                If set to True, the default cache settings will be used.
            executor_cfg (str | dict | ExecutorConfig | None): The executor to use.
                Overrides the executor settings in the pipeline config.
            with_adapter_cfg (dict | WithAdapterConfig | None): The adapter configuration.
                Overrides the with_adapter settings in the pipeline config.
            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): The pipeline adapter configuration.
                Overrides the adapter settings in the pipeline config.
            project_adapter_cfg (dict | ProjectAdapterConfig | None): The project adapter configuration.
                Overrides the adapter settings in the project config.
            adapter (dict[str, Any] | None): Any additional Hamilton adapters can be passed here.
            reload (bool): Whether to reload the module.

        Returns:
            tuple[driver.Driver, Callable | None]: A tuple containing the driver and shutdown function.
        """
        logger.debug("Setting up driver...")
        module = load_module(name=self.name, reload=reload)
        executor, shutdown = self._get_executor(executor_cfg)
        adapters = self._get_adapters(
            with_adapter_cfg,
            pipeline_adapter_cfg,
            project_adapter_cfg,
            adapter=adapter,
        )

        config = config or self.pipeline_cfg.run.config

        dr = (
            driver.Builder()
            .enable_dynamic_execution(allow_experimental_mode=True)
            .with_modules(module)
            .with_config(config)
            .with_local_executor(executors.SynchronousLocalTaskExecutor())
        )

        if cache:
            if isinstance(cache, dict):
                cache = cache or self.pipeline_cfg.run.cache
                dr = dr.with_cache(**cache)
            else:
                dr = dr.with_cache()

        if executor:
            dr = dr.with_remote_executor(executor)

        if adapters:
            dr = dr.with_adapters(*adapters)

        dr = dr.build()
        return dr, shutdown

    def run(
        self,
        inputs: dict | None = None,
        final_vars: list[str] | None = None,
        config: dict | None = None,
        cache: dict | None = None,
        executor_cfg: str | dict | ExecutorConfig | None = None,
        with_adapter_cfg: dict | WithAdapterConfig | None = None,
        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
        adapter: dict[str, Any] | None = None,
        reload: bool = False,
        log_level: str | None = None,
        max_retries: int | None = None,
        retry_delay: float | None = None,
        jitter_factor: float | None = None,
        retry_exceptions: tuple = (
            Exception,
            HTTPError,
            UnauthorizedException,
        ),
    ) -> dict[str, Any]:
        """
        Run the pipeline with the given parameters.

        Args:
            inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
            final_vars (list[str] | None, optional): The final variables for the pipeline. Defaults to None.
            config (dict | None, optional): The config for the Hamilton driver. Defaults to None.
            cache (dict | None, optional): The cache configuration. Defaults to None.
            executor_cfg (str | dict | ExecutorConfig | None, optional): The executor to use.
                Overrides the executor settings in the pipeline config. Defaults to None.
            with_adapter_cfg (dict | WithAdapterConfig | None, optional): The adapter configuration.
                Overrides the with_adapter settings in the pipeline config. Defaults to None.
            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None, optional): The pipeline adapter configuration.
                Overrides the adapter settings in the pipeline config. Defaults to None.
            project_adapter_cfg (dict | ProjectAdapterConfig | None, optional): The project adapter configuration.
                Overrides the adapter settings in the project config. Defaults to None.
            adapter (dict[str, Any] | None, optional): Any additional Hamilton adapters can be passed here. Defaults to None.
            reload (bool, optional): Whether to reload the module. Defaults to False.
            log_level (str | None, optional): The log level to use. Defaults to None.
            max_retries (int | None, optional): The maximum number of retry attempts. Defaults to None.
            retry_delay (float | None, optional): The base delay between retries in seconds. Defaults to None.
            jitter_factor (float | None, optional): The factor to apply for jitter. Defaults to None.
            retry_exceptions (tuple | None, optional): The exceptions to catch for retries.
                Defaults to (Exception, HTTPError, UnauthorizedException).

        Returns:
            dict[str, Any]: The result of executing the pipeline.
        """
        self.start_time = dt.datetime.now()

        if log_level or self.pipeline_cfg.run.log_level:
            setup_logging(level=log_level or self.pipeline_cfg.run.log_level)

        logger.info(f"Starting pipeline {self.project_cfg.name}.{self.name}")

        final_vars = final_vars or self.pipeline_cfg.run.final_vars
        inputs = {
            **(self.pipeline_cfg.run.inputs or {}),
            **(inputs or {}),
        }  # <-- inputs override and/or extend config inputs

        max_retries = max_retries or self.pipeline_cfg.run.max_retries
        retry_delay = retry_delay or self.pipeline_cfg.run.retry_delay
        jitter_factor = jitter_factor or self.pipeline_cfg.run.jitter_factor
        retry_exceptions = retry_exceptions or self.pipeline_cfg.run.retry_exceptions

        if not isinstance(retry_exceptions, (tuple, list)):
            retry_exceptions = [retry_exceptions]
        retry_exceptions = [
            eval(exc) if isinstance(exc, str) else exc for exc in retry_exceptions
        ]

        attempts = 1
        last_exception = None

        while attempts <= max_retries:
            logger.debug(f"Attempting to execute pipeline {attempts}/{max_retries}")
            try:
                dr, shutdown = self._get_driver(
                    config=config,
                    cache=cache,
                    executor_cfg=executor_cfg,
                    with_adapter_cfg=with_adapter_cfg,
                    pipeline_adapter_cfg=pipeline_adapter_cfg,
                    project_adapter_cfg=project_adapter_cfg,
                    adapter=adapter,
                    reload=reload,
                )

                res = dr.execute(final_vars=final_vars, inputs=inputs)
                self.end_time = dt.datetime.now()
                self.execution_time = self.end_time - self.start_time
                logger.success(
                    f"Finished: Pipeline {self.project_cfg.name}.{self.name} executed in {humanize.naturaldelta(self.execution_time)}"
                )

                if shutdown is not None:
                    logger.info("Shutting down executor...")
                    shutdown()
                    logger.info("Executor shut down.")

                return res
            except tuple(retry_exceptions) as e:
                # handle retries

                if (
                    isinstance(e, HTTPError)
                    or isinstance(e, UnauthorizedException)
                    or isinstance(e, ConnectionError)
                ):
                    if with_adapter_cfg["hamilton_tracker"]:
                        logger.info(
                            "Hamilton Tracker is enabled. Disabling tracker for the next run."
                        )
                        with_adapter_cfg["hamilton_tracker"] = False

                attempts += 1
                last_exception = e

                if attempts <= max_retries:
                    logger.warning(
                        f"Pipeline execution failed (attempt {attempts}/{max_retries}): {e}"
                    )

                    # Calculate base delay with exponential backoff
                    base_delay = retry_delay * (2 ** (attempts - 1))

                    # Add jitter: random value between -jitter_factor and +jitter_factor of the base delay
                    jitter = base_delay * jitter_factor * (2 * random.random() - 1)
                    actual_delay = max(
                        0, base_delay + jitter
                    )  # Ensure non-negative delay

                    logger.debug(
                        f"Retrying in {actual_delay:.2f} seconds (base: {base_delay:.2f}s, jitter: {jitter:.2f}s)"
                    )
                    time.sleep(actual_delay)

                else:
                    # Last attempt failed
                    logger.error(
                        f"Pipeline execution failed after {max_retries} attempts"
                    )
                    raise last_exception


def run_pipeline(
    project_cfg: ProjectConfig,
    pipeline_cfg: PipelineConfig,
    inputs: dict | None = None,
    final_vars: list[str] | None = None,
    config: dict | None = None,
    cache: dict | None = None,
    executor_cfg: str | dict | ExecutorConfig | None = None,
    with_adapter_cfg: dict | WithAdapterConfig | None = None,
    pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
    project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
    adapter: dict[str, Any] | None = None,
    reload: bool = False,
    log_level: str | None = None,
    max_retries: int = 0,
    retry_delay: float = 1.0,
    jitter_factor: float = 0.1,
    retry_exceptions: tuple = (
        Exception,
        HTTPError,
        UnauthorizedException,
    ),  # Adjust to specific exceptions
) -> dict[str, Any]:
    """Run the pipeline with the given parameters.

    Args:
        project_cfg (ProjectConfig): The project configuration.
        pipeline_cfg (PipelineConfig): The pipeline configuration.
        inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
        final_vars (list[str] | None, optional): The final variables for the pipeline. Defaults to None.
        config (dict | None, optional): The config for the Hamilton driver. Defaults to None.
        cache (dict | None, optional): The cache configuration. Defaults to None.
        executor_cfg (str | dict | ExecutorConfig | None, optional): The executor to use.
            Overrides the executor settings in the pipeline config. Defaults to None.
        with_adapter_cfg (dict | WithAdapterConfig | None, optional): The adapter configuration.
            Overrides the with_adapter settings in the pipeline config. Defaults to None.
        pipeline_adapter_cfg (dict | PipelineAdapterConfig | None, optional): The pipeline adapter configuration.
            Overrides the adapter settings in the pipeline config. Defaults to None.
        project_adapter_cfg (dict | ProjectAdapterConfig | None, optional): The project adapter configuration.
            Overrides the adapter settings in the project config. Defaults to None.
        adapter (dict[str, Any] | None, optional): Any additional Hamilton adapters can be passed here. Defaults to None.
        reload (bool, optional): Whether to reload the module. Defaults to False.
        log_level (str | None, optional): The log level to use. Defaults to None.
        max_retries (int, optional): The maximum number of retry attempts. Defaults to 0.
        retry_delay (float, optional): The base delay between retries in seconds. Defaults to 1.0.
        jitter_factor (float, optional): The factor to apply for jitter. Defaults to 0.1.
        retry_exceptions (tuple, optional): A tuple of exception classes to catch for retries.
            Defaults to (Exception, HTTPError, UnauthorizedException).

    Returns:
        dict[str, Any]: The result of executing the pipeline.

    Raises:
        Exception: If the pipeline execution fails after the maximum number of retries.
    """
    with PipelineRunner(project_cfg, pipeline_cfg) as runner:
        return runner.run(
            inputs=inputs,
            final_vars=final_vars,
            config=config,
            cache=cache,
            executor_cfg=executor_cfg,
            with_adapter_cfg=with_adapter_cfg,
            pipeline_adapter_cfg=pipeline_adapter_cfg,
            project_adapter_cfg=project_adapter_cfg,
            adapter=adapter,
            reload=reload,
            log_level=log_level,
            max_retries=max_retries,
            retry_delay=retry_delay,
            jitter_factor=jitter_factor,
            retry_exceptions=retry_exceptions,
        )
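The retry loop in PipelineRunner.run() above implements exponential backoff with symmetric jitter: after a failed attempt, the base delay doubles, and a uniform random offset of up to ±jitter_factor of that base is added so that concurrent retries do not all fire at once. A minimal standalone sketch of just that schedule follows; the helper name and the example output are illustrative, not part of the package:

import random

def backoff_delays(
    max_retries: int = 5, retry_delay: float = 1.0, jitter_factor: float = 0.1
) -> list[float]:
    """Delay schedule mirroring PipelineRunner.run(): exponential backoff with jitter."""
    delays = []
    # After attempt n fails, `attempts` is incremented to n + 1 before the delay
    # is computed, so the first retry waits roughly 2 * retry_delay seconds.
    for attempts in range(2, max_retries + 1):
        base_delay = retry_delay * (2 ** (attempts - 1))
        # Jitter is uniform in [-jitter_factor, +jitter_factor] of the base delay.
        jitter = base_delay * jitter_factor * (2 * random.random() - 1)
        delays.append(max(0.0, base_delay + jitter))
    return delays

print(backoff_delays())  # e.g. [2.08, 3.71, 8.44, 15.9] with the defaults above

In 0.20.0 this module is gone; per the summary above, its responsibilities moved into flowerpower/pipeline/pipeline.py and the expanded flowerpower/flowerpower.py.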