FlowerPower 0.9.13.1__py3-none-any.whl → 1.0.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- flowerpower/__init__.py +17 -2
- flowerpower/cfg/__init__.py +201 -149
- flowerpower/cfg/base.py +122 -24
- flowerpower/cfg/pipeline/__init__.py +254 -0
- flowerpower/cfg/pipeline/adapter.py +66 -0
- flowerpower/cfg/pipeline/run.py +40 -11
- flowerpower/cfg/pipeline/schedule.py +69 -79
- flowerpower/cfg/project/__init__.py +149 -0
- flowerpower/cfg/project/adapter.py +57 -0
- flowerpower/cfg/project/job_queue.py +165 -0
- flowerpower/cli/__init__.py +92 -37
- flowerpower/cli/job_queue.py +878 -0
- flowerpower/cli/mqtt.py +32 -1
- flowerpower/cli/pipeline.py +559 -406
- flowerpower/cli/utils.py +29 -18
- flowerpower/flowerpower.py +12 -8
- flowerpower/fs/__init__.py +20 -2
- flowerpower/fs/base.py +350 -26
- flowerpower/fs/ext.py +797 -216
- flowerpower/fs/storage_options.py +1097 -55
- flowerpower/io/base.py +13 -18
- flowerpower/io/loader/__init__.py +28 -0
- flowerpower/io/loader/deltatable.py +7 -10
- flowerpower/io/metadata.py +1 -0
- flowerpower/io/saver/__init__.py +28 -0
- flowerpower/io/saver/deltatable.py +4 -3
- flowerpower/job_queue/__init__.py +252 -0
- flowerpower/job_queue/apscheduler/__init__.py +11 -0
- flowerpower/job_queue/apscheduler/_setup/datastore.py +110 -0
- flowerpower/job_queue/apscheduler/_setup/eventbroker.py +93 -0
- flowerpower/job_queue/apscheduler/manager.py +1063 -0
- flowerpower/job_queue/apscheduler/setup.py +524 -0
- flowerpower/job_queue/apscheduler/trigger.py +169 -0
- flowerpower/job_queue/apscheduler/utils.py +309 -0
- flowerpower/job_queue/base.py +382 -0
- flowerpower/job_queue/rq/__init__.py +10 -0
- flowerpower/job_queue/rq/_trigger.py +37 -0
- flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +226 -0
- flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +231 -0
- flowerpower/job_queue/rq/manager.py +1449 -0
- flowerpower/job_queue/rq/setup.py +150 -0
- flowerpower/job_queue/rq/utils.py +69 -0
- flowerpower/pipeline/__init__.py +5 -0
- flowerpower/pipeline/base.py +118 -0
- flowerpower/pipeline/io.py +407 -0
- flowerpower/pipeline/job_queue.py +505 -0
- flowerpower/pipeline/manager.py +1586 -0
- flowerpower/pipeline/registry.py +560 -0
- flowerpower/pipeline/runner.py +560 -0
- flowerpower/pipeline/visualizer.py +142 -0
- flowerpower/plugins/mqtt/__init__.py +12 -0
- flowerpower/plugins/mqtt/cfg.py +16 -0
- flowerpower/plugins/mqtt/manager.py +789 -0
- flowerpower/settings.py +110 -0
- flowerpower/utils/logging.py +21 -0
- flowerpower/utils/misc.py +57 -9
- flowerpower/utils/sql.py +122 -24
- flowerpower/utils/templates.py +2 -142
- flowerpower-1.0.0b2.dist-info/METADATA +324 -0
- flowerpower-1.0.0b2.dist-info/RECORD +94 -0
- flowerpower/_web/__init__.py +0 -61
- flowerpower/_web/routes/config.py +0 -103
- flowerpower/_web/routes/pipelines.py +0 -173
- flowerpower/_web/routes/scheduler.py +0 -136
- flowerpower/cfg/pipeline/tracker.py +0 -14
- flowerpower/cfg/project/open_telemetry.py +0 -8
- flowerpower/cfg/project/tracker.py +0 -11
- flowerpower/cfg/project/worker.py +0 -19
- flowerpower/cli/scheduler.py +0 -309
- flowerpower/cli/web.py +0 -44
- flowerpower/event_handler.py +0 -23
- flowerpower/mqtt.py +0 -609
- flowerpower/pipeline.py +0 -2499
- flowerpower/scheduler.py +0 -680
- flowerpower/tui.py +0 -79
- flowerpower/utils/datastore.py +0 -186
- flowerpower/utils/eventbroker.py +0 -127
- flowerpower/utils/executor.py +0 -58
- flowerpower/utils/trigger.py +0 -140
- flowerpower-0.9.13.1.dist-info/METADATA +0 -586
- flowerpower-0.9.13.1.dist-info/RECORD +0 -76
- flowerpower/{cfg/pipeline/params.py → cli/worker.py} +0 -0
- {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b2.dist-info}/WHEEL +0 -0
- {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b2.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b2.dist-info}/top_level.txt +0 -0
flowerpower/pipeline/runner.py
@@ -0,0 +1,560 @@
+# -*- coding: utf-8 -*-
+"""Pipeline Runner."""
+
+from __future__ import annotations
+import time
+import random
+import datetime as dt
+import importlib.util
+from typing import Any, Callable
+
+import humanize
+from hamilton import driver
+from hamilton.execution import executors
+from hamilton.registry import disable_autoload
+from hamilton.telemetry import disable_telemetry
+from hamilton_sdk.api.clients import UnauthorizedException
+
+from requests.exceptions import HTTPError
+
+from .. import settings
+
+
+if importlib.util.find_spec("opentelemetry"):
+    from hamilton.plugins import h_opentelemetry
+
+    from ..utils.open_telemetry import init_tracer
+else:
+    h_opentelemetry = None
+    init_tracer = None
+
+if importlib.util.find_spec("mlflow"):
+    from hamilton.plugins import h_mlflow
+else:
+    h_mlflow = None
+
+from hamilton.plugins import h_rich
+from hamilton.plugins.h_threadpool import FutureAdapter
+from hamilton_sdk.adapters import HamiltonTracker
+from hamilton_sdk.tracking import constants
+from loguru import logger
+
+if importlib.util.find_spec("distributed"):
+    from dask import distributed
+    from hamilton.plugins import h_dask
+else:
+    distributed = None
+
+
+if importlib.util.find_spec("ray"):
+    import ray
+    from hamilton.plugins import h_ray
+else:
+    h_ray = None
+
+from ..cfg import PipelineConfig, ProjectConfig
+from ..cfg.pipeline.adapter import AdapterConfig as PipelineAdapterConfig
+from ..cfg.pipeline.run import ExecutorConfig, WithAdapterConfig
+from ..cfg.project.adapter import AdapterConfig as ProjectAdapterConfig
+from ..utils.logging import setup_logging
+from .base import load_module
+
+setup_logging(level=settings.LOG_LEVEL)
+
+# from .executor import get_executor
+
+
+class PipelineRunner:
+    """PipelineRunner is responsible for executing a specific pipeline run.
+    It handles the loading of the pipeline module, configuration, and execution"""
+
+    def __init__(
+        self,
+        project_cfg: ProjectConfig,
+        pipeline_cfg: PipelineConfig,
+    ):
+        self.project_cfg = project_cfg
+        self.pipeline_cfg = pipeline_cfg
+        self.name = pipeline_cfg.name
+
+        if not settings.HAMILTON_TELEMETRY_ENABLED:
+            disable_telemetry()
+        if not settings.HAMILTON_AUTOLOAD_EXTENSIONS:
+            disable_autoload()
+
+    def __enter__(self):
+        """Enable use as a context manager."""
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """No special cleanup required."""
+        pass
+
+    def _get_executor(
+        self, executor_cfg: str | dict | ExecutorConfig | None = None
+    ) -> tuple[executors.BaseExecutor, Callable | None]:
+        """
+        Get the executor based on the provided configuration.
+
+        Args:
+            executor (dict | None): Executor configuration.
+
+        Returns:
+            tuple[executors.BaseExecutor, Callable | None]: A tuple containing the executor and shutdown function.
+        """
+        logger.debug("Setting up executor...")
+        if executor_cfg:
+            if isinstance(executor_cfg, str):
+                executor_cfg = ExecutorConfig(type=executor_cfg)
+            elif isinstance(executor_cfg, dict):
+                executor_cfg = ExecutorConfig.from_dict(executor_cfg)
+            elif not isinstance(executor_cfg, ExecutorConfig):
+                raise TypeError(
+                    "Executor must be a string, dictionary, or ExecutorConfig instance."
+                )
+
+            executor_cfg = self.pipeline_cfg.run.executor.merge(executor_cfg)
+        else:
+            executor_cfg = self.pipeline_cfg.run.executor
+
+        if executor_cfg.type is None:
+            logger.debug(
+                "No executor type specified. Using SynchronousLocalTaskExecutor as default."
+            )
+            return executors.SynchronousLocalTaskExecutor(), None
+
+        if executor_cfg.type == "threadpool":
+            logger.debug(
+                f"Using MultiThreadingExecutor with max_workers={executor_cfg.max_workers}"
+            )
+            return executors.MultiThreadingExecutor(
+                max_tasks=executor_cfg.max_workers
+            ), None
+        elif executor_cfg.type == "processpool":
+            logger.debug(
+                f"Using MultiProcessingExecutor with max_workers={executor_cfg.max_workers}"
+            )
+            return executors.MultiProcessingExecutor(
+                max_tasks=executor_cfg.max_workers
+            ), None
+        elif executor_cfg.type == "ray":
+            if h_ray:
+                logger.debug(
+                    f"Using RayTaskExecutor with num_cpus={executor_cfg.num_cpus}"
+                )
+
+                return (
+                    h_ray.RayTaskExecutor(
+                        num_cpus=executor_cfg.num_cpus,
+                        ray_init_config=self.project_cfg.adapter.ray.ray_init_config,
+                    ),
+                    ray.shutdown
+                    if self.project_cfg.adapter.ray.shutdown_ray_on_completion
+                    else None,
+                )
+            else:
+                logger.warning("Ray is not installed. Using local executor.")
+                return executors.SynchronousLocalTaskExecutor(), None
+        elif executor_cfg.type == "dask":
+            if distributed:
+                cluster = distributed.LocalCluster()
+                client = distributed.Client(cluster)
+                return h_dask.DaskExecutor(client=client), cluster.close
+            else:
+                logger.warning("Dask is not installed. Using local executor.")
+                return executors.SynchronousLocalTaskExecutor(), None
+        else:
+            logger.warning(
+                f"Unknown executor type: {executor_cfg.type}. Using local executor."
+            )
+            return executors.SynchronousLocalTaskExecutor(), None
+
+    def _get_adapters(
+        self,
+        with_adapter_cfg: dict | WithAdapterConfig | None = None,
+        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
+        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
+        adapter: dict[str, Any] | None = None,
+    ) -> list:
+        """
+        Set the adapters for the pipeline.
+
+        Args:
+            with_adapter_cfg (dict | WithAdapterConfig | None): The adapter configuration.
+                Overrides the with_adapter settings in the pipeline config.
+            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): The pipeline adapter configuration.
+                Overrides the adapter settings in the pipeline config.
+            project_adapter_cfg (dict | ProjectAdapterConfig | None): The project adapter configuration.
+                Overrides the adapter settings in the project config.
+            adapter (dict[str, Any] | None): Any additional hamilton adapters can be passed here.
+        """
+        logger.debug("Setting up adapters...")
+        if with_adapter_cfg:
+            if isinstance(with_adapter_cfg, dict):
+                with_adapter_cfg = WithAdapterConfig.from_dict(with_adapter_cfg)
+            elif not isinstance(with_adapter_cfg, WithAdapterConfig):
+                raise TypeError(
+                    "with_adapter must be a dictionary or WithAdapterConfig instance."
+                )
+
+            with_adapter_cfg = self.pipeline_cfg.run.with_adapter.merge(
+                with_adapter_cfg
+            )
+        else:
+            with_adapter_cfg = self.pipeline_cfg.run.with_adapter
+
+        if pipeline_adapter_cfg:
+            if isinstance(pipeline_adapter_cfg, dict):
+                pipeline_adapter_cfg = PipelineAdapterConfig.from_dict(
+                    pipeline_adapter_cfg
+                )
+            elif not isinstance(pipeline_adapter_cfg, PipelineAdapterConfig):
+                raise TypeError(
+                    "pipeline_adapter_cfg must be a dictionary or PipelineAdapterConfig instance."
+                )
+
+            pipeline_adapter_cfg = self.pipeline_cfg.adapter.merge(pipeline_adapter_cfg)
+        else:
+            pipeline_adapter_cfg = self.pipeline_cfg.adapter
+
+        if project_adapter_cfg:
+            if isinstance(project_adapter_cfg, dict):
+                project_adapter_cfg = ProjectAdapterConfig.from_dict(
+                    project_adapter_cfg
+                )
+            elif not isinstance(project_adapter_cfg, ProjectAdapterConfig):
+                raise TypeError(
+                    "project_adapter_cfg must be a dictionary or ProjectAdapterConfig instance."
+                )
+
+            project_adapter_cfg = self.project_cfg.adapter.merge(project_adapter_cfg)
+        else:
+            project_adapter_cfg = self.project_cfg.adapter
+
+        adapters = []
+        if with_adapter_cfg.tracker:
+            tracker_kwargs = project_adapter_cfg.tracker.to_dict()
+            tracker_kwargs.update(pipeline_adapter_cfg.tracker.to_dict())
+            tracker_kwargs["hamilton_api_url"] = tracker_kwargs.pop("api_url", None)
+            tracker_kwargs["hamilton_ui_url"] = tracker_kwargs.pop("ui_url", None)
+
+            constants.MAX_DICT_LENGTH_CAPTURE = (
+                tracker_kwargs.pop("max_dict_length_capture", None)
+                or settings.HAMILTON_MAX_DICT_LENGTH_CAPTURE
+            )
+            constants.MAX_LIST_LENGTH_CAPTURE = (
+                tracker_kwargs.pop("max_list_length_capture", None)
+                or settings.HAMILTON_MAX_LIST_LENGTH_CAPTURE
+            )
+            constants.CAPTURE_DATA_STATISTICS = (
+                tracker_kwargs.pop("capture_data_statistics", None)
+                or settings.HAMILTON_CAPTURE_DATA_STATISTICS
+            )
+
+            tracker = HamiltonTracker(**tracker_kwargs)
+
+            adapters.append(tracker)
+
+        if with_adapter_cfg.mlflow:
+            if h_mlflow is None:
+                logger.warning("MLFlow is not installed. Skipping MLFlow adapter.")
+            else:
+                mlflow_kwargs = project_adapter_cfg.mlflow.to_dict()
+                mlflow_kwargs.update(pipeline_adapter_cfg.mlflow.to_dict())
+                mlflow_adapter = h_mlflow.MLFlowTracker(**mlflow_kwargs)
+                adapters.append(mlflow_adapter)
+
+        if with_adapter_cfg.opentelemetry:
+            if h_opentelemetry is None:
+                logger.warning(
+                    "OpenTelemetry is not installed. Skipping OpenTelemetry adapter."
+                )
+            else:
+                otel_kwargs = project_adapter_cfg.opentelemetry.to_dict()
+                otel_kwargs.update(pipeline_adapter_cfg.opentelemetry.to_dict())
+                trace = init_tracer(**otel_kwargs, name=self.project_cfg.name)
+                tracer = trace.get_tracer(self.name)
+                otel_adapter = h_opentelemetry.OpenTelemetryTracer(
+                    tracer_name=f"{self.project_cfg.name}.{self.name}",
+                    tracer=tracer,
+                )
+                adapters.append(otel_adapter)
+
+        if with_adapter_cfg.progressbar:
+            adapters.append(
+                h_rich.RichProgressBar(run_desc=f"{self.project_cfg.name}.{self.name}")
+            )
+
+        if with_adapter_cfg.future:
+            adapters.append(FutureAdapter())
+
+        if with_adapter_cfg.ray:
+            if h_ray is None:
+                logger.warning("Ray is not installed. Skipping Ray adapter.")
+            else:
+                ray_kwargs = project_adapter_cfg.ray.to_dict()
+                ray_kwargs.update(pipeline_adapter_cfg.ray.to_dict())
+                ray_adapter = h_ray.RayGraphAdapter(**ray_kwargs)
+                adapters.append(ray_adapter)
+
+        all_adapters = [
+            f"{adp}: ✅" if enabled else f"{adp}: ❌"
+            for adp, enabled in with_adapter_cfg.to_dict().items()
+        ]
+
+        if adapter:
+            adapters += list(adapter.values())
+            all_adapters += [f"{adp}: ✅" for adp in adapter.keys()]
+
+        logger.debug(f"Adapters enabled: {' | '.join(all_adapters)}")
+        return adapters
+
+    def _get_driver(
+        self,
+        config: dict | None = None,
+        cache: bool | dict = False,
+        executor_cfg: str | dict | ExecutorConfig | None = None,
+        with_adapter_cfg: dict | WithAdapterConfig | None = None,
+        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
+        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
+        adapter: dict[str, Any] | None = None,
+        reload: bool = False,
+    ) -> tuple[driver.Driver, Callable | None]:
+        """
+        Get the driver and shutdown function for a given pipeline.
+
+        Args:
+            config (dict | None): The configuration for the pipeline.
+            cache (bool): Use cache or not.
+                To fine tune the cache settings, pass a dictionary with the cache settings
+                or adjust the pipeline config.
+                If set to True, the default cache settings will be used.
+            executor_cfg (str | dict | ExecutorConfig | None): The executor to use.
+                Overrides the executor settings in the pipeline config.
+            with_adapter_cfg (dict | WithAdapterConfig | None): The adapter configuration.
+                Overrides the with_adapter settings in the pipeline config.
+            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): The pipeline adapter configuration.
+                Overrides the adapter settings in the pipeline config.
+            project_adapter_cfg (dict | ProjectAdapterConfig | None): The project adapter configuration.
+                Overrides the adapter settings in the project config.
+            adapter (dict[str, Any] | None): Any additional Hamilton adapters can be passed here.
+            reload (bool): Whether to reload the module.
+
+
+        Returns:
+            tuple[driver.Driver, Callable | None]: A tuple containing the driver and shutdown function.
+        """
+        logger.debug("Setting up driver...")
+        module = load_module(name=self.name, reload=reload)
+        executor, shutdown = self._get_executor(executor_cfg)
+        adapters = self._get_adapters(
+            with_adapter_cfg,
+            pipeline_adapter_cfg,
+            project_adapter_cfg,
+            adapter=adapter,
+        )
+
+        config = config or self.pipeline_cfg.run.config
+
+        dr = (
+            driver.Builder()
+            .enable_dynamic_execution(allow_experimental_mode=True)
+            .with_modules(module)
+            .with_config(config)
+            .with_local_executor(executors.SynchronousLocalTaskExecutor())
+        )
+
+        if cache:
+            if isinstance(cache, dict):
+                cache = cache or self.pipeline_cfg.run.cache
+                dr = dr.with_cache(**cache)
+            else:
+                dr = dr.with_cache()
+
+        if executor:
+            dr = dr.with_remote_executor(executor)
+
+        if adapters:
+            dr = dr.with_adapters(*adapters)
+
+        dr = dr.build()
+        return dr, shutdown
+
+    def run(
+        self,
+        inputs: dict | None = None,
+        final_vars: list[str] | None = None,
+        config: dict | None = None,
+        cache: dict | None = None,
+        executor_cfg: str | dict | ExecutorConfig | None = None,
+        with_adapter_cfg: dict | WithAdapterConfig | None = None,
+        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
+        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
+        adapter: dict[str, Any] | None = None,
+        reload: bool = False,
+        log_level: str | None = None,
+    ) -> dict[str, Any]:
+        """
+        Run the pipeline with the given parameters.
+        Args:
+            inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
+            final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
+            config (dict | None, optional): The config for the hamilton driver. Defaults to None.
+            cache (dict | None, optional): The cache configuration. Defaults to None.
+            executor_cfg (str | dict | ExecutorConfig | None, optional): The executor to use.
+                Overrides the executor settings in the pipeline config. Defaults to None.
+            with_adapter_cfg (dict | WithAdapterConfig | None, optional): The adapter configuration.
+                Overrides the with_adapter settings in the pipeline config. Defaults to None.
+            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None, optional): The pipeline adapter configuration.
+                Overrides the adapter settings in the pipeline config. Defaults to None.
+            project_adapter_cfg (dict | ProjectAdapterConfig | None, optional): The project adapter configuration.
+                Overrides the adapter settings in the project config. Defaults to None.
+            adapter (dict[str, Any] | None, optional): Any additional Hamilton adapters can be passed here. Defaults to None.
+            reload (bool, optional): Whether to reload the module. Defaults to False.
+            log_level (str | None, optional): The log level to use. Defaults to None.
+
+        Returns:
+            dict[str, Any]: The result of executing the pipeline.
+        """
+        self.start_time = dt.datetime.now()
+
+        if log_level or self.pipeline_cfg.run.log_level:
+            setup_logging(level=log_level or self.pipeline_cfg.run.log_level)
+
+        logger.info(f"Starting pipeline {self.project_cfg.name}.{self.name}")
+        # Load the module and get the driver
+        dr, shutdown = self._get_driver(
+            config=config,
+            cache=cache,
+            executor_cfg=executor_cfg,
+            with_adapter_cfg=with_adapter_cfg,
+            pipeline_adapter_cfg=pipeline_adapter_cfg,
+            project_adapter_cfg=project_adapter_cfg,
+            adapter=adapter,
+            reload=reload,
+        )
+        final_vars = final_vars or self.pipeline_cfg.run.final_vars
+        inputs = {
+            **(self.pipeline_cfg.run.inputs or {}),
+            **(inputs or {}),
+        }  # <-- inputs override and/or extend config inputs
+
+        res = dr.execute(final_vars=final_vars, inputs=inputs)
+        self.end_time = dt.datetime.now()
+        self.execution_time = self.end_time - self.start_time
+        logger.success(
+            f"Finished: Pipeline {self.project_cfg.name}.{self.name} executed in {humanize.naturaldelta(self.execution_time)}"
+        )
+
+        if shutdown is not None:
+            logger.info("Shutting down executor...")
+            shutdown()
+            logger.info("Executor shut down.")
+
+        return res
+
+
+def run_pipeline(
+    project_cfg: ProjectConfig,
+    pipeline_cfg: PipelineConfig,
+    inputs: dict | None = None,
+    final_vars: list[str] | None = None,
+    config: dict | None = None,
+    cache: dict | None = None,
+    executor_cfg: str | dict | ExecutorConfig | None = None,
+    with_adapter_cfg: dict | WithAdapterConfig | None = None,
+    pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
+    project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
+    adapter: dict[str, Any] | None = None,
+    reload: bool = False,
+    log_level: str | None = None,
+    max_retries: int = 0,
+    retry_delay: float = 1.0,
+    jitter_factor: float = 0.1,
+    retry_exceptions: tuple = (
+        Exception,
+        HTTPError,
+        UnauthorizedException,
+    ),  # Adjust to specific exceptions
+) -> dict[str, Any]:
+    """Run the pipeline with the given parameters.
+
+    Args:
+
+        project_cfg (ProjectConfig): The project configuration.
+        pipeline_cfg (PipelineConfig): The pipeline configuration.
+        inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
+        final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
+        config (dict | None, optional): The config for the hamilton driver. Defaults to None.
+        cache (dict | None, optional): The cache configuration. Defaults to None.
+        executor_cfg (str | dict | ExecutorConfig | None, optional): The executor to use.
+            Overrides the executor settings in the pipeline config. Defaults to None.
+        with_adapter_cfg (dict | WithAdapterConfig | None, optional): The adapter configuration.
+            Overrides the with_adapter settings in the pipeline config. Defaults to None.
+        pipeline_adapter_cfg (dict | PipelineAdapterConfig | None, optional): The pipeline adapter configuration.
+            Overrides the adapter settings in the pipeline config. Defaults to None.
+        project_adapter_cfg (dict | ProjectAdapterConfig | None, optional): The project adapter configuration.
+            Overrides the adapter settings in the project config. Defaults to None.
+        adapter (dict[str, Any] | None, optional): Any additional Hamilton adapters can be passed here. Defaults to None.
+        reload (bool, optional): Whether to reload the module. Defaults to False.
+        log_level (str | None, optional): The log level to use. Defaults to None.
+        max_retries (int, optional): The maximum number of retry attempts. Defaults to 0.
+        retry_delay (float, optional): The base delay between retries in seconds. Defaults to 1.0.
+        jitter_factor (float, optional): The factor to apply for jitter. Defaults to 0.1.
+        retry_exceptions (tuple, optional): A tuple of exception classes to catch for retries. Defaults to (Exception,).
+
+    Returns:
+
+        dict[str, Any]: The result of executing the pipeline.
+
+    Raises:
+        Exception: If the pipeline execution fails after the maximum number of retries.
+    """
+    attempts = 0
+    last_exception = None
+
+    while attempts <= max_retries:
+        try:
+            with PipelineRunner(project_cfg, pipeline_cfg) as runner:
+                return runner.run(
+                    inputs=inputs,
+                    final_vars=final_vars,
+                    config=config,
+                    cache=cache,
+                    executor_cfg=executor_cfg,
+                    with_adapter_cfg=with_adapter_cfg,
+                    pipeline_adapter_cfg=pipeline_adapter_cfg,
+                    project_adapter_cfg=project_adapter_cfg,
+                    adapter=adapter,
+                    reload=reload,
+                    log_level=log_level,
+                )
+        except retry_exceptions as e:
+            if isinstance(e, HTTPError) or isinstance(e, UnauthorizedException):
+                if with_adapter_cfg["tracker"]:
+                    logger.info("Tracker is enabled. Disabling tracker for this run.")
+                    with_adapter_cfg["tracker"] = False
+
+            attempts += 1
+            last_exception = e
+
+            if attempts <= max_retries:
+                logger.warning(
+                    f"Pipeline execution failed (attempt {attempts}/{max_retries}): {e}"
+                )
+
+                # Calculate base delay with exponential backoff
+                base_delay = retry_delay * (2 ** (attempts - 1))
+
+                # Add jitter: random value between -jitter_factor and +jitter_factor of the base delay
+                jitter = base_delay * jitter_factor * (2 * random.random() - 1)
+                actual_delay = max(0, base_delay + jitter)  # Ensure non-negative delay
+
+                logger.debug(
+                    f"Retrying in {actual_delay:.2f} seconds (base: {base_delay:.2f}s, jitter: {jitter:.2f}s)"
+                )
+                time.sleep(actual_delay)
+            else:
+                # Last attempt failed
+                logger.error(f"Pipeline execution failed after {max_retries} attempts")
+                raise last_exception
flowerpower/pipeline/visualizer.py
@@ -0,0 +1,142 @@
+import posixpath
+from typing import Any
+
+from hamilton import driver
+from rich import print
+
+# Import necessary config types and utility functions
+from ..cfg import PipelineConfig, ProjectConfig
+from ..fs import AbstractFileSystem
+from ..utils.misc import view_img
+from .base import load_module  # Import module loading utility
+
+
+class PipelineVisualizer:
+    """Handles the visualization of pipeline DAGs."""
+
+    def __init__(self, project_cfg: ProjectConfig, fs: AbstractFileSystem):
+        """
+        Initializes the PipelineVisualizer.
+
+        Args:
+            project_cfg: The project configuration object.
+            fs: The filesystem instance.
+        """
+        self.project_cfg = project_cfg
+        self._fs = fs
+        # Attributes like fs and base_dir are accessed via self.project_cfg
+
+    def _display_all_function(self, name: str, reload: bool = False):
+        """Internal helper to load module/config and get the Hamilton DAG object.
+
+        Args:
+            name (str): The name of the pipeline.
+            reload (bool): Whether to reload the module.
+
+        Returns:
+            Hamilton DAG object.
+
+        Raises:
+            ImportError: If the module cannot be loaded.
+
+        """
+        # Load pipeline-specific config
+        pipeline_cfg = PipelineConfig.load(name=name, fs=self._fs)
+
+        # Load the pipeline module
+        # Ensure the pipelines directory is in sys.path (handled by PipelineManager usually)
+        module = load_module(name=name, reload=reload)
+
+        # Create a basic driver builder for visualization purposes
+        # Use the run config from the loaded pipeline_cfg
+        builder = (
+            driver.Builder()
+            .enable_dynamic_execution(allow_experimental_mode=True)
+            .with_modules(module)
+            .with_config(pipeline_cfg.run.config or {})
+            # No adapters or complex executors needed for display_all_functions
+        )
+
+        # Build the driver
+        dr = builder.build()
+
+        # Return the visualization object
+        return dr.display_all_functions()
+
+    def save_dag(
+        self,
+        name: str,
+        format: str = "png",
+        reload: bool = False,
+    ):
+        """
+        Save an image of the graph of functions for a given pipeline name.
+
+        Args:
+            name (str): The name of the pipeline graph.
+            format (str, optional): The format of the graph file. Defaults to "png".
+            reload (bool, optional): Whether to reload the pipeline data. Defaults to False.
+
+        Raises:
+            ImportError: If the module cannot be loaded.
+
+        Example:
+            >>> from flowerpower.pipeline.visualizer import PipelineVisualizer
+            >>> visualizer = PipelineVisualizer(project_cfg, fs)
+            >>> visualizer.save_dag(name="example_pipeline", format="png")
+        """
+        dag = self._display_all_function(name=name, reload=reload)
+
+        # Use project_cfg attributes for path and filesystem access
+        graph_dir = posixpath.join(self.project_cfg.base_dir, "graphs")
+        self._fs.makedirs(graph_dir, exist_ok=True)
+
+        output_path = posixpath.join(
+            graph_dir, name
+        )  # Output filename is just the pipeline name
+        output_path_with_ext = f"{output_path}.{format}"
+
+        # Render the DAG using the graphviz object returned by display_all_functions
+        dag.render(
+            output_path,  # graphviz appends the format automatically
+            format=format,
+            cleanup=True,
+            view=False,
+        )
+        print(
+            f"📊 Saved graph for [bold blue]{self.project_cfg.name}.{name}[/bold blue] to [green]{output_path_with_ext}[/green]"
+        )
+
+    def show_dag(
+        self,
+        name: str,
+        format: str = "png",
+        reload: bool = False,
+        raw: bool = False,
+    ):
+        """
+        Display the graph of functions for a given pipeline name.
+
+        Args:
+            name (str): The name of the pipeline graph.
+            format (str, optional): The format of the graph file. Defaults to "png".
+            reload (bool, optional): Whether to reload the pipeline data. Defaults to False.
+            raw (bool, optional): Whether to return the raw graph object instead of displaying. Defaults to False.
+
+        Returns:
+            Optional[graphviz.Digraph]: The generated graph object if raw=True, else None.
+
+        Raises:
+            ImportError: If the module cannot be loaded.
+
+        Example:
+            >>> from flowerpower.pipeline.visualizer import PipelineVisualizer
+            >>> visualizer = PipelineVisualizer(project_cfg, fs)
+            >>> visualizer.show_dag(name="example_pipeline", format="png")
+        """
+        dag = self._display_all_function(name=name, reload=reload)
+        if raw:
+            return dag
+        # Use view_img utility to display the rendered graph
+        view_img(dag.pipe(format=format), format=format)
+        return None  # Explicitly return None when not raw