FlowerPower 0.11.6.19__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowerpower/cfg/__init__.py +3 -3
- flowerpower/cfg/pipeline/__init__.py +5 -3
- flowerpower/cfg/project/__init__.py +3 -3
- flowerpower/cfg/project/job_queue.py +1 -128
- flowerpower/cli/__init__.py +5 -5
- flowerpower/cli/cfg.py +0 -3
- flowerpower/cli/job_queue.py +401 -133
- flowerpower/cli/pipeline.py +14 -413
- flowerpower/cli/utils.py +0 -1
- flowerpower/flowerpower.py +537 -28
- flowerpower/job_queue/__init__.py +5 -94
- flowerpower/job_queue/base.py +201 -3
- flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -3
- flowerpower/job_queue/rq/manager.py +388 -77
- flowerpower/pipeline/__init__.py +2 -0
- flowerpower/pipeline/base.py +2 -2
- flowerpower/pipeline/io.py +14 -16
- flowerpower/pipeline/manager.py +21 -642
- flowerpower/pipeline/pipeline.py +571 -0
- flowerpower/pipeline/registry.py +242 -10
- flowerpower/pipeline/visualizer.py +1 -2
- flowerpower/plugins/_io/__init__.py +8 -0
- flowerpower/plugins/mqtt/manager.py +6 -6
- flowerpower/settings/backend.py +0 -2
- flowerpower/settings/job_queue.py +1 -57
- flowerpower/utils/misc.py +0 -256
- flowerpower/utils/monkey.py +1 -83
- {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/METADATA +308 -152
- flowerpower-0.20.0.dist-info/RECORD +58 -0
- flowerpower/fs/__init__.py +0 -29
- flowerpower/fs/base.py +0 -662
- flowerpower/fs/ext.py +0 -2143
- flowerpower/fs/storage_options.py +0 -1420
- flowerpower/job_queue/apscheduler/__init__.py +0 -11
- flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -110
- flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -93
- flowerpower/job_queue/apscheduler/manager.py +0 -1051
- flowerpower/job_queue/apscheduler/setup.py +0 -554
- flowerpower/job_queue/apscheduler/trigger.py +0 -169
- flowerpower/job_queue/apscheduler/utils.py +0 -311
- flowerpower/pipeline/job_queue.py +0 -583
- flowerpower/pipeline/runner.py +0 -603
- flowerpower/plugins/io/base.py +0 -2520
- flowerpower/plugins/io/helpers/datetime.py +0 -298
- flowerpower/plugins/io/helpers/polars.py +0 -875
- flowerpower/plugins/io/helpers/pyarrow.py +0 -570
- flowerpower/plugins/io/helpers/sql.py +0 -202
- flowerpower/plugins/io/loader/__init__.py +0 -28
- flowerpower/plugins/io/loader/csv.py +0 -37
- flowerpower/plugins/io/loader/deltatable.py +0 -190
- flowerpower/plugins/io/loader/duckdb.py +0 -19
- flowerpower/plugins/io/loader/json.py +0 -37
- flowerpower/plugins/io/loader/mqtt.py +0 -159
- flowerpower/plugins/io/loader/mssql.py +0 -26
- flowerpower/plugins/io/loader/mysql.py +0 -26
- flowerpower/plugins/io/loader/oracle.py +0 -26
- flowerpower/plugins/io/loader/parquet.py +0 -35
- flowerpower/plugins/io/loader/postgres.py +0 -26
- flowerpower/plugins/io/loader/pydala.py +0 -19
- flowerpower/plugins/io/loader/sqlite.py +0 -23
- flowerpower/plugins/io/metadata.py +0 -244
- flowerpower/plugins/io/saver/__init__.py +0 -28
- flowerpower/plugins/io/saver/csv.py +0 -36
- flowerpower/plugins/io/saver/deltatable.py +0 -186
- flowerpower/plugins/io/saver/duckdb.py +0 -19
- flowerpower/plugins/io/saver/json.py +0 -36
- flowerpower/plugins/io/saver/mqtt.py +0 -28
- flowerpower/plugins/io/saver/mssql.py +0 -26
- flowerpower/plugins/io/saver/mysql.py +0 -26
- flowerpower/plugins/io/saver/oracle.py +0 -26
- flowerpower/plugins/io/saver/parquet.py +0 -36
- flowerpower/plugins/io/saver/postgres.py +0 -26
- flowerpower/plugins/io/saver/pydala.py +0 -20
- flowerpower/plugins/io/saver/sqlite.py +0 -24
- flowerpower/utils/scheduler.py +0 -311
- flowerpower-0.11.6.19.dist-info/RECORD +0 -102
- {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/WHEEL +0 -0
- {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/licenses/LICENSE +0 -0
- {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,571 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
"""Active Pipeline class for FlowerPower."""
|
3
|
+
|
4
|
+
from __future__ import annotations
|
5
|
+
|
6
|
+
import datetime as dt
|
7
|
+
import importlib
|
8
|
+
import importlib.util
|
9
|
+
import random
|
10
|
+
import time
|
11
|
+
from typing import TYPE_CHECKING, Any, Callable
|
12
|
+
|
13
|
+
import humanize
|
14
|
+
import msgspec
|
15
|
+
from hamilton import driver
|
16
|
+
from hamilton.execution import executors
|
17
|
+
from hamilton.registry import disable_autoload
|
18
|
+
from hamilton.telemetry import disable_telemetry
|
19
|
+
from hamilton_sdk.api.clients import UnauthorizedException
|
20
|
+
from requests.exceptions import ConnectionError, HTTPError
|
21
|
+
|
22
|
+
from .. import settings
|
23
|
+
|
24
|
+
if importlib.util.find_spec("opentelemetry"):
|
25
|
+
from hamilton.plugins import h_opentelemetry
|
26
|
+
|
27
|
+
from ..utils.open_telemetry import init_tracer
|
28
|
+
else:
|
29
|
+
h_opentelemetry = None
|
30
|
+
init_tracer = None
|
31
|
+
|
32
|
+
if importlib.util.find_spec("mlflow"):
|
33
|
+
from hamilton.plugins import h_mlflow
|
34
|
+
else:
|
35
|
+
h_mlflow = None
|
36
|
+
|
37
|
+
from hamilton.plugins import h_rich
|
38
|
+
from hamilton.plugins.h_threadpool import FutureAdapter
|
39
|
+
from hamilton_sdk.adapters import HamiltonTracker
|
40
|
+
from hamilton_sdk.tracking import constants
|
41
|
+
from loguru import logger
|
42
|
+
|
43
|
+
if importlib.util.find_spec("distributed"):
|
44
|
+
from dask import distributed
|
45
|
+
from hamilton.plugins import h_dask
|
46
|
+
else:
|
47
|
+
distributed = None
|
48
|
+
|
49
|
+
if importlib.util.find_spec("ray"):
|
50
|
+
import ray
|
51
|
+
|
52
|
+
# from hamilton.plugins import h_ray
|
53
|
+
h_ray = None
|
54
|
+
else:
|
55
|
+
ray = None
|
56
|
+
h_ray = None
|
57
|
+
|
58
|
+
from ..cfg import PipelineConfig, ProjectConfig
|
59
|
+
from ..cfg.pipeline.adapter import AdapterConfig as PipelineAdapterConfig
|
60
|
+
from ..cfg.pipeline.run import ExecutorConfig, WithAdapterConfig
|
61
|
+
from ..cfg.project.adapter import AdapterConfig as ProjectAdapterConfig
|
62
|
+
|
63
|
+
if TYPE_CHECKING:
|
64
|
+
from ..flowerpower import FlowerPowerProject
|
65
|
+
|
66
|
+
|
67
|
+
class Pipeline(msgspec.Struct):
    """Active pipeline object that encapsulates its own execution logic.

    This class represents a single pipeline with its configuration, loaded
    module, and project context. It is responsible for its own execution,
    including setting up Hamilton drivers, managing adapters, and handling
    retries.

    Attributes:
        name: The name of the pipeline.
        config: The pipeline configuration.
        module: The loaded Python module containing Hamilton functions.
        project_context: Reference to the FlowerPowerProject.
    """

    name: str
    config: PipelineConfig
    module: Any
    project_context: FlowerPowerProject

    def __post_init__(self):
        """Apply global Hamilton settings (telemetry / extension autoload)."""
        if not settings.HAMILTON_TELEMETRY_ENABLED:
            disable_telemetry()
        if not settings.HAMILTON_AUTOLOAD_EXTENSIONS:
            disable_autoload()

    def run(
        self,
        inputs: dict | None = None,
        final_vars: list[str] | None = None,
        config: dict | None = None,
        cache: dict | None = None,
        executor_cfg: str | dict | ExecutorConfig | None = None,
        with_adapter_cfg: dict | WithAdapterConfig | None = None,
        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
        adapter: dict[str, Any] | None = None,
        reload: bool = False,
        log_level: str | None = None,
        max_retries: int | None = None,
        retry_delay: float | None = None,
        jitter_factor: float | None = None,
        retry_exceptions: tuple = (
            Exception,
            HTTPError,
            UnauthorizedException,
        ),
        on_success: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
        on_failure: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
    ) -> dict[str, Any]:
        """Execute the pipeline with the given parameters.

        Args:
            inputs: Override pipeline input values.
            final_vars: Specify which output variables to return.
            config: Configuration for the Hamilton pipeline executor.
            cache: Cache configuration for results.
            executor_cfg: Execution configuration.
            with_adapter_cfg: Adapter settings for pipeline execution.
            pipeline_adapter_cfg: Pipeline-specific adapter configuration.
            project_adapter_cfg: Project-wide adapter configuration.
            adapter: Additional Hamilton adapters (mapping of name -> adapter).
            reload: Whether to reload the module before running.
            log_level: Log level for execution.
            max_retries: Maximum number of retry attempts.
            retry_delay: Base delay between retries in seconds.
            jitter_factor: Factor applied to the backoff delay for jitter.
            retry_exceptions: Exceptions (classes or their names as strings)
                that trigger a retry.
            on_success: Callback for successful execution. Either a callable
                invoked as ``cb(result, None)`` or a ``(func, args, kwargs)``
                tuple invoked as ``func(*args, **kwargs)``.
            on_failure: Callback for failed execution, same shapes as
                ``on_success`` but invoked with ``(None, exception)``.

        Returns:
            The result of executing the pipeline.

        Raises:
            Exception: Re-raises the last execution error once all retry
                attempts are exhausted (or immediately for non-retryable
                errors).
        """
        start_time = dt.datetime.now()

        # Reload module if requested
        if reload:
            self._reload_module()

        # Fall back to the pipeline's configured defaults for anything unset.
        inputs = inputs or self.config.run.inputs or {}
        final_vars = final_vars or self.config.run.final_vars or []
        config = {**(self.config.run.config or {}), **(config or {})}
        cache = cache or self.config.run.cache or {}

        # Retry policy defaults.
        max_retries = max_retries or self.config.run.max_retries or 0
        retry_delay = retry_delay or self.config.run.retry_delay or 1.0
        jitter_factor = jitter_factor or self.config.run.jitter_factor or 0.1

        retry_exceptions = self._normalize_retry_exceptions(retry_exceptions)

        # Execute with retry logic.
        for attempt in range(max_retries + 1):
            try:
                logger.info(
                    f"🚀 Running pipeline '{self.name}' (attempt {attempt + 1}/{max_retries + 1})"
                )

                result = self._execute_pipeline(
                    inputs=inputs,
                    final_vars=final_vars,
                    config=config,
                    cache=cache,
                    executor_cfg=executor_cfg,
                    with_adapter_cfg=with_adapter_cfg,
                    pipeline_adapter_cfg=pipeline_adapter_cfg,
                    project_adapter_cfg=project_adapter_cfg,
                    adapter=adapter,
                    log_level=log_level,
                )

                end_time = dt.datetime.now()
                duration = humanize.naturaldelta(end_time - start_time)

                logger.success(
                    f"✅ Pipeline '{self.name}' completed successfully in {duration}"
                )

                # Execute success callback if provided.
                if on_success:
                    self._execute_callback(on_success, result, None)

                return result

            except retry_exceptions as e:
                if attempt < max_retries:
                    # Exponential backoff with jitter to avoid retry storms.
                    delay = retry_delay * (2**attempt)
                    jitter = delay * jitter_factor * random.random()
                    total_delay = delay + jitter

                    logger.warning(
                        f"⚠️ Pipeline '{self.name}' failed (attempt {attempt + 1}/{max_retries + 1}): {e}"
                    )
                    logger.info(f"🔄 Retrying in {total_delay:.2f} seconds...")
                    time.sleep(total_delay)
                else:
                    end_time = dt.datetime.now()
                    duration = humanize.naturaldelta(end_time - start_time)

                    logger.error(
                        f"❌ Pipeline '{self.name}' failed after {max_retries + 1} attempts in {duration}: {e}"
                    )

                    # Execute failure callback if provided.
                    if on_failure:
                        self._execute_callback(on_failure, None, e)

                    raise
            except Exception as e:
                # Non-retryable error: report, fire the failure callback, re-raise.
                end_time = dt.datetime.now()
                duration = humanize.naturaldelta(end_time - start_time)

                logger.error(f"❌ Pipeline '{self.name}' failed in {duration}: {e}")

                if on_failure:
                    self._execute_callback(on_failure, None, e)

                raise

    @staticmethod
    def _normalize_retry_exceptions(retry_exceptions: Any) -> tuple:
        """Normalize *retry_exceptions* into a tuple of exception classes.

        String entries are resolved against a small allowlist of the
        third-party exceptions this module imports, then against the
        ``builtins`` namespace. This replaces an earlier ``eval()``-based
        lookup, which would have executed arbitrary configuration-supplied
        code. Anything unresolvable degrades to ``Exception`` with a warning.
        """
        import builtins

        if not retry_exceptions:
            return (Exception,)
        if not isinstance(retry_exceptions, (list, tuple)):
            # Leave any other already-usable value untouched.
            return retry_exceptions

        # Non-builtin exceptions that config may reference by name.
        known_exceptions = {
            "HTTPError": HTTPError,
            "ConnectionError": ConnectionError,
            "UnauthorizedException": UnauthorizedException,
        }

        converted_exceptions = []
        for exc in retry_exceptions:
            if isinstance(exc, str):
                exc_class = known_exceptions.get(exc) or getattr(builtins, exc, None)
                if exc_class is None:
                    logger.warning(f"Unknown exception type: {exc}, using Exception")
                    converted_exceptions.append(Exception)
                elif isinstance(exc_class, type) and issubclass(
                    exc_class, BaseException
                ):
                    converted_exceptions.append(exc_class)
                else:
                    logger.warning(f"'{exc}' is not an exception class, using Exception")
                    converted_exceptions.append(Exception)
            elif isinstance(exc, type) and issubclass(exc, BaseException):
                converted_exceptions.append(exc)
            else:
                logger.warning(f"Invalid exception type: {exc}, using Exception")
                converted_exceptions.append(Exception)
        return tuple(converted_exceptions)

    def _execute_pipeline(
        self,
        inputs: dict,
        final_vars: list[str],
        config: dict,
        cache: dict,
        executor_cfg: str | dict | ExecutorConfig | None,
        with_adapter_cfg: dict | WithAdapterConfig | None,
        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None,
        project_adapter_cfg: dict | ProjectAdapterConfig | None,
        adapter: dict[str, Any] | None,
        log_level: str | None,
    ) -> dict[str, Any]:
        """Build a Hamilton driver and execute the pipeline once.

        NOTE(review): ``cache`` and ``log_level`` are accepted but not applied
        to the driver here — confirm whether that is intentional.
        """
        # Get executor and adapters.
        executor, shutdown_func = self._get_executor(executor_cfg)
        adapters = self._get_adapters(
            with_adapter_cfg=with_adapter_cfg,
            pipeline_adapter_cfg=pipeline_adapter_cfg,
            project_adapter_cfg=project_adapter_cfg,
            adapter=adapter,
        )

        try:
            # Create Hamilton driver.
            dr = (
                driver.Builder()
                .with_config(config)
                .with_modules(self.module)
                .with_adapters(*adapters)
                .build()
            )

            # Execute the pipeline.
            result = dr.execute(
                final_vars=final_vars,
                inputs=inputs,
            )

            return result

        finally:
            # Clean up executor if needed (e.g. Dask cluster / Ray shutdown).
            if shutdown_func:
                try:
                    shutdown_func()
                except Exception as e:
                    logger.warning(f"Failed to shutdown executor: {e}")

    def _get_executor(
        self, executor_cfg: str | dict | ExecutorConfig | None = None
    ) -> tuple[executors.BaseExecutor, Callable | None]:
        """Build the task executor from the provided configuration.

        Returns:
            A ``(executor, shutdown_func)`` pair; ``shutdown_func`` is ``None``
            when no cleanup is needed.

        Raises:
            TypeError: If ``executor_cfg`` is not a str, dict, or
                ExecutorConfig.
        """
        logger.debug("Setting up executor...")

        if executor_cfg:
            if isinstance(executor_cfg, str):
                executor_cfg = ExecutorConfig(type=executor_cfg)
            elif isinstance(executor_cfg, dict):
                executor_cfg = ExecutorConfig.from_dict(executor_cfg)
            elif not isinstance(executor_cfg, ExecutorConfig):
                raise TypeError(
                    "Executor must be a string, dictionary, or ExecutorConfig instance."
                )

            executor_cfg = self.config.run.executor.merge(executor_cfg)
        else:
            executor_cfg = self.config.run.executor

        if executor_cfg.type is None or executor_cfg.type == "synchronous":
            logger.debug("Using SynchronousLocalTaskExecutor as default.")
            return executors.SynchronousLocalTaskExecutor(), None

        if executor_cfg.type == "threadpool":
            logger.debug(
                f"Using MultiThreadingExecutor with max_workers={executor_cfg.max_workers}"
            )
            return executors.MultiThreadingExecutor(
                max_tasks=executor_cfg.max_workers
            ), None
        elif executor_cfg.type == "processpool":
            logger.debug(
                f"Using MultiProcessingExecutor with max_workers={executor_cfg.max_workers}"
            )
            return executors.MultiProcessingExecutor(
                max_tasks=executor_cfg.max_workers
            ), None
        elif executor_cfg.type == "ray":
            if h_ray:
                logger.debug(
                    f"Using RayTaskExecutor with num_cpus={executor_cfg.num_cpus}"
                )

                # Handle temporary case where project_context is PipelineManager.
                project_cfg = getattr(
                    self.project_context, "project_cfg", None
                ) or getattr(self.project_context, "_project_cfg", None)
                if project_cfg is None:
                    # Without a project config we cannot configure Ray
                    # init/shutdown; degrade to the local executor.
                    logger.warning(
                        "No project config found for Ray executor. Using local executor."
                    )
                    return executors.SynchronousLocalTaskExecutor(), None

                return (
                    h_ray.RayTaskExecutor(
                        num_cpus=executor_cfg.num_cpus,
                        ray_init_config=project_cfg.adapter.ray.ray_init_config,
                    ),
                    ray.shutdown
                    if project_cfg.adapter.ray.shutdown_ray_on_completion
                    else None,
                )
            else:
                logger.warning("Ray is not installed. Using local executor.")
                return executors.SynchronousLocalTaskExecutor(), None
        elif executor_cfg.type == "dask":
            if distributed:
                cluster = distributed.LocalCluster()
                client = distributed.Client(cluster)
                return h_dask.DaskExecutor(client=client), cluster.close
            else:
                logger.warning("Dask is not installed. Using local executor.")
                return executors.SynchronousLocalTaskExecutor(), None
        else:
            logger.warning(
                f"Unknown executor type: {executor_cfg.type}. Using local executor."
            )
            return executors.SynchronousLocalTaskExecutor(), None

    def _resolve_project_adapter_cfg(self) -> ProjectAdapterConfig:
        """Locate the project-level adapter configuration.

        Handles both a FlowerPowerProject context (which exposes a
        ``pipeline_manager``) and the transitional case where
        ``project_context`` is itself a PipelineManager carrying the project
        config directly. Falls back to a default ``ProjectAdapterConfig`` when
        no config can be found.
        """
        # Handle temporary case where project_context is PipelineManager.
        cfg = getattr(self.project_context, "project_cfg", None) or getattr(
            self.project_context, "_project_cfg", None
        )
        if cfg is not None and hasattr(cfg, "adapter"):
            return cfg.adapter

        # Use project context directly if it's FlowerPowerProject.
        manager = getattr(self.project_context, "pipeline_manager", None)
        if manager is not None:
            cfg = getattr(manager, "project_cfg", None) or getattr(
                manager, "_project_cfg", None
            )
            if cfg is not None:
                return cfg.adapter

        return ProjectAdapterConfig()

    def _get_adapters(
        self,
        with_adapter_cfg: dict | WithAdapterConfig | None = None,
        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
        adapter: dict[str, Any] | None = None,
    ) -> list:
        """Assemble the Hamilton adapters enabled for this run.

        Raises:
            TypeError: If any of the adapter configuration arguments has an
                unsupported type.
        """
        logger.debug("Setting up adapters...")

        # Resolve run-level adapter toggles.
        if with_adapter_cfg:
            if isinstance(with_adapter_cfg, dict):
                with_adapter_cfg = WithAdapterConfig.from_dict(with_adapter_cfg)
            elif not isinstance(with_adapter_cfg, WithAdapterConfig):
                raise TypeError(
                    "with_adapter must be a dictionary or WithAdapterConfig instance."
                )

            with_adapter_cfg = self.config.run.with_adapter.merge(with_adapter_cfg)
        else:
            with_adapter_cfg = self.config.run.with_adapter

        # Resolve pipeline-level adapter config.
        if pipeline_adapter_cfg:
            if isinstance(pipeline_adapter_cfg, dict):
                pipeline_adapter_cfg = PipelineAdapterConfig.from_dict(
                    pipeline_adapter_cfg
                )
            elif not isinstance(pipeline_adapter_cfg, PipelineAdapterConfig):
                raise TypeError(
                    "pipeline_adapter_cfg must be a dictionary or PipelineAdapterConfig instance."
                )

            pipeline_adapter_cfg = self.config.adapter.merge(pipeline_adapter_cfg)
        else:
            pipeline_adapter_cfg = self.config.adapter

        # Resolve project-level adapter config. A caller-supplied config is
        # always merged on top of whatever base the project context provides
        # (previously it was silently discarded when no base was found).
        if project_adapter_cfg:
            if isinstance(project_adapter_cfg, dict):
                project_adapter_cfg = ProjectAdapterConfig.from_dict(
                    project_adapter_cfg
                )
            elif not isinstance(project_adapter_cfg, ProjectAdapterConfig):
                raise TypeError(
                    "project_adapter_cfg must be a dictionary or ProjectAdapterConfig instance."
                )

            project_adapter_cfg = self._resolve_project_adapter_cfg().merge(
                project_adapter_cfg
            )
        else:
            project_adapter_cfg = self._resolve_project_adapter_cfg()

        adapters = []

        # Hamilton Tracker adapter.
        if with_adapter_cfg.hamilton_tracker:
            tracker_kwargs = project_adapter_cfg.hamilton_tracker.to_dict()
            tracker_kwargs.update(pipeline_adapter_cfg.hamilton_tracker.to_dict())
            # HamiltonTracker expects hamilton_api_url / hamilton_ui_url names.
            tracker_kwargs["hamilton_api_url"] = tracker_kwargs.pop("api_url", None)
            tracker_kwargs["hamilton_ui_url"] = tracker_kwargs.pop("ui_url", None)

            constants.MAX_DICT_LENGTH_CAPTURE = (
                tracker_kwargs.pop("max_dict_length_capture", None)
                or settings.HAMILTON_MAX_DICT_LENGTH_CAPTURE
            )
            constants.MAX_LIST_LENGTH_CAPTURE = (
                tracker_kwargs.pop("max_list_length_capture", None)
                or settings.HAMILTON_MAX_LIST_LENGTH_CAPTURE
            )
            constants.CAPTURE_DATA_STATISTICS = (
                tracker_kwargs.pop("capture_data_statistics", None)
                or settings.HAMILTON_CAPTURE_DATA_STATISTICS
            )

            tracker = HamiltonTracker(**tracker_kwargs)
            adapters.append(tracker)

        # MLFlow adapter.
        if with_adapter_cfg.mlflow:
            if h_mlflow is None:
                logger.warning("MLFlow is not installed. Skipping MLFlow adapter.")
            else:
                mlflow_kwargs = project_adapter_cfg.mlflow.to_dict()
                mlflow_kwargs.update(pipeline_adapter_cfg.mlflow.to_dict())
                mlflow_adapter = h_mlflow.MLFlowTracker(**mlflow_kwargs)
                adapters.append(mlflow_adapter)

        # OpenTelemetry adapter.
        if with_adapter_cfg.opentelemetry:
            if h_opentelemetry is None:
                logger.warning(
                    "OpenTelemetry is not installed. Skipping OpenTelemetry adapter."
                )
            else:
                otel_kwargs = project_adapter_cfg.opentelemetry.to_dict()
                otel_kwargs.update(pipeline_adapter_cfg.opentelemetry.to_dict())
                init_tracer()
                otel_adapter = h_opentelemetry.OpenTelemetryTracker(**otel_kwargs)
                adapters.append(otel_adapter)

        # Progress bar adapter.
        if with_adapter_cfg.progressbar:
            progressbar_kwargs = project_adapter_cfg.progressbar.to_dict()
            progressbar_kwargs.update(pipeline_adapter_cfg.progressbar.to_dict())
            progressbar_adapter = h_rich.ProgressBar(**progressbar_kwargs)
            adapters.append(progressbar_adapter)

        # Add any additional caller-supplied adapters (keys are ignored).
        if adapter:
            adapters.extend(adapter.values())

        return adapters

    def _execute_callback(
        self,
        callback: Callable | tuple[Callable, tuple | None, dict | None],
        result: dict[str, Any] | None,
        exception: Exception | None,
    ):
        """Execute a success/failure callback, swallowing callback errors.

        A bare callable is invoked as ``callback(result, exception)``; a
        ``(func, args, kwargs)`` tuple is invoked as ``func(*args, **kwargs)``.
        Callback failures are logged but never propagated, so they cannot mask
        the pipeline's own outcome.
        """
        try:
            if isinstance(callback, tuple):
                func, args, kwargs = callback
                args = args or ()
                kwargs = kwargs or {}
                func(*args, **kwargs)
            else:
                callback(result, exception)
        except Exception as e:
            logger.error(f"Callback execution failed: {e}")

    def _reload_module(self):
        """Reload the pipeline's Python module, re-raising on failure."""
        try:
            importlib.reload(self.module)
            logger.debug(f"Reloaded module for pipeline '{self.name}'")
        except Exception as e:
            logger.error(f"Failed to reload module for pipeline '{self.name}': {e}")
            raise