FlowerPower 0.11.6.20__py3-none-any.whl → 0.21.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- flowerpower/__init__.py +2 -6
- flowerpower/cfg/__init__.py +7 -14
- flowerpower/cfg/base.py +29 -25
- flowerpower/cfg/pipeline/__init__.py +8 -6
- flowerpower/cfg/pipeline/_schedule.py +32 -0
- flowerpower/cfg/pipeline/adapter.py +0 -5
- flowerpower/cfg/pipeline/builder.py +377 -0
- flowerpower/cfg/pipeline/run.py +36 -0
- flowerpower/cfg/project/__init__.py +11 -24
- flowerpower/cfg/project/adapter.py +0 -12
- flowerpower/cli/__init__.py +2 -21
- flowerpower/cli/cfg.py +0 -3
- flowerpower/cli/mqtt.py +0 -6
- flowerpower/cli/pipeline.py +22 -415
- flowerpower/cli/utils.py +0 -1
- flowerpower/flowerpower.py +345 -146
- flowerpower/pipeline/__init__.py +2 -0
- flowerpower/pipeline/base.py +21 -12
- flowerpower/pipeline/io.py +58 -54
- flowerpower/pipeline/manager.py +165 -726
- flowerpower/pipeline/pipeline.py +643 -0
- flowerpower/pipeline/registry.py +285 -18
- flowerpower/pipeline/visualizer.py +5 -6
- flowerpower/plugins/io/__init__.py +8 -0
- flowerpower/plugins/mqtt/__init__.py +7 -11
- flowerpower/settings/__init__.py +0 -2
- flowerpower/settings/{backend.py → _backend.py} +0 -21
- flowerpower/settings/logging.py +1 -1
- flowerpower/utils/logging.py +24 -12
- flowerpower/utils/misc.py +17 -256
- flowerpower/utils/monkey.py +1 -83
- flowerpower-0.21.0.dist-info/METADATA +463 -0
- flowerpower-0.21.0.dist-info/RECORD +44 -0
- flowerpower/cfg/pipeline/schedule.py +0 -74
- flowerpower/cfg/project/job_queue.py +0 -238
- flowerpower/cli/job_queue.py +0 -1061
- flowerpower/fs/__init__.py +0 -29
- flowerpower/fs/base.py +0 -662
- flowerpower/fs/ext.py +0 -2143
- flowerpower/fs/storage_options.py +0 -1420
- flowerpower/job_queue/__init__.py +0 -294
- flowerpower/job_queue/apscheduler/__init__.py +0 -11
- flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -110
- flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -93
- flowerpower/job_queue/apscheduler/manager.py +0 -1051
- flowerpower/job_queue/apscheduler/setup.py +0 -554
- flowerpower/job_queue/apscheduler/trigger.py +0 -169
- flowerpower/job_queue/apscheduler/utils.py +0 -311
- flowerpower/job_queue/base.py +0 -413
- flowerpower/job_queue/rq/__init__.py +0 -10
- flowerpower/job_queue/rq/_trigger.py +0 -37
- flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +0 -226
- flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -231
- flowerpower/job_queue/rq/manager.py +0 -1582
- flowerpower/job_queue/rq/setup.py +0 -154
- flowerpower/job_queue/rq/utils.py +0 -69
- flowerpower/mqtt.py +0 -12
- flowerpower/pipeline/job_queue.py +0 -583
- flowerpower/pipeline/runner.py +0 -603
- flowerpower/plugins/io/base.py +0 -2520
- flowerpower/plugins/io/helpers/datetime.py +0 -298
- flowerpower/plugins/io/helpers/polars.py +0 -875
- flowerpower/plugins/io/helpers/pyarrow.py +0 -570
- flowerpower/plugins/io/helpers/sql.py +0 -202
- flowerpower/plugins/io/loader/__init__.py +0 -28
- flowerpower/plugins/io/loader/csv.py +0 -37
- flowerpower/plugins/io/loader/deltatable.py +0 -190
- flowerpower/plugins/io/loader/duckdb.py +0 -19
- flowerpower/plugins/io/loader/json.py +0 -37
- flowerpower/plugins/io/loader/mqtt.py +0 -159
- flowerpower/plugins/io/loader/mssql.py +0 -26
- flowerpower/plugins/io/loader/mysql.py +0 -26
- flowerpower/plugins/io/loader/oracle.py +0 -26
- flowerpower/plugins/io/loader/parquet.py +0 -35
- flowerpower/plugins/io/loader/postgres.py +0 -26
- flowerpower/plugins/io/loader/pydala.py +0 -19
- flowerpower/plugins/io/loader/sqlite.py +0 -23
- flowerpower/plugins/io/metadata.py +0 -244
- flowerpower/plugins/io/saver/__init__.py +0 -28
- flowerpower/plugins/io/saver/csv.py +0 -36
- flowerpower/plugins/io/saver/deltatable.py +0 -186
- flowerpower/plugins/io/saver/duckdb.py +0 -19
- flowerpower/plugins/io/saver/json.py +0 -36
- flowerpower/plugins/io/saver/mqtt.py +0 -28
- flowerpower/plugins/io/saver/mssql.py +0 -26
- flowerpower/plugins/io/saver/mysql.py +0 -26
- flowerpower/plugins/io/saver/oracle.py +0 -26
- flowerpower/plugins/io/saver/parquet.py +0 -36
- flowerpower/plugins/io/saver/postgres.py +0 -26
- flowerpower/plugins/io/saver/pydala.py +0 -20
- flowerpower/plugins/io/saver/sqlite.py +0 -24
- flowerpower/plugins/mqtt/cfg.py +0 -17
- flowerpower/plugins/mqtt/manager.py +0 -962
- flowerpower/settings/job_queue.py +0 -87
- flowerpower/utils/scheduler.py +0 -311
- flowerpower-0.11.6.20.dist-info/METADATA +0 -537
- flowerpower-0.11.6.20.dist-info/RECORD +0 -102
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/WHEEL +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/licenses/LICENSE +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/top_level.txt +0 -0
--- /dev/null
+++ flowerpower/pipeline/pipeline.py
@@ -0,0 +1,643 @@
+# -*- coding: utf-8 -*-
+"""Active Pipeline class for FlowerPower."""
+
+from __future__ import annotations
+
+import datetime as dt
+import importlib
+import importlib.util
+import random
+import time
+from typing import TYPE_CHECKING, Any, Callable
+
+import humanize
+import msgspec
+from hamilton import driver
+from hamilton.execution import executors
+from hamilton.registry import disable_autoload
+from hamilton.telemetry import disable_telemetry
+from hamilton_sdk.api.clients import UnauthorizedException
+from requests.exceptions import ConnectionError, HTTPError
+
+from .. import settings
+
+if importlib.util.find_spec("opentelemetry"):
+    from hamilton.plugins import h_opentelemetry
+
+    from ..utils.open_telemetry import init_tracer
+else:
+    h_opentelemetry = None
+    init_tracer = None
+
+if importlib.util.find_spec("mlflow"):
+    from hamilton.plugins import h_mlflow
+else:
+    h_mlflow = None
+
+from hamilton.plugins import h_rich
+from hamilton.plugins.h_threadpool import FutureAdapter
+from hamilton_sdk.adapters import HamiltonTracker
+from hamilton_sdk.tracking import constants
+from loguru import logger
+
+if importlib.util.find_spec("distributed"):
+    from dask import distributed
+    from hamilton.plugins import h_dask
+else:
+    distributed = None
+
+if importlib.util.find_spec("ray"):
+    import ray
+
+    # from hamilton.plugins import h_ray
+    h_ray = None
+else:
+    ray = None
+    h_ray = None
+
+from ..cfg import PipelineConfig, ProjectConfig
+from ..cfg.pipeline.adapter import AdapterConfig as PipelineAdapterConfig
+from ..cfg.pipeline.run import ExecutorConfig, RunConfig, WithAdapterConfig
+from ..cfg.project.adapter import AdapterConfig as ProjectAdapterConfig
+
+if TYPE_CHECKING:
+    from ..flowerpower import FlowerPowerProject
+
+
+class Pipeline(msgspec.Struct):
+    """Active pipeline object that encapsulates its own execution logic.
+
+    This class represents a single pipeline with its configuration, loaded module,
+    and project context. It is responsible for its own execution, including
+    setting up Hamilton drivers, managing adapters, and handling retries.
+
+    Attributes:
+        name: The name of the pipeline
+        config: The pipeline configuration
+        module: The loaded Python module containing Hamilton functions
+        project_context: Reference to the FlowerPowerProject
+    """
+
+    name: str
+    config: PipelineConfig
+    module: Any
+    project_context: FlowerPowerProject
+
+    def __post_init__(self):
+        """Initialize Hamilton settings."""
+        if not settings.HAMILTON_TELEMETRY_ENABLED:
+            disable_telemetry()
+        if not settings.HAMILTON_AUTOLOAD_EXTENSIONS:
+            disable_autoload()
+
+    def _merge_run_config_with_kwargs(self, run_config: RunConfig, kwargs: dict) -> RunConfig:
+        """Merge kwargs into the run_config object.
+
+        Args:
+            run_config: The base RunConfig object to merge into
+            kwargs: Additional parameters to merge into the run_config
+
+        Returns:
+            Updated RunConfig object with merged kwargs
+        """
+        from copy import deepcopy
+
+        # Create a deep copy of the run_config to avoid modifying the original
+        merged_config = deepcopy(run_config)
+
+        # Handle each possible kwarg
+        for key, value in kwargs.items():
+            if key == 'inputs' and value is not None:
+                if merged_config.inputs is None:
+                    merged_config.inputs = {}
+                merged_config.inputs.update(value)
+            elif key == 'final_vars' and value is not None:
+                if merged_config.final_vars is None:
+                    merged_config.final_vars = []
+                merged_config.final_vars = value
+            elif key == 'config' and value is not None:
+                if merged_config.config is None:
+                    merged_config.config = {}
+                merged_config.config.update(value)
+            elif key == 'cache' and value is not None:
+                merged_config.cache = value
+            elif key == 'executor_cfg' and value is not None:
+                if isinstance(value, str):
+                    merged_config.executor = ExecutorConfig(type=value)
+                elif isinstance(value, dict):
+                    merged_config.executor = ExecutorConfig.from_dict(value)
+                elif isinstance(value, ExecutorConfig):
+                    merged_config.executor = value
+            elif key == 'with_adapter_cfg' and value is not None:
+                if isinstance(value, dict):
+                    merged_config.with_adapter = WithAdapterConfig.from_dict(value)
+                elif isinstance(value, WithAdapterConfig):
+                    merged_config.with_adapter = value
+            elif key == 'pipeline_adapter_cfg' and value is not None:
+                merged_config.pipeline_adapter_cfg = value
+            elif key == 'project_adapter_cfg' and value is not None:
+                merged_config.project_adapter_cfg = value
+            elif key == 'adapter' and value is not None:
+                if merged_config.adapter is None:
+                    merged_config.adapter = {}
+                merged_config.adapter.update(value)
+            elif key == 'reload' and value is not None:
+                merged_config.reload = value
+            elif key == 'log_level' and value is not None:
+                merged_config.log_level = value
+            elif key == 'max_retries' and value is not None:
+                merged_config.max_retries = value
+            elif key == 'retry_delay' and value is not None:
+                merged_config.retry_delay = value
+            elif key == 'jitter_factor' and value is not None:
+                merged_config.jitter_factor = value
+            elif key == 'retry_exceptions' and value is not None:
+                merged_config.retry_exceptions = value
+            elif key == 'on_success' and value is not None:
+                merged_config.on_success = value
+            elif key == 'on_failure' and value is not None:
+                merged_config.on_failure = value
+
+        return merged_config
+
+    def run(
+        self,
+        run_config: RunConfig | None = None,
+        **kwargs
+    ) -> dict[str, Any]:
+        """Execute the pipeline with the given parameters.
+
+        Args:
+            run_config: Run configuration object containing all execution parameters.
+                If None, uses the pipeline's default configuration.
+            **kwargs: Additional parameters to override or extend the run_config.
+
+        Returns:
+            The result of executing the pipeline
+        """
+        start_time = dt.datetime.now()
+
+        # Initialize run_config with pipeline defaults if not provided
+        run_config = run_config or self.config.run
+
+        # Merge kwargs into the run_config
+        if kwargs:
+            run_config = self._merge_run_config_with_kwargs(run_config, kwargs)
+
+        # Reload module if requested
+        if run_config.reload:
+            self._reload_module()
+
+        # Set up retry configuration
+        retry_config = self._setup_retry_config(
+            run_config.max_retries, run_config.retry_delay, run_config.jitter_factor, run_config.retry_exceptions
+        )
+        max_retries = retry_config["max_retries"]
+        retry_delay = retry_config["retry_delay"]
+        jitter_factor = retry_config["jitter_factor"]
+        retry_exceptions = retry_config["retry_exceptions"]
+
+        # Execute with retry logic
+        return self._execute_with_retry(
+            run_config=run_config,
+            max_retries=max_retries,
+            retry_delay=retry_delay,
+            jitter_factor=jitter_factor,
+            retry_exceptions=retry_exceptions,
+            start_time=start_time,
+        )
+
+    def _setup_retry_config(
+        self,
+        max_retries: int | None,
+        retry_delay: float | None,
+        jitter_factor: float | None,
+        retry_exceptions: tuple | None,
+    ) -> dict:
+        """Set up retry configuration with defaults and validation."""
+        max_retries = max_retries or self.config.run.max_retries or 0
+        retry_delay = retry_delay or self.config.run.retry_delay or 1.0
+        jitter_factor = jitter_factor or self.config.run.jitter_factor or 0.1
+
+        # Convert string exceptions to actual exception classes
+        if retry_exceptions and isinstance(retry_exceptions, (list, tuple)):
+            converted_exceptions = []
+            for exc in retry_exceptions:
+                if isinstance(exc, str):
+                    try:
+                        exc_class = eval(exc)
+                        # Ensure it's actually an exception class
+                        if isinstance(exc_class, type) and issubclass(
+                            exc_class, BaseException
+                        ):
+                            converted_exceptions.append(exc_class)
+                        else:
+                            logger.warning(
+                                f"'{exc}' is not an exception class, using Exception"
+                            )
+                            converted_exceptions.append(Exception)
+                    except (NameError, AttributeError):
+                        logger.warning(
+                            f"Unknown exception type: {exc}, using Exception"
+                        )
+                        converted_exceptions.append(Exception)
+                elif isinstance(exc, type) and issubclass(exc, BaseException):
+                    converted_exceptions.append(exc)
+                else:
+                    logger.warning(f"Invalid exception type: {exc}, using Exception")
+                    converted_exceptions.append(Exception)
+            retry_exceptions = tuple(converted_exceptions)
+        elif not retry_exceptions:
+            retry_exceptions = (Exception,)
+
+        return {
+            "max_retries": max_retries,
+            "retry_delay": retry_delay,
+            "jitter_factor": jitter_factor,
+            "retry_exceptions": retry_exceptions,
+        }
+
+    def _execute_with_retry(
+        self,
+        run_config: RunConfig,
+        max_retries: int,
+        retry_delay: float,
+        jitter_factor: float,
+        retry_exceptions: tuple,
+        start_time: dt.datetime,
+    ) -> dict[str, Any]:
+        """Execute pipeline with retry logic."""
+        for attempt in range(max_retries + 1):
+            try:
+                logger.info(
+                    f"🚀 Running pipeline '{self.name}' (attempt {attempt + 1}/{max_retries + 1})"
+                )
+
+                result = self._execute_pipeline(run_config=run_config)
+
+                end_time = dt.datetime.now()
+                duration = humanize.naturaldelta(end_time - start_time)
+
+                logger.success(
+                    f"✅ Pipeline '{self.name}' completed successfully in {duration}"
+                )
+
+                # Execute success callback if provided
+                if run_config.on_success:
+                    self._execute_callback(run_config.on_success, result, None)
+
+                return result
+
+            except retry_exceptions as e:
+                if attempt < max_retries:
+                    delay = retry_delay * (2**attempt)
+                    jitter = delay * jitter_factor * random.random()
+                    total_delay = delay + jitter
+
+                    logger.warning(
+                        f"⚠️ Pipeline '{self.name}' failed (attempt {attempt + 1}/{max_retries + 1}): {e}"
+                    )
+                    logger.info(f"🔄 Retrying in {total_delay:.2f} seconds...")
+                    time.sleep(total_delay)
+                else:
+                    end_time = dt.datetime.now()
+                    duration = humanize.naturaldelta(end_time - start_time)
+
+                    logger.error(
+                        f"❌ Pipeline '{self.name}' failed after {max_retries + 1} attempts in {duration}: {e}"
+                    )
+
+                    # Execute failure callback if provided
+                    if run_config.on_failure:
+                        self._execute_callback(run_config.on_failure, None, e)
+
+                    raise
+            except Exception as e:
+                end_time = dt.datetime.now()
+                duration = humanize.naturaldelta(end_time - start_time)
+
+                logger.error(f"❌ Pipeline '{self.name}' failed in {duration}: {e}")
+
+                # Execute failure callback if provided
+                if run_config.on_failure:
+                    self._execute_callback(run_config.on_failure, None, e)
+
+                raise
+
+    def _setup_execution_context(
+        self,
+        run_config: RunConfig,
+    ) -> tuple[executors.BaseExecutor, Callable | None, list]:
+        """Set up executor and adapters for pipeline execution."""
+        # Get executor and adapters
+        executor, shutdown_func = self._get_executor(run_config.executor)
+        adapters = self._get_adapters(
+            with_adapter_cfg=run_config.with_adapter,
+            pipeline_adapter_cfg=run_config.pipeline_adapter_cfg,
+            project_adapter_cfg=run_config.project_adapter_cfg,
+            adapter=run_config.adapter,
+        )
+        return executor, shutdown_func, adapters
+
+    def _execute_pipeline(
+        self,
+        run_config: RunConfig,
+    ) -> dict[str, Any]:
+        """Execute the pipeline with Hamilton."""
+        # Set up execution context
+        executor, shutdown_func, adapters = self._setup_execution_context(run_config=run_config)
+
+        try:
+            # Create Hamilton driver
+            dr = (
+                driver.Builder()
+                .with_config(run_config.config)
+                .with_modules(self.module)
+                .with_adapters(*adapters)
+                .build()
+            )
+
+            # Execute the pipeline
+            result = dr.execute(
+                final_vars=run_config.final_vars,
+                inputs=run_config.inputs,
+            )
+
+            return result
+
+        finally:
+            # Clean up executor if needed
+            if shutdown_func:
+                try:
+                    shutdown_func()
+                except Exception as e:
+                    logger.warning(f"Failed to shutdown executor: {e}")
+
+    def _get_executor(
+        self, executor_cfg: str | dict | ExecutorConfig | None = None
+    ) -> tuple[executors.BaseExecutor, Callable | None]:
+        """Get the executor based on the provided configuration."""
+        logger.debug("Setting up executor...")
+
+        if executor_cfg:
+            if isinstance(executor_cfg, str):
+                executor_cfg = ExecutorConfig(type=executor_cfg)
+            elif isinstance(executor_cfg, dict):
+                executor_cfg = ExecutorConfig.from_dict(executor_cfg)
+            elif not isinstance(executor_cfg, ExecutorConfig):
+                raise TypeError(
+                    "Executor must be a string, dictionary, or ExecutorConfig instance."
+                )
+
+            executor_cfg = self.config.run.executor.merge(executor_cfg)
+        else:
+            executor_cfg = self.config.run.executor
+
+        if executor_cfg.type is None or executor_cfg.type == "synchronous":
+            logger.debug("Using SynchronousLocalTaskExecutor as default.")
+            return executors.SynchronousLocalTaskExecutor(), None
+
+        if executor_cfg.type == "threadpool":
+            logger.debug(
+                f"Using MultiThreadingExecutor with max_workers={executor_cfg.max_workers}"
+            )
+            return executors.MultiThreadingExecutor(
+                max_tasks=executor_cfg.max_workers
+            ), None
+        elif executor_cfg.type == "processpool":
+            logger.debug(
+                f"Using MultiProcessingExecutor with max_workers={executor_cfg.max_workers}"
+            )
+            return executors.MultiProcessingExecutor(
+                max_tasks=executor_cfg.max_workers
+            ), None
+        elif executor_cfg.type == "ray":
+            if h_ray:
+                logger.debug(
+                    f"Using RayTaskExecutor with num_cpus={executor_cfg.num_cpus}"
+                )
+
+                # Handle temporary case where project_context is PipelineManager
+                project_cfg = getattr(
+                    self.project_context, "project_cfg", None
+                ) or getattr(self.project_context, "_project_cfg", None)
+
+                return (
+                    h_ray.RayTaskExecutor(
+                        num_cpus=executor_cfg.num_cpus,
+                        ray_init_config=project_cfg.adapter.ray.ray_init_config,
+                    ),
+                    ray.shutdown
+                    if project_cfg.adapter.ray.shutdown_ray_on_completion
+                    else None,
+                )
+            else:
+                logger.warning("Ray is not installed. Using local executor.")
+                return executors.SynchronousLocalTaskExecutor(), None
+        elif executor_cfg.type == "dask":
+            if distributed:
+                cluster = distributed.LocalCluster()
+                client = distributed.Client(cluster)
+                return h_dask.DaskExecutor(client=client), cluster.close
+            else:
+                logger.warning("Dask is not installed. Using local executor.")
+                return executors.SynchronousLocalTaskExecutor(), None
+        else:
+            logger.warning(
+                f"Unknown executor type: {executor_cfg.type}. Using local executor."
+            )
+            return executors.SynchronousLocalTaskExecutor(), None
+
+    def _get_adapters(
+        self,
+        with_adapter_cfg: dict | WithAdapterConfig | None = None,
+        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
+        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
+        adapter: dict[str, Any] | None = None,
+    ) -> list:
+        """Set up the adapters for the pipeline."""
+        logger.debug("Setting up adapters...")
+
+        # Resolve adapter configurations
+        if with_adapter_cfg:
+            if isinstance(with_adapter_cfg, dict):
+                with_adapter_cfg = WithAdapterConfig.from_dict(with_adapter_cfg)
+            elif not isinstance(with_adapter_cfg, WithAdapterConfig):
+                raise TypeError(
+                    "with_adapter must be a dictionary or WithAdapterConfig instance."
+                )
+
+            with_adapter_cfg = self.config.run.with_adapter.merge(with_adapter_cfg)
+        else:
+            with_adapter_cfg = self.config.run.with_adapter
+
+        if pipeline_adapter_cfg:
+            if isinstance(pipeline_adapter_cfg, dict):
+                pipeline_adapter_cfg = PipelineAdapterConfig.from_dict(
+                    pipeline_adapter_cfg
+                )
+            elif not isinstance(pipeline_adapter_cfg, PipelineAdapterConfig):
+                raise TypeError(
+                    "pipeline_adapter_cfg must be a dictionary or PipelineAdapterConfig instance."
+                )
+
+            pipeline_adapter_cfg = self.config.adapter.merge(pipeline_adapter_cfg)
+        else:
+            pipeline_adapter_cfg = self.config.adapter
+
+        if project_adapter_cfg:
+            if isinstance(project_adapter_cfg, dict):
+                project_adapter_cfg = ProjectAdapterConfig.from_dict(
+                    project_adapter_cfg
+                )
+            elif not isinstance(project_adapter_cfg, ProjectAdapterConfig):
+                raise TypeError(
+                    "project_adapter_cfg must be a dictionary or ProjectAdapterConfig instance."
+                )
+
+            # Handle temporary case where project_context is PipelineManager
+            manager_project_cfg = getattr(
+                self.project_context, "project_cfg", None
+            ) or getattr(self.project_context, "_project_cfg", None)
+            if manager_project_cfg and hasattr(manager_project_cfg, "adapter"):
+                project_adapter_cfg = manager_project_cfg.adapter.merge(
+                    project_adapter_cfg
+                )
+            else:
+                # Use project context directly if it's FlowerPowerProject
+                if hasattr(self.project_context, "pipeline_manager"):
+                    pm_cfg = getattr(
+                        self.project_context.pipeline_manager, "project_cfg", None
+                    ) or getattr(
+                        self.project_context.pipeline_manager, "_project_cfg", None
+                    )
+                    base_cfg = pm_cfg.adapter if pm_cfg else None
+                    if base_cfg:
+                        project_adapter_cfg = base_cfg.merge(project_adapter_cfg)
+                    else:
+                        from ..cfg.project.adapter import \
+                            AdapterConfig as ProjectAdapterConfig
+
+                        project_adapter_cfg = ProjectAdapterConfig()
+                else:
+                    from ..cfg.project.adapter import \
+                        AdapterConfig as ProjectAdapterConfig
+
+                    project_adapter_cfg = ProjectAdapterConfig()
+        else:
+            # Handle temporary case where project_context is PipelineManager
+            manager_project_cfg = getattr(
+                self.project_context, "project_cfg", None
+            ) or getattr(self.project_context, "_project_cfg", None)
+            if manager_project_cfg and hasattr(manager_project_cfg, "adapter"):
+                project_adapter_cfg = manager_project_cfg.adapter
+            else:
+                # Use project context directly if it's FlowerPowerProject
+                if hasattr(self.project_context, "pipeline_manager"):
+                    pm_cfg = getattr(
+                        self.project_context.pipeline_manager, "project_cfg", None
+                    ) or getattr(
+                        self.project_context.pipeline_manager, "_project_cfg", None
+                    )
+                    project_adapter_cfg = pm_cfg.adapter if pm_cfg else None
+                else:
+                    project_adapter_cfg = None
+
+        # Create default adapter config if none found
+        if project_adapter_cfg is None:
+            from ..cfg.project.adapter import \
+                AdapterConfig as ProjectAdapterConfig
+
+            project_adapter_cfg = ProjectAdapterConfig()
+
+        adapters = []
+
+        # Hamilton Tracker adapter
+        if with_adapter_cfg.hamilton_tracker:
+            tracker_kwargs = project_adapter_cfg.hamilton_tracker.to_dict()
+            tracker_kwargs.update(pipeline_adapter_cfg.hamilton_tracker.to_dict())
+            tracker_kwargs["hamilton_api_url"] = tracker_kwargs.pop("api_url", None)
+            tracker_kwargs["hamilton_ui_url"] = tracker_kwargs.pop("ui_url", None)
+
+            constants.MAX_DICT_LENGTH_CAPTURE = (
+                tracker_kwargs.pop("max_dict_length_capture", None)
+                or settings.HAMILTON_MAX_DICT_LENGTH_CAPTURE
+            )
+            constants.MAX_LIST_LENGTH_CAPTURE = (
+                tracker_kwargs.pop("max_list_length_capture", None)
+                or settings.HAMILTON_MAX_LIST_LENGTH_CAPTURE
+            )
+            constants.CAPTURE_DATA_STATISTICS = (
+                tracker_kwargs.pop("capture_data_statistics", None)
+                or settings.HAMILTON_CAPTURE_DATA_STATISTICS
+            )
+
+            tracker = HamiltonTracker(**tracker_kwargs)
+            adapters.append(tracker)
+
+        # MLFlow adapter
+        if with_adapter_cfg.mlflow:
+            if h_mlflow is None:
+                logger.warning("MLFlow is not installed. Skipping MLFlow adapter.")
+            else:
+                mlflow_kwargs = project_adapter_cfg.mlflow.to_dict()
+                mlflow_kwargs.update(pipeline_adapter_cfg.mlflow.to_dict())
+                mlflow_adapter = h_mlflow.MLFlowTracker(**mlflow_kwargs)
+                adapters.append(mlflow_adapter)
+
+        # OpenTelemetry adapter
+        if with_adapter_cfg.opentelemetry:
+            if h_opentelemetry is None:
+                logger.warning(
+                    "OpenTelemetry is not installed. Skipping OpenTelemetry adapter."
+                )
+            else:
+                otel_kwargs = project_adapter_cfg.opentelemetry.to_dict()
+                otel_kwargs.update(pipeline_adapter_cfg.opentelemetry.to_dict())
+                init_tracer()
+                otel_adapter = h_opentelemetry.OpenTelemetryTracker(**otel_kwargs)
+                adapters.append(otel_adapter)
+
+        # Progress bar adapter
+        if with_adapter_cfg.progressbar:
+            progressbar_kwargs = project_adapter_cfg.progressbar.to_dict()
+            progressbar_kwargs.update(pipeline_adapter_cfg.progressbar.to_dict())
+            progressbar_adapter = h_rich.ProgressBar(**progressbar_kwargs)
+            adapters.append(progressbar_adapter)
+
+        # Add any additional adapters
+        if adapter:
+            for key, value in adapter.items():
+                adapters.append(value)
+
+        return adapters
+
+    def _execute_callback(
+        self,
+        callback: Callable | tuple[Callable, tuple | None, dict | None],
+        result: dict[str, Any] | None,
+        exception: Exception | None,
+    ):
+        """Execute a callback function with proper error handling."""
+        try:
+            if isinstance(callback, tuple):
+                func, args, kwargs = callback
+                args = args or ()
+                kwargs = kwargs or {}
+                func(*args, **kwargs)
+            else:
+                callback(result, exception)
+        except Exception as e:
+            logger.error(f"Callback execution failed: {e}")
+
+    def _reload_module(self):
+        """Reload the pipeline module."""
+        try:
+            importlib.reload(self.module)
+            logger.debug(f"Reloaded module for pipeline '{self.name}'")
+        except (ImportError, ModuleNotFoundError, AttributeError) as e:
+            logger.error(f"Failed to reload module for pipeline '{self.name}': {e}")
+            raise
+        except Exception as e:
+            logger.error(f"Unexpected error reloading module for pipeline '{self.name}': {e}")
+            raise