FlowerPower 0.11.6.20__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. flowerpower/__init__.py +2 -6
  2. flowerpower/cfg/__init__.py +7 -14
  3. flowerpower/cfg/base.py +29 -25
  4. flowerpower/cfg/pipeline/__init__.py +8 -6
  5. flowerpower/cfg/pipeline/_schedule.py +32 -0
  6. flowerpower/cfg/pipeline/adapter.py +0 -5
  7. flowerpower/cfg/pipeline/builder.py +377 -0
  8. flowerpower/cfg/pipeline/run.py +36 -0
  9. flowerpower/cfg/project/__init__.py +11 -24
  10. flowerpower/cfg/project/adapter.py +0 -12
  11. flowerpower/cli/__init__.py +2 -21
  12. flowerpower/cli/cfg.py +0 -3
  13. flowerpower/cli/mqtt.py +0 -6
  14. flowerpower/cli/pipeline.py +22 -415
  15. flowerpower/cli/utils.py +0 -1
  16. flowerpower/flowerpower.py +345 -146
  17. flowerpower/pipeline/__init__.py +2 -0
  18. flowerpower/pipeline/base.py +21 -12
  19. flowerpower/pipeline/io.py +58 -54
  20. flowerpower/pipeline/manager.py +165 -726
  21. flowerpower/pipeline/pipeline.py +643 -0
  22. flowerpower/pipeline/registry.py +285 -18
  23. flowerpower/pipeline/visualizer.py +5 -6
  24. flowerpower/plugins/io/__init__.py +8 -0
  25. flowerpower/plugins/mqtt/__init__.py +7 -11
  26. flowerpower/settings/__init__.py +0 -2
  27. flowerpower/settings/{backend.py → _backend.py} +0 -21
  28. flowerpower/settings/logging.py +1 -1
  29. flowerpower/utils/logging.py +24 -12
  30. flowerpower/utils/misc.py +17 -256
  31. flowerpower/utils/monkey.py +1 -83
  32. flowerpower-0.21.0.dist-info/METADATA +463 -0
  33. flowerpower-0.21.0.dist-info/RECORD +44 -0
  34. flowerpower/cfg/pipeline/schedule.py +0 -74
  35. flowerpower/cfg/project/job_queue.py +0 -238
  36. flowerpower/cli/job_queue.py +0 -1061
  37. flowerpower/fs/__init__.py +0 -29
  38. flowerpower/fs/base.py +0 -662
  39. flowerpower/fs/ext.py +0 -2143
  40. flowerpower/fs/storage_options.py +0 -1420
  41. flowerpower/job_queue/__init__.py +0 -294
  42. flowerpower/job_queue/apscheduler/__init__.py +0 -11
  43. flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -110
  44. flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -93
  45. flowerpower/job_queue/apscheduler/manager.py +0 -1051
  46. flowerpower/job_queue/apscheduler/setup.py +0 -554
  47. flowerpower/job_queue/apscheduler/trigger.py +0 -169
  48. flowerpower/job_queue/apscheduler/utils.py +0 -311
  49. flowerpower/job_queue/base.py +0 -413
  50. flowerpower/job_queue/rq/__init__.py +0 -10
  51. flowerpower/job_queue/rq/_trigger.py +0 -37
  52. flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +0 -226
  53. flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -231
  54. flowerpower/job_queue/rq/manager.py +0 -1582
  55. flowerpower/job_queue/rq/setup.py +0 -154
  56. flowerpower/job_queue/rq/utils.py +0 -69
  57. flowerpower/mqtt.py +0 -12
  58. flowerpower/pipeline/job_queue.py +0 -583
  59. flowerpower/pipeline/runner.py +0 -603
  60. flowerpower/plugins/io/base.py +0 -2520
  61. flowerpower/plugins/io/helpers/datetime.py +0 -298
  62. flowerpower/plugins/io/helpers/polars.py +0 -875
  63. flowerpower/plugins/io/helpers/pyarrow.py +0 -570
  64. flowerpower/plugins/io/helpers/sql.py +0 -202
  65. flowerpower/plugins/io/loader/__init__.py +0 -28
  66. flowerpower/plugins/io/loader/csv.py +0 -37
  67. flowerpower/plugins/io/loader/deltatable.py +0 -190
  68. flowerpower/plugins/io/loader/duckdb.py +0 -19
  69. flowerpower/plugins/io/loader/json.py +0 -37
  70. flowerpower/plugins/io/loader/mqtt.py +0 -159
  71. flowerpower/plugins/io/loader/mssql.py +0 -26
  72. flowerpower/plugins/io/loader/mysql.py +0 -26
  73. flowerpower/plugins/io/loader/oracle.py +0 -26
  74. flowerpower/plugins/io/loader/parquet.py +0 -35
  75. flowerpower/plugins/io/loader/postgres.py +0 -26
  76. flowerpower/plugins/io/loader/pydala.py +0 -19
  77. flowerpower/plugins/io/loader/sqlite.py +0 -23
  78. flowerpower/plugins/io/metadata.py +0 -244
  79. flowerpower/plugins/io/saver/__init__.py +0 -28
  80. flowerpower/plugins/io/saver/csv.py +0 -36
  81. flowerpower/plugins/io/saver/deltatable.py +0 -186
  82. flowerpower/plugins/io/saver/duckdb.py +0 -19
  83. flowerpower/plugins/io/saver/json.py +0 -36
  84. flowerpower/plugins/io/saver/mqtt.py +0 -28
  85. flowerpower/plugins/io/saver/mssql.py +0 -26
  86. flowerpower/plugins/io/saver/mysql.py +0 -26
  87. flowerpower/plugins/io/saver/oracle.py +0 -26
  88. flowerpower/plugins/io/saver/parquet.py +0 -36
  89. flowerpower/plugins/io/saver/postgres.py +0 -26
  90. flowerpower/plugins/io/saver/pydala.py +0 -20
  91. flowerpower/plugins/io/saver/sqlite.py +0 -24
  92. flowerpower/plugins/mqtt/cfg.py +0 -17
  93. flowerpower/plugins/mqtt/manager.py +0 -962
  94. flowerpower/settings/job_queue.py +0 -87
  95. flowerpower/utils/scheduler.py +0 -311
  96. flowerpower-0.11.6.20.dist-info/METADATA +0 -537
  97. flowerpower-0.11.6.20.dist-info/RECORD +0 -102
  98. {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/WHEEL +0 -0
  99. {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/entry_points.txt +0 -0
  100. {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/licenses/LICENSE +0 -0
  101. {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/top_level.txt +0 -0
flowerpower/pipeline/pipeline.py
@@ -0,0 +1,643 @@
+ # -*- coding: utf-8 -*-
+ """Active Pipeline class for FlowerPower."""
+
+ from __future__ import annotations
+
+ import datetime as dt
+ import importlib
+ import importlib.util
+ import random
+ import time
+ from typing import TYPE_CHECKING, Any, Callable
+
+ import humanize
+ import msgspec
+ from hamilton import driver
+ from hamilton.execution import executors
+ from hamilton.registry import disable_autoload
+ from hamilton.telemetry import disable_telemetry
+ from hamilton_sdk.api.clients import UnauthorizedException
+ from requests.exceptions import ConnectionError, HTTPError
+
+ from .. import settings
+
+ if importlib.util.find_spec("opentelemetry"):
+     from hamilton.plugins import h_opentelemetry
+
+     from ..utils.open_telemetry import init_tracer
+ else:
+     h_opentelemetry = None
+     init_tracer = None
+
+ if importlib.util.find_spec("mlflow"):
+     from hamilton.plugins import h_mlflow
+ else:
+     h_mlflow = None
+
+ from hamilton.plugins import h_rich
+ from hamilton.plugins.h_threadpool import FutureAdapter
+ from hamilton_sdk.adapters import HamiltonTracker
+ from hamilton_sdk.tracking import constants
+ from loguru import logger
+
+ if importlib.util.find_spec("distributed"):
+     from dask import distributed
+     from hamilton.plugins import h_dask
+ else:
+     distributed = None
+
+ if importlib.util.find_spec("ray"):
+     import ray
+
+     # from hamilton.plugins import h_ray
+     h_ray = None
+ else:
+     ray = None
+     h_ray = None
+
+ from ..cfg import PipelineConfig, ProjectConfig
+ from ..cfg.pipeline.adapter import AdapterConfig as PipelineAdapterConfig
+ from ..cfg.pipeline.run import ExecutorConfig, RunConfig, WithAdapterConfig
+ from ..cfg.project.adapter import AdapterConfig as ProjectAdapterConfig
+
+ if TYPE_CHECKING:
+     from ..flowerpower import FlowerPowerProject
+
+
+ class Pipeline(msgspec.Struct):
+     """Active pipeline object that encapsulates its own execution logic.
+
+     This class represents a single pipeline with its configuration, loaded module,
+     and project context. It is responsible for its own execution, including
+     setting up Hamilton drivers, managing adapters, and handling retries.
+
+     Attributes:
+         name: The name of the pipeline
+         config: The pipeline configuration
+         module: The loaded Python module containing Hamilton functions
+         project_context: Reference to the FlowerPowerProject
+     """
+
+     name: str
+     config: PipelineConfig
+     module: Any
+     project_context: FlowerPowerProject
+
+     def __post_init__(self):
+         """Initialize Hamilton settings."""
+         if not settings.HAMILTON_TELEMETRY_ENABLED:
+             disable_telemetry()
+         if not settings.HAMILTON_AUTOLOAD_EXTENSIONS:
+             disable_autoload()
+
+     def _merge_run_config_with_kwargs(self, run_config: RunConfig, kwargs: dict) -> RunConfig:
+         """Merge kwargs into the run_config object.
+
+         Args:
+             run_config: The base RunConfig object to merge into
+             kwargs: Additional parameters to merge into the run_config
+
+         Returns:
+             Updated RunConfig object with merged kwargs
+         """
+         from copy import deepcopy
+
+         # Create a deep copy of the run_config to avoid modifying the original
+         merged_config = deepcopy(run_config)
+
+         # Handle each possible kwarg
+         for key, value in kwargs.items():
+             if key == 'inputs' and value is not None:
+                 if merged_config.inputs is None:
+                     merged_config.inputs = {}
+                 merged_config.inputs.update(value)
+             elif key == 'final_vars' and value is not None:
+                 if merged_config.final_vars is None:
+                     merged_config.final_vars = []
+                 merged_config.final_vars = value
+             elif key == 'config' and value is not None:
+                 if merged_config.config is None:
+                     merged_config.config = {}
+                 merged_config.config.update(value)
+             elif key == 'cache' and value is not None:
+                 merged_config.cache = value
+             elif key == 'executor_cfg' and value is not None:
+                 if isinstance(value, str):
+                     merged_config.executor = ExecutorConfig(type=value)
+                 elif isinstance(value, dict):
+                     merged_config.executor = ExecutorConfig.from_dict(value)
+                 elif isinstance(value, ExecutorConfig):
+                     merged_config.executor = value
+             elif key == 'with_adapter_cfg' and value is not None:
+                 if isinstance(value, dict):
+                     merged_config.with_adapter = WithAdapterConfig.from_dict(value)
+                 elif isinstance(value, WithAdapterConfig):
+                     merged_config.with_adapter = value
+             elif key == 'pipeline_adapter_cfg' and value is not None:
+                 merged_config.pipeline_adapter_cfg = value
+             elif key == 'project_adapter_cfg' and value is not None:
+                 merged_config.project_adapter_cfg = value
+             elif key == 'adapter' and value is not None:
+                 if merged_config.adapter is None:
+                     merged_config.adapter = {}
+                 merged_config.adapter.update(value)
+             elif key == 'reload' and value is not None:
+                 merged_config.reload = value
+             elif key == 'log_level' and value is not None:
+                 merged_config.log_level = value
+             elif key == 'max_retries' and value is not None:
+                 merged_config.max_retries = value
+             elif key == 'retry_delay' and value is not None:
+                 merged_config.retry_delay = value
+             elif key == 'jitter_factor' and value is not None:
+                 merged_config.jitter_factor = value
+             elif key == 'retry_exceptions' and value is not None:
+                 merged_config.retry_exceptions = value
+             elif key == 'on_success' and value is not None:
+                 merged_config.on_success = value
+             elif key == 'on_failure' and value is not None:
+                 merged_config.on_failure = value
+
+         return merged_config
+
+     def run(
+         self,
+         run_config: RunConfig | None = None,
+         **kwargs
+     ) -> dict[str, Any]:
+         """Execute the pipeline with the given parameters.
+
+         Args:
+             run_config: Run configuration object containing all execution parameters.
+                 If None, uses the pipeline's default configuration.
+             **kwargs: Additional parameters to override or extend the run_config.
+
+         Returns:
+             The result of executing the pipeline
+         """
+         start_time = dt.datetime.now()
+
+         # Initialize run_config with pipeline defaults if not provided
+         run_config = run_config or self.config.run
+
+         # Merge kwargs into the run_config
+         if kwargs:
+             run_config = self._merge_run_config_with_kwargs(run_config, kwargs)
+
+         # Reload module if requested
+         if run_config.reload:
+             self._reload_module()
+
+         # Set up retry configuration
+         retry_config = self._setup_retry_config(
+             run_config.max_retries, run_config.retry_delay, run_config.jitter_factor, run_config.retry_exceptions
+         )
+         max_retries = retry_config["max_retries"]
+         retry_delay = retry_config["retry_delay"]
+         jitter_factor = retry_config["jitter_factor"]
+         retry_exceptions = retry_config["retry_exceptions"]
+
+         # Execute with retry logic
+         return self._execute_with_retry(
+             run_config=run_config,
+             max_retries=max_retries,
+             retry_delay=retry_delay,
+             jitter_factor=jitter_factor,
+             retry_exceptions=retry_exceptions,
+             start_time=start_time,
+         )
+
+     def _setup_retry_config(
+         self,
+         max_retries: int | None,
+         retry_delay: float | None,
+         jitter_factor: float | None,
+         retry_exceptions: tuple | None,
+     ) -> dict:
+         """Set up retry configuration with defaults and validation."""
+         max_retries = max_retries or self.config.run.max_retries or 0
+         retry_delay = retry_delay or self.config.run.retry_delay or 1.0
+         jitter_factor = jitter_factor or self.config.run.jitter_factor or 0.1
+
+         # Convert string exceptions to actual exception classes
+         if retry_exceptions and isinstance(retry_exceptions, (list, tuple)):
+             converted_exceptions = []
+             for exc in retry_exceptions:
+                 if isinstance(exc, str):
+                     try:
+                         exc_class = eval(exc)
+                         # Ensure it's actually an exception class
+                         if isinstance(exc_class, type) and issubclass(
+                             exc_class, BaseException
+                         ):
+                             converted_exceptions.append(exc_class)
+                         else:
+                             logger.warning(
+                                 f"'{exc}' is not an exception class, using Exception"
+                             )
+                             converted_exceptions.append(Exception)
+                     except (NameError, AttributeError):
+                         logger.warning(
+                             f"Unknown exception type: {exc}, using Exception"
+                         )
+                         converted_exceptions.append(Exception)
+                 elif isinstance(exc, type) and issubclass(exc, BaseException):
+                     converted_exceptions.append(exc)
+                 else:
+                     logger.warning(f"Invalid exception type: {exc}, using Exception")
+                     converted_exceptions.append(Exception)
+             retry_exceptions = tuple(converted_exceptions)
+         elif not retry_exceptions:
+             retry_exceptions = (Exception,)
+
+         return {
+             "max_retries": max_retries,
+             "retry_delay": retry_delay,
+             "jitter_factor": jitter_factor,
+             "retry_exceptions": retry_exceptions,
+         }
+
+     def _execute_with_retry(
+         self,
+         run_config: RunConfig,
+         max_retries: int,
+         retry_delay: float,
+         jitter_factor: float,
+         retry_exceptions: tuple,
+         start_time: dt.datetime,
+     ) -> dict[str, Any]:
+         """Execute pipeline with retry logic."""
+         for attempt in range(max_retries + 1):
+             try:
+                 logger.info(
+                     f"🚀 Running pipeline '{self.name}' (attempt {attempt + 1}/{max_retries + 1})"
+                 )
+
+                 result = self._execute_pipeline(run_config=run_config)
+
+                 end_time = dt.datetime.now()
+                 duration = humanize.naturaldelta(end_time - start_time)
+
+                 logger.success(
+                     f"✅ Pipeline '{self.name}' completed successfully in {duration}"
+                 )
+
+                 # Execute success callback if provided
+                 if run_config.on_success:
+                     self._execute_callback(run_config.on_success, result, None)
+
+                 return result
+
+             except retry_exceptions as e:
+                 if attempt < max_retries:
+                     delay = retry_delay * (2**attempt)
+                     jitter = delay * jitter_factor * random.random()
+                     total_delay = delay + jitter
+
+                     logger.warning(
+                         f"⚠️ Pipeline '{self.name}' failed (attempt {attempt + 1}/{max_retries + 1}): {e}"
+                     )
+                     logger.info(f"🔄 Retrying in {total_delay:.2f} seconds...")
+                     time.sleep(total_delay)
+                 else:
+                     end_time = dt.datetime.now()
+                     duration = humanize.naturaldelta(end_time - start_time)
+
+                     logger.error(
+                         f"❌ Pipeline '{self.name}' failed after {max_retries + 1} attempts in {duration}: {e}"
+                     )
+
+                     # Execute failure callback if provided
+                     if run_config.on_failure:
+                         self._execute_callback(run_config.on_failure, None, e)
+
+                     raise
+             except Exception as e:
+                 end_time = dt.datetime.now()
+                 duration = humanize.naturaldelta(end_time - start_time)
+
+                 logger.error(f"❌ Pipeline '{self.name}' failed in {duration}: {e}")
+
+                 # Execute failure callback if provided
+                 if run_config.on_failure:
+                     self._execute_callback(run_config.on_failure, None, e)
+
+                 raise
+
+     def _setup_execution_context(
+         self,
+         run_config: RunConfig,
+     ) -> tuple[executors.BaseExecutor, Callable | None, list]:
+         """Set up executor and adapters for pipeline execution."""
+         # Get executor and adapters
+         executor, shutdown_func = self._get_executor(run_config.executor)
+         adapters = self._get_adapters(
+             with_adapter_cfg=run_config.with_adapter,
+             pipeline_adapter_cfg=run_config.pipeline_adapter_cfg,
+             project_adapter_cfg=run_config.project_adapter_cfg,
+             adapter=run_config.adapter,
+         )
+         return executor, shutdown_func, adapters
+
+     def _execute_pipeline(
+         self,
+         run_config: RunConfig,
+     ) -> dict[str, Any]:
+         """Execute the pipeline with Hamilton."""
+         # Set up execution context
+         executor, shutdown_func, adapters = self._setup_execution_context(run_config=run_config)
+
+         try:
+             # Create Hamilton driver
+             dr = (
+                 driver.Builder()
+                 .with_config(run_config.config)
+                 .with_modules(self.module)
+                 .with_adapters(*adapters)
+                 .build()
+             )
+
+             # Execute the pipeline
+             result = dr.execute(
+                 final_vars=run_config.final_vars,
+                 inputs=run_config.inputs,
+             )
+
+             return result
+
+         finally:
+             # Clean up executor if needed
+             if shutdown_func:
+                 try:
+                     shutdown_func()
+                 except Exception as e:
+                     logger.warning(f"Failed to shutdown executor: {e}")
+
+     def _get_executor(
+         self, executor_cfg: str | dict | ExecutorConfig | None = None
+     ) -> tuple[executors.BaseExecutor, Callable | None]:
+         """Get the executor based on the provided configuration."""
+         logger.debug("Setting up executor...")
+
+         if executor_cfg:
+             if isinstance(executor_cfg, str):
+                 executor_cfg = ExecutorConfig(type=executor_cfg)
+             elif isinstance(executor_cfg, dict):
+                 executor_cfg = ExecutorConfig.from_dict(executor_cfg)
+             elif not isinstance(executor_cfg, ExecutorConfig):
+                 raise TypeError(
+                     "Executor must be a string, dictionary, or ExecutorConfig instance."
+                 )
+
+             executor_cfg = self.config.run.executor.merge(executor_cfg)
+         else:
+             executor_cfg = self.config.run.executor
+
+         if executor_cfg.type is None or executor_cfg.type == "synchronous":
+             logger.debug("Using SynchronousLocalTaskExecutor as default.")
+             return executors.SynchronousLocalTaskExecutor(), None
+
+         if executor_cfg.type == "threadpool":
+             logger.debug(
+                 f"Using MultiThreadingExecutor with max_workers={executor_cfg.max_workers}"
+             )
+             return executors.MultiThreadingExecutor(
+                 max_tasks=executor_cfg.max_workers
+             ), None
+         elif executor_cfg.type == "processpool":
+             logger.debug(
+                 f"Using MultiProcessingExecutor with max_workers={executor_cfg.max_workers}"
+             )
+             return executors.MultiProcessingExecutor(
+                 max_tasks=executor_cfg.max_workers
+             ), None
+         elif executor_cfg.type == "ray":
+             if h_ray:
+                 logger.debug(
+                     f"Using RayTaskExecutor with num_cpus={executor_cfg.num_cpus}"
+                 )
+
+                 # Handle temporary case where project_context is PipelineManager
+                 project_cfg = getattr(
+                     self.project_context, "project_cfg", None
+                 ) or getattr(self.project_context, "_project_cfg", None)
+
+                 return (
+                     h_ray.RayTaskExecutor(
+                         num_cpus=executor_cfg.num_cpus,
+                         ray_init_config=project_cfg.adapter.ray.ray_init_config,
+                     ),
+                     ray.shutdown
+                     if project_cfg.adapter.ray.shutdown_ray_on_completion
+                     else None,
+                 )
+             else:
+                 logger.warning("Ray is not installed. Using local executor.")
+                 return executors.SynchronousLocalTaskExecutor(), None
+         elif executor_cfg.type == "dask":
+             if distributed:
+                 cluster = distributed.LocalCluster()
+                 client = distributed.Client(cluster)
+                 return h_dask.DaskExecutor(client=client), cluster.close
+             else:
+                 logger.warning("Dask is not installed. Using local executor.")
+                 return executors.SynchronousLocalTaskExecutor(), None
+         else:
+             logger.warning(
+                 f"Unknown executor type: {executor_cfg.type}. Using local executor."
+             )
+             return executors.SynchronousLocalTaskExecutor(), None
+
+     def _get_adapters(
+         self,
+         with_adapter_cfg: dict | WithAdapterConfig | None = None,
+         pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
+         project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
+         adapter: dict[str, Any] | None = None,
+     ) -> list:
+         """Set up the adapters for the pipeline."""
+         logger.debug("Setting up adapters...")
+
+         # Resolve adapter configurations
+         if with_adapter_cfg:
+             if isinstance(with_adapter_cfg, dict):
+                 with_adapter_cfg = WithAdapterConfig.from_dict(with_adapter_cfg)
+             elif not isinstance(with_adapter_cfg, WithAdapterConfig):
+                 raise TypeError(
+                     "with_adapter must be a dictionary or WithAdapterConfig instance."
+                 )
+
+             with_adapter_cfg = self.config.run.with_adapter.merge(with_adapter_cfg)
+         else:
+             with_adapter_cfg = self.config.run.with_adapter
+
+         if pipeline_adapter_cfg:
+             if isinstance(pipeline_adapter_cfg, dict):
+                 pipeline_adapter_cfg = PipelineAdapterConfig.from_dict(
+                     pipeline_adapter_cfg
+                 )
+             elif not isinstance(pipeline_adapter_cfg, PipelineAdapterConfig):
+                 raise TypeError(
+                     "pipeline_adapter_cfg must be a dictionary or PipelineAdapterConfig instance."
+                 )
+
+             pipeline_adapter_cfg = self.config.adapter.merge(pipeline_adapter_cfg)
+         else:
+             pipeline_adapter_cfg = self.config.adapter
+
+         if project_adapter_cfg:
+             if isinstance(project_adapter_cfg, dict):
+                 project_adapter_cfg = ProjectAdapterConfig.from_dict(
+                     project_adapter_cfg
+                 )
+             elif not isinstance(project_adapter_cfg, ProjectAdapterConfig):
+                 raise TypeError(
+                     "project_adapter_cfg must be a dictionary or ProjectAdapterConfig instance."
+                 )
+
+             # Handle temporary case where project_context is PipelineManager
+             manager_project_cfg = getattr(
+                 self.project_context, "project_cfg", None
+             ) or getattr(self.project_context, "_project_cfg", None)
+             if manager_project_cfg and hasattr(manager_project_cfg, "adapter"):
+                 project_adapter_cfg = manager_project_cfg.adapter.merge(
+                     project_adapter_cfg
+                 )
+             else:
+                 # Use project context directly if it's FlowerPowerProject
+                 if hasattr(self.project_context, "pipeline_manager"):
+                     pm_cfg = getattr(
+                         self.project_context.pipeline_manager, "project_cfg", None
+                     ) or getattr(
+                         self.project_context.pipeline_manager, "_project_cfg", None
+                     )
+                     base_cfg = pm_cfg.adapter if pm_cfg else None
+                     if base_cfg:
+                         project_adapter_cfg = base_cfg.merge(project_adapter_cfg)
+                     else:
+                         from ..cfg.project.adapter import \
+                             AdapterConfig as ProjectAdapterConfig
+
+                         project_adapter_cfg = ProjectAdapterConfig()
+                 else:
+                     from ..cfg.project.adapter import \
+                         AdapterConfig as ProjectAdapterConfig
+
+                     project_adapter_cfg = ProjectAdapterConfig()
+         else:
+             # Handle temporary case where project_context is PipelineManager
+             manager_project_cfg = getattr(
+                 self.project_context, "project_cfg", None
+             ) or getattr(self.project_context, "_project_cfg", None)
+             if manager_project_cfg and hasattr(manager_project_cfg, "adapter"):
+                 project_adapter_cfg = manager_project_cfg.adapter
+             else:
+                 # Use project context directly if it's FlowerPowerProject
+                 if hasattr(self.project_context, "pipeline_manager"):
+                     pm_cfg = getattr(
+                         self.project_context.pipeline_manager, "project_cfg", None
+                     ) or getattr(
+                         self.project_context.pipeline_manager, "_project_cfg", None
+                     )
+                     project_adapter_cfg = pm_cfg.adapter if pm_cfg else None
+                 else:
+                     project_adapter_cfg = None
+
+             # Create default adapter config if none found
+             if project_adapter_cfg is None:
+                 from ..cfg.project.adapter import \
+                     AdapterConfig as ProjectAdapterConfig
+
+                 project_adapter_cfg = ProjectAdapterConfig()
+
+         adapters = []
+
+         # Hamilton Tracker adapter
+         if with_adapter_cfg.hamilton_tracker:
+             tracker_kwargs = project_adapter_cfg.hamilton_tracker.to_dict()
+             tracker_kwargs.update(pipeline_adapter_cfg.hamilton_tracker.to_dict())
+             tracker_kwargs["hamilton_api_url"] = tracker_kwargs.pop("api_url", None)
+             tracker_kwargs["hamilton_ui_url"] = tracker_kwargs.pop("ui_url", None)
+
+             constants.MAX_DICT_LENGTH_CAPTURE = (
+                 tracker_kwargs.pop("max_dict_length_capture", None)
+                 or settings.HAMILTON_MAX_DICT_LENGTH_CAPTURE
+             )
+             constants.MAX_LIST_LENGTH_CAPTURE = (
+                 tracker_kwargs.pop("max_list_length_capture", None)
+                 or settings.HAMILTON_MAX_LIST_LENGTH_CAPTURE
+             )
+             constants.CAPTURE_DATA_STATISTICS = (
+                 tracker_kwargs.pop("capture_data_statistics", None)
+                 or settings.HAMILTON_CAPTURE_DATA_STATISTICS
+             )
+
+             tracker = HamiltonTracker(**tracker_kwargs)
+             adapters.append(tracker)
+
+         # MLFlow adapter
+         if with_adapter_cfg.mlflow:
+             if h_mlflow is None:
+                 logger.warning("MLFlow is not installed. Skipping MLFlow adapter.")
+             else:
+                 mlflow_kwargs = project_adapter_cfg.mlflow.to_dict()
+                 mlflow_kwargs.update(pipeline_adapter_cfg.mlflow.to_dict())
+                 mlflow_adapter = h_mlflow.MLFlowTracker(**mlflow_kwargs)
+                 adapters.append(mlflow_adapter)
+
+         # OpenTelemetry adapter
+         if with_adapter_cfg.opentelemetry:
+             if h_opentelemetry is None:
+                 logger.warning(
+                     "OpenTelemetry is not installed. Skipping OpenTelemetry adapter."
+                 )
+             else:
+                 otel_kwargs = project_adapter_cfg.opentelemetry.to_dict()
+                 otel_kwargs.update(pipeline_adapter_cfg.opentelemetry.to_dict())
+                 init_tracer()
+                 otel_adapter = h_opentelemetry.OpenTelemetryTracker(**otel_kwargs)
+                 adapters.append(otel_adapter)
+
+         # Progress bar adapter
+         if with_adapter_cfg.progressbar:
+             progressbar_kwargs = project_adapter_cfg.progressbar.to_dict()
+             progressbar_kwargs.update(pipeline_adapter_cfg.progressbar.to_dict())
+             progressbar_adapter = h_rich.ProgressBar(**progressbar_kwargs)
+             adapters.append(progressbar_adapter)
+
+         # Add any additional adapters
+         if adapter:
+             for key, value in adapter.items():
+                 adapters.append(value)
+
+         return adapters
+
+     def _execute_callback(
+         self,
+         callback: Callable | tuple[Callable, tuple | None, dict | None],
+         result: dict[str, Any] | None,
+         exception: Exception | None,
+     ):
+         """Execute a callback function with proper error handling."""
+         try:
+             if isinstance(callback, tuple):
+                 func, args, kwargs = callback
+                 args = args or ()
+                 kwargs = kwargs or {}
+                 func(*args, **kwargs)
+             else:
+                 callback(result, exception)
+         except Exception as e:
+             logger.error(f"Callback execution failed: {e}")
+
+     def _reload_module(self):
+         """Reload the pipeline module."""
+         try:
+             importlib.reload(self.module)
+             logger.debug(f"Reloaded module for pipeline '{self.name}'")
+         except (ImportError, ModuleNotFoundError, AttributeError) as e:
+             logger.error(f"Failed to reload module for pipeline '{self.name}': {e}")
+             raise
+         except Exception as e:
+             logger.error(f"Unexpected error reloading module for pipeline '{self.name}': {e}")
+             raise
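
For orientation, a minimal sketch of how the new Pipeline API added in this file might be driven. Pipeline.run(), RunConfig, and ExecutorConfig (and their fields) are taken from the diff above; the `project` object, the get_pipeline() accessor, and keyword construction of RunConfig are assumptions for illustration, not part of the released API.

# Sketch only — not part of the package diff.
from flowerpower.cfg.pipeline.run import ExecutorConfig, RunConfig

pipeline = project.pipeline_manager.get_pipeline("my_pipeline")  # hypothetical accessor

# A RunConfig carries all execution parameters; kwargs passed to run() are
# merged on top of it (inputs/config dicts are updated, scalar fields replaced).
result = pipeline.run(
    run_config=RunConfig(
        executor=ExecutorConfig(type="threadpool", max_workers=4),
        max_retries=3,      # up to 3 retries with exponential backoff
        retry_delay=1.0,    # base delay doubles per attempt: 1s, 2s, 4s...
        jitter_factor=0.1,  # ...plus up to 10% random jitter each time
    ),
    inputs={"date": "2024-01-01"},  # merged into run_config.inputs
    final_vars=["report"],          # replaces run_config.final_vars
)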