FlowerPower 0.9.13.1__py3-none-any.whl → 1.0.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. flowerpower/__init__.py +17 -2
  2. flowerpower/cfg/__init__.py +201 -149
  3. flowerpower/cfg/base.py +122 -24
  4. flowerpower/cfg/pipeline/__init__.py +254 -0
  5. flowerpower/cfg/pipeline/adapter.py +66 -0
  6. flowerpower/cfg/pipeline/run.py +40 -11
  7. flowerpower/cfg/pipeline/schedule.py +69 -79
  8. flowerpower/cfg/project/__init__.py +149 -0
  9. flowerpower/cfg/project/adapter.py +57 -0
  10. flowerpower/cfg/project/job_queue.py +165 -0
  11. flowerpower/cli/__init__.py +92 -37
  12. flowerpower/cli/job_queue.py +878 -0
  13. flowerpower/cli/mqtt.py +32 -1
  14. flowerpower/cli/pipeline.py +559 -406
  15. flowerpower/cli/utils.py +29 -18
  16. flowerpower/flowerpower.py +12 -8
  17. flowerpower/fs/__init__.py +20 -2
  18. flowerpower/fs/base.py +350 -26
  19. flowerpower/fs/ext.py +797 -216
  20. flowerpower/fs/storage_options.py +1097 -55
  21. flowerpower/io/base.py +13 -18
  22. flowerpower/io/loader/__init__.py +28 -0
  23. flowerpower/io/loader/deltatable.py +7 -10
  24. flowerpower/io/metadata.py +1 -0
  25. flowerpower/io/saver/__init__.py +28 -0
  26. flowerpower/io/saver/deltatable.py +4 -3
  27. flowerpower/job_queue/__init__.py +252 -0
  28. flowerpower/job_queue/apscheduler/__init__.py +11 -0
  29. flowerpower/job_queue/apscheduler/_setup/datastore.py +110 -0
  30. flowerpower/job_queue/apscheduler/_setup/eventbroker.py +93 -0
  31. flowerpower/job_queue/apscheduler/manager.py +1063 -0
  32. flowerpower/job_queue/apscheduler/setup.py +524 -0
  33. flowerpower/job_queue/apscheduler/trigger.py +169 -0
  34. flowerpower/job_queue/apscheduler/utils.py +309 -0
  35. flowerpower/job_queue/base.py +382 -0
  36. flowerpower/job_queue/rq/__init__.py +10 -0
  37. flowerpower/job_queue/rq/_trigger.py +37 -0
  38. flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +226 -0
  39. flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +231 -0
  40. flowerpower/job_queue/rq/manager.py +1449 -0
  41. flowerpower/job_queue/rq/setup.py +150 -0
  42. flowerpower/job_queue/rq/utils.py +69 -0
  43. flowerpower/pipeline/__init__.py +5 -0
  44. flowerpower/pipeline/base.py +118 -0
  45. flowerpower/pipeline/io.py +407 -0
  46. flowerpower/pipeline/job_queue.py +505 -0
  47. flowerpower/pipeline/manager.py +1586 -0
  48. flowerpower/pipeline/registry.py +560 -0
  49. flowerpower/pipeline/runner.py +560 -0
  50. flowerpower/pipeline/visualizer.py +142 -0
  51. flowerpower/plugins/mqtt/__init__.py +12 -0
  52. flowerpower/plugins/mqtt/cfg.py +16 -0
  53. flowerpower/plugins/mqtt/manager.py +789 -0
  54. flowerpower/settings.py +110 -0
  55. flowerpower/utils/logging.py +21 -0
  56. flowerpower/utils/misc.py +57 -9
  57. flowerpower/utils/sql.py +122 -24
  58. flowerpower/utils/templates.py +2 -142
  59. flowerpower-1.0.0b2.dist-info/METADATA +324 -0
  60. flowerpower-1.0.0b2.dist-info/RECORD +94 -0
  61. flowerpower/_web/__init__.py +0 -61
  62. flowerpower/_web/routes/config.py +0 -103
  63. flowerpower/_web/routes/pipelines.py +0 -173
  64. flowerpower/_web/routes/scheduler.py +0 -136
  65. flowerpower/cfg/pipeline/tracker.py +0 -14
  66. flowerpower/cfg/project/open_telemetry.py +0 -8
  67. flowerpower/cfg/project/tracker.py +0 -11
  68. flowerpower/cfg/project/worker.py +0 -19
  69. flowerpower/cli/scheduler.py +0 -309
  70. flowerpower/cli/web.py +0 -44
  71. flowerpower/event_handler.py +0 -23
  72. flowerpower/mqtt.py +0 -609
  73. flowerpower/pipeline.py +0 -2499
  74. flowerpower/scheduler.py +0 -680
  75. flowerpower/tui.py +0 -79
  76. flowerpower/utils/datastore.py +0 -186
  77. flowerpower/utils/eventbroker.py +0 -127
  78. flowerpower/utils/executor.py +0 -58
  79. flowerpower/utils/trigger.py +0 -140
  80. flowerpower-0.9.13.1.dist-info/METADATA +0 -586
  81. flowerpower-0.9.13.1.dist-info/RECORD +0 -76
  82. flowerpower/{cfg/pipeline/params.py → cli/worker.py} +0 -0
  83. {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b2.dist-info}/WHEEL +0 -0
  84. {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b2.dist-info}/entry_points.txt +0 -0
  85. {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b2.dist-info}/top_level.txt +0 -0
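
The headline change in 1.0.0b2 is structural: the monolithic `flowerpower/pipeline.py` (−2,499 lines) and `flowerpower/scheduler.py` (−680 lines) are replaced by the new `flowerpower/pipeline/` and `flowerpower/job_queue/` packages. The two largest new pipeline modules, `pipeline/runner.py` and `pipeline/visualizer.py`, are reproduced in full below. As a quick orientation, here is a hedged usage sketch of the new module-level entry point `run_pipeline` (its signature appears in the first hunk); how the two config objects are constructed is an assumption for illustration — only `PipelineConfig.load(name=..., fs=...)` is visible elsewhere in this diff:

```python
# Sketch only: run_pipeline's signature is taken from the runner.py hunk below;
# the config-loading lines are placeholders, not a confirmed API.
from flowerpower.cfg import PipelineConfig, ProjectConfig
from flowerpower.pipeline.runner import run_pipeline

project_cfg = ProjectConfig()                       # assumed constructor
pipeline_cfg = PipelineConfig(name="my_pipeline")   # assumed constructor

result = run_pipeline(
    project_cfg,
    pipeline_cfg,
    inputs={"raw_path": "data/input.csv"},  # merged over pipeline_cfg.run.inputs
    final_vars=["summary"],                 # overrides pipeline_cfg.run.final_vars
    executor_cfg="threadpool",              # str | dict | ExecutorConfig
    max_retries=3,                          # retries with exponential backoff + jitter
)
```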
--- /dev/null
+++ b/flowerpower/pipeline/runner.py
@@ -0,0 +1,560 @@
+# -*- coding: utf-8 -*-
+"""Pipeline Runner."""
+
+from __future__ import annotations
+import time
+import random
+import datetime as dt
+import importlib.util
+from typing import Any, Callable
+
+import humanize
+from hamilton import driver
+from hamilton.execution import executors
+from hamilton.registry import disable_autoload
+from hamilton.telemetry import disable_telemetry
+from hamilton_sdk.api.clients import UnauthorizedException
+
+from requests.exceptions import HTTPError
+
+from .. import settings
+
+
+if importlib.util.find_spec("opentelemetry"):
+    from hamilton.plugins import h_opentelemetry
+
+    from ..utils.open_telemetry import init_tracer
+else:
+    h_opentelemetry = None
+    init_tracer = None
+
+if importlib.util.find_spec("mlflow"):
+    from hamilton.plugins import h_mlflow
+else:
+    h_mlflow = None
+
+from hamilton.plugins import h_rich
+from hamilton.plugins.h_threadpool import FutureAdapter
+from hamilton_sdk.adapters import HamiltonTracker
+from hamilton_sdk.tracking import constants
+from loguru import logger
+
+if importlib.util.find_spec("distributed"):
+    from dask import distributed
+    from hamilton.plugins import h_dask
+else:
+    distributed = None
+
+
+if importlib.util.find_spec("ray"):
+    import ray
+    from hamilton.plugins import h_ray
+else:
+    h_ray = None
+
+from ..cfg import PipelineConfig, ProjectConfig
+from ..cfg.pipeline.adapter import AdapterConfig as PipelineAdapterConfig
+from ..cfg.pipeline.run import ExecutorConfig, WithAdapterConfig
+from ..cfg.project.adapter import AdapterConfig as ProjectAdapterConfig
+from ..utils.logging import setup_logging
+from .base import load_module
+
+setup_logging(level=settings.LOG_LEVEL)
+
+# from .executor import get_executor
+
+
+class PipelineRunner:
+    """PipelineRunner is responsible for executing a specific pipeline run.
+    It handles the loading of the pipeline module, configuration, and execution"""
+
+    def __init__(
+        self,
+        project_cfg: ProjectConfig,
+        pipeline_cfg: PipelineConfig,
+    ):
+        self.project_cfg = project_cfg
+        self.pipeline_cfg = pipeline_cfg
+        self.name = pipeline_cfg.name
+
+        if not settings.HAMILTON_TELEMETRY_ENABLED:
+            disable_telemetry()
+        if not settings.HAMILTON_AUTOLOAD_EXTENSIONS:
+            disable_autoload()
+
+    def __enter__(self):
+        """Enable use as a context manager."""
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """No special cleanup required."""
+        pass
+
+    def _get_executor(
+        self, executor_cfg: str | dict | ExecutorConfig | None = None
+    ) -> tuple[executors.BaseExecutor, Callable | None]:
+        """
+        Get the executor based on the provided configuration.
+
+        Args:
+            executor (dict | None): Executor configuration.
+
+        Returns:
+            tuple[executors.BaseExecutor, Callable | None]: A tuple containing the executor and shutdown function.
+        """
+        logger.debug("Setting up executor...")
+        if executor_cfg:
+            if isinstance(executor_cfg, str):
+                executor_cfg = ExecutorConfig(type=executor_cfg)
+            elif isinstance(executor_cfg, dict):
+                executor_cfg = ExecutorConfig.from_dict(executor_cfg)
+            elif not isinstance(executor_cfg, ExecutorConfig):
+                raise TypeError(
+                    "Executor must be a string, dictionary, or ExecutorConfig instance."
+                )
+
+            executor_cfg = self.pipeline_cfg.run.executor.merge(executor_cfg)
+        else:
+            executor_cfg = self.pipeline_cfg.run.executor
+
+        if executor_cfg.type is None:
+            logger.debug(
+                "No executor type specified. Using SynchronousLocalTaskExecutor as default."
+            )
+            return executors.SynchronousLocalTaskExecutor(), None
+
+        if executor_cfg.type == "threadpool":
+            logger.debug(
+                f"Using MultiThreadingExecutor with max_workers={executor_cfg.max_workers}"
+            )
+            return executors.MultiThreadingExecutor(
+                max_tasks=executor_cfg.max_workers
+            ), None
+        elif executor_cfg.type == "processpool":
+            logger.debug(
+                f"Using MultiProcessingExecutor with max_workers={executor_cfg.max_workers}"
+            )
+            return executors.MultiProcessingExecutor(
+                max_tasks=executor_cfg.max_workers
+            ), None
+        elif executor_cfg.type == "ray":
+            if h_ray:
+                logger.debug(
+                    f"Using RayTaskExecutor with num_cpus={executor_cfg.num_cpus}"
+                )
+
+                return (
+                    h_ray.RayTaskExecutor(
+                        num_cpus=executor_cfg.num_cpus,
+                        ray_init_config=self.project_cfg.adapter.ray.ray_init_config,
+                    ),
+                    ray.shutdown
+                    if self.project_cfg.adapter.ray.shutdown_ray_on_completion
+                    else None,
+                )
+            else:
+                logger.warning("Ray is not installed. Using local executor.")
+                return executors.SynchronousLocalTaskExecutor(), None
+        elif executor_cfg.type == "dask":
+            if distributed:
+                cluster = distributed.LocalCluster()
+                client = distributed.Client(cluster)
+                return h_dask.DaskExecutor(client=client), cluster.close
+            else:
+                logger.warning("Dask is not installed. Using local executor.")
+                return executors.SynchronousLocalTaskExecutor(), None
+        else:
+            logger.warning(
+                f"Unknown executor type: {executor_cfg.type}. Using local executor."
+            )
+            return executors.SynchronousLocalTaskExecutor(), None
+
+    def _get_adapters(
+        self,
+        with_adapter_cfg: dict | WithAdapterConfig | None = None,
+        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
+        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
+        adapter: dict[str, Any] | None = None,
+    ) -> list:
+        """
+        Set the adapters for the pipeline.
+
+        Args:
+            with_adapter_cfg (dict | WithAdapterConfig | None): The adapter configuration.
+                Overrides the with_adapter settings in the pipeline config.
+            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): The pipeline adapter configuration.
+                Overrides the adapter settings in the pipeline config.
+            project_adapter_cfg (dict | ProjectAdapterConfig | None): The project adapter configuration.
+                Overrides the adapter settings in the project config.
+            adapter (dict[str, Any] | None): Any additional hamilton adapters can be passed here.
+        """
+        logger.debug("Setting up adapters...")
+        if with_adapter_cfg:
+            if isinstance(with_adapter_cfg, dict):
+                with_adapter_cfg = WithAdapterConfig.from_dict(with_adapter_cfg)
+            elif not isinstance(with_adapter_cfg, WithAdapterConfig):
+                raise TypeError(
+                    "with_adapter must be a dictionary or WithAdapterConfig instance."
+                )
+
+            with_adapter_cfg = self.pipeline_cfg.run.with_adapter.merge(
+                with_adapter_cfg
+            )
+        else:
+            with_adapter_cfg = self.pipeline_cfg.run.with_adapter
+
+        if pipeline_adapter_cfg:
+            if isinstance(pipeline_adapter_cfg, dict):
+                pipeline_adapter_cfg = PipelineAdapterConfig.from_dict(
+                    pipeline_adapter_cfg
+                )
+            elif not isinstance(pipeline_adapter_cfg, PipelineAdapterConfig):
+                raise TypeError(
+                    "pipeline_adapter_cfg must be a dictionary or PipelineAdapterConfig instance."
+                )
+
+            pipeline_adapter_cfg = self.pipeline_cfg.adapter.merge(pipeline_adapter_cfg)
+        else:
+            pipeline_adapter_cfg = self.pipeline_cfg.adapter
+
+        if project_adapter_cfg:
+            if isinstance(project_adapter_cfg, dict):
+                project_adapter_cfg = ProjectAdapterConfig.from_dict(
+                    project_adapter_cfg
+                )
+            elif not isinstance(project_adapter_cfg, ProjectAdapterConfig):
+                raise TypeError(
+                    "project_adapter_cfg must be a dictionary or ProjectAdapterConfig instance."
+                )
+
+            project_adapter_cfg = self.project_cfg.adapter.merge(project_adapter_cfg)
+        else:
+            project_adapter_cfg = self.project_cfg.adapter
+
+        adapters = []
+        if with_adapter_cfg.tracker:
+            tracker_kwargs = project_adapter_cfg.tracker.to_dict()
+            tracker_kwargs.update(pipeline_adapter_cfg.tracker.to_dict())
+            tracker_kwargs["hamilton_api_url"] = tracker_kwargs.pop("api_url", None)
+            tracker_kwargs["hamilton_ui_url"] = tracker_kwargs.pop("ui_url", None)
+
+            constants.MAX_DICT_LENGTH_CAPTURE = (
+                tracker_kwargs.pop("max_dict_length_capture", None)
+                or settings.HAMILTON_MAX_DICT_LENGTH_CAPTURE
+            )
+            constants.MAX_LIST_LENGTH_CAPTURE = (
+                tracker_kwargs.pop("max_list_length_capture", None)
+                or settings.HAMILTON_MAX_LIST_LENGTH_CAPTURE
+            )
+            constants.CAPTURE_DATA_STATISTICS = (
+                tracker_kwargs.pop("capture_data_statistics", None)
+                or settings.HAMILTON_CAPTURE_DATA_STATISTICS
+            )
+
+            tracker = HamiltonTracker(**tracker_kwargs)
+
+            adapters.append(tracker)
+
+        if with_adapter_cfg.mlflow:
+            if h_mlflow is None:
+                logger.warning("MLFlow is not installed. Skipping MLFlow adapter.")
+            else:
+                mlflow_kwargs = project_adapter_cfg.mlflow.to_dict()
+                mlflow_kwargs.update(pipeline_adapter_cfg.mlflow.to_dict())
+                mlflow_adapter = h_mlflow.MLFlowTracker(**mlflow_kwargs)
+                adapters.append(mlflow_adapter)
+
+        if with_adapter_cfg.opentelemetry:
+            if h_opentelemetry is None:
+                logger.warning(
+                    "OpenTelemetry is not installed. Skipping OpenTelemetry adapter."
+                )
+            else:
+                otel_kwargs = project_adapter_cfg.opentelemetry.to_dict()
+                otel_kwargs.update(pipeline_adapter_cfg.opentelemetry.to_dict())
+                trace = init_tracer(**otel_kwargs, name=self.project_cfg.name)
+                tracer = trace.get_tracer(self.name)
+                otel_adapter = h_opentelemetry.OpenTelemetryTracer(
+                    tracer_name=f"{self.project_cfg.name}.{self.name}",
+                    tracer=tracer,
+                )
+                adapters.append(otel_adapter)
+
+        if with_adapter_cfg.progressbar:
+            adapters.append(
+                h_rich.RichProgressBar(run_desc=f"{self.project_cfg.name}.{self.name}")
+            )
+
+        if with_adapter_cfg.future:
+            adapters.append(FutureAdapter())
+
+        if with_adapter_cfg.ray:
+            if h_ray is None:
+                logger.warning("Ray is not installed. Skipping Ray adapter.")
+            else:
+                ray_kwargs = project_adapter_cfg.ray.to_dict()
+                ray_kwargs.update(pipeline_adapter_cfg.ray.to_dict())
+                ray_adapter = h_ray.RayGraphAdapter(**ray_kwargs)
+                adapters.append(ray_adapter)
+
+        all_adapters = [
+            f"{adp}: ✅" if enabled else f"{adp}: ❌"
+            for adp, enabled in with_adapter_cfg.to_dict().items()
+        ]
+
+        if adapter:
+            adapters += list(adapter.values())
+            all_adapters += [f"{adp}: ✅" for adp in adapter.keys()]
+
+        logger.debug(f"Adapters enabled: {' | '.join(all_adapters)}")
+        return adapters
+
+    def _get_driver(
+        self,
+        config: dict | None = None,
+        cache: bool | dict = False,
+        executor_cfg: str | dict | ExecutorConfig | None = None,
+        with_adapter_cfg: dict | WithAdapterConfig | None = None,
+        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
+        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
+        adapter: dict[str, Any] | None = None,
+        reload: bool = False,
+    ) -> tuple[driver.Driver, Callable | None]:
+        """
+        Get the driver and shutdown function for a given pipeline.
+
+        Args:
+            config (dict | None): The configuration for the pipeline.
+            cache (bool): Use cache or not.
+                To fine tune the cache settings, pass a dictionary with the cache settings
+                or adjust the pipeline config.
+                If set to True, the default cache settings will be used.
+            executor_cfg (str | dict | ExecutorConfig | None): The executor to use.
+                Overrides the executor settings in the pipeline config.
+            with_adapter_cfg (dict | WithAdapterConfig | None): The adapter configuration.
+                Overrides the with_adapter settings in the pipeline config.
+            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): The pipeline adapter configuration.
+                Overrides the adapter settings in the pipeline config.
+            project_adapter_cfg (dict | ProjectAdapterConfig | None): The project adapter configuration.
+                Overrides the adapter settings in the project config.
+            adapter (dict[str, Any] | None): Any additional Hamilton adapters can be passed here.
+            reload (bool): Whether to reload the module.
+
+
+        Returns:
+            tuple[driver.Driver, Callable | None]: A tuple containing the driver and shutdown function.
+        """
+        logger.debug("Setting up driver...")
+        module = load_module(name=self.name, reload=reload)
+        executor, shutdown = self._get_executor(executor_cfg)
+        adapters = self._get_adapters(
+            with_adapter_cfg,
+            pipeline_adapter_cfg,
+            project_adapter_cfg,
+            adapter=adapter,
+        )
+
+        config = config or self.pipeline_cfg.run.config
+
+        dr = (
+            driver.Builder()
+            .enable_dynamic_execution(allow_experimental_mode=True)
+            .with_modules(module)
+            .with_config(config)
+            .with_local_executor(executors.SynchronousLocalTaskExecutor())
+        )
+
+        if cache:
+            if isinstance(cache, dict):
+                cache = cache or self.pipeline_cfg.run.cache
+                dr = dr.with_cache(**cache)
+            else:
+                dr = dr.with_cache()
+
+        if executor:
+            dr = dr.with_remote_executor(executor)
+
+        if adapters:
+            dr = dr.with_adapters(*adapters)
+
+        dr = dr.build()
+        return dr, shutdown
+
+    def run(
+        self,
+        inputs: dict | None = None,
+        final_vars: list[str] | None = None,
+        config: dict | None = None,
+        cache: dict | None = None,
+        executor_cfg: str | dict | ExecutorConfig | None = None,
+        with_adapter_cfg: dict | WithAdapterConfig | None = None,
+        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
+        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
+        adapter: dict[str, Any] | None = None,
+        reload: bool = False,
+        log_level: str | None = None,
+    ) -> dict[str, Any]:
+        """
+        Run the pipeline with the given parameters.
+        Args:
+            inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
+            final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
+            config (dict | None, optional): The config for the hamilton driver. Defaults to None.
+            cache (dict | None, optional): The cache configuration. Defaults to None.
+            executor_cfg (str | dict | ExecutorConfig | None, optional): The executor to use.
+                Overrides the executor settings in the pipeline config. Defaults to None.
+            with_adapter_cfg (dict | WithAdapterConfig | None, optional): The adapter configuration.
+                Overrides the with_adapter settings in the pipeline config. Defaults to None.
+            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None, optional): The pipeline adapter configuration.
+                Overrides the adapter settings in the pipeline config. Defaults to None.
+            project_adapter_cfg (dict | ProjectAdapterConfig | None, optional): The project adapter configuration.
+                Overrides the adapter settings in the project config. Defaults to None.
+            adapter (dict[str, Any] | None, optional): Any additional Hamilton adapters can be passed here. Defaults to None.
+            reload (bool, optional): Whether to reload the module. Defaults to False.
+            log_level (str | None, optional): The log level to use. Defaults to None.
+
+        Returns:
+            dict[str, Any]: The result of executing the pipeline.
+        """
+        self.start_time = dt.datetime.now()
+
+        if log_level or self.pipeline_cfg.run.log_level:
+            setup_logging(level=log_level or self.pipeline_cfg.run.log_level)
+
+        logger.info(f"Starting pipeline {self.project_cfg.name}.{self.name}")
+        # Load the module and get the driver
+        dr, shutdown = self._get_driver(
+            config=config,
+            cache=cache,
+            executor_cfg=executor_cfg,
+            with_adapter_cfg=with_adapter_cfg,
+            pipeline_adapter_cfg=pipeline_adapter_cfg,
+            project_adapter_cfg=project_adapter_cfg,
+            adapter=adapter,
+            reload=reload,
+        )
+        final_vars = final_vars or self.pipeline_cfg.run.final_vars
+        inputs = {
+            **(self.pipeline_cfg.run.inputs or {}),
+            **(inputs or {}),
+        }  # <-- inputs override and/or extend config inputs
+
+        res = dr.execute(final_vars=final_vars, inputs=inputs)
+        self.end_time = dt.datetime.now()
+        self.execution_time = self.end_time - self.start_time
+        logger.success(
+            f"Finished: Pipeline {self.project_cfg.name}.{self.name} executed in {humanize.naturaldelta(self.execution_time)}"
+        )
+
+        if shutdown is not None:
+            logger.info("Shutting down executor...")
+            shutdown()
+            logger.info("Executor shut down.")
+
+        return res
+
+
+def run_pipeline(
+    project_cfg: ProjectConfig,
+    pipeline_cfg: PipelineConfig,
+    inputs: dict | None = None,
+    final_vars: list[str] | None = None,
+    config: dict | None = None,
+    cache: dict | None = None,
+    executor_cfg: str | dict | ExecutorConfig | None = None,
+    with_adapter_cfg: dict | WithAdapterConfig | None = None,
+    pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
+    project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
+    adapter: dict[str, Any] | None = None,
+    reload: bool = False,
+    log_level: str | None = None,
+    max_retries: int = 0,
+    retry_delay: float = 1.0,
+    jitter_factor: float = 0.1,
+    retry_exceptions: tuple = (
+        Exception,
+        HTTPError,
+        UnauthorizedException,
+    ),  # Adjust to specific exceptions
+) -> dict[str, Any]:
+    """Run the pipeline with the given parameters.
+
+    Args:
+
+        project_cfg (ProjectConfig): The project configuration.
+        pipeline_cfg (PipelineConfig): The pipeline configuration.
+        inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
+        final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
+        config (dict | None, optional): The config for the hamilton driver. Defaults to None.
+        cache (dict | None, optional): The cache configuration. Defaults to None.
+        executor_cfg (str | dict | ExecutorConfig | None, optional): The executor to use.
+            Overrides the executor settings in the pipeline config. Defaults to None.
+        with_adapter_cfg (dict | WithAdapterConfig | None, optional): The adapter configuration.
+            Overrides the with_adapter settings in the pipeline config. Defaults to None.
+        pipeline_adapter_cfg (dict | PipelineAdapterConfig | None, optional): The pipeline adapter configuration.
+            Overrides the adapter settings in the pipeline config. Defaults to None.
+        project_adapter_cfg (dict | ProjectAdapterConfig | None, optional): The project adapter configuration.
+            Overrides the adapter settings in the project config. Defaults to None.
+        adapter (dict[str, Any] | None, optional): Any additional Hamilton adapters can be passed here. Defaults to None.
+        reload (bool, optional): Whether to reload the module. Defaults to False.
+        log_level (str | None, optional): The log level to use. Defaults to None.
+        max_retries (int, optional): The maximum number of retry attempts. Defaults to 0.
+        retry_delay (float, optional): The base delay between retries in seconds. Defaults to 1.0.
+        jitter_factor (float, optional): The factor to apply for jitter. Defaults to 0.1.
+        retry_exceptions (tuple, optional): A tuple of exception classes to catch for retries. Defaults to (Exception,).
+
+    Returns:
+
+        dict[str, Any]: The result of executing the pipeline.
+
+    Raises:
+        Exception: If the pipeline execution fails after the maximum number of retries.
+    """
+    attempts = 0
+    last_exception = None
+
+    while attempts <= max_retries:
+        try:
+            with PipelineRunner(project_cfg, pipeline_cfg) as runner:
+                return runner.run(
+                    inputs=inputs,
+                    final_vars=final_vars,
+                    config=config,
+                    cache=cache,
+                    executor_cfg=executor_cfg,
+                    with_adapter_cfg=with_adapter_cfg,
+                    pipeline_adapter_cfg=pipeline_adapter_cfg,
+                    project_adapter_cfg=project_adapter_cfg,
+                    adapter=adapter,
+                    reload=reload,
+                    log_level=log_level,
+                )
+        except retry_exceptions as e:
+            if isinstance(e, HTTPError) or isinstance(e, UnauthorizedException):
+                if with_adapter_cfg["tracker"]:
+                    logger.info("Tracker is enabled. Disabling tracker for this run.")
+                    with_adapter_cfg["tracker"] = False
+
+            attempts += 1
+            last_exception = e
+
+            if attempts <= max_retries:
+                logger.warning(
+                    f"Pipeline execution failed (attempt {attempts}/{max_retries}): {e}"
+                )
+
+                # Calculate base delay with exponential backoff
+                base_delay = retry_delay * (2 ** (attempts - 1))
+
+                # Add jitter: random value between -jitter_factor and +jitter_factor of the base delay
+                jitter = base_delay * jitter_factor * (2 * random.random() - 1)
+                actual_delay = max(0, base_delay + jitter)  # Ensure non-negative delay
+
+                logger.debug(
+                    f"Retrying in {actual_delay:.2f} seconds (base: {base_delay:.2f}s, jitter: {jitter:.2f}s)"
+                )
+                time.sleep(actual_delay)
+            else:
+                # Last attempt failed
+                logger.error(f"Pipeline execution failed after {max_retries} attempts")
+                raise last_exception
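
The retry loop above implements exponential backoff with symmetric jitter: attempt n sleeps roughly `retry_delay * 2**(n - 1)` seconds, perturbed by up to ±`jitter_factor` of that base so that concurrent retries don't synchronize. A standalone sketch of the same schedule (stdlib only, independent of flowerpower):

```python
import random


def backoff_delay(attempt: int, retry_delay: float = 1.0, jitter_factor: float = 0.1) -> float:
    """Mirror of run_pipeline's delay schedule: exponential base, symmetric jitter."""
    base_delay = retry_delay * (2 ** (attempt - 1))
    jitter = base_delay * jitter_factor * (2 * random.random() - 1)
    return max(0, base_delay + jitter)  # clamp to non-negative, as the source does


for attempt in range(1, 5):
    print(f"attempt {attempt}: ~{backoff_delay(attempt):.2f}s")
# With the defaults this prints roughly 1s, 2s, 4s, 8s, each within ±10%.
```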
--- /dev/null
+++ b/flowerpower/pipeline/visualizer.py
@@ -0,0 +1,142 @@
+import posixpath
+from typing import Any
+
+from hamilton import driver
+from rich import print
+
+# Import necessary config types and utility functions
+from ..cfg import PipelineConfig, ProjectConfig
+from ..fs import AbstractFileSystem
+from ..utils.misc import view_img
+from .base import load_module  # Import module loading utility
+
+
+class PipelineVisualizer:
+    """Handles the visualization of pipeline DAGs."""
+
+    def __init__(self, project_cfg: ProjectConfig, fs: AbstractFileSystem):
+        """
+        Initializes the PipelineVisualizer.
+
+        Args:
+            project_cfg: The project configuration object.
+            fs: The filesystem instance.
+        """
+        self.project_cfg = project_cfg
+        self._fs = fs
+        # Attributes like fs and base_dir are accessed via self.project_cfg
+
+    def _display_all_function(self, name: str, reload: bool = False):
+        """Internal helper to load module/config and get the Hamilton DAG object.
+
+        Args:
+            name (str): The name of the pipeline.
+            reload (bool): Whether to reload the module.
+
+        Returns:
+            Hamilton DAG object.
+
+        Raises:
+            ImportError: If the module cannot be loaded.
+
+        """
+        # Load pipeline-specific config
+        pipeline_cfg = PipelineConfig.load(name=name, fs=self._fs)
+
+        # Load the pipeline module
+        # Ensure the pipelines directory is in sys.path (handled by PipelineManager usually)
+        module = load_module(name=name, reload=reload)
+
+        # Create a basic driver builder for visualization purposes
+        # Use the run config from the loaded pipeline_cfg
+        builder = (
+            driver.Builder()
+            .enable_dynamic_execution(allow_experimental_mode=True)
+            .with_modules(module)
+            .with_config(pipeline_cfg.run.config or {})
+            # No adapters or complex executors needed for display_all_functions
+        )
+
+        # Build the driver
+        dr = builder.build()
+
+        # Return the visualization object
+        return dr.display_all_functions()
+
+    def save_dag(
+        self,
+        name: str,
+        format: str = "png",
+        reload: bool = False,
+    ):
+        """
+        Save an image of the graph of functions for a given pipeline name.
+
+        Args:
+            name (str): The name of the pipeline graph.
+            format (str, optional): The format of the graph file. Defaults to "png".
+            reload (bool, optional): Whether to reload the pipeline data. Defaults to False.
+
+        Raises:
+            ImportError: If the module cannot be loaded.
+
+        Example:
+            >>> from flowerpower.pipeline.visualizer import PipelineVisualizer
+            >>> visualizer = PipelineVisualizer(project_cfg, fs)
+            >>> visualizer.save_dag(name="example_pipeline", format="png")
+        """
+        dag = self._display_all_function(name=name, reload=reload)
+
+        # Use project_cfg attributes for path and filesystem access
+        graph_dir = posixpath.join(self.project_cfg.base_dir, "graphs")
+        self._fs.makedirs(graph_dir, exist_ok=True)
+
+        output_path = posixpath.join(
+            graph_dir, name
+        )  # Output filename is just the pipeline name
+        output_path_with_ext = f"{output_path}.{format}"
+
+        # Render the DAG using the graphviz object returned by display_all_functions
+        dag.render(
+            output_path,  # graphviz appends the format automatically
+            format=format,
+            cleanup=True,
+            view=False,
+        )
+        print(
+            f"📊 Saved graph for [bold blue]{self.project_cfg.name}.{name}[/bold blue] to [green]{output_path_with_ext}[/green]"
+        )
+
+    def show_dag(
+        self,
+        name: str,
+        format: str = "png",
+        reload: bool = False,
+        raw: bool = False,
+    ):
+        """
+        Display the graph of functions for a given pipeline name.
+
+        Args:
+            name (str): The name of the pipeline graph.
+            format (str, optional): The format of the graph file. Defaults to "png".
+            reload (bool, optional): Whether to reload the pipeline data. Defaults to False.
+            raw (bool, optional): Whether to return the raw graph object instead of displaying. Defaults to False.
+
+        Returns:
+            Optional[graphviz.Digraph]: The generated graph object if raw=True, else None.
+
+        Raises:
+            ImportError: If the module cannot be loaded.
+
+        Example:
+            >>> from flowerpower.pipeline.visualizer import PipelineVisualizer
+            >>> visualizer = PipelineVisualizer(project_cfg, fs)
+            >>> visualizer.show_dag(name="example_pipeline", format="png")
+        """
+        dag = self._display_all_function(name=name, reload=reload)
+        if raw:
+            return dag
+        # Use view_img utility to display the rendered graph
+        view_img(dag.pipe(format=format), format=format)
+        return None  # Explicitly return None when not raw
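
`visualizer.py` delegates all graph work to Hamilton: `display_all_functions()` returns a graphviz object, which `save_dag` renders to disk and `show_dag` pipes to an image viewer. A minimal standalone equivalent, using only the Hamilton and graphviz calls that appear in the hunk above (`my_pipeline_module` is a hypothetical stand-in for a loaded pipeline module):

```python
from hamilton import driver

import my_pipeline_module  # hypothetical: any module defining Hamilton functions

# Same builder shape as PipelineVisualizer._display_all_function: no adapters
# or remote executors are needed just to draw the function graph.
dr = (
    driver.Builder()
    .enable_dynamic_execution(allow_experimental_mode=True)
    .with_modules(my_pipeline_module)
    .with_config({})
    .build()
)

dag = dr.display_all_functions()  # graphviz.Digraph
dag.render("my_pipeline", format="png", cleanup=True, view=False)  # writes my_pipeline.png
```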