FlowerPower 0.9.12.4__py3-none-any.whl → 1.0.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. flowerpower/__init__.py +17 -2
  2. flowerpower/cfg/__init__.py +201 -149
  3. flowerpower/cfg/base.py +122 -24
  4. flowerpower/cfg/pipeline/__init__.py +254 -0
  5. flowerpower/cfg/pipeline/adapter.py +66 -0
  6. flowerpower/cfg/pipeline/run.py +40 -11
  7. flowerpower/cfg/pipeline/schedule.py +69 -79
  8. flowerpower/cfg/project/__init__.py +149 -0
  9. flowerpower/cfg/project/adapter.py +57 -0
  10. flowerpower/cfg/project/job_queue.py +165 -0
  11. flowerpower/cli/__init__.py +92 -35
  12. flowerpower/cli/job_queue.py +878 -0
  13. flowerpower/cli/mqtt.py +49 -4
  14. flowerpower/cli/pipeline.py +576 -381
  15. flowerpower/cli/utils.py +55 -0
  16. flowerpower/flowerpower.py +12 -7
  17. flowerpower/fs/__init__.py +20 -2
  18. flowerpower/fs/base.py +350 -26
  19. flowerpower/fs/ext.py +797 -216
  20. flowerpower/fs/storage_options.py +1097 -55
  21. flowerpower/io/base.py +13 -18
  22. flowerpower/io/loader/__init__.py +28 -0
  23. flowerpower/io/loader/deltatable.py +7 -10
  24. flowerpower/io/metadata.py +1 -0
  25. flowerpower/io/saver/__init__.py +28 -0
  26. flowerpower/io/saver/deltatable.py +4 -3
  27. flowerpower/job_queue/__init__.py +252 -0
  28. flowerpower/job_queue/apscheduler/__init__.py +11 -0
  29. flowerpower/job_queue/apscheduler/_setup/datastore.py +110 -0
  30. flowerpower/job_queue/apscheduler/_setup/eventbroker.py +93 -0
  31. flowerpower/job_queue/apscheduler/manager.py +1063 -0
  32. flowerpower/job_queue/apscheduler/setup.py +524 -0
  33. flowerpower/job_queue/apscheduler/trigger.py +169 -0
  34. flowerpower/job_queue/apscheduler/utils.py +309 -0
  35. flowerpower/job_queue/base.py +382 -0
  36. flowerpower/job_queue/rq/__init__.py +10 -0
  37. flowerpower/job_queue/rq/_trigger.py +37 -0
  38. flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +226 -0
  39. flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +231 -0
  40. flowerpower/job_queue/rq/manager.py +1449 -0
  41. flowerpower/job_queue/rq/setup.py +150 -0
  42. flowerpower/job_queue/rq/utils.py +69 -0
  43. flowerpower/pipeline/__init__.py +5 -0
  44. flowerpower/pipeline/base.py +118 -0
  45. flowerpower/pipeline/io.py +407 -0
  46. flowerpower/pipeline/job_queue.py +505 -0
  47. flowerpower/pipeline/manager.py +1586 -0
  48. flowerpower/pipeline/registry.py +560 -0
  49. flowerpower/pipeline/runner.py +560 -0
  50. flowerpower/pipeline/visualizer.py +142 -0
  51. flowerpower/plugins/mqtt/__init__.py +12 -0
  52. flowerpower/plugins/mqtt/cfg.py +16 -0
  53. flowerpower/plugins/mqtt/manager.py +789 -0
  54. flowerpower/settings.py +110 -0
  55. flowerpower/utils/logging.py +21 -0
  56. flowerpower/utils/misc.py +57 -9
  57. flowerpower/utils/sql.py +122 -24
  58. flowerpower/utils/templates.py +18 -142
  59. flowerpower/web/app.py +0 -0
  60. flowerpower-1.0.0b1.dist-info/METADATA +324 -0
  61. flowerpower-1.0.0b1.dist-info/RECORD +94 -0
  62. {flowerpower-0.9.12.4.dist-info → flowerpower-1.0.0b1.dist-info}/WHEEL +1 -1
  63. flowerpower/cfg/pipeline/tracker.py +0 -14
  64. flowerpower/cfg/project/open_telemetry.py +0 -8
  65. flowerpower/cfg/project/tracker.py +0 -11
  66. flowerpower/cfg/project/worker.py +0 -19
  67. flowerpower/cli/scheduler.py +0 -309
  68. flowerpower/event_handler.py +0 -23
  69. flowerpower/mqtt.py +0 -525
  70. flowerpower/pipeline.py +0 -2419
  71. flowerpower/scheduler.py +0 -680
  72. flowerpower/tui.py +0 -79
  73. flowerpower/utils/datastore.py +0 -186
  74. flowerpower/utils/eventbroker.py +0 -127
  75. flowerpower/utils/executor.py +0 -58
  76. flowerpower/utils/trigger.py +0 -140
  77. flowerpower-0.9.12.4.dist-info/METADATA +0 -575
  78. flowerpower-0.9.12.4.dist-info/RECORD +0 -70
  79. /flowerpower/{cfg/pipeline/params.py → cli/worker.py} +0 -0
  80. {flowerpower-0.9.12.4.dist-info → flowerpower-1.0.0b1.dist-info}/entry_points.txt +0 -0
  81. {flowerpower-0.9.12.4.dist-info → flowerpower-1.0.0b1.dist-info}/top_level.txt +0 -0
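
The single hunk below is the new flowerpower/pipeline/manager.py (entry 47 above, +1586 lines), which replaces the removed top-level flowerpower/pipeline.py. For orientation, here is a minimal usage sketch of the new PipelineManager API, assembled from the docstrings in that file; the project path, pipeline name, and inputs are illustrative, not part of the package:

    from flowerpower.pipeline import PipelineManager

    # Synchronous run inside a context manager (__enter__/__exit__ are defined below)
    with PipelineManager(base_dir="/path/to/project", job_queue_type="rq") as manager:
        results = manager.run("data_pipeline", inputs={"data_date": "2025-04-28"})

        # Enqueue the same pipeline on the configured job queue backend
        job_id = manager.add_job("data_pipeline", run_in="1d")

        # Recurring schedule from a cron expression
        schedule_id = manager.schedule("data_pipeline", cron="0 0 * * *")
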
@@ -0,0 +1,1586 @@
1
+ import datetime as dt
2
+ import posixpath
3
+ import sys
4
+ from pathlib import Path
5
+ from types import TracebackType
6
+ from typing import Any, Callable, TypeVar, Union
7
+ from uuid import UUID
8
+ import duration_parser
9
+ from loguru import logger
10
+ from munch import Munch
11
+
12
+ try:
13
+ from graphviz import Digraph
14
+ except ImportError:
15
+ Digraph = Any # Type alias for when graphviz isn't installed
16
+
17
+ from .. import settings
18
+ from ..cfg import PipelineConfig, ProjectConfig
19
+ from ..cfg.pipeline.adapter import AdapterConfig as PipelineAdapterConfig
20
+ from ..cfg.pipeline.run import ExecutorConfig, WithAdapterConfig
21
+ from ..cfg.project.adapter import AdapterConfig as ProjectAdapterConfig
22
+ from ..fs import AbstractFileSystem, BaseStorageOptions, get_filesystem
23
+ from ..utils.logging import setup_logging
24
+ from .io import PipelineIOManager
25
+ from .registry import PipelineRegistry, HookType
26
+ from .runner import PipelineRunner, run_pipeline
27
+ from .job_queue import PipelineJobQueue
28
+ from .visualizer import PipelineVisualizer
29
+
30
+ setup_logging()
31
+
32
+ GraphType = TypeVar("GraphType") # Type variable for graphviz.Digraph
33
+
34
+
35
+ class PipelineManager:
36
+ """Central manager for FlowerPower pipeline operations.
37
+
38
+ This class provides a unified interface for managing pipelines, including:
39
+ - Configuration management and loading
40
+ - Pipeline creation, deletion, and discovery
41
+ - Pipeline execution via PipelineRunner
42
+ - Job queueing and scheduling via PipelineJobQueue
43
+ - Visualization via PipelineVisualizer
44
+ - Import/export operations via PipelineIOManager
45
+
46
+ Attributes:
47
+ registry (PipelineRegistry): Handles pipeline registration and discovery
48
+ job_queue (PipelineJobQueue): Manages job queueing and scheduling
49
+ visualizer (PipelineVisualizer): Handles pipeline visualization
50
+ io (PipelineIOManager): Manages pipeline import/export operations
51
+ project_cfg (ProjectConfig): Current project configuration
52
+ pipeline_cfg (PipelineConfig): Current pipeline configuration
53
+ pipelines (list[str]): List of available pipeline names
54
+ current_pipeline_name (str): Name of the currently loaded pipeline
55
+ summary (dict[str, dict | str]): Summary of all pipelines
56
+
57
+ Example:
58
+ >>> from flowerpower.pipeline import PipelineManager
59
+ >>>
60
+ >>> # Create manager with default settings
61
+ >>> manager = PipelineManager()
62
+ >>>
63
+ >>> # Create manager with custom settings
64
+ >>> manager = PipelineManager(
65
+ ... base_dir="/path/to/project",
66
+ ... job_queue_type="rq",
67
+ ... log_level="DEBUG"
68
+ ... )
69
+ """
70
+
71
+ def __init__(
72
+ self,
73
+ base_dir: str | None = None,
74
+ storage_options: dict | Munch | BaseStorageOptions | None = None,
75
+ fs: AbstractFileSystem | None = None,
76
+ cfg_dir: str | None = None,
77
+ pipelines_dir: str | None = None,
78
+ job_queue_type: str | None = None,
79
+ log_level: str | None = None,
80
+ ) -> None:
81
+ """Initialize the PipelineManager.
82
+
83
+ Args:
84
+ base_dir: Root directory for the FlowerPower project. Defaults to current
85
+ working directory if not specified.
86
+ storage_options: Configuration options for filesystem access. Can be:
87
+ - dict: Raw key-value options
88
+ - Munch: Dot-accessible options object
89
+ - BaseStorageOptions: Structured options class
90
+ Used for S3, GCS, etc. Example: {"key": "abc", "secret": "xyz"}
91
+ fs: Pre-configured fsspec filesystem instance. If provided, used instead
92
+ of creating new filesystem from base_dir and storage_options.
93
+ cfg_dir: Override default configuration directory name ('conf').
94
+ Example: "config" or "settings".
95
+ pipelines_dir: Override default pipelines directory name ('pipelines').
96
+ Example: "flows" or "dags".
97
+ job_queue_type: Override job queue type from project config/settings.
98
+ Valid values: "rq", "apscheduler", or "huey".
99
+ log_level: Set logging level for the manager.
100
+ Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
101
+
102
+ Raises:
103
+ ValueError: If provided configuration paths don't exist or can't be created
104
+ RuntimeError: If filesystem operations fail during initialization
105
+ ImportError: If required dependencies for specified worker type not installed
106
+
107
+ Example:
108
+ >>> # Basic initialization
109
+ >>> manager = PipelineManager()
110
+ >>>
111
+ >>> # Custom configuration with S3 storage
112
+ >>> manager = PipelineManager(
113
+ ... base_dir="s3://my-bucket/project",
114
+ ... storage_options={
115
+ ... "key": "ACCESS_KEY",
116
+ ... "secret": "SECRET_KEY"
117
+ ... },
118
+ ... job_queue_type="rq",
119
+ ... log_level="DEBUG"
120
+ ... )
121
+ """
122
+ if log_level:
123
+ setup_logging(level=log_level)
124
+
125
+ self._base_dir = base_dir or str(Path.cwd())
126
+ self._storage_options = storage_options
127
+ if not fs:
128
+ fs = get_filesystem(self._base_dir, storage_options=storage_options)
129
+ self._fs = fs
130
+
131
+ # Store overrides for ProjectConfig loading
132
+ self._cfg_dir = cfg_dir or settings.CONFIG_DIR
133
+ self._pipelines_dir = pipelines_dir or settings.PIPELINES_DIR
134
+ self._job_queue_type = job_queue_type
135
+
136
+ self._load_project_cfg(reload=True) # Load project config
137
+
138
+ # Ensure essential directories exist (using paths from loaded project_cfg)
139
+ try:
140
+ self._fs.makedirs(self._cfg_dir, exist_ok=True)
141
+ self._fs.makedirs(self._pipelines_dir, exist_ok=True)
142
+ except Exception as e:
143
+ logger.error(f"Error creating essential directories: {e}")
144
+ # Consider raising an error here depending on desired behavior
145
+
146
+ # Ensure pipeline modules can be imported
147
+ self._add_modules_path()
148
+
149
+ # Instantiate components using the loaded project config
150
+ self.registry = PipelineRegistry(
151
+ project_cfg=self.project_cfg,
152
+ fs=self._fs,
153
+ cfg_dir=self._cfg_dir,
154
+ pipelines_dir=self._pipelines_dir,
155
+ )
156
+ self.job_queue = PipelineJobQueue(
157
+ project_cfg=self.project_cfg,
158
+ fs=self._fs,
159
+ cfg_dir=self._cfg_dir,
160
+ pipelines_dir=self._pipelines_dir,
161
+ job_queue_type=self._job_queue_type,
162
+ )
163
+ self.visualizer = PipelineVisualizer(project_cfg=self.project_cfg, fs=self._fs)
164
+ self.io = PipelineIOManager(registry=self.registry)
165
+
166
+ self._current_pipeline_name: str | None = None
167
+ self._pipeline_cfg: PipelineConfig | None = None
168
+
169
+ def __enter__(self) -> "PipelineManager":
170
+ """Enter the context manager.
171
+
172
+ Enables use of the manager in a with statement for automatic resource cleanup.
173
+
174
+ Returns:
175
+ PipelineManager: Self for use in context manager.
176
+
177
+ Example:
178
+ >>> from flowerpower.pipeline import PipelineManager
179
+ >>>
180
+ >>> with PipelineManager() as manager:
181
+ ... result = manager.run("my_pipeline")
182
+ """
183
+ return self
184
+
185
+ def __exit__(
186
+ self,
187
+ exc_type: type[BaseException] | None,
188
+ exc_val: BaseException | None,
189
+ exc_tb: TracebackType | None,
190
+ ) -> None:
191
+ """Exit the context manager.
192
+
193
+ Handles cleanup of resources when exiting a with statement.
194
+
195
+ Args:
196
+ exc_type: Type of exception that occurred, if any
197
+ exc_val: Exception instance that occurred, if any
198
+ exc_tb: Traceback of exception that occurred, if any
199
+
200
+ Example:
201
+ >>> from flowerpower.pipeline import PipelineManager
202
+ >>>
203
+ >>> with PipelineManager() as manager:
204
+ ... try:
205
+ ... result = manager.run("my_pipeline")
206
+ ... except Exception as e:
207
+ ... print(f"Error: {e}")
208
+ ... # Resources automatically cleaned up here
209
+ """
210
+ # Add cleanup code if needed
211
+ pass
212
+
213
+ def _get_run_func_for_job(self, name: str, reload: bool = False) -> Callable:
214
+ """Create a PipelineRunner instance and return its run method.
215
+
216
+ This internal helper method ensures that each job gets a fresh runner
217
+ with the correct configuration state.
218
+
219
+ Args:
220
+ name: Name of the pipeline to create runner for
221
+ reload: Whether to reload pipeline configuration
222
+
223
+ Returns:
224
+ Callable: Bound run method from a fresh PipelineRunner instance
225
+
226
+ Example:
227
+ >>> # Internal usage
228
+ >>> manager = PipelineManager()
229
+ >>> run_func = manager._get_run_func_for_job("data_pipeline")
230
+ >>> result = run_func(inputs={"date": "2025-04-28"})
231
+ """
232
+ pipeline_cfg = self._load_pipeline_cfg(name=name, reload=reload)
233
+ runner = PipelineRunner(project_cfg=self.project_cfg, pipeline_cfg=pipeline_cfg)
234
+ return runner.run
235
+
236
+ def _add_modules_path(self) -> None:
237
+ """Add pipeline module paths to Python path.
238
+
239
+ This internal method ensures that pipeline modules can be imported by:
240
+ 1. Syncing filesystem cache if needed
241
+ 2. Adding project root to Python path
242
+ 3. Adding pipelines directory to Python path
243
+
244
+ Raises:
245
+ RuntimeError: If filesystem sync fails or paths are invalid
246
+
247
+ Example:
248
+ >>> # Internal usage
249
+ >>> manager = PipelineManager()
250
+ >>> manager._add_modules_path()
251
+ >>> import my_pipeline # Now importable
252
+ """
253
+ if self._fs.is_cache_fs:
254
+ self._fs.sync()
255
+
256
+ if self._fs.path not in sys.path:
257
+ sys.path.insert(0, self._fs.path)
258
+
259
+ modules_path = posixpath.join(self._fs.path, self._pipelines_dir)
260
+ if modules_path not in sys.path:
261
+ sys.path.insert(0, modules_path)
262
+
263
+ def _load_project_cfg(self, reload: bool = False) -> ProjectConfig:
264
+ """Load or reload the project configuration.
265
+
266
+ This internal method handles loading project-wide settings from the config
267
+ directory, applying overrides, and maintaining configuration state.
268
+
269
+ Args:
270
+ reload: Force reload configuration even if already loaded.
271
+ Defaults to False for caching behavior.
272
+
273
+ Returns:
274
+ ProjectConfig: The loaded project configuration object with any
275
+ specified overrides applied.
276
+
277
+ Raises:
278
+ FileNotFoundError: If project configuration file doesn't exist
279
+ ValueError: If configuration format is invalid
280
+ RuntimeError: If filesystem operations fail during loading
281
+
282
+ Example:
283
+ >>> # Internal usage
284
+ >>> manager = PipelineManager()
285
+ >>> project_cfg = manager._load_project_cfg(reload=True)
286
+ >>> print(project_cfg.job_queue.type)
287
+ 'rq'
288
+ """
289
+ if hasattr(self, "_project_cfg") and not reload:
290
+ return self._project_cfg
291
+
292
+ # Pass overrides to ProjectConfig.load
293
+ self._project_cfg = ProjectConfig.load(
294
+ base_dir=self._base_dir,
295
+ job_queue_type=self._job_queue_type,
296
+ fs=self._fs, # Pass pre-configured fs if provided
297
+ storage_options=self._storage_options,
298
+ )
299
+ # Update internal fs reference in case ProjectConfig loaded/created one
300
+ return self._project_cfg
301
+
302
+ def _load_pipeline_cfg(self, name: str, reload: bool = False) -> PipelineConfig:
303
+ """Load or reload configuration for a specific pipeline.
304
+
305
+ This internal method handles loading pipeline-specific settings from the config
306
+ directory and maintaining the configuration cache state.
307
+
308
+ Args:
309
+ name: Name of the pipeline whose configuration to load
310
+ reload: Force reload configuration even if already loaded.
311
+ When False, returns cached config if available.
312
+
313
+ Returns:
314
+ PipelineConfig: The loaded pipeline configuration object
315
+
316
+ Raises:
317
+ FileNotFoundError: If pipeline configuration file doesn't exist
318
+ ValueError: If configuration format is invalid
319
+ RuntimeError: If filesystem operations fail during loading
320
+
321
+ Example:
322
+ >>> # Internal usage
323
+ >>> manager = PipelineManager()
324
+ >>> cfg = manager._load_pipeline_cfg("data_pipeline", reload=True)
325
+ >>> print(cfg.run.executor.type)
326
+ 'async'
327
+ """
328
+ if name == self._current_pipeline_name and not reload:
329
+ return self._pipeline_cfg
330
+
331
+ self._current_pipeline_name = name
332
+ self._pipeline_cfg = PipelineConfig.load(
333
+ base_dir=self._base_dir,
334
+ name=name,
335
+ fs=self._fs,
336
+ storage_options=self._storage_options,
337
+ )
338
+ return self._pipeline_cfg
339
+
340
+ @property
341
+ def current_pipeline_name(self) -> str:
342
+ """Get the name of the currently loaded pipeline.
343
+
344
+ Returns:
345
+ str: Name of the currently loaded pipeline, or None if no pipeline has been loaded.
346
+
347
+ Example:
348
+ >>> manager = PipelineManager()
349
+ >>> manager._load_pipeline_cfg("example_pipeline")
350
+ >>> print(manager.current_pipeline_name)
351
+ 'example_pipeline'
352
+ """
353
+ return self._current_pipeline_name
354
+
355
+ @property
356
+ def project_cfg(self) -> ProjectConfig:
357
+ """Get the project configuration.
358
+
359
+ Loads configuration if not already loaded.
360
+
361
+ Returns:
362
+ ProjectConfig: Project-wide configuration object.
363
+
364
+ Raises:
365
+ RuntimeError: If configuration loading fails.
366
+
367
+ Example:
368
+ >>> manager = PipelineManager()
369
+ >>> cfg = manager.project_cfg
370
+ >>> print(cfg.job_queue.type)
371
+ 'rq'
372
+ """
373
+ if not hasattr(self, "_project_cfg"):
374
+ self._load_project_cfg()
375
+ return self._project_cfg
376
+
377
+ @property
378
+ def pipeline_cfg(self) -> PipelineConfig:
379
+ """Get the configuration for the currently loaded pipeline.
380
+
381
+ Returns:
382
+ PipelineConfig: Pipeline-specific configuration object.
383
+
384
+ Warns:
385
+ UserWarning: If no pipeline is currently loaded.
386
+
387
+ Example:
388
+ >>> manager = PipelineManager()
389
+ >>> manager._load_pipeline_cfg("example_pipeline")
390
+ >>> cfg = manager.pipeline_cfg
391
+ >>> print(cfg.run.executor)
392
+ 'local'
393
+ """
394
+ if not hasattr(self, "_pipeline_cfg"):
395
+ logger.warning("Pipeline config not loaded.")
396
+ return
397
+ return self._pipeline_cfg
398
+
399
+ # --- Core Execution Method ---
400
+
401
+ def run(
402
+ self,
403
+ name: str,
404
+ inputs: dict | None = None,
405
+ final_vars: list[str] | None = None,
406
+ config: dict | None = None,
407
+ cache: dict | None = None,
408
+ executor_cfg: str | dict | ExecutorConfig | None = None,
409
+ with_adapter_cfg: dict | WithAdapterConfig | None = None,
410
+ pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
411
+ project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
412
+ adapter: dict[str, Any] | None = None,
413
+ reload: bool = False,
414
+ log_level: str | None = None,
415
+ max_retries: int | None = None,
416
+ retry_delay: float | None = None,
417
+ jitter_factor: float | None = None,
418
+ retry_exceptions: tuple | list | None = None,
419
+
420
+ ) -> dict[str, Any]:
421
+ """Execute a pipeline synchronously and return its results.
422
+
423
+ This is the main method for running pipelines directly. It handles configuration
424
+ loading, adapter setup, and execution via PipelineRunner.
425
+
426
+ Args:
427
+ name (str): Name of the pipeline to run. Must be a valid identifier.
428
+ inputs (dict | None): Override pipeline input values. Example: {"data_date": "2025-04-28"}
429
+ final_vars (list[str] | None): Specify which output variables to return.
430
+ Example: ["model", "metrics"]
431
+ config (dict | None): Configuration for Hamilton pipeline executor.
432
+ Example: {"model": "LogisticRegression"}
433
+ cache (dict | None): Cache configuration for results. Example: {"recompute": ["node1", "final_node"]}
434
+ executor_cfg (str | dict | ExecutorConfig | None): Execution configuration, can be:
435
+ - str: Executor name, e.g. "threadpool", "local"
436
+ - dict: Raw config, e.g. {"type": "threadpool", "max_workers": 4}
437
+ - ExecutorConfig: Structured config object
438
+ with_adapter_cfg (dict | WithAdapterConfig | None): Adapter settings for pipeline execution.
439
+ Example: {"opentelemetry": True, "tracker": False}
440
+ pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline-specific adapter settings.
441
+ Example: {"tracker": {"project_id": "123", "tags": {"env": "prod"}}}
442
+ project_adapter_cfg (dict | ProjectAdapterConfig | None): Project-level adapter settings.
443
+ Example: {"opentelemetry": {"host": "http://localhost:4317"}}
444
+ adapter (dict[str, Any] | None): Custom adapter instance for pipeline
445
+ Example: {"ray_graph_adapter": RayGraphAdapter()}
446
+ reload (bool): Force reload of pipeline configuration.
447
+ log_level (str | None): Logging level for the execution. Default None uses project config.
448
+ Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
449
+ max_retries (int): Maximum number of retries for execution.
450
+ retry_delay (float): Delay between retries in seconds.
451
+ jitter_factor (float): Random jitter factor to add to retry delay
452
+ retry_exceptions (tuple): Exceptions that trigger a retry.
453
+
454
+ Returns:
455
+ dict[str, Any]: Pipeline execution results, mapping output variable names
456
+ to their computed values.
457
+
458
+ Raises:
459
+ ValueError: If pipeline name doesn't exist or configuration is invalid
460
+ ImportError: If pipeline module cannot be imported
461
+ RuntimeError: If execution fails due to pipeline or adapter errors
462
+
463
+ Example:
464
+ >>> from flowerpower.pipeline import PipelineManager
465
+ >>>
466
+ >>> manager = PipelineManager()
467
+ >>>
468
+ >>> # Basic pipeline run
469
+ >>> results = manager.run("data_pipeline")
470
+ >>>
471
+ >>> # Complex run with overrides
472
+ >>> results = manager.run(
473
+ ... name="ml_pipeline",
474
+ ... inputs={
475
+ ... "training_date": "2025-04-28",
476
+ ... "model_params": {"n_estimators": 100}
477
+ ... },
478
+ ... final_vars=["model", "metrics"],
479
+ ... executor_cfg={"type": "threadpool", "max_workers": 4},
480
+ ... with_adapter_cfg={"tracker": True},
481
+ ... reload=True
482
+ ... )
483
+ """
484
+ pipeline_cfg = self._load_pipeline_cfg(name=name, reload=reload)
485
+
486
+ res = run_pipeline(
487
+ project_cfg=self.project_cfg,
488
+ pipeline_cfg=pipeline_cfg,
489
+ inputs=inputs,
490
+ final_vars=final_vars,
491
+ config=config,
492
+ cache=cache,
493
+ executor_cfg=executor_cfg,
494
+ with_adapter_cfg=with_adapter_cfg,
495
+ pipeline_adapter_cfg=pipeline_adapter_cfg,
496
+ project_adapter_cfg=project_adapter_cfg,
497
+ adapter=adapter,
498
+ #reload=reload, # Runner handles module reload if needed
499
+ log_level=log_level,
500
+ max_retries=max_retries,
501
+ retry_delay=retry_delay,
502
+ jitter_factor=jitter_factor,
503
+ retry_exceptions=retry_exceptions,
504
+ )
505
+ return res
506
+
507
+ # --- Delegated Methods ---
508
+
509
+ # Registry Delegations
510
+ def new(self, name: str, overwrite: bool = False) -> None:
511
+ """Create a new pipeline with the given name.
512
+
513
+ Creates necessary configuration files and pipeline module template.
514
+
515
+ Args:
516
+ name: Name for the new pipeline. Must be a valid Python identifier.
517
+ overwrite: Whether to overwrite existing pipeline with same name.
518
+ Default False for safety.
519
+
520
+ Raises:
521
+ ValueError: If name is invalid or pipeline exists and overwrite=False
522
+ RuntimeError: If file creation fails
523
+ PermissionError: If lacking write permissions
524
+
525
+ Example:
526
+ >>> from flowerpower.pipeline import PipelineManager
527
+ >>>
528
+ >>> # Create new pipeline
529
+ >>> manager = PipelineManager()
530
+ >>> manager.new("data_transformation")
531
+ >>>
532
+ >>> # Overwrite existing pipeline
533
+ >>> manager.new("data_transformation", overwrite=True)
534
+ """
535
+ self.registry.new(name=name, overwrite=overwrite)
536
+
537
+ def delete(self, name: str, cfg: bool = True, module: bool = False) -> None:
538
+ """
539
+ Delete a pipeline and its associated files.
540
+
541
+ Args:
542
+ name: Name of the pipeline to delete
543
+ cfg: Whether to delete configuration files. Default True.
544
+ module: Whether to delete Python module file. Default False
545
+ for safety since it may contain custom code.
546
+
547
+ Raises:
548
+ FileNotFoundError: If specified pipeline files don't exist
549
+ PermissionError: If lacking delete permissions
550
+ RuntimeError: If deletion fails partially, leaving inconsistent state
551
+
552
+ Example:
553
+ >>> from flowerpower.pipeline import PipelineManager
554
+ >>>
555
+ >>> # Delete pipeline config only
556
+ >>> manager = PipelineManager()
557
+ >>> manager.delete("old_pipeline")
558
+ >>>
559
+ >>> # Delete both config and module
560
+ >>> manager.delete("test_pipeline", module=True)
561
+ """
562
+ self.registry.delete(name=name, cfg=cfg, module=module)
563
+
564
+ def get_summary(
565
+ self,
566
+ name: str | None = None,
567
+ cfg: bool = True,
568
+ code: bool = True,
569
+ project: bool = True,
570
+ ) -> dict[str, dict | str]:
571
+ """Get a detailed summary of pipeline(s) configuration and code.
572
+
573
+ Args:
574
+ name: Specific pipeline to summarize. If None, summarizes all.
575
+ cfg: Include pipeline configuration details. Default True.
576
+ code: Include pipeline module code. Default True.
577
+ project: Include project configuration. Default True.
578
+
579
+ Returns:
580
+ dict[str, dict | str]: Nested dictionary containing requested
581
+ summaries. Structure varies based on input parameters:
582
+ - With name: {"config": dict, "code": str, "project": dict}
583
+ - Without name: {pipeline_name: {"config": dict, ...}, ...}
584
+
585
+ Example:
586
+ >>> from flowerpower.pipeline import PipelineManager
587
+ >>>
588
+ >>> manager = PipelineManager()
589
+ >>>
590
+ >>> # Get summary of specific pipeline
591
+ >>> summary = manager.get_summary("data_pipeline")
592
+ >>> print(summary["config"]["schedule"]["enabled"])
593
+ True
594
+ >>>
595
+ >>> # Get summary of all pipelines' code
596
+ >>> all_code = manager.get_summary(
597
+ ... cfg=False,
598
+ ... code=True,
599
+ ... project=False
600
+ ... )
601
+ """
602
+ return self.registry.get_summary(name=name, cfg=cfg, code=code, project=project)
603
+
604
+ def show_summary(
605
+ self,
606
+ name: str | None = None,
607
+ cfg: bool = True,
608
+ code: bool = True,
609
+ project: bool = True,
610
+ to_html: bool = False,
611
+ to_svg: bool = False,
612
+ ) -> None | str:
613
+ """
614
+ Show a summary of the pipelines.
615
+
616
+ Args:
617
+ name (str | None, optional): The name of the pipeline. Defaults to None.
618
+ cfg (bool, optional): Whether to show the configuration. Defaults to True.
619
+ code (bool, optional): Whether to show the module. Defaults to True.
620
+ project (bool, optional): Whether to show the project configuration. Defaults to True.
621
+ to_html (bool, optional): Whether to export the summary to HTML. Defaults to False.
622
+ to_svg (bool, optional): Whether to export the summary to SVG. Defaults to False.
623
+
624
+ Returns:
625
+ None | str: The summary of the pipelines. If `to_html` is True, returns the HTML string.
626
+ If `to_svg` is True, returns the SVG string.
627
+
628
+ Examples:
629
+ >>> pm = PipelineManager()
630
+ >>> pm.show_summary()
631
+ """
632
+ return self.registry.show_summary(
633
+ name=name,
634
+ cfg=cfg,
635
+ code=code,
636
+ project=project,
637
+ to_html=to_html,
638
+ to_svg=to_svg,
639
+ )
640
+
641
+ def show_pipelines(self) -> None:
642
+ """Display all available pipelines in a formatted table.
643
+
644
+ The table includes pipeline names, types, and enablement status.
645
+ Uses rich formatting for terminal display.
646
+
647
+ Example:
648
+ >>> from flowerpower.pipeline import PipelineManager
649
+ >>>
650
+ >>> manager = PipelineManager()
651
+ >>> manager.show_pipelines()
652
+
653
+ """
654
+ self.registry.show_pipelines()
655
+
656
+ def list_pipelines(self) -> list[str]:
657
+ """Get list of all available pipeline names.
658
+
659
+ Returns:
660
+ list[str]: Names of all registered pipelines, sorted alphabetically.
661
+
662
+ Example:
663
+ >>> from flowerpower.pipeline import PipelineManager
664
+ >>>
665
+ >>> manager = PipelineManager()
666
+ >>> pipelines = manager.list_pipelines()
667
+ >>> print(pipelines)
668
+ ['data_ingestion', 'model_training', 'reporting']
669
+ """
670
+ return self.registry.list_pipelines()
671
+
672
+ @property
673
+ def pipelines(self) -> list[str]:
674
+ """Get list of all available pipeline names.
675
+
676
+ Similar to list_pipelines() but as a property.
677
+
678
+ Returns:
679
+ list[str]: Names of all registered pipelines, sorted alphabetically.
680
+
681
+ Example:
682
+ >>> from flowerpower.pipeline import PipelineManager
683
+ >>>
684
+ >>> manager = PipelineManager()
685
+ >>> print(manager.pipelines)
686
+ ['data_ingestion', 'model_training', 'reporting']
687
+ """
688
+ return self.registry.pipelines
689
+
690
+ @property
691
+ def summary(self) -> dict[str, dict | str]:
692
+ """Get complete summary of all pipelines.
693
+
694
+ Returns:
695
+ dict[str, dict | str]: Full summary including configuration,
696
+ code, and project settings for all pipelines.
697
+
698
+ Example:
699
+ >>> from flowerpower.pipeline import PipelineManager
700
+ >>>
701
+ >>> manager = PipelineManager()
702
+ >>> summary = manager.summary
703
+ >>> for name, details in summary.items():
704
+ ... print(f"{name}: {details['config']['type']}")
705
+ data_pipeline: batch
706
+ ml_pipeline: streaming
707
+ """
708
+ return self.registry.summary
709
+
710
+ def add_hook(
711
+ self, name: str, type: HookType, to: str | None, function_name: str | None,
712
+ ) -> None:
713
+ """Add a hook to the pipeline module.
714
+
715
+ Args:
716
+ name (str): The name of the pipeline
717
+ type (HookType): The type of the hook.
718
+ to (str | None, optional): The name of the file to add the hook to. Defaults to the hook.py file in the pipeline's hooks folder.
719
+ function_name (str | None, optional): The name of the function. If not provided uses default name of hook type.
720
+
721
+ Returns:
722
+ None
723
+
724
+ Raises:
725
+ ValueError: If the hook type is not valid
726
+
727
+ Example:
728
+ >>> from flowerpower.pipeline import PipelineManager
729
+ >>>
730
+ >>> manager = PipelineManager()
731
+ >>> manager.add_hook(
732
+ ... name="data_pipeline",
733
+ ... type=HookType.PRE_EXECUTE,
734
+ ... to="pre_execute_hook",
735
+ ... function_name="my_pre_execute_function"
736
+ ... )
737
+ """
738
+ self.registry.add_hook(
739
+ name=name,
740
+ type=type,
741
+ to=to,
742
+ function_name=function_name,
743
+ )
744
+
745
+ # IO Delegations
746
+ def import_pipeline(
747
+ self,
748
+ name: str,
749
+ base_dir: str,
750
+ src_fs: AbstractFileSystem | None = None,
751
+ storage_options: BaseStorageOptions | None = None,
752
+ overwrite: bool = False,
753
+ ) -> None:
754
+ """Import a pipeline from another FlowerPower project.
755
+
756
+ Copies both pipeline configuration and code files from the source location
757
+ to the current project.
758
+
759
+ Args:
760
+ name: Name to give the imported pipeline
761
+ base_dir: Source FlowerPower project directory or URI
762
+ Examples:
763
+ - Local: "/path/to/other/project"
764
+ - S3: "s3://bucket/project"
765
+ - GitHub: "github://org/repo/project"
766
+ src_fs: Pre-configured filesystem for source location
767
+ Example: S3FileSystem(key='...', secret='...')
768
+ storage_options: Options for source filesystem access
769
+ Example: {"project": "my-gcp-project"}
770
+ overwrite: Whether to replace existing pipeline if name exists
771
+
772
+ Raises:
773
+ ValueError: If pipeline name exists and overwrite=False
774
+ FileNotFoundError: If source pipeline not found
775
+ RuntimeError: If import fails
776
+
777
+ Example:
778
+ >>> from flowerpower.pipeline import PipelineManager
779
+ >>> from s3fs import S3FileSystem
780
+ >>>
781
+ >>> manager = PipelineManager()
782
+ >>>
783
+ >>> # Import from local filesystem
784
+ >>> manager.import_pipeline(
785
+ ... "new_pipeline",
786
+ ... "/path/to/other/project"
787
+ ... )
788
+ >>>
789
+ >>> # Import from S3 with custom filesystem
790
+ >>> s3 = S3FileSystem(anon=False)
791
+ >>> manager.import_pipeline(
792
+ ... "s3_pipeline",
793
+ ... "s3://bucket/project",
794
+ ... src_fs=s3
795
+ ... )
796
+ """
797
+ return self.io.import_pipeline(
798
+ name=name,
799
+ src_base_dir=base_dir,
800
+ src_fs=src_fs,
801
+ src_storage_options=storage_options,
802
+ overwrite=overwrite,
803
+ )
804
+
805
+ def import_many(
806
+ self,
807
+ pipelines: dict[str, str] | list[str],
808
+ base_dir: str, # Base dir for source if pipelines is a list
809
+ src_fs: AbstractFileSystem | None = None,
810
+ src_storage_options: BaseStorageOptions | None = None,
811
+ overwrite: bool = False,
812
+ ) -> None:
813
+ """Import multiple pipelines from another project or location.
814
+
815
+ Supports two import modes:
816
+ 1. Dictionary mode: Map source names to new names
817
+ 2. List mode: Import keeping original names
818
+
819
+ Args:
820
+ pipelines: Pipeline specifications, either:
821
+ - dict: Map of {new_name: source_name}
822
+ - list: List of pipeline names to import as-is
823
+ base_dir: Source FlowerPower project directory or URI
824
+ src_fs: Pre-configured filesystem for source location
825
+ src_storage_options: Options for source filesystem access
826
+ overwrite: Whether to replace existing pipelines
827
+
828
+ Raises:
829
+ ValueError: If any pipeline exists and overwrite=False
830
+ FileNotFoundError: If source pipelines not found
831
+ RuntimeError: If import operation fails
832
+
833
+ Example:
834
+ >>> from flowerpower.pipeline import PipelineManager
835
+ >>>
836
+ >>> manager = PipelineManager()
837
+ >>>
838
+ >>> # Import with name mapping
839
+ >>> manager.import_many(
840
+ ... pipelines={
841
+ ... "new_ingest": "data_ingest",
842
+ ... "new_process": "data_process"
843
+ ... },
844
+ ... base_dir="/path/to/source",
845
+ ... overwrite=True
846
+ ... )
847
+ >>>
848
+ >>> # Import keeping original names
849
+ >>> manager.import_many(
850
+ ... pipelines=["pipeline1", "pipeline2"],
851
+ ... base_dir="s3://bucket/source",
852
+ ... src_storage_options={
853
+ ... "key": "ACCESS_KEY",
854
+ ... "secret": "SECRET_KEY"
855
+ ... }
856
+ ... )
857
+ """
858
+ return self.io.import_many(
859
+ pipelines=pipelines,
860
+ src_base_dir=base_dir,
861
+ src_fs=src_fs,
862
+ src_storage_options=src_storage_options,
863
+ overwrite=overwrite,
864
+ )
865
+
866
+ def import_all(
867
+ self,
868
+ base_dir: str,
869
+ src_fs: AbstractFileSystem | None = None,
870
+ src_storage_options: BaseStorageOptions | None = None,
871
+ overwrite: bool = False,
872
+ ) -> None:
873
+ """Import all pipelines from another FlowerPower project.
874
+
875
+ Args:
876
+ base_dir: Source project directory or URI
877
+ src_fs: Pre-configured source filesystem
878
+ src_storage_options: Source filesystem options
879
+ overwrite: Whether to replace existing pipelines
880
+
881
+ Raises:
882
+ FileNotFoundError: If source location not found
883
+ RuntimeError: If import fails
884
+
885
+ Example:
886
+ >>> from flowerpower.pipeline import PipelineManager
887
+ >>>
888
+ >>> manager = PipelineManager()
889
+ >>>
890
+ >>> # Import all from backup
891
+ >>> manager.import_all("/path/to/backup")
892
+ >>>
893
+ >>> # Import all from S3 with credentials
894
+ >>> manager.import_all(
895
+ ... "s3://bucket/backup",
896
+ ... src_storage_options={
897
+ ... "key": "ACCESS_KEY",
898
+ ... "secret": "SECRET_KEY"
899
+ ... }
900
+ ... )
901
+ """
902
+ return self.io.import_all(
903
+ src_base_dir=base_dir,
904
+ src_fs=src_fs,
905
+ src_storage_options=src_storage_options,
906
+ overwrite=overwrite,
907
+ )
908
+
909
+ def export_pipeline(
910
+ self,
911
+ name: str,
912
+ base_dir: str,
913
+ dest_fs: AbstractFileSystem | None = None,
914
+ dest_storage_options: BaseStorageOptions | None = None,
915
+ overwrite: bool = False,
916
+ ) -> None:
917
+ """Export a pipeline to another location or project.
918
+
919
+ Copies pipeline configuration and code files to the destination location
920
+ while preserving directory structure.
921
+
922
+ Args:
923
+ name: Name of pipeline to export
924
+ base_dir: Destination directory or URI
925
+ Examples:
926
+ - Local: "/path/to/backup"
927
+ - S3: "s3://bucket/backups"
928
+ - GCS: "gs://bucket/exports"
929
+ dest_fs: Pre-configured filesystem for destination
930
+ Example: GCSFileSystem(token='...')
931
+ dest_storage_options: Options for destination filesystem
932
+ Example: {"key": "...", "secret": "..."}
933
+ overwrite: Whether to replace existing files at destination
934
+
935
+ Raises:
936
+ ValueError: If pipeline doesn't exist
937
+ FileNotFoundError: If destination not accessible
938
+ RuntimeError: If export fails
939
+
940
+ Example:
941
+ >>> from flowerpower.pipeline import PipelineManager
942
+ >>> from gcsfs import GCSFileSystem
943
+ >>>
944
+ >>> manager = PipelineManager()
945
+ >>>
946
+ >>> # Export to local backup
947
+ >>> manager.export_pipeline(
948
+ ... "my_pipeline",
949
+ ... "/path/to/backup"
950
+ ... )
951
+ >>>
952
+ >>> # Export to Google Cloud Storage
953
+ >>> gcs = GCSFileSystem(project='my-project')
954
+ >>> manager.export_pipeline(
955
+ ... "prod_pipeline",
956
+ ... "gs://my-bucket/backups",
957
+ ... dest_fs=gcs
958
+ ... )
959
+ """
960
+ return self.io.export_pipeline(
961
+ name=name,
962
+ dest_base_dir=base_dir,
963
+ dest_fs=dest_fs,
964
+ dest_storage_options=dest_storage_options,
965
+ overwrite=overwrite,
966
+ )
967
+
968
+ def export_many(
969
+ self,
970
+ pipelines: list[str],
971
+ base_dir: str,
972
+ dest_fs: AbstractFileSystem | None = None,
973
+ dest_storage_options: BaseStorageOptions | None = None,
974
+ overwrite: bool = False,
975
+ ) -> None:
976
+ """Export multiple pipelines to another location.
977
+
978
+ Efficiently exports multiple pipelines in a single operation,
979
+ preserving directory structure and metadata.
980
+
981
+ Args:
982
+ pipelines: List of pipeline names to export
983
+ base_dir: Destination directory or URI
984
+ Examples:
985
+ - Local: "/path/to/exports"
986
+ - S3: "s3://bucket/exports"
987
+ - Azure: "abfs://container/exports"
988
+ dest_fs: Pre-configured filesystem for destination
989
+ Example: S3FileSystem(anon=False, key='...', secret='...')
990
+ dest_storage_options: Options for destination filesystem access
991
+ Example: {"account_name": "storage", "sas_token": "..."}
992
+ overwrite: Whether to replace existing files at destination
993
+
994
+ Raises:
995
+ ValueError: If any pipeline doesn't exist
996
+ FileNotFoundError: If destination not accessible
997
+ RuntimeError: If export operation fails
998
+
999
+ Example:
1000
+ >>> from flowerpower.pipeline import PipelineManager
1001
+ >>> from azure.storage.filedatalake import DataLakeServiceClient
1002
+ >>>
1003
+ >>> manager = PipelineManager()
1004
+ >>>
1005
+ >>> # Export multiple pipelines to Azure Data Lake
1006
+ >>> manager.export_many(
1007
+ ... pipelines=["ingest", "process", "report"],
1008
+ ... base_dir="abfs://data/backups",
1009
+ ... dest_storage_options={
1010
+ ... "account_name": "myaccount",
1011
+ ... "sas_token": "...",
1012
+ ... }
1013
+ ... )
1014
+ """
1015
+ return self.io.export_many(
1016
+ pipelines=pipelines,
1017
+ dest_base_dir=base_dir,
1018
+ dest_fs=dest_fs,
1019
+ dest_storage_options=dest_storage_options,
1020
+ overwrite=overwrite,
1021
+ )
1022
+
1023
+ def export_all(
1024
+ self,
1025
+ base_dir: str,
1026
+ dest_fs: AbstractFileSystem | None = None,
1027
+ dest_storage_options: BaseStorageOptions | None = None,
1028
+ overwrite: bool = False,
1029
+ ) -> None:
1030
+ """Export all pipelines to another location.
1031
+
1032
+ Args:
1033
+ base_dir: Destination directory or URI
1034
+ dest_fs: Pre-configured destination filesystem
1035
+ dest_storage_options: Destination filesystem options
1036
+ overwrite: Whether to replace existing files
1037
+
1038
+ Raises:
1039
+ FileNotFoundError: If destination not accessible
1040
+ RuntimeError: If export fails
1041
+
1042
+ Example:
1043
+ >>> from flowerpower.pipeline import PipelineManager
1044
+ >>>
1045
+ >>> manager = PipelineManager()
1046
+ >>>
1047
+ >>> # Export all to backup directory
1048
+ >>> manager.export_all("/path/to/backup")
1049
+ >>>
1050
+ >>> # Export all to cloud storage
1051
+ >>> manager.export_all(
1052
+ ... "gs://bucket/pipelines",
1053
+ ... dest_storage_options={
1054
+ ... "token": "SERVICE_ACCOUNT_TOKEN",
1055
+ ... "project": "my-project"
1056
+ ... }
1057
+ ... )
1058
+ """
1059
+ return self.io.export_all(
1060
+ dest_base_dir=base_dir,
1061
+ dest_fs=dest_fs,
1062
+ dest_storage_options=dest_storage_options,
1063
+ overwrite=overwrite,
1064
+ )
1065
+
1066
+ # Visualizer Delegations
1067
+ def save_dag(self, name: str, format: str = "png", reload: bool = False) -> None:
1068
+ """Save pipeline DAG visualization to a file.
1069
+
1070
+ Creates a visual representation of the pipeline's directed acyclic graph (DAG)
1071
+ showing function dependencies and data flow.
1072
+
1073
+ Args:
1074
+ name: Name of the pipeline to visualize
1075
+ format: Output file format. Supported formats:
1076
+ - "png": Standard bitmap image
1077
+ - "svg": Scalable vector graphic
1078
+ - "pdf": Portable document format
1079
+ - "dot": Graphviz DOT format
1080
+ reload: Whether to reload pipeline before visualization
1081
+
1082
+ Raises:
1083
+ ValueError: If pipeline name doesn't exist
1084
+ ImportError: If required visualization dependencies missing
1085
+ RuntimeError: If graph generation fails
1086
+
1087
+ Example:
1088
+ >>> from flowerpower.pipeline import PipelineManager
1089
+ >>>
1090
+ >>> manager = PipelineManager()
1091
+ >>>
1092
+ >>> # Save as PNG
1093
+ >>> manager.save_dag("data_pipeline")
1094
+ >>>
1095
+ >>> # Save as SVG with reload
1096
+ >>> manager.save_dag(
1097
+ ... name="ml_pipeline",
1098
+ ... format="svg",
1099
+ ... reload=True
1100
+ ... )
1101
+ """
1102
+ self.visualizer.save_dag(name=name, format=format, reload=reload)
1103
+
1104
+ def show_dag(
1105
+ self, name: str, format: str = "png", reload: bool = False, raw: bool = False
1106
+ ) -> Union[GraphType, None]:
1107
+ """Display pipeline DAG visualization interactively.
1108
+
1109
+ Similar to save_dag() but displays the graph immediately in notebook
1110
+ environments or returns the raw graph object for custom rendering.
1111
+
1112
+ Args:
1113
+ name: Name of the pipeline to visualize
1114
+ format: Output format (see save_dag() for options)
1115
+ reload: Whether to reload pipeline before visualization
1116
+ raw: If True, return the raw graph object instead of displaying
1117
+
1118
+ Returns:
1119
+ Union[GraphType, None]: Raw graph object if raw=True, else None after
1120
+ displaying the visualization
1121
+
1122
+ Raises:
1123
+ ValueError: If pipeline name doesn't exist
1124
+ ImportError: If visualization dependencies missing
1125
+ RuntimeError: If graph generation fails
1126
+
1127
+ Example:
1128
+ >>> from flowerpower.pipeline import PipelineManager
1129
+ >>>
1130
+ >>> manager = PipelineManager()
1131
+ >>>
1132
+ >>> # Display in notebook
1133
+ >>> manager.show_dag("data_pipeline")
1134
+ >>>
1135
+ >>> # Get raw graph for custom rendering
1136
+ >>> graph = manager.show_dag(
1137
+ ... name="ml_pipeline",
1138
+ ... format="svg",
1139
+ ... raw=True
1140
+ ... )
1141
+ >>> # Custom rendering
1142
+ >>> graph.render("custom_vis", view=True)
1143
+ """
1144
+ return self.visualizer.show_dag(
1145
+ name=name, format=format, reload=reload, raw=raw
1146
+ )
1147
+
1148
+ # Scheduler Delegations
1149
+ def _get_run_func_for_job(self, name: str, reload: bool = False) -> Callable:
1150
+ """Helper to create a PipelineRunner instance and return its run method."""
1151
+ # This ensures the runner uses the correct, potentially reloaded, config for the job
1152
+ pipeline_cfg = self._load_pipeline_cfg(name=name, reload=reload)
1153
+ runner = PipelineRunner(project_cfg=self.project_cfg, pipeline_cfg=pipeline_cfg)
1154
+ # We return the bound method runner.run
1155
+ return runner.run
1156
+
1157
+ def run_job(
1158
+ self,
1159
+ name: str,
1160
+ inputs: dict | None = None,
1161
+ final_vars: list[str] | None = None,
1162
+ config: dict | None = None,
1163
+ cache: bool | dict = False,
1164
+ executor_cfg: str | dict | ExecutorConfig | None = None,
1165
+ with_adapter_cfg: dict | WithAdapterConfig | None = None,
1166
+ pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
1167
+ project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
1168
+ adapter: dict[str, Any] | None = None,
1169
+ reload: bool = False,
1170
+ log_level: str | None = None,
1171
+ max_retries: int | None = None,
1172
+ retry_delay: float | None = None,
1173
+ jitter_factor: float | None = None,
1174
+ retry_exceptions: tuple | list | None = None,
1175
+ **kwargs: Any,
1176
+ ) -> dict[str, Any]:
1177
+ """Execute a pipeline job immediately through the task queue.
1178
+
1179
+ Unlike the run() method which executes synchronously, this method runs
1180
+ the pipeline through the configured worker system (RQ, APScheduler, etc.).
1181
+
1182
+ Args:
1183
+ name (str): Name of the pipeline to run. Must be a valid identifier.
1184
+ inputs (dict | None): Override pipeline input values. Example: {"data_date": "2025-04-28"}
1185
+ final_vars (list[str] | None): Specify which output variables to return.
1186
+ Example: ["model", "metrics"]
1187
+ config (dict | None): Configuration for Hamilton pipeline executor.
1188
+ Example: {"model": "LogisticRegression"}
1189
+ cache (dict | None): Cache configuration for results. Example: {"recompute": ["node1", "final_node"]}
1190
+ executor_cfg (str | dict | ExecutorConfig | None): Execution configuration, can be:
1191
+ - str: Executor name, e.g. "threadpool", "local"
1192
+ - dict: Raw config, e.g. {"type": "threadpool", "max_workers": 4}
1193
+ - ExecutorConfig: Structured config object
1194
+ with_adapter_cfg (dict | WithAdapterConfig | None): Adapter settings for pipeline execution.
1195
+ Example: {"opentelemetry": True, "tracker": False}
1196
+ pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline-specific adapter settings.
1197
+ Example: {"tracker": {"project_id": "123", "tags": {"env": "prod"}}}
1198
+ project_adapter_cfg (dict | ProjectAdapterConfig | None): Project-level adapter settings.
1199
+ Example: {"opentelemetry": {"host": "http://localhost:4317"}}
1200
+ adapter (dict[str, Any] | None): Custom adapter instance for pipeline
1201
+ Example: {"ray_graph_adapter": RayGraphAdapter()}
1202
+ reload (bool): Force reload of pipeline configuration.
1203
+ log_level (str | None): Logging level for the execution. Default None uses project config.
1204
+ Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
1205
+ max_retries (int): Maximum number of retries for execution.
1206
+ retry_delay (float): Delay between retries in seconds.
1207
+ jitter_factor (float): Random jitter factor to add to retry delay
1208
+ retry_exceptions (tuple): Exceptions that trigger a retry.
1209
+
1210
+ **kwargs: JobQueue-specific arguments
1211
+ For RQ:
1212
+ - queue_name: Queue to use (str)
1213
+ - retry: Number of retries (int)
1214
+ For APScheduler:
1215
+ - job_executor: Executor type (str)
1216
+
1217
+ Returns:
1218
+ dict[str, Any]: Job execution results
1219
+
1220
+ Raises:
1221
+ ValueError: If pipeline or configuration is invalid
1222
+ RuntimeError: If job execution fails
1223
+
1224
+ Example:
1225
+ >>> from flowerpower.pipeline import PipelineManager
1226
+ >>>
1227
+ >>> manager = PipelineManager()
1228
+ >>>
1229
+ >>> # Simple job execution
1230
+ >>> result = manager.run_job("data_pipeline")
1231
+ >>>
1232
+ >>> # Complex job with retry logic
1233
+ >>> result = manager.run_job(
1234
+ ... name="ml_training",
1235
+ ... inputs={"training_date": "2025-04-28"},
1236
+ ... executor_cfg={"type": "async"},
1237
+ ... with_adapter_cfg={"enable_tracking": True},
1238
+ ... retry=3,
1239
+ ... queue_name="ml_jobs"
1240
+ ... )
1241
+ """
1242
+ run_func = self._get_run_func_for_job(name, reload)
1243
+ return self.job_queue.run_job(
1244
+ run_func=run_func,
1245
+ name=name,
1246
+ inputs=inputs,
1247
+ final_vars=final_vars,
1248
+ config=config,
1249
+ cache=cache,
1250
+ executor_cfg=executor_cfg,
1251
+ with_adapter_cfg=with_adapter_cfg,
1252
+ pipeline_adapter_cfg=pipeline_adapter_cfg,
1253
+ project_adapter_cfg=project_adapter_cfg,
1254
+ adapter=adapter,
1255
+ #reload=reload,
1256
+ log_level=log_level,
1257
+ max_retries=max_retries,
1258
+ retry_delay=retry_delay,
1259
+ jitter_factor=jitter_factor,
1260
+ retry_exceptions=retry_exceptions,
1261
+ **kwargs,
1262
+ )
1263
+
1264
+ def add_job(
1265
+ self,
1266
+ name: str,
1267
+ inputs: dict | None = None,
1268
+ final_vars: list[str] | None = None,
1269
+ config: dict | None = None,
1270
+ cache: bool | dict = False,
1271
+ executor_cfg: str | dict | ExecutorConfig | None = None,
1272
+ with_adapter_cfg: dict | WithAdapterConfig | None = None,
1273
+ pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
1274
+ project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
1275
+ adapter: dict[str, Any] | None = None,
1276
+ reload: bool = False, # Reload config/module before creating run_func
1277
+ log_level: str | None = None,
1278
+ result_ttl: int | dt.timedelta = 0,
1279
+ run_at: dt.datetime | str | None = None,
1280
+ run_in: dt.datetime | str | None = None,
1281
+ max_retries: int = 3,
1282
+ retry_delay: float = 1.0,
1283
+ jitter_factor: float = 0.1,
1284
+ retry_exceptions: tuple = (Exception,),
1285
+ **kwargs, # JobQueue specific args
1286
+ ) -> str | UUID:
1287
+ """Adds a jobt to the task queue.
1288
+
1289
+ Args:
1290
+ name (str): Name of the pipeline to run. Must be a valid identifier.
1291
+ inputs (dict | None): Override pipeline input values. Example: {"data_date": "2025-04-28"}
1292
+ final_vars (list[str] | None): Specify which output variables to return.
1293
+ Example: ["model", "metrics"]
1294
+ config (dict | None): Configuration for Hamilton pipeline executor.
1295
+ Example: {"model": "LogisticRegression"}
1296
+ cache (dict | None): Cache configuration for results. Example: {"recompute": ["node1", "final_node"]}
1297
+ executor_cfg (str | dict | ExecutorConfig | None): Execution configuration, can be:
1298
+ - str: Executor name, e.g. "threadpool", "local"
1299
+ - dict: Raw config, e.g. {"type": "threadpool", "max_workers": 4}
1300
+ - ExecutorConfig: Structured config object
1301
+ with_adapter_cfg (dict | WithAdapterConfig | None): Adapter settings for pipeline execution.
1302
+ Example: {"opentelemetry": True, "tracker": False}
1303
+ pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline-specific adapter settings.
1304
+ Example: {"tracker": {"project_id": "123", "tags": {"env": "prod"}}}
1305
+ project_adapter_cfg (dict | ProjectAdapterConfig | None): Project-level adapter settings.
1306
+ Example: {"opentelemetry": {"host": "http://localhost:4317"}}
1307
+ adapter (dict[str, Any] | None): Custom adapter instance for pipeline
1308
+ Example: {"ray_graph_adapter": RayGraphAdapter()}
1309
+ reload (bool): Force reload of pipeline configuration.
1310
+ run_at (dt.datetime | str | None): Future date to run the job.
1311
+ Example: datetime(2025, 4, 28, 12, 0)
1312
+ Example str: "2025-04-28T12:00:00" (ISO format)
1313
+ run_in (dt.datetime | str | None): Delay before running the job.
1314
+ Example: 3600 (run in one hour, given in seconds)
1315
+ Example: datetime.timedelta(days=1)
1316
+ Example str: "1d" (1 day)
1317
+ result_ttl (int | dt.timedelta): Time to live for the job result.
1318
+ Example: 3600 (1 hour in seconds)
1319
+ log_level (str | None): Logging level for the execution. Default None uses project config.
1320
+ Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
1321
+ max_retries (int): Maximum number of retries for execution.
1322
+ retry_delay (float): Delay between retries in seconds.
1323
+ jitter_factor (float): Random jitter factor to add to retry delay
1324
+ retry_exceptions (tuple): Exceptions that trigger a retry.
1325
+ **kwargs: Additional keyword arguments passed to the worker's add_job method.
1326
+ For RQ this includes:
1327
+ - result_ttl: Time to live for the job result (float or timedelta)
1328
+ - ttl: Time to live for the job (float or timedelta)
1329
+ - queue_name: Name of the queue to use (str)
1330
+ - retry: Number of retries (int)
1331
+ - repeat: Repeat count (int or dict)
1332
+ For APScheduler, this includes:
1333
+ - job_executor: Job executor to use (str)
1334
+
1335
+ Returns:
1336
+ str | UUID: The ID of the job.
1337
+
1338
+ Raises:
1339
+ ValueError: If the job ID is not valid or if the job cannot be scheduled.
1340
+
1341
+ Example:
1342
+ >>> from flowerpower.pipeline import PipelineManager
1343
+ >>> pm = PipelineManager()
1344
+ >>> job_id = pm.add_job("example_pipeline", inputs={"input1": 42})
1345
+
1346
+ """
1347
+ run_func = self._get_run_func_for_job(name, reload)
1348
+ run_in = duration_parser.parse(run_in) if isinstance(run_in, str) else run_in #convert to seconds
1349
+ run_at = dt.datetime.fromisoformat(run_at) if isinstance(run_at, str) else run_at
1350
+
1351
+
1352
+ return self.job_queue.add_job(
1353
+ run_func=run_func,
1354
+ name=name, # Pass name for logging
1355
+ # Pass run parameters
1356
+ inputs=inputs,
1357
+ final_vars=final_vars,
1358
+ config=config,
1359
+ cache=cache,
1360
+ executor_cfg=executor_cfg,
1361
+ with_adapter_cfg=with_adapter_cfg,
1362
+ pipeline_adapter_cfg=pipeline_adapter_cfg,
1363
+ project_adapter_cfg=project_adapter_cfg,
1364
+ adapter=adapter,
1365
+ #reload=reload, # Note: reload already happened
1366
+ log_level=log_level,
1367
+ result_ttl=result_ttl,
1368
+ run_at=run_at,
1369
+ run_in=run_in,
1370
+ max_retries=max_retries,
1371
+ retry_delay=retry_delay,
1372
+ jitter_factor=jitter_factor,
1373
+ retry_exceptions=retry_exceptions,
1374
+ **kwargs, # Pass worker args
1375
+ )
1376
+
1377
+ def schedule(
1378
+ self,
1379
+ name: str,
1380
+ inputs: dict | None = None,
1381
+ final_vars: list[str] | None = None,
1382
+ config: dict | None = None,
1383
+ cache: bool | dict = False,
1384
+ executor_cfg: str | dict | ExecutorConfig | None = None,
1385
+ with_adapter_cfg: dict | WithAdapterConfig | None = None,
1386
+ pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
1387
+ project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
1388
+ adapter: dict[str, Any] | None = None,
1389
+ reload: bool = False,
1390
+ log_level: str | None = None,
1391
+ cron: str | dict[str, str | int] | None = None,
1392
+ interval: int | str | dict[str, str | int] | None = None,
1393
+ date: dt.datetime | str | None = None,
1394
+ overwrite: bool = False,
1395
+ schedule_id: str | None = None,
1396
+ max_retries: int | None = None,
1397
+ retry_delay: float | None = None,
1398
+ jitter_factor: float | None = None,
1399
+ retry_exceptions: tuple | list | None = None,
1400
+ **kwargs: Any,
1401
+ ) -> str | UUID:
1402
+ """Schedule a pipeline to run on a recurring or future basis.
1403
+
1404
+ Args:
1405
+ name (str): The name of the pipeline to run.
1406
+ inputs (dict | None): Inputs for the pipeline run (overrides config).
1407
+ final_vars (list[str] | None): Final variables for the pipeline run (overrides config).
1408
+ config (dict | None): Hamilton driver config (overrides config).
1409
+ cache (bool | dict): Cache settings (overrides config).
1410
+ executor_cfg (str | dict | ExecutorConfig | None): Executor configuration (overrides config).
1411
+ with_adapter_cfg (dict | WithAdapterConfig | None): Adapter configuration (overrides config).
1412
+ pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline adapter configuration (overrides config).
1413
+ project_adapter_cfg (dict | ProjectAdapterConfig | None): Project adapter configuration (overrides config).
1414
+ adapter (dict[str, Any] | None): Additional Hamilton adapters (overrides config).
1415
+ reload (bool): Whether to reload module and pipeline config. Defaults to False.
1416
+ log_level (str | None): Log level for the run (overrides config).
1417
+ cron (str | dict[str, str | int] | None): Cron expression or settings
1418
+ Example string: "0 0 * * *" (daily at midnight)
1419
+ Example dict: {"minute": "0", "hour": "*/2"} (every 2 hours)
1420
+ interval (int | str | dict[str, str | int] | None): Time interval for recurring execution
1421
+ Example int: 3600 (every hour in seconds)
1422
+ Example str: "1h" (every hour)
1423
+ Example dict: {"hours": 1, "minutes": 30} (every 90 minutes)
1424
+ date (dt.datetime | str | None): Future date for a one-time run.
1425
+ Example: datetime(2025, 4, 28, 12, 0)
1426
+ Example str: "2025-04-28T12:00:00" (ISO format)
1427
+ overwrite (bool): Whether to overwrite existing schedule with the same ID
1428
+ schedule_id (str | None): Unique identifier for the schedule
1429
+ max_retries (int): Maximum number of retries for execution
1430
+ retry_delay (float): Delay between retries in seconds
1431
+ jitter_factor (float): Random jitter factor to add to retry delay
1432
+ retry_exceptions (tuple): Exceptions that trigger a retry
1433
+ **kwargs: JobQueue-specific scheduling options
1434
+ For RQ:
1435
+ - result_ttl: Result lifetime (int seconds)
1436
+ - queue_name: Queue to use (str)
1437
+ For APScheduler:
1438
+ - misfire_grace_time: Late execution window
1439
+ - coalesce: Combine missed executions (bool)
1440
+ - max_running_jobs: Concurrent instances limit (int)
1441
+
1442
+ Returns:
1443
+ str | UUID: Unique identifier for the created schedule
1444
+
1445
+ Raises:
1446
+ ValueError: If schedule parameters are invalid
1447
+ RuntimeError: If scheduling fails
1448
+
1449
+ Example:
1450
+ >>> from flowerpower.pipeline import PipelineManager
1451
+ >>> from datetime import datetime, timedelta
1452
+ >>>
1453
+ >>> manager = PipelineManager()
1454
+ >>>
1455
+ >>> # Daily schedule with cron
1456
+ >>> schedule_id = manager.schedule(
1457
+ ... name="daily_metrics",
1458
+ ... cron="0 0 * * *",
1459
+ ... inputs={"date": "{{ execution_date }}"}
1460
+ ... )
1461
+ >>>
1462
+ >>> # Interval-based schedule
1463
+ >>> schedule_id = manager.schedule(
1464
+ ... name="monitoring",
1465
+ ... interval={"minutes": 15},
1466
+ ... with_adapter_cfg={"enable_alerts": True}
1467
+ ... )
1468
+ >>>
1469
+ >>> # Future one-time execution
1470
+ >>> future_date = datetime.now() + timedelta(days=1)
1471
+ >>> schedule_id = manager.schedule(
1472
+ ... name="batch_process",
1473
+ ... date=future_date,
1474
+ ... executor_cfg={"type": "async"}
1475
+ ... )
1476
+ """
1477
+ pipeline_cfg = self._load_pipeline_cfg(name=name, reload=reload)
1478
+ run_func = self._get_run_func_for_job(name, reload)
1479
+ interval = duration_parser.parse(interval) if isinstance(interval, str) else interval
1480
+ date = dt.datetime.fromisoformat(date) if isinstance(date, str) else date
1481
+
1482
+ return self.job_queue.schedule(
1483
+ run_func=run_func,
1484
+ pipeline_cfg=pipeline_cfg,
1485
+ inputs=inputs,
1486
+ final_vars=final_vars,
1487
+ config=config,
1488
+ cache=cache,
1489
+ executor_cfg=executor_cfg,
1490
+ with_adapter_cfg=with_adapter_cfg,
1491
+ pipeline_adapter_cfg=pipeline_adapter_cfg,
1492
+ project_adapter_cfg=project_adapter_cfg,
1493
+ adapter=adapter,
1494
+ reload=reload,
1495
+ log_level=log_level,
1496
+ cron=cron,
1497
+ interval=interval,
1498
+ date=date,
1499
+ overwrite=overwrite,
1500
+ schedule_id=schedule_id,
1501
+ max_retries=max_retries,
1502
+ retry_delay=retry_delay,
1503
+ jitter_factor=jitter_factor,
1504
+ retry_exceptions=retry_exceptions,
1505
+ **kwargs,
1506
+ )
1507
+
1508
+ def schedule_all(self, **kwargs: Any) -> None:
1509
+ """Schedule all pipelines that are enabled in their configuration.
1510
+
1511
+ For each enabled pipeline, applies its configured schedule settings
1512
+ and any provided overrides.
1513
+
1514
+ Args:
1515
+ **kwargs: Overrides for schedule settings that apply to all pipelines.
1516
+ See schedule() method for supported arguments.
1517
+
1518
+ Example:
1519
+ >>> from flowerpower.pipeline import PipelineManager
1520
+ >>>
1521
+ >>> manager = PipelineManager()
1522
+ >>>
1523
+ >>> # Schedule all with default settings
1524
+ >>> manager.schedule_all()
1525
+ >>>
1526
+ >>> # Schedule all with common overrides
1527
+ >>> manager.schedule_all(
1528
+ ... max_running_jobs=2,
1529
+ ... coalesce=True,
1530
+ ... misfire_grace_time=300
1531
+ ... )
1532
+ """
1533
+ scheduled_ids = []
1534
+ errors = []
1535
+ pipeline_names = self.list_pipelines()
1536
+ if not pipeline_names:
1537
+ logger.warning("No pipelines found to schedule.")
1538
+ return
1539
+
1540
+ logger.info(f"Attempting to schedule {len(pipeline_names)} pipelines...")
1541
+ for name in pipeline_names:
1542
+ try:
1543
+ pipeline_cfg = self._load_pipeline_cfg(name=name, reload=True)
1544
+
1545
+ if not pipeline_cfg.schedule.enabled:
1546
+ logger.info(
1547
+ f"Skipping scheduling for '{name}': Not enabled in config."
1548
+ )
1549
+ continue
1550
+
1551
+ logger.info(f"Scheduling [cyan]{name}[/cyan]...")
1552
+ schedule_id = self.schedule(name=name, reload=False, **kwargs)
1553
+ scheduled_ids.append(schedule_id)
1554
+ except Exception as e:
1555
+ logger.error(f"Failed to schedule pipeline '{name}': {e}")
1556
+ errors.append(name)
1557
+
1558
+ if errors:
1559
+ logger.error(f"Finished scheduling with errors for: {', '.join(errors)}")
1560
+ else:
1561
+ logger.info(f"Successfully scheduled {len(scheduled_ids)} pipelines.")
1562
+
1563
+ @property
1564
+ def schedules(self) -> list[Any]:
1565
+ """Get list of current pipeline schedules.
1566
+
1567
+ Retrieves all active schedules from the worker system.
1568
+
1569
+ Returns:
1570
+ list[Any]: List of schedule objects. Exact type depends on worker:
1571
+ - RQ: List[rq.job.Job]
1572
+ - APScheduler: List[apscheduler.schedulers.base.Schedule]
1573
+
1574
+ Example:
1575
+ >>> from flowerpower.pipeline import PipelineManager
1576
+ >>>
1577
+ >>> manager = PipelineManager()
1578
+ >>> for schedule in manager.schedules:
1579
+ ... print(f"{schedule.id}: Next run at {schedule.next_run_time}")
1580
+ """
1581
+ try:
1582
+ return self.job_queue._get_schedules()
1583
+ except Exception as e:
1584
+ logger.error(f"Failed to retrieve schedules: {e}")
1585
+ return []
1586
+