FlowerPower 0.9.13.1__py3-none-any.whl → 1.0.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowerpower/__init__.py +17 -2
- flowerpower/cfg/__init__.py +201 -149
- flowerpower/cfg/base.py +122 -24
- flowerpower/cfg/pipeline/__init__.py +254 -0
- flowerpower/cfg/pipeline/adapter.py +66 -0
- flowerpower/cfg/pipeline/run.py +40 -11
- flowerpower/cfg/pipeline/schedule.py +69 -79
- flowerpower/cfg/project/__init__.py +149 -0
- flowerpower/cfg/project/adapter.py +57 -0
- flowerpower/cfg/project/job_queue.py +165 -0
- flowerpower/cli/__init__.py +92 -37
- flowerpower/cli/job_queue.py +878 -0
- flowerpower/cli/mqtt.py +32 -1
- flowerpower/cli/pipeline.py +559 -406
- flowerpower/cli/utils.py +29 -18
- flowerpower/flowerpower.py +12 -8
- flowerpower/fs/__init__.py +20 -2
- flowerpower/fs/base.py +350 -26
- flowerpower/fs/ext.py +797 -216
- flowerpower/fs/storage_options.py +1097 -55
- flowerpower/io/base.py +13 -18
- flowerpower/io/loader/__init__.py +28 -0
- flowerpower/io/loader/deltatable.py +7 -10
- flowerpower/io/metadata.py +1 -0
- flowerpower/io/saver/__init__.py +28 -0
- flowerpower/io/saver/deltatable.py +4 -3
- flowerpower/job_queue/__init__.py +252 -0
- flowerpower/job_queue/apscheduler/__init__.py +11 -0
- flowerpower/job_queue/apscheduler/_setup/datastore.py +110 -0
- flowerpower/job_queue/apscheduler/_setup/eventbroker.py +93 -0
- flowerpower/job_queue/apscheduler/manager.py +1063 -0
- flowerpower/job_queue/apscheduler/setup.py +524 -0
- flowerpower/job_queue/apscheduler/trigger.py +169 -0
- flowerpower/job_queue/apscheduler/utils.py +309 -0
- flowerpower/job_queue/base.py +382 -0
- flowerpower/job_queue/rq/__init__.py +10 -0
- flowerpower/job_queue/rq/_trigger.py +37 -0
- flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +226 -0
- flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +231 -0
- flowerpower/job_queue/rq/manager.py +1449 -0
- flowerpower/job_queue/rq/setup.py +150 -0
- flowerpower/job_queue/rq/utils.py +69 -0
- flowerpower/pipeline/__init__.py +5 -0
- flowerpower/pipeline/base.py +118 -0
- flowerpower/pipeline/io.py +407 -0
- flowerpower/pipeline/job_queue.py +505 -0
- flowerpower/pipeline/manager.py +1586 -0
- flowerpower/pipeline/registry.py +560 -0
- flowerpower/pipeline/runner.py +560 -0
- flowerpower/pipeline/visualizer.py +142 -0
- flowerpower/plugins/mqtt/__init__.py +12 -0
- flowerpower/plugins/mqtt/cfg.py +16 -0
- flowerpower/plugins/mqtt/manager.py +789 -0
- flowerpower/settings.py +110 -0
- flowerpower/utils/logging.py +21 -0
- flowerpower/utils/misc.py +57 -9
- flowerpower/utils/sql.py +122 -24
- flowerpower/utils/templates.py +2 -142
- flowerpower-1.0.0b2.dist-info/METADATA +324 -0
- flowerpower-1.0.0b2.dist-info/RECORD +94 -0
- flowerpower/_web/__init__.py +0 -61
- flowerpower/_web/routes/config.py +0 -103
- flowerpower/_web/routes/pipelines.py +0 -173
- flowerpower/_web/routes/scheduler.py +0 -136
- flowerpower/cfg/pipeline/tracker.py +0 -14
- flowerpower/cfg/project/open_telemetry.py +0 -8
- flowerpower/cfg/project/tracker.py +0 -11
- flowerpower/cfg/project/worker.py +0 -19
- flowerpower/cli/scheduler.py +0 -309
- flowerpower/cli/web.py +0 -44
- flowerpower/event_handler.py +0 -23
- flowerpower/mqtt.py +0 -609
- flowerpower/pipeline.py +0 -2499
- flowerpower/scheduler.py +0 -680
- flowerpower/tui.py +0 -79
- flowerpower/utils/datastore.py +0 -186
- flowerpower/utils/eventbroker.py +0 -127
- flowerpower/utils/executor.py +0 -58
- flowerpower/utils/trigger.py +0 -140
- flowerpower-0.9.13.1.dist-info/METADATA +0 -586
- flowerpower-0.9.13.1.dist-info/RECORD +0 -76
- /flowerpower/{cfg/pipeline/params.py → cli/worker.py} +0 -0
- {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b2.dist-info}/WHEEL +0 -0
- {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b2.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1586 @@
|
|
1
|
+
import datetime as dt
|
2
|
+
import posixpath
|
3
|
+
import sys
|
4
|
+
from pathlib import Path
|
5
|
+
from types import TracebackType
|
6
|
+
from typing import Any, Callable, TypeVar, Union
|
7
|
+
from uuid import UUID
|
8
|
+
import duration_parser
|
9
|
+
from loguru import logger
|
10
|
+
from munch import Munch
|
11
|
+
|
12
|
+
try:
|
13
|
+
from graphviz import Digraph
|
14
|
+
except ImportError:
|
15
|
+
Digraph = Any # Type alias for when graphviz isn't installed
|
16
|
+
|
17
|
+
from .. import settings
|
18
|
+
from ..cfg import PipelineConfig, ProjectConfig
|
19
|
+
from ..cfg.pipeline.adapter import AdapterConfig as PipelineAdapterConfig
|
20
|
+
from ..cfg.pipeline.run import ExecutorConfig, WithAdapterConfig
|
21
|
+
from ..cfg.project.adapter import AdapterConfig as ProjectAdapterConfig
|
22
|
+
from ..fs import AbstractFileSystem, BaseStorageOptions, get_filesystem
|
23
|
+
from ..utils.logging import setup_logging
|
24
|
+
from .io import PipelineIOManager
|
25
|
+
from .registry import PipelineRegistry, HookType
|
26
|
+
from .runner import PipelineRunner, run_pipeline
|
27
|
+
from .job_queue import PipelineJobQueue
|
28
|
+
from .visualizer import PipelineVisualizer
|
29
|
+
|
30
|
+
setup_logging()
|
31
|
+
|
32
|
+
GraphType = TypeVar("GraphType") # Type variable for graphviz.Digraph
|
33
|
+
|
34
|
+
|
35
|
+
class PipelineManager:
|
36
|
+
"""Central manager for FlowerPower pipeline operations.
|
37
|
+
|
38
|
+
This class provides a unified interface for managing pipelines, including:
|
39
|
+
- Configuration management and loading
|
40
|
+
- Pipeline creation, deletion, and discovery
|
41
|
+
- Pipeline execution via PipelineRunner
|
42
|
+
- Job scheduling via PipelineScheduler
|
43
|
+
- Visualization via PipelineVisualizer
|
44
|
+
- Import/export operations via PipelineIOManager
|
45
|
+
|
46
|
+
Attributes:
|
47
|
+
registry (PipelineRegistry): Handles pipeline registration and discovery
|
48
|
+
scheduler (PipelineScheduler): Manages job scheduling and execution
|
49
|
+
visualizer (PipelineVisualizer): Handles pipeline visualization
|
50
|
+
io (PipelineIOManager): Manages pipeline import/export operations
|
51
|
+
project_cfg (ProjectConfig): Current project configuration
|
52
|
+
pipeline_cfg (PipelineConfig): Current pipeline configuration
|
53
|
+
pipelines (list[str]): List of available pipeline names
|
54
|
+
current_pipeline_name (str): Name of the currently loaded pipeline
|
55
|
+
summary (dict[str, dict | str]): Summary of all pipelines
|
56
|
+
|
57
|
+
Example:
|
58
|
+
>>> from flowerpower.pipeline import PipelineManager
|
59
|
+
>>>
|
60
|
+
>>> # Create manager with default settings
|
61
|
+
>>> manager = PipelineManager()
|
62
|
+
>>>
|
63
|
+
>>> # Create manager with custom settings
|
64
|
+
>>> manager = PipelineManager(
|
65
|
+
... base_dir="/path/to/project",
|
66
|
+
... job_queue_type="rq",
|
67
|
+
... log_level="DEBUG"
|
68
|
+
... )
|
69
|
+
"""
|
70
|
+
|
71
|
+
def __init__(
|
72
|
+
self,
|
73
|
+
base_dir: str | None = None,
|
74
|
+
storage_options: dict | Munch | BaseStorageOptions | None = None,
|
75
|
+
fs: AbstractFileSystem | None = None,
|
76
|
+
cfg_dir: str | None = None,
|
77
|
+
pipelines_dir: str | None = None,
|
78
|
+
job_queue_type: str | None = None,
|
79
|
+
log_level: str | None = None,
|
80
|
+
) -> None:
|
81
|
+
"""Initialize the PipelineManager.
|
82
|
+
|
83
|
+
Args:
|
84
|
+
base_dir: Root directory for the FlowerPower project. Defaults to current
|
85
|
+
working directory if not specified.
|
86
|
+
storage_options: Configuration options for filesystem access. Can be:
|
87
|
+
- dict: Raw key-value options
|
88
|
+
- Munch: Dot-accessible options object
|
89
|
+
- BaseStorageOptions: Structured options class
|
90
|
+
Used for S3, GCS, etc. Example: {"key": "abc", "secret": "xyz"}
|
91
|
+
fs: Pre-configured fsspec filesystem instance. If provided, used instead
|
92
|
+
of creating new filesystem from base_dir and storage_options.
|
93
|
+
cfg_dir: Override default configuration directory name ('conf').
|
94
|
+
Example: "config" or "settings".
|
95
|
+
pipelines_dir: Override default pipelines directory name ('pipelines').
|
96
|
+
Example: "flows" or "dags".
|
97
|
+
job_queue_type: Override worker type from project config/settings.
|
98
|
+
Valid values: "rq", "apscheduler", or "huey".
|
99
|
+
log_level: Set logging level for the manager.
|
100
|
+
Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
|
101
|
+
|
102
|
+
Raises:
|
103
|
+
ValueError: If provided configuration paths don't exist or can't be created
|
104
|
+
RuntimeError: If filesystem operations fail during initialization
|
105
|
+
ImportError: If required dependencies for specified worker type not installed
|
106
|
+
|
107
|
+
Example:
|
108
|
+
>>> # Basic initialization
|
109
|
+
>>> manager = PipelineManager()
|
110
|
+
>>>
|
111
|
+
>>> # Custom configuration with S3 storage
|
112
|
+
>>> manager = PipelineManager(
|
113
|
+
... base_dir="s3://my-bucket/project",
|
114
|
+
... storage_options={
|
115
|
+
... "key": "ACCESS_KEY",
|
116
|
+
... "secret": "SECRET_KEY"
|
117
|
+
... },
|
118
|
+
... job_queue_type="rq",
|
119
|
+
... log_level="DEBUG"
|
120
|
+
... )
|
121
|
+
"""
|
122
|
+
if log_level:
|
123
|
+
setup_logging(level=log_level)
|
124
|
+
|
125
|
+
self._base_dir = base_dir or str(Path.cwd())
|
126
|
+
self._storage_options = storage_options
|
127
|
+
if not fs:
|
128
|
+
fs = get_filesystem(self._base_dir, storage_options=storage_options)
|
129
|
+
self._fs = fs
|
130
|
+
|
131
|
+
# Store overrides for ProjectConfig loading
|
132
|
+
self._cfg_dir = cfg_dir or settings.CONFIG_DIR
|
133
|
+
self._pipelines_dir = pipelines_dir or settings.PIPELINES_DIR
|
134
|
+
self._job_queue_type = job_queue_type
|
135
|
+
|
136
|
+
self._load_project_cfg(reload=True) # Load project config
|
137
|
+
|
138
|
+
# Ensure essential directories exist (using paths from loaded project_cfg)
|
139
|
+
try:
|
140
|
+
self._fs.makedirs(self._cfg_dir, exist_ok=True)
|
141
|
+
self._fs.makedirs(self._pipelines_dir, exist_ok=True)
|
142
|
+
except Exception as e:
|
143
|
+
logger.error(f"Error creating essential directories: {e}")
|
144
|
+
# Consider raising an error here depending on desired behavior
|
145
|
+
|
146
|
+
# Ensure pipeline modules can be imported
|
147
|
+
self._add_modules_path()
|
148
|
+
|
149
|
+
# Instantiate components using the loaded project config
|
150
|
+
self.registry = PipelineRegistry(
|
151
|
+
project_cfg=self.project_cfg,
|
152
|
+
fs=self._fs,
|
153
|
+
cfg_dir=self._cfg_dir,
|
154
|
+
pipelines_dir=self._pipelines_dir,
|
155
|
+
)
|
156
|
+
self.job_queue = PipelineJobQueue(
|
157
|
+
project_cfg=self.project_cfg,
|
158
|
+
fs=self._fs,
|
159
|
+
cfg_dir=self._cfg_dir,
|
160
|
+
pipelines_dir=self._pipelines_dir,
|
161
|
+
job_queue_type=self._job_queue_type,
|
162
|
+
)
|
163
|
+
self.visualizer = PipelineVisualizer(project_cfg=self.project_cfg, fs=self._fs)
|
164
|
+
self.io = PipelineIOManager(registry=self.registry)
|
165
|
+
|
166
|
+
self._current_pipeline_name: str | None = None
|
167
|
+
self._pipeline_cfg: PipelineConfig | None = None
|
168
|
+
|
169
|
+
def __enter__(self) -> "PipelineManager":
|
170
|
+
"""Enter the context manager.
|
171
|
+
|
172
|
+
Enables use of the manager in a with statement for automatic resource cleanup.
|
173
|
+
|
174
|
+
Returns:
|
175
|
+
PipelineManager: Self for use in context manager.
|
176
|
+
|
177
|
+
Example:
|
178
|
+
>>> from flowerpower.pipeline import PipelineManager
|
179
|
+
>>>
|
180
|
+
>>> with PipelineManager() as manager:
|
181
|
+
... result = manager.run("my_pipeline")
|
182
|
+
"""
|
183
|
+
return self
|
184
|
+
|
185
|
+
def __exit__(
|
186
|
+
self,
|
187
|
+
exc_type: type[BaseException] | None,
|
188
|
+
exc_val: BaseException | None,
|
189
|
+
exc_tb: TracebackType | None,
|
190
|
+
) -> None:
|
191
|
+
"""Exit the context manager.
|
192
|
+
|
193
|
+
Handles cleanup of resources when exiting a with statement.
|
194
|
+
|
195
|
+
Args:
|
196
|
+
exc_type: Type of exception that occurred, if any
|
197
|
+
exc_val: Exception instance that occurred, if any
|
198
|
+
exc_tb: Traceback of exception that occurred, if any
|
199
|
+
|
200
|
+
Example:
|
201
|
+
>>> from flowerpower.pipeline import PipelineManager
|
202
|
+
>>>
|
203
|
+
>>> with PipelineManager() as manager:
|
204
|
+
... try:
|
205
|
+
... result = manager.run("my_pipeline")
|
206
|
+
... except Exception as e:
|
207
|
+
... print(f"Error: {e}")
|
208
|
+
... # Resources automatically cleaned up here
|
209
|
+
"""
|
210
|
+
# Add cleanup code if needed
|
211
|
+
pass
|
212
|
+
|
213
|
+
def _get_run_func_for_job(self, name: str, reload: bool = False) -> Callable:
|
214
|
+
"""Create a PipelineRunner instance and return its run method.
|
215
|
+
|
216
|
+
This internal helper method ensures that each job gets a fresh runner
|
217
|
+
with the correct configuration state.
|
218
|
+
|
219
|
+
Args:
|
220
|
+
name: Name of the pipeline to create runner for
|
221
|
+
reload: Whether to reload pipeline configuration
|
222
|
+
|
223
|
+
Returns:
|
224
|
+
Callable: Bound run method from a fresh PipelineRunner instance
|
225
|
+
|
226
|
+
Example:
|
227
|
+
>>> # Internal usage
|
228
|
+
>>> manager = PipelineManager()
|
229
|
+
>>> run_func = manager._get_run_func_for_job("data_pipeline")
|
230
|
+
>>> result = run_func(inputs={"date": "2025-04-28"})
|
231
|
+
"""
|
232
|
+
pipeline_cfg = self._load_pipeline_cfg(name=name, reload=reload)
|
233
|
+
runner = PipelineRunner(project_cfg=self.project_cfg, pipeline_cfg=pipeline_cfg)
|
234
|
+
return runner.run
|
235
|
+
|
236
|
+
def _add_modules_path(self) -> None:
|
237
|
+
"""Add pipeline module paths to Python path.
|
238
|
+
|
239
|
+
This internal method ensures that pipeline modules can be imported by:
|
240
|
+
1. Syncing filesystem cache if needed
|
241
|
+
2. Adding project root to Python path
|
242
|
+
3. Adding pipelines directory to Python path
|
243
|
+
|
244
|
+
Raises:
|
245
|
+
RuntimeError: If filesystem sync fails or paths are invalid
|
246
|
+
|
247
|
+
Example:
|
248
|
+
>>> # Internal usage
|
249
|
+
>>> manager = PipelineManager()
|
250
|
+
>>> manager._add_modules_path()
|
251
|
+
>>> import my_pipeline # Now importable
|
252
|
+
"""
|
253
|
+
if self._fs.is_cache_fs:
|
254
|
+
self._fs.sync()
|
255
|
+
|
256
|
+
if self._fs.path not in sys.path:
|
257
|
+
sys.path.insert(0, self._fs.path)
|
258
|
+
|
259
|
+
modules_path = posixpath.join(self._fs.path, self._pipelines_dir)
|
260
|
+
if modules_path not in sys.path:
|
261
|
+
sys.path.insert(0, modules_path)
|
262
|
+
|
263
|
+
def _load_project_cfg(self, reload: bool = False) -> ProjectConfig:
|
264
|
+
"""Load or reload the project configuration.
|
265
|
+
|
266
|
+
This internal method handles loading project-wide settings from the config
|
267
|
+
directory, applying overrides, and maintaining configuration state.
|
268
|
+
|
269
|
+
Args:
|
270
|
+
reload: Force reload configuration even if already loaded.
|
271
|
+
Defaults to False for caching behavior.
|
272
|
+
|
273
|
+
Returns:
|
274
|
+
ProjectConfig: The loaded project configuration object with any
|
275
|
+
specified overrides applied.
|
276
|
+
|
277
|
+
Raises:
|
278
|
+
FileNotFoundError: If project configuration file doesn't exist
|
279
|
+
ValueError: If configuration format is invalid
|
280
|
+
RuntimeError: If filesystem operations fail during loading
|
281
|
+
|
282
|
+
Example:
|
283
|
+
>>> # Internal usage
|
284
|
+
>>> manager = PipelineManager()
|
285
|
+
>>> project_cfg = manager._load_project_cfg(reload=True)
|
286
|
+
>>> print(project_cfg.worker.type)
|
287
|
+
'rq'
|
288
|
+
"""
|
289
|
+
if hasattr(self, "_project_cfg") and not reload:
|
290
|
+
return self._project_cfg
|
291
|
+
|
292
|
+
# Pass overrides to ProjectConfig.load
|
293
|
+
self._project_cfg = ProjectConfig.load(
|
294
|
+
base_dir=self._base_dir,
|
295
|
+
job_queue_type=self._job_queue_type,
|
296
|
+
fs=self._fs, # Pass pre-configured fs if provided
|
297
|
+
storage_options=self._storage_options,
|
298
|
+
)
|
299
|
+
# Update internal fs reference in case ProjectConfig loaded/created one
|
300
|
+
return self._project_cfg
|
301
|
+
|
302
|
+
def _load_pipeline_cfg(self, name: str, reload: bool = False) -> PipelineConfig:
|
303
|
+
"""Load or reload configuration for a specific pipeline.
|
304
|
+
|
305
|
+
This internal method handles loading pipeline-specific settings from the config
|
306
|
+
directory and maintaining the configuration cache state.
|
307
|
+
|
308
|
+
Args:
|
309
|
+
name: Name of the pipeline whose configuration to load
|
310
|
+
reload: Force reload configuration even if already loaded.
|
311
|
+
When False, returns cached config if available.
|
312
|
+
|
313
|
+
Returns:
|
314
|
+
PipelineConfig: The loaded pipeline configuration object
|
315
|
+
|
316
|
+
Raises:
|
317
|
+
FileNotFoundError: If pipeline configuration file doesn't exist
|
318
|
+
ValueError: If configuration format is invalid
|
319
|
+
RuntimeError: If filesystem operations fail during loading
|
320
|
+
|
321
|
+
Example:
|
322
|
+
>>> # Internal usage
|
323
|
+
>>> manager = PipelineManager()
|
324
|
+
>>> cfg = manager._load_pipeline_cfg("data_pipeline", reload=True)
|
325
|
+
>>> print(cfg.run.executor.type)
|
326
|
+
'async'
|
327
|
+
"""
|
328
|
+
if name == self._current_pipeline_name and not reload:
|
329
|
+
return self._pipeline_cfg
|
330
|
+
|
331
|
+
self._current_pipeline_name = name
|
332
|
+
self._pipeline_cfg = PipelineConfig.load(
|
333
|
+
base_dir=self._base_dir,
|
334
|
+
name=name,
|
335
|
+
fs=self._fs,
|
336
|
+
storage_options=self._storage_options,
|
337
|
+
)
|
338
|
+
return self._pipeline_cfg
|
339
|
+
|
340
|
+
@property
|
341
|
+
def current_pipeline_name(self) -> str:
|
342
|
+
"""Get the name of the currently loaded pipeline.
|
343
|
+
|
344
|
+
Returns:
|
345
|
+
str: Name of the currently loaded pipeline, or empty string if none loaded.
|
346
|
+
|
347
|
+
Example:
|
348
|
+
>>> manager = PipelineManager()
|
349
|
+
>>> manager._load_pipeline_cfg("example_pipeline")
|
350
|
+
>>> print(manager.current_pipeline_name)
|
351
|
+
'example_pipeline'
|
352
|
+
"""
|
353
|
+
return self._current_pipeline_name
|
354
|
+
|
355
|
+
@property
|
356
|
+
def project_cfg(self) -> ProjectConfig:
|
357
|
+
"""Get the project configuration.
|
358
|
+
|
359
|
+
Loads configuration if not already loaded.
|
360
|
+
|
361
|
+
Returns:
|
362
|
+
ProjectConfig: Project-wide configuration object.
|
363
|
+
|
364
|
+
Raises:
|
365
|
+
RuntimeError: If configuration loading fails.
|
366
|
+
|
367
|
+
Example:
|
368
|
+
>>> manager = PipelineManager()
|
369
|
+
>>> cfg = manager.project_cfg
|
370
|
+
>>> print(cfg.worker.type)
|
371
|
+
'rq'
|
372
|
+
"""
|
373
|
+
if not hasattr(self, "_project_cfg"):
|
374
|
+
self._load_project_cfg()
|
375
|
+
return self._project_cfg
|
376
|
+
|
377
|
+
@property
|
378
|
+
def pipeline_cfg(self) -> PipelineConfig:
|
379
|
+
"""Get the configuration for the currently loaded pipeline.
|
380
|
+
|
381
|
+
Returns:
|
382
|
+
PipelineConfig: Pipeline-specific configuration object.
|
383
|
+
|
384
|
+
Warns:
|
385
|
+
UserWarning: If no pipeline is currently loaded.
|
386
|
+
|
387
|
+
Example:
|
388
|
+
>>> manager = PipelineManager()
|
389
|
+
>>> manager._load_pipeline_cfg("example_pipeline")
|
390
|
+
>>> cfg = manager.pipeline_cfg
|
391
|
+
>>> print(cfg.run.executor)
|
392
|
+
'local'
|
393
|
+
"""
|
394
|
+
if not hasattr(self, "_pipeline_cfg"):
|
395
|
+
logger.warning("Pipeline config not loaded.")
|
396
|
+
return
|
397
|
+
return self._pipeline_cfg
|
398
|
+
|
399
|
+
# --- Core Execution Method ---
|
400
|
+
|
401
|
+
def run(
|
402
|
+
self,
|
403
|
+
name: str,
|
404
|
+
inputs: dict | None = None,
|
405
|
+
final_vars: list[str] | None = None,
|
406
|
+
config: dict | None = None,
|
407
|
+
cache: dict | None = None,
|
408
|
+
executor_cfg: str | dict | ExecutorConfig | None = None,
|
409
|
+
with_adapter_cfg: dict | WithAdapterConfig | None = None,
|
410
|
+
pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
|
411
|
+
project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
|
412
|
+
adapter: dict[str, Any] | None = None,
|
413
|
+
reload: bool = False,
|
414
|
+
log_level: str | None = None,
|
415
|
+
max_retries: int | None = None,
|
416
|
+
retry_delay: float | None = None,
|
417
|
+
jitter_factor: float | None = None,
|
418
|
+
retry_exceptions: tuple | list | None = None,
|
419
|
+
|
420
|
+
) -> dict[str, Any]:
|
421
|
+
"""Execute a pipeline synchronously and return its results.
|
422
|
+
|
423
|
+
This is the main method for running pipelines directly. It handles configuration
|
424
|
+
loading, adapter setup, and execution via PipelineRunner.
|
425
|
+
|
426
|
+
Args:
|
427
|
+
name (str): Name of the pipeline to run. Must be a valid identifier.
|
428
|
+
inputs (dict | None): Override pipeline input values. Example: {"data_date": "2025-04-28"}
|
429
|
+
final_vars (list[str] | None): Specify which output variables to return.
|
430
|
+
Example: ["model", "metrics"]
|
431
|
+
config (dict | None): Configuration for Hamilton pipeline executor.
|
432
|
+
Example: {"model": "LogisticRegression"}
|
433
|
+
cache (dict | None): Cache configuration for results. Example: {"recompute": ["node1", "final_node"]}
|
434
|
+
executor_cfg (str | dict | ExecutorConfig | None): Execution configuration, can be:
|
435
|
+
- str: Executor name, e.g. "threadpool", "local"
|
436
|
+
- dict: Raw config, e.g. {"type": "threadpool", "max_workers": 4}
|
437
|
+
- ExecutorConfig: Structured config object
|
438
|
+
with_adapter_cfg (dict | WithAdapterConfig | None): Adapter settings for pipeline execution.
|
439
|
+
Example: {"opentelemetry": True, "tracker": False}
|
440
|
+
pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline-specific adapter settings.
|
441
|
+
Example: {"tracker": {"project_id": "123", "tags": {"env": "prod"}}}
|
442
|
+
project_adapter_cfg (dict | ProjectAdapterConfig | None): Project-level adapter settings.
|
443
|
+
Example: {"opentelemetry": {"host": "http://localhost:4317"}}
|
444
|
+
adapter (dict[str, Any] | None): Custom adapter instance for pipeline
|
445
|
+
Example: {"ray_graph_adapter": RayGraphAdapter()}
|
446
|
+
reload (bool): Force reload of pipeline configuration.
|
447
|
+
log_level (str | None): Logging level for the execution. Default None uses project config.
|
448
|
+
Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
|
449
|
+
max_retries (int): Maximum number of retries for execution.
|
450
|
+
retry_delay (float): Delay between retries in seconds.
|
451
|
+
jitter_factor (float): Random jitter factor to add to retry delay
|
452
|
+
retry_exceptions (tuple): Exceptions that trigger a retry.
|
453
|
+
|
454
|
+
Returns:
|
455
|
+
dict[str, Any]: Pipeline execution results, mapping output variable names
|
456
|
+
to their computed values.
|
457
|
+
|
458
|
+
Raises:
|
459
|
+
ValueError: If pipeline name doesn't exist or configuration is invalid
|
460
|
+
ImportError: If pipeline module cannot be imported
|
461
|
+
RuntimeError: If execution fails due to pipeline or adapter errors
|
462
|
+
|
463
|
+
Example:
|
464
|
+
>>> from flowerpower.pipeline import PipelineManager
|
465
|
+
>>>
|
466
|
+
>>> manager = PipelineManager()
|
467
|
+
>>>
|
468
|
+
>>> # Basic pipeline run
|
469
|
+
>>> results = manager.run("data_pipeline")
|
470
|
+
>>>
|
471
|
+
>>> # Complex run with overrides
|
472
|
+
>>> results = manager.run(
|
473
|
+
... name="ml_pipeline",
|
474
|
+
... inputs={
|
475
|
+
... "training_date": "2025-04-28",
|
476
|
+
... "model_params": {"n_estimators": 100}
|
477
|
+
... },
|
478
|
+
... final_vars=["model", "metrics"],
|
479
|
+
... executor_cfg={"type": "threadpool", "max_workers": 4},
|
480
|
+
... with_adapter_cfg={"tracker": True},
|
481
|
+
... reload=True
|
482
|
+
... )
|
483
|
+
"""
|
484
|
+
pipeline_cfg = self._load_pipeline_cfg(name=name, reload=reload)
|
485
|
+
|
486
|
+
res = run_pipeline(
|
487
|
+
project_cfg=self.project_cfg,
|
488
|
+
pipeline_cfg=pipeline_cfg,
|
489
|
+
inputs=inputs,
|
490
|
+
final_vars=final_vars,
|
491
|
+
config=config,
|
492
|
+
cache=cache,
|
493
|
+
executor_cfg=executor_cfg,
|
494
|
+
with_adapter_cfg=with_adapter_cfg,
|
495
|
+
pipeline_adapter_cfg=pipeline_adapter_cfg,
|
496
|
+
project_adapter_cfg=project_adapter_cfg,
|
497
|
+
adapter=adapter,
|
498
|
+
#reload=reload, # Runner handles module reload if needed
|
499
|
+
log_level=log_level,
|
500
|
+
max_retries=max_retries,
|
501
|
+
retry_delay=retry_delay,
|
502
|
+
jitter_factor=jitter_factor,
|
503
|
+
retry_exceptions=retry_exceptions,
|
504
|
+
)
|
505
|
+
return res
|
506
|
+
|
507
|
+
# --- Delegated Methods ---
|
508
|
+
|
509
|
+
# Registry Delegations
|
510
|
+
def new(self, name: str, overwrite: bool = False) -> None:
|
511
|
+
"""Create a new pipeline with the given name.
|
512
|
+
|
513
|
+
Creates necessary configuration files and pipeline module template.
|
514
|
+
|
515
|
+
Args:
|
516
|
+
name: Name for the new pipeline. Must be a valid Python identifier.
|
517
|
+
overwrite: Whether to overwrite existing pipeline with same name.
|
518
|
+
Default False for safety.
|
519
|
+
|
520
|
+
Raises:
|
521
|
+
ValueError: If name is invalid or pipeline exists and overwrite=False
|
522
|
+
RuntimeError: If file creation fails
|
523
|
+
PermissionError: If lacking write permissions
|
524
|
+
|
525
|
+
Example:
|
526
|
+
>>> from flowerpower.pipeline import PipelineManager
|
527
|
+
>>>
|
528
|
+
>>> # Create new pipeline
|
529
|
+
>>> manager = PipelineManager()
|
530
|
+
>>> manager.new("data_transformation")
|
531
|
+
>>>
|
532
|
+
>>> # Overwrite existing pipeline
|
533
|
+
>>> manager.new("data_transformation", overwrite=True)
|
534
|
+
"""
|
535
|
+
self.registry.new(name=name, overwrite=overwrite)
|
536
|
+
|
537
|
+
def delete(self, name: str, cfg: bool = True, module: bool = False) -> None:
|
538
|
+
"""
|
539
|
+
Delete a pipeline and its associated files.
|
540
|
+
|
541
|
+
Args:
|
542
|
+
name: Name of the pipeline to delete
|
543
|
+
cfg: Whether to delete configuration files. Default True.
|
544
|
+
module: Whether to delete Python module file. Default False
|
545
|
+
for safety since it may contain custom code.
|
546
|
+
|
547
|
+
Raises:
|
548
|
+
FileNotFoundError: If specified pipeline files don't exist
|
549
|
+
PermissionError: If lacking delete permissions
|
550
|
+
RuntimeError: If deletion fails partially, leaving inconsistent state
|
551
|
+
|
552
|
+
Example:
|
553
|
+
>>> from flowerpower.pipeline import PipelineManager
|
554
|
+
>>>
|
555
|
+
>>> # Delete pipeline config only
|
556
|
+
>>> manager = PipelineManager()
|
557
|
+
>>> manager.delete("old_pipeline")
|
558
|
+
>>>
|
559
|
+
>>> # Delete both config and module
|
560
|
+
>>> manager.delete("test_pipeline", module=True)
|
561
|
+
"""
|
562
|
+
self.registry.delete(name=name, cfg=cfg, module=module)
|
563
|
+
|
564
|
+
def get_summary(
|
565
|
+
self,
|
566
|
+
name: str | None = None,
|
567
|
+
cfg: bool = True,
|
568
|
+
code: bool = True,
|
569
|
+
project: bool = True,
|
570
|
+
) -> dict[str, dict | str]:
|
571
|
+
"""Get a detailed summary of pipeline(s) configuration and code.
|
572
|
+
|
573
|
+
Args:
|
574
|
+
name: Specific pipeline to summarize. If None, summarizes all.
|
575
|
+
cfg: Include pipeline configuration details. Default True.
|
576
|
+
code: Include pipeline module code. Default True.
|
577
|
+
project: Include project configuration. Default True.
|
578
|
+
|
579
|
+
Returns:
|
580
|
+
dict[str, dict | str]: Nested dictionary containing requested
|
581
|
+
summaries. Structure varies based on input parameters:
|
582
|
+
- With name: {"config": dict, "code": str, "project": dict}
|
583
|
+
- Without name: {pipeline_name: {"config": dict, ...}, ...}
|
584
|
+
|
585
|
+
Example:
|
586
|
+
>>> from flowerpower.pipeline import PipelineManager
|
587
|
+
>>>
|
588
|
+
>>> manager = PipelineManager()
|
589
|
+
>>>
|
590
|
+
>>> # Get summary of specific pipeline
|
591
|
+
>>> summary = manager.get_summary("data_pipeline")
|
592
|
+
>>> print(summary["config"]["schedule"]["enabled"])
|
593
|
+
True
|
594
|
+
>>>
|
595
|
+
>>> # Get summary of all pipelines' code
|
596
|
+
>>> all_code = manager.get_summary(
|
597
|
+
... cfg=False,
|
598
|
+
... code=True,
|
599
|
+
... project=False
|
600
|
+
... )
|
601
|
+
"""
|
602
|
+
return self.registry.get_summary(name=name, cfg=cfg, code=code, project=project)
|
603
|
+
|
604
|
+
def show_summary(
|
605
|
+
self,
|
606
|
+
name: str | None = None,
|
607
|
+
cfg: bool = True,
|
608
|
+
code: bool = True,
|
609
|
+
project: bool = True,
|
610
|
+
to_html: bool = False,
|
611
|
+
to_svg: bool = False,
|
612
|
+
) -> None | str:
|
613
|
+
"""
|
614
|
+
Show a summary of the pipelines.
|
615
|
+
|
616
|
+
Args:
|
617
|
+
name (str | None, optional): The name of the pipeline. Defaults to None.
|
618
|
+
cfg (bool, optional): Whether to show the configuration. Defaults to True.
|
619
|
+
code (bool, optional): Whether to show the module. Defaults to True.
|
620
|
+
project (bool, optional): Whether to show the project configuration. Defaults to True.
|
621
|
+
to_html (bool, optional): Whether to export the summary to HTML. Defaults to False.
|
622
|
+
to_svg (bool, optional): Whether to export the summary to SVG. Defaults to False.
|
623
|
+
|
624
|
+
Returns:
|
625
|
+
None | str: The summary of the pipelines. If `to_html` is True, returns the HTML string.
|
626
|
+
If `to_svg` is True, returns the SVG string.
|
627
|
+
|
628
|
+
Examples:
|
629
|
+
>>> pm = PipelineManager()
|
630
|
+
>>> pm.show_summary()
|
631
|
+
"""
|
632
|
+
return self.registry.show_summary(
|
633
|
+
name=name,
|
634
|
+
cfg=cfg,
|
635
|
+
code=code,
|
636
|
+
project=project,
|
637
|
+
to_html=to_html,
|
638
|
+
to_svg=to_svg,
|
639
|
+
)
|
640
|
+
|
641
|
+
def show_pipelines(self) -> None:
|
642
|
+
"""Display all available pipelines in a formatted table.
|
643
|
+
|
644
|
+
The table includes pipeline names, types, and enablement status.
|
645
|
+
Uses rich formatting for terminal display.
|
646
|
+
|
647
|
+
Example:
|
648
|
+
>>> from flowerpower.pipeline import PipelineManager
|
649
|
+
>>>
|
650
|
+
>>> manager = PipelineManager()
|
651
|
+
>>> manager.show_pipelines()
|
652
|
+
|
653
|
+
"""
|
654
|
+
self.registry.show_pipelines()
|
655
|
+
|
656
|
+
def list_pipelines(self) -> list[str]:
|
657
|
+
"""Get list of all available pipeline names.
|
658
|
+
|
659
|
+
Returns:
|
660
|
+
list[str]: Names of all registered pipelines, sorted alphabetically.
|
661
|
+
|
662
|
+
Example:
|
663
|
+
>>> from flowerpower.pipeline import PipelineManager
|
664
|
+
>>>
|
665
|
+
>>> manager = PipelineManager()
|
666
|
+
>>> pipelines = manager.list_pipelines()
|
667
|
+
>>> print(pipelines)
|
668
|
+
['data_ingestion', 'model_training', 'reporting']
|
669
|
+
"""
|
670
|
+
return self.registry.list_pipelines()
|
671
|
+
|
672
|
+
@property
|
673
|
+
def pipelines(self) -> list[str]:
|
674
|
+
"""Get list of all available pipeline names.
|
675
|
+
|
676
|
+
Similar to list_pipelines() but as a property.
|
677
|
+
|
678
|
+
Returns:
|
679
|
+
list[str]: Names of all registered pipelines, sorted alphabetically.
|
680
|
+
|
681
|
+
Example:
|
682
|
+
>>> from flowerpower.pipeline import PipelineManager
|
683
|
+
>>>
|
684
|
+
>>> manager = PipelineManager()
|
685
|
+
>>> print(manager.pipelines)
|
686
|
+
['data_ingestion', 'model_training', 'reporting']
|
687
|
+
"""
|
688
|
+
return self.registry.pipelines
|
689
|
+
|
690
|
+
@property
|
691
|
+
def summary(self) -> dict[str, dict | str]:
|
692
|
+
"""Get complete summary of all pipelines.
|
693
|
+
|
694
|
+
Returns:
|
695
|
+
dict[str, dict | str]: Full summary including configuration,
|
696
|
+
code, and project settings for all pipelines.
|
697
|
+
|
698
|
+
Example:
|
699
|
+
>>> from flowerpower.pipeline import PipelineManager
|
700
|
+
>>>
|
701
|
+
>>> manager = PipelineManager()
|
702
|
+
>>> summary = manager.summary
|
703
|
+
>>> for name, details in summary.items():
|
704
|
+
... print(f"{name}: {details['config']['type']}")
|
705
|
+
data_pipeline: batch
|
706
|
+
ml_pipeline: streaming
|
707
|
+
"""
|
708
|
+
return self.registry.summary
|
709
|
+
|
710
|
+
def add_hook(
|
711
|
+
self, name:str, type:HookType, to:str|None, function_name:str|None,
|
712
|
+
)->None:
|
713
|
+
"""Add a hook to the pipeline module.
|
714
|
+
|
715
|
+
Args:
|
716
|
+
name (str): The name of the pipeline
|
717
|
+
type (HookType): The type of the hook.
|
718
|
+
to (str | None, optional): The name of the file to add the hook to. Defaults to the hook.py file in the pipelines hooks folder.
|
719
|
+
function_name (str | None, optional): The name of the function. If not provided uses default name of hook type.
|
720
|
+
|
721
|
+
Returns:
|
722
|
+
None
|
723
|
+
|
724
|
+
Raises:
|
725
|
+
ValueError: If the hook type is not valid
|
726
|
+
|
727
|
+
Example:
|
728
|
+
>>> from flowerpower.pipeline import PipelineManager
|
729
|
+
>>>
|
730
|
+
>>> manager = PipelineManager()
|
731
|
+
>>> manager.add_hook(
|
732
|
+
... name="data_pipeline",
|
733
|
+
... type=HookType.PRE_EXECUTE,
|
734
|
+
... to="pre_execute_hook",
|
735
|
+
... function_name="my_pre_execute_function"
|
736
|
+
... )
|
737
|
+
"""
|
738
|
+
self.registry.add_hook(
|
739
|
+
name=name,
|
740
|
+
type=type,
|
741
|
+
to=to,
|
742
|
+
function_name=function_name,
|
743
|
+
)
|
744
|
+
|
745
|
+
# IO Delegations
|
746
|
+
def import_pipeline(
|
747
|
+
self,
|
748
|
+
name: str,
|
749
|
+
base_dir: str,
|
750
|
+
src_fs: AbstractFileSystem | None = None,
|
751
|
+
storage_options: BaseStorageOptions | None = None,
|
752
|
+
overwrite: bool = False,
|
753
|
+
) -> None:
|
754
|
+
"""Import a pipeline from another FlowerPower project.
|
755
|
+
|
756
|
+
Copies both pipeline configuration and code files from the source location
|
757
|
+
to the current project.
|
758
|
+
|
759
|
+
Args:
|
760
|
+
name: Name to give the imported pipeline
|
761
|
+
base_dir: Source FlowerPower project directory or URI
|
762
|
+
Examples:
|
763
|
+
- Local: "/path/to/other/project"
|
764
|
+
- S3: "s3://bucket/project"
|
765
|
+
- GitHub: "github://org/repo/project"
|
766
|
+
src_fs: Pre-configured filesystem for source location
|
767
|
+
Example: S3FileSystem(key='...', secret='...')
|
768
|
+
storage_options: Options for source filesystem access
|
769
|
+
Example: {"project": "my-gcp-project"}
|
770
|
+
overwrite: Whether to replace existing pipeline if name exists
|
771
|
+
|
772
|
+
Raises:
|
773
|
+
ValueError: If pipeline name exists and overwrite=False
|
774
|
+
FileNotFoundError: If source pipeline not found
|
775
|
+
RuntimeError: If import fails
|
776
|
+
|
777
|
+
Example:
|
778
|
+
>>> from flowerpower.pipeline import PipelineManager
|
779
|
+
>>> from s3fs import S3FileSystem
|
780
|
+
>>>
|
781
|
+
>>> manager = PipelineManager()
|
782
|
+
>>>
|
783
|
+
>>> # Import from local filesystem
|
784
|
+
>>> manager.import_pipeline(
|
785
|
+
... "new_pipeline",
|
786
|
+
... "/path/to/other/project"
|
787
|
+
... )
|
788
|
+
>>>
|
789
|
+
>>> # Import from S3 with custom filesystem
|
790
|
+
>>> s3 = S3FileSystem(anon=False)
|
791
|
+
>>> manager.import_pipeline(
|
792
|
+
... "s3_pipeline",
|
793
|
+
... "s3://bucket/project",
|
794
|
+
... src_fs=s3
|
795
|
+
... )
|
796
|
+
"""
|
797
|
+
return self.io.import_pipeline(
|
798
|
+
name=name,
|
799
|
+
src_base_dir=base_dir,
|
800
|
+
src_fs=src_fs,
|
801
|
+
src_storage_options=storage_options,
|
802
|
+
overwrite=overwrite,
|
803
|
+
)
|
804
|
+
|
805
|
+
def import_many(
|
806
|
+
self,
|
807
|
+
pipelines: dict[str, str] | list[str],
|
808
|
+
base_dir: str, # Base dir for source if pipelines is a list
|
809
|
+
src_fs: AbstractFileSystem | None = None,
|
810
|
+
src_storage_options: BaseStorageOptions | None = None,
|
811
|
+
overwrite: bool = False,
|
812
|
+
) -> None:
|
813
|
+
"""Import multiple pipelines from another project or location.
|
814
|
+
|
815
|
+
Supports two import modes:
|
816
|
+
1. Dictionary mode: Map source names to new names
|
817
|
+
2. List mode: Import keeping original names
|
818
|
+
|
819
|
+
Args:
|
820
|
+
pipelines: Pipeline specifications, either:
|
821
|
+
- dict: Map of {new_name: source_name}
|
822
|
+
- list: List of pipeline names to import as-is
|
823
|
+
base_dir: Source FlowerPower project directory or URI
|
824
|
+
src_fs: Pre-configured filesystem for source location
|
825
|
+
src_storage_options: Options for source filesystem access
|
826
|
+
overwrite: Whether to replace existing pipelines
|
827
|
+
|
828
|
+
Raises:
|
829
|
+
ValueError: If any pipeline exists and overwrite=False
|
830
|
+
FileNotFoundError: If source pipelines not found
|
831
|
+
RuntimeError: If import operation fails
|
832
|
+
|
833
|
+
Example:
|
834
|
+
>>> from flowerpower.pipeline import PipelineManager
|
835
|
+
>>>
|
836
|
+
>>> manager = PipelineManager()
|
837
|
+
>>>
|
838
|
+
>>> # Import with name mapping
|
839
|
+
>>> manager.import_many(
|
840
|
+
... pipelines={
|
841
|
+
... "new_ingest": "data_ingest",
|
842
|
+
... "new_process": "data_process"
|
843
|
+
... },
|
844
|
+
... base_dir="/path/to/source",
|
845
|
+
... overwrite=True
|
846
|
+
... )
|
847
|
+
>>>
|
848
|
+
>>> # Import keeping original names
|
849
|
+
>>> manager.import_many(
|
850
|
+
... pipelines=["pipeline1", "pipeline2"],
|
851
|
+
... base_dir="s3://bucket/source",
|
852
|
+
... src_storage_options={
|
853
|
+
... "key": "ACCESS_KEY",
|
854
|
+
... "secret": "SECRET_KEY"
|
855
|
+
... }
|
856
|
+
... )
|
857
|
+
"""
|
858
|
+
return self.io.import_many(
|
859
|
+
pipelines=pipelines,
|
860
|
+
src_base_dir=base_dir,
|
861
|
+
src_fs=src_fs,
|
862
|
+
src_storage_options=src_storage_options,
|
863
|
+
overwrite=overwrite,
|
864
|
+
)
|
865
|
+
|
866
|
+
def import_all(
|
867
|
+
self,
|
868
|
+
base_dir: str,
|
869
|
+
src_fs: AbstractFileSystem | None = None,
|
870
|
+
src_storage_options: BaseStorageOptions | None = None,
|
871
|
+
overwrite: bool = False,
|
872
|
+
) -> None:
|
873
|
+
"""Import all pipelines from another FlowerPower project.
|
874
|
+
|
875
|
+
Args:
|
876
|
+
base_dir: Source project directory or URI
|
877
|
+
src_fs: Pre-configured source filesystem
|
878
|
+
src_storage_options: Source filesystem options
|
879
|
+
overwrite: Whether to replace existing pipelines
|
880
|
+
|
881
|
+
Raises:
|
882
|
+
FileNotFoundError: If source location not found
|
883
|
+
RuntimeError: If import fails
|
884
|
+
|
885
|
+
Example:
|
886
|
+
>>> from flowerpower.pipeline import PipelineManager
|
887
|
+
>>>
|
888
|
+
>>> manager = PipelineManager()
|
889
|
+
>>>
|
890
|
+
>>> # Import all from backup
|
891
|
+
>>> manager.import_all("/path/to/backup")
|
892
|
+
>>>
|
893
|
+
>>> # Import all from S3 with credentials
|
894
|
+
>>> manager.import_all(
|
895
|
+
... "s3://bucket/backup",
|
896
|
+
... src_storage_options={
|
897
|
+
... "key": "ACCESS_KEY",
|
898
|
+
... "secret": "SECRET_KEY"
|
899
|
+
... }
|
900
|
+
... )
|
901
|
+
"""
|
902
|
+
return self.io.import_all(
|
903
|
+
src_base_dir=base_dir,
|
904
|
+
src_fs=src_fs,
|
905
|
+
src_storage_options=src_storage_options,
|
906
|
+
overwrite=overwrite,
|
907
|
+
)
|
908
|
+
|
909
|
+
def export_pipeline(
|
910
|
+
self,
|
911
|
+
name: str,
|
912
|
+
base_dir: str,
|
913
|
+
dest_fs: AbstractFileSystem | None = None,
|
914
|
+
dest_storage_options: BaseStorageOptions | None = None,
|
915
|
+
overwrite: bool = False,
|
916
|
+
) -> None:
|
917
|
+
"""Export a pipeline to another location or project.
|
918
|
+
|
919
|
+
Copies pipeline configuration and code files to the destination location
|
920
|
+
while preserving directory structure.
|
921
|
+
|
922
|
+
Args:
|
923
|
+
name: Name of pipeline to export
|
924
|
+
base_dir: Destination directory or URI
|
925
|
+
Examples:
|
926
|
+
- Local: "/path/to/backup"
|
927
|
+
- S3: "s3://bucket/backups"
|
928
|
+
- GCS: "gs://bucket/exports"
|
929
|
+
dest_fs: Pre-configured filesystem for destination
|
930
|
+
Example: GCSFileSystem(token='...')
|
931
|
+
dest_storage_options: Options for destination filesystem
|
932
|
+
Example: {"key": "...", "secret": "..."}
|
933
|
+
overwrite: Whether to replace existing files at destination
|
934
|
+
|
935
|
+
Raises:
|
936
|
+
ValueError: If pipeline doesn't exist
|
937
|
+
FileNotFoundError: If destination not accessible
|
938
|
+
RuntimeError: If export fails
|
939
|
+
|
940
|
+
Example:
|
941
|
+
>>> from flowerpower.pipeline import PipelineManager
|
942
|
+
>>> from gcsfs import GCSFileSystem
|
943
|
+
>>>
|
944
|
+
>>> manager = PipelineManager()
|
945
|
+
>>>
|
946
|
+
>>> # Export to local backup
|
947
|
+
>>> manager.export_pipeline(
|
948
|
+
... "my_pipeline",
|
949
|
+
... "/path/to/backup"
|
950
|
+
... )
|
951
|
+
>>>
|
952
|
+
>>> # Export to Google Cloud Storage
|
953
|
+
>>> gcs = GCSFileSystem(project='my-project')
|
954
|
+
>>> manager.export_pipeline(
|
955
|
+
... "prod_pipeline",
|
956
|
+
... "gs://my-bucket/backups",
|
957
|
+
... dest_fs=gcs
|
958
|
+
... )
|
959
|
+
"""
|
960
|
+
return self.io.export_pipeline(
|
961
|
+
name=name,
|
962
|
+
dest_base_dir=base_dir,
|
963
|
+
dest_fs=dest_fs,
|
964
|
+
dest_storage_options=dest_storage_options,
|
965
|
+
overwrite=overwrite,
|
966
|
+
)
|
967
|
+
|
968
|
+
def export_many(
|
969
|
+
self,
|
970
|
+
pipelines: list[str],
|
971
|
+
base_dir: str,
|
972
|
+
dest_fs: AbstractFileSystem | None = None,
|
973
|
+
dest_storage_options: BaseStorageOptions | None = None,
|
974
|
+
overwrite: bool = False,
|
975
|
+
) -> None:
|
976
|
+
"""Export multiple pipelines to another location.
|
977
|
+
|
978
|
+
Efficiently exports multiple pipelines in a single operation,
|
979
|
+
preserving directory structure and metadata.
|
980
|
+
|
981
|
+
Args:
|
982
|
+
pipelines: List of pipeline names to export
|
983
|
+
base_dir: Destination directory or URI
|
984
|
+
Examples:
|
985
|
+
- Local: "/path/to/exports"
|
986
|
+
- S3: "s3://bucket/exports"
|
987
|
+
- Azure: "abfs://container/exports"
|
988
|
+
dest_fs: Pre-configured filesystem for destination
|
989
|
+
Example: S3FileSystem(anon=False, key='...', secret='...')
|
990
|
+
dest_storage_options: Options for destination filesystem access
|
991
|
+
Example: {"account_name": "storage", "sas_token": "..."}
|
992
|
+
overwrite: Whether to replace existing files at destination
|
993
|
+
|
994
|
+
Raises:
|
995
|
+
ValueError: If any pipeline doesn't exist
|
996
|
+
FileNotFoundError: If destination not accessible
|
997
|
+
RuntimeError: If export operation fails
|
998
|
+
|
999
|
+
Example:
|
1000
|
+
>>> from flowerpower.pipeline import PipelineManager
|
1001
|
+
>>> from azure.storage.filedatalake import DataLakeServiceClient
|
1002
|
+
>>>
|
1003
|
+
>>> manager = PipelineManager()
|
1004
|
+
>>>
|
1005
|
+
>>> # Export multiple pipelines to Azure Data Lake
|
1006
|
+
>>> manager.export_many(
|
1007
|
+
... pipelines=["ingest", "process", "report"],
|
1008
|
+
... base_dir="abfs://data/backups",
|
1009
|
+
... dest_storage_options={
|
1010
|
+
... "account_name": "myaccount",
|
1011
|
+
... "sas_token": "...",
|
1012
|
+
... }
|
1013
|
+
... )
|
1014
|
+
"""
|
1015
|
+
return self.io.export_many(
|
1016
|
+
pipelines=pipelines,
|
1017
|
+
dest_base_dir=base_dir,
|
1018
|
+
dest_fs=dest_fs,
|
1019
|
+
dest_storage_options=dest_storage_options,
|
1020
|
+
overwrite=overwrite,
|
1021
|
+
)
|
1022
|
+
|
1023
|
+
def export_all(
|
1024
|
+
self,
|
1025
|
+
base_dir: str,
|
1026
|
+
dest_fs: AbstractFileSystem | None = None,
|
1027
|
+
dest_storage_options: BaseStorageOptions | None = None,
|
1028
|
+
overwrite: bool = False,
|
1029
|
+
) -> None:
|
1030
|
+
"""Export all pipelines to another location.
|
1031
|
+
|
1032
|
+
Args:
|
1033
|
+
base_dir: Destination directory or URI
|
1034
|
+
dest_fs: Pre-configured destination filesystem
|
1035
|
+
dest_storage_options: Destination filesystem options
|
1036
|
+
overwrite: Whether to replace existing files
|
1037
|
+
|
1038
|
+
Raises:
|
1039
|
+
FileNotFoundError: If destination not accessible
|
1040
|
+
RuntimeError: If export fails
|
1041
|
+
|
1042
|
+
Example:
|
1043
|
+
>>> from flowerpower.pipeline import PipelineManager
|
1044
|
+
>>>
|
1045
|
+
>>> manager = PipelineManager()
|
1046
|
+
>>>
|
1047
|
+
>>> # Export all to backup directory
|
1048
|
+
>>> manager.export_all("/path/to/backup")
|
1049
|
+
>>>
|
1050
|
+
>>> # Export all to cloud storage
|
1051
|
+
>>> manager.export_all(
|
1052
|
+
... "gs://bucket/pipelines",
|
1053
|
+
... dest_storage_options={
|
1054
|
+
... "token": "SERVICE_ACCOUNT_TOKEN",
|
1055
|
+
... "project": "my-project"
|
1056
|
+
... }
|
1057
|
+
... )
|
1058
|
+
"""
|
1059
|
+
return self.io.export_all(
|
1060
|
+
dest_base_dir=base_dir,
|
1061
|
+
dest_fs=dest_fs,
|
1062
|
+
dest_storage_options=dest_storage_options,
|
1063
|
+
overwrite=overwrite,
|
1064
|
+
)
|
1065
|
+
|
1066
|
+
# Visualizer Delegations
|
1067
|
+
def save_dag(self, name: str, format: str = "png", reload: bool = False) -> None:
|
1068
|
+
"""Save pipeline DAG visualization to a file.
|
1069
|
+
|
1070
|
+
Creates a visual representation of the pipeline's directed acyclic graph (DAG)
|
1071
|
+
showing function dependencies and data flow.
|
1072
|
+
|
1073
|
+
Args:
|
1074
|
+
name: Name of the pipeline to visualize
|
1075
|
+
format: Output file format. Supported formats:
|
1076
|
+
- "png": Standard bitmap image
|
1077
|
+
- "svg": Scalable vector graphic
|
1078
|
+
- "pdf": Portable document format
|
1079
|
+
- "dot": Graphviz DOT format
|
1080
|
+
reload: Whether to reload pipeline before visualization
|
1081
|
+
|
1082
|
+
Raises:
|
1083
|
+
ValueError: If pipeline name doesn't exist
|
1084
|
+
ImportError: If required visualization dependencies missing
|
1085
|
+
RuntimeError: If graph generation fails
|
1086
|
+
|
1087
|
+
Example:
|
1088
|
+
>>> from flowerpower.pipeline import PipelineManager
|
1089
|
+
>>>
|
1090
|
+
>>> manager = PipelineManager()
|
1091
|
+
>>>
|
1092
|
+
>>> # Save as PNG
|
1093
|
+
>>> manager.save_dag("data_pipeline")
|
1094
|
+
>>>
|
1095
|
+
>>> # Save as SVG with reload
|
1096
|
+
>>> manager.save_dag(
|
1097
|
+
... name="ml_pipeline",
|
1098
|
+
... format="svg",
|
1099
|
+
... reload=True
|
1100
|
+
... )
|
1101
|
+
"""
|
1102
|
+
self.visualizer.save_dag(name=name, format=format, reload=reload)
|
1103
|
+
|
1104
|
+
def show_dag(
|
1105
|
+
self, name: str, format: str = "png", reload: bool = False, raw: bool = False
|
1106
|
+
) -> Union[GraphType, None]:
|
1107
|
+
"""Display pipeline DAG visualization interactively.
|
1108
|
+
|
1109
|
+
Similar to save_dag() but displays the graph immediately in notebook
|
1110
|
+
environments or returns the raw graph object for custom rendering.
|
1111
|
+
|
1112
|
+
Args:
|
1113
|
+
name: Name of the pipeline to visualize
|
1114
|
+
format: Output format (see save_dag() for options)
|
1115
|
+
reload: Whether to reload pipeline before visualization
|
1116
|
+
raw: If True, return the raw graph object instead of displaying
|
1117
|
+
|
1118
|
+
Returns:
|
1119
|
+
Union[GraphType, None]: Raw graph object if raw=True, else None after
|
1120
|
+
displaying the visualization
|
1121
|
+
|
1122
|
+
Raises:
|
1123
|
+
ValueError: If pipeline name doesn't exist
|
1124
|
+
ImportError: If visualization dependencies missing
|
1125
|
+
RuntimeError: If graph generation fails
|
1126
|
+
|
1127
|
+
Example:
|
1128
|
+
>>> from flowerpower.pipeline import PipelineManager
|
1129
|
+
>>>
|
1130
|
+
>>> manager = PipelineManager()
|
1131
|
+
>>>
|
1132
|
+
>>> # Display in notebook
|
1133
|
+
>>> manager.show_dag("data_pipeline")
|
1134
|
+
>>>
|
1135
|
+
>>> # Get raw graph for custom rendering
|
1136
|
+
>>> graph = manager.show_dag(
|
1137
|
+
... name="ml_pipeline",
|
1138
|
+
... format="svg",
|
1139
|
+
... raw=True
|
1140
|
+
... )
|
1141
|
+
>>> # Custom rendering
|
1142
|
+
>>> graph.render("custom_vis", view=True)
|
1143
|
+
"""
|
1144
|
+
return self.visualizer.show_dag(
|
1145
|
+
name=name, format=format, reload=reload, raw=raw
|
1146
|
+
)
|
1147
|
+
|
1148
|
+
# Scheduler Delegations
|
1149
|
+
def _get_run_func_for_job(self, name: str, reload: bool = False) -> Callable:
|
1150
|
+
"""Helper to create a PipelineRunner instance and return its run method."""
|
1151
|
+
# This ensures the runner uses the correct, potentially reloaded, config for the job
|
1152
|
+
pipeline_cfg = self._load_pipeline_cfg(name=name, reload=reload)
|
1153
|
+
runner = PipelineRunner(project_cfg=self.project_cfg, pipeline_cfg=pipeline_cfg)
|
1154
|
+
# We return the bound method runner.run
|
1155
|
+
return runner.run
|
1156
|
+
|
1157
|
+
def run_job(
|
1158
|
+
self,
|
1159
|
+
name: str,
|
1160
|
+
inputs: dict | None = None,
|
1161
|
+
final_vars: list[str] | None = None,
|
1162
|
+
config: dict | None = None,
|
1163
|
+
cache: bool | dict = False,
|
1164
|
+
executor_cfg: str | dict | ExecutorConfig | None = None,
|
1165
|
+
with_adapter_cfg: dict | WithAdapterConfig | None = None,
|
1166
|
+
pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
|
1167
|
+
project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
|
1168
|
+
adapter: dict[str, Any] | None = None,
|
1169
|
+
reload: bool = False,
|
1170
|
+
log_level: str | None = None,
|
1171
|
+
max_retries: int | None = None,
|
1172
|
+
retry_delay: float | None = None,
|
1173
|
+
jitter_factor: float | None = None,
|
1174
|
+
retry_exceptions: tuple | list | None = None,
|
1175
|
+
**kwargs: Any,
|
1176
|
+
) -> dict[str, Any]:
|
1177
|
+
"""Execute a pipeline job immediately through the task queue.
|
1178
|
+
|
1179
|
+
Unlike the run() method which executes synchronously, this method runs
|
1180
|
+
the pipeline through the configured worker system (RQ, APScheduler, etc.).
|
1181
|
+
|
1182
|
+
Args:
|
1183
|
+
name (str): Name of the pipeline to run. Must be a valid identifier.
|
1184
|
+
inputs (dict | None): Override pipeline input values. Example: {"data_date": "2025-04-28"}
|
1185
|
+
final_vars (list[str] | None): Specify which output variables to return.
|
1186
|
+
Example: ["model", "metrics"]
|
1187
|
+
config (dict | None): Configuration for Hamilton pipeline executor.
|
1188
|
+
Example: {"model": "LogisticRegression"}
|
1189
|
+
cache (dict | None): Cache configuration for results. Example: {"recompute": ["node1", "final_node"]}
|
1190
|
+
executor_cfg (str | dict | ExecutorConfig | None): Execution configuration, can be:
|
1191
|
+
- str: Executor name, e.g. "threadpool", "local"
|
1192
|
+
- dict: Raw config, e.g. {"type": "threadpool", "max_workers": 4}
|
1193
|
+
- ExecutorConfig: Structured config object
|
1194
|
+
with_adapter_cfg (dict | WithAdapterConfig | None): Adapter settings for pipeline execution.
|
1195
|
+
Example: {"opentelemetry": True, "tracker": False}
|
1196
|
+
pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline-specific adapter settings.
|
1197
|
+
Example: {"tracker": {"project_id": "123", "tags": {"env": "prod"}}}
|
1198
|
+
project_adapter_cfg (dict | ProjectAdapterConfig | None): Project-level adapter settings.
|
1199
|
+
Example: {"opentelemetry": {"host": "http://localhost:4317"}}
|
1200
|
+
adapter (dict[str, Any] | None): Custom adapter instance for pipeline
|
1201
|
+
Example: {"ray_graph_adapter": RayGraphAdapter()}
|
1202
|
+
reload (bool): Force reload of pipeline configuration.
|
1203
|
+
log_level (str | None): Logging level for the execution. Default None uses project config.
|
1204
|
+
Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
|
1205
|
+
max_retries (int): Maximum number of retries for execution.
|
1206
|
+
retry_delay (float): Delay between retries in seconds.
|
1207
|
+
jitter_factor (float): Random jitter factor to add to retry delay
|
1208
|
+
retry_exceptions (tuple): Exceptions that trigger a retry.
|
1209
|
+
|
1210
|
+
**kwargs: JobQueue-specific arguments
|
1211
|
+
For RQ:
|
1212
|
+
- queue_name: Queue to use (str)
|
1213
|
+
- retry: Number of retries (int)
|
1214
|
+
For APScheduler:
|
1215
|
+
- job_executor: Executor type (str)
|
1216
|
+
|
1217
|
+
Returns:
|
1218
|
+
dict[str, Any]: Job execution results
|
1219
|
+
|
1220
|
+
Raises:
|
1221
|
+
ValueError: If pipeline or configuration is invalid
|
1222
|
+
RuntimeError: If job execution fails
|
1223
|
+
|
1224
|
+
Example:
|
1225
|
+
>>> from flowerpower.pipeline import PipelineManager
|
1226
|
+
>>>
|
1227
|
+
>>> manager = PipelineManager()
|
1228
|
+
>>>
|
1229
|
+
>>> # Simple job execution
|
1230
|
+
>>> result = manager.run_job("data_pipeline")
|
1231
|
+
>>>
|
1232
|
+
>>> # Complex job with retry logic
|
1233
|
+
>>> result = manager.run_job(
|
1234
|
+
... name="ml_training",
|
1235
|
+
... inputs={"training_date": "2025-04-28"},
|
1236
|
+
... executor_cfg={"type": "async"},
|
1237
|
+
... with_adapter_cfg={"enable_tracking": True},
|
1238
|
+
... retry=3,
|
1239
|
+
... queue_name="ml_jobs"
|
1240
|
+
... )
|
1241
|
+
"""
|
1242
|
+
run_func = self._get_run_func_for_job(name, reload)
|
1243
|
+
return self.job_queue.run_job(
|
1244
|
+
run_func=run_func,
|
1245
|
+
name=name,
|
1246
|
+
inputs=inputs,
|
1247
|
+
final_vars=final_vars,
|
1248
|
+
config=config,
|
1249
|
+
cache=cache,
|
1250
|
+
executor_cfg=executor_cfg,
|
1251
|
+
with_adapter_cfg=with_adapter_cfg,
|
1252
|
+
pipeline_adapter_cfg=pipeline_adapter_cfg,
|
1253
|
+
project_adapter_cfg=project_adapter_cfg,
|
1254
|
+
adapter=adapter,
|
1255
|
+
#reload=reload,
|
1256
|
+
log_level=log_level,
|
1257
|
+
max_retries=max_retries,
|
1258
|
+
retry_delay=retry_delay,
|
1259
|
+
jitter_factor=jitter_factor,
|
1260
|
+
retry_exceptions=retry_exceptions,
|
1261
|
+
**kwargs,
|
1262
|
+
)
|
1263
|
+
+    def add_job(
+        self,
+        name: str,
+        inputs: dict | None = None,
+        final_vars: list[str] | None = None,
+        config: dict | None = None,
+        cache: bool | dict = False,
+        executor_cfg: str | dict | ExecutorConfig | None = None,
+        with_adapter_cfg: dict | WithAdapterConfig | None = None,
+        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
+        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
+        adapter: dict[str, Any] | None = None,
+        reload: bool = False,  # Reload config/module before creating run_func
+        log_level: str | None = None,
+        result_ttl: int | dt.timedelta = 0,
+        run_at: dt.datetime | str | None = None,
+        run_in: int | dt.timedelta | str | None = None,
+        max_retries: int = 3,
+        retry_delay: float = 1.0,
+        jitter_factor: float = 0.1,
+        retry_exceptions: tuple = (Exception,),
+        **kwargs,  # JobQueue specific args
+    ) -> str | UUID:
+        """Adds a job to the job queue.
+
+        Args:
+            name (str): Name of the pipeline to run. Must be a valid identifier.
+            inputs (dict | None): Override pipeline input values. Example: {"data_date": "2025-04-28"}
+            final_vars (list[str] | None): Specify which output variables to return.
+                Example: ["model", "metrics"]
+            config (dict | None): Configuration for the Hamilton pipeline executor.
+                Example: {"model": "LogisticRegression"}
+            cache (bool | dict): Cache configuration for results. Example: {"recompute": ["node1", "final_node"]}
+            executor_cfg (str | dict | ExecutorConfig | None): Execution configuration, can be:
+                - str: Executor name, e.g. "threadpool", "local"
+                - dict: Raw config, e.g. {"type": "threadpool", "max_workers": 4}
+                - ExecutorConfig: Structured config object
+            with_adapter_cfg (dict | WithAdapterConfig | None): Adapter settings for pipeline execution.
+                Example: {"opentelemetry": True, "tracker": False}
+            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline-specific adapter settings.
+                Example: {"tracker": {"project_id": "123", "tags": {"env": "prod"}}}
+            project_adapter_cfg (dict | ProjectAdapterConfig | None): Project-level adapter settings.
+                Example: {"opentelemetry": {"host": "http://localhost:4317"}}
+            adapter (dict[str, Any] | None): Custom adapter instances for the pipeline.
+                Example: {"ray_graph_adapter": RayGraphAdapter()}
+            reload (bool): Force reload of the pipeline configuration.
+            run_at (dt.datetime | str | None): Future date to run the job.
+                Example: datetime(2025, 4, 28, 12, 0)
+                Example str: "2025-04-28T12:00:00" (ISO format)
+            run_in (int | dt.timedelta | str | None): Delay after which to run the job.
+                Example: 3600 (in one hour, given in seconds)
+                Example: datetime.timedelta(days=1)
+                Example str: "1d" (1 day)
+            result_ttl (int | dt.timedelta): Time to live for the job result.
+                Example: 3600 (1 hour in seconds)
+            log_level (str | None): Logging level for the execution. Default None uses the project config.
+                Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
+            max_retries (int): Maximum number of retries for execution.
+            retry_delay (float): Delay between retries in seconds.
+            jitter_factor (float): Random jitter factor added to the retry delay.
+            retry_exceptions (tuple): Exceptions that trigger a retry.
+            **kwargs: Additional keyword arguments passed to the worker's add_job method.
+                For RQ this includes:
+                - result_ttl: Time to live for the job result (float or timedelta)
+                - ttl: Time to live for the job (float or timedelta)
+                - queue_name: Name of the queue to use (str)
+                - retry: Number of retries (int)
+                - repeat: Repeat count (int or dict)
+                For APScheduler this includes:
+                - job_executor: Job executor to use (str)
+
+        Returns:
+            str | UUID: The ID of the job.
+
+        Raises:
+            ValueError: If the job ID is not valid or if the job cannot be scheduled.
+
+        Example:
+            >>> from flowerpower.pipeline import PipelineManager
+            >>> pm = PipelineManager()
+            >>> job_id = pm.add_job("example_pipeline", inputs={"input1": 42})
+
+        """
+        run_func = self._get_run_func_for_job(name, reload)
+        run_in = duration_parser.parse(run_in) if isinstance(run_in, str) else run_in  # convert to seconds
+        run_at = dt.datetime.fromisoformat(run_at) if isinstance(run_at, str) else run_at
+
+        return self.job_queue.add_job(
+            run_func=run_func,
+            name=name,  # Pass name for logging
+            # Pass run parameters
+            inputs=inputs,
+            final_vars=final_vars,
+            config=config,
+            cache=cache,
+            executor_cfg=executor_cfg,
+            with_adapter_cfg=with_adapter_cfg,
+            pipeline_adapter_cfg=pipeline_adapter_cfg,
+            project_adapter_cfg=project_adapter_cfg,
+            adapter=adapter,
+            # reload=reload,  # Note: reload already happened
+            log_level=log_level,
+            result_ttl=result_ttl,
+            run_at=run_at,
+            run_in=run_in,
+            max_retries=max_retries,
+            retry_delay=retry_delay,
+            jitter_factor=jitter_factor,
+            retry_exceptions=retry_exceptions,
+            **kwargs,  # Pass worker args
+        )
+
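
The string forms documented for `run_in` and `run_at` are converted in the body above via `duration_parser.parse` and `dt.datetime.fromisoformat`. A minimal usage sketch follows; the pipeline name and inputs are placeholder values, not part of the package:

```python
# Minimal sketch of delayed and point-in-time submission with add_job().
# "example_pipeline" and its inputs are hypothetical placeholders.
import datetime as dt

from flowerpower.pipeline import PipelineManager

pm = PipelineManager()

# Run roughly one day from now; "1d" is parsed by duration_parser in add_job.
delayed_id = pm.add_job("example_pipeline", inputs={"input1": 42}, run_in="1d")

# Run at an explicit point in time; ISO strings are parsed with fromisoformat.
at_id = pm.add_job(
    "example_pipeline",
    run_at="2025-04-28T12:00:00",
    result_ttl=dt.timedelta(hours=1),  # keep the job result for one hour
)
print(delayed_id, at_id)
```
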
+    def schedule(
+        self,
+        name: str,
+        inputs: dict | None = None,
+        final_vars: list[str] | None = None,
+        config: dict | None = None,
+        cache: bool | dict = False,
+        executor_cfg: str | dict | ExecutorConfig | None = None,
+        with_adapter_cfg: dict | WithAdapterConfig | None = None,
+        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
+        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
+        adapter: dict[str, Any] | None = None,
+        reload: bool = False,
+        log_level: str | None = None,
+        cron: str | dict[str, str | int] | None = None,
+        interval: int | str | dict[str, str | int] | None = None,
+        date: dt.datetime | str | None = None,
+        overwrite: bool = False,
+        schedule_id: str | None = None,
+        max_retries: int | None = None,
+        retry_delay: float | None = None,
+        jitter_factor: float | None = None,
+        retry_exceptions: tuple | list | None = None,
+        **kwargs: Any,
+    ) -> str | UUID:
+        """Schedule a pipeline to run on a recurring or future basis.
+
+        Args:
+            name (str): The name of the pipeline to run.
+            inputs (dict | None): Inputs for the pipeline run (overrides config).
+            final_vars (list[str] | None): Final variables for the pipeline run (overrides config).
+            config (dict | None): Hamilton driver config (overrides config).
+            cache (bool | dict): Cache settings (overrides config).
+            executor_cfg (str | dict | ExecutorConfig | None): Executor configuration (overrides config).
+            with_adapter_cfg (dict | WithAdapterConfig | None): Adapter configuration (overrides config).
+            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline adapter configuration (overrides config).
+            project_adapter_cfg (dict | ProjectAdapterConfig | None): Project adapter configuration (overrides config).
+            adapter (dict[str, Any] | None): Additional Hamilton adapters (overrides config).
+            reload (bool): Whether to reload the module and pipeline config. Defaults to False.
+            log_level (str | None): Log level for the run (overrides config).
+            cron (str | dict[str, str | int] | None): Cron expression or settings.
+                Example string: "0 0 * * *" (daily at midnight)
+                Example dict: {"minute": "0", "hour": "*/2"} (every 2 hours)
+            interval (int | str | dict[str, str | int] | None): Time interval for recurring execution.
+                Example int: 3600 (every hour, in seconds)
+                Example str: "1h" (every hour)
+                Example dict: {"hours": 1, "minutes": 30} (every 90 minutes)
+            date (dt.datetime | str | None): Future date for a one-time run.
+                Example: datetime(2025, 4, 28, 12, 0)
+                Example str: "2025-04-28T12:00:00" (ISO format)
+            overwrite (bool): Whether to overwrite an existing schedule with the same ID.
+            schedule_id (str | None): Unique identifier for the schedule.
+            max_retries (int): Maximum number of retries for execution.
+            retry_delay (float): Delay between retries in seconds.
+            jitter_factor (float): Random jitter factor added to the retry delay.
+            retry_exceptions (tuple): Exceptions that trigger a retry.
+            **kwargs: JobQueue-specific scheduling options.
+                For RQ:
+                - result_ttl: Result lifetime (int seconds)
+                - queue_name: Queue to use (str)
+                For APScheduler:
+                - misfire_grace_time: Late execution window
+                - coalesce: Combine missed executions (bool)
+                - max_running_jobs: Concurrent instances limit (int)
+
+        Returns:
+            str | UUID: Unique identifier for the created schedule.
+
+        Raises:
+            ValueError: If schedule parameters are invalid.
+            RuntimeError: If scheduling fails.
+
+        Example:
+            >>> from flowerpower.pipeline import PipelineManager
+            >>> from datetime import datetime, timedelta
+            >>>
+            >>> manager = PipelineManager()
+            >>>
+            >>> # Daily schedule with cron
+            >>> schedule_id = manager.schedule(
+            ...     name="daily_metrics",
+            ...     cron="0 0 * * *",
+            ...     inputs={"date": "{{ execution_date }}"}
+            ... )
+            >>>
+            >>> # Interval-based schedule
+            >>> schedule_id = manager.schedule(
+            ...     name="monitoring",
+            ...     interval={"minutes": 15},
+            ...     with_adapter_cfg={"enable_alerts": True}
+            ... )
+            >>>
+            >>> # Future one-time execution
+            >>> future_date = datetime.now() + timedelta(days=1)
+            >>> schedule_id = manager.schedule(
+            ...     name="batch_process",
+            ...     date=future_date,
+            ...     executor_cfg={"type": "async"}
+            ... )
+        """
+        pipeline_cfg = self._load_pipeline_cfg(name=name, reload=reload)
+        run_func = self._get_run_func_for_job(name, reload)
+        interval = duration_parser.parse(interval) if isinstance(interval, str) else interval
+        date = dt.datetime.fromisoformat(date) if isinstance(date, str) else date
+
+        return self.job_queue.schedule(
+            run_func=run_func,
+            pipeline_cfg=pipeline_cfg,
+            inputs=inputs,
+            final_vars=final_vars,
+            config=config,
+            cache=cache,
+            executor_cfg=executor_cfg,
+            with_adapter_cfg=with_adapter_cfg,
+            pipeline_adapter_cfg=pipeline_adapter_cfg,
+            project_adapter_cfg=project_adapter_cfg,
+            adapter=adapter,
+            reload=reload,
+            log_level=log_level,
+            cron=cron,
+            interval=interval,
+            date=date,
+            overwrite=overwrite,
+            schedule_id=schedule_id,
+            max_retries=max_retries,
+            retry_delay=retry_delay,
+            jitter_factor=jitter_factor,
+            retry_exceptions=retry_exceptions,
+            **kwargs,
+        )
+
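
The three trigger styles (`cron`, `interval`, `date`) can also be given as strings, which the body above parses with `duration_parser.parse` and `dt.datetime.fromisoformat`. A small sketch with placeholder pipeline names; backend-specific behaviour depends on the configured job queue:

```python
# Minimal sketch of the trigger styles accepted by schedule().
# Pipeline names are hypothetical placeholders.
from flowerpower.pipeline import PipelineManager

manager = PipelineManager()

# Recurring cron schedule; a stable schedule_id plus overwrite=True lets
# repeated calls replace the same entry instead of piling up duplicates.
manager.schedule(
    "daily_metrics",
    cron="0 0 * * *",
    schedule_id="daily_metrics",
    overwrite=True,
)

# Recurring interval given as a duration string ("1h" is parsed to seconds).
manager.schedule("monitoring", interval="1h")

# One-off run at an ISO-8601 timestamp (parsed with fromisoformat).
manager.schedule("batch_process", date="2025-04-28T12:00:00")
```
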
+    def schedule_all(self, **kwargs: Any) -> None:
+        """Schedule all pipelines that are enabled in their configuration.
+
+        For each enabled pipeline, applies its configured schedule settings
+        and any provided overrides.
+
+        Args:
+            **kwargs: Overrides for schedule settings that apply to all pipelines.
+                See the schedule() method for supported arguments.
+
+        Example:
+            >>> from flowerpower.pipeline import PipelineManager
+            >>>
+            >>> manager = PipelineManager()
+            >>>
+            >>> # Schedule all with default settings
+            >>> manager.schedule_all()
+            >>>
+            >>> # Schedule all with common overrides
+            >>> manager.schedule_all(
+            ...     max_running_jobs=2,
+            ...     coalesce=True,
+            ...     misfire_grace_time=300
+            ... )
+        """
+        scheduled_ids = []
+        errors = []
+        pipeline_names = self.list_pipelines()
+        if not pipeline_names:
+            logger.warning("No pipelines found to schedule.")
+            return
+
+        logger.info(f"Attempting to schedule {len(pipeline_names)} pipelines...")
+        for name in pipeline_names:
+            try:
+                pipeline_cfg = self._load_pipeline_cfg(name=name, reload=True)
+
+                if not pipeline_cfg.schedule.enabled:
+                    logger.info(
+                        f"Skipping scheduling for '{name}': Not enabled in config."
+                    )
+                    continue
+
+                logger.info(f"Scheduling [cyan]{name}[/cyan]...")
+                schedule_id = self.schedule(name=name, reload=False, **kwargs)
+                scheduled_ids.append(schedule_id)
+            except Exception as e:
+                logger.error(f"Failed to schedule pipeline '{name}': {e}")
+                errors.append(name)
+
+        if errors:
+            logger.error(f"Finished scheduling with errors for: {', '.join(errors)}")
+        else:
+            logger.info(f"Successfully scheduled {len(scheduled_ids)} pipelines.")
+
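
As the loop above shows, `schedule_all` skips any pipeline whose `schedule.enabled` flag is off and logs per-pipeline failures instead of aborting the whole run. A minimal sketch; the override names are the APScheduler-style kwargs documented above and only apply when that backend is configured:

```python
# Minimal sketch: bulk-schedule every enabled pipeline with shared overrides.
from flowerpower.pipeline import PipelineManager

manager = PipelineManager()

# Disabled pipelines are skipped and individual failures are logged, so this
# call never raises just because one pipeline's config is broken.
manager.schedule_all(coalesce=True, misfire_grace_time=300, max_running_jobs=2)
```
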
+    @property
+    def schedules(self) -> list[Any]:
+        """Get list of current pipeline schedules.
+
+        Retrieves all active schedules from the worker system.
+
+        Returns:
+            list[Any]: List of schedule objects. Exact type depends on worker:
+                - RQ: List[rq.job.Job]
+                - APScheduler: List[apscheduler.schedulers.base.Schedule]
+
+        Example:
+            >>> from flowerpower.pipeline import PipelineManager
+            >>>
+            >>> manager = PipelineManager()
+            >>> for schedule in manager.schedules:
+            ...     print(f"{schedule.id}: Next run at {schedule.next_run_time}")
+        """
+        try:
+            return self.job_queue._get_schedules()
+        except Exception as e:
+            logger.error(f"Failed to retrieve schedules: {e}")
+            return []
+
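
Because the property catches backend errors and falls back to an empty list, callers can iterate it unconditionally. A small sketch; attribute access on the returned objects depends on the backend (RQ jobs vs. APScheduler schedules):

```python
# Minimal sketch: list current schedules defensively.
from flowerpower.pipeline import PipelineManager

manager = PipelineManager()

current = manager.schedules  # returns [] if retrieval from the backend fails
if not current:
    print("No schedules registered.")
for schedule in current:
    print(schedule)
```
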