FlowerPower 0.11.6.20__py3-none-any.whl → 0.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowerpower/__init__.py +2 -6
- flowerpower/cfg/__init__.py +7 -14
- flowerpower/cfg/base.py +29 -25
- flowerpower/cfg/pipeline/__init__.py +8 -6
- flowerpower/cfg/pipeline/_schedule.py +32 -0
- flowerpower/cfg/pipeline/adapter.py +0 -5
- flowerpower/cfg/pipeline/builder.py +377 -0
- flowerpower/cfg/pipeline/run.py +36 -0
- flowerpower/cfg/project/__init__.py +11 -24
- flowerpower/cfg/project/adapter.py +0 -12
- flowerpower/cli/__init__.py +2 -21
- flowerpower/cli/cfg.py +0 -3
- flowerpower/cli/mqtt.py +0 -6
- flowerpower/cli/pipeline.py +22 -415
- flowerpower/cli/utils.py +0 -1
- flowerpower/flowerpower.py +345 -146
- flowerpower/pipeline/__init__.py +2 -0
- flowerpower/pipeline/base.py +21 -12
- flowerpower/pipeline/io.py +58 -54
- flowerpower/pipeline/manager.py +165 -726
- flowerpower/pipeline/pipeline.py +643 -0
- flowerpower/pipeline/registry.py +285 -18
- flowerpower/pipeline/visualizer.py +5 -6
- flowerpower/plugins/io/__init__.py +8 -0
- flowerpower/plugins/mqtt/__init__.py +7 -11
- flowerpower/settings/__init__.py +0 -2
- flowerpower/settings/{backend.py → _backend.py} +0 -21
- flowerpower/settings/logging.py +1 -1
- flowerpower/utils/logging.py +24 -12
- flowerpower/utils/misc.py +17 -256
- flowerpower/utils/monkey.py +1 -83
- flowerpower-0.21.0.dist-info/METADATA +463 -0
- flowerpower-0.21.0.dist-info/RECORD +44 -0
- flowerpower/cfg/pipeline/schedule.py +0 -74
- flowerpower/cfg/project/job_queue.py +0 -238
- flowerpower/cli/job_queue.py +0 -1061
- flowerpower/fs/__init__.py +0 -29
- flowerpower/fs/base.py +0 -662
- flowerpower/fs/ext.py +0 -2143
- flowerpower/fs/storage_options.py +0 -1420
- flowerpower/job_queue/__init__.py +0 -294
- flowerpower/job_queue/apscheduler/__init__.py +0 -11
- flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -110
- flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -93
- flowerpower/job_queue/apscheduler/manager.py +0 -1051
- flowerpower/job_queue/apscheduler/setup.py +0 -554
- flowerpower/job_queue/apscheduler/trigger.py +0 -169
- flowerpower/job_queue/apscheduler/utils.py +0 -311
- flowerpower/job_queue/base.py +0 -413
- flowerpower/job_queue/rq/__init__.py +0 -10
- flowerpower/job_queue/rq/_trigger.py +0 -37
- flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +0 -226
- flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -231
- flowerpower/job_queue/rq/manager.py +0 -1582
- flowerpower/job_queue/rq/setup.py +0 -154
- flowerpower/job_queue/rq/utils.py +0 -69
- flowerpower/mqtt.py +0 -12
- flowerpower/pipeline/job_queue.py +0 -583
- flowerpower/pipeline/runner.py +0 -603
- flowerpower/plugins/io/base.py +0 -2520
- flowerpower/plugins/io/helpers/datetime.py +0 -298
- flowerpower/plugins/io/helpers/polars.py +0 -875
- flowerpower/plugins/io/helpers/pyarrow.py +0 -570
- flowerpower/plugins/io/helpers/sql.py +0 -202
- flowerpower/plugins/io/loader/__init__.py +0 -28
- flowerpower/plugins/io/loader/csv.py +0 -37
- flowerpower/plugins/io/loader/deltatable.py +0 -190
- flowerpower/plugins/io/loader/duckdb.py +0 -19
- flowerpower/plugins/io/loader/json.py +0 -37
- flowerpower/plugins/io/loader/mqtt.py +0 -159
- flowerpower/plugins/io/loader/mssql.py +0 -26
- flowerpower/plugins/io/loader/mysql.py +0 -26
- flowerpower/plugins/io/loader/oracle.py +0 -26
- flowerpower/plugins/io/loader/parquet.py +0 -35
- flowerpower/plugins/io/loader/postgres.py +0 -26
- flowerpower/plugins/io/loader/pydala.py +0 -19
- flowerpower/plugins/io/loader/sqlite.py +0 -23
- flowerpower/plugins/io/metadata.py +0 -244
- flowerpower/plugins/io/saver/__init__.py +0 -28
- flowerpower/plugins/io/saver/csv.py +0 -36
- flowerpower/plugins/io/saver/deltatable.py +0 -186
- flowerpower/plugins/io/saver/duckdb.py +0 -19
- flowerpower/plugins/io/saver/json.py +0 -36
- flowerpower/plugins/io/saver/mqtt.py +0 -28
- flowerpower/plugins/io/saver/mssql.py +0 -26
- flowerpower/plugins/io/saver/mysql.py +0 -26
- flowerpower/plugins/io/saver/oracle.py +0 -26
- flowerpower/plugins/io/saver/parquet.py +0 -36
- flowerpower/plugins/io/saver/postgres.py +0 -26
- flowerpower/plugins/io/saver/pydala.py +0 -20
- flowerpower/plugins/io/saver/sqlite.py +0 -24
- flowerpower/plugins/mqtt/cfg.py +0 -17
- flowerpower/plugins/mqtt/manager.py +0 -962
- flowerpower/settings/job_queue.py +0 -87
- flowerpower/utils/scheduler.py +0 -311
- flowerpower-0.11.6.20.dist-info/METADATA +0 -537
- flowerpower-0.11.6.20.dist-info/RECORD +0 -102
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/WHEEL +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/licenses/LICENSE +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/top_level.txt +0 -0
flowerpower/pipeline/manager.py
CHANGED
@@ -2,12 +2,12 @@ import datetime as dt
 import os
 import posixpath
 import sys
+import warnings
 from pathlib import Path
 from types import TracebackType
 from typing import Any, Callable, TypeVar, Union
 from uuid import UUID
 
-import duration_parser
 from loguru import logger
 from munch import Munch
 
@@ -16,21 +16,19 @@ try:
 except ImportError:
     Digraph = Any  # Type alias for when graphviz isn't installed
 
-from
+from fsspec_utils import AbstractFileSystem, BaseStorageOptions, filesystem
+
+from ..settings import CONFIG_DIR, PIPELINES_DIR, CACHE_DIR
 from ..cfg import PipelineConfig, ProjectConfig
 from ..cfg.pipeline.adapter import AdapterConfig as PipelineAdapterConfig
-from ..cfg.pipeline.run import ExecutorConfig, WithAdapterConfig
+from ..cfg.pipeline.run import ExecutorConfig, RunConfig, WithAdapterConfig
 from ..cfg.project.adapter import AdapterConfig as ProjectAdapterConfig
-from ..fs import AbstractFileSystem, BaseStorageOptions, get_filesystem
-from ..utils.callback import run_with_callback
 from ..utils.logging import setup_logging
 from .io import PipelineIOManager
-from .job_queue import PipelineJobQueue
 from .registry import HookType, PipelineRegistry
-from .runner import run_pipeline
 from .visualizer import PipelineVisualizer
 
-setup_logging(
+setup_logging()
 
 GraphType = TypeVar("GraphType")  # Type variable for graphviz.Digraph
 
@@ -66,7 +64,6 @@ class PipelineManager:
         >>> # Create manager with custom settings
         >>> manager = PipelineManager(
         ...     base_dir="/path/to/project",
-        ...     job_queue_type="rq",
         ...     log_level="DEBUG"
         ... )
     """
@@ -76,9 +73,9 @@ class PipelineManager:
         base_dir: str | None = None,
         storage_options: dict | Munch | BaseStorageOptions | None = None,
         fs: AbstractFileSystem | None = None,
-        cfg_dir: str | None =
-        pipelines_dir: str | None =
-
+        cfg_dir: str | None = CONFIG_DIR,
+        pipelines_dir: str | None = PIPELINES_DIR,
+
         log_level: str | None = None,
     ) -> None:
         """Initialize the PipelineManager.
@@ -97,8 +94,7 @@ class PipelineManager:
                 Example: "config" or "settings".
             pipelines_dir: Override default pipelines directory name ('pipelines').
                 Example: "flows" or "dags".
-
-                Valid values: "rq", "apscheduler", or "huey".
+
             log_level: Set logging level for the manager.
                 Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
 
@@ -118,7 +114,7 @@ class PipelineManager:
             ...     "key": "ACCESS_KEY",
             ...     "secret": "SECRET_KEY"
             ...     },
-
+
             ...     log_level="DEBUG"
             ... )
         """
@@ -130,7 +126,7 @@ class PipelineManager:
         if storage_options is not None:
             cached = True
             cache_storage = posixpath.join(
-                posixpath.expanduser(
+                posixpath.expanduser(CACHE_DIR),
                 self._base_dir.split("://")[-1],
             )
             os.makedirs(cache_storage, exist_ok=True)
@@ -138,7 +134,7 @@ class PipelineManager:
             cached = False
             cache_storage = None
         if not fs:
-            fs =
+            fs = filesystem(
                 self._base_dir,
                 storage_options=storage_options,
                 cached=cached,
@@ -156,17 +152,20 @@ class PipelineManager:
         self._pipelines_dir = pipelines_dir
 
         self._load_project_cfg(
-            reload=True
+            reload=True
         )  # Load project config
-
+
 
         # Ensure essential directories exist (using paths from loaded project_cfg)
         try:
             self._fs.makedirs(self._cfg_dir, exist_ok=True)
             self._fs.makedirs(self._pipelines_dir, exist_ok=True)
-        except
+        except (OSError, PermissionError) as e:
             logger.error(f"Error creating essential directories: {e}")
-
+            raise RuntimeError(f"Failed to create essential directories: {e}") from e
+        except Exception as e:
+            logger.error(f"Unexpected error creating essential directories: {e}")
+            raise RuntimeError(f"Unexpected filesystem error: {e}") from e
 
         # Ensure pipeline modules can be imported
         self._add_modules_path()
@@ -175,22 +174,12 @@ class PipelineManager:
         self.registry = PipelineRegistry(
             project_cfg=self.project_cfg,
             fs=self._fs,
-
-
-        )
-        pipeline_job_queue = PipelineJobQueue(
-            project_cfg=self.project_cfg,
-            fs=self._fs,
-            cfg_dir=self._cfg_dir,
-            pipelines_dir=self._pipelines_dir,
+            base_dir=self._base_dir,
+            storage_options=self._storage_options,
         )
-
-
-
-        )
-            self.jqm = None
-        else:
-            self.jqm = pipeline_job_queue
+
+        # Initialize project context (will be injected by FlowerPowerProject)
+        self._project_context = None
         self.visualizer = PipelineVisualizer(project_cfg=self.project_cfg, fs=self._fs)
         self.io = PipelineIOManager(registry=self.registry)
 
@@ -241,49 +230,6 @@ class PipelineManager:
         # Add cleanup code if needed
         pass
 
-    def _get_run_func(
-        self,
-        name: str,
-        reload: bool = False,
-        on_success: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-        on_failure: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-    ) -> Callable:
-        """Create a PipelineRunner instance and return its run method.
-
-        This internal helper method ensures that each job gets a fresh runner
-        with the correct configuration state.
-
-        Args:
-            name: Name of the pipeline to create runner for
-            reload: Whether to reload pipeline configuration
-
-        Returns:
-            Callable: Bound run method from a fresh PipelineRunner instance
-
-        Example:
-            >>> # Internal usage
-            >>> manager = PipelineManager()
-            >>> run_func = manager._get_run_func_for_job("data_pipeline")
-            >>> result = run_func(inputs={"date": "2025-04-28"})
-        """
-        if (
-            name == self._current_pipeline_name and not reload
-            # and hasattr(self, "_runner")
-        ):
-            # run_pipeline_ = partial(run_pipeline, project_cfg=self.project_cfg, pipeline_cfg=self._pipeline_cfg)
-            run_func = run_with_callback(on_success=on_success, on_failure=on_failure)(
-                run_pipeline
-            )
-            return run_func
-
-        _ = self.load_pipeline(name=name, reload=reload)
-        # run_pipeline_ = partial(run_pipeline, project_cfg=self.project_cfg, pipeline_cfg=pipeline_cfg)
-
-        run_func = run_with_callback(on_success=on_success, on_failure=on_failure)(
-            run_pipeline
-        )
-        return run_func
-
     def _add_modules_path(self) -> None:
         """Add pipeline module paths to Python path.
 
@@ -318,7 +264,7 @@ class PipelineManager:
         sys.path.insert(0, modules_path)
 
     def _load_project_cfg(
-        self, reload: bool = False
+        self, reload: bool = False
     ) -> ProjectConfig:
         """Load or reload the project configuration.
 
@@ -342,8 +288,8 @@ class PipelineManager:
             >>> # Internal usage
             >>> manager = PipelineManager()
             >>> project_cfg = manager._load_project_cfg(reload=True)
-            >>> print(project_cfg.
-            '
+            >>> print(project_cfg.name)
+            'my_project'
         """
         if hasattr(self, "_project_cfg") and not reload:
             return self._project_cfg
@@ -351,7 +297,6 @@ class PipelineManager:
         # Pass overrides to ProjectConfig.load
         self._project_cfg = ProjectConfig.load(
             base_dir=self._base_dir,
-            job_queue_type=job_queue_type,
             fs=self._fs,  # Pass pre-configured fs if provided
             storage_options=self._storage_options,
         )
@@ -426,8 +371,8 @@ class PipelineManager:
         Example:
             >>> manager = PipelineManager()
             >>> cfg = manager.project_cfg
-            >>> print(cfg.
-            '
+            >>> print(cfg.name)
+            'my_project'
         """
         if not hasattr(self, "_project_cfg"):
             self._load_project_cfg()
@@ -457,26 +402,82 @@ class PipelineManager:
 
     # --- Core Execution Method ---
 
+    def _merge_run_config_with_kwargs(self, run_config: RunConfig, kwargs: dict) -> RunConfig:
+        """Merge kwargs into a RunConfig object.
+
+        This helper method updates the RunConfig object with values from kwargs,
+        handling different types of attributes appropriately.
+
+        Args:
+            run_config: The RunConfig object to update
+            kwargs: Dictionary of additional parameters to merge
+
+        Returns:
+            RunConfig: Updated RunConfig object
+        """
+        # Handle dictionary-like attributes with update or deep merge
+        if 'inputs' in kwargs and kwargs['inputs'] is not None:
+            if run_config.inputs is None:
+                run_config.inputs = kwargs['inputs']
+            else:
+                run_config.inputs.update(kwargs['inputs'])
+
+        if 'config' in kwargs and kwargs['config'] is not None:
+            if run_config.config is None:
+                run_config.config = kwargs['config']
+            else:
+                run_config.config.update(kwargs['config'])
+
+        if 'cache' in kwargs and kwargs['cache'] is not None:
+            run_config.cache = kwargs['cache']
+
+        if 'adapter' in kwargs and kwargs['adapter'] is not None:
+            if run_config.adapter is None:
+                run_config.adapter = kwargs['adapter']
+            else:
+                run_config.adapter.update(kwargs['adapter'])
+
+        # Handle executor_cfg - convert string/dict to ExecutorConfig if needed
+        if 'executor_cfg' in kwargs and kwargs['executor_cfg'] is not None:
+            executor_cfg = kwargs['executor_cfg']
+            if isinstance(executor_cfg, str):
+                run_config.executor = ExecutorConfig(type=executor_cfg)
+            elif isinstance(executor_cfg, dict):
+                run_config.executor = ExecutorConfig.from_dict(executor_cfg)
+            elif isinstance(executor_cfg, ExecutorConfig):
+                run_config.executor = executor_cfg
+
+        # Handle adapter configurations
+        if 'with_adapter_cfg' in kwargs and kwargs['with_adapter_cfg'] is not None:
+            with_adapter_cfg = kwargs['with_adapter_cfg']
+            if isinstance(with_adapter_cfg, dict):
+                run_config.with_adapter = WithAdapterConfig.from_dict(with_adapter_cfg)
+            elif isinstance(with_adapter_cfg, WithAdapterConfig):
+                run_config.with_adapter = with_adapter_cfg
+
+        if 'pipeline_adapter_cfg' in kwargs and kwargs['pipeline_adapter_cfg'] is not None:
+            run_config.pipeline_adapter_cfg = kwargs['pipeline_adapter_cfg']
+
+        if 'project_adapter_cfg' in kwargs and kwargs['project_adapter_cfg'] is not None:
+            run_config.project_adapter_cfg = kwargs['project_adapter_cfg']
+
+        # Handle simple attributes
+        simple_attrs = [
+            'final_vars', 'reload', 'log_level', 'max_retries', 'retry_delay',
+            'jitter_factor', 'retry_exceptions', 'on_success', 'on_failure'
+        ]
+
+        for attr in simple_attrs:
+            if attr in kwargs and kwargs[attr] is not None:
+                setattr(run_config, attr, kwargs[attr])
+
+        return run_config
+
     def run(
         self,
         name: str,
-
-
-        config: dict | None = None,
-        cache: dict | None = None,
-        executor_cfg: str | dict | ExecutorConfig | None = None,
-        with_adapter_cfg: dict | WithAdapterConfig | None = None,
-        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
-        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
-        adapter: dict[str, Any] | None = None,
-        reload: bool = False,
-        log_level: str | None = None,
-        max_retries: int | None = None,
-        retry_delay: float | None = None,
-        jitter_factor: float | None = None,
-        retry_exceptions: tuple | list | None = None,
-        on_success: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-        on_failure: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
+        run_config: RunConfig | None = None,
+        **kwargs
     ) -> dict[str, Any]:
         """Execute a pipeline synchronously and return its results.
 
@@ -485,33 +486,36 @@ class PipelineManager:
 
         Args:
             name (str): Name of the pipeline to run. Must be a valid identifier.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            run_config (RunConfig | None): Run configuration object containing all execution parameters.
+                If None, the default configuration from the pipeline will be used.
+            **kwargs: Additional parameters to override the run_config. Supported parameters include:
+                inputs (dict | None): Override pipeline input values. Example: {"data_date": "2025-04-28"}
+                final_vars (list[str] | None): Specify which output variables to return.
+                    Example: ["model", "metrics"]
+                config (dict | None): Configuration for Hamilton pipeline executor.
+                    Example: {"model": "LogisticRegression"}
+                cache (dict | None): Cache configuration for results. Example: {"recompute": ["node1", "final_node"]}
+                executor_cfg (str | dict | ExecutorConfig | None): Execution configuration, can be:
+                    - str: Executor name, e.g. "threadpool", "local"
+                    - dict: Raw config, e.g. {"type": "threadpool", "max_workers": 4}
+                    - ExecutorConfig: Structured config object
+                with_adapter_cfg (dict | WithAdapterConfig | None): Adapter settings for pipeline execution.
+                    Example: {"opentelemetry": True, "tracker": False}
+                pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline-specific adapter settings.
+                    Example: {"tracker": {"project_id": "123", "tags": {"env": "prod"}}}
+                project_adapter_cfg (dict | ProjectAdapterConfig | None): Project-level adapter settings.
+                    Example: {"opentelemetry": {"host": "http://localhost:4317"}}
+                adapter (dict[str, Any] | None): Custom adapter instance for pipeline
+                    Example: {"ray_graph_adapter": RayGraphAdapter()}
+                reload (bool): Force reload of pipeline configuration.
+                log_level (str | None): Logging level for the execution. Default None uses project config.
+                    Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
+                max_retries (int): Maximum number of retries for execution.
+                retry_delay (float): Delay between retries in seconds.
+                jitter_factor (float): Random jitter factor to add to retry delay
+                retry_exceptions (tuple): Exceptions that trigger a retry.
+                on_success (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful pipeline execution.
+                on_failure (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on pipeline execution failure.
 
         Returns:
             dict[str, Any]: Pipeline execution results, mapping output variable names
@@ -530,46 +534,49 @@ class PipelineManager:
             >>> # Basic pipeline run
             >>> results = manager.run("data_pipeline")
             >>>
-            >>> #
+            >>> # Run with custom RunConfig
+            >>> from flowerpower.cfg.pipeline.run import RunConfig
+            >>> config = RunConfig(inputs={"date": "2025-04-28"}, final_vars=["result"])
+            >>> results = manager.run("ml_pipeline", run_config=config)
+            >>>
+            >>> # Complex run with kwargs overrides
             >>> results = manager.run(
-            ...
-            ...     inputs={
-            ...         "training_date": "2025-04-28",
-            ...         "model_params": {"n_estimators": 100}
-            ...     },
+            ...     "ml_pipeline",
+            ...     inputs={"training_date": "2025-04-28"},
             ...     final_vars=["model", "metrics"],
             ...     executor_cfg={"type": "threadpool", "max_workers": 4},
             ...     with_adapter_cfg={"tracker": True},
             ...     reload=True
             ... )
         """
-        #
-
-
+        # Initialize run_config - use provided config or load pipeline default
+        if run_config is None:
+            run_config = self.load_pipeline(name=name).run
+
+        # Merge kwargs into run_config
+        if kwargs:
+            run_config = self._merge_run_config_with_kwargs(run_config, kwargs)
+
+        # Set up logging for this specific run if log_level is provided
+        if run_config.log_level is not None:
+            setup_logging(level=run_config.log_level)
+        else:
+            # Ensure logging is reset to default if no specific level is provided for this run
+            setup_logging()
+
+        # Use injected project context, fallback to self for backward compatibility
+        project_context = getattr(self, "_project_context", self)
+
+        # Get Pipeline instance from registry
+        pipeline = self.registry.get_pipeline(
+            name=name, project_context=project_context, reload=run_config.reload
         )
 
-
-
-
-            inputs=inputs,
-            final_vars=final_vars,
-            config=config,
-            cache=cache,
-            executor_cfg=executor_cfg,
-            with_adapter_cfg=with_adapter_cfg,
-            pipeline_adapter_cfg=pipeline_adapter_cfg,
-            project_adapter_cfg=project_adapter_cfg,
-            adapter=adapter,
-            # reload=reload, # Runner handles module reload if needed
-            log_level=log_level,
-            max_retries=max_retries,
-            retry_delay=retry_delay,
-            jitter_factor=jitter_factor,
-            retry_exceptions=retry_exceptions,
+        # Execute pipeline using its own run method
+        return pipeline.run(
+            run_config=run_config,
         )
 
-        return res
-
         # --- Delegated Methods ---
 
         # Registry Delegations
@@ -1213,571 +1220,3 @@ class PipelineManager:
         return self.visualizer.show_dag(
             name=name, format=format, reload=reload, raw=raw
         )
-
-    def run_job(
-        self,
-        name: str,
-        inputs: dict | None = None,
-        final_vars: list[str] | None = None,
-        config: dict | None = None,
-        cache: bool | dict = False,
-        executor_cfg: str | dict | ExecutorConfig | None = None,
-        with_adapter_cfg: dict | WithAdapterConfig | None = None,
-        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
-        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
-        adapter: dict[str, Any] | None = None,
-        reload: bool = False,
-        log_level: str | None = None,
-        max_retries: int | None = None,
-        retry_delay: float | None = None,
-        jitter_factor: float | None = None,
-        retry_exceptions: tuple | list | None = None,
-        on_success: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-        on_failure: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-        on_success_pipeline: Callable
-        | tuple[Callable, tuple | None, dict | None]
-        | None = None,
-        on_failure_pipeline: Callable
-        | tuple[Callable, tuple | None, dict | None]
-        | None = None,
-        **kwargs: Any,
-    ) -> dict[str, Any] | None:
-        """Execute a pipeline job immediately through the job queue.
-
-        Unlike the run() method which executes synchronously, this method runs
-        the pipeline through the configured worker system (RQ, APScheduler, etc.).
-
-        If the job queue is not configured, it logs an error and returns None.
-
-        Args:
-            name (str): Name of the pipeline to run. Must be a valid identifier.
-            inputs (dict | None): Override pipeline input values. Example: {"data_date": "2025-04-28"}
-            final_vars (list[str] | None): Specify which output variables to return.
-                Example: ["model", "metrics"]
-            config (dict | None): Configuration for Hamilton pipeline executor.
-                Example: {"model": "LogisticRegression"}
-            cache (dict | None): Cache configuration for results. Example: {"recompute": ["node1", "final_node"]}
-            executor_cfg (str | dict | ExecutorConfig | None): Execution configuration, can be:
-                - str: Executor name, e.g. "threadpool", "local"
-                - dict: Raw config, e.g. {"type": "threadpool", "max_workers": 4}
-                - ExecutorConfig: Structured config object
-            with_adapter_cfg (dict | WithAdapterConfig | None): Adapter settings for pipeline execution.
-                Example: {"opentelemetry": True, "tracker": False}
-            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline-specific adapter settings.
-                Example: {"tracker": {"project_id": "123", "tags": {"env": "prod"}}}
-            project_adapter_cfg (dict | ProjectAdapterConfig | None): Project-level adapter settings.
-                Example: {"opentelemetry": {"host": "http://localhost:4317"}}
-            adapter (dict[str, Any] | None): Custom adapter instance for pipeline
-                Example: {"ray_graph_adapter": RayGraphAdapter()}
-            reload (bool): Force reload of pipeline configuration.
-            log_level (str | None): Logging level for the execution. Default None uses project config.
-                Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
-            max_retries (int): Maximum number of retries for execution.
-            retry_delay (float): Delay between retries in seconds.
-            jitter_factor (float): Random jitter factor to add to retry delay
-            retry_exceptions (tuple): Exceptions that trigger a retry.
-            on_success (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful job execution.
-                This runs after the pipeline execution through the job queue was executed successfully.
-            on_failure (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on job execution failure.
-                This runs if the job creation or the pipeline execution through the job queue fails or raises an exception.
-            on_success_pipeline (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful pipeline execution.
-                This runs after the pipeline completes successfully.
-            on_failure_pipeline (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on pipeline execution failure.
-                This runs if the pipeline fails or raises an exception.
-
-            **kwargs: JobQueue-specific arguments
-                For RQ:
-                    - queue_name: Queue to use (str)
-                    - retry: Number of retries (int)
-                    - result_ttl: Time to live for the job result (float or timedelta)
-                    - ttl: Time to live for the job (float or timedelta)
-                    - timeout: Time to wait for the job to complete (float or timedelta)
-                    - repeat: Repeat count (int or dict)
-                    - rq_on_failure: Callback function on failure (callable)
-                    - rq_on_success: Callback function on success (callable)
-                    - rq_on_stopped: Callback function on stop (callable)
-                For APScheduler:
-                    - job_executor: Executor type (str)
-
-        Returns:
-            dict[str, Any] | None: Job execution results if successful, otherwise None.
-
-        Raises:
-            ValueError: If pipeline or configuration is invalid
-            RuntimeError: If job execution fails
-
-        Example:
-            >>> from flowerpower.pipeline import PipelineManager
-            >>>
-            >>> manager = PipelineManager()
-            >>>
-            >>> # Simple job execution
-            >>> result = manager.run_job("data_pipeline")
-            >>>
-            >>> # Complex job with retry logic
-            >>> result = manager.run_job(
-            ...     name="ml_training",
-            ...     inputs={"training_date": "2025-04-28"},
-            ...     executor_cfg={"type": "async"},
-            ...     with_adapter_cfg={"enable_tracking": True},
-            ...     retry=3,
-            ...     queue_name="ml_jobs"
-            ... )
-        """
-        if self.jqm is None:
-            logger.error(
-                "This PipelineManager instance does not have a job queue configured. Skipping job execution."
-            )
-            return None
-
-        kwargs["on_success"] = kwargs.get("rq_on_success", None)
-        kwargs["on_failure"] = kwargs.get("rq_on_failure", None)
-        kwargs["on_stopped"] = kwargs.get("rq_on_stopped", None)
-
-        run_func = self._get_run_func(
-            name=name,
-            reload=reload,
-            on_success=on_success_pipeline,
-            on_failure=on_failure_pipeline,
-        )
-        # run_func = run_with_callback(on_success=on_success_pipeline, on_failure=on_failure_pipeline)(
-        #     run_func_
-        # )
-        run_job = run_with_callback(on_success=on_success, on_failure=on_failure)(
-            self.jqm.run_job
-        )
-
-        return run_job(
-            run_func=run_func,
-            pipeline_cfg=self._pipeline_cfg,
-            name=name,
-            inputs=inputs,
-            final_vars=final_vars,
-            config=config,
-            cache=cache,
-            executor_cfg=executor_cfg,
-            with_adapter_cfg=with_adapter_cfg,
-            pipeline_adapter_cfg=pipeline_adapter_cfg,
-            project_adapter_cfg=project_adapter_cfg,
-            adapter=adapter,
-            log_level=log_level,
-            max_retries=max_retries,
-            retry_delay=retry_delay,
-            jitter_factor=jitter_factor,
-            retry_exceptions=retry_exceptions,
-            **kwargs,
-        )
-
-    def add_job(
-        self,
-        name: str,
-        inputs: dict | None = None,
-        final_vars: list[str] | None = None,
-        config: dict | None = None,
-        cache: bool | dict = False,
-        executor_cfg: str | dict | ExecutorConfig | None = None,
-        with_adapter_cfg: dict | WithAdapterConfig | None = None,
-        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
-        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
-        adapter: dict[str, Any] | None = None,
-        reload: bool = False,  # Reload config/module before creating run_func
-        log_level: str | None = None,
-        result_ttl: int | dt.timedelta = 0,
-        run_at: dt.datetime | str | None = None,
-        run_in: dt.datetime | str | None = None,
-        max_retries: int = 3,
-        retry_delay: float = 1.0,
-        jitter_factor: float = 0.1,
-        retry_exceptions: tuple = (Exception,),
-        on_success: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-        on_failure: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-        on_success_pipeline: Callable
-        | tuple[Callable, tuple | None, dict | None]
-        | None = None,
-        on_failure_pipeline: Callable
-        | tuple[Callable, tuple | None, dict | None]
-        | None = None,
-        **kwargs,  # JobQueue specific args
-    ) -> str | UUID | None:
-        """Adds a job to the job queue.
-
-        If the job queue is not configured, it logs an error and returns None.
-
-        Args:
-            name (str): Name of the pipeline to run. Must be a valid identifier.
-            inputs (dict | None): Override pipeline input values. Example: {"data_date": "2025-04-28"}
-            final_vars (list[str] | None): Specify which output variables to return.
-                Example: ["model", "metrics"]
-            config (dict | None): Configuration for Hamilton pipeline executor.
-                Example: {"model": "LogisticRegression"}
-            cache (dict | None): Cache configuration for results. Example: {"recompute": ["node1", "final_node"]}
-            executor_cfg (str | dict | ExecutorConfig | None): Execution configuration, can be:
-                - str: Executor name, e.g. "threadpool", "local"
-                - dict: Raw config, e.g. {"type": "threadpool", "max_workers": 4}
-                - ExecutorConfig: Structured config object
-            with_adapter_cfg (dict | WithAdapterConfig | None): Adapter settings for pipeline execution.
-                Example: {"opentelemetry": True, "tracker": False}
-            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline-specific adapter settings.
-                Example: {"tracker": {"project_id": "123", "tags": {"env": "prod"}}}
-            project_adapter_cfg (dict | ProjectAdapterConfig | None): Project-level adapter settings.
-                Example: {"opentelemetry": {"host": "http://localhost:4317"}}
-            adapter (dict[str, Any] | None): Custom adapter instance for pipeline
-                Example: {"ray_graph_adapter": RayGraphAdapter()}
-            reload (bool): Force reload of pipeline configuration.
-            run_at (dt.datetime | str | None): Future date to run the job.
-                Example: datetime(2025, 4, 28, 12, 0)
-                Example str: "2025-04-28T12:00:00" (ISO format)
-            run_in (dt.datetime | str | None): Time interval to run the job.
-                Example: 3600 (every hour in seconds)
-                Example: datetime.timedelta(days=1)
-                Example str: "1d" (1 day)
-            result_ttl (int | dt.timedelta): Time to live for the job result.
-                Example: 3600 (1 hour in seconds)
-            log_level (str | None): Logging level for the execution. Default None uses project config.
-                Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
-            max_retries (int): Maximum number of retries for execution.
-            retry_delay (float): Delay between retries in seconds.
-            jitter_factor (float): Random jitter factor to add to retry delay
-            retry_exceptions (tuple): Exceptions that trigger a retry.
-            on_success (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful job creation.
-            on_failure (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on job creation failure.
-            on_success_pipeline (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful pipeline execution.
-            on_failure_pipeline (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on pipeline execution failure.
-            **kwargs: Additional keyword arguments passed to the worker's add_job method.
-                For RQ this includes:
-                    - result_ttl: Time to live for the job result (float or timedelta)
-                    - ttl: Time to live for the job (float or timedelta)
-                    - timeout: Time to wait for the job to complete (float or timedelta)
-                    - queue_name: Name of the queue to use (str)
-                    - retry: Number of retries (int)
-                    - repeat: Repeat count (int or dict)
-                    - rq_on_failure: Callback function on failure (callable)
-                    - rq_on_success: Callback function on success (callable)
-                    - rq_on_stopped: Callback function on stop (callable)
-                For APScheduler, this includes:
-                    - job_executor: Job executor to use (str)
-
-        Returns:
-            str | UUID | None: The ID of the job that was added to the job queue, or None if the job queue is not configured.
-
-        Raises:
-            ValueError: If the job ID is not valid or if the job cannot be scheduled.
-
-        Example:
-            >>> from flowerpower.pipeline import PipelineManager
-            >>> pm = PipelineManager()
-            >>> job_id = pm.add_job("example_pipeline", inputs={"input1": 42})
-
-        """
-        if self.jqm is None:
-            logger.error(
-                "This PipelineManager instance does not have a job queue configured. Skipping job execution."
-            )
-            return None
-
-        kwargs["on_success"] = kwargs.get("rq_on_success", None)
-        kwargs["on_failure"] = kwargs.get("rq_on_failure", None)
-        kwargs["on_stopped"] = kwargs.get("rq_on_stopped", None)
-
-        run_func = self._get_run_func(
-            name=name,
-            reload=reload,
-            on_success=on_success_pipeline,
-            on_failure=on_failure_pipeline,
-        )
-
-        run_in = (
-            duration_parser.parse(run_in) if isinstance(run_in, str) else run_in
-        )  # convert to seconds
-        run_at = (
-            dt.datetime.fromisoformat(run_at) if isinstance(run_at, str) else run_at
-        )
-
-        add_job = run_with_callback(on_success=on_success, on_failure=on_failure)(
-            self.jqm.add_job
-        )
-        return add_job(
-            run_func=run_func,
-            pipeline_cfg=self._pipeline_cfg,
-            name=name,  # Pass name for logging
-            # Pass run parameters
-            inputs=inputs,
-            final_vars=final_vars,
-            config=config,
-            cache=cache,
-            executor_cfg=executor_cfg,
-            with_adapter_cfg=with_adapter_cfg,
-            pipeline_adapter_cfg=pipeline_adapter_cfg,
-            project_adapter_cfg=project_adapter_cfg,
-            adapter=adapter,
-            # reload=reload, # Note: reload already happened
-            log_level=log_level,
-            result_ttl=result_ttl,
-            run_at=run_at,
-            run_in=run_in,
-            max_retries=max_retries,
-            retry_delay=retry_delay,
-            jitter_factor=jitter_factor,
-            retry_exceptions=retry_exceptions,
-            **kwargs,  # Pass worker args
-        )
-
-    def schedule(
-        self,
-        name: str,
-        inputs: dict | None = None,
-        final_vars: list[str] | None = None,
-        config: dict | None = None,
-        cache: bool | dict = False,
-        executor_cfg: str | dict | ExecutorConfig | None = None,
-        with_adapter_cfg: dict | WithAdapterConfig | None = None,
-        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
-        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
-        adapter: dict[str, Any] | None = None,
-        reload: bool = False,
-        log_level: str | None = None,
-        cron: str | dict[str, str | int] | None = None,
-        interval: int | str | dict[str, str | int] | None = None,
-        date: dt.datetime | str | None = None,
-        overwrite: bool = False,
-        schedule_id: str | None = None,
-        max_retries: int | None = None,
-        retry_delay: float | None = None,
-        jitter_factor: float | None = None,
-        retry_exceptions: tuple | list | None = None,
-        on_success: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-        on_failure: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-        on_success_pipeline: Callable
-        | tuple[Callable, tuple | None, dict | None]
-        | None = None,
-        on_failure_pipeline: Callable
-        | tuple[Callable, tuple | None, dict | None]
-        | None = None,
-        **kwargs: Any,
-    ) -> str | UUID | None:
-        """Schedule a pipeline to run on a recurring or future basis.
-
-        If the job queue is not configured, it logs an error and returns None.
-
-        Args:
-            name (str): The name of the pipeline to run.
-            inputs (dict | None): Inputs for the pipeline run (overrides config).
-            final_vars (list[str] | None): Final variables for the pipeline run (overrides config).
-            config (dict | None): Hamilton driver config (overrides config).
-            cache (bool | dict): Cache settings (overrides config).
-            executor_cfg (str | dict | ExecutorConfig | None): Executor configuration (overrides config).
-            with_adapter_cfg (dict | WithAdapterConfig | None): Adapter configuration (overrides config).
-            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline adapter configuration (overrides config).
-            project_adapter_cfg (dict | ProjectAdapterConfig | None): Project adapter configuration (overrides config).
-            adapter (dict[str, Any] | None): Additional Hamilton adapters (overrides config).
-            reload (bool): Whether to reload module and pipeline config. Defaults to False.
-            log_level (str | None): Log level for the run (overrides config).
-            cron (str | dict[str, str | int] | None): Cron expression or settings
-                Example string: "0 0 * * *" (daily at midnight)
-                Example dict: {"minute": "0", "hour": "*/2"} (every 2 hours)
-            interval (int | str | dict[str, str | int] | None): Time interval for recurring execution
-                Example int: 3600 (every hour in seconds)
-                Example str: "1h" (every hour)
-                Example dict: {"hours": 1, "minutes": 30} (every 90 minutes)
-            date (dt.datetime | str | None): Future date for
-                Example: datetime(2025, 4, 28, 12, 0)
-                Example str: "2025-04-28T12:00:00" (ISO format)
-            overwrite (bool): Whether to overwrite existing schedule with the same ID
-            schedule_id (str | None): Unique identifier for the schedule
-            max_retries (int): Maximum number of retries for execution
-            retry_delay (float): Delay between retries in seconds
-            jitter_factor (float): Random jitter factor to add to retry delay
-            retry_exceptions (tuple): Exceptions that trigger a retry
-            on_success (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful schedule creation.
-            on_failure (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on schedule creation failure.
-            on_success_pipeline (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful pipeline execution.
-            on_failure_pipeline (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on pipeline execution failure.
-            **kwargs: JobQueue-specific scheduling options
-                For RQ:
-                    - result_ttl: Result lifetime (int seconds)
-                    - ttl: Job lifetime (int seconds)
-                    - timeout: Job execution timeout (int seconds)
-                    - queue_name: Queue to use (str)
-                    - repeat: Repeat count (int or dict)
-                    - rq_on_failure: Callback function on failure (callable)
-                    - rq_on_success: Callback function on success (callable)
-                    - rq_on_stopped: Callback function on stop (callable)
-                For APScheduler:
-                    - misfire_grace_time: Late execution window
-                    - coalesce: Combine missed executions (bool)
-                    - max_running_jobs: Concurrent instances limit (int)
-
-        Returns:
-            str | UUID | None: Unique identifier for the created schedule, or None if scheduling fails.
-
-        Raises:
-            ValueError: If schedule parameters are invalid
-            RuntimeError: If scheduling fails
-
-        Example:
-            >>> from flowerpower.pipeline import PipelineManager
-            >>> from datetime import datetime, timedelta
-            >>>
-            >>> manager = PipelineManager()
-            >>>
-            >>> # Daily schedule with cron
-            >>> schedule_id = manager.schedule(
-            ...     name="daily_metrics",
-            ...     cron="0 0 * * *",
-            ...     inputs={"date": "{{ execution_date }}"}
-            ... )
-            >>>
-            >>> # Interval-based schedule
-            >>> schedule_id = manager.schedule(
-            ...     name="monitoring",
-            ...     interval={"minutes": 15},
-            ...     with_adapter_cfg={"enable_alerts": True}
-            ... )
-            >>>
-            >>> # Future one-time execution
-            >>> future_date = datetime.now() + timedelta(days=1)
-            >>> schedule_id = manager.schedule(
-            ...     name="batch_process",
-            ...     date=future_date,
-            ...     executor_cfg={"type": "async"}
-            ... )
-        """
-        if self.jqm is None:
-            logger.error(
-                "This PipelineManager instance does not have a job queue configured. Skipping job execution."
-            )
-            return None
-
-        kwargs["on_success"] = kwargs.get("rq_on_success", None)
-        kwargs["on_failure"] = kwargs.get("rq_on_failure", None)
-        kwargs["on_stopped"] = kwargs.get("rq_on_stopped", None)
-
-        # pipeline_cfg = self._load_pipeline_cfg(name=name, reload=reload)
-        run_func = self._get_run_func(
-            name=name,
-            reload=reload,
-            on_success=on_success_pipeline,
-            on_failure=on_failure_pipeline,
-        )
-        interval = (
-            duration_parser.parse(interval) if isinstance(interval, str) else interval
-        )
-        date = dt.datetime.fromisoformat(date) if isinstance(date, str) else date
-
-        schedule = run_with_callback(on_success=on_success, on_failure=on_failure)(
-            self.jqm.schedule
-        )
-        return schedule(
-            run_func=run_func,
-            pipeline_cfg=self._pipeline_cfg,
-            inputs=inputs,
-            final_vars=final_vars,
-            config=config,
-            cache=cache,
-            executor_cfg=executor_cfg,
-            with_adapter_cfg=with_adapter_cfg,
-            pipeline_adapter_cfg=pipeline_adapter_cfg,
-            project_adapter_cfg=project_adapter_cfg,
-            adapter=adapter,
-            reload=reload,
-            log_level=log_level,
-            cron=cron,
-            interval=interval,
-            date=date,
-            overwrite=overwrite,
-            schedule_id=schedule_id,
-            max_retries=max_retries,
-            retry_delay=retry_delay,
-            jitter_factor=jitter_factor,
-            retry_exceptions=retry_exceptions,
-            **kwargs,
-        )
-
-    def schedule_all(self, **kwargs: Any) -> None:
-        """Schedule all pipelines that are enabled in their configuration.
-
-        For each enabled pipeline, applies its configured schedule settings
-        and any provided overrides.
-
-        Args:
-            **kwargs: Overrides for schedule settings that apply to all pipelines.
-                See schedule() method for supported arguments.
-
-        Example:
-            >>> from flowerpower.pipeline import PipelineManager
-            >>>
-            >>> manager = PipelineManager()
-            >>>
-            >>> # Schedule all with default settings
-            >>> manager.schedule_all()
-            >>>
-            >>> # Schedule all with common overrides
-            >>> manager.schedule_all(
-            ...     max_running_jobs=2,
-            ...     coalesce=True,
-            ...     misfire_grace_time=300
-            ... )
-        """
-        scheduled_ids = []
-        errors = []
-        pipeline_names = self.list_pipelines()
-        if not pipeline_names:
-            logger.warning("No pipelines found to schedule.")
-            return
-
-        logger.info(f"Attempting to schedule {len(pipeline_names)} pipelines...")
-        for name in pipeline_names:
-            try:
-                pipeline_cfg = self.load_pipeline(name=name, reload=True)
-
-                if not pipeline_cfg.schedule.enabled:
-                    logger.info(
-                        f"Skipping scheduling for '{name}': Not enabled in config."
-                    )
-                    continue
-
-                logger.info(f"Scheduling [cyan]{name}[/cyan]...")
-                schedule_id = self.schedule(name=name, reload=False, **kwargs)
-                if schedule_id is None:
-                    logger.info(
-                        f"🟡 Skipping adding schedule for [cyan]{name}[/cyan]: Job queue backend not available or scheduling failed."
-                    )
-                    continue
-                scheduled_ids.append(schedule_id)
-            except Exception as e:
-                logger.error(f"Failed to schedule pipeline '{name}': {e}")
-                errors.append(name)
-
-        if errors:
-            logger.error(f"Finished scheduling with errors for: {', '.join(errors)}")
-        else:
-            logger.info(f"Successfully scheduled {len(scheduled_ids)} pipelines.")
-
-    @property
-    def schedules(self) -> list[Any]:
-        """Get list of current pipeline schedules.
-
-        Retrieves all active schedules from the worker system.
-
-        Returns:
-            list[Any]: List of schedule objects. Exact type depends on worker:
-                - RQ: List[rq.job.Job]
-                - APScheduler: List[apscheduler.schedulers.base.Schedule]
-
-        Example:
-            >>> from flowerpower.pipeline import PipelineManager
-            >>>
-            >>> manager = PipelineManager()
-            >>> for schedule in manager.schedules:
-            ...     print(f"{schedule.id}: Next run at {schedule.next_run_time}")
-        """
-        if self.jqm is None:
-            logger.error(
-                "This PipelineManager instance does not have a job queue configured. Skipping schedule retrieval."
-            )
-            return []
-        try:
-            return self.jqm._get_schedules()
-        except Exception as e:
-            logger.error(f"Failed to retrieve schedules: {e}")
-            return []