FlowerPower 0.11.6.20__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowerpower/cfg/__init__.py +3 -3
- flowerpower/cfg/pipeline/__init__.py +5 -3
- flowerpower/cfg/project/__init__.py +3 -3
- flowerpower/cfg/project/job_queue.py +1 -128
- flowerpower/cli/__init__.py +5 -5
- flowerpower/cli/cfg.py +0 -3
- flowerpower/cli/job_queue.py +400 -132
- flowerpower/cli/pipeline.py +14 -413
- flowerpower/cli/utils.py +0 -1
- flowerpower/flowerpower.py +537 -28
- flowerpower/job_queue/__init__.py +5 -94
- flowerpower/job_queue/base.py +201 -3
- flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -3
- flowerpower/job_queue/rq/manager.py +388 -77
- flowerpower/pipeline/__init__.py +2 -0
- flowerpower/pipeline/base.py +2 -2
- flowerpower/pipeline/io.py +14 -16
- flowerpower/pipeline/manager.py +21 -642
- flowerpower/pipeline/pipeline.py +571 -0
- flowerpower/pipeline/registry.py +242 -10
- flowerpower/pipeline/visualizer.py +1 -2
- flowerpower/plugins/_io/__init__.py +8 -0
- flowerpower/plugins/mqtt/manager.py +6 -6
- flowerpower/settings/backend.py +0 -2
- flowerpower/settings/job_queue.py +1 -57
- flowerpower/utils/misc.py +0 -256
- flowerpower/utils/monkey.py +1 -83
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.20.0.dist-info}/METADATA +308 -152
- flowerpower-0.20.0.dist-info/RECORD +58 -0
- flowerpower/fs/__init__.py +0 -29
- flowerpower/fs/base.py +0 -662
- flowerpower/fs/ext.py +0 -2143
- flowerpower/fs/storage_options.py +0 -1420
- flowerpower/job_queue/apscheduler/__init__.py +0 -11
- flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -110
- flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -93
- flowerpower/job_queue/apscheduler/manager.py +0 -1051
- flowerpower/job_queue/apscheduler/setup.py +0 -554
- flowerpower/job_queue/apscheduler/trigger.py +0 -169
- flowerpower/job_queue/apscheduler/utils.py +0 -311
- flowerpower/pipeline/job_queue.py +0 -583
- flowerpower/pipeline/runner.py +0 -603
- flowerpower/plugins/io/base.py +0 -2520
- flowerpower/plugins/io/helpers/datetime.py +0 -298
- flowerpower/plugins/io/helpers/polars.py +0 -875
- flowerpower/plugins/io/helpers/pyarrow.py +0 -570
- flowerpower/plugins/io/helpers/sql.py +0 -202
- flowerpower/plugins/io/loader/__init__.py +0 -28
- flowerpower/plugins/io/loader/csv.py +0 -37
- flowerpower/plugins/io/loader/deltatable.py +0 -190
- flowerpower/plugins/io/loader/duckdb.py +0 -19
- flowerpower/plugins/io/loader/json.py +0 -37
- flowerpower/plugins/io/loader/mqtt.py +0 -159
- flowerpower/plugins/io/loader/mssql.py +0 -26
- flowerpower/plugins/io/loader/mysql.py +0 -26
- flowerpower/plugins/io/loader/oracle.py +0 -26
- flowerpower/plugins/io/loader/parquet.py +0 -35
- flowerpower/plugins/io/loader/postgres.py +0 -26
- flowerpower/plugins/io/loader/pydala.py +0 -19
- flowerpower/plugins/io/loader/sqlite.py +0 -23
- flowerpower/plugins/io/metadata.py +0 -244
- flowerpower/plugins/io/saver/__init__.py +0 -28
- flowerpower/plugins/io/saver/csv.py +0 -36
- flowerpower/plugins/io/saver/deltatable.py +0 -186
- flowerpower/plugins/io/saver/duckdb.py +0 -19
- flowerpower/plugins/io/saver/json.py +0 -36
- flowerpower/plugins/io/saver/mqtt.py +0 -28
- flowerpower/plugins/io/saver/mssql.py +0 -26
- flowerpower/plugins/io/saver/mysql.py +0 -26
- flowerpower/plugins/io/saver/oracle.py +0 -26
- flowerpower/plugins/io/saver/parquet.py +0 -36
- flowerpower/plugins/io/saver/postgres.py +0 -26
- flowerpower/plugins/io/saver/pydala.py +0 -20
- flowerpower/plugins/io/saver/sqlite.py +0 -24
- flowerpower/utils/scheduler.py +0 -311
- flowerpower-0.11.6.20.dist-info/RECORD +0 -102
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.20.0.dist-info}/WHEEL +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.20.0.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.20.0.dist-info}/licenses/LICENSE +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.20.0.dist-info}/top_level.txt +0 -0
flowerpower/pipeline/manager.py
CHANGED
@@ -2,12 +2,12 @@ import datetime as dt
 import os
 import posixpath
 import sys
+import warnings
 from pathlib import Path
 from types import TracebackType
 from typing import Any, Callable, TypeVar, Union
 from uuid import UUID

-import duration_parser
 from loguru import logger
 from munch import Munch

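One note on this hunk: `duration_parser` drops out of the imports because its only consumers in this file, the string-parsing branches of the removed `add_job()` and `schedule()` methods further down this diff, are gone. A small sketch of what it was doing there (the seconds-based return value is inferred from the old `# convert to seconds` comment, not verified against the package):

```python
# duration_parser was only used to turn human-readable intervals into seconds
# inside the removed add_job()/schedule() methods, e.g.:
import duration_parser  # third-party package, no longer imported by manager.py

seconds = duration_parser.parse("1h")  # presumably 3600, per the old comment
print(seconds)
```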
@@ -16,18 +16,16 @@ try:
 except ImportError:
     Digraph = Any  # Type alias for when graphviz isn't installed

+from fsspec_utils import AbstractFileSystem, BaseStorageOptions, filesystem
+
 from .. import settings
 from ..cfg import PipelineConfig, ProjectConfig
 from ..cfg.pipeline.adapter import AdapterConfig as PipelineAdapterConfig
 from ..cfg.pipeline.run import ExecutorConfig, WithAdapterConfig
 from ..cfg.project.adapter import AdapterConfig as ProjectAdapterConfig
-from ..fs import AbstractFileSystem, BaseStorageOptions, get_filesystem
-from ..utils.callback import run_with_callback
 from ..utils.logging import setup_logging
 from .io import PipelineIOManager
-from .job_queue import PipelineJobQueue
 from .registry import HookType, PipelineRegistry
-from .runner import run_pipeline
 from .visualizer import PipelineVisualizer

 setup_logging(level=settings.LOG_LEVEL)
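The import hunk above carries the headline change of this release: the in-tree `flowerpower.fs` module (deleted wholesale, per the file list) gives way to the external `fsspec_utils` package, with `get_filesystem` renamed to `filesystem`, while the `PipelineJobQueue` and `run_pipeline` imports disappear along with the modules that provided them. A minimal migration sketch for downstream code, assuming `filesystem` accepts the same arguments the manager passes to it later in this diff:

```python
# 0.11.x (old in-tree module, removed in 0.20.0):
#   from flowerpower.fs import get_filesystem
#   fs = get_filesystem(".", storage_options=None)

# 0.20.0 (external package; names taken from the new import line):
from fsspec_utils import filesystem

fs = filesystem(".", storage_options=None)  # local dir; remote URIs presumably work too
```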
@@ -98,7 +96,7 @@ class PipelineManager:
             pipelines_dir: Override default pipelines directory name ('pipelines').
                 Example: "flows" or "dags".
             job_queue_type: Override worker type from project config/settings.
-                Valid values: "rq"
+                Valid values: "rq".
             log_level: Set logging level for the manager.
                 Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"

@@ -138,7 +136,7 @@ class PipelineManager:
         cached = False
         cache_storage = None
         if not fs:
-            fs = get_filesystem(
+            fs = filesystem(
                 self._base_dir,
                 storage_options=storage_options,
                 cached=cached,
@@ -175,22 +173,12 @@ class PipelineManager:
         self.registry = PipelineRegistry(
             project_cfg=self.project_cfg,
             fs=self._fs,
-
-
-        )
-        pipeline_job_queue = PipelineJobQueue(
-            project_cfg=self.project_cfg,
-            fs=self._fs,
-            cfg_dir=self._cfg_dir,
-            pipelines_dir=self._pipelines_dir,
+            base_dir=self._base_dir,
+            storage_options=self._storage_options,
         )
-
-
-
-        )
-        self.jqm = None
-        else:
-            self.jqm = pipeline_job_queue
+
+        # Initialize project context (will be injected by FlowerPowerProject)
+        self._project_context = None
         self.visualizer = PipelineVisualizer(project_cfg=self.project_cfg, fs=self._fs)
         self.io = PipelineIOManager(registry=self.registry)

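Two initialization changes land in this hunk: `PipelineRegistry` now receives `base_dir` and `storage_options` directly, and the manager no longer constructs a `PipelineJobQueue`; in its place a `_project_context` slot is initialized to `None`, to be filled in later by `FlowerPowerProject` per the added comment. A quick behavioral check, assuming a default-constructible manager as in the 0.11.x docstring examples:

```python
from flowerpower.pipeline import PipelineManager

manager = PipelineManager()

# New in 0.20.0: an empty context slot that FlowerPowerProject fills in later.
assert manager._project_context is None

# The job-queue handle is no longer created here; every `self.jqm` assignment
# visible in this diff was removed.
print(getattr(manager, "jqm", "no job-queue handle"))
```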
@@ -241,49 +229,6 @@ class PipelineManager:
         # Add cleanup code if needed
         pass

-    def _get_run_func(
-        self,
-        name: str,
-        reload: bool = False,
-        on_success: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-        on_failure: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-    ) -> Callable:
-        """Create a PipelineRunner instance and return its run method.
-
-        This internal helper method ensures that each job gets a fresh runner
-        with the correct configuration state.
-
-        Args:
-            name: Name of the pipeline to create runner for
-            reload: Whether to reload pipeline configuration
-
-        Returns:
-            Callable: Bound run method from a fresh PipelineRunner instance
-
-        Example:
-            >>> # Internal usage
-            >>> manager = PipelineManager()
-            >>> run_func = manager._get_run_func_for_job("data_pipeline")
-            >>> result = run_func(inputs={"date": "2025-04-28"})
-        """
-        if (
-            name == self._current_pipeline_name and not reload
-            # and hasattr(self, "_runner")
-        ):
-            # run_pipeline_ = partial(run_pipeline, project_cfg=self.project_cfg, pipeline_cfg=self._pipeline_cfg)
-            run_func = run_with_callback(on_success=on_success, on_failure=on_failure)(
-                run_pipeline
-            )
-            return run_func
-
-        _ = self.load_pipeline(name=name, reload=reload)
-        # run_pipeline_ = partial(run_pipeline, project_cfg=self.project_cfg, pipeline_cfg=pipeline_cfg)
-
-        run_func = run_with_callback(on_success=on_success, on_failure=on_failure)(
-            run_pipeline
-        )
-        return run_func
-
     def _add_modules_path(self) -> None:
         """Add pipeline module paths to Python path.

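The deleted `_get_run_func` helper existed only to wrap `run_pipeline` in success and failure callbacks via `run_with_callback`, whose import is likewise dropped above. For readers unfamiliar with the pattern, here is a rough sketch of what such a decorator factory does; this is a guess at the general shape, not the actual `flowerpower.utils.callback` implementation, and it omits the `(callable, args, kwargs)` tuple form that the real type hints allow:

```python
from typing import Any, Callable

def run_with_callback(
    on_success: Callable | None = None,
    on_failure: Callable | None = None,
) -> Callable:
    """Hypothetical reconstruction of a callback-wrapping decorator factory."""
    def decorator(func: Callable) -> Callable:
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            try:
                result = func(*args, **kwargs)
            except Exception:
                if on_failure is not None:
                    on_failure()
                raise
            if on_success is not None:
                on_success()
            return result
        return wrapper
    return decorator

# Usage mirroring the removed helper:
# run_func = run_with_callback(on_success=ping, on_failure=alert)(run_pipeline)
```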
@@ -543,14 +488,16 @@ class PipelineManager:
             ...     reload=True
             ... )
         """
-        #
-
-
+        # Use injected project context, fallback to self for backward compatibility
+        project_context = getattr(self, "_project_context", self)
+
+        # Get Pipeline instance from registry
+        pipeline = self.registry.get_pipeline(
+            name=name, project_context=project_context, reload=reload
         )

-
-
-            pipeline_cfg=self._pipeline_cfg,
+        # Execute pipeline using its own run method
+        return pipeline.run(
             inputs=inputs,
             final_vars=final_vars,
             config=config,
@@ -560,16 +507,16 @@ class PipelineManager:
             pipeline_adapter_cfg=pipeline_adapter_cfg,
             project_adapter_cfg=project_adapter_cfg,
             adapter=adapter,
-
+            reload=reload,
             log_level=log_level,
             max_retries=max_retries,
             retry_delay=retry_delay,
             jitter_factor=jitter_factor,
             retry_exceptions=retry_exceptions,
+            on_success=on_success,
+            on_failure=on_failure,
         )

-        return res
-
     # --- Delegated Methods ---

     # Registry Delegations
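Taken together, the two hunks above turn `run()` into a thin facade: it resolves a `Pipeline` object from the registry, handing over the injected project context, and returns `pipeline.run(...)` directly, now forwarding `reload` plus the `on_success`/`on_failure` callbacks instead of post-processing a `res` value. The public entry point is unchanged; a caller-side sketch, where the pipeline name and callback are illustrative and the argument names and example values come from this file's docstrings:

```python
from flowerpower.pipeline import PipelineManager

def notify(msg: str) -> None:
    print(f"[pipeline] {msg}")

manager = PipelineManager()
result = manager.run(
    "data_pipeline",                     # illustrative pipeline name
    inputs={"data_date": "2025-04-28"},  # example values from the docstrings
    final_vars=["model", "metrics"],
    on_success=(notify, ("run succeeded",), None),  # tuple form per the type hints
)
```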
@@ -1213,571 +1160,3 @@ class PipelineManager:
         return self.visualizer.show_dag(
             name=name, format=format, reload=reload, raw=raw
         )
-
-    def run_job(
-        self,
-        name: str,
-        inputs: dict | None = None,
-        final_vars: list[str] | None = None,
-        config: dict | None = None,
-        cache: bool | dict = False,
-        executor_cfg: str | dict | ExecutorConfig | None = None,
-        with_adapter_cfg: dict | WithAdapterConfig | None = None,
-        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
-        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
-        adapter: dict[str, Any] | None = None,
-        reload: bool = False,
-        log_level: str | None = None,
-        max_retries: int | None = None,
-        retry_delay: float | None = None,
-        jitter_factor: float | None = None,
-        retry_exceptions: tuple | list | None = None,
-        on_success: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-        on_failure: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-        on_success_pipeline: Callable
-        | tuple[Callable, tuple | None, dict | None]
-        | None = None,
-        on_failure_pipeline: Callable
-        | tuple[Callable, tuple | None, dict | None]
-        | None = None,
-        **kwargs: Any,
-    ) -> dict[str, Any] | None:
-        """Execute a pipeline job immediately through the job queue.
-
-        Unlike the run() method which executes synchronously, this method runs
-        the pipeline through the configured worker system (RQ, APScheduler, etc.).
-
-        If the job queue is not configured, it logs an error and returns None.
-
-        Args:
-            name (str): Name of the pipeline to run. Must be a valid identifier.
-            inputs (dict | None): Override pipeline input values. Example: {"data_date": "2025-04-28"}
-            final_vars (list[str] | None): Specify which output variables to return.
-                Example: ["model", "metrics"]
-            config (dict | None): Configuration for Hamilton pipeline executor.
-                Example: {"model": "LogisticRegression"}
-            cache (dict | None): Cache configuration for results. Example: {"recompute": ["node1", "final_node"]}
-            executor_cfg (str | dict | ExecutorConfig | None): Execution configuration, can be:
-                - str: Executor name, e.g. "threadpool", "local"
-                - dict: Raw config, e.g. {"type": "threadpool", "max_workers": 4}
-                - ExecutorConfig: Structured config object
-            with_adapter_cfg (dict | WithAdapterConfig | None): Adapter settings for pipeline execution.
-                Example: {"opentelemetry": True, "tracker": False}
-            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline-specific adapter settings.
-                Example: {"tracker": {"project_id": "123", "tags": {"env": "prod"}}}
-            project_adapter_cfg (dict | ProjectAdapterConfig | None): Project-level adapter settings.
-                Example: {"opentelemetry": {"host": "http://localhost:4317"}}
-            adapter (dict[str, Any] | None): Custom adapter instance for pipeline
-                Example: {"ray_graph_adapter": RayGraphAdapter()}
-            reload (bool): Force reload of pipeline configuration.
-            log_level (str | None): Logging level for the execution. Default None uses project config.
-                Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
-            max_retries (int): Maximum number of retries for execution.
-            retry_delay (float): Delay between retries in seconds.
-            jitter_factor (float): Random jitter factor to add to retry delay
-            retry_exceptions (tuple): Exceptions that trigger a retry.
-            on_success (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful job execution.
-                This runs after the pipeline execution through the job queue was executed successfully.
-            on_failure (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on job execution failure.
-                This runs if the job creation or the pipeline execution through the job queue fails or raises an exception.
-            on_success_pipeline (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful pipeline execution.
-                This runs after the pipeline completes successfully.
-            on_failure_pipeline (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on pipeline execution failure.
-                This runs if the pipeline fails or raises an exception.
-
-            **kwargs: JobQueue-specific arguments
-                For RQ:
-                - queue_name: Queue to use (str)
-                - retry: Number of retries (int)
-                - result_ttl: Time to live for the job result (float or timedelta)
-                - ttl: Time to live for the job (float or timedelta)
-                - timeout: Time to wait for the job to complete (float or timedelta)
-                - repeat: Repeat count (int or dict)
-                - rq_on_failure: Callback function on failure (callable)
-                - rq_on_success: Callback function on success (callable)
-                - rq_on_stopped: Callback function on stop (callable)
-                For APScheduler:
-                - job_executor: Executor type (str)
-
-        Returns:
-            dict[str, Any] | None: Job execution results if successful, otherwise None.
-
-        Raises:
-            ValueError: If pipeline or configuration is invalid
-            RuntimeError: If job execution fails
-
-        Example:
-            >>> from flowerpower.pipeline import PipelineManager
-            >>>
-            >>> manager = PipelineManager()
-            >>>
-            >>> # Simple job execution
-            >>> result = manager.run_job("data_pipeline")
-            >>>
-            >>> # Complex job with retry logic
-            >>> result = manager.run_job(
-            ...     name="ml_training",
-            ...     inputs={"training_date": "2025-04-28"},
-            ...     executor_cfg={"type": "async"},
-            ...     with_adapter_cfg={"enable_tracking": True},
-            ...     retry=3,
-            ...     queue_name="ml_jobs"
-            ... )
-        """
-        if self.jqm is None:
-            logger.error(
-                "This PipelineManager instance does not have a job queue configured. Skipping job execution."
-            )
-            return None
-
-        kwargs["on_success"] = kwargs.get("rq_on_success", None)
-        kwargs["on_failure"] = kwargs.get("rq_on_failure", None)
-        kwargs["on_stopped"] = kwargs.get("rq_on_stopped", None)
-
-        run_func = self._get_run_func(
-            name=name,
-            reload=reload,
-            on_success=on_success_pipeline,
-            on_failure=on_failure_pipeline,
-        )
-        # run_func = run_with_callback(on_success=on_success_pipeline, on_failure=on_failure_pipeline)(
-        #     run_func_
-        # )
-        run_job = run_with_callback(on_success=on_success, on_failure=on_failure)(
-            self.jqm.run_job
-        )
-
-        return run_job(
-            run_func=run_func,
-            pipeline_cfg=self._pipeline_cfg,
-            name=name,
-            inputs=inputs,
-            final_vars=final_vars,
-            config=config,
-            cache=cache,
-            executor_cfg=executor_cfg,
-            with_adapter_cfg=with_adapter_cfg,
-            pipeline_adapter_cfg=pipeline_adapter_cfg,
-            project_adapter_cfg=project_adapter_cfg,
-            adapter=adapter,
-            log_level=log_level,
-            max_retries=max_retries,
-            retry_delay=retry_delay,
-            jitter_factor=jitter_factor,
-            retry_exceptions=retry_exceptions,
-            **kwargs,
-        )
-
-    def add_job(
-        self,
-        name: str,
-        inputs: dict | None = None,
-        final_vars: list[str] | None = None,
-        config: dict | None = None,
-        cache: bool | dict = False,
-        executor_cfg: str | dict | ExecutorConfig | None = None,
-        with_adapter_cfg: dict | WithAdapterConfig | None = None,
-        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
-        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
-        adapter: dict[str, Any] | None = None,
-        reload: bool = False,  # Reload config/module before creating run_func
-        log_level: str | None = None,
-        result_ttl: int | dt.timedelta = 0,
-        run_at: dt.datetime | str | None = None,
-        run_in: dt.datetime | str | None = None,
-        max_retries: int = 3,
-        retry_delay: float = 1.0,
-        jitter_factor: float = 0.1,
-        retry_exceptions: tuple = (Exception,),
-        on_success: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-        on_failure: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-        on_success_pipeline: Callable
-        | tuple[Callable, tuple | None, dict | None]
-        | None = None,
-        on_failure_pipeline: Callable
-        | tuple[Callable, tuple | None, dict | None]
-        | None = None,
-        **kwargs,  # JobQueue specific args
-    ) -> str | UUID | None:
-        """Adds a job to the job queue.
-
-        If the job queue is not configured, it logs an error and returns None.
-
-        Args:
-            name (str): Name of the pipeline to run. Must be a valid identifier.
-            inputs (dict | None): Override pipeline input values. Example: {"data_date": "2025-04-28"}
-            final_vars (list[str] | None): Specify which output variables to return.
-                Example: ["model", "metrics"]
-            config (dict | None): Configuration for Hamilton pipeline executor.
-                Example: {"model": "LogisticRegression"}
-            cache (dict | None): Cache configuration for results. Example: {"recompute": ["node1", "final_node"]}
-            executor_cfg (str | dict | ExecutorConfig | None): Execution configuration, can be:
-                - str: Executor name, e.g. "threadpool", "local"
-                - dict: Raw config, e.g. {"type": "threadpool", "max_workers": 4}
-                - ExecutorConfig: Structured config object
-            with_adapter_cfg (dict | WithAdapterConfig | None): Adapter settings for pipeline execution.
-                Example: {"opentelemetry": True, "tracker": False}
-            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline-specific adapter settings.
-                Example: {"tracker": {"project_id": "123", "tags": {"env": "prod"}}}
-            project_adapter_cfg (dict | ProjectAdapterConfig | None): Project-level adapter settings.
-                Example: {"opentelemetry": {"host": "http://localhost:4317"}}
-            adapter (dict[str, Any] | None): Custom adapter instance for pipeline
-                Example: {"ray_graph_adapter": RayGraphAdapter()}
-            reload (bool): Force reload of pipeline configuration.
-            run_at (dt.datetime | str | None): Future date to run the job.
-                Example: datetime(2025, 4, 28, 12, 0)
-                Example str: "2025-04-28T12:00:00" (ISO format)
-            run_in (dt.datetime | str | None): Time interval to run the job.
-                Example: 3600 (every hour in seconds)
-                Example: datetime.timedelta(days=1)
-                Example str: "1d" (1 day)
-            result_ttl (int | dt.timedelta): Time to live for the job result.
-                Example: 3600 (1 hour in seconds)
-            log_level (str | None): Logging level for the execution. Default None uses project config.
-                Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
-            max_retries (int): Maximum number of retries for execution.
-            retry_delay (float): Delay between retries in seconds.
-            jitter_factor (float): Random jitter factor to add to retry delay
-            retry_exceptions (tuple): Exceptions that trigger a retry.
-            on_success (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful job creation.
-            on_failure (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on job creation failure.
-            on_success_pipeline (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful pipeline execution.
-            on_failure_pipeline (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on pipeline execution failure.
-            **kwargs: Additional keyword arguments passed to the worker's add_job method.
-                For RQ this includes:
-                - result_ttl: Time to live for the job result (float or timedelta)
-                - ttl: Time to live for the job (float or timedelta)
-                - timeout: Time to wait for the job to complete (float or timedelta)
-                - queue_name: Name of the queue to use (str)
-                - retry: Number of retries (int)
-                - repeat: Repeat count (int or dict)
-                - rq_on_failure: Callback function on failure (callable)
-                - rq_on_success: Callback function on success (callable)
-                - rq_on_stopped: Callback function on stop (callable)
-                For APScheduler, this includes:
-                - job_executor: Job executor to use (str)
-
-        Returns:
-            str | UUID | None: The ID of the job that was added to the job queue, or None if the job queue is not configured.
-
-        Raises:
-            ValueError: If the job ID is not valid or if the job cannot be scheduled.
-
-        Example:
-            >>> from flowerpower.pipeline import PipelineManager
-            >>> pm = PipelineManager()
-            >>> job_id = pm.add_job("example_pipeline", inputs={"input1": 42})
-
-        """
-        if self.jqm is None:
-            logger.error(
-                "This PipelineManager instance does not have a job queue configured. Skipping job execution."
-            )
-            return None
-
-        kwargs["on_success"] = kwargs.get("rq_on_success", None)
-        kwargs["on_failure"] = kwargs.get("rq_on_failure", None)
-        kwargs["on_stopped"] = kwargs.get("rq_on_stopped", None)
-
-        run_func = self._get_run_func(
-            name=name,
-            reload=reload,
-            on_success=on_success_pipeline,
-            on_failure=on_failure_pipeline,
-        )
-
-        run_in = (
-            duration_parser.parse(run_in) if isinstance(run_in, str) else run_in
-        )  # convert to seconds
-        run_at = (
-            dt.datetime.fromisoformat(run_at) if isinstance(run_at, str) else run_at
-        )
-
-        add_job = run_with_callback(on_success=on_success, on_failure=on_failure)(
-            self.jqm.add_job
-        )
-        return add_job(
-            run_func=run_func,
-            pipeline_cfg=self._pipeline_cfg,
-            name=name,  # Pass name for logging
-            # Pass run parameters
-            inputs=inputs,
-            final_vars=final_vars,
-            config=config,
-            cache=cache,
-            executor_cfg=executor_cfg,
-            with_adapter_cfg=with_adapter_cfg,
-            pipeline_adapter_cfg=pipeline_adapter_cfg,
-            project_adapter_cfg=project_adapter_cfg,
-            adapter=adapter,
-            # reload=reload,  # Note: reload already happened
-            log_level=log_level,
-            result_ttl=result_ttl,
-            run_at=run_at,
-            run_in=run_in,
-            max_retries=max_retries,
-            retry_delay=retry_delay,
-            jitter_factor=jitter_factor,
-            retry_exceptions=retry_exceptions,
-            **kwargs,  # Pass worker args
-        )
-
-    def schedule(
-        self,
-        name: str,
-        inputs: dict | None = None,
-        final_vars: list[str] | None = None,
-        config: dict | None = None,
-        cache: bool | dict = False,
-        executor_cfg: str | dict | ExecutorConfig | None = None,
-        with_adapter_cfg: dict | WithAdapterConfig | None = None,
-        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
-        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
-        adapter: dict[str, Any] | None = None,
-        reload: bool = False,
-        log_level: str | None = None,
-        cron: str | dict[str, str | int] | None = None,
-        interval: int | str | dict[str, str | int] | None = None,
-        date: dt.datetime | str | None = None,
-        overwrite: bool = False,
-        schedule_id: str | None = None,
-        max_retries: int | None = None,
-        retry_delay: float | None = None,
-        jitter_factor: float | None = None,
-        retry_exceptions: tuple | list | None = None,
-        on_success: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-        on_failure: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-        on_success_pipeline: Callable
-        | tuple[Callable, tuple | None, dict | None]
-        | None = None,
-        on_failure_pipeline: Callable
-        | tuple[Callable, tuple | None, dict | None]
-        | None = None,
-        **kwargs: Any,
-    ) -> str | UUID | None:
-        """Schedule a pipeline to run on a recurring or future basis.
-
-        If the job queue is not configured, it logs an error and returns None.
-
-        Args:
-            name (str): The name of the pipeline to run.
-            inputs (dict | None): Inputs for the pipeline run (overrides config).
-            final_vars (list[str] | None): Final variables for the pipeline run (overrides config).
-            config (dict | None): Hamilton driver config (overrides config).
-            cache (bool | dict): Cache settings (overrides config).
-            executor_cfg (str | dict | ExecutorConfig | None): Executor configuration (overrides config).
-            with_adapter_cfg (dict | WithAdapterConfig | None): Adapter configuration (overrides config).
-            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline adapter configuration (overrides config).
-            project_adapter_cfg (dict | ProjectAdapterConfig | None): Project adapter configuration (overrides config).
-            adapter (dict[str, Any] | None): Additional Hamilton adapters (overrides config).
-            reload (bool): Whether to reload module and pipeline config. Defaults to False.
-            log_level (str | None): Log level for the run (overrides config).
-            cron (str | dict[str, str | int] | None): Cron expression or settings
-                Example string: "0 0 * * *" (daily at midnight)
-                Example dict: {"minute": "0", "hour": "*/2"} (every 2 hours)
-            interval (int | str | dict[str, str | int] | None): Time interval for recurring execution
-                Example int: 3600 (every hour in seconds)
-                Example str: "1h" (every hour)
-                Example dict: {"hours": 1, "minutes": 30} (every 90 minutes)
-            date (dt.datetime | str | None): Future date for
-                Example: datetime(2025, 4, 28, 12, 0)
-                Example str: "2025-04-28T12:00:00" (ISO format)
-            overwrite (bool): Whether to overwrite existing schedule with the same ID
-            schedule_id (str | None): Unique identifier for the schedule
-            max_retries (int): Maximum number of retries for execution
-            retry_delay (float): Delay between retries in seconds
-            jitter_factor (float): Random jitter factor to add to retry delay
-            retry_exceptions (tuple): Exceptions that trigger a retry
-            on_success (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful schedule creation.
-            on_failure (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on schedule creation failure.
-            on_success_pipeline (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful pipeline execution.
-            on_failure_pipeline (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on pipeline execution failure.
-            **kwargs: JobQueue-specific scheduling options
-                For RQ:
-                - result_ttl: Result lifetime (int seconds)
-                - ttl: Job lifetime (int seconds)
-                - timeout: Job execution timeout (int seconds)
-                - queue_name: Queue to use (str)
-                - repeat: Repeat count (int or dict)
-                - rq_on_failure: Callback function on failure (callable)
-                - rq_on_success: Callback function on success (callable)
-                - rq_on_stopped: Callback function on stop (callable)
-                For APScheduler:
-                - misfire_grace_time: Late execution window
-                - coalesce: Combine missed executions (bool)
-                - max_running_jobs: Concurrent instances limit (int)
-
-        Returns:
-            str | UUID | None: Unique identifier for the created schedule, or None if scheduling fails.
-
-        Raises:
-            ValueError: If schedule parameters are invalid
-            RuntimeError: If scheduling fails
-
-        Example:
-            >>> from flowerpower.pipeline import PipelineManager
-            >>> from datetime import datetime, timedelta
-            >>>
-            >>> manager = PipelineManager()
-            >>>
-            >>> # Daily schedule with cron
-            >>> schedule_id = manager.schedule(
-            ...     name="daily_metrics",
-            ...     cron="0 0 * * *",
-            ...     inputs={"date": "{{ execution_date }}"}
-            ... )
-            >>>
-            >>> # Interval-based schedule
-            >>> schedule_id = manager.schedule(
-            ...     name="monitoring",
-            ...     interval={"minutes": 15},
-            ...     with_adapter_cfg={"enable_alerts": True}
-            ... )
-            >>>
-            >>> # Future one-time execution
-            >>> future_date = datetime.now() + timedelta(days=1)
-            >>> schedule_id = manager.schedule(
-            ...     name="batch_process",
-            ...     date=future_date,
-            ...     executor_cfg={"type": "async"}
-            ... )
-        """
-        if self.jqm is None:
-            logger.error(
-                "This PipelineManager instance does not have a job queue configured. Skipping job execution."
-            )
-            return None
-
-        kwargs["on_success"] = kwargs.get("rq_on_success", None)
-        kwargs["on_failure"] = kwargs.get("rq_on_failure", None)
-        kwargs["on_stopped"] = kwargs.get("rq_on_stopped", None)
-
-        # pipeline_cfg = self._load_pipeline_cfg(name=name, reload=reload)
-        run_func = self._get_run_func(
-            name=name,
-            reload=reload,
-            on_success=on_success_pipeline,
-            on_failure=on_failure_pipeline,
-        )
-        interval = (
-            duration_parser.parse(interval) if isinstance(interval, str) else interval
-        )
-        date = dt.datetime.fromisoformat(date) if isinstance(date, str) else date
-
-        schedule = run_with_callback(on_success=on_success, on_failure=on_failure)(
-            self.jqm.schedule
-        )
-        return schedule(
-            run_func=run_func,
-            pipeline_cfg=self._pipeline_cfg,
-            inputs=inputs,
-            final_vars=final_vars,
-            config=config,
-            cache=cache,
-            executor_cfg=executor_cfg,
-            with_adapter_cfg=with_adapter_cfg,
-            pipeline_adapter_cfg=pipeline_adapter_cfg,
-            project_adapter_cfg=project_adapter_cfg,
-            adapter=adapter,
-            reload=reload,
-            log_level=log_level,
-            cron=cron,
-            interval=interval,
-            date=date,
-            overwrite=overwrite,
-            schedule_id=schedule_id,
-            max_retries=max_retries,
-            retry_delay=retry_delay,
-            jitter_factor=jitter_factor,
-            retry_exceptions=retry_exceptions,
-            **kwargs,
-        )
-
-    def schedule_all(self, **kwargs: Any) -> None:
-        """Schedule all pipelines that are enabled in their configuration.
-
-        For each enabled pipeline, applies its configured schedule settings
-        and any provided overrides.
-
-        Args:
-            **kwargs: Overrides for schedule settings that apply to all pipelines.
-                See schedule() method for supported arguments.
-
-        Example:
-            >>> from flowerpower.pipeline import PipelineManager
-            >>>
-            >>> manager = PipelineManager()
-            >>>
-            >>> # Schedule all with default settings
-            >>> manager.schedule_all()
-            >>>
-            >>> # Schedule all with common overrides
-            >>> manager.schedule_all(
-            ...     max_running_jobs=2,
-            ...     coalesce=True,
-            ...     misfire_grace_time=300
-            ... )
-        """
-        scheduled_ids = []
-        errors = []
-        pipeline_names = self.list_pipelines()
-        if not pipeline_names:
-            logger.warning("No pipelines found to schedule.")
-            return
-
-        logger.info(f"Attempting to schedule {len(pipeline_names)} pipelines...")
-        for name in pipeline_names:
-            try:
-                pipeline_cfg = self.load_pipeline(name=name, reload=True)
-
-                if not pipeline_cfg.schedule.enabled:
-                    logger.info(
-                        f"Skipping scheduling for '{name}': Not enabled in config."
-                    )
-                    continue
-
-                logger.info(f"Scheduling [cyan]{name}[/cyan]...")
-                schedule_id = self.schedule(name=name, reload=False, **kwargs)
-                if schedule_id is None:
-                    logger.info(
-                        f"🟡 Skipping adding schedule for [cyan]{name}[/cyan]: Job queue backend not available or scheduling failed."
-                    )
-                    continue
-                scheduled_ids.append(schedule_id)
-            except Exception as e:
-                logger.error(f"Failed to schedule pipeline '{name}': {e}")
-                errors.append(name)
-
-        if errors:
-            logger.error(f"Finished scheduling with errors for: {', '.join(errors)}")
-        else:
-            logger.info(f"Successfully scheduled {len(scheduled_ids)} pipelines.")
-
-    @property
-    def schedules(self) -> list[Any]:
-        """Get list of current pipeline schedules.
-
-        Retrieves all active schedules from the worker system.
-
-        Returns:
-            list[Any]: List of schedule objects. Exact type depends on worker:
-                - RQ: List[rq.job.Job]
-                - APScheduler: List[apscheduler.schedulers.base.Schedule]
-
-        Example:
-            >>> from flowerpower.pipeline import PipelineManager
-            >>>
-            >>> manager = PipelineManager()
-            >>> for schedule in manager.schedules:
-            ...     print(f"{schedule.id}: Next run at {schedule.next_run_time}")
-        """
-        if self.jqm is None:
-            logger.error(
-                "This PipelineManager instance does not have a job queue configured. Skipping schedule retrieval."
-            )
-            return []
-        try:
-            return self.jqm._get_schedules()
-        except Exception as e:
-            logger.error(f"Failed to retrieve schedules: {e}")
-            return []