FlowerPower 0.11.6.19__py3-none-any.whl → 0.20.0__py3-none-any.whl

This diff compares the published contents of two package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (80)
  1. flowerpower/cfg/__init__.py +3 -3
  2. flowerpower/cfg/pipeline/__init__.py +5 -3
  3. flowerpower/cfg/project/__init__.py +3 -3
  4. flowerpower/cfg/project/job_queue.py +1 -128
  5. flowerpower/cli/__init__.py +5 -5
  6. flowerpower/cli/cfg.py +0 -3
  7. flowerpower/cli/job_queue.py +401 -133
  8. flowerpower/cli/pipeline.py +14 -413
  9. flowerpower/cli/utils.py +0 -1
  10. flowerpower/flowerpower.py +537 -28
  11. flowerpower/job_queue/__init__.py +5 -94
  12. flowerpower/job_queue/base.py +201 -3
  13. flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -3
  14. flowerpower/job_queue/rq/manager.py +388 -77
  15. flowerpower/pipeline/__init__.py +2 -0
  16. flowerpower/pipeline/base.py +2 -2
  17. flowerpower/pipeline/io.py +14 -16
  18. flowerpower/pipeline/manager.py +21 -642
  19. flowerpower/pipeline/pipeline.py +571 -0
  20. flowerpower/pipeline/registry.py +242 -10
  21. flowerpower/pipeline/visualizer.py +1 -2
  22. flowerpower/plugins/_io/__init__.py +8 -0
  23. flowerpower/plugins/mqtt/manager.py +6 -6
  24. flowerpower/settings/backend.py +0 -2
  25. flowerpower/settings/job_queue.py +1 -57
  26. flowerpower/utils/misc.py +0 -256
  27. flowerpower/utils/monkey.py +1 -83
  28. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/METADATA +308 -152
  29. flowerpower-0.20.0.dist-info/RECORD +58 -0
  30. flowerpower/fs/__init__.py +0 -29
  31. flowerpower/fs/base.py +0 -662
  32. flowerpower/fs/ext.py +0 -2143
  33. flowerpower/fs/storage_options.py +0 -1420
  34. flowerpower/job_queue/apscheduler/__init__.py +0 -11
  35. flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -110
  36. flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -93
  37. flowerpower/job_queue/apscheduler/manager.py +0 -1051
  38. flowerpower/job_queue/apscheduler/setup.py +0 -554
  39. flowerpower/job_queue/apscheduler/trigger.py +0 -169
  40. flowerpower/job_queue/apscheduler/utils.py +0 -311
  41. flowerpower/pipeline/job_queue.py +0 -583
  42. flowerpower/pipeline/runner.py +0 -603
  43. flowerpower/plugins/io/base.py +0 -2520
  44. flowerpower/plugins/io/helpers/datetime.py +0 -298
  45. flowerpower/plugins/io/helpers/polars.py +0 -875
  46. flowerpower/plugins/io/helpers/pyarrow.py +0 -570
  47. flowerpower/plugins/io/helpers/sql.py +0 -202
  48. flowerpower/plugins/io/loader/__init__.py +0 -28
  49. flowerpower/plugins/io/loader/csv.py +0 -37
  50. flowerpower/plugins/io/loader/deltatable.py +0 -190
  51. flowerpower/plugins/io/loader/duckdb.py +0 -19
  52. flowerpower/plugins/io/loader/json.py +0 -37
  53. flowerpower/plugins/io/loader/mqtt.py +0 -159
  54. flowerpower/plugins/io/loader/mssql.py +0 -26
  55. flowerpower/plugins/io/loader/mysql.py +0 -26
  56. flowerpower/plugins/io/loader/oracle.py +0 -26
  57. flowerpower/plugins/io/loader/parquet.py +0 -35
  58. flowerpower/plugins/io/loader/postgres.py +0 -26
  59. flowerpower/plugins/io/loader/pydala.py +0 -19
  60. flowerpower/plugins/io/loader/sqlite.py +0 -23
  61. flowerpower/plugins/io/metadata.py +0 -244
  62. flowerpower/plugins/io/saver/__init__.py +0 -28
  63. flowerpower/plugins/io/saver/csv.py +0 -36
  64. flowerpower/plugins/io/saver/deltatable.py +0 -186
  65. flowerpower/plugins/io/saver/duckdb.py +0 -19
  66. flowerpower/plugins/io/saver/json.py +0 -36
  67. flowerpower/plugins/io/saver/mqtt.py +0 -28
  68. flowerpower/plugins/io/saver/mssql.py +0 -26
  69. flowerpower/plugins/io/saver/mysql.py +0 -26
  70. flowerpower/plugins/io/saver/oracle.py +0 -26
  71. flowerpower/plugins/io/saver/parquet.py +0 -36
  72. flowerpower/plugins/io/saver/postgres.py +0 -26
  73. flowerpower/plugins/io/saver/pydala.py +0 -20
  74. flowerpower/plugins/io/saver/sqlite.py +0 -24
  75. flowerpower/utils/scheduler.py +0 -311
  76. flowerpower-0.11.6.19.dist-info/RECORD +0 -102
  77. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/WHEEL +0 -0
  78. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/entry_points.txt +0 -0
  79. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/licenses/LICENSE +0 -0
  80. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/top_level.txt +0 -0
flowerpower/pipeline/manager.py
@@ -2,12 +2,12 @@ import datetime as dt
 import os
 import posixpath
 import sys
+import warnings
 from pathlib import Path
 from types import TracebackType
 from typing import Any, Callable, TypeVar, Union
 from uuid import UUID
 
-import duration_parser
 from loguru import logger
 from munch import Munch
 
@@ -16,18 +16,16 @@ try:
 except ImportError:
     Digraph = Any  # Type alias for when graphviz isn't installed
 
+from fsspec_utils import AbstractFileSystem, BaseStorageOptions, filesystem
+
 from .. import settings
 from ..cfg import PipelineConfig, ProjectConfig
 from ..cfg.pipeline.adapter import AdapterConfig as PipelineAdapterConfig
 from ..cfg.pipeline.run import ExecutorConfig, WithAdapterConfig
 from ..cfg.project.adapter import AdapterConfig as ProjectAdapterConfig
-from ..fs import AbstractFileSystem, BaseStorageOptions, get_filesystem
-from ..utils.callback import run_with_callback
 from ..utils.logging import setup_logging
 from .io import PipelineIOManager
-from .job_queue import PipelineJobQueue
 from .registry import HookType, PipelineRegistry
-from .runner import run_pipeline
 from .visualizer import PipelineVisualizer
 
 setup_logging(level=settings.LOG_LEVEL)
@@ -98,7 +96,7 @@ class PipelineManager:
        pipelines_dir: Override default pipelines directory name ('pipelines').
            Example: "flows" or "dags".
        job_queue_type: Override worker type from project config/settings.
-           Valid values: "rq", "apscheduler", or "huey".
+           Valid values: "rq".
        log_level: Set logging level for the manager.
            Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
 
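Note: the docstring change above narrows the supported backend set to RQ only. A minimal illustrative sketch, assuming PipelineManager still accepts the documented job_queue_type keyword (only the value names come from this diff):

```python
from flowerpower.pipeline import PipelineManager

# "rq" is now the only documented job queue backend;
# "apscheduler" and "huey" are no longer listed as valid values.
manager = PipelineManager(job_queue_type="rq")
```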
@@ -138,7 +136,7 @@
         cached = False
         cache_storage = None
         if not fs:
-            fs = get_filesystem(
+            fs = filesystem(
                 self._base_dir,
                 storage_options=storage_options,
                 cached=cached,
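Note: this hunk is the call-site counterpart of the import swap in the first hunk; FlowerPower's bundled fs module is gone and fsspec_utils takes its place. A minimal sketch of the new call, assuming fsspec_utils.filesystem() keeps the argument names shown in the hunk; the path and options below are placeholder values:

```python
from fsspec_utils import filesystem

# Mirrors the call site above: a positional base path plus
# storage_options and cached keyword arguments (values illustrative).
fs = filesystem(
    "s3://example-bucket/project",
    storage_options={"anon": False},
    cached=False,
)
```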
@@ -175,22 +173,12 @@
         self.registry = PipelineRegistry(
             project_cfg=self.project_cfg,
             fs=self._fs,
-            cfg_dir=self._cfg_dir,
-            pipelines_dir=self._pipelines_dir,
-        )
-        pipeline_job_queue = PipelineJobQueue(
-            project_cfg=self.project_cfg,
-            fs=self._fs,
-            cfg_dir=self._cfg_dir,
-            pipelines_dir=self._pipelines_dir,
+            base_dir=self._base_dir,
+            storage_options=self._storage_options,
         )
-        if pipeline_job_queue.job_queue is None:
-            logger.warning(
-                "Job queue backend is unavailable. Some features may not work."
-            )
-            self.jqm = None
-        else:
-            self.jqm = pipeline_job_queue
+
+        # Initialize project context (will be injected by FlowerPowerProject)
+        self._project_context = None
         self.visualizer = PipelineVisualizer(project_cfg=self.project_cfg, fs=self._fs)
         self.io = PipelineIOManager(registry=self.registry)
 
@@ -241,49 +229,6 @@
         # Add cleanup code if needed
         pass
 
-    def _get_run_func(
-        self,
-        name: str,
-        reload: bool = False,
-        on_success: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-        on_failure: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-    ) -> Callable:
-        """Create a PipelineRunner instance and return its run method.
-
-        This internal helper method ensures that each job gets a fresh runner
-        with the correct configuration state.
-
-        Args:
-            name: Name of the pipeline to create runner for
-            reload: Whether to reload pipeline configuration
-
-        Returns:
-            Callable: Bound run method from a fresh PipelineRunner instance
-
-        Example:
-            >>> # Internal usage
-            >>> manager = PipelineManager()
-            >>> run_func = manager._get_run_func_for_job("data_pipeline")
-            >>> result = run_func(inputs={"date": "2025-04-28"})
-        """
-        if (
-            name == self._current_pipeline_name and not reload
-            # and hasattr(self, "_runner")
-        ):
-            # run_pipeline_ = partial(run_pipeline, project_cfg=self.project_cfg, pipeline_cfg=self._pipeline_cfg)
-            run_func = run_with_callback(on_success=on_success, on_failure=on_failure)(
-                run_pipeline
-            )
-            return run_func
-
-        _ = self.load_pipeline(name=name, reload=reload)
-        # run_pipeline_ = partial(run_pipeline, project_cfg=self.project_cfg, pipeline_cfg=pipeline_cfg)
-
-        run_func = run_with_callback(on_success=on_success, on_failure=on_failure)(
-            run_pipeline
-        )
-        return run_func
-
     def _add_modules_path(self) -> None:
         """Add pipeline module paths to Python path.
 
@@ -543,14 +488,16 @@
             ...     reload=True
             ... )
         """
-        # pipeline_cfg = self._load_pipeline_cfg(name=name, reload=reload)
-        run_func = self._get_run_func(
-            name=name, reload=reload, on_success=on_success, on_failure=on_failure
+        # Use injected project context, fallback to self for backward compatibility
+        project_context = getattr(self, "_project_context", self)
+
+        # Get Pipeline instance from registry
+        pipeline = self.registry.get_pipeline(
+            name=name, project_context=project_context, reload=reload
         )
 
-        res = run_func(
-            project_cfg=self._project_cfg,
-            pipeline_cfg=self._pipeline_cfg,
+        # Execute pipeline using its own run method
+        return pipeline.run(
             inputs=inputs,
             final_vars=final_vars,
             config=config,
@@ -560,16 +507,16 @@
             pipeline_adapter_cfg=pipeline_adapter_cfg,
             project_adapter_cfg=project_adapter_cfg,
             adapter=adapter,
-            # reload=reload, # Runner handles module reload if needed
+            reload=reload,
             log_level=log_level,
             max_retries=max_retries,
             retry_delay=retry_delay,
             jitter_factor=jitter_factor,
             retry_exceptions=retry_exceptions,
+            on_success=on_success,
+            on_failure=on_failure,
         )
 
-        return res
-
     # --- Delegated Methods ---
 
     # Registry Delegations
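Note: taken together, the run() hunks above replace the old runner-function indirection (_get_run_func plus run_with_callback) with direct delegation to a Pipeline object. A sketch of what manager.run() now does internally, using only names visible in this diff; argument values and the callback signature are illustrative:

```python
manager = PipelineManager()

# Resolve the Pipeline from the registry, injecting the project context
# (falls back to the manager itself when no context was injected).
project_context = getattr(manager, "_project_context", manager)
pipeline = manager.registry.get_pipeline(
    name="data_pipeline", project_context=project_context, reload=False
)

# Pipeline.run() now receives the success/failure callbacks directly.
result = pipeline.run(
    inputs={"date": "2025-04-28"},
    on_success=lambda *args, **kwargs: print("pipeline ok"),
)
```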
@@ -1213,571 +1160,3 @@ class PipelineManager:
         return self.visualizer.show_dag(
             name=name, format=format, reload=reload, raw=raw
         )
-
-    def run_job(
-        self,
-        name: str,
-        inputs: dict | None = None,
-        final_vars: list[str] | None = None,
-        config: dict | None = None,
-        cache: bool | dict = False,
-        executor_cfg: str | dict | ExecutorConfig | None = None,
-        with_adapter_cfg: dict | WithAdapterConfig | None = None,
-        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
-        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
-        adapter: dict[str, Any] | None = None,
-        reload: bool = False,
-        log_level: str | None = None,
-        max_retries: int | None = None,
-        retry_delay: float | None = None,
-        jitter_factor: float | None = None,
-        retry_exceptions: tuple | list | None = None,
-        on_success: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-        on_failure: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-        on_success_pipeline: Callable
-        | tuple[Callable, tuple | None, dict | None]
-        | None = None,
-        on_failure_pipeline: Callable
-        | tuple[Callable, tuple | None, dict | None]
-        | None = None,
-        **kwargs: Any,
-    ) -> dict[str, Any] | None:
-        """Execute a pipeline job immediately through the job queue.
-
-        Unlike the run() method which executes synchronously, this method runs
-        the pipeline through the configured worker system (RQ, APScheduler, etc.).
-
-        If the job queue is not configured, it logs an error and returns None.
-
-        Args:
-            name (str): Name of the pipeline to run. Must be a valid identifier.
-            inputs (dict | None): Override pipeline input values. Example: {"data_date": "2025-04-28"}
-            final_vars (list[str] | None): Specify which output variables to return.
-                Example: ["model", "metrics"]
-            config (dict | None): Configuration for Hamilton pipeline executor.
-                Example: {"model": "LogisticRegression"}
-            cache (dict | None): Cache configuration for results. Example: {"recompute": ["node1", "final_node"]}
-            executor_cfg (str | dict | ExecutorConfig | None): Execution configuration, can be:
-                - str: Executor name, e.g. "threadpool", "local"
-                - dict: Raw config, e.g. {"type": "threadpool", "max_workers": 4}
-                - ExecutorConfig: Structured config object
-            with_adapter_cfg (dict | WithAdapterConfig | None): Adapter settings for pipeline execution.
-                Example: {"opentelemetry": True, "tracker": False}
-            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline-specific adapter settings.
-                Example: {"tracker": {"project_id": "123", "tags": {"env": "prod"}}}
-            project_adapter_cfg (dict | ProjectAdapterConfig | None): Project-level adapter settings.
-                Example: {"opentelemetry": {"host": "http://localhost:4317"}}
-            adapter (dict[str, Any] | None): Custom adapter instance for pipeline
-                Example: {"ray_graph_adapter": RayGraphAdapter()}
-            reload (bool): Force reload of pipeline configuration.
-            log_level (str | None): Logging level for the execution. Default None uses project config.
-                Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
-            max_retries (int): Maximum number of retries for execution.
-            retry_delay (float): Delay between retries in seconds.
-            jitter_factor (float): Random jitter factor to add to retry delay
-            retry_exceptions (tuple): Exceptions that trigger a retry.
-            on_success (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful job execution.
-                This runs after the pipeline execution through the job queue was executed successfully.
-            on_failure (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on job execution failure.
-                This runs if the job creation or the pipeline execution through the job queue fails or raises an exception.
-            on_success_pipeline (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful pipeline execution.
-                This runs after the pipeline completes successfully.
-            on_failure_pipeline (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on pipeline execution failure.
-                This runs if the pipeline fails or raises an exception.
-
-            **kwargs: JobQueue-specific arguments
-                For RQ:
-                    - queue_name: Queue to use (str)
-                    - retry: Number of retries (int)
-                    - result_ttl: Time to live for the job result (float or timedelta)
-                    - ttl: Time to live for the job (float or timedelta)
-                    - timeout: Time to wait for the job to complete (float or timedelta)
-                    - repeat: Repeat count (int or dict)
-                    - rq_on_failure: Callback function on failure (callable)
-                    - rq_on_success: Callback function on success (callable)
-                    - rq_on_stopped: Callback function on stop (callable)
-                For APScheduler:
-                    - job_executor: Executor type (str)
-
-        Returns:
-            dict[str, Any] | None: Job execution results if successful, otherwise None.
-
-        Raises:
-            ValueError: If pipeline or configuration is invalid
-            RuntimeError: If job execution fails
-
-        Example:
-            >>> from flowerpower.pipeline import PipelineManager
-            >>>
-            >>> manager = PipelineManager()
-            >>>
-            >>> # Simple job execution
-            >>> result = manager.run_job("data_pipeline")
-            >>>
-            >>> # Complex job with retry logic
-            >>> result = manager.run_job(
-            ...     name="ml_training",
-            ...     inputs={"training_date": "2025-04-28"},
-            ...     executor_cfg={"type": "async"},
-            ...     with_adapter_cfg={"enable_tracking": True},
-            ...     retry=3,
-            ...     queue_name="ml_jobs"
-            ... )
-        """
-        if self.jqm is None:
-            logger.error(
-                "This PipelineManager instance does not have a job queue configured. Skipping job execution."
-            )
-            return None
-
-        kwargs["on_success"] = kwargs.get("rq_on_success", None)
-        kwargs["on_failure"] = kwargs.get("rq_on_failure", None)
-        kwargs["on_stopped"] = kwargs.get("rq_on_stopped", None)
-
-        run_func = self._get_run_func(
-            name=name,
-            reload=reload,
-            on_success=on_success_pipeline,
-            on_failure=on_failure_pipeline,
-        )
-        # run_func = run_with_callback(on_success=on_success_pipeline, on_failure=on_failure_pipeline)(
-        #     run_func_
-        # )
-        run_job = run_with_callback(on_success=on_success, on_failure=on_failure)(
-            self.jqm.run_job
-        )
-
-        return run_job(
-            run_func=run_func,
-            pipeline_cfg=self._pipeline_cfg,
-            name=name,
-            inputs=inputs,
-            final_vars=final_vars,
-            config=config,
-            cache=cache,
-            executor_cfg=executor_cfg,
-            with_adapter_cfg=with_adapter_cfg,
-            pipeline_adapter_cfg=pipeline_adapter_cfg,
-            project_adapter_cfg=project_adapter_cfg,
-            adapter=adapter,
-            log_level=log_level,
-            max_retries=max_retries,
-            retry_delay=retry_delay,
-            jitter_factor=jitter_factor,
-            retry_exceptions=retry_exceptions,
-            **kwargs,
-        )
-
-    def add_job(
-        self,
-        name: str,
-        inputs: dict | None = None,
-        final_vars: list[str] | None = None,
-        config: dict | None = None,
-        cache: bool | dict = False,
-        executor_cfg: str | dict | ExecutorConfig | None = None,
-        with_adapter_cfg: dict | WithAdapterConfig | None = None,
-        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
-        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
-        adapter: dict[str, Any] | None = None,
-        reload: bool = False,  # Reload config/module before creating run_func
-        log_level: str | None = None,
-        result_ttl: int | dt.timedelta = 0,
-        run_at: dt.datetime | str | None = None,
-        run_in: dt.datetime | str | None = None,
-        max_retries: int = 3,
-        retry_delay: float = 1.0,
-        jitter_factor: float = 0.1,
-        retry_exceptions: tuple = (Exception,),
-        on_success: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-        on_failure: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-        on_success_pipeline: Callable
-        | tuple[Callable, tuple | None, dict | None]
-        | None = None,
-        on_failure_pipeline: Callable
-        | tuple[Callable, tuple | None, dict | None]
-        | None = None,
-        **kwargs,  # JobQueue specific args
-    ) -> str | UUID | None:
-        """Adds a job to the job queue.
-
-        If the job queue is not configured, it logs an error and returns None.
-
-        Args:
-            name (str): Name of the pipeline to run. Must be a valid identifier.
-            inputs (dict | None): Override pipeline input values. Example: {"data_date": "2025-04-28"}
-            final_vars (list[str] | None): Specify which output variables to return.
-                Example: ["model", "metrics"]
-            config (dict | None): Configuration for Hamilton pipeline executor.
-                Example: {"model": "LogisticRegression"}
-            cache (dict | None): Cache configuration for results. Example: {"recompute": ["node1", "final_node"]}
-            executor_cfg (str | dict | ExecutorConfig | None): Execution configuration, can be:
-                - str: Executor name, e.g. "threadpool", "local"
-                - dict: Raw config, e.g. {"type": "threadpool", "max_workers": 4}
-                - ExecutorConfig: Structured config object
-            with_adapter_cfg (dict | WithAdapterConfig | None): Adapter settings for pipeline execution.
-                Example: {"opentelemetry": True, "tracker": False}
-            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline-specific adapter settings.
-                Example: {"tracker": {"project_id": "123", "tags": {"env": "prod"}}}
-            project_adapter_cfg (dict | ProjectAdapterConfig | None): Project-level adapter settings.
-                Example: {"opentelemetry": {"host": "http://localhost:4317"}}
-            adapter (dict[str, Any] | None): Custom adapter instance for pipeline
-                Example: {"ray_graph_adapter": RayGraphAdapter()}
-            reload (bool): Force reload of pipeline configuration.
-            run_at (dt.datetime | str | None): Future date to run the job.
-                Example: datetime(2025, 4, 28, 12, 0)
-                Example str: "2025-04-28T12:00:00" (ISO format)
-            run_in (dt.datetime | str | None): Time interval to run the job.
-                Example: 3600 (every hour in seconds)
-                Example: datetime.timedelta(days=1)
-                Example str: "1d" (1 day)
-            result_ttl (int | dt.timedelta): Time to live for the job result.
-                Example: 3600 (1 hour in seconds)
-            log_level (str | None): Logging level for the execution. Default None uses project config.
-                Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
-            max_retries (int): Maximum number of retries for execution.
-            retry_delay (float): Delay between retries in seconds.
-            jitter_factor (float): Random jitter factor to add to retry delay
-            retry_exceptions (tuple): Exceptions that trigger a retry.
-            on_success (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful job creation.
-            on_failure (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on job creation failure.
-            on_success_pipeline (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful pipeline execution.
-            on_failure_pipeline (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on pipeline execution failure.
-            **kwargs: Additional keyword arguments passed to the worker's add_job method.
-                For RQ this includes:
-                    - result_ttl: Time to live for the job result (float or timedelta)
-                    - ttl: Time to live for the job (float or timedelta)
-                    - timeout: Time to wait for the job to complete (float or timedelta)
-                    - queue_name: Name of the queue to use (str)
-                    - retry: Number of retries (int)
-                    - repeat: Repeat count (int or dict)
-                    - rq_on_failure: Callback function on failure (callable)
-                    - rq_on_success: Callback function on success (callable)
-                    - rq_on_stopped: Callback function on stop (callable)
-                For APScheduler, this includes:
-                    - job_executor: Job executor to use (str)
-
-        Returns:
-            str | UUID | None: The ID of the job that was added to the job queue, or None if the job queue is not configured.
-
-        Raises:
-            ValueError: If the job ID is not valid or if the job cannot be scheduled.
-
-        Example:
-            >>> from flowerpower.pipeline import PipelineManager
-            >>> pm = PipelineManager()
-            >>> job_id = pm.add_job("example_pipeline", inputs={"input1": 42})
-
-        """
-        if self.jqm is None:
-            logger.error(
-                "This PipelineManager instance does not have a job queue configured. Skipping job execution."
-            )
-            return None
-
-        kwargs["on_success"] = kwargs.get("rq_on_success", None)
-        kwargs["on_failure"] = kwargs.get("rq_on_failure", None)
-        kwargs["on_stopped"] = kwargs.get("rq_on_stopped", None)
-
-        run_func = self._get_run_func(
-            name=name,
-            reload=reload,
-            on_success=on_success_pipeline,
-            on_failure=on_failure_pipeline,
-        )
-
-        run_in = (
-            duration_parser.parse(run_in) if isinstance(run_in, str) else run_in
-        )  # convert to seconds
-        run_at = (
-            dt.datetime.fromisoformat(run_at) if isinstance(run_at, str) else run_at
-        )
-
-        add_job = run_with_callback(on_success=on_success, on_failure=on_failure)(
-            self.jqm.add_job
-        )
-        return add_job(
-            run_func=run_func,
-            pipeline_cfg=self._pipeline_cfg,
-            name=name,  # Pass name for logging
-            # Pass run parameters
-            inputs=inputs,
-            final_vars=final_vars,
-            config=config,
-            cache=cache,
-            executor_cfg=executor_cfg,
-            with_adapter_cfg=with_adapter_cfg,
-            pipeline_adapter_cfg=pipeline_adapter_cfg,
-            project_adapter_cfg=project_adapter_cfg,
-            adapter=adapter,
-            # reload=reload, # Note: reload already happened
-            log_level=log_level,
-            result_ttl=result_ttl,
-            run_at=run_at,
-            run_in=run_in,
-            max_retries=max_retries,
-            retry_delay=retry_delay,
-            jitter_factor=jitter_factor,
-            retry_exceptions=retry_exceptions,
-            **kwargs,  # Pass worker args
-        )
-
-    def schedule(
-        self,
-        name: str,
-        inputs: dict | None = None,
-        final_vars: list[str] | None = None,
-        config: dict | None = None,
-        cache: bool | dict = False,
-        executor_cfg: str | dict | ExecutorConfig | None = None,
-        with_adapter_cfg: dict | WithAdapterConfig | None = None,
-        pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
-        project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
-        adapter: dict[str, Any] | None = None,
-        reload: bool = False,
-        log_level: str | None = None,
-        cron: str | dict[str, str | int] | None = None,
-        interval: int | str | dict[str, str | int] | None = None,
-        date: dt.datetime | str | None = None,
-        overwrite: bool = False,
-        schedule_id: str | None = None,
-        max_retries: int | None = None,
-        retry_delay: float | None = None,
-        jitter_factor: float | None = None,
-        retry_exceptions: tuple | list | None = None,
-        on_success: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-        on_failure: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-        on_success_pipeline: Callable
-        | tuple[Callable, tuple | None, dict | None]
-        | None = None,
-        on_failure_pipeline: Callable
-        | tuple[Callable, tuple | None, dict | None]
-        | None = None,
-        **kwargs: Any,
-    ) -> str | UUID | None:
-        """Schedule a pipeline to run on a recurring or future basis.
-
-        If the job queue is not configured, it logs an error and returns None.
-
-        Args:
-            name (str): The name of the pipeline to run.
-            inputs (dict | None): Inputs for the pipeline run (overrides config).
-            final_vars (list[str] | None): Final variables for the pipeline run (overrides config).
-            config (dict | None): Hamilton driver config (overrides config).
-            cache (bool | dict): Cache settings (overrides config).
-            executor_cfg (str | dict | ExecutorConfig | None): Executor configuration (overrides config).
-            with_adapter_cfg (dict | WithAdapterConfig | None): Adapter configuration (overrides config).
-            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline adapter configuration (overrides config).
-            project_adapter_cfg (dict | ProjectAdapterConfig | None): Project adapter configuration (overrides config).
-            adapter (dict[str, Any] | None): Additional Hamilton adapters (overrides config).
-            reload (bool): Whether to reload module and pipeline config. Defaults to False.
-            log_level (str | None): Log level for the run (overrides config).
-            cron (str | dict[str, str | int] | None): Cron expression or settings
-                Example string: "0 0 * * *" (daily at midnight)
-                Example dict: {"minute": "0", "hour": "*/2"} (every 2 hours)
-            interval (int | str | dict[str, str | int] | None): Time interval for recurring execution
-                Example int: 3600 (every hour in seconds)
-                Example str: "1h" (every hour)
-                Example dict: {"hours": 1, "minutes": 30} (every 90 minutes)
-            date (dt.datetime | str | None): Future date for
-                Example: datetime(2025, 4, 28, 12, 0)
-                Example str: "2025-04-28T12:00:00" (ISO format)
-            overwrite (bool): Whether to overwrite existing schedule with the same ID
-            schedule_id (str | None): Unique identifier for the schedule
-            max_retries (int): Maximum number of retries for execution
-            retry_delay (float): Delay between retries in seconds
-            jitter_factor (float): Random jitter factor to add to retry delay
-            retry_exceptions (tuple): Exceptions that trigger a retry
-            on_success (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful schedule creation.
-            on_failure (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on schedule creation failure.
-            on_success_pipeline (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful pipeline execution.
-            on_failure_pipeline (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on pipeline execution failure.
-            **kwargs: JobQueue-specific scheduling options
-                For RQ:
-                    - result_ttl: Result lifetime (int seconds)
-                    - ttl: Job lifetime (int seconds)
-                    - timeout: Job execution timeout (int seconds)
-                    - queue_name: Queue to use (str)
-                    - repeat: Repeat count (int or dict)
-                    - rq_on_failure: Callback function on failure (callable)
-                    - rq_on_success: Callback function on success (callable)
-                    - rq_on_stopped: Callback function on stop (callable)
-                For APScheduler:
-                    - misfire_grace_time: Late execution window
-                    - coalesce: Combine missed executions (bool)
-                    - max_running_jobs: Concurrent instances limit (int)
-
-        Returns:
-            str | UUID | None: Unique identifier for the created schedule, or None if scheduling fails.
-
-        Raises:
-            ValueError: If schedule parameters are invalid
-            RuntimeError: If scheduling fails
-
-        Example:
-            >>> from flowerpower.pipeline import PipelineManager
-            >>> from datetime import datetime, timedelta
-            >>>
-            >>> manager = PipelineManager()
-            >>>
-            >>> # Daily schedule with cron
-            >>> schedule_id = manager.schedule(
-            ...     name="daily_metrics",
-            ...     cron="0 0 * * *",
-            ...     inputs={"date": "{{ execution_date }}"}
-            ... )
-            >>>
-            >>> # Interval-based schedule
-            >>> schedule_id = manager.schedule(
-            ...     name="monitoring",
-            ...     interval={"minutes": 15},
-            ...     with_adapter_cfg={"enable_alerts": True}
-            ... )
-            >>>
-            >>> # Future one-time execution
-            >>> future_date = datetime.now() + timedelta(days=1)
-            >>> schedule_id = manager.schedule(
-            ...     name="batch_process",
-            ...     date=future_date,
-            ...     executor_cfg={"type": "async"}
-            ... )
-        """
-        if self.jqm is None:
-            logger.error(
-                "This PipelineManager instance does not have a job queue configured. Skipping job execution."
-            )
-            return None
-
-        kwargs["on_success"] = kwargs.get("rq_on_success", None)
-        kwargs["on_failure"] = kwargs.get("rq_on_failure", None)
-        kwargs["on_stopped"] = kwargs.get("rq_on_stopped", None)
-
-        # pipeline_cfg = self._load_pipeline_cfg(name=name, reload=reload)
-        run_func = self._get_run_func(
-            name=name,
-            reload=reload,
-            on_success=on_success_pipeline,
-            on_failure=on_failure_pipeline,
-        )
-        interval = (
-            duration_parser.parse(interval) if isinstance(interval, str) else interval
-        )
-        date = dt.datetime.fromisoformat(date) if isinstance(date, str) else date
-
-        schedule = run_with_callback(on_success=on_success, on_failure=on_failure)(
-            self.jqm.schedule
-        )
-        return schedule(
-            run_func=run_func,
-            pipeline_cfg=self._pipeline_cfg,
-            inputs=inputs,
-            final_vars=final_vars,
-            config=config,
-            cache=cache,
-            executor_cfg=executor_cfg,
-            with_adapter_cfg=with_adapter_cfg,
-            pipeline_adapter_cfg=pipeline_adapter_cfg,
-            project_adapter_cfg=project_adapter_cfg,
-            adapter=adapter,
-            reload=reload,
-            log_level=log_level,
-            cron=cron,
-            interval=interval,
-            date=date,
-            overwrite=overwrite,
-            schedule_id=schedule_id,
-            max_retries=max_retries,
-            retry_delay=retry_delay,
-            jitter_factor=jitter_factor,
-            retry_exceptions=retry_exceptions,
-            **kwargs,
-        )
-
-    def schedule_all(self, **kwargs: Any) -> None:
-        """Schedule all pipelines that are enabled in their configuration.
-
-        For each enabled pipeline, applies its configured schedule settings
-        and any provided overrides.
-
-        Args:
-            **kwargs: Overrides for schedule settings that apply to all pipelines.
-                See schedule() method for supported arguments.
-
-        Example:
-            >>> from flowerpower.pipeline import PipelineManager
-            >>>
-            >>> manager = PipelineManager()
-            >>>
-            >>> # Schedule all with default settings
-            >>> manager.schedule_all()
-            >>>
-            >>> # Schedule all with common overrides
-            >>> manager.schedule_all(
-            ...     max_running_jobs=2,
-            ...     coalesce=True,
-            ...     misfire_grace_time=300
-            ... )
-        """
-        scheduled_ids = []
-        errors = []
-        pipeline_names = self.list_pipelines()
-        if not pipeline_names:
-            logger.warning("No pipelines found to schedule.")
-            return
-
-        logger.info(f"Attempting to schedule {len(pipeline_names)} pipelines...")
-        for name in pipeline_names:
-            try:
-                pipeline_cfg = self.load_pipeline(name=name, reload=True)
-
-                if not pipeline_cfg.schedule.enabled:
-                    logger.info(
-                        f"Skipping scheduling for '{name}': Not enabled in config."
-                    )
-                    continue
-
-                logger.info(f"Scheduling [cyan]{name}[/cyan]...")
-                schedule_id = self.schedule(name=name, reload=False, **kwargs)
-                if schedule_id is None:
-                    logger.info(
-                        f"🟡 Skipping adding schedule for [cyan]{name}[/cyan]: Job queue backend not available or scheduling failed."
-                    )
-                    continue
-                scheduled_ids.append(schedule_id)
-            except Exception as e:
-                logger.error(f"Failed to schedule pipeline '{name}': {e}")
-                errors.append(name)
-
-        if errors:
-            logger.error(f"Finished scheduling with errors for: {', '.join(errors)}")
-        else:
-            logger.info(f"Successfully scheduled {len(scheduled_ids)} pipelines.")
-
-    @property
-    def schedules(self) -> list[Any]:
-        """Get list of current pipeline schedules.
-
-        Retrieves all active schedules from the worker system.
-
-        Returns:
-            list[Any]: List of schedule objects. Exact type depends on worker:
-                - RQ: List[rq.job.Job]
-                - APScheduler: List[apscheduler.schedulers.base.Schedule]
-
-        Example:
-            >>> from flowerpower.pipeline import PipelineManager
-            >>>
-            >>> manager = PipelineManager()
-            >>> for schedule in manager.schedules:
-            ...     print(f"{schedule.id}: Next run at {schedule.next_run_time}")
-        """
-        if self.jqm is None:
-            logger.error(
-                "This PipelineManager instance does not have a job queue configured. Skipping schedule retrieval."
-            )
-            return []
-        try:
-            return self.jqm._get_schedules()
-        except Exception as e:
-            logger.error(f"Failed to retrieve schedules: {e}")
-            return []
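Note: for migration reference, the job-queue surface removed by this final hunk looked like this in 0.11.x. The calls below are taken from the deleted docstrings' own examples; none of them exist on PipelineManager in 0.20.0:

```python
pm = PipelineManager()

# All of the following were removed in 0.20.0:
result = pm.run_job("data_pipeline")                            # immediate run via the job queue
job_id = pm.add_job("example_pipeline", inputs={"input1": 42})  # enqueue a job
sid = pm.schedule(name="daily_metrics", cron="0 0 * * *")       # recurring schedule
pm.schedule_all()                                               # schedule every enabled pipeline
schedules = pm.schedules                                        # list active schedules
```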