FlowerPower 0.11.6.20__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. flowerpower/__init__.py +2 -6
  2. flowerpower/cfg/__init__.py +7 -14
  3. flowerpower/cfg/base.py +29 -25
  4. flowerpower/cfg/pipeline/__init__.py +8 -6
  5. flowerpower/cfg/pipeline/_schedule.py +32 -0
  6. flowerpower/cfg/pipeline/adapter.py +0 -5
  7. flowerpower/cfg/pipeline/builder.py +377 -0
  8. flowerpower/cfg/pipeline/run.py +36 -0
  9. flowerpower/cfg/project/__init__.py +11 -24
  10. flowerpower/cfg/project/adapter.py +0 -12
  11. flowerpower/cli/__init__.py +2 -21
  12. flowerpower/cli/cfg.py +0 -3
  13. flowerpower/cli/mqtt.py +0 -6
  14. flowerpower/cli/pipeline.py +22 -415
  15. flowerpower/cli/utils.py +0 -1
  16. flowerpower/flowerpower.py +345 -146
  17. flowerpower/pipeline/__init__.py +2 -0
  18. flowerpower/pipeline/base.py +21 -12
  19. flowerpower/pipeline/io.py +58 -54
  20. flowerpower/pipeline/manager.py +165 -726
  21. flowerpower/pipeline/pipeline.py +643 -0
  22. flowerpower/pipeline/registry.py +285 -18
  23. flowerpower/pipeline/visualizer.py +5 -6
  24. flowerpower/plugins/io/__init__.py +8 -0
  25. flowerpower/plugins/mqtt/__init__.py +7 -11
  26. flowerpower/settings/__init__.py +0 -2
  27. flowerpower/settings/{backend.py → _backend.py} +0 -21
  28. flowerpower/settings/logging.py +1 -1
  29. flowerpower/utils/logging.py +24 -12
  30. flowerpower/utils/misc.py +17 -256
  31. flowerpower/utils/monkey.py +1 -83
  32. flowerpower-0.21.0.dist-info/METADATA +463 -0
  33. flowerpower-0.21.0.dist-info/RECORD +44 -0
  34. flowerpower/cfg/pipeline/schedule.py +0 -74
  35. flowerpower/cfg/project/job_queue.py +0 -238
  36. flowerpower/cli/job_queue.py +0 -1061
  37. flowerpower/fs/__init__.py +0 -29
  38. flowerpower/fs/base.py +0 -662
  39. flowerpower/fs/ext.py +0 -2143
  40. flowerpower/fs/storage_options.py +0 -1420
  41. flowerpower/job_queue/__init__.py +0 -294
  42. flowerpower/job_queue/apscheduler/__init__.py +0 -11
  43. flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -110
  44. flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -93
  45. flowerpower/job_queue/apscheduler/manager.py +0 -1051
  46. flowerpower/job_queue/apscheduler/setup.py +0 -554
  47. flowerpower/job_queue/apscheduler/trigger.py +0 -169
  48. flowerpower/job_queue/apscheduler/utils.py +0 -311
  49. flowerpower/job_queue/base.py +0 -413
  50. flowerpower/job_queue/rq/__init__.py +0 -10
  51. flowerpower/job_queue/rq/_trigger.py +0 -37
  52. flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +0 -226
  53. flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -231
  54. flowerpower/job_queue/rq/manager.py +0 -1582
  55. flowerpower/job_queue/rq/setup.py +0 -154
  56. flowerpower/job_queue/rq/utils.py +0 -69
  57. flowerpower/mqtt.py +0 -12
  58. flowerpower/pipeline/job_queue.py +0 -583
  59. flowerpower/pipeline/runner.py +0 -603
  60. flowerpower/plugins/io/base.py +0 -2520
  61. flowerpower/plugins/io/helpers/datetime.py +0 -298
  62. flowerpower/plugins/io/helpers/polars.py +0 -875
  63. flowerpower/plugins/io/helpers/pyarrow.py +0 -570
  64. flowerpower/plugins/io/helpers/sql.py +0 -202
  65. flowerpower/plugins/io/loader/__init__.py +0 -28
  66. flowerpower/plugins/io/loader/csv.py +0 -37
  67. flowerpower/plugins/io/loader/deltatable.py +0 -190
  68. flowerpower/plugins/io/loader/duckdb.py +0 -19
  69. flowerpower/plugins/io/loader/json.py +0 -37
  70. flowerpower/plugins/io/loader/mqtt.py +0 -159
  71. flowerpower/plugins/io/loader/mssql.py +0 -26
  72. flowerpower/plugins/io/loader/mysql.py +0 -26
  73. flowerpower/plugins/io/loader/oracle.py +0 -26
  74. flowerpower/plugins/io/loader/parquet.py +0 -35
  75. flowerpower/plugins/io/loader/postgres.py +0 -26
  76. flowerpower/plugins/io/loader/pydala.py +0 -19
  77. flowerpower/plugins/io/loader/sqlite.py +0 -23
  78. flowerpower/plugins/io/metadata.py +0 -244
  79. flowerpower/plugins/io/saver/__init__.py +0 -28
  80. flowerpower/plugins/io/saver/csv.py +0 -36
  81. flowerpower/plugins/io/saver/deltatable.py +0 -186
  82. flowerpower/plugins/io/saver/duckdb.py +0 -19
  83. flowerpower/plugins/io/saver/json.py +0 -36
  84. flowerpower/plugins/io/saver/mqtt.py +0 -28
  85. flowerpower/plugins/io/saver/mssql.py +0 -26
  86. flowerpower/plugins/io/saver/mysql.py +0 -26
  87. flowerpower/plugins/io/saver/oracle.py +0 -26
  88. flowerpower/plugins/io/saver/parquet.py +0 -36
  89. flowerpower/plugins/io/saver/postgres.py +0 -26
  90. flowerpower/plugins/io/saver/pydala.py +0 -20
  91. flowerpower/plugins/io/saver/sqlite.py +0 -24
  92. flowerpower/plugins/mqtt/cfg.py +0 -17
  93. flowerpower/plugins/mqtt/manager.py +0 -962
  94. flowerpower/settings/job_queue.py +0 -87
  95. flowerpower/utils/scheduler.py +0 -311
  96. flowerpower-0.11.6.20.dist-info/METADATA +0 -537
  97. flowerpower-0.11.6.20.dist-info/RECORD +0 -102
  98. {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/WHEEL +0 -0
  99. {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/entry_points.txt +0 -0
  100. {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/licenses/LICENSE +0 -0
  101. {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/top_level.txt +0 -0
@@ -2,12 +2,12 @@ import datetime as dt
  import os
  import posixpath
  import sys
+ import warnings
  from pathlib import Path
  from types import TracebackType
  from typing import Any, Callable, TypeVar, Union
  from uuid import UUID

- import duration_parser
  from loguru import logger
  from munch import Munch

@@ -16,21 +16,19 @@ try:
  except ImportError:
  Digraph = Any # Type alias for when graphviz isn't installed

- from .. import settings
+ from fsspec_utils import AbstractFileSystem, BaseStorageOptions, filesystem
+
+ from ..settings import CONFIG_DIR, PIPELINES_DIR, CACHE_DIR
  from ..cfg import PipelineConfig, ProjectConfig
  from ..cfg.pipeline.adapter import AdapterConfig as PipelineAdapterConfig
- from ..cfg.pipeline.run import ExecutorConfig, WithAdapterConfig
+ from ..cfg.pipeline.run import ExecutorConfig, RunConfig, WithAdapterConfig
  from ..cfg.project.adapter import AdapterConfig as ProjectAdapterConfig
- from ..fs import AbstractFileSystem, BaseStorageOptions, get_filesystem
- from ..utils.callback import run_with_callback
  from ..utils.logging import setup_logging
  from .io import PipelineIOManager
- from .job_queue import PipelineJobQueue
  from .registry import HookType, PipelineRegistry
- from .runner import run_pipeline
  from .visualizer import PipelineVisualizer

- setup_logging(level=settings.LOG_LEVEL)
+ setup_logging()

  GraphType = TypeVar("GraphType") # Type variable for graphviz.Digraph

@@ -66,7 +64,6 @@ class PipelineManager:
  >>> # Create manager with custom settings
  >>> manager = PipelineManager(
  ... base_dir="/path/to/project",
- ... job_queue_type="rq",
  ... log_level="DEBUG"
  ... )
  """
@@ -76,9 +73,9 @@ class PipelineManager:
  base_dir: str | None = None,
  storage_options: dict | Munch | BaseStorageOptions | None = None,
  fs: AbstractFileSystem | None = None,
- cfg_dir: str | None = settings.CONFIG_DIR,
- pipelines_dir: str | None = settings.PIPELINES_DIR,
- job_queue_type: str | None = None,
+ cfg_dir: str | None = CONFIG_DIR,
+ pipelines_dir: str | None = PIPELINES_DIR,
+
  log_level: str | None = None,
  ) -> None:
  """Initialize the PipelineManager.
@@ -97,8 +94,7 @@ class PipelineManager:
  Example: "config" or "settings".
  pipelines_dir: Override default pipelines directory name ('pipelines').
  Example: "flows" or "dags".
- job_queue_type: Override worker type from project config/settings.
- Valid values: "rq", "apscheduler", or "huey".
+
  log_level: Set logging level for the manager.
  Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"

@@ -118,7 +114,7 @@ class PipelineManager:
  ... "key": "ACCESS_KEY",
  ... "secret": "SECRET_KEY"
  ... },
- ... job_queue_type="rq",
+
  ... log_level="DEBUG"
  ... )
  """
@@ -130,7 +126,7 @@ class PipelineManager:
  if storage_options is not None:
  cached = True
  cache_storage = posixpath.join(
- posixpath.expanduser(settings.CACHE_DIR),
+ posixpath.expanduser(CACHE_DIR),
  self._base_dir.split("://")[-1],
  )
  os.makedirs(cache_storage, exist_ok=True)
@@ -138,7 +134,7 @@ class PipelineManager:
  cached = False
  cache_storage = None
  if not fs:
- fs = get_filesystem(
+ fs = filesystem(
  self._base_dir,
  storage_options=storage_options,
  cached=cached,
@@ -156,17 +152,20 @@ class PipelineManager:
  self._pipelines_dir = pipelines_dir

  self._load_project_cfg(
- reload=True, job_queue_type=job_queue_type
+ reload=True
  ) # Load project config
- self._job_queue_type = job_queue_type or self.project_cfg.job_queue.type
+

  # Ensure essential directories exist (using paths from loaded project_cfg)
  try:
  self._fs.makedirs(self._cfg_dir, exist_ok=True)
  self._fs.makedirs(self._pipelines_dir, exist_ok=True)
- except Exception as e:
+ except (OSError, PermissionError) as e:
  logger.error(f"Error creating essential directories: {e}")
- # Consider raising an error here depending on desired behavior
+ raise RuntimeError(f"Failed to create essential directories: {e}") from e
+ except Exception as e:
+ logger.error(f"Unexpected error creating essential directories: {e}")
+ raise RuntimeError(f"Unexpected filesystem error: {e}") from e

  # Ensure pipeline modules can be imported
  self._add_modules_path()
@@ -175,22 +174,12 @@ class PipelineManager:
  self.registry = PipelineRegistry(
  project_cfg=self.project_cfg,
  fs=self._fs,
- cfg_dir=self._cfg_dir,
- pipelines_dir=self._pipelines_dir,
- )
- pipeline_job_queue = PipelineJobQueue(
- project_cfg=self.project_cfg,
- fs=self._fs,
- cfg_dir=self._cfg_dir,
- pipelines_dir=self._pipelines_dir,
+ base_dir=self._base_dir,
+ storage_options=self._storage_options,
  )
- if pipeline_job_queue.job_queue is None:
- logger.warning(
- "Job queue backend is unavailable. Some features may not work."
- )
- self.jqm = None
- else:
- self.jqm = pipeline_job_queue
+
+ # Initialize project context (will be injected by FlowerPowerProject)
+ self._project_context = None
  self.visualizer = PipelineVisualizer(project_cfg=self.project_cfg, fs=self._fs)
  self.io = PipelineIOManager(registry=self.registry)

@@ -241,49 +230,6 @@ class PipelineManager:
  # Add cleanup code if needed
  pass

- def _get_run_func(
- self,
- name: str,
- reload: bool = False,
- on_success: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
- on_failure: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
- ) -> Callable:
- """Create a PipelineRunner instance and return its run method.
-
- This internal helper method ensures that each job gets a fresh runner
- with the correct configuration state.
-
- Args:
- name: Name of the pipeline to create runner for
- reload: Whether to reload pipeline configuration
-
- Returns:
- Callable: Bound run method from a fresh PipelineRunner instance
-
- Example:
- >>> # Internal usage
- >>> manager = PipelineManager()
- >>> run_func = manager._get_run_func_for_job("data_pipeline")
- >>> result = run_func(inputs={"date": "2025-04-28"})
- """
- if (
- name == self._current_pipeline_name and not reload
- # and hasattr(self, "_runner")
- ):
- # run_pipeline_ = partial(run_pipeline, project_cfg=self.project_cfg, pipeline_cfg=self._pipeline_cfg)
- run_func = run_with_callback(on_success=on_success, on_failure=on_failure)(
- run_pipeline
- )
- return run_func
-
- _ = self.load_pipeline(name=name, reload=reload)
- # run_pipeline_ = partial(run_pipeline, project_cfg=self.project_cfg, pipeline_cfg=pipeline_cfg)
-
- run_func = run_with_callback(on_success=on_success, on_failure=on_failure)(
- run_pipeline
- )
- return run_func
-
  def _add_modules_path(self) -> None:
  """Add pipeline module paths to Python path.

@@ -318,7 +264,7 @@ class PipelineManager:
  sys.path.insert(0, modules_path)

  def _load_project_cfg(
- self, reload: bool = False, job_queue_type: str | None = None
+ self, reload: bool = False
  ) -> ProjectConfig:
  """Load or reload the project configuration.

@@ -342,8 +288,8 @@ class PipelineManager:
  >>> # Internal usage
  >>> manager = PipelineManager()
  >>> project_cfg = manager._load_project_cfg(reload=True)
- >>> print(project_cfg.worker.type)
- 'rq'
+ >>> print(project_cfg.name)
+ 'my_project'
  """
  if hasattr(self, "_project_cfg") and not reload:
  return self._project_cfg
@@ -351,7 +297,6 @@ class PipelineManager:
  # Pass overrides to ProjectConfig.load
  self._project_cfg = ProjectConfig.load(
  base_dir=self._base_dir,
- job_queue_type=job_queue_type,
  fs=self._fs, # Pass pre-configured fs if provided
  storage_options=self._storage_options,
  )
@@ -426,8 +371,8 @@ class PipelineManager:
  Example:
  >>> manager = PipelineManager()
  >>> cfg = manager.project_cfg
- >>> print(cfg.worker.type)
- 'rq'
+ >>> print(cfg.name)
+ 'my_project'
  """
  if not hasattr(self, "_project_cfg"):
  self._load_project_cfg()
@@ -457,26 +402,82 @@ class PipelineManager:

  # --- Core Execution Method ---

+ def _merge_run_config_with_kwargs(self, run_config: RunConfig, kwargs: dict) -> RunConfig:
+ """Merge kwargs into a RunConfig object.
+
+ This helper method updates the RunConfig object with values from kwargs,
+ handling different types of attributes appropriately.
+
+ Args:
+ run_config: The RunConfig object to update
+ kwargs: Dictionary of additional parameters to merge
+
+ Returns:
+ RunConfig: Updated RunConfig object
+ """
+ # Handle dictionary-like attributes with update or deep merge
+ if 'inputs' in kwargs and kwargs['inputs'] is not None:
+ if run_config.inputs is None:
+ run_config.inputs = kwargs['inputs']
+ else:
+ run_config.inputs.update(kwargs['inputs'])
+
+ if 'config' in kwargs and kwargs['config'] is not None:
+ if run_config.config is None:
+ run_config.config = kwargs['config']
+ else:
+ run_config.config.update(kwargs['config'])
+
+ if 'cache' in kwargs and kwargs['cache'] is not None:
+ run_config.cache = kwargs['cache']
+
+ if 'adapter' in kwargs and kwargs['adapter'] is not None:
+ if run_config.adapter is None:
+ run_config.adapter = kwargs['adapter']
+ else:
+ run_config.adapter.update(kwargs['adapter'])
+
+ # Handle executor_cfg - convert string/dict to ExecutorConfig if needed
+ if 'executor_cfg' in kwargs and kwargs['executor_cfg'] is not None:
+ executor_cfg = kwargs['executor_cfg']
+ if isinstance(executor_cfg, str):
+ run_config.executor = ExecutorConfig(type=executor_cfg)
+ elif isinstance(executor_cfg, dict):
+ run_config.executor = ExecutorConfig.from_dict(executor_cfg)
+ elif isinstance(executor_cfg, ExecutorConfig):
+ run_config.executor = executor_cfg
+
+ # Handle adapter configurations
+ if 'with_adapter_cfg' in kwargs and kwargs['with_adapter_cfg'] is not None:
+ with_adapter_cfg = kwargs['with_adapter_cfg']
+ if isinstance(with_adapter_cfg, dict):
+ run_config.with_adapter = WithAdapterConfig.from_dict(with_adapter_cfg)
+ elif isinstance(with_adapter_cfg, WithAdapterConfig):
+ run_config.with_adapter = with_adapter_cfg
+
+ if 'pipeline_adapter_cfg' in kwargs and kwargs['pipeline_adapter_cfg'] is not None:
+ run_config.pipeline_adapter_cfg = kwargs['pipeline_adapter_cfg']
+
+ if 'project_adapter_cfg' in kwargs and kwargs['project_adapter_cfg'] is not None:
+ run_config.project_adapter_cfg = kwargs['project_adapter_cfg']
+
+ # Handle simple attributes
+ simple_attrs = [
+ 'final_vars', 'reload', 'log_level', 'max_retries', 'retry_delay',
+ 'jitter_factor', 'retry_exceptions', 'on_success', 'on_failure'
+ ]
+
+ for attr in simple_attrs:
+ if attr in kwargs and kwargs[attr] is not None:
+ setattr(run_config, attr, kwargs[attr])
+
+ return run_config
+
  def run(
  self,
  name: str,
- inputs: dict | None = None,
- final_vars: list[str] | None = None,
- config: dict | None = None,
- cache: dict | None = None,
- executor_cfg: str | dict | ExecutorConfig | None = None,
- with_adapter_cfg: dict | WithAdapterConfig | None = None,
- pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
- project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
- adapter: dict[str, Any] | None = None,
- reload: bool = False,
- log_level: str | None = None,
- max_retries: int | None = None,
- retry_delay: float | None = None,
- jitter_factor: float | None = None,
- retry_exceptions: tuple | list | None = None,
- on_success: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
- on_failure: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
+ run_config: RunConfig | None = None,
+ **kwargs
  ) -> dict[str, Any]:
  """Execute a pipeline synchronously and return its results.

@@ -485,33 +486,36 @@ class PipelineManager:

  Args:
  name (str): Name of the pipeline to run. Must be a valid identifier.
- inputs (dict | None): Override pipeline input values. Example: {"data_date": "2025-04-28"}
- final_vars (list[str] | None): Specify which output variables to return.
- Example: ["model", "metrics"]
- config (dict | None): Configuration for Hamilton pipeline executor.
- Example: {"model": "LogisticRegression"}
- cache (dict | None): Cache configuration for results. Example: {"recompute": ["node1", "final_node"]}
- executor_cfg (str | dict | ExecutorConfig | None): Execution configuration, can be:
- - str: Executor name, e.g. "threadpool", "local"
- - dict: Raw config, e.g. {"type": "threadpool", "max_workers": 4}
- - ExecutorConfig: Structured config object
- with_adapter_cfg (dict | WithAdapterConfig | None): Adapter settings for pipeline execution.
- Example: {"opentelemetry": True, "tracker": False}
- pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline-specific adapter settings.
- Example: {"tracker": {"project_id": "123", "tags": {"env": "prod"}}}
- project_adapter_cfg (dict | ProjectAdapterConfig | None): Project-level adapter settings.
- Example: {"opentelemetry": {"host": "http://localhost:4317"}}
- adapter (dict[str, Any] | None): Custom adapter instance for pipeline
- Example: {"ray_graph_adapter": RayGraphAdapter()}
- reload (bool): Force reload of pipeline configuration.
- log_level (str | None): Logging level for the execution. Default None uses project config.
- Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
- max_retries (int): Maximum number of retries for execution.
- retry_delay (float): Delay between retries in seconds.
- jitter_factor (float): Random jitter factor to add to retry delay
- retry_exceptions (tuple): Exceptions that trigger a retry.
- on_success (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful pipeline execution.
- on_failure (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on pipeline execution failure.
+ run_config (RunConfig | None): Run configuration object containing all execution parameters.
+ If None, the default configuration from the pipeline will be used.
+ **kwargs: Additional parameters to override the run_config. Supported parameters include:
+ inputs (dict | None): Override pipeline input values. Example: {"data_date": "2025-04-28"}
+ final_vars (list[str] | None): Specify which output variables to return.
+ Example: ["model", "metrics"]
+ config (dict | None): Configuration for Hamilton pipeline executor.
+ Example: {"model": "LogisticRegression"}
+ cache (dict | None): Cache configuration for results. Example: {"recompute": ["node1", "final_node"]}
+ executor_cfg (str | dict | ExecutorConfig | None): Execution configuration, can be:
+ - str: Executor name, e.g. "threadpool", "local"
+ - dict: Raw config, e.g. {"type": "threadpool", "max_workers": 4}
+ - ExecutorConfig: Structured config object
+ with_adapter_cfg (dict | WithAdapterConfig | None): Adapter settings for pipeline execution.
+ Example: {"opentelemetry": True, "tracker": False}
+ pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline-specific adapter settings.
+ Example: {"tracker": {"project_id": "123", "tags": {"env": "prod"}}}
+ project_adapter_cfg (dict | ProjectAdapterConfig | None): Project-level adapter settings.
+ Example: {"opentelemetry": {"host": "http://localhost:4317"}}
+ adapter (dict[str, Any] | None): Custom adapter instance for pipeline
+ Example: {"ray_graph_adapter": RayGraphAdapter()}
+ reload (bool): Force reload of pipeline configuration.
+ log_level (str | None): Logging level for the execution. Default None uses project config.
+ Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
+ max_retries (int): Maximum number of retries for execution.
+ retry_delay (float): Delay between retries in seconds.
+ jitter_factor (float): Random jitter factor to add to retry delay
+ retry_exceptions (tuple): Exceptions that trigger a retry.
+ on_success (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful pipeline execution.
+ on_failure (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on pipeline execution failure.

  Returns:
  dict[str, Any]: Pipeline execution results, mapping output variable names
@@ -530,46 +534,49 @@ class PipelineManager:
  >>> # Basic pipeline run
  >>> results = manager.run("data_pipeline")
  >>>
- >>> # Complex run with overrides
+ >>> # Run with custom RunConfig
+ >>> from flowerpower.cfg.pipeline.run import RunConfig
+ >>> config = RunConfig(inputs={"date": "2025-04-28"}, final_vars=["result"])
+ >>> results = manager.run("ml_pipeline", run_config=config)
+ >>>
+ >>> # Complex run with kwargs overrides
  >>> results = manager.run(
- ... name="ml_pipeline",
- ... inputs={
- ... "training_date": "2025-04-28",
- ... "model_params": {"n_estimators": 100}
- ... },
+ ... "ml_pipeline",
+ ... inputs={"training_date": "2025-04-28"},
  ... final_vars=["model", "metrics"],
  ... executor_cfg={"type": "threadpool", "max_workers": 4},
  ... with_adapter_cfg={"tracker": True},
  ... reload=True
  ... )
  """
- # pipeline_cfg = self._load_pipeline_cfg(name=name, reload=reload)
- run_func = self._get_run_func(
- name=name, reload=reload, on_success=on_success, on_failure=on_failure
+ # Initialize run_config - use provided config or load pipeline default
+ if run_config is None:
+ run_config = self.load_pipeline(name=name).run
+
+ # Merge kwargs into run_config
+ if kwargs:
+ run_config = self._merge_run_config_with_kwargs(run_config, kwargs)
+
+ # Set up logging for this specific run if log_level is provided
+ if run_config.log_level is not None:
+ setup_logging(level=run_config.log_level)
+ else:
+ # Ensure logging is reset to default if no specific level is provided for this run
+ setup_logging()
+
+ # Use injected project context, fallback to self for backward compatibility
+ project_context = getattr(self, "_project_context", self)
+
+ # Get Pipeline instance from registry
+ pipeline = self.registry.get_pipeline(
+ name=name, project_context=project_context, reload=run_config.reload
  )

- res = run_func(
- project_cfg=self._project_cfg,
- pipeline_cfg=self._pipeline_cfg,
- inputs=inputs,
- final_vars=final_vars,
- config=config,
- cache=cache,
- executor_cfg=executor_cfg,
- with_adapter_cfg=with_adapter_cfg,
- pipeline_adapter_cfg=pipeline_adapter_cfg,
- project_adapter_cfg=project_adapter_cfg,
- adapter=adapter,
- # reload=reload, # Runner handles module reload if needed
- log_level=log_level,
- max_retries=max_retries,
- retry_delay=retry_delay,
- jitter_factor=jitter_factor,
- retry_exceptions=retry_exceptions,
+ # Execute pipeline using its own run method
+ return pipeline.run(
+ run_config=run_config,
  )

- return res
-
  # --- Delegated Methods ---

  # Registry Delegations
@@ -1213,571 +1220,3 @@ class PipelineManager:
  return self.visualizer.show_dag(
  name=name, format=format, reload=reload, raw=raw
  )
-
- def run_job(
- self,
- name: str,
- inputs: dict | None = None,
- final_vars: list[str] | None = None,
- config: dict | None = None,
- cache: bool | dict = False,
- executor_cfg: str | dict | ExecutorConfig | None = None,
- with_adapter_cfg: dict | WithAdapterConfig | None = None,
- pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
- project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
- adapter: dict[str, Any] | None = None,
- reload: bool = False,
- log_level: str | None = None,
- max_retries: int | None = None,
- retry_delay: float | None = None,
- jitter_factor: float | None = None,
- retry_exceptions: tuple | list | None = None,
- on_success: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
- on_failure: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
- on_success_pipeline: Callable
- | tuple[Callable, tuple | None, dict | None]
- | None = None,
- on_failure_pipeline: Callable
- | tuple[Callable, tuple | None, dict | None]
- | None = None,
- **kwargs: Any,
- ) -> dict[str, Any] | None:
- """Execute a pipeline job immediately through the job queue.
-
- Unlike the run() method which executes synchronously, this method runs
- the pipeline through the configured worker system (RQ, APScheduler, etc.).
-
- If the job queue is not configured, it logs an error and returns None.
-
- Args:
- name (str): Name of the pipeline to run. Must be a valid identifier.
- inputs (dict | None): Override pipeline input values. Example: {"data_date": "2025-04-28"}
- final_vars (list[str] | None): Specify which output variables to return.
- Example: ["model", "metrics"]
- config (dict | None): Configuration for Hamilton pipeline executor.
- Example: {"model": "LogisticRegression"}
- cache (dict | None): Cache configuration for results. Example: {"recompute": ["node1", "final_node"]}
- executor_cfg (str | dict | ExecutorConfig | None): Execution configuration, can be:
- - str: Executor name, e.g. "threadpool", "local"
- - dict: Raw config, e.g. {"type": "threadpool", "max_workers": 4}
- - ExecutorConfig: Structured config object
- with_adapter_cfg (dict | WithAdapterConfig | None): Adapter settings for pipeline execution.
- Example: {"opentelemetry": True, "tracker": False}
- pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline-specific adapter settings.
- Example: {"tracker": {"project_id": "123", "tags": {"env": "prod"}}}
- project_adapter_cfg (dict | ProjectAdapterConfig | None): Project-level adapter settings.
- Example: {"opentelemetry": {"host": "http://localhost:4317"}}
- adapter (dict[str, Any] | None): Custom adapter instance for pipeline
- Example: {"ray_graph_adapter": RayGraphAdapter()}
- reload (bool): Force reload of pipeline configuration.
- log_level (str | None): Logging level for the execution. Default None uses project config.
- Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
- max_retries (int): Maximum number of retries for execution.
- retry_delay (float): Delay between retries in seconds.
- jitter_factor (float): Random jitter factor to add to retry delay
- retry_exceptions (tuple): Exceptions that trigger a retry.
- on_success (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful job execution.
- This runs after the pipeline execution through the job queue was executed successfully.
- on_failure (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on job execution failure.
- This runs if the job creation or the pipeline execution through the job queue fails or raises an exception.
- on_success_pipeline (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful pipeline execution.
- This runs after the pipeline completes successfully.
- on_failure_pipeline (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on pipeline execution failure.
- This runs if the pipeline fails or raises an exception.
-
- **kwargs: JobQueue-specific arguments
- For RQ:
- - queue_name: Queue to use (str)
- - retry: Number of retries (int)
- - result_ttl: Time to live for the job result (float or timedelta)
- - ttl: Time to live for the job (float or timedelta)
- - timeout: Time to wait for the job to complete (float or timedelta)
- - repeat: Repeat count (int or dict)
- - rq_on_failure: Callback function on failure (callable)
- - rq_on_success: Callback function on success (callable)
- - rq_on_stopped: Callback function on stop (callable)
- For APScheduler:
- - job_executor: Executor type (str)
-
- Returns:
- dict[str, Any] | None: Job execution results if successful, otherwise None.
-
- Raises:
- ValueError: If pipeline or configuration is invalid
- RuntimeError: If job execution fails
-
- Example:
- >>> from flowerpower.pipeline import PipelineManager
- >>>
- >>> manager = PipelineManager()
- >>>
- >>> # Simple job execution
- >>> result = manager.run_job("data_pipeline")
- >>>
- >>> # Complex job with retry logic
- >>> result = manager.run_job(
- ... name="ml_training",
- ... inputs={"training_date": "2025-04-28"},
- ... executor_cfg={"type": "async"},
- ... with_adapter_cfg={"enable_tracking": True},
- ... retry=3,
- ... queue_name="ml_jobs"
- ... )
- """
- if self.jqm is None:
- logger.error(
- "This PipelineManager instance does not have a job queue configured. Skipping job execution."
- )
- return None
-
- kwargs["on_success"] = kwargs.get("rq_on_success", None)
- kwargs["on_failure"] = kwargs.get("rq_on_failure", None)
- kwargs["on_stopped"] = kwargs.get("rq_on_stopped", None)
-
- run_func = self._get_run_func(
- name=name,
- reload=reload,
- on_success=on_success_pipeline,
- on_failure=on_failure_pipeline,
- )
- # run_func = run_with_callback(on_success=on_success_pipeline, on_failure=on_failure_pipeline)(
- # run_func_
- # )
- run_job = run_with_callback(on_success=on_success, on_failure=on_failure)(
- self.jqm.run_job
- )
-
- return run_job(
- run_func=run_func,
- pipeline_cfg=self._pipeline_cfg,
- name=name,
- inputs=inputs,
- final_vars=final_vars,
- config=config,
- cache=cache,
- executor_cfg=executor_cfg,
- with_adapter_cfg=with_adapter_cfg,
- pipeline_adapter_cfg=pipeline_adapter_cfg,
- project_adapter_cfg=project_adapter_cfg,
- adapter=adapter,
- log_level=log_level,
- max_retries=max_retries,
- retry_delay=retry_delay,
- jitter_factor=jitter_factor,
- retry_exceptions=retry_exceptions,
- **kwargs,
- )
-
- def add_job(
- self,
- name: str,
- inputs: dict | None = None,
- final_vars: list[str] | None = None,
- config: dict | None = None,
- cache: bool | dict = False,
- executor_cfg: str | dict | ExecutorConfig | None = None,
- with_adapter_cfg: dict | WithAdapterConfig | None = None,
- pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
- project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
- adapter: dict[str, Any] | None = None,
- reload: bool = False, # Reload config/module before creating run_func
- log_level: str | None = None,
- result_ttl: int | dt.timedelta = 0,
- run_at: dt.datetime | str | None = None,
- run_in: dt.datetime | str | None = None,
- max_retries: int = 3,
- retry_delay: float = 1.0,
- jitter_factor: float = 0.1,
- retry_exceptions: tuple = (Exception,),
- on_success: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
- on_failure: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
- on_success_pipeline: Callable
- | tuple[Callable, tuple | None, dict | None]
- | None = None,
- on_failure_pipeline: Callable
- | tuple[Callable, tuple | None, dict | None]
- | None = None,
- **kwargs, # JobQueue specific args
- ) -> str | UUID | None:
- """Adds a job to the job queue.
-
- If the job queue is not configured, it logs an error and returns None.
-
- Args:
- name (str): Name of the pipeline to run. Must be a valid identifier.
- inputs (dict | None): Override pipeline input values. Example: {"data_date": "2025-04-28"}
- final_vars (list[str] | None): Specify which output variables to return.
- Example: ["model", "metrics"]
- config (dict | None): Configuration for Hamilton pipeline executor.
- Example: {"model": "LogisticRegression"}
- cache (dict | None): Cache configuration for results. Example: {"recompute": ["node1", "final_node"]}
- executor_cfg (str | dict | ExecutorConfig | None): Execution configuration, can be:
- - str: Executor name, e.g. "threadpool", "local"
- - dict: Raw config, e.g. {"type": "threadpool", "max_workers": 4}
- - ExecutorConfig: Structured config object
- with_adapter_cfg (dict | WithAdapterConfig | None): Adapter settings for pipeline execution.
- Example: {"opentelemetry": True, "tracker": False}
- pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline-specific adapter settings.
- Example: {"tracker": {"project_id": "123", "tags": {"env": "prod"}}}
- project_adapter_cfg (dict | ProjectAdapterConfig | None): Project-level adapter settings.
- Example: {"opentelemetry": {"host": "http://localhost:4317"}}
- adapter (dict[str, Any] | None): Custom adapter instance for pipeline
- Example: {"ray_graph_adapter": RayGraphAdapter()}
- reload (bool): Force reload of pipeline configuration.
- run_at (dt.datetime | str | None): Future date to run the job.
- Example: datetime(2025, 4, 28, 12, 0)
- Example str: "2025-04-28T12:00:00" (ISO format)
- run_in (dt.datetime | str | None): Time interval to run the job.
- Example: 3600 (every hour in seconds)
- Example: datetime.timedelta(days=1)
- Example str: "1d" (1 day)
- result_ttl (int | dt.timedelta): Time to live for the job result.
- Example: 3600 (1 hour in seconds)
- log_level (str | None): Logging level for the execution. Default None uses project config.
- Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
- max_retries (int): Maximum number of retries for execution.
- retry_delay (float): Delay between retries in seconds.
- jitter_factor (float): Random jitter factor to add to retry delay
- retry_exceptions (tuple): Exceptions that trigger a retry.
- on_success (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful job creation.
- on_failure (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on job creation failure.
- on_success_pipeline (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful pipeline execution.
- on_failure_pipeline (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on pipeline execution failure.
- **kwargs: Additional keyword arguments passed to the worker's add_job method.
- For RQ this includes:
- - result_ttl: Time to live for the job result (float or timedelta)
- - ttl: Time to live for the job (float or timedelta)
- - timeout: Time to wait for the job to complete (float or timedelta)
- - queue_name: Name of the queue to use (str)
- - retry: Number of retries (int)
- - repeat: Repeat count (int or dict)
- - rq_on_failure: Callback function on failure (callable)
- - rq_on_success: Callback function on success (callable)
- - rq_on_stopped: Callback function on stop (callable)
- For APScheduler, this includes:
- - job_executor: Job executor to use (str)
-
- Returns:
- str | UUID | None: The ID of the job that was added to the job queue, or None if the job queue is not configured.
-
- Raises:
- ValueError: If the job ID is not valid or if the job cannot be scheduled.
-
- Example:
- >>> from flowerpower.pipeline import PipelineManager
- >>> pm = PipelineManager()
- >>> job_id = pm.add_job("example_pipeline", inputs={"input1": 42})
-
- """
- if self.jqm is None:
- logger.error(
- "This PipelineManager instance does not have a job queue configured. Skipping job execution."
- )
- return None
-
- kwargs["on_success"] = kwargs.get("rq_on_success", None)
- kwargs["on_failure"] = kwargs.get("rq_on_failure", None)
- kwargs["on_stopped"] = kwargs.get("rq_on_stopped", None)
-
- run_func = self._get_run_func(
- name=name,
- reload=reload,
- on_success=on_success_pipeline,
- on_failure=on_failure_pipeline,
- )
-
- run_in = (
- duration_parser.parse(run_in) if isinstance(run_in, str) else run_in
- ) # convert to seconds
- run_at = (
- dt.datetime.fromisoformat(run_at) if isinstance(run_at, str) else run_at
- )
-
- add_job = run_with_callback(on_success=on_success, on_failure=on_failure)(
- self.jqm.add_job
- )
- return add_job(
- run_func=run_func,
- pipeline_cfg=self._pipeline_cfg,
- name=name, # Pass name for logging
- # Pass run parameters
- inputs=inputs,
- final_vars=final_vars,
- config=config,
- cache=cache,
- executor_cfg=executor_cfg,
- with_adapter_cfg=with_adapter_cfg,
- pipeline_adapter_cfg=pipeline_adapter_cfg,
- project_adapter_cfg=project_adapter_cfg,
- adapter=adapter,
- # reload=reload, # Note: reload already happened
- log_level=log_level,
- result_ttl=result_ttl,
- run_at=run_at,
- run_in=run_in,
- max_retries=max_retries,
- retry_delay=retry_delay,
- jitter_factor=jitter_factor,
- retry_exceptions=retry_exceptions,
- **kwargs, # Pass worker args
- )
-
- def schedule(
- self,
- name: str,
- inputs: dict | None = None,
- final_vars: list[str] | None = None,
- config: dict | None = None,
- cache: bool | dict = False,
- executor_cfg: str | dict | ExecutorConfig | None = None,
- with_adapter_cfg: dict | WithAdapterConfig | None = None,
- pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
- project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
- adapter: dict[str, Any] | None = None,
- reload: bool = False,
- log_level: str | None = None,
- cron: str | dict[str, str | int] | None = None,
- interval: int | str | dict[str, str | int] | None = None,
- date: dt.datetime | str | None = None,
- overwrite: bool = False,
- schedule_id: str | None = None,
- max_retries: int | None = None,
- retry_delay: float | None = None,
- jitter_factor: float | None = None,
- retry_exceptions: tuple | list | None = None,
- on_success: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
- on_failure: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
- on_success_pipeline: Callable
- | tuple[Callable, tuple | None, dict | None]
- | None = None,
- on_failure_pipeline: Callable
- | tuple[Callable, tuple | None, dict | None]
- | None = None,
- **kwargs: Any,
- ) -> str | UUID | None:
- """Schedule a pipeline to run on a recurring or future basis.
-
- If the job queue is not configured, it logs an error and returns None.
-
- Args:
- name (str): The name of the pipeline to run.
- inputs (dict | None): Inputs for the pipeline run (overrides config).
- final_vars (list[str] | None): Final variables for the pipeline run (overrides config).
- config (dict | None): Hamilton driver config (overrides config).
- cache (bool | dict): Cache settings (overrides config).
- executor_cfg (str | dict | ExecutorConfig | None): Executor configuration (overrides config).
- with_adapter_cfg (dict | WithAdapterConfig | None): Adapter configuration (overrides config).
- pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline adapter configuration (overrides config).
- project_adapter_cfg (dict | ProjectAdapterConfig | None): Project adapter configuration (overrides config).
- adapter (dict[str, Any] | None): Additional Hamilton adapters (overrides config).
- reload (bool): Whether to reload module and pipeline config. Defaults to False.
- log_level (str | None): Log level for the run (overrides config).
- cron (str | dict[str, str | int] | None): Cron expression or settings
- Example string: "0 0 * * *" (daily at midnight)
- Example dict: {"minute": "0", "hour": "*/2"} (every 2 hours)
- interval (int | str | dict[str, str | int] | None): Time interval for recurring execution
- Example int: 3600 (every hour in seconds)
- Example str: "1h" (every hour)
- Example dict: {"hours": 1, "minutes": 30} (every 90 minutes)
- date (dt.datetime | str | None): Future date for
- Example: datetime(2025, 4, 28, 12, 0)
- Example str: "2025-04-28T12:00:00" (ISO format)
- overwrite (bool): Whether to overwrite existing schedule with the same ID
- schedule_id (str | None): Unique identifier for the schedule
- max_retries (int): Maximum number of retries for execution
- retry_delay (float): Delay between retries in seconds
- jitter_factor (float): Random jitter factor to add to retry delay
- retry_exceptions (tuple): Exceptions that trigger a retry
- on_success (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful schedule creation.
- on_failure (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on schedule creation failure.
- on_success_pipeline (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful pipeline execution.
- on_failure_pipeline (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on pipeline execution failure.
- **kwargs: JobQueue-specific scheduling options
- For RQ:
- - result_ttl: Result lifetime (int seconds)
- - ttl: Job lifetime (int seconds)
- - timeout: Job execution timeout (int seconds)
- - queue_name: Queue to use (str)
- - repeat: Repeat count (int or dict)
- - rq_on_failure: Callback function on failure (callable)
- - rq_on_success: Callback function on success (callable)
- - rq_on_stopped: Callback function on stop (callable)
- For APScheduler:
- - misfire_grace_time: Late execution window
- - coalesce: Combine missed executions (bool)
- - max_running_jobs: Concurrent instances limit (int)
-
- Returns:
- str | UUID | None: Unique identifier for the created schedule, or None if scheduling fails.
-
- Raises:
- ValueError: If schedule parameters are invalid
- RuntimeError: If scheduling fails
-
- Example:
- >>> from flowerpower.pipeline import PipelineManager
- >>> from datetime import datetime, timedelta
- >>>
- >>> manager = PipelineManager()
- >>>
- >>> # Daily schedule with cron
- >>> schedule_id = manager.schedule(
- ... name="daily_metrics",
- ... cron="0 0 * * *",
- ... inputs={"date": "{{ execution_date }}"}
- ... )
- >>>
- >>> # Interval-based schedule
- >>> schedule_id = manager.schedule(
- ... name="monitoring",
- ... interval={"minutes": 15},
- ... with_adapter_cfg={"enable_alerts": True}
- ... )
- >>>
- >>> # Future one-time execution
- >>> future_date = datetime.now() + timedelta(days=1)
- >>> schedule_id = manager.schedule(
- ... name="batch_process",
- ... date=future_date,
- ... executor_cfg={"type": "async"}
- ... )
- """
- if self.jqm is None:
- logger.error(
- "This PipelineManager instance does not have a job queue configured. Skipping job execution."
- )
- return None
-
- kwargs["on_success"] = kwargs.get("rq_on_success", None)
- kwargs["on_failure"] = kwargs.get("rq_on_failure", None)
- kwargs["on_stopped"] = kwargs.get("rq_on_stopped", None)
-
- # pipeline_cfg = self._load_pipeline_cfg(name=name, reload=reload)
- run_func = self._get_run_func(
- name=name,
- reload=reload,
- on_success=on_success_pipeline,
- on_failure=on_failure_pipeline,
- )
- interval = (
- duration_parser.parse(interval) if isinstance(interval, str) else interval
- )
- date = dt.datetime.fromisoformat(date) if isinstance(date, str) else date
-
- schedule = run_with_callback(on_success=on_success, on_failure=on_failure)(
- self.jqm.schedule
- )
- return schedule(
- run_func=run_func,
- pipeline_cfg=self._pipeline_cfg,
- inputs=inputs,
- final_vars=final_vars,
- config=config,
- cache=cache,
- executor_cfg=executor_cfg,
- with_adapter_cfg=with_adapter_cfg,
- pipeline_adapter_cfg=pipeline_adapter_cfg,
- project_adapter_cfg=project_adapter_cfg,
- adapter=adapter,
- reload=reload,
- log_level=log_level,
- cron=cron,
- interval=interval,
- date=date,
- overwrite=overwrite,
- schedule_id=schedule_id,
- max_retries=max_retries,
- retry_delay=retry_delay,
- jitter_factor=jitter_factor,
- retry_exceptions=retry_exceptions,
- **kwargs,
- )
-
- def schedule_all(self, **kwargs: Any) -> None:
- """Schedule all pipelines that are enabled in their configuration.
-
- For each enabled pipeline, applies its configured schedule settings
- and any provided overrides.
-
- Args:
- **kwargs: Overrides for schedule settings that apply to all pipelines.
- See schedule() method for supported arguments.
-
- Example:
- >>> from flowerpower.pipeline import PipelineManager
- >>>
- >>> manager = PipelineManager()
- >>>
- >>> # Schedule all with default settings
- >>> manager.schedule_all()
- >>>
- >>> # Schedule all with common overrides
- >>> manager.schedule_all(
- ... max_running_jobs=2,
- ... coalesce=True,
- ... misfire_grace_time=300
- ... )
- """
- scheduled_ids = []
- errors = []
- pipeline_names = self.list_pipelines()
- if not pipeline_names:
- logger.warning("No pipelines found to schedule.")
- return
-
- logger.info(f"Attempting to schedule {len(pipeline_names)} pipelines...")
- for name in pipeline_names:
- try:
- pipeline_cfg = self.load_pipeline(name=name, reload=True)
-
- if not pipeline_cfg.schedule.enabled:
- logger.info(
- f"Skipping scheduling for '{name}': Not enabled in config."
- )
- continue
-
- logger.info(f"Scheduling [cyan]{name}[/cyan]...")
- schedule_id = self.schedule(name=name, reload=False, **kwargs)
- if schedule_id is None:
- logger.info(
- f"🟡 Skipping adding schedule for [cyan]{name}[/cyan]: Job queue backend not available or scheduling failed."
- )
- continue
- scheduled_ids.append(schedule_id)
- except Exception as e:
- logger.error(f"Failed to schedule pipeline '{name}': {e}")
- errors.append(name)
-
- if errors:
- logger.error(f"Finished scheduling with errors for: {', '.join(errors)}")
- else:
- logger.info(f"Successfully scheduled {len(scheduled_ids)} pipelines.")
-
- @property
- def schedules(self) -> list[Any]:
- """Get list of current pipeline schedules.
-
- Retrieves all active schedules from the worker system.
-
- Returns:
- list[Any]: List of schedule objects. Exact type depends on worker:
- - RQ: List[rq.job.Job]
- - APScheduler: List[apscheduler.schedulers.base.Schedule]
-
- Example:
- >>> from flowerpower.pipeline import PipelineManager
- >>>
- >>> manager = PipelineManager()
- >>> for schedule in manager.schedules:
- ... print(f"{schedule.id}: Next run at {schedule.next_run_time}")
- """
- if self.jqm is None:
- logger.error(
- "This PipelineManager instance does not have a job queue configured. Skipping schedule retrieval."
- )
- return []
- try:
- return self.jqm._get_schedules()
- except Exception as e:
- logger.error(f"Failed to retrieve schedules: {e}")
- return []
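
The diff above removes the job-queue entry points (run_job, add_job, schedule, schedule_all, schedules) and consolidates synchronous execution into PipelineManager.run(name, run_config=None, **kwargs). A minimal usage sketch derived from the docstrings shown in this diff; the base_dir value and pipeline name are placeholders, not part of the package:

    from flowerpower.pipeline import PipelineManager
    from flowerpower.cfg.pipeline.run import RunConfig

    # Placeholder project directory; any FlowerPower project layout works here.
    manager = PipelineManager(base_dir="/path/to/project")

    # Override the pipeline's default configuration via keyword arguments.
    results = manager.run("data_pipeline", inputs={"data_date": "2025-04-28"})

    # Or pass an explicit RunConfig object.
    cfg = RunConfig(inputs={"data_date": "2025-04-28"}, final_vars=["result"])
    results = manager.run("data_pipeline", run_config=cfg)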