FlowerPower 0.11.6.19__py3-none-any.whl → 0.20.0__py3-none-any.whl

This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (80)
  1. flowerpower/cfg/__init__.py +3 -3
  2. flowerpower/cfg/pipeline/__init__.py +5 -3
  3. flowerpower/cfg/project/__init__.py +3 -3
  4. flowerpower/cfg/project/job_queue.py +1 -128
  5. flowerpower/cli/__init__.py +5 -5
  6. flowerpower/cli/cfg.py +0 -3
  7. flowerpower/cli/job_queue.py +401 -133
  8. flowerpower/cli/pipeline.py +14 -413
  9. flowerpower/cli/utils.py +0 -1
  10. flowerpower/flowerpower.py +537 -28
  11. flowerpower/job_queue/__init__.py +5 -94
  12. flowerpower/job_queue/base.py +201 -3
  13. flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -3
  14. flowerpower/job_queue/rq/manager.py +388 -77
  15. flowerpower/pipeline/__init__.py +2 -0
  16. flowerpower/pipeline/base.py +2 -2
  17. flowerpower/pipeline/io.py +14 -16
  18. flowerpower/pipeline/manager.py +21 -642
  19. flowerpower/pipeline/pipeline.py +571 -0
  20. flowerpower/pipeline/registry.py +242 -10
  21. flowerpower/pipeline/visualizer.py +1 -2
  22. flowerpower/plugins/_io/__init__.py +8 -0
  23. flowerpower/plugins/mqtt/manager.py +6 -6
  24. flowerpower/settings/backend.py +0 -2
  25. flowerpower/settings/job_queue.py +1 -57
  26. flowerpower/utils/misc.py +0 -256
  27. flowerpower/utils/monkey.py +1 -83
  28. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/METADATA +308 -152
  29. flowerpower-0.20.0.dist-info/RECORD +58 -0
  30. flowerpower/fs/__init__.py +0 -29
  31. flowerpower/fs/base.py +0 -662
  32. flowerpower/fs/ext.py +0 -2143
  33. flowerpower/fs/storage_options.py +0 -1420
  34. flowerpower/job_queue/apscheduler/__init__.py +0 -11
  35. flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -110
  36. flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -93
  37. flowerpower/job_queue/apscheduler/manager.py +0 -1051
  38. flowerpower/job_queue/apscheduler/setup.py +0 -554
  39. flowerpower/job_queue/apscheduler/trigger.py +0 -169
  40. flowerpower/job_queue/apscheduler/utils.py +0 -311
  41. flowerpower/pipeline/job_queue.py +0 -583
  42. flowerpower/pipeline/runner.py +0 -603
  43. flowerpower/plugins/io/base.py +0 -2520
  44. flowerpower/plugins/io/helpers/datetime.py +0 -298
  45. flowerpower/plugins/io/helpers/polars.py +0 -875
  46. flowerpower/plugins/io/helpers/pyarrow.py +0 -570
  47. flowerpower/plugins/io/helpers/sql.py +0 -202
  48. flowerpower/plugins/io/loader/__init__.py +0 -28
  49. flowerpower/plugins/io/loader/csv.py +0 -37
  50. flowerpower/plugins/io/loader/deltatable.py +0 -190
  51. flowerpower/plugins/io/loader/duckdb.py +0 -19
  52. flowerpower/plugins/io/loader/json.py +0 -37
  53. flowerpower/plugins/io/loader/mqtt.py +0 -159
  54. flowerpower/plugins/io/loader/mssql.py +0 -26
  55. flowerpower/plugins/io/loader/mysql.py +0 -26
  56. flowerpower/plugins/io/loader/oracle.py +0 -26
  57. flowerpower/plugins/io/loader/parquet.py +0 -35
  58. flowerpower/plugins/io/loader/postgres.py +0 -26
  59. flowerpower/plugins/io/loader/pydala.py +0 -19
  60. flowerpower/plugins/io/loader/sqlite.py +0 -23
  61. flowerpower/plugins/io/metadata.py +0 -244
  62. flowerpower/plugins/io/saver/__init__.py +0 -28
  63. flowerpower/plugins/io/saver/csv.py +0 -36
  64. flowerpower/plugins/io/saver/deltatable.py +0 -186
  65. flowerpower/plugins/io/saver/duckdb.py +0 -19
  66. flowerpower/plugins/io/saver/json.py +0 -36
  67. flowerpower/plugins/io/saver/mqtt.py +0 -28
  68. flowerpower/plugins/io/saver/mssql.py +0 -26
  69. flowerpower/plugins/io/saver/mysql.py +0 -26
  70. flowerpower/plugins/io/saver/oracle.py +0 -26
  71. flowerpower/plugins/io/saver/parquet.py +0 -36
  72. flowerpower/plugins/io/saver/postgres.py +0 -26
  73. flowerpower/plugins/io/saver/pydala.py +0 -20
  74. flowerpower/plugins/io/saver/sqlite.py +0 -24
  75. flowerpower/utils/scheduler.py +0 -311
  76. flowerpower-0.11.6.19.dist-info/RECORD +0 -102
  77. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/WHEEL +0 -0
  78. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/entry_points.txt +0 -0
  79. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/licenses/LICENSE +0 -0
  80. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/top_level.txt +0 -0
flowerpower/pipeline/job_queue.py (deleted)
@@ -1,583 +0,0 @@
-# -*- coding: utf-8 -*-
-# pylint: disable=logging-fstring-interpolation
-# flake8: noqa: E501
-"""Pipeline Job Queue."""
-
-import datetime as dt
-from typing import Any, Callable, Optional, Union
-from uuid import UUID
-
-from loguru import logger
-from rich import print as rprint
-
-from .. import settings
-# Import necessary config types
-from ..cfg import PipelineConfig, ProjectConfig
-from ..fs import AbstractFileSystem
-from ..job_queue import JobQueueBackend, JobQueueManager
-from ..utils.logging import setup_logging
-from .registry import PipelineRegistry
-
-setup_logging()
-
-
-class PipelineJobQueue:
-    """Handles scheduling of pipeline runs via a configured job queue backend."""
-
-    def __init__(
-        self,
-        project_cfg: ProjectConfig,
-        fs: AbstractFileSystem,
-        cfg_dir: str,
-        pipelines_dir: str,
-        # job_queue_type: str | None = None,
-    ):
-        """Initialize PipelineJobQueue.
-
-        Args:
-            project_cfg: The project configuration object.
-            fs: The file system to use for file operations.
-            cfg_dir: The directory for configuration files.
-            pipelines_dir: The directory for pipeline files.
-            job_queue_type: The type of job queue to use (e.g., 'rq', 'apscheduler'). If None, defaults to the project config.
-        """
-        self.project_cfg = project_cfg
-        self._fs = fs
-        self._cfg_dir = cfg_dir
-        self._pipelines_dir = pipelines_dir
-        self._job_queue_type = project_cfg.job_queue.type
-        self._job_queue_backend_cfg = project_cfg.job_queue.backend
-        self._job_queue = None
-
-        # if not self._job_queue_type:
-        #     # Fallback or default if not specified in project config
-        #     self._job_queue_type = settings.JOB_QUEUE_TYPE
-        #     logger.warning(
-        #         f"Job queue type not specified in project config, defaulting to '{self._job_queue_type}'"
-        #     )
-
-    @property
-    def job_queue(self) -> Optional[Any]:
-        """
-        Lazily instantiate and cache a Job queue instance.
-        Handles the case where JobQueueManager returns None due to missing dependencies.
-
-        Returns:
-            Optional[Any]: The job queue manager instance, or None if the backend is unavailable.
-        """
-        logger.debug(
-            f"Instantiating job queue of type: {self._job_queue_type} for project '{self.project_cfg.name}'"
-        )
-        if self._job_queue is None:
-            self._job_queue = JobQueueManager(
-                name=self.project_cfg.name,
-                type=self._job_queue_type,
-                backend=JobQueueBackend(
-                    job_queue_type=self._job_queue_type,
-                    **self._job_queue_backend_cfg.to_dict(),
-                ),
-            )
-
-        if self._job_queue is None:
-            if self._job_queue_type == "rq":
-                logger.warning(
-                    "JobQueueManager could not be instantiated. The RQ backend is unavailable. "
-                    "Please ensure RQ is installed and configured correctly and that the Redis server is running."
-                )
-            elif self._job_queue_type == "apscheduler":
-                logger.warning(
-                    "JobQueueManager could not be instantiated. The APScheduler backend is unavailable. "
-                    f"Please ensure APScheduler is installed and configured correctly, and that the configured data store ({self.project_cfg.job_queue.backend.data_store.type}) "
-                    f"and event_broker ({self.project_cfg.job_queue.backend.event_broker.type}) are accessible."
-                )
-            return None
-        return self._job_queue
-
-    def _get_schedule_ids(self) -> list[Any]:
-        """Get all schedules from the job queue backend.
-
-        Returns:
-            list[Any]: List of schedule IDs, or empty list if job queue backend is unavailable.
-        """
-
-        if self.job_queue is None:
-            return []
-        with self.job_queue as job_queue:
-            logger.debug("Fetching schedules ids from job queue")
-            return job_queue.schedule_ids
-
-    def run_job(
-        self,
-        run_func: Callable,
-        pipeline_cfg: PipelineConfig, # Pipeline configuration object
-        name: str, # name: str,
-        inputs: dict | None = None,
-        final_vars: list | None = None,
-        config: dict | None = None,
-        cache: bool | dict = False,
-        executor_cfg: str | dict | Any | None = None,
-        with_adapter_cfg: dict | Any | None = None,
-        pipeline_adapter_cfg: dict | Any | None = None,
-        project_adapter_cfg: dict | Any | None = None,
-        adapter: dict[str, Any] | None = None,
-        reload: bool = False,
-        log_level: str | None = None,
-        max_retries: int | None = None,
-        retry_delay: float | None = None,
-        jitter_factor: float | None = None,
-        retry_exceptions: tuple | None = None,
-        **kwargs,
-    ) -> Optional[dict[str, Any]]:
-        """
-        Add a job to run the pipeline immediately via the job queue queue.
-
-        Args:
-            run_func (Callable): The function to execute in the job queue (e.g., a configured PipelineRunner.run).
-            pipeline_cfg (PipelineConfig): The pipeline configuration object.
-            name (str): The name of the pipeline (used for logging).
-            inputs (dict | None): Inputs for the pipeline run.
-            final_vars (list | None): Final variables for the pipeline run.
-            config (dict | None): Hamilton driver config.
-            cache (bool | dict): Cache configuration.
-            executor_cfg (str | dict | ExecutorConfig | None): Executor configuration.
-            with_adapter_cfg (dict | WithAdapterConfig | None): Adapter configuration.
-            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline adapter configuration.
-            project_adapter_cfg (dict | ProjectAdapterConfig | None): Project adapter configuration.
-            adapter (dict[str, Any] | None): Additional adapter configuration.
-            reload (bool): Whether to reload the pipeline module.
-            log_level (str | None): Log level for the run.
-            max_retries (int): Maximum number of retries for the job.
-            retry_delay (float): Delay between retries.
-            jitter_factor (float): Jitter factor for retry delay.
-            retry_exceptions (tuple): Exceptions that should trigger a retry.
-            **kwargs: Additional keyword arguments passed directly to the job queue's add_job method.
-
-        Returns:
-            Optional[dict[str, Any]]: The result of the job execution, or None if job queue backend is unavailable.
-        """
-        logger.debug(f"Adding immediate job for pipeline: {name}")
-
-        pipeline_run_args = {
-            # 'name' is not passed to run_func, it's part of the context already in PipelineRunner
-            "project_cfg": self.project_cfg,
-            "pipeline_cfg": pipeline_cfg,
-            "inputs": inputs,
-            "final_vars": final_vars,
-            "config": config,
-            "cache": cache,
-            "executor_cfg": executor_cfg,
-            "with_adapter_cfg": with_adapter_cfg,
-            "pipeline_adapter_cfg": pipeline_adapter_cfg,
-            "project_adapter_cfg": project_adapter_cfg,
-            "adapter": adapter,
-            "reload": reload,
-            "log_level": log_level,
-            "max_retries": max_retries,
-            "retry_delay": retry_delay,
-            "jitter_factor": jitter_factor,
-            "retry_exceptions": retry_exceptions,
-        }
-        pipeline_run_args = {
-            k: v for k, v in pipeline_run_args.items() if v is not None
-        }
-        logger.debug(
-            f"Resolved arguments for target run_func for job '{name}': {pipeline_run_args}"
-        )
-
-        if self.job_queue is None:
-            return None
-        with self.job_queue as job_queue:
-            res = job_queue.run_job(
-                func=run_func,
-                func_kwargs=pipeline_run_args,
-                **kwargs,
-            )
-
-        return res
-
-    def add_job(
-        self,
-        run_func: Callable, # The actual function to run (e.g., PipelineRunner(...).run)
-        pipeline_cfg: PipelineConfig, # Pipeline configuration object
-        name: str,
-        inputs: dict | None = None,
-        final_vars: list | None = None,
-        config: dict | None = None,
-        cache: bool | dict = False,
-        executor_cfg: str | dict | Any | None = None,
-        with_adapter_cfg: dict | Any | None = None,
-        pipeline_adapter_cfg: dict | Any | None = None,
-        project_adapter_cfg: dict | Any | None = None,
-        adapter: dict[str, Any] | None = None,
-        result_ttl: int | dt.timedelta = 120,
-        run_at: dt.datetime | None = None,
-        run_in: float | dt.timedelta | None = None,
-        reload: bool = False,
-        log_level: str | None = None,
-        max_retries: int | None = None,
-        retry_delay: float | None = None,
-        jitter_factor: float | None = None,
-        retry_exceptions: tuple | list | None = None,
-        **kwargs, # Allow other job queue-specific args if needed
-    ) -> Optional[Any]:
-        """
-        Add a job to run the pipeline immediately via the job queue, storing the result.
-
-        Executes the job immediately and returns the job id (UUID). The job result will be stored
-        by the job queue backend for the given `result_ttl` and can be fetched using the job id.
-
-        Args:
-            run_func (Callable): The function to execute in the job queue (e.g., a configured PipelineRunner.run).
-            pipeline_cfg (PipelineConfig): The pipeline configuration object.
-            name (str): The name of the pipeline (used for logging).
-            inputs (dict | None): Inputs for the pipeline run.
-            final_vars (list | None): Final variables for the pipeline run.
-            config (dict | None): Hamilton driver config.
-            cache (bool | dict): Cache configuration.
-            executor_cfg (str | dict | ExecutorConfig | None): Executor configuration.
-            with_adapter_cfg (dict | WithAdapterConfig | None): Adapter configuration.
-            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline adapter configuration.
-            project_adapter_cfg (dict | ProjectAdapterConfig | None): Project adapter configuration.
-            adapter (dict[str, Any] | None): Additional adapter configuration.
-            reload (bool): Whether to reload the pipeline module.
-            log_level (str | None): Log level for the run.
-            result_ttl (int | dt.timedelta): How long the job result should be stored. Defaults to 0 (don't store).
-            run_at (dt.datetime | None): Optional datetime to run the job at.
-            run_in (float | dt.timedelta | None): Optional delay before running the job.
-            max_retries (int): Maximum number of retries for the job.
-            retry_delay (float): Delay between retries.
-            jitter_factor (float): Jitter factor for retry delay.
-            retry_exceptions (tuple): Exceptions that should trigger a retry.
-            **kwargs: Additional keyword arguments passed directly to the job queue's add_job method.
-
-        Returns:
-            Optional[Any]: The ID of the added job or the job object itself, or None if job queue backend is unavailable.
-        """
-        logger.debug(f"Adding immediate job with result TTL for pipeline: {name}")
-
-        pipeline_run_args = {
-            "project_cfg": self.project_cfg,
-            "pipeline_cfg": pipeline_cfg,
-            "inputs": inputs,
-            "final_vars": final_vars,
-            "config": config,
-            "cache": cache,
-            "executor_cfg": executor_cfg,
-            "with_adapter_cfg": with_adapter_cfg,
-            "pipeline_adapter_cfg": pipeline_adapter_cfg,
-            "project_adapter_cfg": project_adapter_cfg,
-            "adapter": adapter,
-            "reload": reload,
-            "log_level": log_level,
-            "max_retries": max_retries,
-            "retry_delay": retry_delay,
-            "jitter_factor": jitter_factor,
-            "retry_exceptions": retry_exceptions,
-        }
-        pipeline_run_args = {
-            k: v for k, v in pipeline_run_args.items() if v is not None
-        }
-        logger.debug(
-            f"Resolved arguments for target run_func for job (TTL) '{name}': {pipeline_run_args}"
-        )
-
-        if self.job_queue is None:
-            return None
-        with self.job_queue as job_queue:
-            job = job_queue.add_job(
-                func=run_func,
-                func_kwargs=pipeline_run_args,
-                result_ttl=result_ttl,
-                run_at=run_at,
-                run_in=run_in,
-                **kwargs,
-            )
-            rprint(
-                f"✅ Successfully added job for "
-                f"[blue]{self.project_cfg.name}.{name}[/blue] with ID [green]{job if isinstance(job, (str, UUID)) else job.id}[/green]"
-                f" and result TTL of {result_ttl} seconds."
-            )
-            return job
-
-    # --- End Moved from PipelineManager ---
-
-    def schedule(
-        self,
-        run_func: Callable,
-        pipeline_cfg: PipelineConfig,
-        # --- Run Parameters (passed to run_func) ---
-        inputs: dict | None = None,
-        final_vars: list | None = None,
-        config: dict | None = None, # Driver config
-        cache: bool | dict = False,
-        executor_cfg: str | dict | Any | None = None,
-        with_adapter_cfg: dict | Any | None = None,
-        pipeline_adapter_cfg: dict | Any | None = None,
-        project_adapter_cfg: dict | Any | None = None,
-        adapter: dict[str, Any] | None = None,
-        reload: bool = False,
-        log_level: str | None = None,
-        max_retries: int | None = None,
-        retry_delay: float | None = None,
-        jitter_factor: float | None = None,
-        retry_exceptions: tuple | None = None,
-        # --- Schedule Parameters (passed to job queue.add_schedule) ---
-        cron: str | dict[str, str | int] | None = None,
-        interval: int | str | dict[str, str | int] | None = None,
-        date: dt.datetime | None = None,
-        overwrite: bool = False,
-        schedule_id: str | None = None,
-        **kwargs,
-    ) -> Optional[Union[str, UUID]]:
-        """
-        Schedule a pipeline for execution using the configured job queue.
-
-        Args:
-            run_func (Callable): The function to execute in the job queue.
-            pipeline_cfg (PipelineConfig): The pipeline configuration object.
-            inputs (dict | None): Inputs for the pipeline run (overrides config).
-            final_vars (list | None): Final variables for the pipeline run (overrides config).
-            config (dict | None): Hamilton driver config (overrides config).
-            cache (bool | dict): Cache configuration (overrides config).
-            executor_cfg (str | dict | ExecutorConfig | None): Executor configuration (overrides config).
-            with_adapter_cfg (dict | WithAdapterConfig | None): Adapter configuration (overrides config).
-            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline adapter configuration (overrides config).
-            project_adapter_cfg (dict | ProjectAdapterConfig | None): Project adapter configuration (overrides config).
-            adapter (dict | None): Additional Hamilton adapters (overrides config).
-            reload (bool): Whether to reload module (overrides config).
-            log_level (str | None): Log level for the run (overrides config).
-            max_retries (int): Maximum number of retries for the job.
-            retry_delay (float): Delay between retries.
-            jitter_factor (float): Jitter factor for retry delay.
-            retry_exceptions (tuple): Exceptions that should trigger a retry.
-            cron (str | dict | None): Cron expression or dict for cron trigger.
-            interval (int | str | dict | None): Interval in seconds or dict for interval trigger.
-            date (dt.datetime | None): Date for date trigger.
-            overwrite (bool): If True and id_ is None, generates ID '{name}-1', potentially overwriting.
-            schedule_id (str | None): Optional ID for the schedule. If None, generates a new ID.
-            **kwargs: Additional keyword arguments passed to the job queue's add_schedule method,
-                For RQ this includes:
-                    - repeat: Repeat count (int or dict)
-                    - result_ttl: Time to live for the job result (float or timedelta)
-                    - ttl: Time to live for the job (float or timedelta)
-                    - use_local_time_zone: Whether to use local time zone for scheduling (bool)
-                For APScheduler, this includes:
-                    - misfire_grace_time: Grace time for misfires (timedelta)
-                    - coalesce: Whether to coalesce jobs (bool)
-                    - max_running_jobs: Maximum instances of the job (int)
-                    - max_jitter: Maximum jitter for scheduling (int)
-                    - conflict_policy: Policy for conflicting jobs (str)
-                    - paused: Whether to pause the job (bool)
-
-
-        Returns:
-            Optional[Union[str, UUID]]: The ID of the scheduled pipeline, or None if job queue backend is unavailable.
-
-        Raises:
-            ValueError: If trigger_type is invalid or required args are missing.
-            Exception: Can raise exceptions from the job queue backend.
-        """
-
-        project_name = self.project_cfg.name
-        name = pipeline_cfg.name
-        logger.debug(
-            f"Attempting to schedule pipeline: {project_name}.{name} with id: {schedule_id}"
-        )
-
-        # --- Resolve Parameters using pipeline_cfg for defaults ---
-        schedule_cfg = pipeline_cfg.schedule
-        # run_cfg = pipeline_cfg.run
-
-        pipeline_run_args = {
-            "project_cfg": self.project_cfg,
-            "pipeline_cfg": pipeline_cfg,
-            "inputs": inputs,
-            "final_vars": final_vars,
-            "config": config,
-            "cache": cache,
-            "executor_cfg": executor_cfg,
-            "with_adapter_cfg": with_adapter_cfg,
-            "pipeline_adapter_cfg": pipeline_adapter_cfg,
-            "project_adapter_cfg": project_adapter_cfg,
-            "adapter": adapter,
-            "reload": reload,
-            "log_level": log_level,
-            "max_retries": max_retries,
-            "retry_delay": retry_delay,
-            "jitter_factor": jitter_factor,
-            "retry_exceptions": retry_exceptions,
-        }
-        pipeline_run_args = {
-            k: v for k, v in pipeline_run_args.items() if v is not None
-        }
-        logger.debug(f"Resolved run_kwargs for '{name}': {pipeline_run_args}")
-
-        cron = cron if cron is not None else schedule_cfg.cron
-        interval = interval if interval is not None else schedule_cfg.interval
-        date = date if date is not None else schedule_cfg.date
-        logger.debug(
-            f"Resolved schedule parameters for '{name}': cron={cron}, interval={interval}, date={date}"
-        )
-
-        # --- Generate ID if not provided ---
-        # (Keep _generate_id function as is, it uses self._get_schedules())
-        def _generate_id(
-            pipeline_name: str, explicit_id: str | None, force_overwrite_base: bool
-        ) -> str:
-            if explicit_id:
-                logger.debug(f"Using explicit schedule ID: {explicit_id}")
-                return explicit_id
-
-            base_id = f"{pipeline_name}-1"
-
-            if force_overwrite_base:
-                logger.debug(f"Overwrite specified, using base ID: {base_id}")
-                return base_id
-
-            try:
-                existing_ids = self._get_schedule_ids()
-                logger.debug(f"Existing schedule IDs: {existing_ids}")
-
-                if not any(
-                    id_val.startswith(f"{pipeline_name}-") for id_val in existing_ids
-                ):
-                    logger.debug(
-                        f"No existing schedules found for '{pipeline_name}', using base ID: {base_id}"
-                    )
-                    return base_id
-
-                # Find highest existing number for this pipeline name
-                max_num = 0
-                for id_val in existing_ids:
-                    if id_val.startswith(f"{pipeline_name}-"):
-                        try:
-                            num_part = id_val.split("-")[-1]
-                            num = int(num_part)
-                            if num > max_num:
-                                max_num = num
-                        except (ValueError, IndexError):
-                            logger.warning(
-                                f"Could not parse number from existing schedule ID: {id_val}"
-                            )
-                            continue # Skip malformed IDs
-
-                new_id = f"{pipeline_name}-{max_num + 1}"
-                logger.debug(f"Generated new schedule ID: {new_id}")
-                return new_id
-
-            except Exception as e:
-                logger.error(
-                    f"Error getting existing schedules to generate ID: {e}. Falling back to base ID: {base_id}"
-                )
-                # Fallback in case of error fetching schedules
-                return base_id
-
-        schedule_id = _generate_id(name, schedule_id, overwrite)
-
-        # --- Add Schedule via Job queue ---
-        try:
-            if self.job_queue is None:
-                return None
-            with self.job_queue as job_queue:
-                # Job queue is now responsible for creating the trigger object
-                # Pass trigger type and kwargs directly
-                added_id = job_queue.add_schedule(
-                    func=run_func,
-                    func_kwargs=pipeline_run_args, # Pass resolved run parameters
-                    cron=cron,
-                    interval=interval,
-                    date=date,
-                    schedule_id=schedule_id,
-                    **kwargs, # Pass resolved schedule run parameters
-                )
-                logger.info(
-                    f"✅ Successfully scheduled job for "
-                    f"[blue]{project_name}.{name}[/blue] with ID [green]{added_id}[/green]"
-                )
-                return added_id
-        except Exception as e:
-            logger.error(
-                f"Failed to add schedule '{schedule_id}' for pipeline '{name}': {e}"
-            )
-            raise
-
-    # --- schedule_all method removed ---
-    # PipelineManager will be responsible for iterating and calling schedule()
-
-    def schedule_all(self, registry: PipelineRegistry, **kwargs) -> Optional[list[str]]:
-        """
-        Schedule all pipelines found by the registry.
-
-        Args:
-            registry (PipelineRegistry): The pipeline registry to use for finding pipelines.
-            **kwargs: Arguments passed directly to the `schedule` method for each pipeline.
-                Note: Pipeline-specific configurations will still take precedence for
-                defaults if not overridden by kwargs.
-
-        Returns:
-            Optional[list[str]]: List of scheduled pipeline IDs, or None if job queue backend is unavailable.
-        """
-        if self.job_queue is None:
-            logger.warning(
-                "Job queue backend is unavailable. Cannot schedule pipelines."
-            )
-            return None
-
-        try:
-            names = registry._get_names() # Use registry to find pipelines
-            if not names:
-                logger.info("[yellow]No pipelines found to schedule.[/yellow]")
-                return []
-
-            logger.info(f"Attempting to schedule {len(names)} pipelines...")
-            scheduled_ids = []
-            errors = []
-            for name in names:
-                try:
-                    # Load config specifically for this pipeline to get defaults
-                    # Note: schedule() will load it again, potential optimization later
-                    cfg = registry.load_config(name=name)
-                    if (
-                        not cfg
-                        or not cfg.pipeline
-                        or not cfg.pipeline.schedule
-                        or not cfg.pipeline.schedule.enabled
-                    ):
-                        logger.info(
-                            f"🟡 Skipping schedule for [cyan]{name}[/cyan]: Not configured or disabled in config."
-                        )
-                        continue
-
-                    logger.info(f"Scheduling [cyan]{name}[/cyan]...")
-                    # Pass kwargs, allowing overrides of config defaults
-                    run_func = registry.get_runner(name).run
-                    schedule_id = self.schedule(
-                        run_func=run_func, pipeline_cfg=cfg.pipeline, **kwargs
-                    )
-                    if schedule_id is None:
-                        logger.info(
-                            f"Skipping adding None schedule_id for pipeline '{name}' to scheduled_ids list."
-                        )
-                        continue
-                    scheduled_ids.append(schedule_id)
-                except Exception as e:
-                    logger.error(f"Failed to schedule pipeline '{name}': {e}")
-                    errors.append(name)
-                    logger.error(f"❌ Error scheduling [cyan]{name}[/cyan]: {e}")
-
-            if errors:
-                logger.error(
-                    f"\n[bold red]Finished scheduling with errors for: {', '.join(errors)}[/bold red]"
-                )
-            else:
-                logger.success(
-                    f"\n[bold green]Successfully scheduled {len(scheduled_ids)} pipelines.[/bold green]"
-                )
-
-            return scheduled_ids
-
-        except Exception as e:
-            logger.error(
-                f"[bold red]An unexpected error occurred during schedule_all: {e}[/bold red]"
-            )
-            return None
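
For orientation, the deleted class above was the old scheduling entry point. Below is a minimal usage sketch reconstructed only from the signatures and docstrings visible in this hunk; the project_cfg, pipeline_cfg, fs, directory paths, and runner objects are hypothetical placeholders that the old PipelineManager wired up and that are not shown in this diff.

    # Sketch based on the removed PipelineJobQueue API (placeholders, not the 0.20.0 API).
    job_queue = PipelineJobQueue(
        project_cfg=project_cfg,      # ProjectConfig instance (assumed)
        fs=fs,                        # AbstractFileSystem instance (assumed)
        cfg_dir="conf",               # hypothetical config directory
        pipelines_dir="pipelines",    # hypothetical pipelines directory
    )

    # Run immediately through the configured backend (RQ or APScheduler) and return the result.
    result = job_queue.run_job(run_func=runner.run, pipeline_cfg=pipeline_cfg, name="my_pipeline")

    # Enqueue a job whose result stays fetchable by job id for `result_ttl` seconds.
    job = job_queue.add_job(run_func=runner.run, pipeline_cfg=pipeline_cfg, name="my_pipeline", result_ttl=300)

    # Register a recurring schedule; ids default to "<pipeline>-<n>" via _generate_id above.
    schedule_id = job_queue.schedule(run_func=runner.run, pipeline_cfg=pipeline_cfg, cron="0 * * * *")

Judging from the file list, these responsibilities appear to have moved into flowerpower/flowerpower.py, flowerpower/pipeline/pipeline.py, and the reworked flowerpower/job_queue modules in 0.20.0, but that is an inference from the line counts rather than something this hunk shows.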