FlowerPower 0.9.13.1__py3-none-any.whl → 1.0.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowerpower/__init__.py +17 -2
- flowerpower/cfg/__init__.py +201 -149
- flowerpower/cfg/base.py +122 -24
- flowerpower/cfg/pipeline/__init__.py +254 -0
- flowerpower/cfg/pipeline/adapter.py +66 -0
- flowerpower/cfg/pipeline/run.py +40 -11
- flowerpower/cfg/pipeline/schedule.py +69 -79
- flowerpower/cfg/project/__init__.py +149 -0
- flowerpower/cfg/project/adapter.py +57 -0
- flowerpower/cfg/project/job_queue.py +165 -0
- flowerpower/cli/__init__.py +92 -37
- flowerpower/cli/job_queue.py +878 -0
- flowerpower/cli/mqtt.py +32 -1
- flowerpower/cli/pipeline.py +559 -406
- flowerpower/cli/utils.py +29 -18
- flowerpower/flowerpower.py +12 -8
- flowerpower/fs/__init__.py +20 -2
- flowerpower/fs/base.py +350 -26
- flowerpower/fs/ext.py +797 -216
- flowerpower/fs/storage_options.py +1097 -55
- flowerpower/io/base.py +13 -18
- flowerpower/io/loader/__init__.py +28 -0
- flowerpower/io/loader/deltatable.py +7 -10
- flowerpower/io/metadata.py +1 -0
- flowerpower/io/saver/__init__.py +28 -0
- flowerpower/io/saver/deltatable.py +4 -3
- flowerpower/job_queue/__init__.py +252 -0
- flowerpower/job_queue/apscheduler/__init__.py +11 -0
- flowerpower/job_queue/apscheduler/_setup/datastore.py +110 -0
- flowerpower/job_queue/apscheduler/_setup/eventbroker.py +93 -0
- flowerpower/job_queue/apscheduler/manager.py +1063 -0
- flowerpower/job_queue/apscheduler/setup.py +524 -0
- flowerpower/job_queue/apscheduler/trigger.py +169 -0
- flowerpower/job_queue/apscheduler/utils.py +309 -0
- flowerpower/job_queue/base.py +382 -0
- flowerpower/job_queue/rq/__init__.py +10 -0
- flowerpower/job_queue/rq/_trigger.py +37 -0
- flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +226 -0
- flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +231 -0
- flowerpower/job_queue/rq/manager.py +1449 -0
- flowerpower/job_queue/rq/setup.py +150 -0
- flowerpower/job_queue/rq/utils.py +69 -0
- flowerpower/pipeline/__init__.py +5 -0
- flowerpower/pipeline/base.py +118 -0
- flowerpower/pipeline/io.py +407 -0
- flowerpower/pipeline/job_queue.py +505 -0
- flowerpower/pipeline/manager.py +1586 -0
- flowerpower/pipeline/registry.py +560 -0
- flowerpower/pipeline/runner.py +560 -0
- flowerpower/pipeline/visualizer.py +142 -0
- flowerpower/plugins/mqtt/__init__.py +12 -0
- flowerpower/plugins/mqtt/cfg.py +16 -0
- flowerpower/plugins/mqtt/manager.py +789 -0
- flowerpower/settings.py +110 -0
- flowerpower/utils/logging.py +21 -0
- flowerpower/utils/misc.py +57 -9
- flowerpower/utils/sql.py +122 -24
- flowerpower/utils/templates.py +2 -142
- flowerpower-1.0.0b1.dist-info/METADATA +324 -0
- flowerpower-1.0.0b1.dist-info/RECORD +94 -0
- flowerpower/_web/__init__.py +0 -61
- flowerpower/_web/routes/config.py +0 -103
- flowerpower/_web/routes/pipelines.py +0 -173
- flowerpower/_web/routes/scheduler.py +0 -136
- flowerpower/cfg/pipeline/tracker.py +0 -14
- flowerpower/cfg/project/open_telemetry.py +0 -8
- flowerpower/cfg/project/tracker.py +0 -11
- flowerpower/cfg/project/worker.py +0 -19
- flowerpower/cli/scheduler.py +0 -309
- flowerpower/cli/web.py +0 -44
- flowerpower/event_handler.py +0 -23
- flowerpower/mqtt.py +0 -609
- flowerpower/pipeline.py +0 -2499
- flowerpower/scheduler.py +0 -680
- flowerpower/tui.py +0 -79
- flowerpower/utils/datastore.py +0 -186
- flowerpower/utils/eventbroker.py +0 -127
- flowerpower/utils/executor.py +0 -58
- flowerpower/utils/trigger.py +0 -140
- flowerpower-0.9.13.1.dist-info/METADATA +0 -586
- flowerpower-0.9.13.1.dist-info/RECORD +0 -76
- /flowerpower/{cfg/pipeline/params.py → cli/worker.py} +0 -0
- {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b1.dist-info}/WHEEL +0 -0
- {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b1.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,505 @@
+# -*- coding: utf-8 -*-
+# pylint: disable=logging-fstring-interpolation
+# flake8: noqa: E501
+"""Pipeline Job Queue."""
+
+import datetime as dt
+from typing import Any, Callable
+from uuid import UUID
+
+from loguru import logger
+from rich import print as rprint
+
+# Import necessary config types
+from ..cfg import PipelineConfig, ProjectConfig
+from ..fs import AbstractFileSystem
+from ..utils.logging import setup_logging
+from ..job_queue import JobQueue
+from .registry import PipelineRegistry
+from .. import settings
+
+setup_logging()
+
+
+class PipelineJobQueue:
+    """Handles scheduling of pipeline runs via a configured worker backend."""
+
+    def __init__(
+        self,
+        project_cfg: ProjectConfig,
+        fs: AbstractFileSystem,
+        cfg_dir: str,
+        pipelines_dir: str,
+        job_queue_type: str | None = None,
+    ):
+        """Initialize PipelineJobQueue.
+
+        Args:
+            project_cfg: The project configuration object.
+            fs: The file system to use for file operations.
+            cfg_dir: The directory for configuration files.
+            pipelines_dir: The directory for pipeline files.
+            job_queue_type: The type of worker to use (e.g., 'rq', 'apscheduler'). If None, defaults to the project config.
+        """
+        self.project_cfg = project_cfg
+        self._fs = fs
+        self._cfg_dir = cfg_dir
+        self._pipelines_dir = pipelines_dir
+        self._job_queue_type = job_queue_type or project_cfg.job_queue.type
+        if not self._job_queue_type:
+            # Fallback or default if not specified in project config
+            self._job_queue_type = settings.DEFAULT_JOB_QUEUE
+            logger.warning(
+                f"Job queue type not specified in project config, defaulting to '{self._job_queue_type}'"
+            )
+
+    @property
+    def job_queue(self):
+        """
+        Lazily instantiate and cache a Job queue instance.
+        """
+        # Lazily instantiate worker using project_cfg attributes
+        logger.debug(
+            f"Instantiating worker of type: {self._job_queue_type} for project '{self.project_cfg.name}'"
+        )
+        # Pass the necessary parts of project_cfg to the Job queue
+        return JobQueue(
+            type=self._job_queue_type,
+            fs=self._fs,
+        )
+
+    def _get_schedule_ids(self) -> list[Any]:
+        """Get all schedules from the worker backend."""
+
+        with self.job_queue as worker:
+            logger.debug("Fetching schedules ids from worker")
+            return worker.schedule_ids
+
+    def run_job(
+        self,
+        run_func: Callable,
+        name: str,  # name: str,
+        inputs: dict | None = None,
+        final_vars: list | None = None,
+        config: dict | None = None,
+        cache: bool | dict = False,
+        executor_cfg: str | dict | Any | None = None,
+        with_adapter_cfg: dict | Any | None = None,
+        pipeline_adapter_cfg: dict | Any | None = None,
+        project_adapter_cfg: dict | Any | None = None,
+        adapter: dict[str, Any] | None = None,
+        reload: bool = False,
+        log_level: str | None = None,
+        **kwargs,
+    ) -> dict[str, Any]:
+        """
+        Add a job to run the pipeline immediately via the worker queue.
+
+        Args:
+            run_func (Callable): The function to execute in the worker (e.g., a configured PipelineRunner.run).
+            name (str): The name of the pipeline (used for logging).
+            inputs (dict | None): Inputs for the pipeline run.
+            final_vars (list | None): Final variables for the pipeline run.
+            config (dict | None): Hamilton driver config.
+            cache (bool | dict): Cache configuration.
+            executor_cfg (str | dict | ExecutorConfig | None): Executor configuration.
+            with_adapter_cfg (dict | WithAdapterConfig | None): Adapter configuration.
+            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline adapter configuration.
+            project_adapter_cfg (dict | ProjectAdapterConfig | None): Project adapter configuration.
+            adapter (dict[str, Any] | None): Additional adapter configuration.
+            reload (bool): Whether to reload the pipeline module.
+            log_level (str | None): Log level for the run.
+            **kwargs: Additional keyword arguments passed directly to the worker's add_job method.
+
+        Returns:
+            dict[str, Any]: The result of the job execution.
+        """
+        logger.debug(f"Adding immediate job for pipeline: {name}")
+
+
+        pipeline_run_args = {
+            # 'name' is not passed to run_func, it's part of the context already in PipelineRunner
+            "inputs": inputs,
+            "final_vars": final_vars,
+            "config": config,
+            "cache": cache,
+            "executor_cfg": executor_cfg,
+            "with_adapter_cfg": with_adapter_cfg,
+            "pipeline_adapter_cfg": pipeline_adapter_cfg,
+            "project_adapter_cfg": project_adapter_cfg,
+            "adapter": adapter,
+            "reload": reload,
+            "log_level": log_level,
+        }
+        pipeline_run_args = {
+            k: v for k, v in pipeline_run_args.items() if v is not None
+        }
+        logger.debug(
+            f"Resolved arguments for target run_func for job '{name}': {pipeline_run_args}"
+        )
+
+        with self.job_queue as worker:
+            res = worker.run_job(
+                func=run_func,
+                func_kwargs=pipeline_run_args,
+                **kwargs,
+            )
+
+        return res
+
+    def add_job(
+        self,
+        run_func: Callable,  # The actual function to run (e.g., PipelineRunner(...).run)
+        name: str,
+        inputs: dict | None = None,
+        final_vars: list | None = None,
+        config: dict | None = None,
+        cache: bool | dict = False,
+        executor_cfg: str | dict | Any | None = None,
+        with_adapter_cfg: dict | Any | None = None,
+        pipeline_adapter_cfg: dict | Any | None = None,
+        project_adapter_cfg: dict | Any | None = None,
+        adapter: dict[str, Any] | None = None,
+        result_ttl: int | dt.timedelta = 120,
+        run_at: dt.datetime | None = None,
+        run_in: float | dt.timedelta | None = None,
+        reload: bool = False,
+        log_level: str | None = None,
+        max_retries: int | None = None,
+        retry_delay: float | None = None,
+        jitter_factor: float | None = None,
+        retry_exceptions: tuple | list | None = None,
+        **kwargs,  # Allow other worker-specific args if needed
+    ) -> str | UUID:
+        """
+        Add a job to run the pipeline immediately via the worker queue, storing the result.
+
+        Executes the job immediately and returns the job id (UUID). The job result will be stored
+        by the worker backend for the given `result_ttl` and can be fetched using the job id.
+
+        Args:
+            run_func (Callable): The function to execute in the worker (e.g., a configured PipelineRunner.run).
+            name (str): The name of the pipeline (used for logging).
+            inputs (dict | None): Inputs for the pipeline run.
+            final_vars (list | None): Final variables for the pipeline run.
+            config (dict | None): Hamilton driver config.
+            cache (bool | dict): Cache configuration.
+            executor_cfg (str | dict | ExecutorConfig | None): Executor configuration.
+            with_adapter_cfg (dict | WithAdapterConfig | None): Adapter configuration.
+            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline adapter configuration.
+            project_adapter_cfg (dict | ProjectAdapterConfig | None): Project adapter configuration.
+            adapter (dict[str, Any] | None): Additional adapter configuration.
+            reload (bool): Whether to reload the pipeline module.
+            log_level (str | None): Log level for the run.
+            result_ttl (int | dt.timedelta): How long the job result should be stored. Defaults to 0 (don't store).
+            run_at (dt.datetime | None): Optional datetime to run the job at.
+            run_in (float | dt.timedelta | None): Optional delay before running the job.
+            max_retries (int): Maximum number of retries for the job.
+            retry_delay (float): Delay between retries.
+            jitter_factor (float): Jitter factor for retry delay.
+            retry_exceptions (tuple): Exceptions that should trigger a retry.
+            **kwargs: Additional keyword arguments passed directly to the worker's add_job method.
+
+        Returns:
+            str | UUID: The ID of the added job.
+        """
+        logger.debug(f"Adding immediate job with result TTL for pipeline: {name}")
+
+        pipeline_run_args = {
+            "inputs": inputs,
+            "final_vars": final_vars,
+            "config": config,
+            "cache": cache,
+            "executor_cfg": executor_cfg,
+            "with_adapter_cfg": with_adapter_cfg,
+            "pipeline_adapter_cfg": pipeline_adapter_cfg,
+            "project_adapter_cfg": project_adapter_cfg,
+            "adapter": adapter,
+            "reload": reload,
+            "log_level": log_level,
+            "max_retries": max_retries,
+            "retry_delay": retry_delay,
+            "jitter_factor": jitter_factor,
+            "retry_exceptions": retry_exceptions,
+        }
+        pipeline_run_args = {
+            k: v for k, v in pipeline_run_args.items() if v is not None
+        }
+        logger.debug(
+            f"Resolved arguments for target run_func for job (TTL) '{name}': {pipeline_run_args}"
+        )
+
+        with self.job_queue as worker:
+            job_id = worker.add_job(
+                func=run_func,
+                func_kwargs=pipeline_run_args,
+                result_ttl=result_ttl,
+                run_at=run_at,
+                run_in=run_in,
+                **kwargs,
+            )
+            rprint(
+                f"✅ Successfully added job for "
+                f"[blue]{self.project_cfg.name}.{name}[/blue] with ID [green]{job_id}[/green]"
+                f" and result TTL of {result_ttl} seconds."
+            )
+        return job_id
+
+    # --- End Moved from PipelineManager ---
+
+    def schedule(
+        self,
+        run_func: Callable,
+        pipeline_cfg: PipelineConfig,
+        # --- Run Parameters (passed to run_func) ---
+        inputs: dict | None = None,
+        final_vars: list | None = None,
+        config: dict | None = None,  # Driver config
+        cache: bool | dict = False,
+        executor_cfg: str | dict | Any | None = None,
+        with_adapter_cfg: dict | Any | None = None,
+        pipeline_adapter_cfg: dict | Any | None = None,
+        project_adapter_cfg: dict | Any | None = None,
+        adapter: dict[str, Any] | None = None,
+        reload: bool = False,
+        log_level: str | None = None,
+        max_retries: int | None = None,
+        retry_delay: float | None = None,
+        jitter_factor: float | None = None,
+        retry_exceptions: tuple = (Exception,),
+        # --- Schedule Parameters (passed to worker.add_schedule) ---
+        cron: str | dict[str, str | int] | None = None,
+        interval: int | str | dict[str, str | int] | None = None,
+        date: dt.datetime | None = None,
+        overwrite: bool = False,
+        schedule_id: str | None = None,
+        **kwargs,
+    ) -> str | UUID:
+        """
+        Schedule a pipeline for execution using the configured worker.
+
+        Args:
+            run_func (Callable): The function to execute in the worker.
+            pipeline_cfg (PipelineConfig): The pipeline configuration object.
+            inputs (dict | None): Inputs for the pipeline run (overrides config).
+            final_vars (list | None): Final variables for the pipeline run (overrides config).
+            config (dict | None): Hamilton driver config (overrides config).
+            cache (bool | dict): Cache configuration (overrides config).
+            executor_cfg (str | dict | ExecutorConfig | None): Executor configuration (overrides config).
+            with_adapter_cfg (dict | WithAdapterConfig | None): Adapter configuration (overrides config).
+            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline adapter configuration (overrides config).
+            project_adapter_cfg (dict | ProjectAdapterConfig | None): Project adapter configuration (overrides config).
+            adapter (dict | None): Additional Hamilton adapters (overrides config).
+            reload (bool): Whether to reload module (overrides config).
+            log_level (str | None): Log level for the run (overrides config).
+            max_retries (int): Maximum number of retries for the job.
+            retry_delay (float): Delay between retries.
+            jitter_factor (float): Jitter factor for retry delay.
+            retry_exceptions (tuple): Exceptions that should trigger a retry.
+            cron (str | dict | None): Cron expression or dict for cron trigger.
+            interval (int | str | dict | None): Interval in seconds or dict for interval trigger.
+            date (dt.datetime | None): Date for date trigger.
+            overwrite (bool): If True and id_ is None, generates ID '{name}-1', potentially overwriting.
+            schedule_id (str | None): Optional ID for the schedule. If None, generates a new ID.
+            **kwargs: Additional keyword arguments passed to the worker's add_schedule method,
+                For RQ this includes:
+                    - repeat: Repeat count (int or dict)
+                    - result_ttl: Time to live for the job result (float or timedelta)
+                    - ttl: Time to live for the job (float or timedelta)
+                    - use_local_time_zone: Whether to use local time zone for scheduling (bool)
+                For APScheduler, this includes:
+                    - misfire_grace_time: Grace time for misfires (timedelta)
+                    - coalesce: Whether to coalesce jobs (bool)
+                    - max_running_jobs: Maximum instances of the job (int)
+                    - max_jitter: Maximum jitter for scheduling (int)
+                    - conflict_policy: Policy for conflicting jobs (str)
+                    - paused: Whether to pause the job (bool)
+
+
+        Returns:
+            str | UUID: The ID of the scheduled pipeline.
+
+        Raises:
+            ValueError: If trigger_type is invalid or required args are missing.
+            Exception: Can raise exceptions from the worker backend.
+        """
+
+        project_name = self.project_cfg.name
+        name = pipeline_cfg.name
+        logger.debug(
+            f"Attempting to schedule pipeline: {project_name}.{name} with id: {schedule_id}"
+        )
+
+        # --- Resolve Parameters using pipeline_cfg for defaults ---
+        schedule_cfg = pipeline_cfg.schedule
+        #run_cfg = pipeline_cfg.run
+
+        pipeline_run_args = {
+            "inputs": inputs,
+            "final_vars": final_vars,
+            "config": config,
+            "cache": cache,
+            "executor_cfg": executor_cfg,
+            "with_adapter_cfg": with_adapter_cfg,
+            "pipeline_adapter_cfg": pipeline_adapter_cfg,
+            "project_adapter_cfg": project_adapter_cfg,
+            "adapter": adapter,
+            "reload": reload,
+            "log_level": log_level,
+            "max_retries": max_retries,
+            "retry_delay": retry_delay,
+            "jitter_factor": jitter_factor,
+            "retry_exceptions": retry_exceptions,
+        }
+        pipeline_run_args = {
+            k: v for k, v in pipeline_run_args.items() if v is not None
+        }
+        logger.debug(f"Resolved run_kwargs for '{name}': {pipeline_run_args}")
+
+        cron = cron if cron is not None else schedule_cfg.cron
+        interval = interval if interval is not None else schedule_cfg.interval
+        date = date if date is not None else schedule_cfg.date
+        logger.debug(
+            f"Resolved schedule parameters for '{name}': cron={cron}, interval={interval}, date={date}"
+        )
+
+        # --- Generate ID if not provided ---
+        # (Keep _generate_id function as is, it uses self._get_schedules())
+        def _generate_id(
+            pipeline_name: str, explicit_id: str | None, force_overwrite_base: bool
+        ) -> str:
+            if explicit_id:
+                logger.debug(f"Using explicit schedule ID: {explicit_id}")
+                return explicit_id
+
+            base_id = f"{pipeline_name}-1"
+
+            if force_overwrite_base:
+                logger.debug(f"Overwrite specified, using base ID: {base_id}")
+                return base_id
+
+            try:
+                existing_ids = self._get_schedule_ids()
+                logger.debug(f"Existing schedule IDs: {existing_ids}")
+
+                if not any(
+                    id_val.startswith(f"{pipeline_name}-") for id_val in existing_ids
+                ):
+                    logger.debug(
+                        f"No existing schedules found for '{pipeline_name}', using base ID: {base_id}"
+                    )
+                    return base_id
+
+                # Find highest existing number for this pipeline name
+                max_num = 0
+                for id_val in existing_ids:
+                    if id_val.startswith(f"{pipeline_name}-"):
+                        try:
+                            num_part = id_val.split("-")[-1]
+                            num = int(num_part)
+                            if num > max_num:
+                                max_num = num
+                        except (ValueError, IndexError):
+                            logger.warning(
+                                f"Could not parse number from existing schedule ID: {id_val}"
+                            )
+                            continue  # Skip malformed IDs
+
+                new_id = f"{pipeline_name}-{max_num + 1}"
+                logger.debug(f"Generated new schedule ID: {new_id}")
+                return new_id
+
+            except Exception as e:
+                logger.error(
+                    f"Error getting existing schedules to generate ID: {e}. Falling back to base ID: {base_id}"
+                )
+                # Fallback in case of error fetching schedules
+                return base_id
+
+        schedule_id = _generate_id(name, schedule_id, overwrite)
+
+        # --- Add Schedule via Job queue ---
+        try:
+            with self.job_queue as worker:
+                # Job queue is now responsible for creating the trigger object
+                # Pass trigger type and kwargs directly
+                added_id = worker.add_schedule(
+                    func=run_func,
+                    func_kwargs=pipeline_run_args,  # Pass resolved run parameters
+                    cron=cron,
+                    interval=interval,
+                    date=date,
+                    schedule_id=schedule_id,
+                    **kwargs,  # Pass resolved schedule run parameters
+                )
+                logger.info(
+                    f"✅ Successfully scheduled job for "
+                    f"[blue]{project_name}.{name}[/blue] with ID [green]{added_id}[/green]"
+                )
+                return added_id
+        except Exception as e:
+            logger.error(
+                f"Failed to add schedule '{schedule_id}' for pipeline '{name}': {e}"
+            )
+            raise
+
+    # --- schedule_all method removed ---
+    # PipelineManager will be responsible for iterating and calling schedule()
+
+    def schedule_all(self, registry: PipelineRegistry, **kwargs):
+        """
+        Schedule all pipelines found by the registry.
+
+        Args:
+            **kwargs: Arguments passed directly to the `schedule` method for each pipeline.
+                Note: Pipeline-specific configurations will still take precedence for
+                defaults if not overridden by kwargs.
+        """
+        try:
+            registry = self._get_registry_func()
+            names = registry._get_names()  # Use registry to find pipelines
+            if not names:
+                logger.info("[yellow]No pipelines found to schedule.[/yellow]")
+                return
+
+            logger.info(f"Attempting to schedule {len(names)} pipelines...")
+            scheduled_ids = []
+            errors = []
+            for name in names:
+                try:
+                    # Load config specifically for this pipeline to get defaults
+                    # Note: schedule() will load it again, potential optimization later
+                    cfg = self._load_config_func(name=name)
+                    if (
+                        not cfg
+                        or not cfg.pipeline
+                        or not cfg.pipeline.schedule
+                        or not cfg.pipeline.schedule.enabled
+                    ):
+                        logger.info(
+                            f"🟡 Skipping schedule for [cyan]{name}[/cyan]: Not configured or disabled in config."
+                        )
+                        continue
+
+                    logger.info(f"Scheduling [cyan]{name}[/cyan]...")
+                    # Pass kwargs, allowing overrides of config defaults
+                    schedule_id = self.schedule(name=name, **kwargs)
+                    scheduled_ids.append(schedule_id)
+                except Exception as e:
+                    logger.error(f"Failed to schedule pipeline '{name}': {e}")
+                    errors.append(name)
+                    logger.error(f"❌ Error scheduling [cyan]{name}[/cyan]: {e}")
+
+            if errors:
+                logger.error(
+                    f"\n[bold red]Finished scheduling with errors for: {', '.join(errors)}[/bold red]"
+                )
+            else:
+                logger.success(
+                    f"\n[bold green]Successfully scheduled {len(scheduled_ids)} pipelines.[/bold green]"
+                )
+
+        except Exception as e:
+            logger.error(
+                f"[bold red]An unexpected error occurred during schedule_all: {e}[/bold red]"
+            )
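
For orientation, below is a minimal usage sketch of the PipelineJobQueue API added in this file. It is not part of the diff: the submit_example wrapper and the placeholder arguments (project_cfg, pipeline_cfg, fs, run_pipeline, and the directory paths) are hypothetical stand-ins for objects a caller would already have; only the import paths, constructor parameters, and method signatures are taken from the code above.

# Hypothetical usage sketch; everything except the imported names and the
# PipelineJobQueue call signatures is illustrative, not from the package.
import datetime as dt

from flowerpower.cfg import PipelineConfig, ProjectConfig
from flowerpower.fs import AbstractFileSystem
from flowerpower.pipeline.job_queue import PipelineJobQueue


def submit_example(
    project_cfg: ProjectConfig,
    pipeline_cfg: PipelineConfig,
    fs: AbstractFileSystem,
    run_pipeline,  # e.g. a configured PipelineRunner(...).run, per the run_func docstrings
) -> None:
    pjq = PipelineJobQueue(
        project_cfg=project_cfg,
        fs=fs,
        cfg_dir="conf",            # illustrative paths
        pipelines_dir="pipelines",
        job_queue_type="rq",       # or "apscheduler"; None falls back to the project config
    )

    # Enqueue an immediate run; the result stays fetchable for result_ttl seconds.
    job_id = pjq.add_job(
        run_func=run_pipeline,
        name=pipeline_cfg.name,
        inputs={"run_date": dt.date.today().isoformat()},
        result_ttl=120,
    )

    # Register a recurring schedule; cron/interval/date default to pipeline_cfg.schedule.
    schedule_id = pjq.schedule(
        run_func=run_pipeline,
        pipeline_cfg=pipeline_cfg,
        cron="0 6 * * *",
    )
    print(job_id, schedule_id)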