FlowerPower 0.11.6.19-py3-none-any.whl → 0.20.0-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.
- flowerpower/cfg/__init__.py +3 -3
- flowerpower/cfg/pipeline/__init__.py +5 -3
- flowerpower/cfg/project/__init__.py +3 -3
- flowerpower/cfg/project/job_queue.py +1 -128
- flowerpower/cli/__init__.py +5 -5
- flowerpower/cli/cfg.py +0 -3
- flowerpower/cli/job_queue.py +401 -133
- flowerpower/cli/pipeline.py +14 -413
- flowerpower/cli/utils.py +0 -1
- flowerpower/flowerpower.py +537 -28
- flowerpower/job_queue/__init__.py +5 -94
- flowerpower/job_queue/base.py +201 -3
- flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -3
- flowerpower/job_queue/rq/manager.py +388 -77
- flowerpower/pipeline/__init__.py +2 -0
- flowerpower/pipeline/base.py +2 -2
- flowerpower/pipeline/io.py +14 -16
- flowerpower/pipeline/manager.py +21 -642
- flowerpower/pipeline/pipeline.py +571 -0
- flowerpower/pipeline/registry.py +242 -10
- flowerpower/pipeline/visualizer.py +1 -2
- flowerpower/plugins/_io/__init__.py +8 -0
- flowerpower/plugins/mqtt/manager.py +6 -6
- flowerpower/settings/backend.py +0 -2
- flowerpower/settings/job_queue.py +1 -57
- flowerpower/utils/misc.py +0 -256
- flowerpower/utils/monkey.py +1 -83
- {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/METADATA +308 -152
- flowerpower-0.20.0.dist-info/RECORD +58 -0
- flowerpower/fs/__init__.py +0 -29
- flowerpower/fs/base.py +0 -662
- flowerpower/fs/ext.py +0 -2143
- flowerpower/fs/storage_options.py +0 -1420
- flowerpower/job_queue/apscheduler/__init__.py +0 -11
- flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -110
- flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -93
- flowerpower/job_queue/apscheduler/manager.py +0 -1051
- flowerpower/job_queue/apscheduler/setup.py +0 -554
- flowerpower/job_queue/apscheduler/trigger.py +0 -169
- flowerpower/job_queue/apscheduler/utils.py +0 -311
- flowerpower/pipeline/job_queue.py +0 -583
- flowerpower/pipeline/runner.py +0 -603
- flowerpower/plugins/io/base.py +0 -2520
- flowerpower/plugins/io/helpers/datetime.py +0 -298
- flowerpower/plugins/io/helpers/polars.py +0 -875
- flowerpower/plugins/io/helpers/pyarrow.py +0 -570
- flowerpower/plugins/io/helpers/sql.py +0 -202
- flowerpower/plugins/io/loader/__init__.py +0 -28
- flowerpower/plugins/io/loader/csv.py +0 -37
- flowerpower/plugins/io/loader/deltatable.py +0 -190
- flowerpower/plugins/io/loader/duckdb.py +0 -19
- flowerpower/plugins/io/loader/json.py +0 -37
- flowerpower/plugins/io/loader/mqtt.py +0 -159
- flowerpower/plugins/io/loader/mssql.py +0 -26
- flowerpower/plugins/io/loader/mysql.py +0 -26
- flowerpower/plugins/io/loader/oracle.py +0 -26
- flowerpower/plugins/io/loader/parquet.py +0 -35
- flowerpower/plugins/io/loader/postgres.py +0 -26
- flowerpower/plugins/io/loader/pydala.py +0 -19
- flowerpower/plugins/io/loader/sqlite.py +0 -23
- flowerpower/plugins/io/metadata.py +0 -244
- flowerpower/plugins/io/saver/__init__.py +0 -28
- flowerpower/plugins/io/saver/csv.py +0 -36
- flowerpower/plugins/io/saver/deltatable.py +0 -186
- flowerpower/plugins/io/saver/duckdb.py +0 -19
- flowerpower/plugins/io/saver/json.py +0 -36
- flowerpower/plugins/io/saver/mqtt.py +0 -28
- flowerpower/plugins/io/saver/mssql.py +0 -26
- flowerpower/plugins/io/saver/mysql.py +0 -26
- flowerpower/plugins/io/saver/oracle.py +0 -26
- flowerpower/plugins/io/saver/parquet.py +0 -36
- flowerpower/plugins/io/saver/postgres.py +0 -26
- flowerpower/plugins/io/saver/pydala.py +0 -20
- flowerpower/plugins/io/saver/sqlite.py +0 -24
- flowerpower/utils/scheduler.py +0 -311
- flowerpower-0.11.6.19.dist-info/RECORD +0 -102
- {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/WHEEL +0 -0
- {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/licenses/LICENSE +0 -0
- {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/top_level.txt +0 -0
flowerpower/pipeline/job_queue.py (entire file removed in 0.20.0)
@@ -1,583 +0,0 @@
```python
# -*- coding: utf-8 -*-
# pylint: disable=logging-fstring-interpolation
# flake8: noqa: E501
"""Pipeline Job Queue."""

import datetime as dt
from typing import Any, Callable, Optional, Union
from uuid import UUID

from loguru import logger
from rich import print as rprint

from .. import settings
# Import necessary config types
from ..cfg import PipelineConfig, ProjectConfig
from ..fs import AbstractFileSystem
from ..job_queue import JobQueueBackend, JobQueueManager
from ..utils.logging import setup_logging
from .registry import PipelineRegistry

setup_logging()


class PipelineJobQueue:
    """Handles scheduling of pipeline runs via a configured job queue backend."""

    def __init__(
        self,
        project_cfg: ProjectConfig,
        fs: AbstractFileSystem,
        cfg_dir: str,
        pipelines_dir: str,
        # job_queue_type: str | None = None,
    ):
        """Initialize PipelineJobQueue.

        Args:
            project_cfg: The project configuration object.
            fs: The file system to use for file operations.
            cfg_dir: The directory for configuration files.
            pipelines_dir: The directory for pipeline files.
            job_queue_type: The type of job queue to use (e.g., 'rq', 'apscheduler'). If None, defaults to the project config.
        """
        self.project_cfg = project_cfg
        self._fs = fs
        self._cfg_dir = cfg_dir
        self._pipelines_dir = pipelines_dir
        self._job_queue_type = project_cfg.job_queue.type
        self._job_queue_backend_cfg = project_cfg.job_queue.backend
        self._job_queue = None

        # if not self._job_queue_type:
        #     # Fallback or default if not specified in project config
        #     self._job_queue_type = settings.JOB_QUEUE_TYPE
        #     logger.warning(
        #         f"Job queue type not specified in project config, defaulting to '{self._job_queue_type}'"
        #     )

    @property
    def job_queue(self) -> Optional[Any]:
        """
        Lazily instantiate and cache a Job queue instance.
        Handles the case where JobQueueManager returns None due to missing dependencies.

        Returns:
            Optional[Any]: The job queue manager instance, or None if the backend is unavailable.
        """
        logger.debug(
            f"Instantiating job queue of type: {self._job_queue_type} for project '{self.project_cfg.name}'"
        )
        if self._job_queue is None:
            self._job_queue = JobQueueManager(
                name=self.project_cfg.name,
                type=self._job_queue_type,
                backend=JobQueueBackend(
                    job_queue_type=self._job_queue_type,
                    **self._job_queue_backend_cfg.to_dict(),
                ),
            )

        if self._job_queue is None:
            if self._job_queue_type == "rq":
                logger.warning(
                    "JobQueueManager could not be instantiated. The RQ backend is unavailable. "
                    "Please ensure RQ is installed and configured correctly and that the Redis server is running."
                )
            elif self._job_queue_type == "apscheduler":
                logger.warning(
                    "JobQueueManager could not be instantiated. The APScheduler backend is unavailable. "
                    f"Please ensure APScheduler is installed and configured correctly, and that the configured data store ({self.project_cfg.job_queue.backend.data_store.type}) "
                    f"and event_broker ({self.project_cfg.job_queue.backend.event_broker.type}) are accessible."
                )
            return None
        return self._job_queue

    def _get_schedule_ids(self) -> list[Any]:
        """Get all schedules from the job queue backend.

        Returns:
            list[Any]: List of schedule IDs, or empty list if job queue backend is unavailable.
        """

        if self.job_queue is None:
            return []
        with self.job_queue as job_queue:
            logger.debug("Fetching schedules ids from job queue")
            return job_queue.schedule_ids

    def run_job(
        self,
        run_func: Callable,
        pipeline_cfg: PipelineConfig,  # Pipeline configuration object
        name: str,  # name: str,
        inputs: dict | None = None,
        final_vars: list | None = None,
        config: dict | None = None,
        cache: bool | dict = False,
        executor_cfg: str | dict | Any | None = None,
        with_adapter_cfg: dict | Any | None = None,
        pipeline_adapter_cfg: dict | Any | None = None,
        project_adapter_cfg: dict | Any | None = None,
        adapter: dict[str, Any] | None = None,
        reload: bool = False,
        log_level: str | None = None,
        max_retries: int | None = None,
        retry_delay: float | None = None,
        jitter_factor: float | None = None,
        retry_exceptions: tuple | None = None,
        **kwargs,
    ) -> Optional[dict[str, Any]]:
        """
        Add a job to run the pipeline immediately via the job queue.

        Args:
            run_func (Callable): The function to execute in the job queue (e.g., a configured PipelineRunner.run).
            pipeline_cfg (PipelineConfig): The pipeline configuration object.
            name (str): The name of the pipeline (used for logging).
            inputs (dict | None): Inputs for the pipeline run.
            final_vars (list | None): Final variables for the pipeline run.
            config (dict | None): Hamilton driver config.
            cache (bool | dict): Cache configuration.
            executor_cfg (str | dict | ExecutorConfig | None): Executor configuration.
            with_adapter_cfg (dict | WithAdapterConfig | None): Adapter configuration.
            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline adapter configuration.
            project_adapter_cfg (dict | ProjectAdapterConfig | None): Project adapter configuration.
            adapter (dict[str, Any] | None): Additional adapter configuration.
            reload (bool): Whether to reload the pipeline module.
            log_level (str | None): Log level for the run.
            max_retries (int): Maximum number of retries for the job.
            retry_delay (float): Delay between retries.
            jitter_factor (float): Jitter factor for retry delay.
            retry_exceptions (tuple): Exceptions that should trigger a retry.
            **kwargs: Additional keyword arguments passed directly to the job queue's add_job method.

        Returns:
            Optional[dict[str, Any]]: The result of the job execution, or None if job queue backend is unavailable.
        """
        logger.debug(f"Adding immediate job for pipeline: {name}")

        pipeline_run_args = {
            # 'name' is not passed to run_func, it's part of the context already in PipelineRunner
            "project_cfg": self.project_cfg,
            "pipeline_cfg": pipeline_cfg,
            "inputs": inputs,
            "final_vars": final_vars,
            "config": config,
            "cache": cache,
            "executor_cfg": executor_cfg,
            "with_adapter_cfg": with_adapter_cfg,
            "pipeline_adapter_cfg": pipeline_adapter_cfg,
            "project_adapter_cfg": project_adapter_cfg,
            "adapter": adapter,
            "reload": reload,
            "log_level": log_level,
            "max_retries": max_retries,
            "retry_delay": retry_delay,
            "jitter_factor": jitter_factor,
            "retry_exceptions": retry_exceptions,
        }
        pipeline_run_args = {
            k: v for k, v in pipeline_run_args.items() if v is not None
        }
        logger.debug(
            f"Resolved arguments for target run_func for job '{name}': {pipeline_run_args}"
        )

        if self.job_queue is None:
            return None
        with self.job_queue as job_queue:
            res = job_queue.run_job(
                func=run_func,
                func_kwargs=pipeline_run_args,
                **kwargs,
            )

        return res

    def add_job(
        self,
        run_func: Callable,  # The actual function to run (e.g., PipelineRunner(...).run)
        pipeline_cfg: PipelineConfig,  # Pipeline configuration object
        name: str,
        inputs: dict | None = None,
        final_vars: list | None = None,
        config: dict | None = None,
        cache: bool | dict = False,
        executor_cfg: str | dict | Any | None = None,
        with_adapter_cfg: dict | Any | None = None,
        pipeline_adapter_cfg: dict | Any | None = None,
        project_adapter_cfg: dict | Any | None = None,
        adapter: dict[str, Any] | None = None,
        result_ttl: int | dt.timedelta = 120,
        run_at: dt.datetime | None = None,
        run_in: float | dt.timedelta | None = None,
        reload: bool = False,
        log_level: str | None = None,
        max_retries: int | None = None,
        retry_delay: float | None = None,
        jitter_factor: float | None = None,
        retry_exceptions: tuple | list | None = None,
        **kwargs,  # Allow other job queue-specific args if needed
    ) -> Optional[Any]:
        """
        Add a job to run the pipeline immediately via the job queue, storing the result.

        Executes the job immediately and returns the job id (UUID). The job result will be stored
        by the job queue backend for the given `result_ttl` and can be fetched using the job id.

        Args:
            run_func (Callable): The function to execute in the job queue (e.g., a configured PipelineRunner.run).
            pipeline_cfg (PipelineConfig): The pipeline configuration object.
            name (str): The name of the pipeline (used for logging).
            inputs (dict | None): Inputs for the pipeline run.
            final_vars (list | None): Final variables for the pipeline run.
            config (dict | None): Hamilton driver config.
            cache (bool | dict): Cache configuration.
            executor_cfg (str | dict | ExecutorConfig | None): Executor configuration.
            with_adapter_cfg (dict | WithAdapterConfig | None): Adapter configuration.
            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline adapter configuration.
            project_adapter_cfg (dict | ProjectAdapterConfig | None): Project adapter configuration.
            adapter (dict[str, Any] | None): Additional adapter configuration.
            reload (bool): Whether to reload the pipeline module.
            log_level (str | None): Log level for the run.
            result_ttl (int | dt.timedelta): How long the job result should be stored. Defaults to 0 (don't store).
            run_at (dt.datetime | None): Optional datetime to run the job at.
            run_in (float | dt.timedelta | None): Optional delay before running the job.
            max_retries (int): Maximum number of retries for the job.
            retry_delay (float): Delay between retries.
            jitter_factor (float): Jitter factor for retry delay.
            retry_exceptions (tuple): Exceptions that should trigger a retry.
            **kwargs: Additional keyword arguments passed directly to the job queue's add_job method.

        Returns:
            Optional[Any]: The ID of the added job or the job object itself, or None if job queue backend is unavailable.
        """
        logger.debug(f"Adding immediate job with result TTL for pipeline: {name}")

        pipeline_run_args = {
            "project_cfg": self.project_cfg,
            "pipeline_cfg": pipeline_cfg,
            "inputs": inputs,
            "final_vars": final_vars,
            "config": config,
            "cache": cache,
            "executor_cfg": executor_cfg,
            "with_adapter_cfg": with_adapter_cfg,
            "pipeline_adapter_cfg": pipeline_adapter_cfg,
            "project_adapter_cfg": project_adapter_cfg,
            "adapter": adapter,
            "reload": reload,
            "log_level": log_level,
            "max_retries": max_retries,
            "retry_delay": retry_delay,
            "jitter_factor": jitter_factor,
            "retry_exceptions": retry_exceptions,
        }
        pipeline_run_args = {
            k: v for k, v in pipeline_run_args.items() if v is not None
        }
        logger.debug(
            f"Resolved arguments for target run_func for job (TTL) '{name}': {pipeline_run_args}"
        )

        if self.job_queue is None:
            return None
        with self.job_queue as job_queue:
            job = job_queue.add_job(
                func=run_func,
                func_kwargs=pipeline_run_args,
                result_ttl=result_ttl,
                run_at=run_at,
                run_in=run_in,
                **kwargs,
            )
            rprint(
                f"✅ Successfully added job for "
                f"[blue]{self.project_cfg.name}.{name}[/blue] with ID [green]{job if isinstance(job, (str, UUID)) else job.id}[/green]"
                f" and result TTL of {result_ttl} seconds."
            )
            return job

    # --- End Moved from PipelineManager ---

    def schedule(
        self,
        run_func: Callable,
        pipeline_cfg: PipelineConfig,
        # --- Run Parameters (passed to run_func) ---
        inputs: dict | None = None,
        final_vars: list | None = None,
        config: dict | None = None,  # Driver config
        cache: bool | dict = False,
        executor_cfg: str | dict | Any | None = None,
        with_adapter_cfg: dict | Any | None = None,
        pipeline_adapter_cfg: dict | Any | None = None,
        project_adapter_cfg: dict | Any | None = None,
        adapter: dict[str, Any] | None = None,
        reload: bool = False,
        log_level: str | None = None,
        max_retries: int | None = None,
        retry_delay: float | None = None,
        jitter_factor: float | None = None,
        retry_exceptions: tuple | None = None,
        # --- Schedule Parameters (passed to job queue.add_schedule) ---
        cron: str | dict[str, str | int] | None = None,
        interval: int | str | dict[str, str | int] | None = None,
        date: dt.datetime | None = None,
        overwrite: bool = False,
        schedule_id: str | None = None,
        **kwargs,
    ) -> Optional[Union[str, UUID]]:
        """
        Schedule a pipeline for execution using the configured job queue.

        Args:
            run_func (Callable): The function to execute in the job queue.
            pipeline_cfg (PipelineConfig): The pipeline configuration object.
            inputs (dict | None): Inputs for the pipeline run (overrides config).
            final_vars (list | None): Final variables for the pipeline run (overrides config).
            config (dict | None): Hamilton driver config (overrides config).
            cache (bool | dict): Cache configuration (overrides config).
            executor_cfg (str | dict | ExecutorConfig | None): Executor configuration (overrides config).
            with_adapter_cfg (dict | WithAdapterConfig | None): Adapter configuration (overrides config).
            pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline adapter configuration (overrides config).
            project_adapter_cfg (dict | ProjectAdapterConfig | None): Project adapter configuration (overrides config).
            adapter (dict | None): Additional Hamilton adapters (overrides config).
            reload (bool): Whether to reload module (overrides config).
            log_level (str | None): Log level for the run (overrides config).
            max_retries (int): Maximum number of retries for the job.
            retry_delay (float): Delay between retries.
            jitter_factor (float): Jitter factor for retry delay.
            retry_exceptions (tuple): Exceptions that should trigger a retry.
            cron (str | dict | None): Cron expression or dict for cron trigger.
            interval (int | str | dict | None): Interval in seconds or dict for interval trigger.
            date (dt.datetime | None): Date for date trigger.
            overwrite (bool): If True and id_ is None, generates ID '{name}-1', potentially overwriting.
            schedule_id (str | None): Optional ID for the schedule. If None, generates a new ID.
            **kwargs: Additional keyword arguments passed to the job queue's add_schedule method,
                For RQ this includes:
                    - repeat: Repeat count (int or dict)
                    - result_ttl: Time to live for the job result (float or timedelta)
                    - ttl: Time to live for the job (float or timedelta)
                    - use_local_time_zone: Whether to use local time zone for scheduling (bool)
                For APScheduler, this includes:
                    - misfire_grace_time: Grace time for misfires (timedelta)
                    - coalesce: Whether to coalesce jobs (bool)
                    - max_running_jobs: Maximum instances of the job (int)
                    - max_jitter: Maximum jitter for scheduling (int)
                    - conflict_policy: Policy for conflicting jobs (str)
                    - paused: Whether to pause the job (bool)


        Returns:
            Optional[Union[str, UUID]]: The ID of the scheduled pipeline, or None if job queue backend is unavailable.

        Raises:
            ValueError: If trigger_type is invalid or required args are missing.
            Exception: Can raise exceptions from the job queue backend.
        """

        project_name = self.project_cfg.name
        name = pipeline_cfg.name
        logger.debug(
            f"Attempting to schedule pipeline: {project_name}.{name} with id: {schedule_id}"
        )

        # --- Resolve Parameters using pipeline_cfg for defaults ---
        schedule_cfg = pipeline_cfg.schedule
        # run_cfg = pipeline_cfg.run

        pipeline_run_args = {
            "project_cfg": self.project_cfg,
            "pipeline_cfg": pipeline_cfg,
            "inputs": inputs,
            "final_vars": final_vars,
            "config": config,
            "cache": cache,
            "executor_cfg": executor_cfg,
            "with_adapter_cfg": with_adapter_cfg,
            "pipeline_adapter_cfg": pipeline_adapter_cfg,
            "project_adapter_cfg": project_adapter_cfg,
            "adapter": adapter,
            "reload": reload,
            "log_level": log_level,
            "max_retries": max_retries,
            "retry_delay": retry_delay,
            "jitter_factor": jitter_factor,
            "retry_exceptions": retry_exceptions,
        }
        pipeline_run_args = {
            k: v for k, v in pipeline_run_args.items() if v is not None
        }
        logger.debug(f"Resolved run_kwargs for '{name}': {pipeline_run_args}")

        cron = cron if cron is not None else schedule_cfg.cron
        interval = interval if interval is not None else schedule_cfg.interval
        date = date if date is not None else schedule_cfg.date
        logger.debug(
            f"Resolved schedule parameters for '{name}': cron={cron}, interval={interval}, date={date}"
        )

        # --- Generate ID if not provided ---
        # (Keep _generate_id function as is, it uses self._get_schedules())
        def _generate_id(
            pipeline_name: str, explicit_id: str | None, force_overwrite_base: bool
        ) -> str:
            if explicit_id:
                logger.debug(f"Using explicit schedule ID: {explicit_id}")
                return explicit_id

            base_id = f"{pipeline_name}-1"

            if force_overwrite_base:
                logger.debug(f"Overwrite specified, using base ID: {base_id}")
                return base_id

            try:
                existing_ids = self._get_schedule_ids()
                logger.debug(f"Existing schedule IDs: {existing_ids}")

                if not any(
                    id_val.startswith(f"{pipeline_name}-") for id_val in existing_ids
                ):
                    logger.debug(
                        f"No existing schedules found for '{pipeline_name}', using base ID: {base_id}"
                    )
                    return base_id

                # Find highest existing number for this pipeline name
                max_num = 0
                for id_val in existing_ids:
                    if id_val.startswith(f"{pipeline_name}-"):
                        try:
                            num_part = id_val.split("-")[-1]
                            num = int(num_part)
                            if num > max_num:
                                max_num = num
                        except (ValueError, IndexError):
                            logger.warning(
                                f"Could not parse number from existing schedule ID: {id_val}"
                            )
                            continue  # Skip malformed IDs

                new_id = f"{pipeline_name}-{max_num + 1}"
                logger.debug(f"Generated new schedule ID: {new_id}")
                return new_id

            except Exception as e:
                logger.error(
                    f"Error getting existing schedules to generate ID: {e}. Falling back to base ID: {base_id}"
                )
                # Fallback in case of error fetching schedules
                return base_id

        schedule_id = _generate_id(name, schedule_id, overwrite)

        # --- Add Schedule via Job queue ---
        try:
            if self.job_queue is None:
                return None
            with self.job_queue as job_queue:
                # Job queue is now responsible for creating the trigger object
                # Pass trigger type and kwargs directly
                added_id = job_queue.add_schedule(
                    func=run_func,
                    func_kwargs=pipeline_run_args,  # Pass resolved run parameters
                    cron=cron,
                    interval=interval,
                    date=date,
                    schedule_id=schedule_id,
                    **kwargs,  # Pass resolved schedule run parameters
                )
                logger.info(
                    f"✅ Successfully scheduled job for "
                    f"[blue]{project_name}.{name}[/blue] with ID [green]{added_id}[/green]"
                )
                return added_id
        except Exception as e:
            logger.error(
                f"Failed to add schedule '{schedule_id}' for pipeline '{name}': {e}"
            )
            raise

    # --- schedule_all method removed ---
    # PipelineManager will be responsible for iterating and calling schedule()

    def schedule_all(self, registry: PipelineRegistry, **kwargs) -> Optional[list[str]]:
        """
        Schedule all pipelines found by the registry.

        Args:
            registry (PipelineRegistry): The pipeline registry to use for finding pipelines.
            **kwargs: Arguments passed directly to the `schedule` method for each pipeline.
                Note: Pipeline-specific configurations will still take precedence for
                defaults if not overridden by kwargs.

        Returns:
            Optional[list[str]]: List of scheduled pipeline IDs, or None if job queue backend is unavailable.
        """
        if self.job_queue is None:
            logger.warning(
                "Job queue backend is unavailable. Cannot schedule pipelines."
            )
            return None

        try:
            names = registry._get_names()  # Use registry to find pipelines
            if not names:
                logger.info("[yellow]No pipelines found to schedule.[/yellow]")
                return []

            logger.info(f"Attempting to schedule {len(names)} pipelines...")
            scheduled_ids = []
            errors = []
            for name in names:
                try:
                    # Load config specifically for this pipeline to get defaults
                    # Note: schedule() will load it again, potential optimization later
                    cfg = registry.load_config(name=name)
                    if (
                        not cfg
                        or not cfg.pipeline
                        or not cfg.pipeline.schedule
                        or not cfg.pipeline.schedule.enabled
                    ):
                        logger.info(
                            f"🟡 Skipping schedule for [cyan]{name}[/cyan]: Not configured or disabled in config."
                        )
                        continue

                    logger.info(f"Scheduling [cyan]{name}[/cyan]...")
                    # Pass kwargs, allowing overrides of config defaults
                    run_func = registry.get_runner(name).run
                    schedule_id = self.schedule(
                        run_func=run_func, pipeline_cfg=cfg.pipeline, **kwargs
                    )
                    if schedule_id is None:
                        logger.info(
                            f"Skipping adding None schedule_id for pipeline '{name}' to scheduled_ids list."
                        )
                        continue
                    scheduled_ids.append(schedule_id)
                except Exception as e:
                    logger.error(f"Failed to schedule pipeline '{name}': {e}")
                    errors.append(name)
                    logger.error(f"❌ Error scheduling [cyan]{name}[/cyan]: {e}")

            if errors:
                logger.error(
                    f"\n[bold red]Finished scheduling with errors for: {', '.join(errors)}[/bold red]"
                )
            else:
                logger.success(
                    f"\n[bold green]Successfully scheduled {len(scheduled_ids)} pipelines.[/bold green]"
                )

            return scheduled_ids

        except Exception as e:
            logger.error(
                f"[bold red]An unexpected error occurred during schedule_all: {e}[/bold red]"
            )
            return None
```
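For orientation, here is a minimal sketch of how the removed `PipelineJobQueue` appears to have been driven before 0.20.0, reconstructed only from the signatures in the deleted file above. The way `project_cfg`, `fs`, and the registry are obtained, the `conf`/`pipelines` directory names, and the cron expression are illustrative assumptions, not part of this diff.

```python
# Illustrative sketch only (pre-0.20.0 API); everything outside the
# PipelineJobQueue calls themselves is assumed for the example.
from flowerpower.cfg import ProjectConfig
from flowerpower.fs import AbstractFileSystem  # module also removed in 0.20.0
from flowerpower.pipeline.job_queue import PipelineJobQueue  # removed in 0.20.0
from flowerpower.pipeline.registry import PipelineRegistry


def schedule_nightly(
    project_cfg: ProjectConfig,
    fs: AbstractFileSystem,
    registry: PipelineRegistry,
    pipeline_name: str,
):
    """Schedule one pipeline with a cron trigger via the old job-queue helper."""
    job_queue = PipelineJobQueue(
        project_cfg=project_cfg,
        fs=fs,
        cfg_dir="conf",             # assumed project layout
        pipelines_dir="pipelines",  # assumed project layout
    )
    # Same per-pipeline lookup the removed schedule_all() performed.
    cfg = registry.load_config(name=pipeline_name)
    run_func = registry.get_runner(pipeline_name).run
    # Returns the schedule id (e.g. '<name>-1'), or None if no backend is available.
    return job_queue.schedule(
        run_func=run_func,
        pipeline_cfg=cfg.pipeline,
        cron="0 2 * * *",  # nightly at 02:00 (illustrative)
        overwrite=True,
    )
```

The file list above suggests this logic was reorganized in 0.20.0 (for example into `flowerpower/pipeline/pipeline.py`, `flowerpower/flowerpower.py`, and the expanded job-queue managers), so this calling pattern no longer applies in the new version.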