FlowerPower 0.11.6.19__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowerpower/cfg/__init__.py +3 -3
- flowerpower/cfg/pipeline/__init__.py +5 -3
- flowerpower/cfg/project/__init__.py +3 -3
- flowerpower/cfg/project/job_queue.py +1 -128
- flowerpower/cli/__init__.py +5 -5
- flowerpower/cli/cfg.py +0 -3
- flowerpower/cli/job_queue.py +401 -133
- flowerpower/cli/pipeline.py +14 -413
- flowerpower/cli/utils.py +0 -1
- flowerpower/flowerpower.py +537 -28
- flowerpower/job_queue/__init__.py +5 -94
- flowerpower/job_queue/base.py +201 -3
- flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -3
- flowerpower/job_queue/rq/manager.py +388 -77
- flowerpower/pipeline/__init__.py +2 -0
- flowerpower/pipeline/base.py +2 -2
- flowerpower/pipeline/io.py +14 -16
- flowerpower/pipeline/manager.py +21 -642
- flowerpower/pipeline/pipeline.py +571 -0
- flowerpower/pipeline/registry.py +242 -10
- flowerpower/pipeline/visualizer.py +1 -2
- flowerpower/plugins/_io/__init__.py +8 -0
- flowerpower/plugins/mqtt/manager.py +6 -6
- flowerpower/settings/backend.py +0 -2
- flowerpower/settings/job_queue.py +1 -57
- flowerpower/utils/misc.py +0 -256
- flowerpower/utils/monkey.py +1 -83
- {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/METADATA +308 -152
- flowerpower-0.20.0.dist-info/RECORD +58 -0
- flowerpower/fs/__init__.py +0 -29
- flowerpower/fs/base.py +0 -662
- flowerpower/fs/ext.py +0 -2143
- flowerpower/fs/storage_options.py +0 -1420
- flowerpower/job_queue/apscheduler/__init__.py +0 -11
- flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -110
- flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -93
- flowerpower/job_queue/apscheduler/manager.py +0 -1051
- flowerpower/job_queue/apscheduler/setup.py +0 -554
- flowerpower/job_queue/apscheduler/trigger.py +0 -169
- flowerpower/job_queue/apscheduler/utils.py +0 -311
- flowerpower/pipeline/job_queue.py +0 -583
- flowerpower/pipeline/runner.py +0 -603
- flowerpower/plugins/io/base.py +0 -2520
- flowerpower/plugins/io/helpers/datetime.py +0 -298
- flowerpower/plugins/io/helpers/polars.py +0 -875
- flowerpower/plugins/io/helpers/pyarrow.py +0 -570
- flowerpower/plugins/io/helpers/sql.py +0 -202
- flowerpower/plugins/io/loader/__init__.py +0 -28
- flowerpower/plugins/io/loader/csv.py +0 -37
- flowerpower/plugins/io/loader/deltatable.py +0 -190
- flowerpower/plugins/io/loader/duckdb.py +0 -19
- flowerpower/plugins/io/loader/json.py +0 -37
- flowerpower/plugins/io/loader/mqtt.py +0 -159
- flowerpower/plugins/io/loader/mssql.py +0 -26
- flowerpower/plugins/io/loader/mysql.py +0 -26
- flowerpower/plugins/io/loader/oracle.py +0 -26
- flowerpower/plugins/io/loader/parquet.py +0 -35
- flowerpower/plugins/io/loader/postgres.py +0 -26
- flowerpower/plugins/io/loader/pydala.py +0 -19
- flowerpower/plugins/io/loader/sqlite.py +0 -23
- flowerpower/plugins/io/metadata.py +0 -244
- flowerpower/plugins/io/saver/__init__.py +0 -28
- flowerpower/plugins/io/saver/csv.py +0 -36
- flowerpower/plugins/io/saver/deltatable.py +0 -186
- flowerpower/plugins/io/saver/duckdb.py +0 -19
- flowerpower/plugins/io/saver/json.py +0 -36
- flowerpower/plugins/io/saver/mqtt.py +0 -28
- flowerpower/plugins/io/saver/mssql.py +0 -26
- flowerpower/plugins/io/saver/mysql.py +0 -26
- flowerpower/plugins/io/saver/oracle.py +0 -26
- flowerpower/plugins/io/saver/parquet.py +0 -36
- flowerpower/plugins/io/saver/postgres.py +0 -26
- flowerpower/plugins/io/saver/pydala.py +0 -20
- flowerpower/plugins/io/saver/sqlite.py +0 -24
- flowerpower/utils/scheduler.py +0 -311
- flowerpower-0.11.6.19.dist-info/RECORD +0 -102
- {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/WHEEL +0 -0
- {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/licenses/LICENSE +0 -0
- {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/top_level.txt +0 -0
flowerpower/flowerpower.py
CHANGED
@@ -2,14 +2,18 @@ import datetime as dt
|
|
2
2
|
import os
|
3
3
|
import posixpath
|
4
4
|
from pathlib import Path
|
5
|
+
from typing import Any, Callable
|
5
6
|
|
6
7
|
import rich
|
8
|
+
from fsspec_utils import (AbstractFileSystem, BaseStorageOptions,
|
9
|
+
DirFileSystem, filesystem)
|
7
10
|
from loguru import logger
|
8
11
|
|
9
12
|
from . import settings
|
10
13
|
from .cfg import ProjectConfig
|
11
|
-
from .
|
12
|
-
|
14
|
+
from .cfg.pipeline import ExecutorConfig, WithAdapterConfig
|
15
|
+
from .cfg.pipeline.adapter import AdapterConfig as PipelineAdapterConfig
|
16
|
+
from .cfg.project.adapter import AdapterConfig as ProjectAdapterConfig
|
13
17
|
from .job_queue import JobQueueManager
|
14
18
|
from .pipeline import PipelineManager
|
15
19
|
from .utils.logging import setup_logging
|
@@ -35,8 +39,6 @@ class FlowerPowerProject:
|
|
35
39
|
self._base_dir = self.pipeline_manager._base_dir
|
36
40
|
self._fs = self.pipeline_manager._fs
|
37
41
|
self._storage_options = self.pipeline_manager._storage_options
|
38
|
-
self._cfg_dir = self.pipeline_manager._cfg_dir
|
39
|
-
self._pipelines_dir = self.pipeline_manager._pipelines_dir
|
40
42
|
self.job_queue_type = (
|
41
43
|
self.job_queue_manager.cfg.type
|
42
44
|
if self.job_queue_manager is not None
|
@@ -48,10 +50,525 @@ class FlowerPowerProject:
|
|
48
50
|
else None
|
49
51
|
)
|
50
52
|
|
53
|
+
def _inject_dependencies(self):
|
54
|
+
"""Inject dependencies between managers for proper architecture.
|
55
|
+
|
56
|
+
This method establishes the correct dependency flow:
|
57
|
+
- Project context is properly established for pipeline execution
|
58
|
+
- JobQueueManager automatically creates its own PipelineRegistry via property
|
59
|
+
"""
|
60
|
+
# Store project reference for pipeline context
|
61
|
+
# This will be used when creating Pipeline instances
|
62
|
+
self.pipeline_manager._project_context = self
|
63
|
+
|
64
|
+
# Note: JobQueueManager now creates its own PipelineRegistry automatically
|
65
|
+
# via the pipeline_registry property, so no manual injection needed
|
66
|
+
|
67
|
+
# --- Convenience Methods for Pipeline Operations ---
|
68
|
+
|
69
|
+
def run(
|
70
|
+
self,
|
71
|
+
name: str,
|
72
|
+
inputs: dict | None = None,
|
73
|
+
final_vars: list[str] | None = None,
|
74
|
+
config: dict | None = None,
|
75
|
+
cache: dict | None = None,
|
76
|
+
executor_cfg: str | dict | ExecutorConfig | None = None,
|
77
|
+
with_adapter_cfg: dict | WithAdapterConfig | None = None,
|
78
|
+
pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
|
79
|
+
project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
|
80
|
+
adapter: dict[str, Any] | None = None,
|
81
|
+
reload: bool = False,
|
82
|
+
log_level: str | None = None,
|
83
|
+
max_retries: int | None = None,
|
84
|
+
retry_delay: float | None = None,
|
85
|
+
jitter_factor: float | None = None,
|
86
|
+
retry_exceptions: tuple | list | None = None,
|
87
|
+
on_success: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
|
88
|
+
on_failure: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
|
89
|
+
) -> dict[str, Any]:
|
90
|
+
"""Execute a pipeline synchronously and return its results.
|
91
|
+
|
92
|
+
This is a convenience method that delegates to the pipeline manager.
|
93
|
+
It provides the same functionality as `self.pipeline_manager.run()`.
|
94
|
+
|
95
|
+
Args:
|
96
|
+
name: Name of the pipeline to run. Must be a valid identifier.
|
97
|
+
inputs: Override pipeline input values. Example: {"data_date": "2025-04-28"}
|
98
|
+
final_vars: Specify which output variables to return. Example: ["model", "metrics"]
|
99
|
+
config: Configuration for Hamilton pipeline executor. Example: {"model": "LogisticRegression"}
|
100
|
+
cache: Cache configuration for results. Example: {"recompute": ["node1", "final_node"]}
|
101
|
+
executor_cfg: Execution configuration, can be:
|
102
|
+
- str: Executor name, e.g. "threadpool", "local"
|
103
|
+
- dict: Raw config, e.g. {"type": "threadpool", "max_workers": 4}
|
104
|
+
- ExecutorConfig: Structured config object
|
105
|
+
with_adapter_cfg: Adapter settings for pipeline execution.
|
106
|
+
Example: {"opentelemetry": True, "tracker": False}
|
107
|
+
pipeline_adapter_cfg: Pipeline-specific adapter settings.
|
108
|
+
Example: {"tracker": {"project_id": "123", "tags": {"env": "prod"}}}
|
109
|
+
project_adapter_cfg: Project-level adapter settings.
|
110
|
+
Example: {"opentelemetry": {"host": "http://localhost:4317"}}
|
111
|
+
adapter: Custom adapter instance for pipeline
|
112
|
+
Example: {"ray_graph_adapter": RayGraphAdapter()}
|
113
|
+
reload: Force reload of pipeline configuration.
|
114
|
+
log_level: Logging level for the execution. Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
|
115
|
+
max_retries: Maximum number of retries for execution.
|
116
|
+
retry_delay: Delay between retries in seconds.
|
117
|
+
jitter_factor: Random jitter factor to add to retry delay
|
118
|
+
retry_exceptions: Exceptions that trigger a retry.
|
119
|
+
on_success: Callback to run on successful pipeline execution.
|
120
|
+
on_failure: Callback to run on pipeline execution failure.
|
121
|
+
|
122
|
+
Returns:
|
123
|
+
dict[str, Any]: Pipeline execution results, mapping output variable names to their computed values.
|
124
|
+
|
125
|
+
Raises:
|
126
|
+
ValueError: If pipeline name doesn't exist or configuration is invalid
|
127
|
+
ImportError: If pipeline module cannot be imported
|
128
|
+
RuntimeError: If execution fails due to pipeline or adapter errors
|
129
|
+
|
130
|
+
Example:
|
131
|
+
```python
|
132
|
+
project = FlowerPowerProject.load(".")
|
133
|
+
|
134
|
+
# Simple execution
|
135
|
+
result = project.run("my_pipeline")
|
136
|
+
|
137
|
+
# With custom inputs
|
138
|
+
result = project.run(
|
139
|
+
"ml_pipeline",
|
140
|
+
inputs={"data_date": "2025-01-01"},
|
141
|
+
final_vars=["model", "metrics"]
|
142
|
+
)
|
143
|
+
```
|
144
|
+
"""
|
145
|
+
# Validate pipeline manager is available
|
146
|
+
if self.pipeline_manager is None:
|
147
|
+
raise RuntimeError(
|
148
|
+
"Pipeline manager is not configured. Cannot execute pipeline. "
|
149
|
+
"Ensure the project was loaded correctly."
|
150
|
+
)
|
151
|
+
|
152
|
+
# Validate required arguments
|
153
|
+
if not name or not isinstance(name, str):
|
154
|
+
raise ValueError("Pipeline 'name' must be a non-empty string")
|
155
|
+
|
156
|
+
if name.strip() != name:
|
157
|
+
raise ValueError(
|
158
|
+
"Pipeline 'name' cannot have leading or trailing whitespace"
|
159
|
+
)
|
160
|
+
|
161
|
+
# Validate optional arguments
|
162
|
+
if inputs is not None and not isinstance(inputs, dict):
|
163
|
+
raise TypeError("'inputs' must be a dictionary")
|
164
|
+
|
165
|
+
if final_vars is not None and not isinstance(final_vars, list):
|
166
|
+
raise TypeError("'final_vars' must be a list of strings")
|
167
|
+
|
168
|
+
if final_vars is not None:
|
169
|
+
for var in final_vars:
|
170
|
+
if not isinstance(var, str):
|
171
|
+
raise TypeError("All items in 'final_vars' must be strings")
|
172
|
+
|
173
|
+
try:
|
174
|
+
return self.pipeline_manager.run(
|
175
|
+
name=name,
|
176
|
+
inputs=inputs,
|
177
|
+
final_vars=final_vars,
|
178
|
+
config=config,
|
179
|
+
cache=cache,
|
180
|
+
executor_cfg=executor_cfg,
|
181
|
+
with_adapter_cfg=with_adapter_cfg,
|
182
|
+
pipeline_adapter_cfg=pipeline_adapter_cfg,
|
183
|
+
project_adapter_cfg=project_adapter_cfg,
|
184
|
+
adapter=adapter,
|
185
|
+
reload=reload,
|
186
|
+
log_level=log_level,
|
187
|
+
max_retries=max_retries,
|
188
|
+
retry_delay=retry_delay,
|
189
|
+
jitter_factor=jitter_factor,
|
190
|
+
retry_exceptions=retry_exceptions,
|
191
|
+
on_success=on_success,
|
192
|
+
on_failure=on_failure,
|
193
|
+
)
|
194
|
+
except Exception as e:
|
195
|
+
# Log error and re-raise with context
|
196
|
+
logger.error(f"Failed to execute pipeline '{name}': {e}")
|
197
|
+
raise RuntimeError(f"Pipeline execution failed for '{name}': {e}") from e
|
198
|
+
|
199
|
+
def enqueue(
|
200
|
+
self,
|
201
|
+
name: str,
|
202
|
+
*args,
|
203
|
+
**kwargs,
|
204
|
+
):
|
205
|
+
"""Enqueue a pipeline for execution via the job queue.
|
206
|
+
|
207
|
+
This is a convenience method that delegates to the job queue manager's
|
208
|
+
enqueue_pipeline method. It provides asynchronous pipeline execution.
|
209
|
+
|
210
|
+
Args:
|
211
|
+
name: Name of the pipeline to enqueue
|
212
|
+
*args: Additional positional arguments for job execution
|
213
|
+
**kwargs: Keyword arguments for pipeline execution and job queue options.
|
214
|
+
Supports all parameters from pipeline_manager.run() plus job queue specific options:
|
215
|
+
- run_in: Schedule the job to run after a delay
|
216
|
+
- run_at: Schedule the job to run at a specific datetime
|
217
|
+
- queue_name: Queue to use (for RQ)
|
218
|
+
- timeout: Job execution timeout
|
219
|
+
- retry: Number of retries
|
220
|
+
- result_ttl: Result time to live
|
221
|
+
- ttl: Job time to live
|
222
|
+
|
223
|
+
Returns:
|
224
|
+
Job ID or result depending on implementation, or None if job queue not configured
|
225
|
+
|
226
|
+
Raises:
|
227
|
+
RuntimeError: If job queue manager is not configured
|
228
|
+
|
229
|
+
Example:
|
230
|
+
```python
|
231
|
+
project = FlowerPowerProject.load(".")
|
232
|
+
|
233
|
+
# Immediate execution via job queue
|
234
|
+
job_id = project.enqueue("my_pipeline", inputs={"date": "today"})
|
235
|
+
|
236
|
+
# Delayed execution
|
237
|
+
job_id = project.enqueue("my_pipeline", inputs={"date": "today"}, run_in=300)
|
238
|
+
|
239
|
+
# Scheduled execution
|
240
|
+
from datetime import datetime
|
241
|
+
job_id = project.enqueue(
|
242
|
+
"my_pipeline",
|
243
|
+
inputs={"date": "today"},
|
244
|
+
run_at=datetime(2025, 1, 1, 9, 0)
|
245
|
+
)
|
246
|
+
```
|
247
|
+
"""
|
248
|
+
# Validate job queue manager is available
|
249
|
+
if self.job_queue_manager is None:
|
250
|
+
raise RuntimeError(
|
251
|
+
"Job queue manager is not configured. Cannot enqueue pipeline jobs. "
|
252
|
+
"Ensure the project was loaded with a job queue configuration."
|
253
|
+
)
|
254
|
+
|
255
|
+
# Validate required arguments
|
256
|
+
if not name or not isinstance(name, str):
|
257
|
+
raise ValueError("Pipeline 'name' must be a non-empty string")
|
258
|
+
|
259
|
+
if name.strip() != name:
|
260
|
+
raise ValueError(
|
261
|
+
"Pipeline 'name' cannot have leading or trailing whitespace"
|
262
|
+
)
|
263
|
+
|
264
|
+
try:
|
265
|
+
return self.job_queue_manager.enqueue_pipeline(
|
266
|
+
name=name, project_context=self, *args, **kwargs
|
267
|
+
)
|
268
|
+
except Exception as e:
|
269
|
+
# Log error and re-raise with context
|
270
|
+
logger.error(f"Failed to enqueue pipeline '{name}': {e}")
|
271
|
+
raise RuntimeError(f"Pipeline enqueue failed for '{name}': {e}") from e
|
272
|
+
|
273
|
+
def schedule(
|
274
|
+
self,
|
275
|
+
name: str,
|
276
|
+
*args,
|
277
|
+
**kwargs,
|
278
|
+
):
|
279
|
+
"""Schedule a pipeline for recurring or future execution.
|
280
|
+
|
281
|
+
This is a convenience method that delegates to the job queue manager's
|
282
|
+
schedule_pipeline method. It provides scheduled pipeline execution.
|
283
|
+
|
284
|
+
Args:
|
285
|
+
name: Name of the pipeline to schedule
|
286
|
+
*args: Additional positional arguments for scheduling
|
287
|
+
**kwargs: Keyword arguments for pipeline execution and scheduling options.
|
288
|
+
Supports all parameters from pipeline_manager.run() plus scheduling options:
|
289
|
+
- cron: Cron expression for recurring execution (e.g., "0 9 * * *")
|
290
|
+
- interval: Time interval for recurring execution (int seconds or dict)
|
291
|
+
- date: Future date for one-time execution (datetime or ISO string)
|
292
|
+
- schedule_id: Unique identifier for the schedule
|
293
|
+
- overwrite: Whether to overwrite existing schedule with same ID
|
294
|
+
|
295
|
+
Returns:
|
296
|
+
Schedule ID or job ID depending on implementation, or None if job queue not configured
|
297
|
+
|
298
|
+
Raises:
|
299
|
+
RuntimeError: If job queue manager is not configured
|
300
|
+
|
301
|
+
Example:
|
302
|
+
```python
|
303
|
+
project = FlowerPowerProject.load(".")
|
304
|
+
|
305
|
+
# Daily schedule with cron
|
306
|
+
schedule_id = project.schedule(
|
307
|
+
"daily_metrics",
|
308
|
+
cron="0 9 * * *", # 9 AM daily
|
309
|
+
inputs={"date": "{{ execution_date }}"}
|
310
|
+
)
|
311
|
+
|
312
|
+
# Interval-based schedule
|
313
|
+
schedule_id = project.schedule(
|
314
|
+
"monitoring",
|
315
|
+
interval={"minutes": 15},
|
316
|
+
inputs={"check_type": "health"}
|
317
|
+
)
|
318
|
+
|
319
|
+
# Future one-time execution
|
320
|
+
from datetime import datetime, timedelta
|
321
|
+
future_date = datetime.now() + timedelta(days=1)
|
322
|
+
schedule_id = project.schedule(
|
323
|
+
"batch_process",
|
324
|
+
date=future_date,
|
325
|
+
inputs={"process_date": "tomorrow"}
|
326
|
+
)
|
327
|
+
```
|
328
|
+
"""
|
329
|
+
# Validate job queue manager is available
|
330
|
+
if self.job_queue_manager is None:
|
331
|
+
raise RuntimeError(
|
332
|
+
"Job queue manager is not configured. Cannot schedule pipeline jobs. "
|
333
|
+
"Ensure the project was loaded with a job queue configuration."
|
334
|
+
)
|
335
|
+
|
336
|
+
# Validate required arguments
|
337
|
+
if not name or not isinstance(name, str):
|
338
|
+
raise ValueError("Pipeline 'name' must be a non-empty string")
|
339
|
+
|
340
|
+
if name.strip() != name:
|
341
|
+
raise ValueError(
|
342
|
+
"Pipeline 'name' cannot have leading or trailing whitespace"
|
343
|
+
)
|
344
|
+
|
345
|
+
try:
|
346
|
+
return self.job_queue_manager.schedule_pipeline(
|
347
|
+
name=name, project_context=self, *args, **kwargs
|
348
|
+
)
|
349
|
+
except Exception as e:
|
350
|
+
# Log error and re-raise with context
|
351
|
+
logger.error(f"Failed to schedule pipeline '{name}': {e}")
|
352
|
+
raise RuntimeError(f"Pipeline schedule failed for '{name}': {e}") from e
|
353
|
+
|
354
|
+
def start_worker(
|
355
|
+
self,
|
356
|
+
background: bool = False,
|
357
|
+
queue_names: list[str] | None = None,
|
358
|
+
with_scheduler: bool = True,
|
359
|
+
**kwargs: Any,
|
360
|
+
) -> None:
|
361
|
+
"""Start a worker process for processing jobs from the queues.
|
362
|
+
|
363
|
+
This is a convenience method that delegates to the job queue manager's
|
364
|
+
start_worker method.
|
365
|
+
|
366
|
+
Args:
|
367
|
+
background: If True, runs the worker in a non-blocking background mode.
|
368
|
+
If False, runs in the current process and blocks until stopped.
|
369
|
+
queue_names: List of queue names to process. If None, processes all
|
370
|
+
queues defined in the backend configuration.
|
371
|
+
with_scheduler: Whether to include the scheduler queue for processing
|
372
|
+
scheduled jobs (if supported by the backend).
|
373
|
+
**kwargs: Additional worker configuration options specific to the job queue backend.
|
374
|
+
|
375
|
+
Raises:
|
376
|
+
RuntimeError: If job queue manager is not configured
|
377
|
+
|
378
|
+
Example:
|
379
|
+
```python
|
380
|
+
project = FlowerPowerProject.load(".")
|
381
|
+
|
382
|
+
# Start worker in foreground (blocks)
|
383
|
+
project.start_worker()
|
384
|
+
|
385
|
+
# Start worker in background
|
386
|
+
project.start_worker(background=True)
|
387
|
+
|
388
|
+
# Start worker for specific queues
|
389
|
+
project.start_worker(queue_names=["high_priority", "default"])
|
390
|
+
```
|
391
|
+
"""
|
392
|
+
# Validate job queue manager is available
|
393
|
+
if self.job_queue_manager is None:
|
394
|
+
raise RuntimeError(
|
395
|
+
"Job queue manager is not configured. Cannot start worker. "
|
396
|
+
"Ensure the project was loaded with a job queue configuration."
|
397
|
+
)
|
398
|
+
|
399
|
+
# Validate optional arguments
|
400
|
+
if queue_names is not None and not isinstance(queue_names, list):
|
401
|
+
raise TypeError("'queue_names' must be a list of strings")
|
402
|
+
|
403
|
+
if queue_names is not None:
|
404
|
+
for queue_name in queue_names:
|
405
|
+
if not isinstance(queue_name, str):
|
406
|
+
raise TypeError("All items in 'queue_names' must be strings")
|
407
|
+
|
408
|
+
if not isinstance(background, bool):
|
409
|
+
raise TypeError("'background' must be a boolean")
|
410
|
+
|
411
|
+
if not isinstance(with_scheduler, bool):
|
412
|
+
raise TypeError("'with_scheduler' must be a boolean")
|
413
|
+
|
414
|
+
try:
|
415
|
+
return self.job_queue_manager.start_worker(
|
416
|
+
background=background,
|
417
|
+
queue_names=queue_names,
|
418
|
+
with_scheduler=with_scheduler,
|
419
|
+
**kwargs,
|
420
|
+
)
|
421
|
+
except Exception as e:
|
422
|
+
# Log error and re-raise with context
|
423
|
+
logger.error(f"Failed to start worker: {e}")
|
424
|
+
raise RuntimeError(f"Worker start failed: {e}") from e
|
425
|
+
|
426
|
+
def stop_worker(self) -> None:
|
427
|
+
"""Stop the worker process.
|
428
|
+
|
429
|
+
This is a convenience method that delegates to the job queue manager's
|
430
|
+
stop_worker method.
|
431
|
+
|
432
|
+
Raises:
|
433
|
+
RuntimeError: If job queue manager is not configured
|
434
|
+
|
435
|
+
Example:
|
436
|
+
```python
|
437
|
+
project = FlowerPowerProject.load(".")
|
438
|
+
project.stop_worker()
|
439
|
+
```
|
440
|
+
"""
|
441
|
+
# Validate job queue manager is available
|
442
|
+
if self.job_queue_manager is None:
|
443
|
+
raise RuntimeError(
|
444
|
+
"Job queue manager is not configured. Cannot stop worker. "
|
445
|
+
"Ensure the project was loaded with a job queue configuration."
|
446
|
+
)
|
447
|
+
|
448
|
+
try:
|
449
|
+
return self.job_queue_manager.stop_worker()
|
450
|
+
except Exception as e:
|
451
|
+
# Log error and re-raise with context
|
452
|
+
logger.error(f"Failed to stop worker: {e}")
|
453
|
+
raise RuntimeError(f"Worker stop failed: {e}") from e
|
454
|
+
|
455
|
+
def start_worker_pool(
|
456
|
+
self,
|
457
|
+
num_workers: int | None = None,
|
458
|
+
background: bool = False,
|
459
|
+
queue_names: list[str] | None = None,
|
460
|
+
with_scheduler: bool = True,
|
461
|
+
**kwargs: Any,
|
462
|
+
) -> None:
|
463
|
+
"""Start a pool of worker processes to handle jobs in parallel.
|
464
|
+
|
465
|
+
This is a convenience method that delegates to the job queue manager's
|
466
|
+
start_worker_pool method.
|
467
|
+
|
468
|
+
Args:
|
469
|
+
num_workers: Number of worker processes to start. If None, uses CPU
|
470
|
+
count or backend-specific default.
|
471
|
+
background: If True, runs the worker pool in a non-blocking background mode.
|
472
|
+
If False, runs in the current process and blocks until stopped.
|
473
|
+
queue_names: List of queue names to process. If None, processes all
|
474
|
+
queues defined in the backend configuration.
|
475
|
+
with_scheduler: Whether to include the scheduler queue for processing
|
476
|
+
scheduled jobs (if supported by the backend).
|
477
|
+
**kwargs: Additional worker pool configuration options specific to the job queue backend.
|
478
|
+
|
479
|
+
Raises:
|
480
|
+
RuntimeError: If job queue manager is not configured
|
481
|
+
|
482
|
+
Example:
|
483
|
+
```python
|
484
|
+
project = FlowerPowerProject.load(".")
|
485
|
+
|
486
|
+
# Start worker pool with default number of workers
|
487
|
+
project.start_worker_pool()
|
488
|
+
|
489
|
+
# Start 4 workers in background
|
490
|
+
project.start_worker_pool(num_workers=4, background=True)
|
491
|
+
|
492
|
+
# Start worker pool for specific queues
|
493
|
+
project.start_worker_pool(
|
494
|
+
num_workers=2,
|
495
|
+
queue_names=["high_priority", "default"]
|
496
|
+
)
|
497
|
+
```
|
498
|
+
"""
|
499
|
+
# Validate job queue manager is available
|
500
|
+
if self.job_queue_manager is None:
|
501
|
+
raise RuntimeError(
|
502
|
+
"Job queue manager is not configured. Cannot start worker pool. "
|
503
|
+
"Ensure the project was loaded with a job queue configuration."
|
504
|
+
)
|
505
|
+
|
506
|
+
# Validate optional arguments
|
507
|
+
if num_workers is not None and (
|
508
|
+
not isinstance(num_workers, int) or num_workers <= 0
|
509
|
+
):
|
510
|
+
raise ValueError("'num_workers' must be a positive integer")
|
511
|
+
|
512
|
+
if queue_names is not None and not isinstance(queue_names, list):
|
513
|
+
raise TypeError("'queue_names' must be a list of strings")
|
514
|
+
|
515
|
+
if queue_names is not None:
|
516
|
+
for queue_name in queue_names:
|
517
|
+
if not isinstance(queue_name, str):
|
518
|
+
raise TypeError("All items in 'queue_names' must be strings")
|
519
|
+
|
520
|
+
if not isinstance(background, bool):
|
521
|
+
raise TypeError("'background' must be a boolean")
|
522
|
+
|
523
|
+
if not isinstance(with_scheduler, bool):
|
524
|
+
raise TypeError("'with_scheduler' must be a boolean")
|
525
|
+
|
526
|
+
try:
|
527
|
+
return self.job_queue_manager.start_worker_pool(
|
528
|
+
num_workers=num_workers,
|
529
|
+
background=background,
|
530
|
+
queue_names=queue_names,
|
531
|
+
with_scheduler=with_scheduler,
|
532
|
+
**kwargs,
|
533
|
+
)
|
534
|
+
except Exception as e:
|
535
|
+
# Log error and re-raise with context
|
536
|
+
logger.error(f"Failed to start worker pool: {e}")
|
537
|
+
raise RuntimeError(f"Worker pool start failed: {e}") from e
|
538
|
+
|
539
|
+
def stop_worker_pool(self) -> None:
|
540
|
+
"""Stop all worker processes in the worker pool.
|
541
|
+
|
542
|
+
This is a convenience method that delegates to the job queue manager's
|
543
|
+
stop_worker_pool method.
|
544
|
+
|
545
|
+
Raises:
|
546
|
+
RuntimeError: If job queue manager is not configured
|
547
|
+
|
548
|
+
Example:
|
549
|
+
```python
|
550
|
+
project = FlowerPowerProject.load(".")
|
551
|
+
project.stop_worker_pool()
|
552
|
+
```
|
553
|
+
"""
|
554
|
+
# Validate job queue manager is available
|
555
|
+
if self.job_queue_manager is None:
|
556
|
+
raise RuntimeError(
|
557
|
+
"Job queue manager is not configured. Cannot stop worker pool. "
|
558
|
+
"Ensure the project was loaded with a job queue configuration."
|
559
|
+
)
|
560
|
+
|
561
|
+
try:
|
562
|
+
return self.job_queue_manager.stop_worker_pool()
|
563
|
+
except Exception as e:
|
564
|
+
# Log error and re-raise with context
|
565
|
+
logger.error(f"Failed to stop worker pool: {e}")
|
566
|
+
raise RuntimeError(f"Worker pool stop failed: {e}") from e
|
567
|
+
|
51
568
|
@staticmethod
|
52
569
|
def _check_project_exists(base_dir: str, fs: AbstractFileSystem | None = None):
|
53
570
|
if fs is None:
|
54
|
-
fs =
|
571
|
+
fs = filesystem(base_dir, dirfs=True)
|
55
572
|
if isinstance(fs, DirFileSystem):
|
56
573
|
if not fs.exists("."):
|
57
574
|
rich.print(
|
@@ -118,7 +635,7 @@ class FlowerPowerProject:
|
|
118
635
|
cached = False
|
119
636
|
cache_storage = None
|
120
637
|
if not fs:
|
121
|
-
fs =
|
638
|
+
fs = filesystem(
|
122
639
|
base_dir,
|
123
640
|
storage_options=storage_options,
|
124
641
|
cached=cached,
|
@@ -135,14 +652,22 @@ class FlowerPowerProject:
|
|
135
652
|
)
|
136
653
|
|
137
654
|
job_queue_manager = JobQueueManager(
|
655
|
+
name=f"{pipeline_manager.project_cfg.name}_job_queue",
|
656
|
+
base_dir=base_dir,
|
138
657
|
storage_options=storage_options,
|
139
658
|
fs=fs,
|
140
|
-
log_level=log_level,
|
141
659
|
)
|
142
|
-
|
660
|
+
|
661
|
+
# Create the project instance
|
662
|
+
project = cls(
|
143
663
|
pipeline_manager=pipeline_manager,
|
144
664
|
job_queue_manager=job_queue_manager,
|
145
665
|
)
|
666
|
+
|
667
|
+
# Inject dependencies after creation to avoid circular imports
|
668
|
+
project._inject_dependencies()
|
669
|
+
|
670
|
+
return project
|
146
671
|
else:
|
147
672
|
logger.error(
|
148
673
|
f"Project does not exist at {base_dir}. Please initialize it first. Use `FlowerPowerProject.init()` to create a new project."
|
@@ -157,8 +682,6 @@ class FlowerPowerProject:
|
|
157
682
|
storage_options: dict | BaseStorageOptions | None = {},
|
158
683
|
fs: AbstractFileSystem | None = None,
|
159
684
|
job_queue_type: str = settings.JOB_QUEUE_TYPE,
|
160
|
-
cfg_dir: str = settings.CONFIG_DIR,
|
161
|
-
pipelines_dir: str = settings.PIPELINES_DIR,
|
162
685
|
hooks_dir: str = settings.HOOKS_DIR,
|
163
686
|
log_level: str | None = None,
|
164
687
|
) -> "FlowerPowerProject":
|
@@ -171,8 +694,6 @@ class FlowerPowerProject:
|
|
171
694
|
storage_options (dict | BaseStorageOptions | None): Storage options for the filesystem.
|
172
695
|
fs (AbstractFileSystem | None): An instance of AbstractFileSystem to use for file operations.
|
173
696
|
job_queue_type (str): The type of job queue to use for the project.
|
174
|
-
cfg_dir (str): The directory where the project configuration will be stored.
|
175
|
-
pipelines_dir (str): The directory where the project pipelines will be stored.
|
176
697
|
hooks_dir (str): The directory where the project hooks will be stored.
|
177
698
|
Returns:
|
178
699
|
FlowerPowerProject: An instance of FlowerPowerProject initialized with the new project.
|
@@ -190,14 +711,14 @@ class FlowerPowerProject:
|
|
190
711
|
base_dir = posixpath.join(str(Path.cwd()), name)
|
191
712
|
|
192
713
|
if fs is None:
|
193
|
-
fs =
|
194
|
-
|
714
|
+
fs = filesystem(
|
715
|
+
protocol_or_path=base_dir,
|
195
716
|
dirfs=True,
|
196
717
|
storage_options=storage_options,
|
197
718
|
)
|
198
719
|
|
199
|
-
fs.makedirs(f"{
|
200
|
-
fs.makedirs(
|
720
|
+
fs.makedirs(f"{settings.CONFIG_DIR}/pipelines", exist_ok=True)
|
721
|
+
fs.makedirs(settings.PIPELINES_DIR, exist_ok=True)
|
201
722
|
fs.makedirs(hooks_dir, exist_ok=True)
|
202
723
|
|
203
724
|
cfg = ProjectConfig.load(name=name, job_queue_type=job_queue_type, fs=fs)
|
@@ -267,8 +788,6 @@ class FlowerPower:
|
|
267
788
|
storage_options: dict | BaseStorageOptions | None = {},
|
268
789
|
fs: AbstractFileSystem | None = None,
|
269
790
|
job_queue_type: str = settings.JOB_QUEUE_TYPE,
|
270
|
-
cfg_dir: str = settings.CONFIG_DIR,
|
271
|
-
pipelines_dir: str = settings.PIPELINES_DIR,
|
272
791
|
hooks_dir: str = settings.HOOKS_DIR,
|
273
792
|
) -> FlowerPowerProject:
|
274
793
|
"""
|
@@ -280,8 +799,6 @@ class FlowerPower:
|
|
280
799
|
storage_options (dict | BaseStorageOptions | None): Storage options for the filesystem.
|
281
800
|
fs (AbstractFileSystem | None): An instance of AbstractFileSystem to use for file operations.
|
282
801
|
job_queue_type (str): The type of job queue to use for the project.
|
283
|
-
cfg_dir (str): The directory where the project configuration will be stored.
|
284
|
-
pipelines_dir (str): The directory where the project pipelines will be stored.
|
285
802
|
hooks_dir (str): The directory where the project hooks will be stored.
|
286
803
|
|
287
804
|
Returns:
|
@@ -300,8 +817,6 @@ class FlowerPower:
|
|
300
817
|
storage_options=storage_options,
|
301
818
|
fs=fs,
|
302
819
|
job_queue_type=job_queue_type,
|
303
|
-
cfg_dir=cfg_dir,
|
304
|
-
pipelines_dir=pipelines_dir,
|
305
820
|
hooks_dir=hooks_dir,
|
306
821
|
)
|
307
822
|
|
@@ -321,8 +836,6 @@ def init(
|
|
321
836
|
storage_options: dict | BaseStorageOptions | None = {},
|
322
837
|
fs: AbstractFileSystem | None = None,
|
323
838
|
job_queue_type: str = settings.JOB_QUEUE_TYPE,
|
324
|
-
cfg_dir: str = settings.CONFIG_DIR,
|
325
|
-
pipelines_dir: str = settings.PIPELINES_DIR,
|
326
839
|
hooks_dir: str = settings.HOOKS_DIR,
|
327
840
|
) -> FlowerPowerProject:
|
328
841
|
"""
|
@@ -334,8 +847,6 @@ def init(
|
|
334
847
|
storage_options (dict | BaseStorageOptions | None): Storage options for the filesystem.
|
335
848
|
fs (AbstractFileSystem | None): An instance of AbstractFileSystem to use for file operations.
|
336
849
|
job_queue_type (str): The type of job queue to use for the project.
|
337
|
-
cfg_dir (str): The directory where the project configuration will be stored.
|
338
|
-
pipelines_dir (str): The directory where the project pipelines will be stored.
|
339
850
|
hooks_dir (str): The directory where the project hooks will be stored.
|
340
851
|
|
341
852
|
Returns:
|
@@ -347,7 +858,5 @@ def init(
|
|
347
858
|
storage_options=storage_options,
|
348
859
|
fs=fs,
|
349
860
|
job_queue_type=job_queue_type,
|
350
|
-
cfg_dir=cfg_dir,
|
351
|
-
pipelines_dir=pipelines_dir,
|
352
861
|
hooks_dir=hooks_dir,
|
353
862
|
)
|