FlowerPower 0.11.6.20__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowerpower/cfg/__init__.py +3 -3
- flowerpower/cfg/pipeline/__init__.py +5 -3
- flowerpower/cfg/project/__init__.py +3 -3
- flowerpower/cfg/project/job_queue.py +1 -128
- flowerpower/cli/__init__.py +5 -5
- flowerpower/cli/cfg.py +0 -3
- flowerpower/cli/job_queue.py +400 -132
- flowerpower/cli/pipeline.py +14 -413
- flowerpower/cli/utils.py +0 -1
- flowerpower/flowerpower.py +537 -28
- flowerpower/job_queue/__init__.py +5 -94
- flowerpower/job_queue/base.py +201 -3
- flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -3
- flowerpower/job_queue/rq/manager.py +388 -77
- flowerpower/pipeline/__init__.py +2 -0
- flowerpower/pipeline/base.py +2 -2
- flowerpower/pipeline/io.py +14 -16
- flowerpower/pipeline/manager.py +21 -642
- flowerpower/pipeline/pipeline.py +571 -0
- flowerpower/pipeline/registry.py +242 -10
- flowerpower/pipeline/visualizer.py +1 -2
- flowerpower/plugins/_io/__init__.py +8 -0
- flowerpower/plugins/mqtt/manager.py +6 -6
- flowerpower/settings/backend.py +0 -2
- flowerpower/settings/job_queue.py +1 -57
- flowerpower/utils/misc.py +0 -256
- flowerpower/utils/monkey.py +1 -83
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.20.0.dist-info}/METADATA +308 -152
- flowerpower-0.20.0.dist-info/RECORD +58 -0
- flowerpower/fs/__init__.py +0 -29
- flowerpower/fs/base.py +0 -662
- flowerpower/fs/ext.py +0 -2143
- flowerpower/fs/storage_options.py +0 -1420
- flowerpower/job_queue/apscheduler/__init__.py +0 -11
- flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -110
- flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -93
- flowerpower/job_queue/apscheduler/manager.py +0 -1051
- flowerpower/job_queue/apscheduler/setup.py +0 -554
- flowerpower/job_queue/apscheduler/trigger.py +0 -169
- flowerpower/job_queue/apscheduler/utils.py +0 -311
- flowerpower/pipeline/job_queue.py +0 -583
- flowerpower/pipeline/runner.py +0 -603
- flowerpower/plugins/io/base.py +0 -2520
- flowerpower/plugins/io/helpers/datetime.py +0 -298
- flowerpower/plugins/io/helpers/polars.py +0 -875
- flowerpower/plugins/io/helpers/pyarrow.py +0 -570
- flowerpower/plugins/io/helpers/sql.py +0 -202
- flowerpower/plugins/io/loader/__init__.py +0 -28
- flowerpower/plugins/io/loader/csv.py +0 -37
- flowerpower/plugins/io/loader/deltatable.py +0 -190
- flowerpower/plugins/io/loader/duckdb.py +0 -19
- flowerpower/plugins/io/loader/json.py +0 -37
- flowerpower/plugins/io/loader/mqtt.py +0 -159
- flowerpower/plugins/io/loader/mssql.py +0 -26
- flowerpower/plugins/io/loader/mysql.py +0 -26
- flowerpower/plugins/io/loader/oracle.py +0 -26
- flowerpower/plugins/io/loader/parquet.py +0 -35
- flowerpower/plugins/io/loader/postgres.py +0 -26
- flowerpower/plugins/io/loader/pydala.py +0 -19
- flowerpower/plugins/io/loader/sqlite.py +0 -23
- flowerpower/plugins/io/metadata.py +0 -244
- flowerpower/plugins/io/saver/__init__.py +0 -28
- flowerpower/plugins/io/saver/csv.py +0 -36
- flowerpower/plugins/io/saver/deltatable.py +0 -186
- flowerpower/plugins/io/saver/duckdb.py +0 -19
- flowerpower/plugins/io/saver/json.py +0 -36
- flowerpower/plugins/io/saver/mqtt.py +0 -28
- flowerpower/plugins/io/saver/mssql.py +0 -26
- flowerpower/plugins/io/saver/mysql.py +0 -26
- flowerpower/plugins/io/saver/oracle.py +0 -26
- flowerpower/plugins/io/saver/parquet.py +0 -36
- flowerpower/plugins/io/saver/postgres.py +0 -26
- flowerpower/plugins/io/saver/pydala.py +0 -20
- flowerpower/plugins/io/saver/sqlite.py +0 -24
- flowerpower/utils/scheduler.py +0 -311
- flowerpower-0.11.6.20.dist-info/RECORD +0 -102
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.20.0.dist-info}/WHEEL +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.20.0.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.20.0.dist-info}/licenses/LICENSE +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.20.0.dist-info}/top_level.txt +0 -0
flowerpower/job_queue/rq/manager.py CHANGED

@@ -10,10 +10,13 @@ import platform
 import sys
 import time
 import uuid
+import warnings
 from typing import Any, Callable

 import duration_parser
 from cron_descriptor import get_description
+# from ...fs import AbstractFileSystem
+from fsspec_utils import AbstractFileSystem
 from humanize import precisedelta
 from loguru import logger
 from rq import Queue, Repeat, Retry
@@ -23,7 +26,6 @@ from rq.worker import Worker
 from rq.worker_pool import WorkerPool
 from rq_scheduler import Scheduler

-from ...fs import AbstractFileSystem
 from ...utils.logging import setup_logging
 from ..base import BaseJobQueueManager
 from .setup import RQBackend
@@ -175,6 +177,7 @@ class RQManager(BaseJobQueueManager):
         background: bool = False,
         queue_names: list[str] | None = None,
         with_scheduler: bool = True,
+        num_workers: int | None = None,
         **kwargs: Any,
     ) -> None:
         """Start a worker process for processing jobs from the queues.
@@ -186,6 +189,7 @@
                 queues defined in the backend configuration.
             with_scheduler: Whether to include the scheduler queue for processing
                 scheduled jobs.
+            num_workers: Number of worker processes to start (pool mode).
             **kwargs: Additional arguments passed to RQ's Worker class.
                 Example: {"burst": True, "logging_level": "INFO", "job_monitoring_interval": 30}

@@ -210,85 +214,101 @@
                 max_jobs=100,
                 job_monitoring_interval=30
             )
+            # Start a worker pool with 4 processes
+            worker.start_worker(
+                background=True,
+                num_workers=4
+            )
             ```
         """
-        … (removed code not shown in source view)
-        queue_names = self._queue_names
-        queue_names_str = ", ".join(queue_names)
+        if num_workers is not None and num_workers > 1:
+            self.start_worker_pool(
+                num_workers=num_workers,
+                background=background,
+                queue_names=queue_names,
+                with_scheduler=with_scheduler,
+                **kwargs,
+            )
         else:
-            … (removed code not shown in source view)
-            #
-            #
-
+            import multiprocessing
+
+            logging_level = kwargs.pop("logging_level", self._log_level)
+            burst = kwargs.pop("burst", False)
+            max_jobs = kwargs.pop("max_jobs", None)
+            # Determine which queues to process
+            if queue_names is None:
+                # Use all queues by default
+                queue_names = self._queue_names
+                queue_names_str = ", ".join(queue_names)
+            else:
+                # Filter to only include valid queue names
+                queue_names = [
+                    name for name in queue_names if name in self._queue_names
+                ]
+                queue_names_str = ", ".join(queue_names)
+
+            if not queue_names:
+                logger.error("No valid queues specified, cannot start worker")
+                return
+
+            if with_scheduler:
+                # Add the scheduler queue to the list of queues
+                queue_names.append(self._scheduler_name)
+                queue_names_str = ", ".join(queue_names)
+
+            # Create a worker instance with queue names (not queue objects)
+            worker = Worker(queue_names, connection=self._backend.client, **kwargs)
+
+            if background:
+                # We need to use a separate process rather than a thread because
+                # RQ's signal handler registration only works in the main thread
+                def run_worker_process(queue_names_arg):
+                    # Import RQ inside the process to avoid connection sharing issues
+                    from redis import Redis
+                    from rq import Worker
+
+                    # Create a fresh Redis connection in this process
+                    redis_conn = Redis.from_url(self._backend.uri)
+
+                    # Create a worker instance with queue names
+                    worker_proc = Worker(queue_names_arg, connection=redis_conn)
+
+                    # Disable the default signal handlers in RQ worker by patching
+                    # the _install_signal_handlers method to do nothing
+                    worker_proc._install_signal_handlers = lambda: None
+
+                    # Work until terminated
+                    worker_proc.work(
+                        with_scheduler=True,
+                        logging_level=logging_level,
+                        burst=burst,
+                        max_jobs=max_jobs,
+                    )

-            #
-
+                # Create and start the process
+                process = multiprocessing.Process(
+                    target=run_worker_process,
+                    args=(queue_names,),
+                    name=f"rq-worker-{self.name}",
+                )
+                # Don't use daemon=True to avoid the "daemonic processes are not allowed to have children" error
+                process.start()
+                self._worker_process = process
+                logger.info(
+                    f"Started RQ worker in background process (PID: {process.pid}) for queues: {queue_names_str}"
+                )
+            else:
+                # Start worker in the current process (blocking)
+                logger.info(
+                    f"Starting RQ worker in current process (blocking) for queues: {queue_names_str}"
+                )
+                worker.work(
                     with_scheduler=True,
                     logging_level=logging_level,
                     burst=burst,
                     max_jobs=max_jobs,
                 )

-            # Create and start the process
-            process = multiprocessing.Process(
-                target=run_worker_process,
-                args=(queue_names,),
-                name=f"rq-worker-{self.name}",
-            )
-            # Don't use daemon=True to avoid the "daemonic processes are not allowed to have children" error
-            process.start()
-            self._worker_process = process
-            logger.info(
-                f"Started RQ worker in background process (PID: {process.pid}) for queues: {queue_names_str}"
-            )
-        else:
-            # Start worker in the current process (blocking)
-            logger.info(
-                f"Starting RQ worker in current process (blocking) for queues: {queue_names_str}"
-            )
-            worker.work(
-                with_scheduler=True,
-                logging_level=logging_level,
-                burst=burst,
-                max_jobs=max_jobs,
-            )
-
     def stop_worker(self) -> None:
         """Stop the worker process.

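The background branch above starts the RQ worker in a separate OS process with a fresh Redis connection and with RQ's default signal handlers disabled. A minimal standalone sketch of that same pattern, with a placeholder Redis URL and queue name rather than values from FlowerPower's configuration:

```python
import multiprocessing


def run_worker_process(queue_names_arg, redis_url):
    # Import and connect inside the child process so the Redis
    # connection is not shared with the parent process.
    from redis import Redis
    from rq import Worker

    worker = Worker(queue_names_arg, connection=Redis.from_url(redis_url))
    # Disable RQ's default signal handlers, as the patch above does,
    # so the parent process can terminate this worker itself.
    worker._install_signal_handlers = lambda: None
    worker.work(with_scheduler=True)


if __name__ == "__main__":
    process = multiprocessing.Process(
        target=run_worker_process,
        args=(["default"], "redis://localhost:6379/0"),  # placeholder values
        name="rq-worker-example",
    )
    process.start()  # worker now runs in the background
    # ... later, to stop it:
    # process.terminate(); process.join(timeout=5)
```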
@@ -304,14 +324,17 @@
         worker.stop_worker()
         ```
         """
-        if hasattr(self, "…
-        … (removed code not shown in source view)
-            self._worker_process.terminate()
-            self._worker_process.join(timeout=5)
-            logger.info("RQ worker process terminated")
-            self._worker_process = None
+        if hasattr(self, "_worker_pool"):
+            self.stop_worker_pool()
         else:
-            … (removed code not shown in source view)
+            if hasattr(self, "_worker_process") and self._worker_process is not None:
+                if self._worker_process.is_alive():
+                    self._worker_process.terminate()
+                    self._worker_process.join(timeout=5)
+                    logger.info("RQ worker process terminated")
+                    self._worker_process = None
+            else:
+                logger.warning("No worker process to stop")

     def start_worker_pool(
         self,
@@ -543,6 +566,132 @@

     ## Jobs ###

+    def enqueue(
+        self,
+        func: Callable,
+        *args,
+        **kwargs,
+    ) -> Job:
+        """Enqueue a job for execution (immediate, delayed, or scheduled).
+
+        This is the main method for adding jobs to the queue. It supports:
+        - Immediate execution (no run_at or run_in parameters)
+        - Delayed execution (run_in parameter)
+        - Scheduled execution (run_at parameter)
+
+        Args:
+            func: Function to execute. Must be importable from the worker process.
+            *args: Positional arguments for the function
+            **kwargs: Keyword arguments including:
+                - run_in: Schedule the job to run after a delay (timedelta, int seconds, or string)
+                - run_at: Schedule the job to run at a specific datetime
+                - func_args: Alternative way to pass positional arguments
+                - func_kwargs: Alternative way to pass keyword arguments
+                - Other job queue specific parameters (timeout, retry, etc.)
+
+        Returns:
+            Job: The created job instance
+
+        Example:
+            ```python
+            # Immediate execution
+            manager.enqueue(my_func, arg1, arg2, kwarg1="value")
+
+            # Delayed execution
+            manager.enqueue(my_func, arg1, run_in=300)  # 5 minutes
+            manager.enqueue(my_func, arg1, run_in=timedelta(hours=1))
+
+            # Scheduled execution
+            manager.enqueue(my_func, arg1, run_at=datetime(2025, 1, 1, 9, 0))
+            ```
+        """
+        # Extract func_args and func_kwargs if provided as alternatives to *args
+        func_args = kwargs.pop("func_args", None)
+        func_kwargs = kwargs.pop("func_kwargs", None)
+
+        # Use provided args or fall back to func_args
+        if args:
+            final_args = args
+        elif func_args:
+            final_args = func_args
+        else:
+            final_args = ()
+
+        # Extract function keyword arguments
+        if func_kwargs:
+            final_kwargs = func_kwargs
+        else:
+            final_kwargs = {}
+
+        # Delegate to add_job with the parameters
+        return self.add_job(
+            func=func, func_args=final_args, func_kwargs=final_kwargs, **kwargs
+        )
+
+    def enqueue_in(
+        self,
+        delay,
+        func: Callable,
+        *args,
+        **kwargs,
+    ) -> Job:
+        """Enqueue a job to run after a specified delay.
+
+        This is a convenience method for delayed execution.
+
+        Args:
+            delay: Time to wait before execution (timedelta, int seconds, or string)
+            func: Function to execute
+            *args: Positional arguments for the function
+            **kwargs: Keyword arguments for the function and job options
+
+        Returns:
+            Job: The created job instance
+
+        Example:
+            ```python
+            # Run in 5 minutes
+            manager.enqueue_in(300, my_func, arg1, arg2)
+
+            # Run in 1 hour
+            manager.enqueue_in(timedelta(hours=1), my_func, arg1, kwarg1="value")
+            ```
+        """
+        return self.enqueue(func, *args, run_in=delay, **kwargs)
+
+    def enqueue_at(
+        self,
+        datetime,
+        func: Callable,
+        *args,
+        **kwargs,
+    ) -> Job:
+        """Enqueue a job to run at a specific datetime.
+
+        This is a convenience method for scheduled execution.
+
+        Args:
+            datetime: When to execute the job (datetime object or ISO string)
+            func: Function to execute
+            *args: Positional arguments for the function
+            **kwargs: Keyword arguments for the function and job options
+
+        Returns:
+            Job: The created job instance
+
+        Example:
+            ```python
+            # Run at specific time
+            manager.enqueue_at(datetime(2025, 1, 1, 9, 0), my_func, arg1, arg2)
+
+            # Run tomorrow at 9 AM
+            tomorrow_9am = datetime.now() + timedelta(days=1)
+            tomorrow_9am = tomorrow_9am.replace(hour=9, minute=0, second=0)
+            manager.enqueue_at(tomorrow_9am, my_func, arg1, kwarg1="value")
+            ```
+        """
+        return self.enqueue(func, *args, run_at=datetime, **kwargs)
+
     def add_job(
         self,
         func: Callable,
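Pulled together, the docstring examples above amount to the following usage. This is a consolidated sketch in which send_report and the project path are illustrative placeholders; the RQManager construction mirrors the examples shown in this diff:

```python
from datetime import datetime, timedelta

from flowerpower.job_queue.rq.manager import RQManager


def send_report(day: str) -> str:
    # Placeholder job function; it must be importable from the worker process.
    return f"report for {day}"


manager = RQManager(base_dir="/path/to/project")  # placeholder project path

# Immediate execution
manager.enqueue(send_report, "2025-01-01")

# Delayed execution: run_in / delay accepts seconds, a timedelta, or a string
manager.enqueue_in(timedelta(minutes=5), send_report, "2025-01-01")

# Scheduled execution at a fixed datetime
manager.enqueue_at(datetime(2025, 6, 1, 9, 0), send_report, "2025-06-01")
```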
@@ -567,6 +716,10 @@
     ) -> Job:
         """Add a job for immediate or scheduled execution.

+        .. deprecated:: 0.12.0
+            Use :meth:`enqueue`, :meth:`enqueue_in`, or :meth:`enqueue_at` instead.
+            The add_job method will be removed in version 1.0.0.
+
         Args:
             func: Function to execute. Must be importable from the worker process.
             func_args: Positional arguments to pass to the function.
@@ -640,6 +793,14 @@
         )
         ```
         """
+        # Issue deprecation warning
+        warnings.warn(
+            "add_job() is deprecated and will be removed in version 1.0.0. "
+            "Use enqueue(), enqueue_in(), or enqueue_at() instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+
         job_id = job_id or str(uuid.uuid4())
         if isinstance(result_ttl, (int, float)):
             result_ttl = dt.timedelta(seconds=result_ttl)
@@ -1580,3 +1741,153 @@
         """
         schedule_ids = [schedule.id for schedule in self.schedules]
         return schedule_ids
+
+    # --- Pipeline-specific high-level methods implementation ---
+
+    def schedule_pipeline(self, name: str, project_context=None, *args, **kwargs):
+        """Schedule a pipeline for execution using its name.
+
+        This high-level method loads the pipeline from the internal registry and schedules
+        its execution with the job queue using the existing add_schedule method.
+
+        Args:
+            name: Name of the pipeline to schedule
+            project_context: Project context for the pipeline (optional)
+            *args: Additional positional arguments for scheduling
+            **kwargs: Additional keyword arguments for scheduling
+
+        Returns:
+            Schedule ID from the underlying add_schedule call
+
+        Example:
+            ```python
+            manager = RQManager(base_dir="/path/to/project")
+            schedule_id = manager.schedule_pipeline(
+                "my_pipeline",
+                cron="0 9 * * *",  # Run daily at 9 AM
+                inputs={"date": "today"}
+            )
+            ```
+        """
+        logger.info(f"Scheduling pipeline '{name}' via RQ job queue")
+
+        # Create a function that will be executed by the job queue
+        def pipeline_job(*job_args, **job_kwargs):
+            # Get the pipeline instance
+            pipeline = self.pipeline_registry.get_pipeline(
+                name=name,
+                project_context=project_context,
+                reload=job_kwargs.pop("reload", False),
+            )
+
+            # Execute the pipeline
+            return pipeline.run(*job_args, **job_kwargs)
+
+        # Extract pipeline execution arguments from kwargs
+        pipeline_kwargs = {
+            k: v
+            for k, v in kwargs.items()
+            if k
+            in [
+                "inputs",
+                "final_vars",
+                "config",
+                "cache",
+                "executor_cfg",
+                "with_adapter_cfg",
+                "pipeline_adapter_cfg",
+                "project_adapter_cfg",
+                "adapter",
+                "reload",
+                "log_level",
+                "max_retries",
+                "retry_delay",
+                "jitter_factor",
+                "retry_exceptions",
+                "on_success",
+                "on_failure",
+            ]
+        }
+
+        # Extract scheduling arguments
+        schedule_kwargs = {k: v for k, v in kwargs.items() if k not in pipeline_kwargs}
+
+        # Schedule the job
+        return self.add_schedule(
+            func=pipeline_job, func_kwargs=pipeline_kwargs, **schedule_kwargs
+        )
+
+    def enqueue_pipeline(self, name: str, project_context=None, *args, **kwargs):
+        """Enqueue a pipeline for immediate execution using its name.
+
+        This high-level method loads the pipeline from the internal registry and enqueues
+        it for immediate execution in the job queue using the existing enqueue method.
+
+        Args:
+            name: Name of the pipeline to enqueue
+            project_context: Project context for the pipeline (optional)
+            *args: Additional positional arguments for job execution
+            **kwargs: Additional keyword arguments for job execution
+
+        Returns:
+            Job ID from the underlying enqueue call
+
+        Example:
+            ```python
+            manager = RQManager(base_dir="/path/to/project")
+            job_id = manager.enqueue_pipeline(
+                "my_pipeline",
+                inputs={"date": "2025-01-01"},
+                final_vars=["result"]
+            )
+            ```
+        """
+        logger.info(
+            f"Enqueueing pipeline '{name}' for immediate execution via RQ job queue"
+        )
+
+        # Create a function that will be executed by the job queue
+        def pipeline_job(*job_args, **job_kwargs):
+            # Get the pipeline instance
+            pipeline = self.pipeline_registry.get_pipeline(
+                name=name,
+                project_context=project_context,
+                reload=job_kwargs.pop("reload", False),
+            )
+
+            # Execute the pipeline
+            return pipeline.run(*job_args, **job_kwargs)
+
+        # Extract pipeline execution arguments from kwargs
+        pipeline_kwargs = {
+            k: v
+            for k, v in kwargs.items()
+            if k
+            in [
+                "inputs",
+                "final_vars",
+                "config",
+                "cache",
+                "executor_cfg",
+                "with_adapter_cfg",
+                "pipeline_adapter_cfg",
+                "project_adapter_cfg",
+                "adapter",
+                "reload",
+                "log_level",
+                "max_retries",
+                "retry_delay",
+                "jitter_factor",
+                "retry_exceptions",
+                "on_success",
+                "on_failure",
+            ]
+        }
+
+        # Extract job queue arguments
+        job_kwargs = {k: v for k, v in kwargs.items() if k not in pipeline_kwargs}
+
+        # Add the job
+        return self.enqueue(
+            func=pipeline_job, func_kwargs=pipeline_kwargs, *args, **job_kwargs
+        )
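Both helpers split the caller's kwargs: keys in the whitelist above (inputs, final_vars, config, cache, executor_cfg, and so on) are forwarded to Pipeline.run() inside the job, and everything else is passed to the underlying queue call. A brief usage sketch mirroring the docstring examples, with the pipeline name, inputs, and cron expression as placeholders:

```python
manager = RQManager(base_dir="/path/to/project")  # placeholder project path

# Run a pipeline once, right now; inputs/final_vars are pipeline arguments
job_id = manager.enqueue_pipeline(
    "my_pipeline",
    inputs={"date": "2025-01-01"},
    final_vars=["result"],
)

# Schedule the same pipeline daily at 09:00; cron is not in the pipeline
# whitelist, so it is routed to add_schedule() as a scheduling option
schedule_id = manager.schedule_pipeline(
    "my_pipeline",
    cron="0 9 * * *",
    inputs={"date": "today"},
)
```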

flowerpower/pipeline/__init__.py CHANGED

flowerpower/pipeline/base.py CHANGED
@@ -3,11 +3,11 @@ import posixpath
 import sys
 from types import TracebackType

+from fsspec_utils import AbstractFileSystem, BaseStorageOptions, filesystem
 from loguru import logger
 from munch import Munch

 from ..cfg import PipelineConfig, ProjectConfig
-from ..fs import AbstractFileSystem, BaseStorageOptions, get_filesystem
 from ..utils.logging import setup_logging

 setup_logging()
@@ -47,7 +47,7 @@
         self._base_dir = base_dir
         self._storage_options = storage_options
         if fs is None:
-            fs = …
+            fs = filesystem(self._base_dir, **self._storage_options)
         self._fs = fs
         self._cfg_dir = cfg_dir
         self._pipelines_dir = pipelines_dir
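Across this release the internal flowerpower.fs helpers are replaced by the fsspec_utils package. Judging from the call sites in this diff, the new filesystem() factory takes a base path plus storage options, either expanded as keyword arguments or passed as a single storage_options mapping. A hedged sketch of both forms (paths and options are illustrative, and the exact fsspec_utils signature is inferred from the diff rather than from its documentation):

```python
from fsspec_utils import AbstractFileSystem, filesystem

# As in BasePipeline.__init__: options expanded as keyword arguments
fs = filesystem("/path/to/project", **{"auto_mkdir": True})  # illustrative options

# As in PipelineIOManager._get_filesystem: options passed as one mapping
fs = filesystem("s3://my-bucket/project", storage_options={"anon": False})

assert isinstance(fs, AbstractFileSystem)
```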

flowerpower/pipeline/io.py CHANGED
@@ -7,16 +7,18 @@ Manages the import and export of pipelines.

 import posixpath

+from fsspec_utils import (AbstractFileSystem, BaseStorageOptions,
+                          DirFileSystem, filesystem)
 from loguru import logger
 from rich.console import Console

-# Import necessary config types and utility functions
-from ..fs.base import (AbstractFileSystem, BaseStorageOptions, DirFileSystem,
-                       get_filesystem)
 from ..settings import LOG_LEVEL
 from ..utils.logging import setup_logging
 from .registry import PipelineRegistry

+# Import necessary config types and utility functions
+
+
 console = Console()

 setup_logging(level=LOG_LEVEL)
@@ -69,7 +71,7 @@ class PipelineIOManager:

     def _get_filesystem(base_dir, fs, storage_options):
         if fs is None:
-            fs = …
+            fs = filesystem(base_dir, storage_options=storage_options)
         else:
             if not isinstance(fs, AbstractFileSystem):
                 raise ValueError(
@@ -211,12 +213,10 @@ class PipelineIOManager:
         files = ["conf/project.yml"]

         for name in names:
-            files.extend(
-                … (removed code not shown in source view)
-                ]
-            )
+            files.extend([
+                f"conf/pipelines/{name}.yml",
+                f"pipelines/{name}.py",
+            ])

         # Sync the filesystem
         self._sync_filesystem(
@@ -366,12 +366,10 @@ class PipelineIOManager:
                 f"Pipeline {name} does not exist in the registry. Please check the name."
             )
         # Add pipeline files to the list
-        files.extend(
-            … (removed code not shown in source view)
-            ]
-        )
+        files.extend([
+            f"conf/pipelines/{name}.yml",
+            f"pipelines/{name}.py",
+        ])
         # Sync the filesystem
         self._sync_filesystem(
             src_base_dir=".",
|