FlowerPower 0.11.6.19__py3-none-any.whl → 0.20.0__py3-none-any.whl

This diff compares the content of publicly released package versions as they appear in their respective public registries; it is provided for informational purposes only.
Files changed (80)
  1. flowerpower/cfg/__init__.py +3 -3
  2. flowerpower/cfg/pipeline/__init__.py +5 -3
  3. flowerpower/cfg/project/__init__.py +3 -3
  4. flowerpower/cfg/project/job_queue.py +1 -128
  5. flowerpower/cli/__init__.py +5 -5
  6. flowerpower/cli/cfg.py +0 -3
  7. flowerpower/cli/job_queue.py +401 -133
  8. flowerpower/cli/pipeline.py +14 -413
  9. flowerpower/cli/utils.py +0 -1
  10. flowerpower/flowerpower.py +537 -28
  11. flowerpower/job_queue/__init__.py +5 -94
  12. flowerpower/job_queue/base.py +201 -3
  13. flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -3
  14. flowerpower/job_queue/rq/manager.py +388 -77
  15. flowerpower/pipeline/__init__.py +2 -0
  16. flowerpower/pipeline/base.py +2 -2
  17. flowerpower/pipeline/io.py +14 -16
  18. flowerpower/pipeline/manager.py +21 -642
  19. flowerpower/pipeline/pipeline.py +571 -0
  20. flowerpower/pipeline/registry.py +242 -10
  21. flowerpower/pipeline/visualizer.py +1 -2
  22. flowerpower/plugins/_io/__init__.py +8 -0
  23. flowerpower/plugins/mqtt/manager.py +6 -6
  24. flowerpower/settings/backend.py +0 -2
  25. flowerpower/settings/job_queue.py +1 -57
  26. flowerpower/utils/misc.py +0 -256
  27. flowerpower/utils/monkey.py +1 -83
  28. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/METADATA +308 -152
  29. flowerpower-0.20.0.dist-info/RECORD +58 -0
  30. flowerpower/fs/__init__.py +0 -29
  31. flowerpower/fs/base.py +0 -662
  32. flowerpower/fs/ext.py +0 -2143
  33. flowerpower/fs/storage_options.py +0 -1420
  34. flowerpower/job_queue/apscheduler/__init__.py +0 -11
  35. flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -110
  36. flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -93
  37. flowerpower/job_queue/apscheduler/manager.py +0 -1051
  38. flowerpower/job_queue/apscheduler/setup.py +0 -554
  39. flowerpower/job_queue/apscheduler/trigger.py +0 -169
  40. flowerpower/job_queue/apscheduler/utils.py +0 -311
  41. flowerpower/pipeline/job_queue.py +0 -583
  42. flowerpower/pipeline/runner.py +0 -603
  43. flowerpower/plugins/io/base.py +0 -2520
  44. flowerpower/plugins/io/helpers/datetime.py +0 -298
  45. flowerpower/plugins/io/helpers/polars.py +0 -875
  46. flowerpower/plugins/io/helpers/pyarrow.py +0 -570
  47. flowerpower/plugins/io/helpers/sql.py +0 -202
  48. flowerpower/plugins/io/loader/__init__.py +0 -28
  49. flowerpower/plugins/io/loader/csv.py +0 -37
  50. flowerpower/plugins/io/loader/deltatable.py +0 -190
  51. flowerpower/plugins/io/loader/duckdb.py +0 -19
  52. flowerpower/plugins/io/loader/json.py +0 -37
  53. flowerpower/plugins/io/loader/mqtt.py +0 -159
  54. flowerpower/plugins/io/loader/mssql.py +0 -26
  55. flowerpower/plugins/io/loader/mysql.py +0 -26
  56. flowerpower/plugins/io/loader/oracle.py +0 -26
  57. flowerpower/plugins/io/loader/parquet.py +0 -35
  58. flowerpower/plugins/io/loader/postgres.py +0 -26
  59. flowerpower/plugins/io/loader/pydala.py +0 -19
  60. flowerpower/plugins/io/loader/sqlite.py +0 -23
  61. flowerpower/plugins/io/metadata.py +0 -244
  62. flowerpower/plugins/io/saver/__init__.py +0 -28
  63. flowerpower/plugins/io/saver/csv.py +0 -36
  64. flowerpower/plugins/io/saver/deltatable.py +0 -186
  65. flowerpower/plugins/io/saver/duckdb.py +0 -19
  66. flowerpower/plugins/io/saver/json.py +0 -36
  67. flowerpower/plugins/io/saver/mqtt.py +0 -28
  68. flowerpower/plugins/io/saver/mssql.py +0 -26
  69. flowerpower/plugins/io/saver/mysql.py +0 -26
  70. flowerpower/plugins/io/saver/oracle.py +0 -26
  71. flowerpower/plugins/io/saver/parquet.py +0 -36
  72. flowerpower/plugins/io/saver/postgres.py +0 -26
  73. flowerpower/plugins/io/saver/pydala.py +0 -20
  74. flowerpower/plugins/io/saver/sqlite.py +0 -24
  75. flowerpower/utils/scheduler.py +0 -311
  76. flowerpower-0.11.6.19.dist-info/RECORD +0 -102
  77. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/WHEEL +0 -0
  78. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/entry_points.txt +0 -0
  79. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/licenses/LICENSE +0 -0
  80. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/top_level.txt +0 -0
@@ -10,10 +10,13 @@ import platform
  import sys
  import time
  import uuid
+ import warnings
  from typing import Any, Callable

  import duration_parser
  from cron_descriptor import get_description
+ # from ...fs import AbstractFileSystem
+ from fsspec_utils import AbstractFileSystem
  from humanize import precisedelta
  from loguru import logger
  from rq import Queue, Repeat, Retry
@@ -23,7 +26,6 @@ from rq.worker import Worker
  from rq.worker_pool import WorkerPool
  from rq_scheduler import Scheduler

- from ...fs import AbstractFileSystem
  from ...utils.logging import setup_logging
  from ..base import BaseJobQueueManager
  from .setup import RQBackend
@@ -175,6 +177,7 @@ class RQManager(BaseJobQueueManager):
  background: bool = False,
  queue_names: list[str] | None = None,
  with_scheduler: bool = True,
+ num_workers: int | None = None,
  **kwargs: Any,
  ) -> None:
  """Start a worker process for processing jobs from the queues.
@@ -186,6 +189,7 @@ class RQManager(BaseJobQueueManager):
  queues defined in the backend configuration.
  with_scheduler: Whether to include the scheduler queue for processing
  scheduled jobs.
+ num_workers: Number of worker processes to start (pool mode).
  **kwargs: Additional arguments passed to RQ's Worker class.
  Example: {"burst": True, "logging_level": "INFO", "job_monitoring_interval": 30}

@@ -210,85 +214,101 @@ class RQManager(BaseJobQueueManager):
  max_jobs=100,
  job_monitoring_interval=30
  )
+ # Start a worker pool with 4 processes
+ worker.start_worker(
+ background=True,
+ num_workers=4
+ )
  ```
  """
- import multiprocessing
-
- logging_level = kwargs.pop("logging_level", self._log_level)
- burst = kwargs.pop("burst", False)
- max_jobs = kwargs.pop("max_jobs", None)
- # Determine which queues to process
- if queue_names is None:
- # Use all queues by default
- queue_names = self._queue_names
- queue_names_str = ", ".join(queue_names)
+ if num_workers is not None and num_workers > 1:
+ self.start_worker_pool(
+ num_workers=num_workers,
+ background=background,
+ queue_names=queue_names,
+ with_scheduler=with_scheduler,
+ **kwargs,
+ )
  else:
- # Filter to only include valid queue names
- queue_names = [name for name in queue_names if name in self._queue_names]
- queue_names_str = ", ".join(queue_names)
-
- if not queue_names:
- logger.error("No valid queues specified, cannot start worker")
- return
-
- if with_scheduler:
- # Add the scheduler queue to the list of queues
- queue_names.append(self._scheduler_name)
- queue_names_str = ", ".join(queue_names)
-
- # Create a worker instance with queue names (not queue objects)
- worker = Worker(queue_names, connection=self._backend.client, **kwargs)
-
- if background:
- # We need to use a separate process rather than a thread because
- # RQ's signal handler registration only works in the main thread
- def run_worker_process(queue_names_arg):
- # Import RQ inside the process to avoid connection sharing issues
- from redis import Redis
- from rq import Worker
-
- # Create a fresh Redis connection in this process
- redis_conn = Redis.from_url(self._backend.uri)
-
- # Create a worker instance with queue names
- worker_proc = Worker(queue_names_arg, connection=redis_conn)
-
- # Disable the default signal handlers in RQ worker by patching
- # the _install_signal_handlers method to do nothing
- worker_proc._install_signal_handlers = lambda: None
+ import multiprocessing
+
+ logging_level = kwargs.pop("logging_level", self._log_level)
+ burst = kwargs.pop("burst", False)
+ max_jobs = kwargs.pop("max_jobs", None)
+ # Determine which queues to process
+ if queue_names is None:
+ # Use all queues by default
+ queue_names = self._queue_names
+ queue_names_str = ", ".join(queue_names)
+ else:
+ # Filter to only include valid queue names
+ queue_names = [
+ name for name in queue_names if name in self._queue_names
+ ]
+ queue_names_str = ", ".join(queue_names)
+
+ if not queue_names:
+ logger.error("No valid queues specified, cannot start worker")
+ return
+
+ if with_scheduler:
+ # Add the scheduler queue to the list of queues
+ queue_names.append(self._scheduler_name)
+ queue_names_str = ", ".join(queue_names)
+
+ # Create a worker instance with queue names (not queue objects)
+ worker = Worker(queue_names, connection=self._backend.client, **kwargs)
+
+ if background:
+ # We need to use a separate process rather than a thread because
+ # RQ's signal handler registration only works in the main thread
+ def run_worker_process(queue_names_arg):
+ # Import RQ inside the process to avoid connection sharing issues
+ from redis import Redis
+ from rq import Worker
+
+ # Create a fresh Redis connection in this process
+ redis_conn = Redis.from_url(self._backend.uri)
+
+ # Create a worker instance with queue names
+ worker_proc = Worker(queue_names_arg, connection=redis_conn)
+
+ # Disable the default signal handlers in RQ worker by patching
+ # the _install_signal_handlers method to do nothing
+ worker_proc._install_signal_handlers = lambda: None
+
+ # Work until terminated
+ worker_proc.work(
+ with_scheduler=True,
+ logging_level=logging_level,
+ burst=burst,
+ max_jobs=max_jobs,
+ )

- # Work until terminated
- worker_proc.work(
+ # Create and start the process
+ process = multiprocessing.Process(
+ target=run_worker_process,
+ args=(queue_names,),
+ name=f"rq-worker-{self.name}",
+ )
+ # Don't use daemon=True to avoid the "daemonic processes are not allowed to have children" error
+ process.start()
+ self._worker_process = process
+ logger.info(
+ f"Started RQ worker in background process (PID: {process.pid}) for queues: {queue_names_str}"
+ )
+ else:
+ # Start worker in the current process (blocking)
+ logger.info(
+ f"Starting RQ worker in current process (blocking) for queues: {queue_names_str}"
+ )
+ worker.work(
  with_scheduler=True,
  logging_level=logging_level,
  burst=burst,
  max_jobs=max_jobs,
  )

- # Create and start the process
- process = multiprocessing.Process(
- target=run_worker_process,
- args=(queue_names,),
- name=f"rq-worker-{self.name}",
- )
- # Don't use daemon=True to avoid the "daemonic processes are not allowed to have children" error
- process.start()
- self._worker_process = process
- logger.info(
- f"Started RQ worker in background process (PID: {process.pid}) for queues: {queue_names_str}"
- )
- else:
- # Start worker in the current process (blocking)
- logger.info(
- f"Starting RQ worker in current process (blocking) for queues: {queue_names_str}"
- )
- worker.work(
- with_scheduler=True,
- logging_level=logging_level,
- burst=burst,
- max_jobs=max_jobs,
- )
-
  def stop_worker(self) -> None:
  """Stop the worker process.

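The hunks above (from `flowerpower/job_queue/rq/manager.py`, judging by the `RQManager` class and the file list) rework `start_worker()`: a `num_workers` greater than 1 is now delegated to `start_worker_pool()`, while the single-worker path keeps its blocking/background split. A minimal sketch of the three alternative invocations, assuming a project with a reachable Redis backend; the constructor argument mirrors the docstring examples later in this diff, and the extra kwargs shown (`burst`, `logging_level`) are the ones the method itself pops:

```python
from flowerpower.job_queue.rq.manager import RQManager

manager = RQManager(base_dir="/path/to/project")  # placeholder project path

# 1) Blocking single worker in the current process; burst=True drains the queues and returns.
manager.start_worker(background=False, burst=True, logging_level="INFO")

# 2) Background single worker: runs in a separate process, stopped later via stop_worker().
manager.start_worker(background=True)

# 3) New in this release: num_workers > 1 routes to start_worker_pool() under the hood.
manager.start_worker(background=True, num_workers=4)
```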
@@ -304,14 +324,17 @@ class RQManager(BaseJobQueueManager):
  worker.stop_worker()
  ```
  """
- if hasattr(self, "_worker_process") and self._worker_process is not None:
- if self._worker_process.is_alive():
- self._worker_process.terminate()
- self._worker_process.join(timeout=5)
- logger.info("RQ worker process terminated")
- self._worker_process = None
+ if hasattr(self, "_worker_pool"):
+ self.stop_worker_pool()
  else:
- logger.warning("No worker process to stop")
+ if hasattr(self, "_worker_process") and self._worker_process is not None:
+ if self._worker_process.is_alive():
+ self._worker_process.terminate()
+ self._worker_process.join(timeout=5)
+ logger.info("RQ worker process terminated")
+ self._worker_process = None
+ else:
+ logger.warning("No worker process to stop")

  def start_worker_pool(
  self,
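`stop_worker()` now checks for a worker pool first and hands off to `stop_worker_pool()`, falling back to the old terminate/join of a single background process. A short lifecycle sketch under the same assumptions as above:

```python
# A pool started through the new num_workers shortcut ...
manager.start_worker(background=True, num_workers=4)
try:
    pass  # enqueue work here
finally:
    # ... is torn down by the same call: stop_worker() delegates to
    # stop_worker_pool() when a pool exists, otherwise it terminates the
    # single background worker process (or warns if none is running).
    manager.stop_worker()
```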
@@ -543,6 +566,132 @@ class RQManager(BaseJobQueueManager):

  ## Jobs ###

+ def enqueue(
+ self,
+ func: Callable,
+ *args,
+ **kwargs,
+ ) -> Job:
+ """Enqueue a job for execution (immediate, delayed, or scheduled).
+
+ This is the main method for adding jobs to the queue. It supports:
+ - Immediate execution (no run_at or run_in parameters)
+ - Delayed execution (run_in parameter)
+ - Scheduled execution (run_at parameter)
+
+ Args:
+ func: Function to execute. Must be importable from the worker process.
+ *args: Positional arguments for the function
+ **kwargs: Keyword arguments including:
+ - run_in: Schedule the job to run after a delay (timedelta, int seconds, or string)
+ - run_at: Schedule the job to run at a specific datetime
+ - func_args: Alternative way to pass positional arguments
+ - func_kwargs: Alternative way to pass keyword arguments
+ - Other job queue specific parameters (timeout, retry, etc.)
+
+ Returns:
+ Job: The created job instance
+
+ Example:
+ ```python
+ # Immediate execution
+ manager.enqueue(my_func, arg1, arg2, kwarg1="value")
+
+ # Delayed execution
+ manager.enqueue(my_func, arg1, run_in=300) # 5 minutes
+ manager.enqueue(my_func, arg1, run_in=timedelta(hours=1))
+
+ # Scheduled execution
+ manager.enqueue(my_func, arg1, run_at=datetime(2025, 1, 1, 9, 0))
+ ```
+ """
+ # Extract func_args and func_kwargs if provided as alternatives to *args
+ func_args = kwargs.pop("func_args", None)
+ func_kwargs = kwargs.pop("func_kwargs", None)
+
+ # Use provided args or fall back to func_args
+ if args:
+ final_args = args
+ elif func_args:
+ final_args = func_args
+ else:
+ final_args = ()
+
+ # Extract function keyword arguments
+ if func_kwargs:
+ final_kwargs = func_kwargs
+ else:
+ final_kwargs = {}
+
+ # Delegate to add_job with the parameters
+ return self.add_job(
+ func=func, func_args=final_args, func_kwargs=final_kwargs, **kwargs
+ )
+
+ def enqueue_in(
+ self,
+ delay,
+ func: Callable,
+ *args,
+ **kwargs,
+ ) -> Job:
+ """Enqueue a job to run after a specified delay.
+
+ This is a convenience method for delayed execution.
+
+ Args:
+ delay: Time to wait before execution (timedelta, int seconds, or string)
+ func: Function to execute
+ *args: Positional arguments for the function
+ **kwargs: Keyword arguments for the function and job options
+
+ Returns:
+ Job: The created job instance
+
+ Example:
+ ```python
+ # Run in 5 minutes
+ manager.enqueue_in(300, my_func, arg1, arg2)
+
+ # Run in 1 hour
+ manager.enqueue_in(timedelta(hours=1), my_func, arg1, kwarg1="value")
+ ```
+ """
+ return self.enqueue(func, *args, run_in=delay, **kwargs)
+
+ def enqueue_at(
+ self,
+ datetime,
+ func: Callable,
+ *args,
+ **kwargs,
+ ) -> Job:
+ """Enqueue a job to run at a specific datetime.
+
+ This is a convenience method for scheduled execution.
+
+ Args:
+ datetime: When to execute the job (datetime object or ISO string)
+ func: Function to execute
+ *args: Positional arguments for the function
+ **kwargs: Keyword arguments for the function and job options
+
+ Returns:
+ Job: The created job instance
+
+ Example:
+ ```python
+ # Run at specific time
+ manager.enqueue_at(datetime(2025, 1, 1, 9, 0), my_func, arg1, arg2)
+
+ # Run tomorrow at 9 AM
+ tomorrow_9am = datetime.now() + timedelta(days=1)
+ tomorrow_9am = tomorrow_9am.replace(hour=9, minute=0, second=0)
+ manager.enqueue_at(tomorrow_9am, my_func, arg1, kwarg1="value")
+ ```
+ """
+ return self.enqueue(func, *args, run_at=datetime, **kwargs)
+
  def add_job(
  self,
  func: Callable,
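The new `enqueue()` / `enqueue_in()` / `enqueue_at()` trio is a thin layer over `add_job()`: `enqueue()` normalizes `*args`, `func_args` and `func_kwargs` and forwards the remaining keyword arguments as job options, while the two convenience methods only inject `run_in` or `run_at`. A sketch of the three call styles, assuming the `manager` instance from above and a hypothetical task function that lives in an importable module (a requirement the docstring states):

```python
from datetime import datetime, timedelta

def send_report(day: str, fmt: str = "pdf") -> str:
    # Hypothetical task; in practice it must be importable by the worker process.
    return f"rendered {day} report as {fmt}"

# Immediate execution: keyword arguments for the task go through func_kwargs,
# anything else is treated as a job option and passed on to add_job().
job = manager.enqueue(send_report, "2025-01-01", func_kwargs={"fmt": "html"})

# Delayed execution: equivalent to enqueue(..., run_in=delay).
manager.enqueue_in(timedelta(minutes=5), send_report, "2025-01-01")

# Scheduled execution: equivalent to enqueue(..., run_at=when).
manager.enqueue_at(datetime(2025, 1, 1, 9, 0), send_report, "2025-01-01")
```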
@@ -567,6 +716,10 @@ class RQManager(BaseJobQueueManager):
  ) -> Job:
  """Add a job for immediate or scheduled execution.

+ .. deprecated:: 0.12.0
+ Use :meth:`enqueue`, :meth:`enqueue_in`, or :meth:`enqueue_at` instead.
+ The add_job method will be removed in version 1.0.0.
+
  Args:
  func: Function to execute. Must be importable from the worker process.
  func_args: Positional arguments to pass to the function.
@@ -640,6 +793,14 @@ class RQManager(BaseJobQueueManager):
  )
  ```
  """
+ # Issue deprecation warning
+ warnings.warn(
+ "add_job() is deprecated and will be removed in version 1.0.0. "
+ "Use enqueue(), enqueue_in(), or enqueue_at() instead.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+
  job_id = job_id or str(uuid.uuid4())
  if isinstance(result_ttl, (int, float)):
  result_ttl = dt.timedelta(seconds=result_ttl)
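`add_job()` stays callable but now emits a `DeprecationWarning` through the newly imported `warnings` module (see the first hunk). Callers migrating off it can surface or escalate the warning with the standard library alone; a small sketch, reusing the hypothetical `send_report` task from above:

```python
import warnings

# Promote the deprecation to an error to flush out remaining add_job() call sites.
with warnings.catch_warnings():
    warnings.simplefilter("error", DeprecationWarning)
    try:
        manager.add_job(func=send_report, func_args=("2025-01-01",))
    except DeprecationWarning as exc:
        print(f"still on the old API: {exc}")

# Preferred replacement going forward:
manager.enqueue(send_report, "2025-01-01")
```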
@@ -1580,3 +1741,153 @@ class RQManager(BaseJobQueueManager):
  """
  schedule_ids = [schedule.id for schedule in self.schedules]
  return schedule_ids
+
+ # --- Pipeline-specific high-level methods implementation ---
+
+ def schedule_pipeline(self, name: str, project_context=None, *args, **kwargs):
+ """Schedule a pipeline for execution using its name.
+
+ This high-level method loads the pipeline from the internal registry and schedules
+ its execution with the job queue using the existing add_schedule method.
+
+ Args:
+ name: Name of the pipeline to schedule
+ project_context: Project context for the pipeline (optional)
+ *args: Additional positional arguments for scheduling
+ **kwargs: Additional keyword arguments for scheduling
+
+ Returns:
+ Schedule ID from the underlying add_schedule call
+
+ Example:
+ ```python
+ manager = RQManager(base_dir="/path/to/project")
+ schedule_id = manager.schedule_pipeline(
+ "my_pipeline",
+ cron="0 9 * * *", # Run daily at 9 AM
+ inputs={"date": "today"}
+ )
+ ```
+ """
+ logger.info(f"Scheduling pipeline '{name}' via RQ job queue")
+
+ # Create a function that will be executed by the job queue
+ def pipeline_job(*job_args, **job_kwargs):
+ # Get the pipeline instance
+ pipeline = self.pipeline_registry.get_pipeline(
+ name=name,
+ project_context=project_context,
+ reload=job_kwargs.pop("reload", False),
+ )
+
+ # Execute the pipeline
+ return pipeline.run(*job_args, **job_kwargs)
+
+ # Extract pipeline execution arguments from kwargs
+ pipeline_kwargs = {
+ k: v
+ for k, v in kwargs.items()
+ if k
+ in [
+ "inputs",
+ "final_vars",
+ "config",
+ "cache",
+ "executor_cfg",
+ "with_adapter_cfg",
+ "pipeline_adapter_cfg",
+ "project_adapter_cfg",
+ "adapter",
+ "reload",
+ "log_level",
+ "max_retries",
+ "retry_delay",
+ "jitter_factor",
+ "retry_exceptions",
+ "on_success",
+ "on_failure",
+ ]
+ }
+
+ # Extract scheduling arguments
+ schedule_kwargs = {k: v for k, v in kwargs.items() if k not in pipeline_kwargs}
+
+ # Schedule the job
+ return self.add_schedule(
+ func=pipeline_job, func_kwargs=pipeline_kwargs, **schedule_kwargs
+ )
+
+ def enqueue_pipeline(self, name: str, project_context=None, *args, **kwargs):
+ """Enqueue a pipeline for immediate execution using its name.
+
+ This high-level method loads the pipeline from the internal registry and enqueues
+ it for immediate execution in the job queue using the existing enqueue method.
+
+ Args:
+ name: Name of the pipeline to enqueue
+ project_context: Project context for the pipeline (optional)
+ *args: Additional positional arguments for job execution
+ **kwargs: Additional keyword arguments for job execution
+
+ Returns:
+ Job ID from the underlying enqueue call
+
+ Example:
+ ```python
+ manager = RQManager(base_dir="/path/to/project")
+ job_id = manager.enqueue_pipeline(
+ "my_pipeline",
+ inputs={"date": "2025-01-01"},
+ final_vars=["result"]
+ )
+ ```
+ """
+ logger.info(
+ f"Enqueueing pipeline '{name}' for immediate execution via RQ job queue"
+ )
+
+ # Create a function that will be executed by the job queue
+ def pipeline_job(*job_args, **job_kwargs):
+ # Get the pipeline instance
+ pipeline = self.pipeline_registry.get_pipeline(
+ name=name,
+ project_context=project_context,
+ reload=job_kwargs.pop("reload", False),
+ )
+
+ # Execute the pipeline
+ return pipeline.run(*job_args, **job_kwargs)
+
+ # Extract pipeline execution arguments from kwargs
+ pipeline_kwargs = {
+ k: v
+ for k, v in kwargs.items()
+ if k
+ in [
+ "inputs",
+ "final_vars",
+ "config",
+ "cache",
+ "executor_cfg",
+ "with_adapter_cfg",
+ "pipeline_adapter_cfg",
+ "project_adapter_cfg",
+ "adapter",
+ "reload",
+ "log_level",
+ "max_retries",
+ "retry_delay",
+ "jitter_factor",
+ "retry_exceptions",
+ "on_success",
+ "on_failure",
+ ]
+ }
+
+ # Extract job queue arguments
+ job_kwargs = {k: v for k, v in kwargs.items() if k not in pipeline_kwargs}
+
+ # Add the job
+ return self.enqueue(
+ func=pipeline_job, func_kwargs=pipeline_kwargs, *args, **job_kwargs
+ )
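`schedule_pipeline()` and `enqueue_pipeline()` wrap a registry lookup plus `pipeline.run()` in a local `pipeline_job` closure and hand it to the existing `add_schedule()` / `enqueue()` machinery, splitting keyword arguments into pipeline-run options and queue options. Usage, condensed from the docstring examples above (pipeline name, cron expression, and inputs are placeholders):

```python
manager = RQManager(base_dir="/path/to/project")

# Run once, as soon as a worker picks the job up.
job_id = manager.enqueue_pipeline(
    "my_pipeline",
    inputs={"date": "2025-01-01"},
    final_vars=["result"],
)

# Register a recurring run, daily at 09:00, via the scheduler queue.
schedule_id = manager.schedule_pipeline(
    "my_pipeline",
    cron="0 9 * * *",
    inputs={"date": "today"},
)
```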
@@ -1,5 +1,7 @@
  from .manager import PipelineManager
+ from .pipeline import Pipeline

  __all__ = [
  "PipelineManager",
+ "Pipeline",
  ]
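`flowerpower.pipeline` now also exports the new `Pipeline` class added in `pipeline/pipeline.py` (+571 lines in the file list). A minimal import sketch; the class's constructor and methods are not part of this diff excerpt:

```python
from flowerpower.pipeline import Pipeline, PipelineManager  # both are now public
```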
@@ -3,11 +3,11 @@ import posixpath
  import sys
  from types import TracebackType

+ from fsspec_utils import AbstractFileSystem, BaseStorageOptions, filesystem
  from loguru import logger
  from munch import Munch

  from ..cfg import PipelineConfig, ProjectConfig
- from ..fs import AbstractFileSystem, BaseStorageOptions, get_filesystem
  from ..utils.logging import setup_logging

  setup_logging()
@@ -47,7 +47,7 @@ class BasePipeline:
  self._base_dir = base_dir
  self._storage_options = storage_options
  if fs is None:
- fs = get_filesystem(self._base_dir, **self._storage_options)
+ fs = filesystem(self._base_dir, **self._storage_options)
  self._fs = fs
  self._cfg_dir = cfg_dir
  self._pipelines_dir = pipelines_dir
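These pipeline hunks carry the same migration as the RQ manager: the bundled `flowerpower/fs` package (removed wholesale in files 30–33 of the list above) is replaced by the external `fsspec_utils` package, and `get_filesystem()` becomes `filesystem()`. A sketch limited to the call shapes visible in this diff; anything beyond them is an assumption about `fsspec_utils`:

```python
from fsspec_utils import AbstractFileSystem, filesystem

# Old (removed): from ..fs import get_filesystem
#                fs = get_filesystem(base_dir, storage_options=storage_options)
fs = filesystem("/path/to/project", storage_options={})  # placeholder path/options
assert isinstance(fs, AbstractFileSystem)
```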
@@ -7,16 +7,18 @@ Manages the import and export of pipelines.

  import posixpath

+ from fsspec_utils import (AbstractFileSystem, BaseStorageOptions,
+ DirFileSystem, filesystem)
  from loguru import logger
  from rich.console import Console

- # Import necessary config types and utility functions
- from ..fs.base import (AbstractFileSystem, BaseStorageOptions, DirFileSystem,
- get_filesystem)
  from ..settings import LOG_LEVEL
  from ..utils.logging import setup_logging
  from .registry import PipelineRegistry

+ # Import necessary config types and utility functions
+
+
  console = Console()

  setup_logging(level=LOG_LEVEL)
@@ -69,7 +71,7 @@ class PipelineIOManager:

  def _get_filesystem(base_dir, fs, storage_options):
  if fs is None:
- fs = get_filesystem(base_dir, storage_options=storage_options)
+ fs = filesystem(base_dir, storage_options=storage_options)
  else:
  if not isinstance(fs, AbstractFileSystem):
  raise ValueError(
@@ -211,12 +213,10 @@ class PipelineIOManager:
  files = ["conf/project.yml"]

  for name in names:
- files.extend(
- [
- f"conf/pipelines/{name}.yml",
- f"pipelines/{name}.py",
- ]
- )
+ files.extend([
+ f"conf/pipelines/{name}.yml",
+ f"pipelines/{name}.py",
+ ])

  # Sync the filesystem
  self._sync_filesystem(
@@ -366,12 +366,10 @@ class PipelineIOManager:
  f"Pipeline {name} does not exist in the registry. Please check the name."
  )
  # Add pipeline files to the list
- files.extend(
- [
- f"conf/pipelines/{name}.yml",
- f"pipelines/{name}.py",
- ]
- )
+ files.extend([
+ f"conf/pipelines/{name}.yml",
+ f"pipelines/{name}.py",
+ ])
  # Sync the filesystem
  self._sync_filesystem(
  src_base_dir=".",