FlowerPower 0.11.6.19__py3-none-any.whl → 0.20.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. flowerpower/cfg/__init__.py +3 -3
  2. flowerpower/cfg/pipeline/__init__.py +5 -3
  3. flowerpower/cfg/project/__init__.py +3 -3
  4. flowerpower/cfg/project/job_queue.py +1 -128
  5. flowerpower/cli/__init__.py +5 -5
  6. flowerpower/cli/cfg.py +0 -3
  7. flowerpower/cli/job_queue.py +401 -133
  8. flowerpower/cli/pipeline.py +14 -413
  9. flowerpower/cli/utils.py +0 -1
  10. flowerpower/flowerpower.py +537 -28
  11. flowerpower/job_queue/__init__.py +5 -94
  12. flowerpower/job_queue/base.py +201 -3
  13. flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -3
  14. flowerpower/job_queue/rq/manager.py +388 -77
  15. flowerpower/pipeline/__init__.py +2 -0
  16. flowerpower/pipeline/base.py +2 -2
  17. flowerpower/pipeline/io.py +14 -16
  18. flowerpower/pipeline/manager.py +21 -642
  19. flowerpower/pipeline/pipeline.py +571 -0
  20. flowerpower/pipeline/registry.py +242 -10
  21. flowerpower/pipeline/visualizer.py +1 -2
  22. flowerpower/plugins/_io/__init__.py +8 -0
  23. flowerpower/plugins/mqtt/manager.py +6 -6
  24. flowerpower/settings/backend.py +0 -2
  25. flowerpower/settings/job_queue.py +1 -57
  26. flowerpower/utils/misc.py +0 -256
  27. flowerpower/utils/monkey.py +1 -83
  28. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/METADATA +308 -152
  29. flowerpower-0.20.0.dist-info/RECORD +58 -0
  30. flowerpower/fs/__init__.py +0 -29
  31. flowerpower/fs/base.py +0 -662
  32. flowerpower/fs/ext.py +0 -2143
  33. flowerpower/fs/storage_options.py +0 -1420
  34. flowerpower/job_queue/apscheduler/__init__.py +0 -11
  35. flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -110
  36. flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -93
  37. flowerpower/job_queue/apscheduler/manager.py +0 -1051
  38. flowerpower/job_queue/apscheduler/setup.py +0 -554
  39. flowerpower/job_queue/apscheduler/trigger.py +0 -169
  40. flowerpower/job_queue/apscheduler/utils.py +0 -311
  41. flowerpower/pipeline/job_queue.py +0 -583
  42. flowerpower/pipeline/runner.py +0 -603
  43. flowerpower/plugins/io/base.py +0 -2520
  44. flowerpower/plugins/io/helpers/datetime.py +0 -298
  45. flowerpower/plugins/io/helpers/polars.py +0 -875
  46. flowerpower/plugins/io/helpers/pyarrow.py +0 -570
  47. flowerpower/plugins/io/helpers/sql.py +0 -202
  48. flowerpower/plugins/io/loader/__init__.py +0 -28
  49. flowerpower/plugins/io/loader/csv.py +0 -37
  50. flowerpower/plugins/io/loader/deltatable.py +0 -190
  51. flowerpower/plugins/io/loader/duckdb.py +0 -19
  52. flowerpower/plugins/io/loader/json.py +0 -37
  53. flowerpower/plugins/io/loader/mqtt.py +0 -159
  54. flowerpower/plugins/io/loader/mssql.py +0 -26
  55. flowerpower/plugins/io/loader/mysql.py +0 -26
  56. flowerpower/plugins/io/loader/oracle.py +0 -26
  57. flowerpower/plugins/io/loader/parquet.py +0 -35
  58. flowerpower/plugins/io/loader/postgres.py +0 -26
  59. flowerpower/plugins/io/loader/pydala.py +0 -19
  60. flowerpower/plugins/io/loader/sqlite.py +0 -23
  61. flowerpower/plugins/io/metadata.py +0 -244
  62. flowerpower/plugins/io/saver/__init__.py +0 -28
  63. flowerpower/plugins/io/saver/csv.py +0 -36
  64. flowerpower/plugins/io/saver/deltatable.py +0 -186
  65. flowerpower/plugins/io/saver/duckdb.py +0 -19
  66. flowerpower/plugins/io/saver/json.py +0 -36
  67. flowerpower/plugins/io/saver/mqtt.py +0 -28
  68. flowerpower/plugins/io/saver/mssql.py +0 -26
  69. flowerpower/plugins/io/saver/mysql.py +0 -26
  70. flowerpower/plugins/io/saver/oracle.py +0 -26
  71. flowerpower/plugins/io/saver/parquet.py +0 -36
  72. flowerpower/plugins/io/saver/postgres.py +0 -26
  73. flowerpower/plugins/io/saver/pydala.py +0 -20
  74. flowerpower/plugins/io/saver/sqlite.py +0 -24
  75. flowerpower/utils/scheduler.py +0 -311
  76. flowerpower-0.11.6.19.dist-info/RECORD +0 -102
  77. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/WHEEL +0 -0
  78. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/entry_points.txt +0 -0
  79. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/licenses/LICENSE +0 -0
  80. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/top_level.txt +0 -0
@@ -1,17 +1,12 @@
1
1
  import importlib
2
2
  from typing import Any, Optional
3
3
 
4
+ from fsspec_utils import AbstractFileSystem
4
5
  from loguru import logger
5
6
 
6
7
  from ..cfg.project import ProjectConfig
7
- from ..fs import AbstractFileSystem
8
8
  from ..utils.logging import setup_logging
9
9
 
10
- if importlib.util.find_spec("apscheduler"):
11
- from .apscheduler import APSBackend, APSManager
12
- else:
13
- APSBackend = None
14
- APSManager = None
15
10
  if importlib.util.find_spec("rq"):
16
11
  from .rq import RQBackend, RQManager
17
12
  else:
@@ -42,18 +37,6 @@ class JobQueueBackend:
42
37
  queues=["high", "default", "low"]
43
38
  )
44
39
 
45
- # Create APScheduler backend with PostgreSQL and Redis
46
- aps_backend = JobQueueBackend(
47
- job_queue_type="apscheduler",
48
- data_store={
49
- "type": "postgresql",
50
- "uri": "postgresql+asyncpg://user:pass@localhost/db"
51
- },
52
- event_broker={
53
- "type": "redis",
54
- "uri": "redis://localhost:6379/0"
55
- }
56
- )
57
40
  ```
58
41
  """
59
42
 
@@ -67,21 +50,14 @@ class JobQueueBackend:
67
50
  Args:
68
51
  job_queue_type: The type of backend to create. Valid values are:
69
52
  - "rq": Redis Queue backend using Redis
70
- - "apscheduler": APScheduler backend supporting various databases
71
- and event brokers
72
53
  **kwargs: Backend-specific configuration options:
73
54
  For RQ:
74
55
  - uri (str): Redis connection URI
75
56
  - queues (list[str]): List of queue names
76
57
  - result_ttl (int): Time to live for results in seconds
77
- For APScheduler:
78
- - data_store (dict): Data store configuration
79
- - event_broker (dict): Event broker configuration
80
- - cleanup_interval (int): Cleanup interval in seconds
81
- - max_concurrent_jobs (int): Maximum concurrent jobs
82
58
 
83
59
  Returns:
84
- BaseBackend: An instance of RQBackend or APSBackend depending on
60
+ BaseBackend: An instance of RQBackend depending on
85
61
  the specified job queue type.
86
62
 
87
63
  Raises:
@@ -99,27 +75,10 @@ class JobQueueBackend:
99
75
  result_ttl=3600
100
76
  )
101
77
 
102
- # Create APScheduler backend with PostgreSQL and Redis
103
- aps_backend = Backend(
104
- job_queue_type="apscheduler",
105
- data_store={
106
- "type": "postgresql",
107
- "uri": "postgresql+asyncpg://user:pass@localhost/db",
108
- "schema": "scheduler"
109
- },
110
- event_broker={
111
- "type": "redis",
112
- "uri": "redis://localhost:6379/0"
113
- },
114
- cleanup_interval=300,
115
- max_concurrent_jobs=10
116
- )
117
78
  ```
118
79
  """
119
80
  if job_queue_type == "rq" and RQBackend is not None:
120
81
  return RQBackend(**kwargs)
121
- elif job_queue_type == "apscheduler" and APSBackend is not None:
122
- return APSBackend(**kwargs)
123
82
  else:
124
83
  if job_queue_type == "rq" and RQBackend is None:
125
84
  logger.warning(
@@ -127,15 +86,9 @@ class JobQueueBackend:
127
86
  "Install rq to use RQ. `uv pip install flowerpower[rq]` or `uv add flowerpower[rq]`"
128
87
  )
129
88
  return None
130
- elif job_queue_type == "apscheduler" and APSBackend is None:
131
- logger.warning(
132
- "APScheduler is not installed. `JobQueueBackend` is not initialized and using the job queue is disabled. "
133
- "Install apscheduler to use APScheduler. `uv pip install flowerpower[apscheduler]` or `uv add flowerpower[apscheduler]`"
134
- )
135
- return None
136
89
  else:
137
90
  raise ValueError(
138
- f"Invalid job queue type: {job_queue_type}. Valid types: ['rq', 'apscheduler']"
91
+ f"Invalid job queue type: {job_queue_type}. Valid types: ['rq']"
139
92
  )
140
93
 
141
94
 
@@ -161,17 +114,6 @@ class JobQueueManager:
161
114
  log_level="DEBUG"
162
115
  )
163
116
 
164
- # Create an APScheduler job queue with custom backend
165
- from flowerpower.job_queue.apscheduler import APSBackend
166
- backend_config = APSBackend(
167
- data_store={"type": "postgresql", "uri": "postgresql+asyncpg://user:pass@localhost/db"},
168
- event_broker={"type": "redis", "uri": "redis://localhost:6379/0"}
169
- )
170
- aps_worker = JobQueueManager(
171
- type="apscheduler",
172
- name="scheduler",
173
- backend=backend_config
174
- )
175
117
 
176
118
  ```
177
119
  """
@@ -192,7 +134,6 @@ class JobQueueManager:
192
134
  Args:
193
135
  type: The type of job queue to create. Valid values are:
194
136
  - "rq": Redis Queue job queue for Redis-based job queuing
195
- - "apscheduler": APScheduler job queue for advanced job scheduling
196
137
  name: Name of the job queue instance. Used for identification in logs
197
138
  and monitoring.
198
139
  base_dir: Base directory for job queue files and configuration. Defaults
@@ -209,8 +150,7 @@ class JobQueueManager:
209
150
  job queue implementation.
210
151
 
211
152
  Returns:
212
- BaseJobQueueManager: An instance of the specified job queue type (RQManager,
213
- APSManager).
153
+ BaseJobQueueManager: An instance of the specified job queue type (RQManager).
214
154
 
215
155
  Raises:
216
156
  ValueError: If an invalid job queue type is specified.
@@ -224,14 +164,6 @@ class JobQueueManager:
224
164
  # Basic RQ job queue
225
165
  worker = JobQueueManager(type="rq", name="basic_worker")
226
166
 
227
- # APScheduler with custom logging and storage
228
- worker = JobQueueManager(
229
- type="apscheduler",
230
- name="scheduler",
231
- base_dir="/app/data",
232
- storage_options={"mode": "async"},
233
- log_level="DEBUG"
234
- )
235
167
 
236
168
  ```
237
169
  """
@@ -260,35 +192,14 @@ class JobQueueManager:
260
192
  )
261
193
  return None
262
194
 
263
- elif type == "apscheduler":
264
- if APSManager is not None:
265
- return APSManager(
266
- name=name,
267
- base_dir=base_dir,
268
- backend=backend,
269
- storage_options=storage_options,
270
- fs=fs,
271
- log_level=log_level,
272
- **kwargs,
273
- )
274
- else:
275
- logger.warning(
276
- "`JobQueueManager` can not be initialized. This might be due to missing dependencies (APScheduler), invalid configuration or backend not being available."
277
- )
278
- return None
279
-
280
195
  else:
281
- raise ImportError(
282
- f"Invalid job queue type: {type}. Valid types: ['rq', 'apscheduler']"
283
- )
196
+ raise ImportError(f"Invalid job queue type: {type}. Valid types: ['rq']")
284
197
 
285
198
 
286
199
  __all__ = [
287
200
  "JobQueueManager",
288
201
  "RQManager",
289
- "APSManager",
290
202
  # "HueyWorker",
291
203
  "JobQueueBackend",
292
204
  "RQBackend",
293
- "APSBackend",
294
205
  ]
@@ -14,7 +14,9 @@ import urllib.parse
14
14
  from dataclasses import dataclass, field
15
15
  from enum import Enum
16
16
  from pathlib import Path
17
- from typing import Any, TypeVar
17
+ from typing import TYPE_CHECKING, Any, TypeVar
18
+
19
+ from loguru import logger
18
20
 
19
21
  if importlib.util.find_spec("sqlalchemy"):
20
22
  from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
@@ -22,8 +24,13 @@ else:
22
24
  create_async_engine = None
23
25
  AsyncEngine = TypeVar("AsyncEngine")
24
26
 
27
+ # Import PipelineRegistry with TYPE_CHECKING to avoid circular imports
28
+ if TYPE_CHECKING:
29
+ from ..pipeline.registry import PipelineRegistry
30
+
31
+ from fsspec_utils import AbstractFileSystem, filesystem
32
+
25
33
  from ..cfg import ProjectConfig
26
- from ..fs import AbstractFileSystem, get_filesystem
27
34
  # from ..utils.misc import update_config_from_dict
28
35
  from ..settings import BACKEND_PROPERTIES, CACHE_DIR, CONFIG_DIR, PIPELINES_DIR
29
36
 
@@ -357,6 +364,9 @@ class BaseJobQueueManager:
357
364
  self._pipelines_dir = kwargs.get("pipelines_dir", PIPELINES_DIR)
358
365
  self._cfg_dir = CONFIG_DIR
359
366
 
367
+ # Initialize pipeline registry (will be injected by FlowerPowerProject)
368
+ self._pipeline_registry = None
369
+
360
370
  if storage_options is not None:
361
371
  cached = True
362
372
  cache_storage = posixpath.join(
@@ -367,7 +377,7 @@ class BaseJobQueueManager:
367
377
  cached = False
368
378
  cache_storage = None
369
379
  if not fs:
370
- fs = get_filesystem(
380
+ fs = filesystem(
371
381
  self._base_dir,
372
382
  storage_options=storage_options,
373
383
  cached=cached,
@@ -411,3 +421,191 @@ class BaseJobQueueManager:
411
421
 
412
422
  if modules_path not in sys.path:
413
423
  sys.path.insert(0, modules_path)
424
+
425
+ @property
426
+ def pipeline_registry(self) -> "PipelineRegistry":
427
+ """Get or create a PipelineRegistry instance for this job queue manager.
428
+
429
+ This property lazily creates a PipelineRegistry using the job queue manager's
430
+ filesystem and directory configuration. The registry is cached after first access.
431
+
432
+ Returns:
433
+ PipelineRegistry: A registry instance configured with this manager's settings
434
+
435
+ Raises:
436
+ RuntimeError: If PipelineRegistry creation fails
437
+
438
+ Example:
439
+ ```python
440
+ manager = RQManager(base_dir="/path/to/project")
441
+ registry = manager.pipeline_registry # Creates registry on first access
442
+ pipeline = registry.get_pipeline("my_pipeline")
443
+ ```
444
+ """
445
+ if self._pipeline_registry is None:
446
+ try:
447
+ # Import here to avoid circular import issues
448
+ from ..pipeline.registry import PipelineRegistry
449
+
450
+ # Create registry using the from_filesystem factory method
451
+ self._pipeline_registry = PipelineRegistry.from_filesystem(
452
+ base_dir=self._base_dir,
453
+ fs=self._fs,
454
+ storage_options=self._storage_options,
455
+ )
456
+
457
+ logger.debug(
458
+ f"Created PipelineRegistry for JobQueueManager with base_dir: {self._base_dir}"
459
+ )
460
+
461
+ except Exception as e:
462
+ error_msg = f"Failed to create PipelineRegistry: {e}"
463
+ logger.error(error_msg)
464
+ raise RuntimeError(error_msg) from e
465
+
466
+ return self._pipeline_registry
467
+
468
+ # --- Pipeline-specific high-level methods ---
469
+
470
+ def schedule_pipeline(self, name: str, *args, **kwargs):
471
+ """Schedule a pipeline for execution using its name.
472
+
473
+ This high-level method loads the pipeline from the internal registry and schedules
474
+ its execution with the job queue.
475
+
476
+ Args:
477
+ name: Name of the pipeline to schedule
478
+ *args: Additional positional arguments for scheduling
479
+ **kwargs: Additional keyword arguments for scheduling
480
+
481
+ Returns:
482
+ Schedule ID or job ID depending on implementation
483
+
484
+ Raises:
485
+ NotImplementedError: Must be implemented by subclasses
486
+ """
487
+ raise NotImplementedError("Subclasses must implement schedule_pipeline()")
488
+
489
+ def enqueue_pipeline(self, name: str, *args, **kwargs):
490
+ """Enqueue a pipeline for immediate execution using its name.
491
+
492
+ This high-level method loads the pipeline from the internal registry and enqueues
493
+ it for immediate execution in the job queue.
494
+
495
+ Args:
496
+ name: Name of the pipeline to enqueue
497
+ *args: Additional positional arguments for job execution
498
+ **kwargs: Additional keyword arguments for job execution
499
+
500
+ Returns:
501
+ Job ID or result depending on implementation
502
+
503
+ Raises:
504
+ NotImplementedError: Must be implemented by subclasses
505
+ """
506
+ raise NotImplementedError("Subclasses must implement enqueue_pipeline()")
507
+
508
+ # --- Core job queue methods ---
509
+
510
+ def enqueue(self, func, *args, **kwargs):
511
+ """Enqueue a job for execution (immediate, delayed, or scheduled).
512
+
513
+ This is the main method for adding jobs to the queue. It supports:
514
+ - Immediate execution (no run_at or run_in parameters)
515
+ - Delayed execution (run_in parameter)
516
+ - Scheduled execution (run_at parameter)
517
+
518
+ Args:
519
+ func: Function to execute. Must be importable from the worker process.
520
+ *args: Positional arguments for the function
521
+ **kwargs: Keyword arguments including:
522
+ - run_in: Schedule the job to run after a delay (timedelta, int seconds, or string)
523
+ - run_at: Schedule the job to run at a specific datetime
524
+ - Other job queue specific parameters (timeout, retry, etc.)
525
+
526
+ Returns:
527
+ Job object or job ID depending on implementation
528
+
529
+ Raises:
530
+ NotImplementedError: Must be implemented by subclasses
531
+
532
+ Example:
533
+ ```python
534
+ # Immediate execution
535
+ manager.enqueue(my_func, arg1, arg2, kwarg1="value")
536
+
537
+ # Delayed execution
538
+ manager.enqueue(my_func, arg1, run_in=300) # 5 minutes
539
+ manager.enqueue(my_func, arg1, run_in=timedelta(hours=1))
540
+
541
+ # Scheduled execution
542
+ manager.enqueue(my_func, arg1, run_at=datetime(2025, 1, 1, 9, 0))
543
+ ```
544
+ """
545
+ raise NotImplementedError("Subclasses must implement enqueue()")
546
+
547
+ def enqueue_in(self, delay, func, *args, **kwargs):
548
+ """Enqueue a job to run after a specified delay.
549
+
550
+ This is a convenience method for delayed execution. It's equivalent to
551
+ calling enqueue() with the run_in parameter.
552
+
553
+ Args:
554
+ delay: Time to wait before execution (timedelta, int seconds, or string)
555
+ func: Function to execute
556
+ *args: Positional arguments for the function
557
+ **kwargs: Keyword arguments for the function and job options
558
+
559
+ Returns:
560
+ Job object or job ID depending on implementation
561
+
562
+ Raises:
563
+ NotImplementedError: Must be implemented by subclasses
564
+
565
+ Example:
566
+ ```python
567
+ # Run in 5 minutes
568
+ manager.enqueue_in(300, my_func, arg1, arg2)
569
+
570
+ # Run in 1 hour
571
+ manager.enqueue_in(timedelta(hours=1), my_func, arg1, kwarg1="value")
572
+
573
+ # Run in 30 seconds (string format)
574
+ manager.enqueue_in("30s", my_func, arg1)
575
+ ```
576
+ """
577
+ raise NotImplementedError("Subclasses must implement enqueue_in()")
578
+
579
+ def enqueue_at(self, datetime, func, *args, **kwargs):
580
+ """Enqueue a job to run at a specific datetime.
581
+
582
+ This is a convenience method for scheduled execution. It's equivalent to
583
+ calling enqueue() with the run_at parameter.
584
+
585
+ Args:
586
+ datetime: When to execute the job (datetime object or ISO string)
587
+ func: Function to execute
588
+ *args: Positional arguments for the function
589
+ **kwargs: Keyword arguments for the function and job options
590
+
591
+ Returns:
592
+ Job object or job ID depending on implementation
593
+
594
+ Raises:
595
+ NotImplementedError: Must be implemented by subclasses
596
+
597
+ Example:
598
+ ```python
599
+ # Run at specific time
600
+ manager.enqueue_at(datetime(2025, 1, 1, 9, 0), my_func, arg1, arg2)
601
+
602
+ # Run tomorrow at 9 AM
603
+ tomorrow_9am = datetime.now() + timedelta(days=1)
604
+ tomorrow_9am = tomorrow_9am.replace(hour=9, minute=0, second=0)
605
+ manager.enqueue_at(tomorrow_9am, my_func, arg1, kwarg1="value")
606
+
607
+ # Run using ISO string
608
+ manager.enqueue_at("2025-01-01T09:00:00", my_func, arg1)
609
+ ```
610
+ """
611
+ raise NotImplementedError("Subclasses must implement enqueue_at()")
@@ -1,11 +1,8 @@
1
1
  # filepath: /Volumes/WD_Blue_1TB/coding/libs/flowerpower/src/flowerpower/worker/rq/concurrent_workers.py
2
2
  import concurrent.futures
3
3
  import datetime as dt
4
- import logging
5
4
  import os
6
- import threading
7
5
  import time
8
- import traceback
9
6
  from concurrent.futures import ThreadPoolExecutor
10
7
 
11
8
  from loguru import logger