FlowerPower 1.0.0b1__py3-none-any.whl → 1.0.0b3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. flowerpower/__init__.py +2 -3
  2. flowerpower/cfg/__init__.py +10 -8
  3. flowerpower/cfg/pipeline/__init__.py +11 -7
  4. flowerpower/cfg/project/__init__.py +11 -8
  5. flowerpower/cfg/project/job_queue.py +10 -29
  6. flowerpower/cli/__init__.py +62 -28
  7. flowerpower/cli/job_queue.py +306 -123
  8. flowerpower/cli/mqtt.py +22 -16
  9. flowerpower/cli/pipeline.py +294 -114
  10. flowerpower/flowerpower.py +14 -8
  11. flowerpower/fs/__init__.py +7 -3
  12. flowerpower/fs/ext.py +6 -2
  13. flowerpower/io/base.py +17 -10
  14. flowerpower/io/loader/_duckdb.py +1 -0
  15. flowerpower/io/loader/deltatable.py +6 -2
  16. flowerpower/io/saver/deltatable.py +1 -2
  17. flowerpower/job_queue/__init__.py +16 -12
  18. flowerpower/job_queue/apscheduler/__init__.py +1 -1
  19. flowerpower/job_queue/apscheduler/manager.py +11 -6
  20. flowerpower/job_queue/apscheduler/utils.py +6 -4
  21. flowerpower/job_queue/base.py +1 -0
  22. flowerpower/job_queue/rq/__init__.py +1 -1
  23. flowerpower/job_queue/rq/manager.py +12 -3
  24. flowerpower/pipeline/io.py +11 -9
  25. flowerpower/pipeline/job_queue.py +5 -5
  26. flowerpower/pipeline/manager.py +35 -27
  27. flowerpower/pipeline/registry.py +26 -16
  28. flowerpower/pipeline/runner.py +3 -4
  29. flowerpower/plugins/mqtt/__init__.py +7 -7
  30. flowerpower/plugins/mqtt/cfg.py +3 -2
  31. flowerpower/plugins/mqtt/manager.py +25 -23
  32. flowerpower/utils/misc.py +6 -4
  33. flowerpower/utils/templates.py +1 -4
  34. {flowerpower-1.0.0b1.dist-info → flowerpower-1.0.0b3.dist-info}/METADATA +1 -1
  35. {flowerpower-1.0.0b1.dist-info → flowerpower-1.0.0b3.dist-info}/RECORD +38 -38
  36. {flowerpower-1.0.0b1.dist-info → flowerpower-1.0.0b3.dist-info}/WHEEL +0 -0
  37. {flowerpower-1.0.0b1.dist-info → flowerpower-1.0.0b3.dist-info}/entry_points.txt +0 -0
  38. {flowerpower-1.0.0b1.dist-info → flowerpower-1.0.0b3.dist-info}/top_level.txt +0 -0
flowerpower/flowerpower.py CHANGED
@@ -4,16 +4,16 @@ import posixpath
 from pathlib import Path
 
 import rich
-from fsspec.spec import AbstractFileSystem
 
-from .cfg import ProjectConfig
-from .fs import get_filesystem
 from . import settings
+from .cfg import ProjectConfig
+from .fs import AbstractFileSystem, BaseStorageOptions, get_filesystem
+
 
 def init(
     name: str | None = None,
     base_dir: str | None = None,
-    storage_options: dict = {},
+    storage_options: dict | BaseStorageOptions | None = {},
     fs: AbstractFileSystem | None = None,
     job_queue_type: str = settings.DEFAULT_JOB_QUEUE,
     cfg_dir: str = settings.CONFIG_DIR,
@@ -27,20 +27,26 @@ def init(
     if base_dir is None:
         base_dir = str(Path.cwd())
 
-    fs = get_filesystem(posixpath.join(base_dir, name), **storage_options)
+    if fs is None:
+        fs = get_filesystem(
+            posixpath.join(base_dir, name),
+            cached=True,
+            dirfs=True,
+            storage_options=storage_options,
+        )
 
     fs.makedirs(f"{cfg_dir}/pipelines", exist_ok=True)
     fs.makedirs(pipelines_dir, exist_ok=True)
     fs.makedirs(hooks_dir, exist_ok=True)
 
-    cfg = ProjectConfig.load(base_dir=posixpath.join(base_dir, name), name=name, job_queue_type=job_queue_type)
+    cfg = ProjectConfig.load(name=name, job_queue_type=job_queue_type, fs=fs)
 
-    with open(posixpath.join(base_dir, name, "README.md"), "w") as f:
+    with fs.open("README.md", "w") as f:
         f.write(
             f"# {name.replace('_', ' ').upper()}\n\n"
             f"**created with FlowerPower**\n\n*{dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*\n\n"
         )
-    cfg.save()
+    cfg.save(fs=fs)
    os.chdir(posixpath.join(base_dir, name))
 
     rich.print(
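
For orientation, a minimal sketch of calling the updated `init()` after this change; the project name and directory are illustrative, and the import path assumes `init` is used directly from the `flowerpower.flowerpower` module shown above:

```python
from flowerpower.flowerpower import init

# With fs=None (the default), init now builds a cached dirfs filesystem itself
# and writes README.md and the project config through it before chdir'ing in.
init(
    name="my_project",   # illustrative project name
    base_dir=".",        # create the project under the current directory
    storage_options={},  # may also be a BaseStorageOptions instance or None
)
```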
flowerpower/fs/__init__.py CHANGED
@@ -11,9 +11,13 @@ else:
 from .base import get_filesystem  # noqa: E402
 from .storage_options import AwsStorageOptions  # noqa: E402
 from .storage_options import AzureStorageOptions  # noqa: E402
-from .storage_options import (BaseStorageOptions, GcsStorageOptions,
-                              GitHubStorageOptions, GitLabStorageOptions,
-                              StorageOptions)
+from .storage_options import (
+    BaseStorageOptions,
+    GcsStorageOptions,
+    GitHubStorageOptions,
+    GitLabStorageOptions,
+    StorageOptions,
+)
 
 __all__ = [
     "get_filesystem",
flowerpower/fs/ext.py CHANGED
@@ -11,8 +11,12 @@ import pyarrow.dataset as pds
 import pyarrow.parquet as pq
 from fsspec import AbstractFileSystem
 
-from ..utils.misc import (_dict_to_dataframe, convert_large_types_to_standard,
-                          run_parallel, to_pyarrow_table)
+from ..utils.misc import (
+    _dict_to_dataframe,
+    convert_large_types_to_standard,
+    run_parallel,
+    to_pyarrow_table,
+)
 from ..utils.polars import pl
 
 if importlib.util.find_spec("duckdb") is not None:
flowerpower/io/base.py CHANGED
@@ -13,9 +13,14 @@ from pydantic import BaseModel, ConfigDict
 
 from ..fs import get_filesystem
 from ..fs.ext import _dict_to_dataframe, path_to_glob
-from ..fs.storage_options import (AwsStorageOptions, AzureStorageOptions,
-                                  GcsStorageOptions, GitHubStorageOptions,
-                                  GitLabStorageOptions, StorageOptions)
+from ..fs.storage_options import (
+    AwsStorageOptions,
+    AzureStorageOptions,
+    GcsStorageOptions,
+    GitHubStorageOptions,
+    GitLabStorageOptions,
+    StorageOptions,
+)
 from ..utils.misc import convert_large_types_to_standard, to_pyarrow_table
 from ..utils.polars import pl
 from ..utils.sql import sql2polars_filter, sql2pyarrow_filter
@@ -1411,13 +1416,15 @@ class BaseDatabaseIO(BaseModel):
             db in ["postgres", "mysql", "mssql", "oracle"]
             and not self.connection_string
         ):
-            if not all([
-                self.username,
-                self.password,
-                self.server,
-                self.port,
-                self.database,
-            ]):
+            if not all(
+                [
+                    self.username,
+                    self.password,
+                    self.server,
+                    self.port,
+                    self.database,
+                ]
+            ):
                 raise ValueError(
                     f"{self.type_} requires connection_string or username, password, server, port, and table_name "
                     "to build it."
flowerpower/io/loader/_duckdb.py CHANGED
@@ -6,6 +6,7 @@ import pandas as pd
 import polars as pl
 import pyarrow as pa
 import pyarrow.dataset as pds
+
 # from hamilton.function_modifiers import dataloader
 from pydantic import BaseModel
 
flowerpower/io/loader/deltatable.py CHANGED
@@ -7,13 +7,17 @@ import pyarrow as pa
 import pyarrow.dataset as pds
 from deltalake import DeltaTable, table
 from deltalake.exceptions import TableNotFoundError
+
 # from ..utils import get_dataframe_metadata, get_delta_metadata
 from loguru import logger
 from sherlock import RedisLock
 
 from ..base import BaseDatasetReader
-from ..metadata import (get_dataframe_metadata, get_delta_metadata,
-                        get_pyarrow_dataset_metadata)
+from ..metadata import (
+    get_dataframe_metadata,
+    get_delta_metadata,
+    get_pyarrow_dataset_metadata,
+)
 
 # from hamilton.function_modifiers import dataloader
 
flowerpower/io/saver/deltatable.py CHANGED
@@ -3,8 +3,7 @@ from typing import Any
 import pandas as pd
 import polars as pl
 import pyarrow as pa
-from deltalake.table import (ColumnProperties, CommitProperties,
-                             PostCommitHookProperties)
+from deltalake.table import ColumnProperties, CommitProperties, PostCommitHookProperties
 from deltalake.writer import WriterProperties, write_deltalake
 from redis import Redis, StrictRedis
 from sherlock import RedisLock
flowerpower/job_queue/__init__.py CHANGED
@@ -1,16 +1,16 @@
 from typing import Any, Optional
 
+from ..cfg.project import ProjectConfig
 from ..fs import AbstractFileSystem
 from ..utils.logging import setup_logging
 from .apscheduler import APSBackend, APSManager
 from .base import BaseBackend, BaseJobQueueManager
 from .rq import RQBackend, RQManager
-from ..cfg.project import ProjectConfig
 
 setup_logging()
 
 
-class JobQueue:
+class JobQueueManager:
     """A factory class for creating job queue instances for job scheduling and execution.
 
     This class provides a unified interface for creating different types of job queue instances
@@ -26,7 +26,7 @@ class JobQueue:
     Example:
         ```python
         # Create an RQ job queue
-        rq_worker = JobQueue(
+        rq_worker = JobQueueManager(
             type="rq",
             name="my_worker",
             log_level="DEBUG"
@@ -38,7 +38,7 @@ class JobQueue:
             data_store={"type": "postgresql", "uri": "postgresql+asyncpg://user:pass@localhost/db"},
             event_broker={"type": "redis", "uri": "redis://localhost:6379/0"}
         )
-        aps_worker = JobQueue(
+        aps_worker = JobQueueManager(
             type="apscheduler",
             name="scheduler",
             backend=backend_config
@@ -49,7 +49,7 @@ class JobQueue:
 
     def __new__(
         cls,
-        type: str |None = None,
+        type: str | None = None,
         name: str | None = None,
         base_dir: str | None = ".",
         backend: BaseBackend | None = None,
@@ -93,10 +93,10 @@ class JobQueue:
         Example:
             ```python
             # Basic RQ job queue
-            worker = JobQueue(type="rq", name="basic_worker")
+            worker = JobQueueManager(type="rq", name="basic_worker")
 
             # APScheduler with custom logging and storage
-            worker = JobQueue(
+            worker = JobQueueManager(
                 type="apscheduler",
                 name="scheduler",
                 base_dir="/app/data",
@@ -108,8 +108,12 @@ class JobQueue:
         """
         if type is None:
             type = ProjectConfig.load(
-                base_dir=base_dir, name=name, fs=fs, storage_options=storage_options or {}).job_queue.type
-
+                base_dir=base_dir,
+                name=name,
+                fs=fs,
+                storage_options=storage_options or {},
+            ).job_queue.type
+
         if type == "rq":
             return RQManager(
                 name=name,
@@ -130,7 +134,7 @@ class JobQueue:
                 log_level=log_level,
                 **kwargs,
             )
-
+
         else:
             raise ValueError(
                 f"Invalid job queue type: {type}. Valid types: ['rq', 'apscheduler']"
@@ -242,10 +246,10 @@ class Backend:
 
 
 __all__ = [
-    "JobQueue",
+    "JobQueueManager",
     "RQManager",
     "APSManager",
-    #"HueyWorker",
+    # "HueyWorker",
     "Backend",
     "RQBackend",
    "APSBackend",
flowerpower/job_queue/apscheduler/__init__.py CHANGED
@@ -1,6 +1,6 @@
+from .manager import APSManager
 from .setup import APSBackend, APSDataStore, APSEventBroker
 from .trigger import APSTrigger
-from .manager import APSManager
 
 __all__ = [
     "APSManager",
flowerpower/job_queue/apscheduler/manager.py CHANGED
@@ -8,6 +8,7 @@ import datetime as dt
 import importlib.util
 from typing import Any, Callable
 from uuid import UUID
+
 import duration_parser
 from fsspec.spec import AbstractFileSystem
 from loguru import logger
@@ -181,11 +182,13 @@ class APSManager(BaseJobQueueManager):
                 sqla_engine=data_store.sqla_engine
             )
         else:
-            event_broker = APSEventBroker(**{
-                k: v
-                for k, v in self.cfg.backend.event_broker.to_dict().items()
-                if k != "from_ds_sqla"
-            })
+            event_broker = APSEventBroker(
+                **{
+                    k: v
+                    for k, v in self.cfg.backend.event_broker.to_dict().items()
+                    if k != "from_ds_sqla"
+                }
+            )
         self._backend = APSBackend(data_store=data_store, event_broker=event_broker)
 
         logger.info(
@@ -375,7 +378,9 @@
         if isinstance(result_ttl, (int, float)):
             result_ttl = dt.timedelta(seconds=result_ttl)
 
-        run_at = dt.datetime.fromisoformat(run_at) if isinstance(run_at, str) else run_at
+        run_at = (
+            dt.datetime.fromisoformat(run_at) if isinstance(run_at, str) else run_at
+        )
         run_in = duration_parser.parse(run_in) if isinstance(run_in, str) else run_in
 
         if run_in:
flowerpower/job_queue/apscheduler/utils.py CHANGED
@@ -160,10 +160,12 @@ def format_trigger(trigger):
             )
             cron_parts = {k: v.strip("'") for k, v in cron_parts.items()}
             crontab = f"{cron_parts['minute']} {cron_parts['hour']} {cron_parts['day']} {cron_parts['month']} {cron_parts['day_of_week']}"
-            human_readable = humanize_crontab(**{
-                k: cron_parts[k]
-                for k in ["minute", "hour", "day", "month", "day_of_week"]
-            })
+            human_readable = humanize_crontab(
+                **{
+                    k: cron_parts[k]
+                    for k in ["minute", "hour", "day", "month", "day_of_week"]
+                }
+            )
             return f"Cron: {human_readable} ({crontab})"
         except Exception:
             return f"Cron: {str(trigger)}"
flowerpower/job_queue/base.py CHANGED
@@ -18,6 +18,7 @@ from sqlalchemy.ext.asyncio import AsyncEngine
 
 from ..cfg import ProjectConfig
 from ..fs import AbstractFileSystem, get_filesystem
+
 # from ..utils.misc import update_config_from_dict
 from ..settings import BACKEND_PROPERTIES
 
flowerpower/job_queue/rq/__init__.py CHANGED
@@ -1,6 +1,6 @@
+from .manager import RQManager
 from .setup import RQBackend
 from .utils import show_jobs, show_schedules
-from .manager import RQManager
 
 __all__ = [
     "RQManager",
flowerpower/job_queue/rq/manager.py CHANGED
@@ -11,6 +11,7 @@ import sys
 import time
 import uuid
 from typing import Any, Callable
+
 import duration_parser
 from cron_descriptor import get_description
 from humanize import precisedelta
@@ -658,7 +659,9 @@ class RQManager(BaseJobQueueManager):
         queue = self._queues[queue_name]
         if run_at:
             # Schedule the job to run at a specific time
-            run_at = dt.datetime.fromisoformat(run_at) if isinstance(run_at, str) else run_at
+            run_at = (
+                dt.datetime.fromisoformat(run_at) if isinstance(run_at, str) else run_at
+            )
             job = queue.enqueue_at(
                 run_at,
                 func,
@@ -677,8 +680,14 @@
             )
         elif run_in:
             # Schedule the job to run after a delay
-            run_in = duration_parser.parse(run_in) if isinstance(run_in, str) else run_in
-            run_in = dt.timedelta(seconds=run_in) if isinstance(run_in, (int, float)) else run_in
+            run_in = (
+                duration_parser.parse(run_in) if isinstance(run_in, str) else run_in
+            )
+            run_in = (
+                dt.timedelta(seconds=run_in)
+                if isinstance(run_in, (int, float))
+                else run_in
+            )
             job = queue.enqueue_in(
                 run_in,
                 func,
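
These hunks only re-wrap the existing coercion of `run_at` and `run_in`; the behaviour is unchanged: `run_at` accepts an ISO 8601 string or a `datetime`, and `run_in` accepts a duration string, a number of seconds, or a `timedelta`. A standalone sketch of that coercion, with illustrative input values:

```python
import datetime as dt

import duration_parser  # same third-party parser the manager uses for "5m"-style strings

run_at = "2025-01-01T09:00:00"  # illustrative ISO 8601 timestamp
run_in = "5m"                   # illustrative duration string

# Mirror the coercion in the hunks above: strings become datetime / seconds,
# and numeric delays become timedeltas before enqueueing.
run_at = dt.datetime.fromisoformat(run_at) if isinstance(run_at, str) else run_at
run_in = duration_parser.parse(run_in) if isinstance(run_in, str) else run_in
run_in = dt.timedelta(seconds=run_in) if isinstance(run_in, (int, float)) else run_in
```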
flowerpower/pipeline/io.py CHANGED
@@ -40,7 +40,7 @@ class PipelineIOManager:
         name: str,
         src_base_dir: str,
         src_fs: AbstractFileSystem | None = None,
-        src_storage_options: BaseStorageOptions | None = None,
+        src_storage_options: dict | BaseStorageOptions | None = {},
         overwrite: bool = False,
     ):
         """
@@ -66,7 +66,7 @@
         ```
         """
         if src_fs is None:
-            src_fs = get_filesystem(src_base_dir, **(src_storage_options or {}))
+            src_fs = get_filesystem(src_base_dir, storage_options=src_storage_options)
 
         # Use project_cfg attributes for destination paths and filesystem
         dest_pipeline_file = posixpath.join(
@@ -124,7 +124,7 @@
         pipelines: dict[str, str] | list[str],
         src_base_dir: str,
         src_fs: AbstractFileSystem | None = None,
-        src_storage_options: BaseStorageOptions | None = None,
+        src_storage_options: dict | BaseStorageOptions | None = {},
         overwrite: bool = False,
     ):
         """
@@ -173,7 +173,7 @@
         self,
         src_base_dir: str,
         src_fs: AbstractFileSystem | None = None,
-        src_storage_options: BaseStorageOptions | None = None,
+        src_storage_options: dict | BaseStorageOptions | None = {},
         overwrite: bool = False,
     ):
         """Import all pipelines from a given path.
@@ -199,7 +199,7 @@
         ```
         """
         if not src_fs:
-            src_fs = get_filesystem(src_base_dir, **(src_storage_options or {}))
+            src_fs = get_filesystem(src_base_dir, storage_options=src_storage_options)
 
         console.print(f"🔍 Search pipelines in [green]{src_base_dir}[/green]...")
 
@@ -241,7 +241,7 @@
         name: str,
         dest_base_dir: str,
         dest_fs: AbstractFileSystem | None = None,
-        des_storage_options: BaseStorageOptions | None = None,
+        dest_storage_options: dict | BaseStorageOptions | None = {},
         overwrite: bool = False,
     ):
         """
@@ -275,7 +275,9 @@
             raise ValueError(f"Pipeline {self.project_cfg.name}.{name} does not exist.")
 
         if dest_fs is None:
-            dest_fs = get_filesystem(dest_base_dir, **(des_storage_options or {}))
+            dest_fs = get_filesystem(
+                dest_base_dir, storage_options=dest_storage_options
+            )
 
         # Define destination paths relative to base_dir
         dest_pipeline_file = posixpath.join(
@@ -318,7 +320,7 @@
         pipelines: list[str],
         dest_base_dir: str,
         dest_fs: AbstractFileSystem | None = None,
-        dest_storage_options: BaseStorageOptions | None = None,
+        dest_storage_options: dict | BaseStorageOptions | None = {},
         overwrite: bool = False,
     ):
         """
@@ -361,7 +363,7 @@
         self,
         dest_base_dir: str,
         dest_fs: AbstractFileSystem | None = None,
-        dest_storage_options: BaseStorageOptions | None = None,
+        dest_storage_options: dict | BaseStorageOptions | None = {},
         overwrite: bool = False,
     ):
         """Export all pipelines to a given path.
flowerpower/pipeline/job_queue.py CHANGED
@@ -10,13 +10,14 @@ from uuid import UUID
 from loguru import logger
 from rich import print as rprint
 
+from .. import settings
+
 # Import necessary config types
 from ..cfg import PipelineConfig, ProjectConfig
 from ..fs import AbstractFileSystem
+from ..job_queue import JobQueueManager
 from ..utils.logging import setup_logging
-from ..job_queue import JobQueue
 from .registry import PipelineRegistry
-from .. import settings
 
 setup_logging()
 
@@ -63,7 +64,7 @@ class PipelineJobQueue:
             f"Instantiating worker of type: {self._job_queue_type} for project '{self.project_cfg.name}'"
         )
         # Pass the necessary parts of project_cfg to the Job queue
-        return JobQueue(
+        return JobQueueManager(
             type=self._job_queue_type,
             fs=self._fs,
         )
@@ -116,7 +117,6 @@
         """
         logger.debug(f"Adding immediate job for pipeline: {name}")
 
-
         pipeline_run_args = {
             # 'name' is not passed to run_func, it's part of the context already in PipelineRunner
             "inputs": inputs,
@@ -332,7 +332,7 @@
 
         # --- Resolve Parameters using pipeline_cfg for defaults ---
         schedule_cfg = pipeline_cfg.schedule
-        #run_cfg = pipeline_cfg.run
+        # run_cfg = pipeline_cfg.run
 
         pipeline_run_args = {
             "inputs": inputs,