FlowerPower 0.11.6.20__py3-none-any.whl → 0.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowerpower/__init__.py +2 -6
- flowerpower/cfg/__init__.py +7 -14
- flowerpower/cfg/base.py +29 -25
- flowerpower/cfg/pipeline/__init__.py +8 -6
- flowerpower/cfg/pipeline/_schedule.py +32 -0
- flowerpower/cfg/pipeline/adapter.py +0 -5
- flowerpower/cfg/pipeline/builder.py +377 -0
- flowerpower/cfg/pipeline/run.py +36 -0
- flowerpower/cfg/project/__init__.py +11 -24
- flowerpower/cfg/project/adapter.py +0 -12
- flowerpower/cli/__init__.py +2 -21
- flowerpower/cli/cfg.py +0 -3
- flowerpower/cli/mqtt.py +0 -6
- flowerpower/cli/pipeline.py +22 -415
- flowerpower/cli/utils.py +0 -1
- flowerpower/flowerpower.py +345 -146
- flowerpower/pipeline/__init__.py +2 -0
- flowerpower/pipeline/base.py +21 -12
- flowerpower/pipeline/io.py +58 -54
- flowerpower/pipeline/manager.py +165 -726
- flowerpower/pipeline/pipeline.py +643 -0
- flowerpower/pipeline/registry.py +285 -18
- flowerpower/pipeline/visualizer.py +5 -6
- flowerpower/plugins/io/__init__.py +8 -0
- flowerpower/plugins/mqtt/__init__.py +7 -11
- flowerpower/settings/__init__.py +0 -2
- flowerpower/settings/{backend.py → _backend.py} +0 -21
- flowerpower/settings/logging.py +1 -1
- flowerpower/utils/logging.py +24 -12
- flowerpower/utils/misc.py +17 -256
- flowerpower/utils/monkey.py +1 -83
- flowerpower-0.21.0.dist-info/METADATA +463 -0
- flowerpower-0.21.0.dist-info/RECORD +44 -0
- flowerpower/cfg/pipeline/schedule.py +0 -74
- flowerpower/cfg/project/job_queue.py +0 -238
- flowerpower/cli/job_queue.py +0 -1061
- flowerpower/fs/__init__.py +0 -29
- flowerpower/fs/base.py +0 -662
- flowerpower/fs/ext.py +0 -2143
- flowerpower/fs/storage_options.py +0 -1420
- flowerpower/job_queue/__init__.py +0 -294
- flowerpower/job_queue/apscheduler/__init__.py +0 -11
- flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -110
- flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -93
- flowerpower/job_queue/apscheduler/manager.py +0 -1051
- flowerpower/job_queue/apscheduler/setup.py +0 -554
- flowerpower/job_queue/apscheduler/trigger.py +0 -169
- flowerpower/job_queue/apscheduler/utils.py +0 -311
- flowerpower/job_queue/base.py +0 -413
- flowerpower/job_queue/rq/__init__.py +0 -10
- flowerpower/job_queue/rq/_trigger.py +0 -37
- flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +0 -226
- flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -231
- flowerpower/job_queue/rq/manager.py +0 -1582
- flowerpower/job_queue/rq/setup.py +0 -154
- flowerpower/job_queue/rq/utils.py +0 -69
- flowerpower/mqtt.py +0 -12
- flowerpower/pipeline/job_queue.py +0 -583
- flowerpower/pipeline/runner.py +0 -603
- flowerpower/plugins/io/base.py +0 -2520
- flowerpower/plugins/io/helpers/datetime.py +0 -298
- flowerpower/plugins/io/helpers/polars.py +0 -875
- flowerpower/plugins/io/helpers/pyarrow.py +0 -570
- flowerpower/plugins/io/helpers/sql.py +0 -202
- flowerpower/plugins/io/loader/__init__.py +0 -28
- flowerpower/plugins/io/loader/csv.py +0 -37
- flowerpower/plugins/io/loader/deltatable.py +0 -190
- flowerpower/plugins/io/loader/duckdb.py +0 -19
- flowerpower/plugins/io/loader/json.py +0 -37
- flowerpower/plugins/io/loader/mqtt.py +0 -159
- flowerpower/plugins/io/loader/mssql.py +0 -26
- flowerpower/plugins/io/loader/mysql.py +0 -26
- flowerpower/plugins/io/loader/oracle.py +0 -26
- flowerpower/plugins/io/loader/parquet.py +0 -35
- flowerpower/plugins/io/loader/postgres.py +0 -26
- flowerpower/plugins/io/loader/pydala.py +0 -19
- flowerpower/plugins/io/loader/sqlite.py +0 -23
- flowerpower/plugins/io/metadata.py +0 -244
- flowerpower/plugins/io/saver/__init__.py +0 -28
- flowerpower/plugins/io/saver/csv.py +0 -36
- flowerpower/plugins/io/saver/deltatable.py +0 -186
- flowerpower/plugins/io/saver/duckdb.py +0 -19
- flowerpower/plugins/io/saver/json.py +0 -36
- flowerpower/plugins/io/saver/mqtt.py +0 -28
- flowerpower/plugins/io/saver/mssql.py +0 -26
- flowerpower/plugins/io/saver/mysql.py +0 -26
- flowerpower/plugins/io/saver/oracle.py +0 -26
- flowerpower/plugins/io/saver/parquet.py +0 -36
- flowerpower/plugins/io/saver/postgres.py +0 -26
- flowerpower/plugins/io/saver/pydala.py +0 -20
- flowerpower/plugins/io/saver/sqlite.py +0 -24
- flowerpower/plugins/mqtt/cfg.py +0 -17
- flowerpower/plugins/mqtt/manager.py +0 -962
- flowerpower/settings/job_queue.py +0 -87
- flowerpower/utils/scheduler.py +0 -311
- flowerpower-0.11.6.20.dist-info/METADATA +0 -537
- flowerpower-0.11.6.20.dist-info/RECORD +0 -102
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/WHEEL +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/licenses/LICENSE +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/top_level.txt +0 -0
flowerpower/__init__.py
CHANGED
@@ -1,22 +1,18 @@
|
|
1
1
|
import importlib.metadata
|
2
2
|
|
3
3
|
from .cfg import Config, PipelineConfig, ProjectConfig
|
4
|
-
from .flowerpower import FlowerPower, FlowerPowerProject
|
5
|
-
from .flowerpower import init as init_project # noqa: E402
|
6
|
-
from .job_queue import JobQueueManager # noqa: E402
|
4
|
+
from .flowerpower import FlowerPower, FlowerPowerProject, create_project
|
7
5
|
from .pipeline import PipelineManager
|
8
6
|
|
9
7
|
__version__ = importlib.metadata.version("FlowerPower")
|
10
8
|
|
11
9
|
__all__ = [
|
12
10
|
"__version__",
|
13
|
-
"
|
11
|
+
"create_project",
|
14
12
|
"FlowerPower",
|
15
13
|
"FlowerPowerProject",
|
16
14
|
"PipelineManager",
|
17
|
-
"JobQueueManager",
|
18
15
|
"Config",
|
19
16
|
"ProjectConfig",
|
20
17
|
"PipelineConfig",
|
21
|
-
"PipelineConfig",
|
22
18
|
]
|
flowerpower/cfg/__init__.py
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
from pathlib import Path
|
2
2
|
|
3
3
|
import msgspec
|
4
|
+
from fsspec_utils import AbstractFileSystem, BaseStorageOptions, filesystem
|
4
5
|
from munch import Munch
|
5
6
|
|
6
|
-
from ..
|
7
|
+
from ..settings import CONFIG_DIR, PIPELINES_DIR
|
7
8
|
from .base import BaseConfig
|
8
9
|
from .pipeline import PipelineConfig, init_pipeline_config
|
9
10
|
from .project import ProjectConfig, init_project_config
|
@@ -49,7 +50,6 @@ class Config(BaseConfig):
|
|
49
50
|
base_dir: str = ".",
|
50
51
|
name: str | None = None,
|
51
52
|
pipeline_name: str | None = None,
|
52
|
-
job_queue_type: str | None = None,
|
53
53
|
fs: AbstractFileSystem | None = None,
|
54
54
|
storage_options: dict | BaseStorageOptions | None = {},
|
55
55
|
):
|
@@ -59,7 +59,6 @@ class Config(BaseConfig):
|
|
59
59
|
base_dir (str, optional): Base directory for configurations. Defaults to ".".
|
60
60
|
name (str | None, optional): Project name. Defaults to None.
|
61
61
|
pipeline_name (str | None, optional): Pipeline name. Defaults to None.
|
62
|
-
job_queue_type (str | None, optional): Type of job queue to use. Defaults to None.
|
63
62
|
fs (AbstractFileSystem | None, optional): Filesystem to use. Defaults to None.
|
64
63
|
storage_options (dict | Munch, optional): Options for filesystem. Defaults to empty Munch.
|
65
64
|
|
@@ -72,18 +71,16 @@ class Config(BaseConfig):
|
|
72
71
|
base_dir="my_project",
|
73
72
|
name="test_project",
|
74
73
|
pipeline_name="etl",
|
75
|
-
job_queue_type="rq"
|
76
74
|
)
|
77
75
|
```
|
78
76
|
"""
|
79
77
|
if fs is None:
|
80
|
-
fs =
|
78
|
+
fs = filesystem(
|
81
79
|
base_dir, cached=True, dirfs=True, storage_options=storage_options
|
82
80
|
)
|
83
81
|
project = ProjectConfig.load(
|
84
82
|
base_dir=base_dir,
|
85
83
|
name=name,
|
86
|
-
job_queue_type=job_queue_type,
|
87
84
|
fs=fs,
|
88
85
|
storage_options=storage_options,
|
89
86
|
)
|
@@ -123,15 +120,15 @@ class Config(BaseConfig):
|
|
123
120
|
```
|
124
121
|
"""
|
125
122
|
if fs is None and self.fs is None:
|
126
|
-
self.fs =
|
123
|
+
self.fs = filesystem(
|
127
124
|
self.base_dir, cached=True, dirfs=True, **storage_options
|
128
125
|
)
|
129
126
|
|
130
|
-
if not self.fs.exists(
|
131
|
-
self.fs.makedirs(
|
127
|
+
if not self.fs.exists(CONFIG_DIR):
|
128
|
+
self.fs.makedirs(CONFIG_DIR)
|
132
129
|
|
133
130
|
if pipeline:
|
134
|
-
self.fs.makedirs(
|
131
|
+
self.fs.makedirs(PIPELINES_DIR, exist_ok=True)
|
135
132
|
h_params = self.pipeline.pop("h_params") if self.pipeline.h_params else None
|
136
133
|
self.pipeline.to_yaml(
|
137
134
|
path=f"conf/pipelines/{self.pipeline.name}.yml", fs=self.fs
|
@@ -210,7 +207,6 @@ def init_config(
|
|
210
207
|
base_dir: str = ".",
|
211
208
|
name: str | None = None,
|
212
209
|
pipeline_name: str | None = None,
|
213
|
-
job_queue_type: str | None = None,
|
214
210
|
fs: AbstractFileSystem | None = None,
|
215
211
|
storage_options: dict | BaseStorageOptions | None = {},
|
216
212
|
):
|
@@ -223,7 +219,6 @@ def init_config(
|
|
223
219
|
base_dir (str, optional): Base directory for configurations. Defaults to ".".
|
224
220
|
name (str | None, optional): Project name. Defaults to None.
|
225
221
|
pipeline_name (str | None, optional): Pipeline name. Defaults to None.
|
226
|
-
job_queue_type (str | None, optional): Type of job queue to use. Defaults to None.
|
227
222
|
fs (AbstractFileSystem | None, optional): Filesystem to use. Defaults to None.
|
228
223
|
storage_options (dict | Munch, optional): Options for filesystem. Defaults to empty Munch.
|
229
224
|
|
@@ -236,7 +231,6 @@ def init_config(
|
|
236
231
|
base_dir="my_project",
|
237
232
|
name="test_project",
|
238
233
|
pipeline_name="data-pipeline",
|
239
|
-
job_queue_type="rq"
|
240
234
|
)
|
241
235
|
```
|
242
236
|
"""
|
@@ -249,7 +243,6 @@ def init_config(
|
|
249
243
|
project_cfg = init_project_config(
|
250
244
|
base_dir=base_dir,
|
251
245
|
name=name,
|
252
|
-
job_queue_type=job_queue_type,
|
253
246
|
fs=fs,
|
254
247
|
storage_options=storage_options,
|
255
248
|
)
|
flowerpower/cfg/base.py
CHANGED
@@ -2,7 +2,8 @@ import copy
|
|
2
2
|
from typing import Any, Self
|
3
3
|
|
4
4
|
import msgspec
|
5
|
-
from
|
5
|
+
from fsspec_utils import AbstractFileSystem, filesystem
|
6
|
+
from ..utils.misc import get_filesystem
|
6
7
|
|
7
8
|
|
8
9
|
class BaseConfig(msgspec.Struct, kw_only=True):
|
@@ -20,12 +21,10 @@ class BaseConfig(msgspec.Struct, kw_only=True):
|
|
20
21
|
Raises:
|
21
22
|
NotImplementedError: If the filesystem does not support writing files.
|
22
23
|
"""
|
23
|
-
|
24
|
-
fs = filesystem("file")
|
24
|
+
fs = get_filesystem(fs)
|
25
25
|
try:
|
26
26
|
with fs.open(path, "wb") as f:
|
27
27
|
f.write(msgspec.yaml.encode(self, order="deterministic"))
|
28
|
-
# yaml.dump(self.to_dict(), f, default_flow_style=False)
|
29
28
|
except NotImplementedError:
|
30
29
|
raise NotImplementedError("The filesystem does not support writing files.")
|
31
30
|
|
@@ -54,29 +53,42 @@ class BaseConfig(msgspec.Struct, kw_only=True):
|
|
54
53
|
An instance of the class with the values from the YAML file.
|
55
54
|
|
56
55
|
"""
|
57
|
-
|
58
|
-
fs = filesystem("file")
|
56
|
+
fs = get_filesystem(fs)
|
59
57
|
with fs.open(path) as f:
|
60
|
-
# data = yaml.full_load(f)
|
61
|
-
# return cls.from_dict(data)
|
62
58
|
return msgspec.yaml.decode(f.read(), type=cls, strict=False)
|
63
59
|
|
64
|
-
def
|
60
|
+
def _apply_dict_updates(self, target: Self, d: dict[str, Any]) -> None:
|
61
|
+
"""
|
62
|
+
Helper method to apply dictionary updates to a target instance.
|
63
|
+
|
64
|
+
Args:
|
65
|
+
target: The target instance to apply updates to.
|
66
|
+
d: The dictionary containing updates to apply.
|
67
|
+
"""
|
65
68
|
for k, v in d.items():
|
66
|
-
if hasattr(
|
67
|
-
current_value = getattr(
|
69
|
+
if hasattr(target, k):
|
70
|
+
current_value = getattr(target, k)
|
68
71
|
if isinstance(current_value, dict) and isinstance(v, dict):
|
69
72
|
current_value.update(v)
|
70
73
|
else:
|
71
|
-
setattr(
|
74
|
+
setattr(target, k, v)
|
72
75
|
else:
|
73
|
-
|
76
|
+
# Use object.__setattr__ to bypass msgspec.Struct's restrictions
|
77
|
+
object.__setattr__(target, k, v)
|
78
|
+
|
79
|
+
def update(self, d: dict[str, Any]) -> None:
|
80
|
+
"""
|
81
|
+
Updates this instance with values from the provided dictionary.
|
82
|
+
|
83
|
+
Args:
|
84
|
+
d: The dictionary containing updates to apply.
|
85
|
+
"""
|
86
|
+
self._apply_dict_updates(self, d)
|
74
87
|
|
75
88
|
def merge_dict(self, d: dict[str, Any]) -> Self:
|
76
89
|
"""
|
77
90
|
Creates a copy of this instance and updates the copy with values
|
78
|
-
from the provided dictionary
|
79
|
-
its default value. The original instance (self) is not modified.
|
91
|
+
from the provided dictionary. The original instance (self) is not modified.
|
80
92
|
|
81
93
|
Args:
|
82
94
|
d: The dictionary to get values from.
|
@@ -84,16 +96,8 @@ class BaseConfig(msgspec.Struct, kw_only=True):
|
|
84
96
|
Returns:
|
85
97
|
A new instance of the struct with updated values.
|
86
98
|
"""
|
87
|
-
self_copy = copy.
|
88
|
-
|
89
|
-
if hasattr(self_copy, k):
|
90
|
-
current_value = getattr(self_copy, k)
|
91
|
-
if isinstance(current_value, dict) and isinstance(v, dict):
|
92
|
-
current_value.update(v)
|
93
|
-
else:
|
94
|
-
setattr(self_copy, k, v)
|
95
|
-
else:
|
96
|
-
setattr(self_copy, k, v)
|
99
|
+
self_copy = copy.deepcopy(self)
|
100
|
+
self._apply_dict_updates(self_copy, d)
|
97
101
|
return self_copy
|
98
102
|
|
99
103
|
def merge(self, source: Self) -> Self:
|
@@ -1,13 +1,15 @@
|
|
1
1
|
import msgspec
|
2
2
|
import yaml
|
3
|
+
from fsspec_utils import AbstractFileSystem, BaseStorageOptions, filesystem
|
3
4
|
from hamilton.function_modifiers import source, value
|
4
5
|
from munch import Munch, munchify
|
5
6
|
|
6
|
-
from ...fs import AbstractFileSystem, BaseStorageOptions, get_filesystem
|
7
7
|
from ..base import BaseConfig
|
8
8
|
from .adapter import AdapterConfig
|
9
|
+
from .run import ExecutorConfig as ExecutorConfig
|
9
10
|
from .run import RunConfig
|
10
|
-
from .
|
11
|
+
from .run import WithAdapterConfig as WithAdapterConfig
|
12
|
+
#from .schedule import ScheduleConfig
|
11
13
|
|
12
14
|
|
13
15
|
class PipelineConfig(BaseConfig):
|
@@ -20,7 +22,7 @@ class PipelineConfig(BaseConfig):
|
|
20
22
|
Attributes:
|
21
23
|
name (str | None): The name of the pipeline.
|
22
24
|
run (RunConfig): Configuration for pipeline execution.
|
23
|
-
schedule (ScheduleConfig): Configuration for pipeline scheduling.
|
25
|
+
schedule (ScheduleConfig): Configuration for pipeline scheduling. DEPRECATED.
|
24
26
|
params (dict): Pipeline parameters.
|
25
27
|
adapter (AdapterConfig): Configuration for the pipeline adapter.
|
26
28
|
h_params (dict): Hamilton-formatted parameters.
|
@@ -43,7 +45,7 @@ class PipelineConfig(BaseConfig):
|
|
43
45
|
|
44
46
|
name: str | None = msgspec.field(default=None)
|
45
47
|
run: RunConfig = msgspec.field(default_factory=RunConfig)
|
46
|
-
|
48
|
+
#: ScheduleConfig = msgspec.field(default_factory=ScheduleConfig)
|
47
49
|
params: dict = msgspec.field(default_factory=dict)
|
48
50
|
adapter: AdapterConfig = msgspec.field(default_factory=AdapterConfig)
|
49
51
|
h_params: dict = msgspec.field(default_factory=dict)
|
@@ -166,7 +168,7 @@ class PipelineConfig(BaseConfig):
|
|
166
168
|
```
|
167
169
|
"""
|
168
170
|
if fs is None:
|
169
|
-
fs =
|
171
|
+
fs = filesystem(
|
170
172
|
base_dir, cached=False, dirfs=True, storage_options=storage_options
|
171
173
|
)
|
172
174
|
if fs.exists("conf/pipelines"):
|
@@ -207,7 +209,7 @@ class PipelineConfig(BaseConfig):
|
|
207
209
|
```
|
208
210
|
"""
|
209
211
|
if fs is None:
|
210
|
-
fs =
|
212
|
+
fs = filesystem(
|
211
213
|
base_dir, cached=True, dirfs=True, storage_options=storage_options
|
212
214
|
)
|
213
215
|
|
@@ -0,0 +1,32 @@
|
|
1
|
+
import datetime as dt
|
2
|
+
|
3
|
+
import msgspec
|
4
|
+
from munch import munchify
|
5
|
+
|
6
|
+
from ..base import BaseConfig
|
7
|
+
|
8
|
+
|
9
|
+
class ScheduleConfig(BaseConfig):
|
10
|
+
cron: str | dict | None = msgspec.field(default=None)
|
11
|
+
interval: str | int | dict | None = msgspec.field(default=None)
|
12
|
+
date: str | None = msgspec.field(default=None)
|
13
|
+
|
14
|
+
def __post_init__(self):
|
15
|
+
if isinstance(self.date, str):
|
16
|
+
try:
|
17
|
+
self.date = dt.datetime.fromisoformat(self.date)
|
18
|
+
except ValueError:
|
19
|
+
raise ValueError(
|
20
|
+
f"Invalid date format: {self.date}. Expected ISO format."
|
21
|
+
)
|
22
|
+
if isinstance(self.cron, dict):
|
23
|
+
self.cron = munchify(self.cron)
|
24
|
+
if isinstance(self.interval, dict):
|
25
|
+
self.interval = munchify(self.interval)
|
26
|
+
|
27
|
+
|
28
|
+
# class ScheduleConfig(BaseConfig):
|
29
|
+
# run: ScheduleRunConfig = msgspec.field(default_factory=ScheduleRunConfig)
|
30
|
+
# trigger: ScheduleTriggerConfig = msgspec.field(
|
31
|
+
# default_factory=ScheduleTriggerConfig
|
32
|
+
# )
|
@@ -39,17 +39,12 @@ class MLFlowConfig(BaseConfig):
|
|
39
39
|
self.run_tags = munchify(self.run_tags)
|
40
40
|
|
41
41
|
|
42
|
-
# class OpenLineageConfig(BaseConfig):
|
43
|
-
# namespace : str | None = msgspec.field(default=None)
|
44
|
-
# job_name : str | None = msgspec.field(default=None)
|
45
|
-
|
46
42
|
|
47
43
|
class AdapterConfig(BaseConfig):
|
48
44
|
hamilton_tracker: HamiltonTracerConfig = msgspec.field(
|
49
45
|
default_factory=HamiltonTracerConfig
|
50
46
|
)
|
51
47
|
mlflow: MLFlowConfig = msgspec.field(default_factory=MLFlowConfig)
|
52
|
-
# openlineage: OpenLineageConfig | dict = msgspec.field(default_factory=OpenLineageConfig)
|
53
48
|
|
54
49
|
def __post_init__(self):
|
55
50
|
if isinstance(self.hamilton_tracker, dict):
|