FlowerPower 0.11.6.20__py3-none-any.whl → 0.21.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registries.
- flowerpower/__init__.py +2 -6
- flowerpower/cfg/__init__.py +7 -14
- flowerpower/cfg/base.py +29 -25
- flowerpower/cfg/pipeline/__init__.py +8 -6
- flowerpower/cfg/pipeline/_schedule.py +32 -0
- flowerpower/cfg/pipeline/adapter.py +0 -5
- flowerpower/cfg/pipeline/builder.py +377 -0
- flowerpower/cfg/pipeline/run.py +36 -0
- flowerpower/cfg/project/__init__.py +11 -24
- flowerpower/cfg/project/adapter.py +0 -12
- flowerpower/cli/__init__.py +2 -21
- flowerpower/cli/cfg.py +0 -3
- flowerpower/cli/mqtt.py +0 -6
- flowerpower/cli/pipeline.py +22 -415
- flowerpower/cli/utils.py +0 -1
- flowerpower/flowerpower.py +345 -146
- flowerpower/pipeline/__init__.py +2 -0
- flowerpower/pipeline/base.py +21 -12
- flowerpower/pipeline/io.py +58 -54
- flowerpower/pipeline/manager.py +165 -726
- flowerpower/pipeline/pipeline.py +643 -0
- flowerpower/pipeline/registry.py +285 -18
- flowerpower/pipeline/visualizer.py +5 -6
- flowerpower/plugins/io/__init__.py +8 -0
- flowerpower/plugins/mqtt/__init__.py +7 -11
- flowerpower/settings/__init__.py +0 -2
- flowerpower/settings/{backend.py → _backend.py} +0 -21
- flowerpower/settings/logging.py +1 -1
- flowerpower/utils/logging.py +24 -12
- flowerpower/utils/misc.py +17 -256
- flowerpower/utils/monkey.py +1 -83
- flowerpower-0.21.0.dist-info/METADATA +463 -0
- flowerpower-0.21.0.dist-info/RECORD +44 -0
- flowerpower/cfg/pipeline/schedule.py +0 -74
- flowerpower/cfg/project/job_queue.py +0 -238
- flowerpower/cli/job_queue.py +0 -1061
- flowerpower/fs/__init__.py +0 -29
- flowerpower/fs/base.py +0 -662
- flowerpower/fs/ext.py +0 -2143
- flowerpower/fs/storage_options.py +0 -1420
- flowerpower/job_queue/__init__.py +0 -294
- flowerpower/job_queue/apscheduler/__init__.py +0 -11
- flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -110
- flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -93
- flowerpower/job_queue/apscheduler/manager.py +0 -1051
- flowerpower/job_queue/apscheduler/setup.py +0 -554
- flowerpower/job_queue/apscheduler/trigger.py +0 -169
- flowerpower/job_queue/apscheduler/utils.py +0 -311
- flowerpower/job_queue/base.py +0 -413
- flowerpower/job_queue/rq/__init__.py +0 -10
- flowerpower/job_queue/rq/_trigger.py +0 -37
- flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +0 -226
- flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -231
- flowerpower/job_queue/rq/manager.py +0 -1582
- flowerpower/job_queue/rq/setup.py +0 -154
- flowerpower/job_queue/rq/utils.py +0 -69
- flowerpower/mqtt.py +0 -12
- flowerpower/pipeline/job_queue.py +0 -583
- flowerpower/pipeline/runner.py +0 -603
- flowerpower/plugins/io/base.py +0 -2520
- flowerpower/plugins/io/helpers/datetime.py +0 -298
- flowerpower/plugins/io/helpers/polars.py +0 -875
- flowerpower/plugins/io/helpers/pyarrow.py +0 -570
- flowerpower/plugins/io/helpers/sql.py +0 -202
- flowerpower/plugins/io/loader/__init__.py +0 -28
- flowerpower/plugins/io/loader/csv.py +0 -37
- flowerpower/plugins/io/loader/deltatable.py +0 -190
- flowerpower/plugins/io/loader/duckdb.py +0 -19
- flowerpower/plugins/io/loader/json.py +0 -37
- flowerpower/plugins/io/loader/mqtt.py +0 -159
- flowerpower/plugins/io/loader/mssql.py +0 -26
- flowerpower/plugins/io/loader/mysql.py +0 -26
- flowerpower/plugins/io/loader/oracle.py +0 -26
- flowerpower/plugins/io/loader/parquet.py +0 -35
- flowerpower/plugins/io/loader/postgres.py +0 -26
- flowerpower/plugins/io/loader/pydala.py +0 -19
- flowerpower/plugins/io/loader/sqlite.py +0 -23
- flowerpower/plugins/io/metadata.py +0 -244
- flowerpower/plugins/io/saver/__init__.py +0 -28
- flowerpower/plugins/io/saver/csv.py +0 -36
- flowerpower/plugins/io/saver/deltatable.py +0 -186
- flowerpower/plugins/io/saver/duckdb.py +0 -19
- flowerpower/plugins/io/saver/json.py +0 -36
- flowerpower/plugins/io/saver/mqtt.py +0 -28
- flowerpower/plugins/io/saver/mssql.py +0 -26
- flowerpower/plugins/io/saver/mysql.py +0 -26
- flowerpower/plugins/io/saver/oracle.py +0 -26
- flowerpower/plugins/io/saver/parquet.py +0 -36
- flowerpower/plugins/io/saver/postgres.py +0 -26
- flowerpower/plugins/io/saver/pydala.py +0 -20
- flowerpower/plugins/io/saver/sqlite.py +0 -24
- flowerpower/plugins/mqtt/cfg.py +0 -17
- flowerpower/plugins/mqtt/manager.py +0 -962
- flowerpower/settings/job_queue.py +0 -87
- flowerpower/utils/scheduler.py +0 -311
- flowerpower-0.11.6.20.dist-info/METADATA +0 -537
- flowerpower-0.11.6.20.dist-info/RECORD +0 -102
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/WHEEL +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/licenses/LICENSE +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/top_level.txt +0 -0
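Taken together, the deletions above show 0.21.0 removing the in-tree `fs`, `job_queue`, and `plugins/io` subsystems, with filesystem helpers now imported from the external `fsspec_utils` package (see the `misc.py` diff below). A hedged sketch of what that swap could look like for downstream imports; the old `flowerpower.fs` import path is inferred from the deleted `flowerpower/fs/__init__.py` entry and may not match every prior import site:

```python
# Before (0.11.6.20): filesystem helpers lived in-tree.
# Hypothetical import site, inferred from the deleted flowerpower/fs files:
# from flowerpower.fs import AbstractFileSystem, filesystem

# After (0.21.0): the same names come from the external package,
# exactly as flowerpower/utils/misc.py now imports them:
from fsspec_utils import AbstractFileSystem, filesystem

fs: AbstractFileSystem = filesystem("file")  # local filesystem, per the new get_filesystem() default
```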
flowerpower/utils/misc.py
CHANGED
@@ -7,262 +7,7 @@ import time
 from typing import Any
 
 import msgspec
-
-if importlib.util.find_spec("pyarrow"):
-    import pyarrow as pa
-
-    def convert_large_types_to_standard(schema: pa.Schema) -> pa.Schema:
-        # Define mapping of large types to standard types
-        type_mapping = {
-            pa.large_string(): pa.string(),
-            pa.large_binary(): pa.binary(),
-            pa.large_list(pa.null()): pa.list_(pa.null()),
-        }
-
-        # Convert fields
-        new_fields = []
-        for field in schema:
-            field_type = field.type
-            # Check if type exists in mapping
-            if field_type in type_mapping:
-                new_field = pa.field(
-                    name=field.name,
-                    type=type_mapping[field_type],
-                    nullable=field.nullable,
-                    metadata=field.metadata,
-                )
-                new_fields.append(new_field)
-            # Handle large lists with nested types
-            elif isinstance(field_type, pa.LargeListType):
-                new_field = pa.field(
-                    name=field.name,
-                    type=pa.list_(field_type.value_type),
-                    nullable=field.nullable,
-                    metadata=field.metadata,
-                )
-                new_fields.append(new_field)
-            else:
-                new_fields.append(field)
-
-        return pa.schema(new_fields)
-
-
-else:
-
-    def convert_large_types_to_standard(*args, **kwargs):
-        raise ImportError("pyarrow not installed")
-
-
-if importlib.util.find_spec("polars"):
-    import polars as pl
-
-    def _dict_to_dataframe(
-        data: dict | list[dict], unique: bool | list[str] | str = False
-    ) -> pl.DataFrame:
-        """
-        Convert a dictionary or list of dictionaries to a polars DataFrame.
-
-        Args:
-            data: (dict | list[dict]) Data to convert.
-
-        Returns:
-            pl.DataFrame: Converted data.
-
-        Examples:
-            >>> # Single dict with list values
-            >>> data = {'a': [1, 2, 3], 'b': [4, 5, 6]}
-            >>> _dict_to_dataframe(data)
-            shape: (3, 2)
-            ┌─────┬─────┐
-            │ a   ┆ b   │
-            │ --- ┆ --- │
-            │ i64 ┆ i64 │
-            ╞═════╪═════╡
-            │ 1   ┆ 4   │
-            │ 2   ┆ 5   │
-            │ 3   ┆ 6   │
-            └─────┴─────┘
-
-            >>> # Single dict with scalar values
-            >>> data = {'a': 1, 'b': 2}
-            >>> _dict_to_dataframe(data)
-            shape: (1, 2)
-            ┌─────┬─────┐
-            │ a   ┆ b   │
-            │ --- ┆ --- │
-            │ i64 ┆ i64 │
-            ╞═════╪═════╡
-            │ 1   ┆ 2   │
-            └─────┴─────┘
-
-            >>> # List of dicts with scalar values
-            >>> data = [{'a': 1, 'b': 2}, {'a': 3, 'b': 4}]
-            >>> _dict_to_dataframe(data)
-            shape: (2, 2)
-            ┌─────┬─────┐
-            │ a   ┆ b   │
-            │ --- ┆ --- │
-            │ i64 ┆ i64 │
-            ╞═════╪═════╡
-            │ 1   ┆ 2   │
-            │ 3   ┆ 4   │
-            └─────┴─────┘
-
-            >>> # List of dicts with list values
-            >>> data = [{'a': [1, 2], 'b': [3, 4]}, {'a': [5, 6], 'b': [7, 8]}]
-            >>> _dict_to_dataframe(data)
-            shape: (2, 2)
-            ┌───────┬───────┐
-            │ a     ┆ b     │
-            │ ---   ┆ ---   │
-            │ list  ┆ list  │
-            ╞═══════╪═══════╡
-            │ [1,2] ┆ [3,4] │
-            │ [5,6] ┆ [7,8] │
-            └───────┴───────┘
-        """
-        if isinstance(data, list):
-            # If it's a single-element list, just use the first element
-            if len(data) == 1:
-                data = data[0]
-            # If it's a list of dicts
-            else:
-                first_item = data[0]
-                # Check if the dict values are lists/tuples
-                if any(isinstance(v, (list, tuple)) for v in first_item.values()):
-                    # Each dict becomes a row with list/tuple values
-                    data = pl.DataFrame(data)
-                else:
-                    # If values are scalars, convert list of dicts to DataFrame
-                    data = pl.DataFrame(data)
-
-                if unique:
-                    data = data.unique(
-                        subset=None if not isinstance(unique, str | list) else unique,
-                        maintain_order=True,
-                    )
-                return data
-
-        # If it's a single dict
-        if isinstance(data, dict):
-            # Check if values are lists/tuples
-            if any(isinstance(v, (list, tuple)) for v in data.values()):
-                # Get the length of any list value (assuming all lists have same length)
-                length = len(
-                    next(v for v in data.values() if isinstance(v, (list, tuple)))
-                )
-                # Convert to DataFrame where each list element becomes a row
-                data = pl.DataFrame({
-                    k: v if isinstance(v, (list, tuple)) else [v] * length
-                    for k, v in data.items()
-                })
-            else:
-                # If values are scalars, wrap them in a list to create a single row
-                data = pl.DataFrame({k: [v] for k, v in data.items()})
-
-            if unique:
-                data = data.unique(
-                    subset=None if not isinstance(unique, str | list) else unique,
-                    maintain_order=True,
-                )
-            return data
-
-        raise ValueError("Input must be a dictionary or list of dictionaries")
-
-else:
-
-    def _dict_to_dataframe(*args, **kwargs):
-        raise ImportError("polars not installed")
-
-
-if (
-    importlib.util.find_spec("pandas")
-    and importlib.util.find_spec("polars")
-    and importlib.util.find_spec("pyarrow")
-):
-    from typing import Generator
-
-    import pandas as pd
-
-    def to_pyarrow_table(
-        data: pl.DataFrame
-        | pl.LazyFrame
-        | pd.DataFrame
-        | dict
-        | list[pl.DataFrame | pl.LazyFrame | pd.DataFrame | dict],
-        concat: bool = False,
-        unique: bool | list[str] | str = False,
-    ) -> pa.Table:
-        if isinstance(data, dict):
-            data = _dict_to_dataframe(data)
-        if isinstance(data, list):
-            if isinstance(data[0], dict):
-                data = _dict_to_dataframe(data, unique=unique)
-
-        if not isinstance(data, list):
-            data = [data]
-
-        if isinstance(data[0], pl.LazyFrame):
-            data = [dd.collect() for dd in data]
-
-        if isinstance(data[0], pl.DataFrame):
-            if concat:
-                data = pl.concat(data, how="diagonal_relaxed")
-                if unique:
-                    data = data.unique(
-                        subset=None if not isinstance(unique, str | list) else unique,
-                        maintain_order=True,
-                    )
-                data = data.to_arrow()
-                data = data.cast(convert_large_types_to_standard(data.schema))
-            else:
-                data = [dd.to_arrow() for dd in data]
-                data = [
-                    dd.cast(convert_large_types_to_standard(dd.schema)) for dd in data
-                ]
-
-        elif isinstance(data[0], pd.DataFrame):
-            data = [pa.Table.from_pandas(dd, preserve_index=False) for dd in data]
-            if concat:
-                data = pa.concat_tables(data, promote_options="permissive")
-                if unique:
-                    data = (
-                        pl.from_arrow(data)
-                        .unique(
-                            subset=None
-                            if not isinstance(unique, str | list)
-                            else unique,
-                            maintain_order=True,
-                        )
-                        .to_arrow()
-                    )
-                    data = data.cast(convert_large_types_to_standard(data.schema))
-
-        elif isinstance(data[0], pa.RecordBatch | pa.RecordBatchReader | Generator):
-            if concat:
-                data = pa.Table.from_batches(data)
-                if unique:
-                    data = (
-                        pl.from_arrow(data)
-                        .unique(
-                            subset=None
-                            if not isinstance(unique, str | list)
-                            else unique,
-                            maintain_order=True,
-                        )
-                        .to_arrow()
-                    )
-                    data = data.cast(convert_large_types_to_standard(data.schema))
-            else:
-                data = [pa.Table.from_batches([dd]) for dd in data]
-
-        return data
-
-else:
-
-    def to_pyarrow_table(*args, **kwargs):
-        raise ImportError("pandas, polars, or pyarrow not installed")
-
+from fsspec_utils import AbstractFileSystem, filesystem
 
 if importlib.util.find_spec("joblib"):
     from joblib import Parallel, delayed
@@ -484,3 +229,19 @@ def update_nested_dict(
             # Direct update
             result[key] = value
     return result
+
+
+def get_filesystem(fs: AbstractFileSystem | None = None, fs_type: str = "file") -> AbstractFileSystem:
+    """
+    Helper function to get a filesystem instance.
+
+    Args:
+        fs: An optional filesystem instance to use. If provided, this will be returned directly.
+        fs_type: The type of filesystem to create if fs is None. Defaults to "file".
+
+    Returns:
+        An AbstractFileSystem instance.
+    """
+    if fs is None:
+        fs = filesystem(fs_type)
+    return fs
flowerpower/utils/monkey.py
CHANGED
@@ -1,85 +1,3 @@
-
-import sys
+# Placeholder file - APScheduler monkey patches removed
 
-from dill import dumps, loads
 
-
-def patch_pickle():
-    """
-    Patch the pickle serializer in the apscheduler module.
-
-    This function replaces the `dumps` and `loads` functions in the `apscheduler.serializers.pickle` module
-    with custom implementations.
-
-    This is useful when you want to modify the behavior of the pickle serializer used by the apscheduler module.
-
-    Example usage:
-        patch_pickle()
-
-    """
-    sys.modules["apscheduler.serializers.pickle"].dumps = dumps
-    sys.modules["apscheduler.serializers.pickle"].loads = loads
-
-
-if importlib.util.find_spec("apscheduler"):
-    from apscheduler._structures import Job, Schedule, Task
-
-    def job_to_dict(job):
-        return {
-            "id": str(job.id),
-            "task_id": job.task_id,
-            "args": [str(arg) for arg in job.args],
-            "kwargs": job.kwargs,
-            "schedule_id": job.schedule_id,
-            "scheduled_fire_time": (
-                job.scheduled_fire_time.isoformat() if job.scheduled_fire_time else None
-            ),
-            "jitter": job.jitter.total_seconds(),
-            "start_deadline": (
-                job.start_deadline.isoformat() if job.start_deadline else None
-            ),
-            "result_expiration_time": job.result_expiration_time.total_seconds(),
-            "created_at": job.created_at.isoformat(),
-            "acquired_by": job.acquired_by,
-            "acquired_until": (
-                job.acquired_until.isoformat() if job.acquired_until else None
-            ),
-        }
-
-    Job.to_dict = job_to_dict
-
-    def task_to_dict(task):
-        return {
-            "id": task.id,
-            "func": task.func,
-            "job_executor": task.job_executor,
-            "max_running_jobs": task.max_running_jobs,
-            "misfire_grace_time": task.misfire_grace_time,
-        }
-
-    Task.to_dict = task_to_dict
-
-    def schedule_to_dict(schedule):
-        return {
-            "id": schedule.id,
-            "task_id": schedule.task_id,
-            "trigger": str(schedule.trigger),
-            "args": [str(arg) for arg in schedule.args],
-            "kwargs": schedule.kwargs,
-            "paused": schedule.paused,
-            "coalesce": schedule.coalesce.name if schedule.coalesce else None,
-            "misfire_grace_time": schedule.misfire_grace_time,
-            "max_jitter": schedule.max_jitter,
-            "next_fire_time": (
-                schedule.next_fire_time.isoformat() if schedule.next_fire_time else None
-            ),
-            "last_fire_time": (
-                schedule.last_fire_time.isoformat() if schedule.last_fire_time else None
-            ),
-            "acquired_by": schedule.acquired_by,
-            "acquired_until": (
-                schedule.acquired_until.isoformat() if schedule.acquired_until else None
-            ),
-        }
-
-    Schedule.to_dict = schedule_to_dict