FlowerPower 0.11.6.12__tar.gz → 0.11.6.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flowerpower-0.11.6.12/src/FlowerPower.egg-info → flowerpower-0.11.6.14}/PKG-INFO +1 -1
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/pyproject.toml +1 -1
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14/src/FlowerPower.egg-info}/PKG-INFO +1 -1
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/fs/ext.py +2 -2
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/helpers/pyarrow.py +135 -8
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/LICENSE +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/README.md +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/setup.cfg +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/FlowerPower.egg-info/SOURCES.txt +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/FlowerPower.egg-info/dependency_links.txt +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/FlowerPower.egg-info/entry_points.txt +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/FlowerPower.egg-info/requires.txt +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/FlowerPower.egg-info/top_level.txt +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/__init__.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/cfg/__init__.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/cfg/base.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/cfg/pipeline/__init__.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/cfg/pipeline/adapter.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/cfg/pipeline/run.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/cfg/pipeline/schedule.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/cfg/project/__init__.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/cfg/project/adapter.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/cfg/project/job_queue.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/cli/__init__.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/cli/cfg.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/cli/job_queue.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/cli/mqtt.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/cli/pipeline.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/cli/utils.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/flowerpower.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/fs/__init__.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/fs/base.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/fs/storage_options.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/job_queue/__init__.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/job_queue/apscheduler/__init__.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/job_queue/apscheduler/manager.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/job_queue/apscheduler/setup.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/job_queue/apscheduler/trigger.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/job_queue/apscheduler/utils.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/job_queue/base.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/job_queue/rq/__init__.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/job_queue/rq/_trigger.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/job_queue/rq/manager.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/job_queue/rq/setup.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/job_queue/rq/utils.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/mqtt.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/pipeline/__init__.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/pipeline/base.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/pipeline/io.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/pipeline/job_queue.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/pipeline/manager.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/pipeline/registry.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/pipeline/runner.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/pipeline/visualizer.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/base.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/helpers/datetime.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/helpers/polars.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/helpers/sql.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/loader/__init__.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/loader/csv.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/loader/deltatable.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/loader/duckdb.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/loader/json.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/loader/mqtt.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/loader/mssql.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/loader/mysql.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/loader/oracle.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/loader/parquet.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/loader/postgres.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/loader/pydala.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/loader/sqlite.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/metadata.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/saver/__init__.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/saver/csv.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/saver/deltatable.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/saver/duckdb.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/saver/json.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/saver/mqtt.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/saver/mssql.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/saver/mysql.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/saver/oracle.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/saver/parquet.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/saver/postgres.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/saver/pydala.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/saver/sqlite.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/mqtt/__init__.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/mqtt/cfg.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/mqtt/manager.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/settings/__init__.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/settings/backend.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/settings/executor.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/settings/general.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/settings/hamilton.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/settings/job_queue.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/settings/logging.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/settings/retry.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/utils/callback.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/utils/logging.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/utils/misc.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/utils/monkey.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/utils/open_telemetry.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/utils/scheduler.py +0 -0
- {flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/utils/templates.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: FlowerPower
|
3
|
-
Version: 0.11.6.12
|
3
|
+
Version: 0.11.6.14
|
4
4
|
Summary: A simple workflow framework. Hamilton + APScheduler = FlowerPower
|
5
5
|
Author-email: "Volker L." <ligno.blades@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/legout/flowerpower
|
@@ -4,7 +4,7 @@ description = "A simple workflow framework. Hamilton + APScheduler = FlowerPower
|
|
4
4
|
authors = [{ name = "Volker L.", email = "ligno.blades@gmail.com" }]
|
5
5
|
readme = "README.md"
|
6
6
|
requires-python = ">= 3.11"
|
7
|
-
version = "0.11.6.12"
|
7
|
+
version = "0.11.6.14"
|
8
8
|
keywords = [
|
9
9
|
"hamilton",
|
10
10
|
"workflow",
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: FlowerPower
|
3
|
-
Version: 0.11.6.12
|
3
|
+
Version: 0.11.6.14
|
4
4
|
Summary: A simple workflow framework. Hamilton + APScheduler = FlowerPower
|
5
5
|
Author-email: "Volker L." <ligno.blades@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/legout/flowerpower
|
@@ -951,7 +951,7 @@ def _read_parquet(
|
|
951
951
|
if isinstance(tables, list):
|
952
952
|
if len(tables) > 1:
|
953
953
|
schemas = [t.schema for t in tables]
|
954
|
-
unified_schema = unify_schemas_pa(schemas)
|
954
|
+
unified_schema = unify_schemas_pa(schemas, standardize_timezones=True)
|
955
955
|
tables = [cast_schema(t, unified_schema) for t in tables]
|
956
956
|
|
957
957
|
tables = [table for table in tables if table.num_rows > 0]
|
@@ -1095,7 +1095,7 @@ def _read_parquet_batches(
|
|
1095
1095
|
# Unify schemas before concatenation
|
1096
1096
|
if len(batch_tables) > 1:
|
1097
1097
|
schemas = [t.schema for t in batch_tables]
|
1098
|
-
unified_schema = unify_schemas_pa(schemas)
|
1098
|
+
unified_schema = unify_schemas_pa(schemas, standardize_timezones=True)
|
1099
1099
|
batch_tables = [cast_schema(t, unified_schema) for t in batch_tables]
|
1100
1100
|
batch_tables = [table for table in batch_tables if table.num_rows > 0]
|
1101
1101
|
if not batch_tables:
|
{flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/helpers/pyarrow.py
RENAMED
@@ -28,18 +28,133 @@ F32_MIN = float(np.finfo(np.float32).min)
|
|
28
28
|
F32_MAX = float(np.finfo(np.float32).max)
|
29
29
|
|
30
30
|
|
31
|
+
def dominant_timezone_per_column(
|
32
|
+
schemas: list[pa.Schema],
|
33
|
+
) -> dict[str, tuple[str | None, str | None]]:
|
34
|
+
"""
|
35
|
+
For each timestamp column (by name) across all schemas, detect the most frequent timezone (including None).
|
36
|
+
If None and a timezone are tied, prefer the timezone.
|
37
|
+
Returns a dict: {column_name: dominant_timezone}
|
38
|
+
"""
|
39
|
+
from collections import Counter, defaultdict
|
40
|
+
|
41
|
+
tz_counts = defaultdict(Counter)
|
42
|
+
units = {}
|
43
|
+
|
44
|
+
for schema in schemas:
|
45
|
+
for field in schema:
|
46
|
+
if pa.types.is_timestamp(field.type):
|
47
|
+
tz = field.type.tz
|
48
|
+
name = field.name
|
49
|
+
tz_counts[name][tz] += 1
|
50
|
+
# Track unit for each column (assume consistent)
|
51
|
+
if name not in units:
|
52
|
+
units[name] = field.type.unit
|
53
|
+
|
54
|
+
dominant = {}
|
55
|
+
for name, counter in tz_counts.items():
|
56
|
+
most_common = counter.most_common()
|
57
|
+
if not most_common:
|
58
|
+
continue
|
59
|
+
top_count = most_common[0][1]
|
60
|
+
# Find all with top_count
|
61
|
+
top_tzs = [tz for tz, cnt in most_common if cnt == top_count]
|
62
|
+
# If tie and one is not None, prefer not-None
|
63
|
+
if len(top_tzs) > 1 and any(tz is not None for tz in top_tzs):
|
64
|
+
tz = next(tz for tz in top_tzs if tz is not None)
|
65
|
+
else:
|
66
|
+
tz = most_common[0][0]
|
67
|
+
dominant[name] = (units[name], tz)
|
68
|
+
return dominant
|
69
|
+
|
70
|
+
|
71
|
+
def standardize_schema_timezones_by_majority(
|
72
|
+
schemas: list[pa.Schema],
|
73
|
+
) -> list[pa.Schema]:
|
74
|
+
"""
|
75
|
+
For each timestamp column (by name) across all schemas, set the timezone to the most frequent (with tie-breaking).
|
76
|
+
Returns a new list of schemas with updated timestamp timezones.
|
77
|
+
"""
|
78
|
+
dom = dominant_timezone_per_column(schemas)
|
79
|
+
new_schemas = []
|
80
|
+
for schema in schemas:
|
81
|
+
fields = []
|
82
|
+
for field in schema:
|
83
|
+
if pa.types.is_timestamp(field.type) and field.name in dom:
|
84
|
+
unit, tz = dom[field.name]
|
85
|
+
fields.append(
|
86
|
+
pa.field(
|
87
|
+
field.name,
|
88
|
+
pa.timestamp(unit, tz),
|
89
|
+
field.nullable,
|
90
|
+
field.metadata,
|
91
|
+
)
|
92
|
+
)
|
93
|
+
else:
|
94
|
+
fields.append(field)
|
95
|
+
new_schemas.append(pa.schema(fields, schema.metadata))
|
96
|
+
return new_schemas
|
97
|
+
|
98
|
+
|
99
|
+
def standardize_schema_timezones(
|
100
|
+
schemas: list[pa.Schema], timezone: str | None = None
|
101
|
+
) -> list[pa.Schema]:
|
102
|
+
"""
|
103
|
+
Standardize timezone info for all timestamp columns in a list of PyArrow schemas.
|
104
|
+
|
105
|
+
Args:
|
106
|
+
schemas (list of pa.Schema): List of PyArrow schemas.
|
107
|
+
timezone (str or None): If None, remove timezone from all timestamp columns.
|
108
|
+
If str, set this timezone for all timestamp columns.
|
109
|
+
If "auto", use the most frequent timezone across schemas.
|
110
|
+
|
111
|
+
Returns:
|
112
|
+
list of pa.Schema: New schemas with standardized timezone info.
|
113
|
+
"""
|
114
|
+
if timezone == "auto":
|
115
|
+
# Use the most frequent timezone for each column
|
116
|
+
return standardize_schema_timezones_by_majority(schemas)
|
117
|
+
new_schemas = []
|
118
|
+
for schema in schemas:
|
119
|
+
fields = []
|
120
|
+
for field in schema:
|
121
|
+
if pa.types.is_timestamp(field.type):
|
122
|
+
fields.append(
|
123
|
+
pa.field(
|
124
|
+
field.name,
|
125
|
+
pa.timestamp(field.type.unit, timezone),
|
126
|
+
field.nullable,
|
127
|
+
field.metadata,
|
128
|
+
)
|
129
|
+
)
|
130
|
+
else:
|
131
|
+
fields.append(field)
|
132
|
+
new_schemas.append(pa.schema(fields, schema.metadata))
|
133
|
+
return new_schemas
|
134
|
+
|
135
|
+
|
31
136
|
def unify_schemas(
|
32
|
-
schemas: list[pa.Schema],
|
137
|
+
schemas: list[pa.Schema],
|
138
|
+
use_large_dtypes: bool = False,
|
139
|
+
timezone: str | None = None,
|
140
|
+
standardize_timezones: bool = True,
|
33
141
|
) -> pa.Schema:
|
34
142
|
"""
|
35
143
|
Unify a list of PyArrow schemas into a single schema.
|
36
144
|
|
37
145
|
Args:
|
38
146
|
schemas (list[pa.Schema]): List of PyArrow schemas to unify.
|
147
|
+
use_large_dtypes (bool): If True, keep large types like large_string.
|
148
|
+
timezone (str | None): If specified, standardize all timestamp columns to this timezone.
|
149
|
+
If "auto", use the most frequent timezone across schemas.
|
150
|
+
If None, remove timezone from all timestamp columns.
|
151
|
+
standardize_timezones (bool): If True, standardize all timestamp columns to the most frequent timezone.
|
39
152
|
|
40
153
|
Returns:
|
41
154
|
pa.Schema: A unified PyArrow schema.
|
42
155
|
"""
|
156
|
+
if standardize_timezones:
|
157
|
+
schemas = standardize_schema_timezones(schemas, timezone)
|
43
158
|
try:
|
44
159
|
return pa.unify_schemas(schemas, promote_options="permissive")
|
45
160
|
except (pa.lib.ArrowInvalid, pa.lib.ArrowTypeError) as e:
|
@@ -189,7 +304,7 @@ def _can_downcast_to_float32(array: pa.Array) -> bool:
|
|
189
304
|
return F32_MIN <= min_val <= max_val <= F32_MAX
|
190
305
|
|
191
306
|
|
192
|
-
def _get_optimal_int_type(array: pa.Array) -> pa.DataType:
|
307
|
+
def _get_optimal_int_type(array: pa.Array, allow_unsigned: bool) -> pa.DataType:
|
193
308
|
"""
|
194
309
|
Determine the most efficient integer type based on data range.
|
195
310
|
"""
|
@@ -202,7 +317,8 @@ def _get_optimal_int_type(array: pa.Array) -> pa.DataType:
|
|
202
317
|
min_val = min_max["min"].as_py()
|
203
318
|
max_val = min_max["max"].as_py()
|
204
319
|
|
205
|
-
if min_val >= 0:
|
320
|
+
if allow_unsigned and min_val >= 0:
|
321
|
+
# If allow_unsigned is True, check for unsigned types
|
206
322
|
if max_val <= 255:
|
207
323
|
return pa.uint8()
|
208
324
|
elif max_val <= 65535:
|
@@ -211,6 +327,7 @@ def _get_optimal_int_type(array: pa.Array) -> pa.DataType:
|
|
211
327
|
return pa.uint32()
|
212
328
|
else:
|
213
329
|
return pa.uint64()
|
330
|
+
|
214
331
|
else: # Signed
|
215
332
|
if -128 <= min_val and max_val <= 127:
|
216
333
|
return pa.int8()
|
@@ -222,7 +339,9 @@ def _get_optimal_int_type(array: pa.Array) -> pa.DataType:
|
|
222
339
|
return pa.int64()
|
223
340
|
|
224
341
|
|
225
|
-
def _optimize_numeric_array(
|
342
|
+
def _optimize_numeric_array(
|
343
|
+
array: pa.Array, shrink: bool, allow_unsigned: bool = True
|
344
|
+
) -> pa.Array:
|
226
345
|
"""
|
227
346
|
Optimize numeric PyArrow array by downcasting when possible.
|
228
347
|
Uses vectorized operations for efficiency.
|
@@ -242,7 +361,7 @@ def _optimize_numeric_array(array: pa.Array, shrink: bool) -> pa.Array:
|
|
242
361
|
if array.type in [pa.int8(), pa.uint8()]:
|
243
362
|
return array
|
244
363
|
|
245
|
-
optimal_type = _get_optimal_int_type(array)
|
364
|
+
optimal_type = _get_optimal_int_type(array, allow_unsigned)
|
246
365
|
return pc.cast(array, optimal_type)
|
247
366
|
|
248
367
|
# Default: return unchanged
|
@@ -333,7 +452,11 @@ def _optimize_string_array(
|
|
333
452
|
|
334
453
|
|
335
454
|
def _process_column(
|
336
|
-
table: pa.Table,
|
455
|
+
table: pa.Table,
|
456
|
+
col_name: str,
|
457
|
+
shrink_numerics: bool,
|
458
|
+
allow_unsigned: bool,
|
459
|
+
time_zone: str | None = None,
|
337
460
|
) -> pa.Array:
|
338
461
|
"""
|
339
462
|
Process a single column for type optimization.
|
@@ -346,7 +469,7 @@ def _process_column(
|
|
346
469
|
|
347
470
|
# Process based on current type
|
348
471
|
if pa.types.is_floating(array.type) or pa.types.is_integer(array.type):
|
349
|
-
return _optimize_numeric_array(array, shrink_numerics)
|
472
|
+
return _optimize_numeric_array(array, shrink_numerics, allow_unsigned)
|
350
473
|
elif pa.types.is_string(array.type):
|
351
474
|
return _optimize_string_array(array, col_name, shrink_numerics, time_zone)
|
352
475
|
|
@@ -360,6 +483,7 @@ def opt_dtype(
|
|
360
483
|
exclude: str | list[str] | None = None,
|
361
484
|
time_zone: str | None = None,
|
362
485
|
shrink_numerics: bool = True,
|
486
|
+
allow_unsigned: bool = True,
|
363
487
|
strict: bool = False,
|
364
488
|
) -> pa.Table:
|
365
489
|
"""
|
@@ -376,6 +500,7 @@ def opt_dtype(
|
|
376
500
|
exclude: Column(s) to exclude from optimization
|
377
501
|
time_zone: Optional time zone for datetime parsing
|
378
502
|
shrink_numerics: Whether to downcast numeric types when possible
|
503
|
+
allow_unsigned: Whether to allow unsigned types
|
379
504
|
strict: If True, will raise an error if any column cannot be optimized
|
380
505
|
|
381
506
|
Returns:
|
@@ -401,7 +526,9 @@ def opt_dtype(
|
|
401
526
|
try:
|
402
527
|
# Process column for optimization
|
403
528
|
new_columns.append(
|
404
|
-
_process_column(
|
529
|
+
_process_column(
|
530
|
+
table, col_name, shrink_numerics, allow_unsigned, time_zone
|
531
|
+
)
|
405
532
|
)
|
406
533
|
except Exception as e:
|
407
534
|
if strict:
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/FlowerPower.egg-info/dependency_links.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/job_queue/apscheduler/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/job_queue/apscheduler/manager.py
RENAMED
File without changes
|
{flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/job_queue/apscheduler/setup.py
RENAMED
File without changes
|
{flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/job_queue/apscheduler/trigger.py
RENAMED
File without changes
|
{flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/job_queue/apscheduler/utils.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/helpers/datetime.py
RENAMED
File without changes
|
{flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/helpers/polars.py
RENAMED
File without changes
|
File without changes
|
{flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/loader/__init__.py
RENAMED
File without changes
|
File without changes
|
{flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/loader/deltatable.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/loader/parquet.py
RENAMED
File without changes
|
{flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/loader/postgres.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/saver/__init__.py
RENAMED
File without changes
|
File without changes
|
{flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/saver/deltatable.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{flowerpower-0.11.6.12 → flowerpower-0.11.6.14}/src/flowerpower/plugins/io/saver/postgres.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|