FlowerPower 0.11.6.5__tar.gz → 0.11.6.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flowerpower-0.11.6.5/src/FlowerPower.egg-info → flowerpower-0.11.6.7}/PKG-INFO +1 -1
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/pyproject.toml +1 -1
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7/src/FlowerPower.egg-info}/PKG-INFO +1 -1
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/fs/ext.py +77 -47
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/helpers/polars.py +6 -6
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/helpers/pyarrow.py +3 -3
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/LICENSE +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/README.md +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/setup.cfg +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/FlowerPower.egg-info/SOURCES.txt +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/FlowerPower.egg-info/dependency_links.txt +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/FlowerPower.egg-info/entry_points.txt +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/FlowerPower.egg-info/requires.txt +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/FlowerPower.egg-info/top_level.txt +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/__init__.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/cfg/__init__.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/cfg/base.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/cfg/pipeline/__init__.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/cfg/pipeline/adapter.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/cfg/pipeline/run.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/cfg/pipeline/schedule.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/cfg/project/__init__.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/cfg/project/adapter.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/cfg/project/job_queue.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/cli/__init__.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/cli/cfg.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/cli/job_queue.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/cli/mqtt.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/cli/pipeline.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/cli/utils.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/flowerpower.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/fs/__init__.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/fs/base.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/fs/storage_options.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/__init__.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/apscheduler/__init__.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/apscheduler/manager.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/apscheduler/setup.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/apscheduler/trigger.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/apscheduler/utils.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/base.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/rq/__init__.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/rq/_trigger.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/rq/manager.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/rq/setup.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/rq/utils.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/mqtt.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/pipeline/__init__.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/pipeline/base.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/pipeline/io.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/pipeline/job_queue.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/pipeline/manager.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/pipeline/registry.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/pipeline/runner.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/pipeline/visualizer.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/base.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/helpers/datetime.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/helpers/sql.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/__init__.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/csv.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/deltatable.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/duckdb.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/json.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/mqtt.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/mssql.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/mysql.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/oracle.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/parquet.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/postgres.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/pydala.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/sqlite.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/metadata.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/__init__.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/csv.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/deltatable.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/duckdb.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/json.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/mqtt.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/mssql.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/mysql.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/oracle.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/parquet.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/postgres.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/pydala.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/sqlite.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/mqtt/__init__.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/mqtt/cfg.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/mqtt/manager.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/settings/__init__.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/settings/backend.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/settings/executor.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/settings/general.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/settings/hamilton.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/settings/job_queue.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/settings/logging.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/settings/retry.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/utils/callback.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/utils/logging.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/utils/misc.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/utils/monkey.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/utils/open_telemetry.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/utils/scheduler.py +0 -0
- {flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/utils/templates.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: FlowerPower
|
3
|
-
Version: 0.11.6.5
|
3
|
+
Version: 0.11.6.7
|
4
4
|
Summary: A simple workflow framework. Hamilton + APScheduler = FlowerPower
|
5
5
|
Author-email: "Volker L." <ligno.blades@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/legout/flowerpower
|
@@ -4,7 +4,7 @@ description = "A simple workflow framework. Hamilton + APScheduler = FlowerPower
|
|
4
4
|
authors = [{ name = "Volker L.", email = "ligno.blades@gmail.com" }]
|
5
5
|
readme = "README.md"
|
6
6
|
requires-python = ">= 3.11"
|
7
|
-
version = "0.11.6.5"
|
7
|
+
version = "0.11.6.7"
|
8
8
|
keywords = [
|
9
9
|
"hamilton",
|
10
10
|
"workflow",
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: FlowerPower
|
3
|
-
Version: 0.11.6.5
|
3
|
+
Version: 0.11.6.7
|
4
4
|
Summary: A simple workflow framework. Hamilton + APScheduler = FlowerPower
|
5
5
|
Author-email: "Volker L." <ligno.blades@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/legout/flowerpower
|
@@ -814,6 +814,7 @@ def _read_parquet_file(
|
|
814
814
|
path: Path to Parquet file
|
815
815
|
self: Filesystem instance to use for reading
|
816
816
|
include_file_path: Add source filepath as a column
|
817
|
+
opt_dtypes: Optimize DataFrame dtypes
|
817
818
|
**kwargs: Additional arguments passed to pq.read_table()
|
818
819
|
|
819
820
|
Returns:
|
@@ -830,6 +831,11 @@ def _read_parquet_file(
|
|
830
831
|
>>> print("file_path" in table.column_names)
|
831
832
|
True
|
832
833
|
"""
|
834
|
+
if not path.endswith(".parquet"):
|
835
|
+
raise ValueError(
|
836
|
+
f"Path '{path}' does not point to a Parquet file. "
|
837
|
+
"Ensure the path ends with '.parquet'."
|
838
|
+
)
|
833
839
|
table = pq.read_table(path, filesystem=self, **kwargs)
|
834
840
|
if include_file_path:
|
835
841
|
table = table.add_column(0, "file_path", pl.Series([path] * table.num_rows))
|
@@ -841,6 +847,30 @@ def _read_parquet_file(
|
|
841
847
|
def read_parquet_file(
|
842
848
|
self, path: str, include_file_path: bool = False, opt_dtypes: bool = False, **kwargs
|
843
849
|
) -> pa.Table:
|
850
|
+
"""Read a single Parquet file from any filesystem.
|
851
|
+
|
852
|
+
Internal function that handles reading individual Parquet files and
|
853
|
+
optionally adds the source filepath as a column.
|
854
|
+
|
855
|
+
Args:
|
856
|
+
path: Path to Parquet file
|
857
|
+
include_file_path: Add source filepath as a column
|
858
|
+
opt_dtypes: Optimize DataFrame dtypes
|
859
|
+
**kwargs: Additional arguments passed to pq.read_table()
|
860
|
+
|
861
|
+
Returns:
|
862
|
+
pa.Table: PyArrow Table containing Parquet data
|
863
|
+
|
864
|
+
Example:
|
865
|
+
>>> fs = LocalFileSystem()
|
866
|
+
>>> table = fs.read_parquet_file(
|
867
|
+
... "data.parquet",
|
868
|
+
... include_file_path=True,
|
869
|
+
... use_threads=True
|
870
|
+
... )
|
871
|
+
>>> print("file_path" in table.column_names)
|
872
|
+
True
|
873
|
+
"""
|
844
874
|
return _read_parquet_file(
|
845
875
|
path=path,
|
846
876
|
self=self,
|
@@ -852,7 +882,7 @@ def read_parquet_file(
|
|
852
882
|
|
853
883
|
def _read_parquet(
|
854
884
|
self,
|
855
|
-
path,
|
885
|
+
path: str | list[str],
|
856
886
|
include_file_path: bool = False,
|
857
887
|
use_threads: bool = True,
|
858
888
|
concat: bool = True,
|
@@ -874,48 +904,48 @@ def _read_parquet(
|
|
874
904
|
Returns:
|
875
905
|
(pa.Table | list[pa.Table]): Pyarrow Table or list of Pyarrow Tables.
|
876
906
|
"""
|
877
|
-
if not include_file_path and concat:
|
878
|
-
|
879
|
-
|
880
|
-
|
881
|
-
|
882
|
-
else:
|
883
|
-
|
884
|
-
|
885
|
-
|
907
|
+
# if not include_file_path and concat:
|
908
|
+
# if isinstance(path, str):
|
909
|
+
# path = path.replace("**", "").replace("*.parquet", "")
|
910
|
+
# table = _read_parquet_file(path, self=self, opt_dtypes=opt_dtypes, **kwargs)
|
911
|
+
# return table
|
912
|
+
# else:
|
913
|
+
if isinstance(path, str):
|
914
|
+
path = path_to_glob(path, format="parquet")
|
915
|
+
path = self.glob(path)
|
886
916
|
|
887
|
-
|
888
|
-
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
self=self,
|
893
|
-
include_file_path=include_file_path,
|
894
|
-
opt_dtypes=opt_dtypes,
|
895
|
-
n_jobs=-1,
|
896
|
-
backend="threading",
|
897
|
-
verbose=verbose,
|
898
|
-
**kwargs,
|
899
|
-
)
|
900
|
-
else:
|
901
|
-
tables = [
|
902
|
-
_read_parquet_file(
|
903
|
-
p,
|
904
|
-
self=self,
|
905
|
-
include_file_path=include_file_path,
|
906
|
-
opt_dtypes=opt_dtypes,
|
907
|
-
**kwargs,
|
908
|
-
)
|
909
|
-
for p in path
|
910
|
-
]
|
911
|
-
else:
|
912
|
-
tables = _read_parquet_file(
|
913
|
-
path=path,
|
917
|
+
if isinstance(path, list):
|
918
|
+
if use_threads:
|
919
|
+
tables = run_parallel(
|
920
|
+
_read_parquet_file,
|
921
|
+
path,
|
914
922
|
self=self,
|
915
923
|
include_file_path=include_file_path,
|
916
924
|
opt_dtypes=opt_dtypes,
|
925
|
+
n_jobs=-1,
|
926
|
+
backend="threading",
|
927
|
+
verbose=verbose,
|
917
928
|
**kwargs,
|
918
929
|
)
|
930
|
+
else:
|
931
|
+
tables = [
|
932
|
+
_read_parquet_file(
|
933
|
+
p,
|
934
|
+
self=self,
|
935
|
+
include_file_path=include_file_path,
|
936
|
+
opt_dtypes=opt_dtypes,
|
937
|
+
**kwargs,
|
938
|
+
)
|
939
|
+
for p in path
|
940
|
+
]
|
941
|
+
else:
|
942
|
+
tables = _read_parquet_file(
|
943
|
+
path=path,
|
944
|
+
self=self,
|
945
|
+
include_file_path=include_file_path,
|
946
|
+
opt_dtypes=opt_dtypes,
|
947
|
+
**kwargs,
|
948
|
+
)
|
919
949
|
if concat:
|
920
950
|
# Unify schemas before concatenation if opt_dtypes or multiple tables
|
921
951
|
if isinstance(tables, list):
|
@@ -1001,14 +1031,14 @@ def _read_parquet_batches(
|
|
1001
1031
|
... print(f"Batch schema: {batch.schema}")
|
1002
1032
|
"""
|
1003
1033
|
# Fast path for simple cases
|
1004
|
-
if not include_file_path and concat and batch_size is None:
|
1005
|
-
|
1006
|
-
|
1007
|
-
|
1008
|
-
|
1009
|
-
|
1010
|
-
|
1011
|
-
|
1034
|
+
# if not include_file_path and concat and batch_size is None:
|
1035
|
+
# if isinstance(path, str):
|
1036
|
+
# path = path.replace("**", "").replace("*.parquet", "")
|
1037
|
+
# table = _read_parquet_file(
|
1038
|
+
# path=path, self=self, opt_dtypes=opt_dtypes, **kwargs
|
1039
|
+
# )
|
1040
|
+
# yield table
|
1041
|
+
# return
|
1012
1042
|
|
1013
1043
|
# Resolve path(s) to list
|
1014
1044
|
if isinstance(path, str):
|
@@ -1058,13 +1088,13 @@ def _read_parquet_batches(
|
|
1058
1088
|
schemas = [t.schema for t in batch_tables]
|
1059
1089
|
unified_schema = unify_schemas_pa(schemas)
|
1060
1090
|
batch_tables = [cast_schema(t, unified_schema) for t in batch_tables]
|
1061
|
-
|
1091
|
+
batch_table = pa.concat_tables(
|
1062
1092
|
[table for table in batch_tables if table.num_rows > 0],
|
1063
1093
|
promote_options="permissive",
|
1064
1094
|
)
|
1065
1095
|
# if opt_dtypes:
|
1066
1096
|
# result = opt_dtype_pa(result, strict=False)
|
1067
|
-
yield
|
1097
|
+
yield batch_table
|
1068
1098
|
else:
|
1069
1099
|
# if opt_dtypes and isinstance(batch_tables, list):
|
1070
1100
|
# batch_tables = [opt_dtype_pa(t, strict=False) for t in batch_tables]
|
@@ -68,20 +68,20 @@ def _optimize_string_column(
|
|
68
68
|
cleaned_expr = _clean_string_expr(col_name)
|
69
69
|
non_null = series.drop_nulls().replace({"-": None, "": None, "None": None})
|
70
70
|
if len(non_null) == 0:
|
71
|
-
return pl.col(col_name).cast(
|
71
|
+
return pl.col(col_name).cast(series.dtype)
|
72
72
|
|
73
73
|
stripped = non_null.str.strip_chars()
|
74
74
|
lowercase = stripped.str.to_lowercase()
|
75
75
|
|
76
76
|
# Check for boolean values
|
77
|
-
if lowercase.str.contains(BOOLEAN_REGEX).all():
|
77
|
+
if lowercase.str.contains(BOOLEAN_REGEX).all(ignore_nulls=False):
|
78
78
|
return (
|
79
79
|
cleaned_expr.str.to_lowercase()
|
80
80
|
.str.contains(BOOLEAN_TRUE_REGEX)
|
81
81
|
.alias(col_name)
|
82
82
|
)
|
83
83
|
|
84
|
-
elif stripped.str.contains(INTEGER_REGEX).all():
|
84
|
+
elif stripped.str.contains(INTEGER_REGEX).all(ignore_nulls=False):
|
85
85
|
int_expr = cleaned_expr.cast(pl.Int64)
|
86
86
|
return (
|
87
87
|
int_expr.shrink_dtype().alias(col_name)
|
@@ -90,7 +90,7 @@ def _optimize_string_column(
|
|
90
90
|
)
|
91
91
|
|
92
92
|
# Check for numeric values
|
93
|
-
elif stripped.str.contains(FLOAT_REGEX).all():
|
93
|
+
elif stripped.str.contains(FLOAT_REGEX).all(ignore_nulls=False):
|
94
94
|
float_expr = cleaned_expr.str.replace_all(",", ".").cast(pl.Float64)
|
95
95
|
|
96
96
|
if shrink_numerics:
|
@@ -104,7 +104,7 @@ def _optimize_string_column(
|
|
104
104
|
return float_expr.alias(col_name)
|
105
105
|
|
106
106
|
try:
|
107
|
-
if stripped.str.contains(DATETIME_REGEX).all():
|
107
|
+
if stripped.str.contains(DATETIME_REGEX).all(ignore_nulls=False):
|
108
108
|
return cleaned_expr.str.to_datetime(
|
109
109
|
strict=False, time_unit="us", time_zone=time_zone
|
110
110
|
).alias(col_name)
|
@@ -123,7 +123,7 @@ def _get_column_expr(
|
|
123
123
|
|
124
124
|
# Handle all-null columns
|
125
125
|
if series.is_null().all():
|
126
|
-
return pl.col(col_name).cast(
|
126
|
+
return pl.col(col_name).cast(series.dtype)
|
127
127
|
|
128
128
|
# Process based on current type
|
129
129
|
if series.dtype.is_numeric():
|
@@ -255,7 +255,7 @@ def _all_match_regex(array: pa.Array, pattern: str) -> bool:
|
|
255
255
|
Uses pyarrow.compute.match_substring_regex for vectorized evaluation.
|
256
256
|
"""
|
257
257
|
if len(array) == 0 or array.null_count == len(array):
|
258
|
-
return
|
258
|
+
return False
|
259
259
|
|
260
260
|
# Check if al values match the pattern
|
261
261
|
return pc.all(pc.match_substring_regex(array, pattern, ignore_case=True)).as_py()
|
@@ -272,7 +272,7 @@ def _optimize_string_array(
|
|
272
272
|
if len(array) == 0:
|
273
273
|
return pa.array([], type=pa.int8())
|
274
274
|
if array.null_count == len(array):
|
275
|
-
return pa.array([None] * len(array), type=
|
275
|
+
return pa.array([None] * len(array), type=array.type)
|
276
276
|
|
277
277
|
# Clean string values
|
278
278
|
cleaned_array = _clean_string_array(array)
|
@@ -342,7 +342,7 @@ def _process_column(
|
|
342
342
|
|
343
343
|
# Handle all-null columns
|
344
344
|
if array.null_count == len(array):
|
345
|
-
return pa.array([None] * len(array), type=
|
345
|
+
return pa.array([None] * len(array), type=array.type)
|
346
346
|
|
347
347
|
# Process based on current type
|
348
348
|
if pa.types.is_floating(array.type) or pa.types.is_integer(array.type):
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/apscheduler/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/apscheduler/manager.py
RENAMED
File without changes
|
{flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/apscheduler/setup.py
RENAMED
File without changes
|
{flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/apscheduler/trigger.py
RENAMED
File without changes
|
{flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/apscheduler/utils.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/helpers/datetime.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/deltatable.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{flowerpower-0.11.6.5 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/deltatable.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|