FlowerPower 0.11.6.4__py3-none-any.whl → 0.11.6.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
flowerpower/fs/ext.py CHANGED
@@ -923,7 +923,10 @@ def _read_parquet(
923
923
  schemas = [t.schema for t in tables]
924
924
  unified_schema = unify_schemas_pa(schemas)
925
925
  tables = [cast_schema(t, unified_schema) for t in tables]
926
- result = pa.concat_tables(tables, promote_options="permissive")
926
+ result = pa.concat_tables(
927
+ [table for table in tables if table.num_rows > 0],
928
+ promote_options="permissive",
929
+ )
927
930
  # if opt_dtypes:
928
931
  # result = opt_dtype_pa(result, strict=False)
929
932
  return result
@@ -932,7 +935,10 @@ def _read_parquet(
932
935
  # tables = opt_dtype_pa(tables, strict=False)
933
936
  return tables
934
937
  else:
935
- return pa.concat_tables(tables, promote_options="permissive")
938
+ return pa.concat_tables(
939
+ [table for table in tables if table.num_rows > 0],
940
+ promote_options="permissive",
941
+ )
936
942
  return tables
937
943
 
938
944
 
@@ -1052,7 +1058,10 @@ def _read_parquet_batches(
1052
1058
  schemas = [t.schema for t in batch_tables]
1053
1059
  unified_schema = unify_schemas_pa(schemas)
1054
1060
  batch_tables = [cast_schema(t, unified_schema) for t in batch_tables]
1055
- result = pa.concat_tables(batch_tables, promote_options="permissive")
1061
+ result = pa.concat_tables(
1062
+ [table for table in batch_tables if table.num_rows > 0],
1063
+ promote_options="permissive",
1064
+ )
1056
1065
  # if opt_dtypes:
1057
1066
  # result = opt_dtype_pa(result, strict=False)
1058
1067
  yield result
@@ -68,20 +68,20 @@ def _optimize_string_column(
68
68
  cleaned_expr = _clean_string_expr(col_name)
69
69
  non_null = series.drop_nulls().replace({"-": None, "": None, "None": None})
70
70
  if len(non_null) == 0:
71
- return pl.col(col_name).cast(pl.Int8)
71
+ return pl.col(col_name).cast(series.dtype)
72
72
 
73
73
  stripped = non_null.str.strip_chars()
74
74
  lowercase = stripped.str.to_lowercase()
75
75
 
76
76
  # Check for boolean values
77
- if lowercase.str.contains(BOOLEAN_REGEX).all():
77
+ if lowercase.str.contains(BOOLEAN_REGEX).all(ignore_nulls=False):
78
78
  return (
79
79
  cleaned_expr.str.to_lowercase()
80
80
  .str.contains(BOOLEAN_TRUE_REGEX)
81
81
  .alias(col_name)
82
82
  )
83
83
 
84
- elif stripped.str.contains(INTEGER_REGEX).all():
84
+ elif stripped.str.contains(INTEGER_REGEX).all(ignore_nulls=False):
85
85
  int_expr = cleaned_expr.cast(pl.Int64)
86
86
  return (
87
87
  int_expr.shrink_dtype().alias(col_name)
@@ -90,7 +90,7 @@ def _optimize_string_column(
90
90
  )
91
91
 
92
92
  # Check for numeric values
93
- elif stripped.str.contains(FLOAT_REGEX).all():
93
+ elif stripped.str.contains(FLOAT_REGEX).all(ignore_nulls=False):
94
94
  float_expr = cleaned_expr.str.replace_all(",", ".").cast(pl.Float64)
95
95
 
96
96
  if shrink_numerics:
@@ -104,7 +104,7 @@ def _optimize_string_column(
104
104
  return float_expr.alias(col_name)
105
105
 
106
106
  try:
107
- if stripped.str.contains(DATETIME_REGEX).all():
107
+ if stripped.str.contains(DATETIME_REGEX).all(ignore_nulls=False):
108
108
  return cleaned_expr.str.to_datetime(
109
109
  strict=False, time_unit="us", time_zone=time_zone
110
110
  ).alias(col_name)
@@ -123,7 +123,7 @@ def _get_column_expr(
123
123
 
124
124
  # Handle all-null columns
125
125
  if series.is_null().all():
126
- return pl.col(col_name).cast(pl.Int8)
126
+ return pl.col(col_name).cast(series.dtype)
127
127
 
128
128
  # Process based on current type
129
129
  if series.dtype.is_numeric():
@@ -255,7 +255,7 @@ def _all_match_regex(array: pa.Array, pattern: str) -> bool:
255
255
  Uses pyarrow.compute.match_substring_regex for vectorized evaluation.
256
256
  """
257
257
  if len(array) == 0 or array.null_count == len(array):
258
- return True
258
+ return False
259
259
 
260
260
  # Check if all values match the pattern
261
261
  return pc.all(pc.match_substring_regex(array, pattern, ignore_case=True)).as_py()
@@ -272,7 +272,7 @@ def _optimize_string_array(
272
272
  if len(array) == 0:
273
273
  return pa.array([], type=pa.int8())
274
274
  if array.null_count == len(array):
275
- return pa.array([None] * len(array), type=pa.int8())
275
+ return pa.array([None] * len(array), type=array.type)
276
276
 
277
277
  # Clean string values
278
278
  cleaned_array = _clean_string_array(array)
@@ -342,7 +342,7 @@ def _process_column(
342
342
 
343
343
  # Handle all-null columns
344
344
  if array.null_count == len(array):
345
- return pa.array([None] * len(array), type=pa.int8())
345
+ return pa.array([None] * len(array), type=array.type)
346
346
 
347
347
  # Process based on current type
348
348
  if pa.types.is_floating(array.type) or pa.types.is_integer(array.type):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: FlowerPower
3
- Version: 0.11.6.4
3
+ Version: 0.11.6.6
4
4
  Summary: A simple workflow framework. Hamilton + APScheduler = FlowerPower
5
5
  Author-email: "Volker L." <ligno.blades@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/legout/flowerpower
@@ -18,7 +18,7 @@ flowerpower/cli/pipeline.py,sha256=60P6u_QOSgp0jJXEMxazEEo5Sh7-SWFo-Kkuaz21YuI,3
18
18
  flowerpower/cli/utils.py,sha256=nDSSj_1nlYlMmj252kRZeohhFqHv9yvdgDEduQCyWOc,5152
19
19
  flowerpower/fs/__init__.py,sha256=uZaPXErEfQqQRbKRIjkB9yiygd45X5_psYn9-VVrBTQ,910
20
20
  flowerpower/fs/base.py,sha256=TqgqBsaFj13O1NpAr8kHuGJ9CTlaSWViMB8Ai_iuCjs,22761
21
- flowerpower/fs/ext.py,sha256=2NmhSbCIL0qnONMRNPHcPUuR39bGjWpxJE4hNHU5Rvw,69044
21
+ flowerpower/fs/ext.py,sha256=U-MX-OYsqhNzapcGVaGTifeMTZ5hY7VlAtQ8c8j5zi0,69314
22
22
  flowerpower/fs/storage_options.py,sha256=msq5TpxAU8tcE_Bxjw6SyxaFa75UjdYnR4-O9U2wmbk,48034
23
23
  flowerpower/job_queue/__init__.py,sha256=a25hIqv2xoFKb4JZlyUukS0ppZ9-2sJKH3XAvbk3rlk,10788
24
24
  flowerpower/job_queue/base.py,sha256=YwLunDQSyqkSU_vJ69C5SSybJeJP1bAiZ3teUtOchxA,13640
@@ -47,8 +47,8 @@ flowerpower/pipeline/visualizer.py,sha256=amjMrl5NetErE198HzZBPWVZBi_t5jj9ydxWpu
47
47
  flowerpower/plugins/io/base.py,sha256=_MruHw-eC67AHm6pieClLQ1X3uFvHg1uqIWsBuZOZos,97097
48
48
  flowerpower/plugins/io/metadata.py,sha256=PCrepLilXRWKDsB5BKFF_-OFs712s1zBeitW-84lDLQ,7005
49
49
  flowerpower/plugins/io/helpers/datetime.py,sha256=1WBUg2ywcsodJQwoF6JiIGc9yhVobvE2IErWp4i95m4,10649
50
- flowerpower/plugins/io/helpers/polars.py,sha256=GkiGVHDRduStAnaQSxqXmoT55D9nfxVqCHUSgf5hS0M,27361
51
- flowerpower/plugins/io/helpers/pyarrow.py,sha256=xsHDvEqUgb__ZFNXyfXHRccCxoW-62v4hmEj4NAA0XM,13900
50
+ flowerpower/plugins/io/helpers/polars.py,sha256=cuzMby0a90AMFXhNEycf53UOwdHw4uxnx322l3m7jB0,27443
51
+ flowerpower/plugins/io/helpers/pyarrow.py,sha256=NwA2NAPMIcGmaFE3gx1jKYW_-6gAxQ8Oczdgk4Av-s8,13903
52
52
  flowerpower/plugins/io/helpers/sql.py,sha256=BPIxjarKF3p93EdtUu-md8KislE9q8IWNSeZ5toFU6U,7298
53
53
  flowerpower/plugins/io/loader/__init__.py,sha256=MKH42nvVokaWas0wFgX1yrpU5iLpvHjRqqF-KzwLHCg,780
54
54
  flowerpower/plugins/io/loader/csv.py,sha256=Q5bmcbbr530sT1kQ2YiJwvsMUPqi0VcZWsLOygmzRyI,827
@@ -94,9 +94,9 @@ flowerpower/utils/monkey.py,sha256=VPl3yimoWhwD9kI05BFsjNvtyQiDyLfY4Q85Bb6Ma0w,2
94
94
  flowerpower/utils/open_telemetry.py,sha256=fQWJWbIQFtKIxMBjAWeF12NGnqT0isO3A3j-DSOv_vE,949
95
95
  flowerpower/utils/scheduler.py,sha256=2zJ_xmLXpvXUQNF1XS2Gqm3Ogo907ctZ50GtvQB_rhE,9354
96
96
  flowerpower/utils/templates.py,sha256=ouyEeSDqa9PjW8c32fGpcINlpC0WToawRFZkMPtwsLE,1591
97
- flowerpower-0.11.6.4.dist-info/licenses/LICENSE,sha256=9AkLexxrmr0aBgSHiqxpJk9wgazpP1CTJyiDyr56J9k,1063
98
- flowerpower-0.11.6.4.dist-info/METADATA,sha256=y90zVAntdH6BJpp1vM2magFQYMI3Tj_UsntYFI6dcj0,21612
99
- flowerpower-0.11.6.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
100
- flowerpower-0.11.6.4.dist-info/entry_points.txt,sha256=61X11i5a2IwC9LBiP20XCDl5zMOigGCjMCx17B7bDbQ,52
101
- flowerpower-0.11.6.4.dist-info/top_level.txt,sha256=VraH4WtEUfSxs5L-rXwDQhzQb9eLHTUtgvmFZ2dAYnA,12
102
- flowerpower-0.11.6.4.dist-info/RECORD,,
97
+ flowerpower-0.11.6.6.dist-info/licenses/LICENSE,sha256=9AkLexxrmr0aBgSHiqxpJk9wgazpP1CTJyiDyr56J9k,1063
98
+ flowerpower-0.11.6.6.dist-info/METADATA,sha256=LaDZ3aj-zU2iPSpQxEJE3A9QrjCYLIvHxk_tCmP2ams,21612
99
+ flowerpower-0.11.6.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
100
+ flowerpower-0.11.6.6.dist-info/entry_points.txt,sha256=61X11i5a2IwC9LBiP20XCDl5zMOigGCjMCx17B7bDbQ,52
101
+ flowerpower-0.11.6.6.dist-info/top_level.txt,sha256=VraH4WtEUfSxs5L-rXwDQhzQb9eLHTUtgvmFZ2dAYnA,12
102
+ flowerpower-0.11.6.6.dist-info/RECORD,,