PyPI - FlowerPower - Versions diffs - 0.11.6__tar.gz → 0.11.6.1__tar.gz - Mend

FlowerPower 0.11.6__tar.gz → 0.11.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (107) hide show

{flowerpower-0.11.6/src/FlowerPower.egg-info → flowerpower-0.11.6.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: FlowerPower
-Version: 0.11.6
+Version: 0.11.6.1
 Summary: A simple workflow framework. Hamilton + APScheduler = FlowerPower
 Author-email: "Volker L." <ligno.blades@gmail.com>
 Project-URL: Homepage, https://github.com/legout/flowerpower

{flowerpower-0.11.6 → flowerpower-0.11.6.1}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ description = "A simple workflow framework. Hamilton + APScheduler = FlowerPower
 authors = [{ name = "Volker L.", email = "ligno.blades@gmail.com" }]
 readme = "README.md"
 requires-python = ">= 3.11"
-version = "0.11.6"
+version = "0.11.6.1"
 keywords = [
   "hamilton",
   "workflow",

{flowerpower-0.11.6 → flowerpower-0.11.6.1/src/FlowerPower.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: FlowerPower
-Version: 0.11.6
+Version: 0.11.6.1
 Summary: A simple workflow framework. Hamilton + APScheduler = FlowerPower
 Author-email: "Volker L." <ligno.blades@gmail.com>
 Project-URL: Homepage, https://github.com/legout/flowerpower

{flowerpower-0.11.6 → flowerpower-0.11.6.1}/src/flowerpower/fs/ext.py RENAMED Viewed

@@ -193,6 +193,7 @@ def _read_json(
         as_dataframe: (bool, optional) If True, return a DataFrame. Defaults to True.
         concat: (bool, optional) If True, concatenate the DataFrames. Defaults to True.
         verbose: (bool, optional) If True, print verbose output. Defaults to False.
+        opt_dtypes: (bool, optional) If True, optimize DataFrame dtypes. Defaults to False.
         **kwargs: Additional keyword arguments.
     Returns:
@@ -247,8 +248,8 @@ def _read_json(
             data = [opt_dtype_pl(df, strict=False) for df in data]
         if concat:
             result = pl.concat(data, how="diagonal_relaxed")
-            if opt_dtypes:
-                result = opt_dtype_pl(result, strict=False)
+            # if opt_dtypes:
+            #   result = opt_dtype_pl(result, strict=False)
             return result
     return data
@@ -280,6 +281,7 @@ def _read_json_batches(
         concat: Combine files within each batch
         use_threads: Enable parallel file reading within batches
         verbose: Print progress information
+        opt_dtypes: Optimize DataFrame dtypes
         **kwargs: Additional arguments for DataFrame conversion
     Yields:
@@ -354,10 +356,16 @@ def _read_json_batches(
                     ][0]
                     for _data in batch_data
                 ]
+            if opt_dtypes:
+                batch_dfs = [opt_dtype_pl(df, strict=False) for df in batch_dfs]
             if concat and len(batch_dfs) > 1:
-                yield pl.concat(batch_dfs, how="diagonal_relaxed")
+                batch_df = pl.concat(batch_dfs, how="diagonal_relaxed")
+                # if opt_dtypes:
+                #    batch_df = opt_dtype_pl(batch_df, strict=False)
+                yield batch_df
             else:
+                # if opt_dtypes:
+                #    batch_dfs = [opt_dtype_pl(df, strict=False) for df in batch_dfs]
                 yield batch_dfs
         else:
             yield batch_data
@@ -403,6 +411,7 @@ def read_json(
         concat: Combine multiple files/batches into single result
         use_threads: Enable parallel file reading
         verbose: Print progress information
+        opt_dtypes: Optimize DataFrame dtypes for performance
         **kwargs: Additional arguments passed to DataFrame conversion
     Returns:
@@ -486,6 +495,7 @@ def _read_csv_file(
         path: Path to CSV file
         self: Filesystem instance to use for reading
         include_file_path: Add source filepath as a column
+        opt_dtypes: Optimize DataFrame dtypes
         **kwargs: Additional arguments passed to pl.read_csv()
     Returns:
@@ -544,6 +554,7 @@ def _read_csv(
         use_threads: (bool, optional) If True, read files in parallel. Defaults to True.
         concat: (bool, optional) If True, concatenate the DataFrames. Defaults to True.
         verbose: (bool, optional) If True, print verbose output. Defaults to False.
+        opt_dtypes: (bool, optional) If True, optimize DataFrame dtypes. Defaults to False.
         **kwargs: Additional keyword arguments.
     Returns:
@@ -587,8 +598,8 @@ def _read_csv(
         )
     if concat:
         result = pl.concat(dfs, how="diagonal_relaxed")
-        if opt_dtypes:
-            result = opt_dtype_pl(result, strict=False)
+        # if opt_dtypes:
+        #    result = opt_dtype_pl(result, strict=False)
         return result
     return dfs
@@ -616,6 +627,7 @@ def _read_csv_batches(
         concat: Combine files within each batch
         use_threads: Enable parallel file reading within batches
         verbose: Print progress information
+        opt_dtypes: Optimize DataFrame dtypes
         **kwargs: Additional arguments passed to pl.read_csv()
     Yields:
@@ -667,23 +679,28 @@ def _read_csv_batches(
                 n_jobs=-1,
                 backend="threading",
                 verbose=verbose,
+                opt_dtypes=opt_dtypes,
                 **kwargs,
             )
         else:
             batch_dfs = [
                 _read_csv_file(
-                    p, self=self, include_file_path=include_file_path, **kwargs
+                    p,
+                    self=self,
+                    include_file_path=include_file_path,
+                    opt_dtypes=opt_dtypes,
+                    **kwargs,
                 )
                 for p in batch_paths
             ]
-        if opt_dtypes:
-            batch_dfs = [opt_dtype_pl(df, strict=False) for df in batch_dfs]
+        # if opt_dtypes:
+        #    batch_dfs = [opt_dtype_pl(df, strict=False) for df in batch_dfs]
         if concat and len(batch_dfs) > 1:
             result = pl.concat(batch_dfs, how="diagonal_relaxed")
-            if opt_dtypes:
-                result = opt_dtype_pl(result, strict=False)
+            # if opt_dtypes:
+            #    result = opt_dtype_pl(result, strict=False)
             yield result
         else:
             yield batch_dfs
@@ -766,6 +783,7 @@ def read_csv(
             concat=concat,
             use_threads=use_threads,
             verbose=verbose,
+            opt_dtypes=opt_dtypes,
             **kwargs,
         )
     return _read_csv(
@@ -775,6 +793,7 @@ def read_csv(
         concat=concat,
         use_threads=use_threads,
         verbose=verbose,
+        opt_dtypes=opt_dtypes,
         **kwargs,
     )
@@ -858,9 +877,7 @@ def _read_parquet(
     if not include_file_path and concat:
         if isinstance(path, str):
             path = path.replace("**", "").replace("*.parquet", "")
-        table = pq.read_table(path, filesystem=self, **kwargs)
-        if opt_dtypes:
-            table = opt_dtype_pa(table, strict=False)
+        table = _read_parquet_file(path, self=self, opt_dtypes=opt_dtypes, **kwargs)
         return table
     else:
         if isinstance(path, str):
@@ -907,12 +924,12 @@ def _read_parquet(
                 unified_schema = unify_schemas_pa(schemas)
                 tables = [cast_schema(t, unified_schema) for t in tables]
             result = pa.concat_tables(tables, promote_options="permissive")
-            if opt_dtypes:
-                result = opt_dtype_pa(result, strict=False)
+            # if opt_dtypes:
+            #    result = opt_dtype_pa(result, strict=False)
             return result
         elif isinstance(tables, pa.Table):
-            if opt_dtypes:
-                tables = opt_dtype_pa(tables, strict=False)
+            # if opt_dtypes:
+            #    tables = opt_dtype_pa(tables, strict=False)
             return tables
         else:
             return pa.concat_tables(tables, promote_options="permissive")
@@ -981,9 +998,9 @@ def _read_parquet_batches(
     if not include_file_path and concat and batch_size is None:
         if isinstance(path, str):
             path = path.replace("**", "").replace("*.parquet", "")
-        table = pq.read_table(path, filesystem=self, **kwargs)
-        if opt_dtypes:
-            table = opt_dtype_pa(table, strict=False)
+        table = _read_parquet_file(
+            path=path, self=self, opt_dtypes=opt_dtypes, **kwargs
+        )
         yield table
         return
@@ -994,7 +1011,11 @@ def _read_parquet_batches(
     if not isinstance(path, list):
         yield _read_parquet_file(
-            path=path, self=self, include_file_path=include_file_path, **kwargs
+            path=path,
+            self=self,
+            include_file_path=include_file_path,
+            opt_dtypes=opt_dtypes,
+            **kwargs,
         )
         return
@@ -1032,12 +1053,12 @@ def _read_parquet_batches(
                 unified_schema = unify_schemas_pa(schemas)
                 batch_tables = [cast_schema(t, unified_schema) for t in batch_tables]
             result = pa.concat_tables(batch_tables, promote_options="permissive")
-            if opt_dtypes:
-                result = opt_dtype_pa(result, strict=False)
+            # if opt_dtypes:
+            #    result = opt_dtype_pa(result, strict=False)
             yield result
         else:
-            if opt_dtypes and isinstance(batch_tables, list):
-                batch_tables = [opt_dtype_pa(t, strict=False) for t in batch_tables]
+            # if opt_dtypes and isinstance(batch_tables, list):
+            #    batch_tables = [opt_dtype_pa(t, strict=False) for t in batch_tables]
             yield batch_tables
@@ -1077,6 +1098,7 @@ def read_parquet(
         concat: Combine multiple files/batches into single Table
         use_threads: Enable parallel file reading
         verbose: Print progress information
+        opt_dtypes: Optimize Table dtypes for performance
         **kwargs: Additional arguments passed to pq.read_table()
     Returns:
@@ -1119,6 +1141,7 @@ def read_parquet(
             concat=concat,
             use_threads=use_threads,
             verbose=verbose,
+            opt_dtypes=opt_dtypes,
             **kwargs,
         )
     return _read_parquet(
@@ -1128,6 +1151,7 @@ def read_parquet(
         use_threads=use_threads,
         concat=concat,
         verbose=verbose,
+        opt_dtypes=opt_dtypes,
         **kwargs,
     )
@@ -1142,6 +1166,7 @@ def read_files(
     jsonlines: bool = False,
     use_threads: bool = True,
     verbose: bool = False,
+    opt_dtypes: bool = False,
     **kwargs: Any,
 ) -> (
     pl.DataFrame
@@ -1175,6 +1200,7 @@ def read_files(
         jsonlines: For JSON format, whether to read as JSON Lines
         use_threads: Enable parallel file reading
         verbose: Print progress information
+        opt_dtypes: Optimize DataFrame/Arrow Table dtypes for performance
         **kwargs: Additional format-specific arguments
     Returns:
@@ -1224,6 +1250,7 @@ def read_files(
                 concat=concat,
                 use_threads=use_threads,
                 verbose=verbose,
+                opt_dtypes=opt_dtypes,
                 **kwargs,
             )
         return read_json(
@@ -1234,6 +1261,7 @@ def read_files(
             concat=concat,
             use_threads=use_threads,
             verbose=verbose,
+            opt_dtypes=opt_dtypes,
             **kwargs,
         )
     elif format == "csv":
@@ -1246,6 +1274,7 @@ def read_files(
                 concat=concat,
                 use_threads=use_threads,
                 verbose=verbose,
+                opt_dtypes=opt_dtypes,
                 **kwargs,
             )
         return read_csv(
@@ -1255,6 +1284,7 @@ def read_files(
             use_threads=use_threads,
             concat=concat,
             verbose=verbose,
+            opt_dtypes=opt_dtypes,
             **kwargs,
         )
     elif format == "parquet":
@@ -1267,6 +1297,7 @@ def read_files(
                 concat=concat,
                 use_threads=use_threads,
                 verbose=verbose,
+                opt_dtypes=opt_dtypes,
                 **kwargs,
             )
         return read_parquet(
@@ -1276,6 +1307,7 @@ def read_files(
             use_threads=use_threads,
             concat=concat,
             verbose=verbose,
+            opt_dtypes=opt_dtypes,
             **kwargs,
         )

FlowerPower 0.11.6__tar.gz → 0.11.6.1__tar.gz

FlowerPower 0.11.6__tar.gz → 0.11.6.1__tar.gz