FlowerPower 0.11.6.6__tar.gz → 0.11.6.7__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. {flowerpower-0.11.6.6/src/FlowerPower.egg-info → flowerpower-0.11.6.7}/PKG-INFO +1 -1
  2. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/pyproject.toml +1 -1
  3. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7/src/FlowerPower.egg-info}/PKG-INFO +1 -1
  4. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/fs/ext.py +77 -47
  5. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/LICENSE +0 -0
  6. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/README.md +0 -0
  7. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/setup.cfg +0 -0
  8. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/FlowerPower.egg-info/SOURCES.txt +0 -0
  9. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/FlowerPower.egg-info/dependency_links.txt +0 -0
  10. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/FlowerPower.egg-info/entry_points.txt +0 -0
  11. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/FlowerPower.egg-info/requires.txt +0 -0
  12. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/FlowerPower.egg-info/top_level.txt +0 -0
  13. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/__init__.py +0 -0
  14. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/cfg/__init__.py +0 -0
  15. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/cfg/base.py +0 -0
  16. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/cfg/pipeline/__init__.py +0 -0
  17. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/cfg/pipeline/adapter.py +0 -0
  18. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/cfg/pipeline/run.py +0 -0
  19. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/cfg/pipeline/schedule.py +0 -0
  20. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/cfg/project/__init__.py +0 -0
  21. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/cfg/project/adapter.py +0 -0
  22. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/cfg/project/job_queue.py +0 -0
  23. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/cli/__init__.py +0 -0
  24. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/cli/cfg.py +0 -0
  25. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/cli/job_queue.py +0 -0
  26. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/cli/mqtt.py +0 -0
  27. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/cli/pipeline.py +0 -0
  28. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/cli/utils.py +0 -0
  29. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/flowerpower.py +0 -0
  30. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/fs/__init__.py +0 -0
  31. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/fs/base.py +0 -0
  32. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/fs/storage_options.py +0 -0
  33. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/__init__.py +0 -0
  34. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/apscheduler/__init__.py +0 -0
  35. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -0
  36. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -0
  37. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/apscheduler/manager.py +0 -0
  38. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/apscheduler/setup.py +0 -0
  39. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/apscheduler/trigger.py +0 -0
  40. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/apscheduler/utils.py +0 -0
  41. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/base.py +0 -0
  42. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/rq/__init__.py +0 -0
  43. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/rq/_trigger.py +0 -0
  44. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +0 -0
  45. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -0
  46. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/rq/manager.py +0 -0
  47. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/rq/setup.py +0 -0
  48. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/job_queue/rq/utils.py +0 -0
  49. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/mqtt.py +0 -0
  50. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/pipeline/__init__.py +0 -0
  51. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/pipeline/base.py +0 -0
  52. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/pipeline/io.py +0 -0
  53. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/pipeline/job_queue.py +0 -0
  54. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/pipeline/manager.py +0 -0
  55. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/pipeline/registry.py +0 -0
  56. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/pipeline/runner.py +0 -0
  57. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/pipeline/visualizer.py +0 -0
  58. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/base.py +0 -0
  59. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/helpers/datetime.py +0 -0
  60. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/helpers/polars.py +0 -0
  61. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/helpers/pyarrow.py +0 -0
  62. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/helpers/sql.py +0 -0
  63. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/__init__.py +0 -0
  64. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/csv.py +0 -0
  65. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/deltatable.py +0 -0
  66. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/duckdb.py +0 -0
  67. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/json.py +0 -0
  68. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/mqtt.py +0 -0
  69. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/mssql.py +0 -0
  70. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/mysql.py +0 -0
  71. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/oracle.py +0 -0
  72. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/parquet.py +0 -0
  73. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/postgres.py +0 -0
  74. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/pydala.py +0 -0
  75. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/loader/sqlite.py +0 -0
  76. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/metadata.py +0 -0
  77. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/__init__.py +0 -0
  78. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/csv.py +0 -0
  79. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/deltatable.py +0 -0
  80. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/duckdb.py +0 -0
  81. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/json.py +0 -0
  82. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/mqtt.py +0 -0
  83. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/mssql.py +0 -0
  84. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/mysql.py +0 -0
  85. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/oracle.py +0 -0
  86. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/parquet.py +0 -0
  87. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/postgres.py +0 -0
  88. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/pydala.py +0 -0
  89. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/io/saver/sqlite.py +0 -0
  90. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/mqtt/__init__.py +0 -0
  91. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/mqtt/cfg.py +0 -0
  92. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/plugins/mqtt/manager.py +0 -0
  93. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/settings/__init__.py +0 -0
  94. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/settings/backend.py +0 -0
  95. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/settings/executor.py +0 -0
  96. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/settings/general.py +0 -0
  97. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/settings/hamilton.py +0 -0
  98. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/settings/job_queue.py +0 -0
  99. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/settings/logging.py +0 -0
  100. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/settings/retry.py +0 -0
  101. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/utils/callback.py +0 -0
  102. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/utils/logging.py +0 -0
  103. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/utils/misc.py +0 -0
  104. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/utils/monkey.py +0 -0
  105. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/utils/open_telemetry.py +0 -0
  106. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/utils/scheduler.py +0 -0
  107. {flowerpower-0.11.6.6 → flowerpower-0.11.6.7}/src/flowerpower/utils/templates.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: FlowerPower
3
- Version: 0.11.6.6
3
+ Version: 0.11.6.7
4
4
  Summary: A simple workflow framework. Hamilton + APScheduler = FlowerPower
5
5
  Author-email: "Volker L." <ligno.blades@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/legout/flowerpower
@@ -4,7 +4,7 @@ description = "A simple workflow framework. Hamilton + APScheduler = FlowerPower
4
4
  authors = [{ name = "Volker L.", email = "ligno.blades@gmail.com" }]
5
5
  readme = "README.md"
6
6
  requires-python = ">= 3.11"
7
- version = "0.11.6.6"
7
+ version = "0.11.6.7"
8
8
  keywords = [
9
9
  "hamilton",
10
10
  "workflow",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: FlowerPower
3
- Version: 0.11.6.6
3
+ Version: 0.11.6.7
4
4
  Summary: A simple workflow framework. Hamilton + APScheduler = FlowerPower
5
5
  Author-email: "Volker L." <ligno.blades@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/legout/flowerpower
@@ -814,6 +814,7 @@ def _read_parquet_file(
814
814
  path: Path to Parquet file
815
815
  self: Filesystem instance to use for reading
816
816
  include_file_path: Add source filepath as a column
817
+ opt_dtypes: Optimize DataFrame dtypes
817
818
  **kwargs: Additional arguments passed to pq.read_table()
818
819
 
819
820
  Returns:
@@ -830,6 +831,11 @@ def _read_parquet_file(
830
831
  >>> print("file_path" in table.column_names)
831
832
  True
832
833
  """
834
+ if not path.endswith(".parquet"):
835
+ raise ValueError(
836
+ f"Path '{path}' does not point to a Parquet file. "
837
+ "Ensure the path ends with '.parquet'."
838
+ )
833
839
  table = pq.read_table(path, filesystem=self, **kwargs)
834
840
  if include_file_path:
835
841
  table = table.add_column(0, "file_path", pl.Series([path] * table.num_rows))
@@ -841,6 +847,30 @@ def _read_parquet_file(
841
847
  def read_parquet_file(
842
848
  self, path: str, include_file_path: bool = False, opt_dtypes: bool = False, **kwargs
843
849
  ) -> pa.Table:
850
+ """Read a single Parquet file from any filesystem.
851
+
852
+ Internal function that handles reading individual Parquet files and
853
+ optionally adds the source filepath as a column.
854
+
855
+ Args:
856
+ path: Path to Parquet file
857
+ include_file_path: Add source filepath as a column
858
+ opt_dtypes: Optimize DataFrame dtypes
859
+ **kwargs: Additional arguments passed to pq.read_table()
860
+
861
+ Returns:
862
+ pa.Table: PyArrow Table containing Parquet data
863
+
864
+ Example:
865
+ >>> fs = LocalFileSystem()
866
+ >>> table = fs.read_parquet_file(
867
+ ... "data.parquet",
868
+ ... include_file_path=True,
869
+ ... use_threads=True
870
+ ... )
871
+ >>> print("file_path" in table.column_names)
872
+ True
873
+ """
844
874
  return _read_parquet_file(
845
875
  path=path,
846
876
  self=self,
@@ -852,7 +882,7 @@ def read_parquet_file(
852
882
 
853
883
  def _read_parquet(
854
884
  self,
855
- path,
885
+ path: str | list[str],
856
886
  include_file_path: bool = False,
857
887
  use_threads: bool = True,
858
888
  concat: bool = True,
@@ -874,48 +904,48 @@ def _read_parquet(
874
904
  Returns:
875
905
  (pa.Table | list[pa.Table]): Pyarrow Table or list of Pyarrow Tables.
876
906
  """
877
- if not include_file_path and concat:
878
- if isinstance(path, str):
879
- path = path.replace("**", "").replace("*.parquet", "")
880
- table = _read_parquet_file(path, self=self, opt_dtypes=opt_dtypes, **kwargs)
881
- return table
882
- else:
883
- if isinstance(path, str):
884
- path = path_to_glob(path, format="parquet")
885
- path = self.glob(path)
907
+ # if not include_file_path and concat:
908
+ # if isinstance(path, str):
909
+ # path = path.replace("**", "").replace("*.parquet", "")
910
+ # table = _read_parquet_file(path, self=self, opt_dtypes=opt_dtypes, **kwargs)
911
+ # return table
912
+ # else:
913
+ if isinstance(path, str):
914
+ path = path_to_glob(path, format="parquet")
915
+ path = self.glob(path)
886
916
 
887
- if isinstance(path, list):
888
- if use_threads:
889
- tables = run_parallel(
890
- _read_parquet_file,
891
- path,
892
- self=self,
893
- include_file_path=include_file_path,
894
- opt_dtypes=opt_dtypes,
895
- n_jobs=-1,
896
- backend="threading",
897
- verbose=verbose,
898
- **kwargs,
899
- )
900
- else:
901
- tables = [
902
- _read_parquet_file(
903
- p,
904
- self=self,
905
- include_file_path=include_file_path,
906
- opt_dtypes=opt_dtypes,
907
- **kwargs,
908
- )
909
- for p in path
910
- ]
911
- else:
912
- tables = _read_parquet_file(
913
- path=path,
917
+ if isinstance(path, list):
918
+ if use_threads:
919
+ tables = run_parallel(
920
+ _read_parquet_file,
921
+ path,
914
922
  self=self,
915
923
  include_file_path=include_file_path,
916
924
  opt_dtypes=opt_dtypes,
925
+ n_jobs=-1,
926
+ backend="threading",
927
+ verbose=verbose,
917
928
  **kwargs,
918
929
  )
930
+ else:
931
+ tables = [
932
+ _read_parquet_file(
933
+ p,
934
+ self=self,
935
+ include_file_path=include_file_path,
936
+ opt_dtypes=opt_dtypes,
937
+ **kwargs,
938
+ )
939
+ for p in path
940
+ ]
941
+ else:
942
+ tables = _read_parquet_file(
943
+ path=path,
944
+ self=self,
945
+ include_file_path=include_file_path,
946
+ opt_dtypes=opt_dtypes,
947
+ **kwargs,
948
+ )
919
949
  if concat:
920
950
  # Unify schemas before concatenation if opt_dtypes or multiple tables
921
951
  if isinstance(tables, list):
@@ -1001,14 +1031,14 @@ def _read_parquet_batches(
1001
1031
  ... print(f"Batch schema: {batch.schema}")
1002
1032
  """
1003
1033
  # Fast path for simple cases
1004
- if not include_file_path and concat and batch_size is None:
1005
- if isinstance(path, str):
1006
- path = path.replace("**", "").replace("*.parquet", "")
1007
- table = _read_parquet_file(
1008
- path=path, self=self, opt_dtypes=opt_dtypes, **kwargs
1009
- )
1010
- yield table
1011
- return
1034
+ # if not include_file_path and concat and batch_size is None:
1035
+ # if isinstance(path, str):
1036
+ # path = path.replace("**", "").replace("*.parquet", "")
1037
+ # table = _read_parquet_file(
1038
+ # path=path, self=self, opt_dtypes=opt_dtypes, **kwargs
1039
+ # )
1040
+ # yield table
1041
+ # return
1012
1042
 
1013
1043
  # Resolve path(s) to list
1014
1044
  if isinstance(path, str):
@@ -1058,13 +1088,13 @@ def _read_parquet_batches(
1058
1088
  schemas = [t.schema for t in batch_tables]
1059
1089
  unified_schema = unify_schemas_pa(schemas)
1060
1090
  batch_tables = [cast_schema(t, unified_schema) for t in batch_tables]
1061
- result = pa.concat_tables(
1091
+ batch_table = pa.concat_tables(
1062
1092
  [table for table in batch_tables if table.num_rows > 0],
1063
1093
  promote_options="permissive",
1064
1094
  )
1065
1095
  # if opt_dtypes:
1066
1096
  # result = opt_dtype_pa(result, strict=False)
1067
- yield result
1097
+ yield batch_table
1068
1098
  else:
1069
1099
  # if opt_dtypes and isinstance(batch_tables, list):
1070
1100
  # batch_tables = [opt_dtype_pa(t, strict=False) for t in batch_tables]
File without changes
File without changes
File without changes