FlowerPower 0.11.4__py3-none-any.whl → 0.11.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
flowerpower/fs/base.py CHANGED
@@ -597,18 +597,29 @@ def get_filesystem(
597
597
  ... )
598
598
  """
599
599
  if fs is not None:
600
+ if dirfs:
601
+ if fs.protocol == "dir":
602
+ base_path = path.split("://")[1]
603
+ if base_path != fs.path:
604
+ fs = DirFileSystem(
605
+ path=posixpath.join(fs.path, base_path.replace(fs.path, "")),
606
+ fs=fs.fs,
607
+ )
600
608
  if cached:
601
609
  if fs.is_cache_fs:
602
610
  return fs
603
611
  return MonitoredSimpleCacheFileSystem(fs=fs, cache_storage=cache_storage)
604
612
 
605
- if dirfs:
606
- if fs.protocol == "dir":
607
- return fs
608
- return DirFileSystem(path=path, fs=fs)
609
-
610
613
  pp = infer_storage_options(str(path) if isinstance(path, Path) else path)
611
- protocol = pp.get("protocol")
614
+ protocol = (
615
+ storage_options_kwargs.get("protocol", None)
616
+ or (
617
+ storage_options.get("protocol", None)
618
+ if isinstance(storage_options, dict)
619
+ else getattr(storage_options, "protocol", None)
620
+ )
621
+ or pp.get("protocol", "file")
622
+ )
612
623
 
613
624
  if protocol == "file" or protocol == "local":
614
625
  fs = filesystem(protocol)
@@ -622,6 +633,8 @@ def get_filesystem(
622
633
  path = pp.get("path", "").lstrip("/")
623
634
  if len(host) and host not in path:
624
635
  path = posixpath.join(host, path)
636
+ if "." in path:
637
+ path = posixpath.dirname(path)
625
638
 
626
639
  if isinstance(storage_options, dict):
627
640
  storage_options = storage_options_from_dict(protocol, storage_options)
@@ -145,7 +145,11 @@ class PipelineManager:
145
145
  cache_storage=cache_storage,
146
146
  )
147
147
  self._fs = fs
148
- self._storage_options = storage_options or fs.storage_options
148
+ self._storage_options = (
149
+ storage_options or fs.storage_options
150
+ if fs.protocol != "dir"
151
+ else fs.fs.storage_options
152
+ )
149
153
 
150
154
  # Store overrides for ProjectConfig loading
151
155
  self._cfg_dir = cfg_dir
@@ -1,4 +1,5 @@
1
1
  import importlib
2
+ import os
2
3
  import posixpath
3
4
  from typing import Any, Generator
4
5
 
@@ -22,8 +23,9 @@ from sqlalchemy import create_engine, text
22
23
  from ...fs import get_filesystem
23
24
  from ...fs.ext import _dict_to_dataframe, path_to_glob
24
25
  from ...fs.storage_options import (AwsStorageOptions, AzureStorageOptions,
25
- GcsStorageOptions, GitHubStorageOptions,
26
- GitLabStorageOptions, StorageOptions)
26
+ BaseStorageOptions, GcsStorageOptions,
27
+ GitHubStorageOptions, GitLabStorageOptions,
28
+ StorageOptions)
27
29
  from ...utils.misc import convert_large_types_to_standard, to_pyarrow_table
28
30
  from .helpers.polars import pl
29
31
  from .helpers.sql import sql2polars_filter, sql2pyarrow_filter
@@ -75,67 +77,80 @@ class BaseFileIO(msgspec.Struct, gc=False):
75
77
  ) = field(default=None)
76
78
  fs: AbstractFileSystem | None = field(default=None)
77
79
  format: str | None = None
78
- _raw_path: str | list[str] | None = field(default=None)
80
+ # _base_path: str | list[str] | None = field(default=None)
81
+ # _full_path: str | list[str] | None = field(default=None)
82
+ # _rel_path: str | list[str] | None = field(default=None)
83
+ # _glob_path
79
84
  _metadata: dict[str, Any] | None = field(default=None)
80
85
 
81
86
  def __post_init__(self):
82
- self._raw_path = self.path
83
- if isinstance(self.storage_options, dict):
84
- if "protocol" not in self.storage_options:
85
- self.storage_options["protocol"] = get_protocol(self.path)
86
- self.storage_options = StorageOptions(
87
- **self.storage_options
88
- ).storage_options
89
- if isinstance(self.storage_options, StorageOptions):
90
- self.storage_options = self.storage_options.storage_options
87
+ # self._base_path = self.path if isinstance(self.path, str) else os.path.commonpath(self.path)
91
88
 
92
89
  if self.fs is None:
93
90
  self.fs = get_filesystem(
94
- path=self.path if isinstance(self.path, str) else self.path[0],
91
+ path=self._base_path,
95
92
  storage_options=self.storage_options,
96
93
  fs=self.fs,
97
94
  dirfs=True,
98
95
  )
96
+ self.storage_options = (
97
+ self.storage_options or self.fs.storage_options
98
+ if self.protocol != "dir"
99
+ else self.fs.fs.storage_options
100
+ )
99
101
 
100
- if hasattr(self.storage_options, "protocol"):
101
- protocol = self.storage_options.protocol
102
- else:
103
- protocol = self.fs.protocol
104
- if protocol == "dir":
105
- protocol = (
106
- self.fs.fs.protocol
107
- if isinstance(self.fs.fs.protocol, str)
108
- else self.fs.fs.protocol[0]
109
- )
110
- if isinstance(protocol, list | tuple):
111
- protocol = protocol[0]
112
-
113
- if isinstance(self.path, str):
114
- self.path = (
115
- self.path.replace(protocol + "://", "")
116
- .replace(f"**/*.{self.format}", "")
117
- .replace("**", "")
118
- .replace("*", "")
119
- .rstrip("/")
120
- )
102
+ # self.path = (
103
+ # self._raw_path.replace(protocol + "://", "")
104
+ # .replace(f"**/*.{self.format}", "")
105
+ # .replace("**", "")
106
+ # .replace("*", "")
107
+ # .rstrip("/")
108
+ # )
121
109
 
122
110
  @property
123
- def _path(self):
111
+ def protocol(self):
112
+ """Get the protocol of the filesystem."""
113
+ protocol = (
114
+ self.fs.protocol if self.fs.protocol != "dir" else self.fs.fs.protocol
115
+ )
116
+ if isinstance(protocol, list | tuple):
117
+ protocol = protocol[0]
118
+ return protocol
119
+
120
+ @property
121
+ def _base_path(self) -> str:
122
+ """Get the base path for the filesystem."""
123
+
124
+ path = (
125
+ self.path if isinstance(self.path, str) else os.path.commonpath(self.path)
126
+ )
127
+ return path
128
+
129
+ @property
130
+ def _path(self) -> str | list[str]:
124
131
  if self.fs.protocol == "dir":
125
132
  if isinstance(self.path, list):
126
133
  return [
127
- p.replace(self.fs.path.lstrip("/"), "").lstrip("/")
134
+ p.replace(self._base_path.lstrip("/"), "").lstrip("/")
128
135
  for p in self.path
129
136
  ]
130
137
  else:
131
- return self.path.replace(self.fs.path.lstrip("/"), "").lstrip("/")
138
+ return self.path.replace(self._base_path.lstrip("/"), "").lstrip("/")
132
139
  return self.path
133
140
 
134
141
  @property
135
- def _glob_path(self):
142
+ def _glob_path(self) -> str | list[str]:
143
+ if isinstance(self._path, list):
144
+ return self._path
136
145
  return path_to_glob(self._path, self.format)
137
146
 
138
- def list_files(self):
147
+ @property
148
+ def _root_path(self) -> str:
149
+ if self.fs.protocol == "dir":
150
+ return self._base_path.replace(self.fs.path, "")
151
+ return self._base_path
152
+
153
+ def list_files(self) -> list[str]:
139
154
  if isinstance(self._path, list):
140
155
  return self._path
141
156
 
@@ -762,7 +777,7 @@ class BaseDatasetReader(BaseFileReader, gc=False):
762
777
  Returns:
763
778
  pds.Dataset: PyArrow Dataset.
764
779
  """
765
- if hasattr(self, "_dataset") and not reload:
780
+ if self._dataset is not None and not reload:
766
781
  if metadata:
767
782
  return self._dataset, self._metadata
768
783
  return self._dataset
@@ -779,9 +794,9 @@ class BaseDatasetReader(BaseFileReader, gc=False):
779
794
  self._dataset, path=self.path, format=self.format
780
795
  )
781
796
  elif self.format == "parquet":
782
- if self.fs.exists(posixpath.join(self._path, "_metadata")):
797
+ if self.fs.exists(posixpath.join(self._root_path, "_metadata")):
783
798
  self._dataset = self.fs.parquet_dataset(
784
- self._path,
799
+ posixpath.join(self._root_path, "_metadata"),
785
800
  schema=self.schema_,
786
801
  partitioning=self.partitioning,
787
802
  **kwargs,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: FlowerPower
3
- Version: 0.11.4
3
+ Version: 0.11.5.1
4
4
  Summary: A simple workflow framework. Hamilton + APScheduler = FlowerPower
5
5
  Author-email: "Volker L." <ligno.blades@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/legout/flowerpower
@@ -17,7 +17,7 @@ flowerpower/cli/mqtt.py,sha256=GM5d6bzG01THZd7SwXFAC3j0cait642eXT50P3R22vk,6281
17
17
  flowerpower/cli/pipeline.py,sha256=60P6u_QOSgp0jJXEMxazEEo5Sh7-SWFo-Kkuaz21YuI,37845
18
18
  flowerpower/cli/utils.py,sha256=nDSSj_1nlYlMmj252kRZeohhFqHv9yvdgDEduQCyWOc,5152
19
19
  flowerpower/fs/__init__.py,sha256=uZaPXErEfQqQRbKRIjkB9yiygd45X5_psYn9-VVrBTQ,910
20
- flowerpower/fs/base.py,sha256=LPuIYVHSBvjv0ml2R9NiMlM0wLhlCDUAG8XAcvhUDX4,22090
20
+ flowerpower/fs/base.py,sha256=A8zOtcYu3ixbIGayP1b-GTFnul8iZATUW6eTtloRJ48,22605
21
21
  flowerpower/fs/ext.py,sha256=gsCJ87VxVdy22oVtNRaN4M-SLO8WORVf5JRyDeQjjEs,63834
22
22
  flowerpower/fs/storage_options.py,sha256=msq5TpxAU8tcE_Bxjw6SyxaFa75UjdYnR4-O9U2wmbk,48034
23
23
  flowerpower/job_queue/__init__.py,sha256=a25hIqv2xoFKb4JZlyUukS0ppZ9-2sJKH3XAvbk3rlk,10788
@@ -40,11 +40,11 @@ flowerpower/pipeline/__init__.py,sha256=xbEn_RN0vVNqLZMSFOCdV41ggUkYrghFVJYd_EC0
40
40
  flowerpower/pipeline/base.py,sha256=N3N0iqiVo2vUVli_WSADAQMq283mG9OdFql58LXeF2Q,3275
41
41
  flowerpower/pipeline/io.py,sha256=8Mlw4G7ehHHZEk4Qui-HcKBM3tBF4FuqUbjfNxK09iU,15963
42
42
  flowerpower/pipeline/job_queue.py,sha256=hl38-0QZCH5wujUf0qIqYznIPDLsJAoNDcOD7YGVQ6s,26114
43
- flowerpower/pipeline/manager.py,sha256=OMVznuQ0fSI4Jnw7fw0XTYdqYRqioJRC2B2kTxbmlN0,74361
43
+ flowerpower/pipeline/manager.py,sha256=KVpOclUEUAETUNJamJJGuKt3oxCaLitQgxWxkE1q028,74460
44
44
  flowerpower/pipeline/registry.py,sha256=WWQoaxtgnlntFEIPQzYM1gk0zUXwrH2PmDLGbTzhrZs,18991
45
45
  flowerpower/pipeline/runner.py,sha256=dsSVYixFXqlxFk8EJfT4wV_7IwgkXq0ErwH_yf_NGS8,25654
46
46
  flowerpower/pipeline/visualizer.py,sha256=amjMrl5NetErE198HzZBPWVZBi_t5jj9ydxWpuNLoTI,5013
47
- flowerpower/plugins/io/base.py,sha256=GXhJO7jPOGaKHpk_--W-p4ZlVYWBBDQywOtCdakhPuk,79122
47
+ flowerpower/plugins/io/base.py,sha256=Plr0v2y0MMf3azxqz7rIc2wk6CXFzpPczW6p5Qb9jiI,79464
48
48
  flowerpower/plugins/io/metadata.py,sha256=31FoVyRz6zdWMWda0f1GHq0xMokVB3SVYrLT2TejOH8,7271
49
49
  flowerpower/plugins/io/helpers/datetime.py,sha256=1WBUg2ywcsodJQwoF6JiIGc9yhVobvE2IErWp4i95m4,10649
50
50
  flowerpower/plugins/io/helpers/polars.py,sha256=VuksokWrsKk57-s5JkpsmzWCkaOIEiI4ONeAIO9LAdw,18071
@@ -93,9 +93,9 @@ flowerpower/utils/monkey.py,sha256=VPl3yimoWhwD9kI05BFsjNvtyQiDyLfY4Q85Bb6Ma0w,2
93
93
  flowerpower/utils/open_telemetry.py,sha256=fQWJWbIQFtKIxMBjAWeF12NGnqT0isO3A3j-DSOv_vE,949
94
94
  flowerpower/utils/scheduler.py,sha256=2zJ_xmLXpvXUQNF1XS2Gqm3Ogo907ctZ50GtvQB_rhE,9354
95
95
  flowerpower/utils/templates.py,sha256=ouyEeSDqa9PjW8c32fGpcINlpC0WToawRFZkMPtwsLE,1591
96
- flowerpower-0.11.4.dist-info/licenses/LICENSE,sha256=9AkLexxrmr0aBgSHiqxpJk9wgazpP1CTJyiDyr56J9k,1063
97
- flowerpower-0.11.4.dist-info/METADATA,sha256=Cf8dxvh4OWmQM8TmuuaQmlk7re4n1fCZTPeIf-NnS5Y,21610
98
- flowerpower-0.11.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
99
- flowerpower-0.11.4.dist-info/entry_points.txt,sha256=61X11i5a2IwC9LBiP20XCDl5zMOigGCjMCx17B7bDbQ,52
100
- flowerpower-0.11.4.dist-info/top_level.txt,sha256=VraH4WtEUfSxs5L-rXwDQhzQb9eLHTUtgvmFZ2dAYnA,12
101
- flowerpower-0.11.4.dist-info/RECORD,,
96
+ flowerpower-0.11.5.1.dist-info/licenses/LICENSE,sha256=9AkLexxrmr0aBgSHiqxpJk9wgazpP1CTJyiDyr56J9k,1063
97
+ flowerpower-0.11.5.1.dist-info/METADATA,sha256=S67wBjxZk_VkQtsIB-HOQl_lOP_-v2kK9Ri1HMuBskc,21612
98
+ flowerpower-0.11.5.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
99
+ flowerpower-0.11.5.1.dist-info/entry_points.txt,sha256=61X11i5a2IwC9LBiP20XCDl5zMOigGCjMCx17B7bDbQ,52
100
+ flowerpower-0.11.5.1.dist-info/top_level.txt,sha256=VraH4WtEUfSxs5L-rXwDQhzQb9eLHTUtgvmFZ2dAYnA,12
101
+ flowerpower-0.11.5.1.dist-info/RECORD,,