FlowerPower 0.11.3__py3-none-any.whl → 0.11.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowerpower/fs/base.py +9 -1
- flowerpower/pipeline/manager.py +5 -1
- flowerpower/plugins/io/base.py +78 -53
- {flowerpower-0.11.3.dist-info → flowerpower-0.11.5.dist-info}/METADATA +1 -1
- {flowerpower-0.11.3.dist-info → flowerpower-0.11.5.dist-info}/RECORD +9 -9
- {flowerpower-0.11.3.dist-info → flowerpower-0.11.5.dist-info}/WHEEL +0 -0
- {flowerpower-0.11.3.dist-info → flowerpower-0.11.5.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.11.3.dist-info → flowerpower-0.11.5.dist-info}/licenses/LICENSE +0 -0
- {flowerpower-0.11.3.dist-info → flowerpower-0.11.5.dist-info}/top_level.txt +0 -0
flowerpower/fs/base.py
CHANGED
@@ -608,7 +608,15 @@ def get_filesystem(
|
|
608
608
|
return DirFileSystem(path=path, fs=fs)
|
609
609
|
|
610
610
|
pp = infer_storage_options(str(path) if isinstance(path, Path) else path)
|
611
|
-
protocol =
|
611
|
+
protocol = (
|
612
|
+
storage_options_kwargs.get("protocol", None)
|
613
|
+
or (
|
614
|
+
storage_options.get("protocol", None)
|
615
|
+
if isinstance(storage_options, dict)
|
616
|
+
else getattr(storage_options, "protocol", None)
|
617
|
+
)
|
618
|
+
or pp.get("protocol", "file")
|
619
|
+
)
|
612
620
|
|
613
621
|
if protocol == "file" or protocol == "local":
|
614
622
|
fs = filesystem(protocol)
|
flowerpower/pipeline/manager.py
CHANGED
@@ -145,7 +145,11 @@ class PipelineManager:
|
|
145
145
|
cache_storage=cache_storage,
|
146
146
|
)
|
147
147
|
self._fs = fs
|
148
|
-
self._storage_options =
|
148
|
+
self._storage_options = (
|
149
|
+
storage_options or fs.storage_options
|
150
|
+
if fs.protocol != "dir"
|
151
|
+
else fs.fs.storage_options
|
152
|
+
)
|
149
153
|
|
150
154
|
# Store overrides for ProjectConfig loading
|
151
155
|
self._cfg_dir = cfg_dir
|
flowerpower/plugins/io/base.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
import importlib
|
2
|
+
import os
|
2
3
|
import posixpath
|
3
4
|
from typing import Any, Generator
|
4
5
|
|
@@ -22,8 +23,9 @@ from sqlalchemy import create_engine, text
|
|
22
23
|
from ...fs import get_filesystem
|
23
24
|
from ...fs.ext import _dict_to_dataframe, path_to_glob
|
24
25
|
from ...fs.storage_options import (AwsStorageOptions, AzureStorageOptions,
|
25
|
-
|
26
|
-
GitLabStorageOptions,
|
26
|
+
BaseStorageOptions, GcsStorageOptions,
|
27
|
+
GitHubStorageOptions, GitLabStorageOptions,
|
28
|
+
StorageOptions)
|
27
29
|
from ...utils.misc import convert_large_types_to_standard, to_pyarrow_table
|
28
30
|
from .helpers.polars import pl
|
29
31
|
from .helpers.sql import sql2polars_filter, sql2pyarrow_filter
|
@@ -75,67 +77,80 @@ class BaseFileIO(msgspec.Struct, gc=False):
|
|
75
77
|
) = field(default=None)
|
76
78
|
fs: AbstractFileSystem | None = field(default=None)
|
77
79
|
format: str | None = None
|
78
|
-
|
80
|
+
# _base_path: str | list[str] | None = field(default=None)
|
81
|
+
# _full_path: str | list[str] | None = field(default=None)
|
82
|
+
# _rel_path: str | list[str] | None = field(default=None)
|
83
|
+
# _glob_path
|
79
84
|
_metadata: dict[str, Any] | None = field(default=None)
|
80
85
|
|
81
86
|
def __post_init__(self):
|
82
|
-
self.
|
83
|
-
if isinstance(self.storage_options, dict):
|
84
|
-
if "protocol" not in self.storage_options:
|
85
|
-
self.storage_options["protocol"] = get_protocol(self.path)
|
86
|
-
self.storage_options = StorageOptions(
|
87
|
-
**self.storage_options
|
88
|
-
).storage_options
|
89
|
-
if isinstance(self.storage_options, StorageOptions):
|
90
|
-
self.storage_options = self.storage_options.storage_options
|
87
|
+
# self._base_path = self.path if isinstance(self.path, str) else os.path.commonpath(self.path)
|
91
88
|
|
92
89
|
if self.fs is None:
|
93
90
|
self.fs = get_filesystem(
|
94
|
-
path=self.
|
91
|
+
path=self._base_path,
|
95
92
|
storage_options=self.storage_options,
|
96
93
|
fs=self.fs,
|
97
94
|
dirfs=True,
|
98
95
|
)
|
96
|
+
self.storage_options = (
|
97
|
+
self.storage_options or self.fs.storage_options
|
98
|
+
if self.protocol != "dir"
|
99
|
+
else self.fs.fs.storage_options
|
100
|
+
)
|
99
101
|
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
102
|
+
# self.path = (
|
103
|
+
# self._raw_path.replace(protocol + "://", "")
|
104
|
+
# .replace(f"**/*.{self.format}", "")
|
105
|
+
# .replace("**", "")
|
106
|
+
# .replace("*", "")
|
107
|
+
# .rstrip("/")
|
108
|
+
# )
|
109
|
+
|
110
|
+
@property
|
111
|
+
def protocol(self):
|
112
|
+
"""Get the protocol of the filesystem."""
|
113
|
+
protocol = (
|
114
|
+
self.fs.protocol if self.fs.protocol != "dir" else self.fs.fs.protocol
|
115
|
+
)
|
116
|
+
if isinstance(protocol, list | tuple):
|
117
|
+
protocol = protocol[0]
|
118
|
+
return protocol
|
119
|
+
|
120
|
+
@property
|
121
|
+
def _base_path(self) -> str:
|
122
|
+
"""Get the base path for the filesystem."""
|
123
|
+
|
124
|
+
path = (
|
125
|
+
self.path if isinstance(self.path, str) else os.path.commonpath(self.path)
|
126
|
+
)
|
127
|
+
return path
|
121
128
|
|
122
129
|
@property
|
123
|
-
def _path(self):
|
130
|
+
def _path(self) -> str | list[str]:
|
124
131
|
if self.fs.protocol == "dir":
|
125
132
|
if isinstance(self.path, list):
|
126
133
|
return [
|
127
|
-
p.replace(self.
|
134
|
+
p.replace(self._base_path.lstrip("/"), "").lstrip("/")
|
128
135
|
for p in self.path
|
129
136
|
]
|
130
137
|
else:
|
131
|
-
return self.path.replace(self.
|
138
|
+
return self.path.replace(self._base_path.lstrip("/"), "").lstrip("/")
|
132
139
|
return self.path
|
133
140
|
|
134
141
|
@property
|
135
|
-
def _glob_path(self):
|
142
|
+
def _glob_path(self) -> str | list[str]:
|
143
|
+
if isinstance(self._path, list):
|
144
|
+
return self._path
|
136
145
|
return path_to_glob(self._path, self.format)
|
137
146
|
|
138
|
-
|
147
|
+
@property
|
148
|
+
def _root_path(self) -> str:
|
149
|
+
if self.fs.protocol == "dir":
|
150
|
+
return self._base_path.replace(self.fs.path, "")
|
151
|
+
return self._base_path
|
152
|
+
|
153
|
+
def list_files(self) -> list[str]:
|
139
154
|
if isinstance(self._path, list):
|
140
155
|
return self._path
|
141
156
|
|
@@ -276,7 +291,7 @@ class BaseFileReader(BaseFileIO, gc=False):
|
|
276
291
|
return df
|
277
292
|
|
278
293
|
def iter_pandas(
|
279
|
-
self,
|
294
|
+
self, reload: bool = False, **kwargs
|
280
295
|
) -> Generator[pd.DataFrame, None, None]:
|
281
296
|
"""Iterate over Pandas DataFrames.
|
282
297
|
|
@@ -287,7 +302,10 @@ class BaseFileReader(BaseFileIO, gc=False):
|
|
287
302
|
Returns:
|
288
303
|
Generator[pd.DataFrame, None, None]: Generator of Pandas DataFrames.
|
289
304
|
"""
|
290
|
-
self.
|
305
|
+
if self.batch_size is None and "batch_size" not in kwargs:
|
306
|
+
self.batch_size = 1
|
307
|
+
|
308
|
+
self._load(reload=reload, **kwargs)
|
291
309
|
if isinstance(self._data, list | Generator):
|
292
310
|
for df in self._data:
|
293
311
|
yield df if isinstance(df, pd.DataFrame) else df.to_pandas()
|
@@ -324,14 +342,17 @@ class BaseFileReader(BaseFileIO, gc=False):
|
|
324
342
|
return df
|
325
343
|
|
326
344
|
def _iter_polars_dataframe(
|
327
|
-
self,
|
345
|
+
self, reload: bool = False, **kwargs
|
328
346
|
) -> Generator[pl.DataFrame, None, None]:
|
329
347
|
"""Iterate over Polars DataFrames.
|
330
348
|
|
331
349
|
Returns:
|
332
350
|
Generator[pl.DataFrame, None, None]: Generator of Polars DataFrames.
|
333
351
|
"""
|
334
|
-
self.
|
352
|
+
if self.batch_size is None and "batch_size" not in kwargs:
|
353
|
+
self.batch_size = 1
|
354
|
+
|
355
|
+
self._load(reload=reload, **kwargs)
|
335
356
|
if isinstance(self._data, list | Generator):
|
336
357
|
for df in self._data:
|
337
358
|
yield df if isinstance(df, pl.DataFrame) else pl.from_arrow(df)
|
@@ -361,7 +382,7 @@ class BaseFileReader(BaseFileIO, gc=False):
|
|
361
382
|
return df
|
362
383
|
|
363
384
|
def _iter_polars_lazyframe(
|
364
|
-
self,
|
385
|
+
self, reload: bool = False, **kwargs
|
365
386
|
) -> Generator[pl.LazyFrame, None, None]:
|
366
387
|
"""Iterate over Polars LazyFrames.
|
367
388
|
|
@@ -372,7 +393,9 @@ class BaseFileReader(BaseFileIO, gc=False):
|
|
372
393
|
Returns:
|
373
394
|
Generator[pl.LazyFrame, None, None]: Generator of Polars LazyFrames.
|
374
395
|
"""
|
375
|
-
self.
|
396
|
+
if self.batch_size is None and "batch_size" not in kwargs:
|
397
|
+
self.batch_size = 1
|
398
|
+
self._load(reload=reload, **kwargs)
|
376
399
|
if isinstance(self._data, list | Generator):
|
377
400
|
for df in self._data:
|
378
401
|
yield (
|
@@ -420,12 +443,11 @@ class BaseFileReader(BaseFileIO, gc=False):
|
|
420
443
|
def iter_polars(
|
421
444
|
self,
|
422
445
|
lazy: bool = False,
|
423
|
-
batch_size: int = 1,
|
424
446
|
**kwargs,
|
425
447
|
) -> Generator[pl.DataFrame | pl.LazyFrame, None, None]:
|
426
448
|
if lazy:
|
427
|
-
yield from self._iter_polars_lazyframe(
|
428
|
-
yield from self._iter_polars_dataframe(
|
449
|
+
yield from self._iter_polars_lazyframe(**kwargs)
|
450
|
+
yield from self._iter_polars_dataframe(**kwargs)
|
429
451
|
|
430
452
|
def to_pyarrow_table(
|
431
453
|
self, metadata: bool = False, reload: bool = False, **kwargs
|
@@ -459,14 +481,17 @@ class BaseFileReader(BaseFileIO, gc=False):
|
|
459
481
|
return df
|
460
482
|
|
461
483
|
def iter_pyarrow_table(
|
462
|
-
self,
|
484
|
+
self, reload: bool = False, **kwargs
|
463
485
|
) -> Generator[pa.Table, None, None]:
|
464
486
|
"""Iterate over PyArrow Tables.
|
465
487
|
|
466
488
|
Returns:
|
467
489
|
Generator[pa.Table, None, None]: Generator of PyArrow Tables.
|
468
490
|
"""
|
469
|
-
self.
|
491
|
+
if self.batch_size is None and "batch_size" not in kwargs:
|
492
|
+
self.batch_size = 1
|
493
|
+
|
494
|
+
self._load(reload=reload, **kwargs)
|
470
495
|
if isinstance(self._data, list | Generator):
|
471
496
|
for df in self._data:
|
472
497
|
yield df.to_arrow(**kwargs) if isinstance(df, pl.DataFrame) else df
|
@@ -752,7 +777,7 @@ class BaseDatasetReader(BaseFileReader, gc=False):
|
|
752
777
|
Returns:
|
753
778
|
pds.Dataset: PyArrow Dataset.
|
754
779
|
"""
|
755
|
-
if
|
780
|
+
if self._dataset is not None and not reload:
|
756
781
|
if metadata:
|
757
782
|
return self._dataset, self._metadata
|
758
783
|
return self._dataset
|
@@ -769,9 +794,9 @@ class BaseDatasetReader(BaseFileReader, gc=False):
|
|
769
794
|
self._dataset, path=self.path, format=self.format
|
770
795
|
)
|
771
796
|
elif self.format == "parquet":
|
772
|
-
if self.fs.exists(posixpath.join(self.
|
797
|
+
if self.fs.exists(posixpath.join(self._root_path, "_metadata")):
|
773
798
|
self._dataset = self.fs.parquet_dataset(
|
774
|
-
self.
|
799
|
+
posixpath.join(self._root_path, "_metadata"),
|
775
800
|
schema=self.schema_,
|
776
801
|
partitioning=self.partitioning,
|
777
802
|
**kwargs,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: FlowerPower
|
3
|
-
Version: 0.11.
|
3
|
+
Version: 0.11.5
|
4
4
|
Summary: A simple workflow framework. Hamilton + APScheduler = FlowerPower
|
5
5
|
Author-email: "Volker L." <ligno.blades@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/legout/flowerpower
|
@@ -17,7 +17,7 @@ flowerpower/cli/mqtt.py,sha256=GM5d6bzG01THZd7SwXFAC3j0cait642eXT50P3R22vk,6281
|
|
17
17
|
flowerpower/cli/pipeline.py,sha256=60P6u_QOSgp0jJXEMxazEEo5Sh7-SWFo-Kkuaz21YuI,37845
|
18
18
|
flowerpower/cli/utils.py,sha256=nDSSj_1nlYlMmj252kRZeohhFqHv9yvdgDEduQCyWOc,5152
|
19
19
|
flowerpower/fs/__init__.py,sha256=uZaPXErEfQqQRbKRIjkB9yiygd45X5_psYn9-VVrBTQ,910
|
20
|
-
flowerpower/fs/base.py,sha256=
|
20
|
+
flowerpower/fs/base.py,sha256=bWfY_xPPZ9DhSpSGMC2SV6OdbYp5XyDrjxLYhTjJlqo,22352
|
21
21
|
flowerpower/fs/ext.py,sha256=gsCJ87VxVdy22oVtNRaN4M-SLO8WORVf5JRyDeQjjEs,63834
|
22
22
|
flowerpower/fs/storage_options.py,sha256=msq5TpxAU8tcE_Bxjw6SyxaFa75UjdYnR4-O9U2wmbk,48034
|
23
23
|
flowerpower/job_queue/__init__.py,sha256=a25hIqv2xoFKb4JZlyUukS0ppZ9-2sJKH3XAvbk3rlk,10788
|
@@ -40,11 +40,11 @@ flowerpower/pipeline/__init__.py,sha256=xbEn_RN0vVNqLZMSFOCdV41ggUkYrghFVJYd_EC0
|
|
40
40
|
flowerpower/pipeline/base.py,sha256=N3N0iqiVo2vUVli_WSADAQMq283mG9OdFql58LXeF2Q,3275
|
41
41
|
flowerpower/pipeline/io.py,sha256=8Mlw4G7ehHHZEk4Qui-HcKBM3tBF4FuqUbjfNxK09iU,15963
|
42
42
|
flowerpower/pipeline/job_queue.py,sha256=hl38-0QZCH5wujUf0qIqYznIPDLsJAoNDcOD7YGVQ6s,26114
|
43
|
-
flowerpower/pipeline/manager.py,sha256=
|
43
|
+
flowerpower/pipeline/manager.py,sha256=KVpOclUEUAETUNJamJJGuKt3oxCaLitQgxWxkE1q028,74460
|
44
44
|
flowerpower/pipeline/registry.py,sha256=WWQoaxtgnlntFEIPQzYM1gk0zUXwrH2PmDLGbTzhrZs,18991
|
45
45
|
flowerpower/pipeline/runner.py,sha256=dsSVYixFXqlxFk8EJfT4wV_7IwgkXq0ErwH_yf_NGS8,25654
|
46
46
|
flowerpower/pipeline/visualizer.py,sha256=amjMrl5NetErE198HzZBPWVZBi_t5jj9ydxWpuNLoTI,5013
|
47
|
-
flowerpower/plugins/io/base.py,sha256=
|
47
|
+
flowerpower/plugins/io/base.py,sha256=Plr0v2y0MMf3azxqz7rIc2wk6CXFzpPczW6p5Qb9jiI,79464
|
48
48
|
flowerpower/plugins/io/metadata.py,sha256=31FoVyRz6zdWMWda0f1GHq0xMokVB3SVYrLT2TejOH8,7271
|
49
49
|
flowerpower/plugins/io/helpers/datetime.py,sha256=1WBUg2ywcsodJQwoF6JiIGc9yhVobvE2IErWp4i95m4,10649
|
50
50
|
flowerpower/plugins/io/helpers/polars.py,sha256=VuksokWrsKk57-s5JkpsmzWCkaOIEiI4ONeAIO9LAdw,18071
|
@@ -93,9 +93,9 @@ flowerpower/utils/monkey.py,sha256=VPl3yimoWhwD9kI05BFsjNvtyQiDyLfY4Q85Bb6Ma0w,2
|
|
93
93
|
flowerpower/utils/open_telemetry.py,sha256=fQWJWbIQFtKIxMBjAWeF12NGnqT0isO3A3j-DSOv_vE,949
|
94
94
|
flowerpower/utils/scheduler.py,sha256=2zJ_xmLXpvXUQNF1XS2Gqm3Ogo907ctZ50GtvQB_rhE,9354
|
95
95
|
flowerpower/utils/templates.py,sha256=ouyEeSDqa9PjW8c32fGpcINlpC0WToawRFZkMPtwsLE,1591
|
96
|
-
flowerpower-0.11.
|
97
|
-
flowerpower-0.11.
|
98
|
-
flowerpower-0.11.
|
99
|
-
flowerpower-0.11.
|
100
|
-
flowerpower-0.11.
|
101
|
-
flowerpower-0.11.
|
96
|
+
flowerpower-0.11.5.dist-info/licenses/LICENSE,sha256=9AkLexxrmr0aBgSHiqxpJk9wgazpP1CTJyiDyr56J9k,1063
|
97
|
+
flowerpower-0.11.5.dist-info/METADATA,sha256=Cmyl1IhcjYUTdIrKw_VSGtjclhFUHzIcWlM7yknDL3I,21610
|
98
|
+
flowerpower-0.11.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
99
|
+
flowerpower-0.11.5.dist-info/entry_points.txt,sha256=61X11i5a2IwC9LBiP20XCDl5zMOigGCjMCx17B7bDbQ,52
|
100
|
+
flowerpower-0.11.5.dist-info/top_level.txt,sha256=VraH4WtEUfSxs5L-rXwDQhzQb9eLHTUtgvmFZ2dAYnA,12
|
101
|
+
flowerpower-0.11.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|