daplapath 2.0.12__py3-none-any.whl → 2.0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- daplapath/path.py +42 -5
- {daplapath-2.0.12.dist-info → daplapath-2.0.13.dist-info}/METADATA +1 -1
- daplapath-2.0.13.dist-info/RECORD +6 -0
- daplapath-2.0.12.dist-info/RECORD +0 -6
- {daplapath-2.0.12.dist-info → daplapath-2.0.13.dist-info}/LICENSE.md +0 -0
- {daplapath-2.0.12.dist-info → daplapath-2.0.13.dist-info}/WHEEL +0 -0
daplapath/path.py
CHANGED
|
@@ -128,7 +128,9 @@ class MyGCSFileSystem(gcsfs.GCSFileSystem):
|
|
|
128
128
|
"""Remove contents of a directory in GCS. It might take some time before files are actually deleted."""
|
|
129
129
|
path = pathlib.Path(path)
|
|
130
130
|
remaining = self.glob(str(path / "**"))
|
|
131
|
-
assert all(self.isdir(x) for x in remaining),
|
|
131
|
+
assert all(self.isdir(x) for x in remaining), [
|
|
132
|
+
x for x in remaining if not self.isdir(x)
|
|
133
|
+
]
|
|
132
134
|
storage_client = storage.Client()
|
|
133
135
|
bucket = storage_client.get_bucket(path.parts[0])
|
|
134
136
|
blobs = bucket.list_blobs(prefix="/".join(path.parts) + "/")
|
|
@@ -272,6 +274,7 @@ class Path(str, _PathBase):
|
|
|
272
274
|
self, pattern: str | None = None, recursive: bool = True, **kwargs
|
|
273
275
|
) -> "PathSeries":
|
|
274
276
|
"""Create PathSeries of files/directories that match the pattern."""
|
|
277
|
+
|
|
275
278
|
recursive = kwargs.get("recurse_symlinks", recursive)
|
|
276
279
|
|
|
277
280
|
if pattern:
|
|
@@ -734,6 +737,27 @@ class Path(str, _PathBase):
|
|
|
734
737
|
return False
|
|
735
738
|
return bool(len(self.glob(f"**/*{self.suffix}")))
|
|
736
739
|
|
|
740
|
+
def has_all_partitions(
    self, ids: Iterable[str], nrow_min: int, id_col: str, strict: bool = True
) -> bool:
    """Check that every id has exactly one sufficiently large partition file.

    Args:
        ids: Partition values to look for. May be a one-shot iterator; it is
            materialized once because it is scanned more than once.
        nrow_min: Minimum ``nrow`` each matched partition file must have.
        id_col: Partition column name. A file belongs to an id when the
            substring ``"{id_col}={id}"`` occurs in its path.
        strict: If True, additionally require that every file found under
            this path belongs to one of ``ids``.

    Returns:
        False if this path does not exist, if any id matches zero or more
        than one file, if a matched file has ``nrow`` below ``nrow_min``, or
        (only when ``strict``) if some file matches none of the ids.
        True otherwise.
    """
    if not self.exists():
        return False

    # Build the "col=value" tokens once: they are needed both for the per-id
    # pass and for the strict pass, and ``ids`` may be a single-use iterator.
    tokens = [f"{id_col}={id_}" for id_ in ids]
    paths = self.glob(f"**/*{self.suffix}")

    # NOTE(review): this is a plain substring match, so "id=1" also matches
    # a path containing "id=10" -- confirm partition values cannot collide.
    for token in tokens:
        matches = [path for path in paths if token in path]
        if len(matches) != 1:
            # Partition is missing, or split over several files.
            return False
        if Path(matches[0]).nrow < nrow_min:
            return False

    if not strict:
        return True

    # Each file must contain at least one requested token. The membership
    # test against ``path`` is essential: a bare non-empty f-string is always
    # truthy and would let unknown partitions pass unnoticed.
    return all(any(token in path for token in tokens) for path in paths)
|
|
760
|
+
|
|
737
761
|
def isfile(self) -> bool:
    """Return True when this path is not a directory.

    Defined purely as the negation of ``isdir()``; no separate file check
    is performed here.
    """
    is_directory = self.isdir()
    return not is_directory
|
|
739
763
|
|
|
@@ -966,9 +990,13 @@ class PathSeries(pd.Series, _PathBase):
|
|
|
966
990
|
"""Select only the files in the Series."""
|
|
967
991
|
return self[self.is_file()]
|
|
968
992
|
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
993
|
+
def buckets_path(self) -> "PathSeries":
    """Call ``buckets_path()`` on every entry of ``self.files``.

    Returns:
        Whatever ``self.files.apply`` produces from the per-entry results.
    """

    def _to_buckets_path(file_path):
        return file_path.buckets_path()

    only_files = self.files
    return only_files.apply(_to_buckets_path)
|
|
995
|
+
|
|
996
|
+
def partition_root(self, keep: str | None = "last") -> "PathSeries":
|
|
997
|
+
return self.files.apply(lambda x: x.partition_root)[
|
|
998
|
+
lambda x: ~x.duplicated(keep=keep)
|
|
999
|
+
]
|
|
972
1000
|
|
|
973
1001
|
@property
|
|
974
1002
|
def partitioned_files(self) -> "PathSeries":
|
|
@@ -1173,9 +1201,18 @@ class PathSeries(pd.Series, _PathBase):
|
|
|
1173
1201
|
|
|
1174
1202
|
def __getattr__(self, attr: str) -> Any:
|
|
1175
1203
|
"""Get Path attribute for each row."""
|
|
1204
|
+
|
|
1205
|
+
def get_property(path: Path):
|
|
1206
|
+
x = getattr(path, attr)
|
|
1207
|
+
if callable(x):
|
|
1208
|
+
raise ValueError(
|
|
1209
|
+
f"{self.__class__.__name__} cannot access Path methods, only properties."
|
|
1210
|
+
)
|
|
1211
|
+
return x
|
|
1212
|
+
|
|
1176
1213
|
if attr in dir(self._path_type) and attr != "name" and attr not in dir(str):
|
|
1177
1214
|
try:
|
|
1178
|
-
series = self.apply(
|
|
1215
|
+
series = self.apply(get_property)
|
|
1179
1216
|
series.name = attr
|
|
1180
1217
|
return series
|
|
1181
1218
|
except IndexError as e:
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
daplapath/__init__.py,sha256=Qdpwhl8y3-i_42-4-KVT-sPQtJqwWRENLFxROZ_rfbU,86
|
|
2
|
+
daplapath/path.py,sha256=SgpEuMIT5bmS4cIlQohWTq6fd3rj34CkARoiQFnC-uk,57175
|
|
3
|
+
daplapath-2.0.13.dist-info/LICENSE.md,sha256=hxspefYgWP3U6OZFhCifqWMI5ksnKzgFxNKgQnG7Ozc,1074
|
|
4
|
+
daplapath-2.0.13.dist-info/METADATA,sha256=xwOO8TTkU8SBFfA-Ai2He1WFNuuCwc-oIfXL3L68Sio,14698
|
|
5
|
+
daplapath-2.0.13.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
6
|
+
daplapath-2.0.13.dist-info/RECORD,,
|
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
daplapath/__init__.py,sha256=Qdpwhl8y3-i_42-4-KVT-sPQtJqwWRENLFxROZ_rfbU,86
|
|
2
|
-
daplapath/path.py,sha256=3yXHAK6s_58iKQnaze66Xp3D7hDFq-JTHjv1PygryTM,55953
|
|
3
|
-
daplapath-2.0.12.dist-info/LICENSE.md,sha256=hxspefYgWP3U6OZFhCifqWMI5ksnKzgFxNKgQnG7Ozc,1074
|
|
4
|
-
daplapath-2.0.12.dist-info/METADATA,sha256=md2OMBcIP4F1HRcbKhuNrBXL58mB6HTsGtoOmIlAOwg,14698
|
|
5
|
-
daplapath-2.0.12.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
6
|
-
daplapath-2.0.12.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|