daplapath 2.0.12__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
daplapath/path.py CHANGED
@@ -128,7 +128,9 @@ class MyGCSFileSystem(gcsfs.GCSFileSystem):
128
128
  """Remove contents of a directory in GCS. It might take some time before files are actually deleted."""
129
129
  path = pathlib.Path(path)
130
130
  remaining = self.glob(str(path / "**"))
131
- assert all(self.isdir(x) for x in remaining), remaining
131
+ assert all(self.isdir(x) for x in remaining), [
132
+ x for x in remaining if not self.isdir(x)
133
+ ]
132
134
  storage_client = storage.Client()
133
135
  bucket = storage_client.get_bucket(path.parts[0])
134
136
  blobs = bucket.list_blobs(prefix="/".join(path.parts) + "/")
@@ -272,6 +274,7 @@ class Path(str, _PathBase):
272
274
  self, pattern: str | None = None, recursive: bool = True, **kwargs
273
275
  ) -> "PathSeries":
274
276
  """Create PathSeries of files/directories that match the pattern."""
277
+
275
278
  recursive = kwargs.get("recurse_symlinks", recursive)
276
279
 
277
280
  if pattern:
@@ -339,6 +342,9 @@ class Path(str, _PathBase):
339
342
  pass
340
343
  return out_path
341
344
 
345
def read_text(self, *args, **kwargs):
    """Read the file as text.

    All positional and keyword arguments are forwarded unchanged to the
    ``read_text`` method of the wrapped ``self._path`` object, and its
    result is returned as-is.
    """
    wrapped = self._path
    return wrapped.read_text(*args, **kwargs)
347
+
342
348
  def versions(self, include_versionless: bool = False) -> "PathSeries":
343
349
  """Returns a PathSeries of all versions of the file."""
344
350
  files_in_folder: Iterable[Path] = self.parent.glob("**", recursive=False)
@@ -734,6 +740,27 @@ class Path(str, _PathBase):
734
740
  return False
735
741
  return bool(len(self.glob(f"**/*{self.suffix}")))
736
742
 
743
def has_all_partitions(
    self, ids: Iterable[str], nrow_min: int, id_col: str, strict: bool = True
) -> bool:
    """Check that every expected partition exists and has enough rows.

    A partition is recognised by the substring ``"{id_col}={id}"`` occurring
    in the path of a file found with ``self.glob(f"**/*{self.suffix}")``.

    Args:
        ids: Partition values that must be present.
        nrow_min: Minimum ``nrow`` each partition file must have.
        id_col: Name of the partition column.
        strict: If True, additionally require that no globbed file belongs
            to a partition outside ``ids``.

    Returns:
        False if the path does not exist, if any id does not match exactly
        one file, if a matched file has fewer than ``nrow_min`` rows, or
        (when ``strict``) if a file matches none of the ids. True otherwise.
    """
    if not self.exists():
        return False

    # ids is scanned once per pass below; materialize it so a one-shot
    # iterator (e.g. a generator) is not exhausted after the first pass.
    ids = list(ids)
    paths = self.glob(f"**/*{self.suffix}")

    for id_ in ids:
        matches = [path for path in paths if f"{id_col}={id_}" in path]
        if len(matches) != 1:
            # partition is missing or split across several files
            return False
        if Path(matches[0]).nrow < nrow_min:
            return False

    if not strict:
        return True

    partition_keys = [f"{id_col}={id_}" for id_ in ids]
    for path in paths:
        # The key must actually be looked up in the path; testing only the
        # truthiness of the key string would accept every file.
        if not any(key in path for key in partition_keys):
            # partition is not in list of ids
            return False
    return True
763
+
737
764
  def isfile(self) -> bool:
738
765
  return not self.isdir()
739
766
 
@@ -966,9 +993,13 @@ class PathSeries(pd.Series, _PathBase):
966
993
  """Select only the files in the Series."""
967
994
  return self[self.is_file()]
968
995
 
969
- @property
970
- def partition_root(self) -> "PathSeries":
971
- return self.files.apply(lambda x: x.partition_root).drop_duplicates()
996
def buckets_path(self) -> "PathSeries":
    """Call ``buckets_path()`` on every entry of ``self.files`` and return the results."""
    only_files = self.files
    return only_files.apply(lambda file_path: file_path.buckets_path())
998
+
999
+ def partition_root(self, keep: str | None = "last") -> "PathSeries":
1000
+ return self.files.apply(lambda x: x.partition_root)[
1001
+ lambda x: ~x.duplicated(keep=keep)
1002
+ ]
972
1003
 
973
1004
  @property
974
1005
  def partitioned_files(self) -> "PathSeries":
@@ -1173,9 +1204,18 @@ class PathSeries(pd.Series, _PathBase):
1173
1204
 
1174
1205
  def __getattr__(self, attr: str) -> Any:
1175
1206
  """Get Path attribute for each row."""
1207
+
1208
+ def get_property(path: Path):
1209
+ x = getattr(path, attr)
1210
+ if callable(x):
1211
+ raise ValueError(
1212
+ f"{self.__class__.__name__} cannot access Path methods, only properties."
1213
+ )
1214
+ return x
1215
+
1176
1216
  if attr in dir(self._path_type) and attr != "name" and attr not in dir(str):
1177
1217
  try:
1178
- series = self.apply(lambda path: getattr(path, attr))
1218
+ series = self.apply(get_property)
1179
1219
  series.name = attr
1180
1220
  return series
1181
1221
  except IndexError as e:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: daplapath
3
- Version: 2.0.12
3
+ Version: 2.1.0
4
4
  Summary: A pathlib.Path class for dapla
5
5
  License: MIT
6
6
  Author: ort
@@ -0,0 +1,6 @@
1
+ daplapath/__init__.py,sha256=Qdpwhl8y3-i_42-4-KVT-sPQtJqwWRENLFxROZ_rfbU,86
2
+ daplapath/path.py,sha256=B4EU6hPeFOVMo34Li8e-z61IfdXgMHcHNjpSd1so89w,57271
3
+ daplapath-2.1.0.dist-info/LICENSE.md,sha256=hxspefYgWP3U6OZFhCifqWMI5ksnKzgFxNKgQnG7Ozc,1074
4
+ daplapath-2.1.0.dist-info/METADATA,sha256=3_FKexzVOL2-C5HHVw9rS_jXaB_Qk_mHKaHNT-MCplI,14697
5
+ daplapath-2.1.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
6
+ daplapath-2.1.0.dist-info/RECORD,,
@@ -1,6 +0,0 @@
1
- daplapath/__init__.py,sha256=Qdpwhl8y3-i_42-4-KVT-sPQtJqwWRENLFxROZ_rfbU,86
2
- daplapath/path.py,sha256=3yXHAK6s_58iKQnaze66Xp3D7hDFq-JTHjv1PygryTM,55953
3
- daplapath-2.0.12.dist-info/LICENSE.md,sha256=hxspefYgWP3U6OZFhCifqWMI5ksnKzgFxNKgQnG7Ozc,1074
4
- daplapath-2.0.12.dist-info/METADATA,sha256=md2OMBcIP4F1HRcbKhuNrBXL58mB6HTsGtoOmIlAOwg,14698
5
- daplapath-2.0.12.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
6
- daplapath-2.0.12.dist-info/RECORD,,