daplapath 2.0.11__tar.gz → 2.0.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: daplapath
3
- Version: 2.0.11
3
+ Version: 2.0.13
4
4
  Summary: A pathlib.Path class for dapla
5
5
  License: MIT
6
6
  Author: ort
@@ -128,7 +128,9 @@ class MyGCSFileSystem(gcsfs.GCSFileSystem):
128
128
  """Remove contents of a directory in GCS. It might take some time before files are actually deleted."""
129
129
  path = pathlib.Path(path)
130
130
  remaining = self.glob(str(path / "**"))
131
- assert all(self.isdir(x) for x in remaining), remaining
131
+ assert all(self.isdir(x) for x in remaining), [
132
+ x for x in remaining if not self.isdir(x)
133
+ ]
132
134
  storage_client = storage.Client()
133
135
  bucket = storage_client.get_bucket(path.parts[0])
134
136
  blobs = bucket.list_blobs(prefix="/".join(path.parts) + "/")
@@ -272,6 +274,7 @@ class Path(str, _PathBase):
272
274
  self, pattern: str | None = None, recursive: bool = True, **kwargs
273
275
  ) -> "PathSeries":
274
276
  """Create PathSeries of files/directories that match the pattern."""
277
+
275
278
  recursive = kwargs.get("recurse_symlinks", recursive)
276
279
 
277
280
  if pattern:
@@ -730,15 +733,31 @@ class Path(str, _PathBase):
730
733
  return self._new(self.split(self.suffix)[0] + self.suffix)
731
734
 
732
735
def is_partitioned(self) -> bool:
    """Report whether this path passes the partition checks.

    Returns True only when the path has a non-empty suffix, that suffix
    occurs exactly twice in the path string, and globbing for
    ``**/*<suffix>`` yields at least one match. Otherwise returns False.
    """
    suffix = self.suffix
    # Short-circuit on an empty suffix before counting occurrences.
    has_two_suffixes = bool(suffix) and self.count(suffix) == 2
    if not has_two_suffixes:
        return False
    matches = self.glob(f"**/*{suffix}")
    return len(matches) > 0
741
739
 
740
def has_all_partitions(
    self, ids: Iterable[str], nrow_min: int, id_col: str, strict: bool = True
) -> bool:
    """Check that every id has exactly one large-enough partition file.

    Args:
        ids: Partition values to look for.
        nrow_min: Lowest acceptable ``nrow`` for each matched file.
        id_col: Partition column name. A file is matched to an id when
            the text ``f"{id_col}={id_}"`` occurs in its path.
        strict: If True, additionally require that every file found
            matches at least one of ``ids``.

    Returns:
        False if this path does not exist, if any id matches zero or
        more than one file, if a matched file's ``nrow`` is below
        ``nrow_min``, or (with ``strict``) if any file found matches
        none of the ids. True otherwise.
    """
    if not self.exists():
        return False

    # ids is traversed twice below; materialize it so a one-shot
    # iterator is not already exhausted when the strict check runs.
    keys = [f"{id_col}={id_}" for id_ in ids]
    paths = self.glob(f"**/*{self.suffix}")

    # NOTE(review): matching is by substring, so "id=1" also matches a
    # path containing "id=10" -- confirm whether that is intended.
    for key in keys:
        these_paths = [path for path in paths if key in path]
        if len(these_paths) != 1:
            return False
        if Path(these_paths[0]).nrow < nrow_min:
            return False

    if not strict:
        return True

    # Every file found must belong to one of the requested ids. The
    # membership test against the path is essential: a bare non-empty
    # string is always truthy, which would make this check a no-op.
    return all(any(key in path for key in keys) for path in paths)
760
+
742
761
def isfile(self) -> bool:
    """Return True when ``isdir()`` reports that this path is not a directory."""
    is_directory = self.isdir()
    return not is_directory
744
763
 
@@ -971,9 +990,13 @@ class PathSeries(pd.Series, _PathBase):
971
990
  """Select only the files in the Series."""
972
991
  return self[self.is_file()]
973
992
 
974
- @property
975
- def partition_root(self) -> "PathSeries":
976
- return self.files.apply(lambda x: x.partition_root).drop_duplicates()
993
def buckets_path(self) -> "PathSeries":
    """Call ``buckets_path()`` on each file in the Series and collect the results."""

    def _to_buckets_path(file_path):
        return file_path.buckets_path()

    files_only = self.files
    return files_only.apply(_to_buckets_path)
995
+
996
+ def partition_root(self, keep: str | None = "last") -> "PathSeries":
997
+ return self.files.apply(lambda x: x.partition_root)[
998
+ lambda x: ~x.duplicated(keep=keep)
999
+ ]
977
1000
 
978
1001
  @property
979
1002
  def partitioned_files(self) -> "PathSeries":
@@ -1178,9 +1201,18 @@ class PathSeries(pd.Series, _PathBase):
1178
1201
 
1179
1202
  def __getattr__(self, attr: str) -> Any:
1180
1203
  """Get Path attribute for each row."""
1204
+
1205
+ def get_property(path: Path):
1206
+ x = getattr(path, attr)
1207
+ if callable(x):
1208
+ raise ValueError(
1209
+ f"{self.__class__.__name__} cannot access Path methods, only properties."
1210
+ )
1211
+ return x
1212
+
1181
1213
  if attr in dir(self._path_type) and attr != "name" and attr not in dir(str):
1182
1214
  try:
1183
- series = self.apply(lambda path: getattr(path, attr))
1215
+ series = self.apply(get_property)
1184
1216
  series.name = attr
1185
1217
  return series
1186
1218
  except IndexError as e:
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "daplapath"
3
- version = "2.0.11"
3
+ version = "2.0.13"
4
4
  description = "A pathlib.Path class for dapla"
5
5
  authors = ["ort <ort@ssb.no>"]
6
6
  license = "MIT"
File without changes
File without changes