daplapath 2.0.9__py3-none-any.whl → 2.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
daplapath/path.py CHANGED
@@ -31,6 +31,11 @@ try:
31
31
  except ImportError:
32
32
  pass
33
33
 
34
+ try:
35
+ from google.cloud import storage
36
+ except ImportError:
37
+ pass
38
+
34
39
  # regex with the prefix '_v' followed by an integer of any length
35
40
  VERSION_PATTERN = r"_v(\d+)\."
36
41
  VERSION_PREFIX = "_v"
@@ -113,15 +118,26 @@ class LocalFileSystem(AbstractFileSystem):
113
118
  return os.makedirs(path, exist_ok=exist_ok)
114
119
 
115
120
 
116
- class GCSFileSystem(gcsfs.GCSFileSystem):
121
+ class MyGCSFileSystem(gcsfs.GCSFileSystem):
117
122
  def isdir(self, path: str) -> bool:
118
123
  """Check if path is a directory."""
119
124
  info = super(gcsfs.GCSFileSystem, self).info(path)
120
125
  return info["type"] == "directory"
121
126
 
127
+ def rmdir(self, path: str) -> None:
128
+ """Remove contents of a directory in GCS. It might take some time before files are actually deleted."""
129
+ path = pathlib.Path(path)
130
+ remaining = self.glob(str(path / "**"))
131
+ assert all(self.isdir(x) for x in remaining), remaining
132
+ storage_client = storage.Client()
133
+ bucket = storage_client.get_bucket(path.parts[0])
134
+ blobs = bucket.list_blobs(prefix="/".join(path.parts) + "/")
135
+ for blob in blobs:
136
+ blob.delete()
137
+
122
138
 
123
139
  if any("dapla" in key.lower() for key in os.environ) and "gcsfs" in locals():
124
- _config = Config(GCSFileSystem)
140
+ _config = Config(MyGCSFileSystem)
125
141
  else:
126
142
  _config = Config(LocalFileSystem)
127
143
 
@@ -314,7 +330,14 @@ class Path(str, _PathBase):
314
330
  return self._cp_or_mv(destination, "cp")
315
331
 
316
332
  def mv(self, destination: "Path | str") -> "Path":
317
- return self._cp_or_mv(destination, "mv")
333
+ was_dir = self.isdir()
334
+ out_path = self._cp_or_mv(destination, "mv")
335
+ if was_dir:
336
+ try:
337
+ self.file_system.rmdir(str(self))
338
+ except (FileNotFoundError, NotADirectoryError):
339
+ pass
340
+ return out_path
318
341
 
319
342
  def versions(self, include_versionless: bool = False) -> "PathSeries":
320
343
  """Returns a PathSeries of all versions of the file."""
@@ -393,7 +416,9 @@ class Path(str, _PathBase):
393
416
  if timeout:
394
417
  timestamp: datetime.datetime = highest_numbered.timestamp
395
418
 
396
- time_should_be_at_least = pd.Timestamp.now() - pd.Timedelta(minutes=timeout)
419
+ time_should_be_at_least = pd.Timestamp.now(tz="Europe/Oslo").replace(
420
+ tzinfo=None
421
+ ).round("s") - pd.Timedelta(minutes=timeout)
397
422
  if timestamp > time_should_be_at_least:
398
423
  raise ValueError(
399
424
  f"Latest version of the file was updated {timestamp[0]}, which "
@@ -483,6 +508,8 @@ class Path(str, _PathBase):
483
508
  """
484
509
  if not isinstance(period, (str, int)):
485
510
  raise TypeError(f"'period' should be string or int. Got {type(period)}")
511
+ if not self.period:
512
+ raise ValueError(f"Cannot set period to path without period. {self}")
486
513
  if str(period) == self.period:
487
514
  return self
488
515
  return self.with_periods(period)
@@ -506,6 +533,8 @@ class Path(str, _PathBase):
506
533
  raise TypeError(
507
534
  f"'to_period' should be string or int. Got {type(to_period)}"
508
535
  )
536
+ if not self.periods:
537
+ raise ValueError(f"Cannot set period to path without period. {self}")
509
538
 
510
539
  periods: tuple[str] = (
511
540
  (str(from_period), str(to_period)) if to_period else (str(from_period),)
@@ -696,9 +725,19 @@ class Path(str, _PathBase):
696
725
 
697
726
  @property
698
727
  def partition_root(self) -> "Path":
699
- if ".parquet" not in self:
728
+ if not self.suffix or self.count(self.suffix) != 2:
700
729
  return self
701
- return self.split(".parquet")[0] + ".parquet"
730
+ return self._new(self.split(self.suffix)[0] + self.suffix)
731
+
732
+ def is_partitioned(self) -> bool:
733
+ if (
734
+ not self.suffix
735
+ or self.suffix not in self
736
+ or self.isfile()
737
+ and self.count(self.suffix) != 2
738
+ ):
739
+ return False
740
+ return bool(len(self.glob(f"**/*{self.suffix}")))
702
741
 
703
742
  def isfile(self) -> bool:
704
743
  return not self.isdir()
@@ -971,32 +1010,44 @@ class PathSeries(pd.Series, _PathBase):
971
1010
 
972
1011
  def within_minutes(self, minutes: int):
973
1012
  """Select files with a timestamp within the given number of minutes."""
974
- time_then = pd.Timestamp.now() - pd.Timedelta(minutes=minutes)
1013
+ time_then = pd.Timestamp.now(tz="Europe/Oslo").replace(tzinfo=None).round(
1014
+ "s"
1015
+ ) - pd.Timedelta(minutes=minutes)
975
1016
  return self.files[lambda x: x.timestamp > time_then]
976
1017
 
977
1018
  def within_hours(self, hours: int):
978
1019
  """Select files with a timestamp within the given number of hours."""
979
- time_then = pd.Timestamp.now() - pd.Timedelta(hours=hours)
1020
+ time_then = pd.Timestamp.now(tz="Europe/Oslo").replace(tzinfo=None).round(
1021
+ "s"
1022
+ ) - pd.Timedelta(hours=hours)
980
1023
  return self.files[lambda x: x.timestamp > time_then]
981
1024
 
982
1025
  def within_days(self, days: int):
983
1026
  """Select files with a timestamp within the given number of days."""
984
- time_then = pd.Timestamp.now() - pd.Timedelta(days=days)
1027
+ time_then = pd.Timestamp.now(tz="Europe/Oslo").replace(tzinfo=None).round(
1028
+ "s"
1029
+ ) - pd.Timedelta(days=days)
985
1030
  return self.files[lambda x: x.timestamp > time_then]
986
1031
 
987
1032
  def not_within_minutes(self, minutes: int):
988
1033
  """Select files with a timestamp within the given number of minutes."""
989
- time_then = pd.Timestamp.now() - pd.Timedelta(minutes=minutes)
1034
+ time_then = pd.Timestamp.now(tz="Europe/Oslo").replace(tzinfo=None).round(
1035
+ "s"
1036
+ ) - pd.Timedelta(minutes=minutes)
990
1037
  return self.files[lambda x: x.timestamp < time_then]
991
1038
 
992
1039
  def not_within_hours(self, hours: int):
993
1040
  """Select files with a timestamp within the given number of hours."""
994
- time_then = pd.Timestamp.now() - pd.Timedelta(hours=hours)
1041
+ time_then = pd.Timestamp.now(tz="Europe/Oslo").replace(tzinfo=None).round(
1042
+ "s"
1043
+ ) - pd.Timedelta(hours=hours)
995
1044
  return self.files[lambda x: x.timestamp < time_then]
996
1045
 
997
1046
  def not_within_days(self, days: int):
998
1047
  """Select files with a timestamp within the given number of days."""
999
- time_then = pd.Timestamp.now() - pd.Timedelta(days=days)
1048
+ time_then = pd.Timestamp.now(tz="Europe/Oslo").replace(tzinfo=None).round(
1049
+ "s"
1050
+ ) - pd.Timedelta(days=days)
1000
1051
  return self.files[lambda x: x.timestamp < time_then]
1001
1052
 
1002
1053
  @property
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: daplapath
3
- Version: 2.0.9
3
+ Version: 2.0.11
4
4
  Summary: A pathlib.Path class for dapla
5
5
  License: MIT
6
6
  Author: ort
@@ -0,0 +1,6 @@
1
+ daplapath/__init__.py,sha256=Qdpwhl8y3-i_42-4-KVT-sPQtJqwWRENLFxROZ_rfbU,86
2
+ daplapath/path.py,sha256=kGfYhBgA0eZLq26wWpM7XUbh1-xsN-KdamXsevXwbu8,56058
3
+ daplapath-2.0.11.dist-info/LICENSE.md,sha256=hxspefYgWP3U6OZFhCifqWMI5ksnKzgFxNKgQnG7Ozc,1074
4
+ daplapath-2.0.11.dist-info/METADATA,sha256=NRXrbquGmzccuj6XsFbvPZfyJZrt-ANfG-BsjO8Mb7U,14698
5
+ daplapath-2.0.11.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
6
+ daplapath-2.0.11.dist-info/RECORD,,
@@ -1,6 +0,0 @@
1
- daplapath/__init__.py,sha256=Qdpwhl8y3-i_42-4-KVT-sPQtJqwWRENLFxROZ_rfbU,86
2
- daplapath/path.py,sha256=8XFAPMmiDKxX_XLOORFJPmNf5BIkyf6drMdG--CwI94,54190
3
- daplapath-2.0.9.dist-info/LICENSE.md,sha256=hxspefYgWP3U6OZFhCifqWMI5ksnKzgFxNKgQnG7Ozc,1074
4
- daplapath-2.0.9.dist-info/METADATA,sha256=jM7z7Rgzfh2gAe6rgqA_WY0EVjr3KsDOYCDo0JcyvJY,14697
5
- daplapath-2.0.9.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
6
- daplapath-2.0.9.dist-info/RECORD,,