daplapath 2.0.9__py3-none-any.whl → 2.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- daplapath/path.py +63 -12
- {daplapath-2.0.9.dist-info → daplapath-2.0.11.dist-info}/METADATA +1 -1
- daplapath-2.0.11.dist-info/RECORD +6 -0
- daplapath-2.0.9.dist-info/RECORD +0 -6
- {daplapath-2.0.9.dist-info → daplapath-2.0.11.dist-info}/LICENSE.md +0 -0
- {daplapath-2.0.9.dist-info → daplapath-2.0.11.dist-info}/WHEEL +0 -0
daplapath/path.py
CHANGED
|
@@ -31,6 +31,11 @@ try:
|
|
|
31
31
|
except ImportError:
|
|
32
32
|
pass
|
|
33
33
|
|
|
34
|
+
try:
|
|
35
|
+
from google.cloud import storage
|
|
36
|
+
except ImportError:
|
|
37
|
+
pass
|
|
38
|
+
|
|
34
39
|
# regex with the prefix '_v' followed by an integer of any length
|
|
35
40
|
VERSION_PATTERN = r"_v(\d+)\."
|
|
36
41
|
VERSION_PREFIX = "_v"
|
|
@@ -113,15 +118,26 @@ class LocalFileSystem(AbstractFileSystem):
|
|
|
113
118
|
return os.makedirs(path, exist_ok=exist_ok)
|
|
114
119
|
|
|
115
120
|
|
|
116
|
-
class
|
|
121
|
+
class MyGCSFileSystem(gcsfs.GCSFileSystem):
|
|
117
122
|
def isdir(self, path: str) -> bool:
|
|
118
123
|
"""Check if path is a directory."""
|
|
119
124
|
info = super(gcsfs.GCSFileSystem, self).info(path)
|
|
120
125
|
return info["type"] == "directory"
|
|
121
126
|
|
|
127
|
+
def rmdir(self, path: str) -> None:
|
|
128
|
+
"""Remove contents of a directory in GCS. It might take some time before files are actually deleted."""
|
|
129
|
+
path = pathlib.Path(path)
|
|
130
|
+
remaining = self.glob(str(path / "**"))
|
|
131
|
+
assert all(self.isdir(x) for x in remaining), remaining
|
|
132
|
+
storage_client = storage.Client()
|
|
133
|
+
bucket = storage_client.get_bucket(path.parts[0])
|
|
134
|
+
blobs = bucket.list_blobs(prefix="/".join(path.parts) + "/")
|
|
135
|
+
for blob in blobs:
|
|
136
|
+
blob.delete()
|
|
137
|
+
|
|
122
138
|
|
|
123
139
|
if any("dapla" in key.lower() for key in os.environ) and "gcsfs" in locals():
|
|
124
|
-
_config = Config(
|
|
140
|
+
_config = Config(MyGCSFileSystem)
|
|
125
141
|
else:
|
|
126
142
|
_config = Config(LocalFileSystem)
|
|
127
143
|
|
|
@@ -314,7 +330,14 @@ class Path(str, _PathBase):
|
|
|
314
330
|
return self._cp_or_mv(destination, "cp")
|
|
315
331
|
|
|
316
332
|
def mv(self, destination: "Path | str") -> "Path":
|
|
317
|
-
|
|
333
|
+
was_dir = self.isdir()
|
|
334
|
+
out_path = self._cp_or_mv(destination, "mv")
|
|
335
|
+
if was_dir:
|
|
336
|
+
try:
|
|
337
|
+
self.file_system.rmdir(str(self))
|
|
338
|
+
except (FileNotFoundError, NotADirectoryError):
|
|
339
|
+
pass
|
|
340
|
+
return out_path
|
|
318
341
|
|
|
319
342
|
def versions(self, include_versionless: bool = False) -> "PathSeries":
|
|
320
343
|
"""Returns a PathSeries of all versions of the file."""
|
|
@@ -393,7 +416,9 @@ class Path(str, _PathBase):
|
|
|
393
416
|
if timeout:
|
|
394
417
|
timestamp: datetime.datetime = highest_numbered.timestamp
|
|
395
418
|
|
|
396
|
-
time_should_be_at_least = pd.Timestamp.now()
|
|
419
|
+
time_should_be_at_least = pd.Timestamp.now(tz="Europe/Oslo").replace(
|
|
420
|
+
tzinfo=None
|
|
421
|
+
).round("s") - pd.Timedelta(minutes=timeout)
|
|
397
422
|
if timestamp > time_should_be_at_least:
|
|
398
423
|
raise ValueError(
|
|
399
424
|
f"Latest version of the file was updated {timestamp[0]}, which "
|
|
@@ -483,6 +508,8 @@ class Path(str, _PathBase):
|
|
|
483
508
|
"""
|
|
484
509
|
if not isinstance(period, (str, int)):
|
|
485
510
|
raise TypeError(f"'period' should be string or int. Got {type(period)}")
|
|
511
|
+
if not self.period:
|
|
512
|
+
raise ValueError(f"Cannot set period to path without period. {self}")
|
|
486
513
|
if str(period) == self.period:
|
|
487
514
|
return self
|
|
488
515
|
return self.with_periods(period)
|
|
@@ -506,6 +533,8 @@ class Path(str, _PathBase):
|
|
|
506
533
|
raise TypeError(
|
|
507
534
|
f"'to_period' should be string or int. Got {type(to_period)}"
|
|
508
535
|
)
|
|
536
|
+
if not self.periods:
|
|
537
|
+
raise ValueError(f"Cannot set period to path without period. {self}")
|
|
509
538
|
|
|
510
539
|
periods: tuple[str] = (
|
|
511
540
|
(str(from_period), str(to_period)) if to_period else (str(from_period),)
|
|
@@ -696,9 +725,19 @@ class Path(str, _PathBase):
|
|
|
696
725
|
|
|
697
726
|
@property
|
|
698
727
|
def partition_root(self) -> "Path":
|
|
699
|
-
if
|
|
728
|
+
if not self.suffix or self.count(self.suffix) != 2:
|
|
700
729
|
return self
|
|
701
|
-
return self.split(
|
|
730
|
+
return self._new(self.split(self.suffix)[0] + self.suffix)
|
|
731
|
+
|
|
732
|
+
def is_partitioned(self) -> bool:
|
|
733
|
+
if (
|
|
734
|
+
not self.suffix
|
|
735
|
+
or self.suffix not in self
|
|
736
|
+
or self.isfile()
|
|
737
|
+
and self.count(self.suffix) != 2
|
|
738
|
+
):
|
|
739
|
+
return False
|
|
740
|
+
return bool(len(self.glob(f"**/*{self.suffix}")))
|
|
702
741
|
|
|
703
742
|
def isfile(self) -> bool:
|
|
704
743
|
return not self.isdir()
|
|
@@ -971,32 +1010,44 @@ class PathSeries(pd.Series, _PathBase):
|
|
|
971
1010
|
|
|
972
1011
|
def within_minutes(self, minutes: int):
|
|
973
1012
|
"""Select files with a timestamp within the given number of minutes."""
|
|
974
|
-
time_then = pd.Timestamp.now()
|
|
1013
|
+
time_then = pd.Timestamp.now(tz="Europe/Oslo").replace(tzinfo=None).round(
|
|
1014
|
+
"s"
|
|
1015
|
+
) - pd.Timedelta(minutes=minutes)
|
|
975
1016
|
return self.files[lambda x: x.timestamp > time_then]
|
|
976
1017
|
|
|
977
1018
|
def within_hours(self, hours: int):
|
|
978
1019
|
"""Select files with a timestamp within the given number of hours."""
|
|
979
|
-
time_then = pd.Timestamp.now()
|
|
1020
|
+
time_then = pd.Timestamp.now(tz="Europe/Oslo").replace(tzinfo=None).round(
|
|
1021
|
+
"s"
|
|
1022
|
+
) - pd.Timedelta(hours=hours)
|
|
980
1023
|
return self.files[lambda x: x.timestamp > time_then]
|
|
981
1024
|
|
|
982
1025
|
def within_days(self, days: int):
|
|
983
1026
|
"""Select files with a timestamp within the given number of days."""
|
|
984
|
-
time_then = pd.Timestamp.now()
|
|
1027
|
+
time_then = pd.Timestamp.now(tz="Europe/Oslo").replace(tzinfo=None).round(
|
|
1028
|
+
"s"
|
|
1029
|
+
) - pd.Timedelta(days=days)
|
|
985
1030
|
return self.files[lambda x: x.timestamp > time_then]
|
|
986
1031
|
|
|
987
1032
|
def not_within_minutes(self, minutes: int):
|
|
988
1033
|
"""Select files with a timestamp within the given number of minutes."""
|
|
989
|
-
time_then = pd.Timestamp.now()
|
|
1034
|
+
time_then = pd.Timestamp.now(tz="Europe/Oslo").replace(tzinfo=None).round(
|
|
1035
|
+
"s"
|
|
1036
|
+
) - pd.Timedelta(minutes=minutes)
|
|
990
1037
|
return self.files[lambda x: x.timestamp < time_then]
|
|
991
1038
|
|
|
992
1039
|
def not_within_hours(self, hours: int):
|
|
993
1040
|
"""Select files with a timestamp within the given number of hours."""
|
|
994
|
-
time_then = pd.Timestamp.now()
|
|
1041
|
+
time_then = pd.Timestamp.now(tz="Europe/Oslo").replace(tzinfo=None).round(
|
|
1042
|
+
"s"
|
|
1043
|
+
) - pd.Timedelta(hours=hours)
|
|
995
1044
|
return self.files[lambda x: x.timestamp < time_then]
|
|
996
1045
|
|
|
997
1046
|
def not_within_days(self, days: int):
|
|
998
1047
|
"""Select files with a timestamp within the given number of days."""
|
|
999
|
-
time_then = pd.Timestamp.now()
|
|
1048
|
+
time_then = pd.Timestamp.now(tz="Europe/Oslo").replace(tzinfo=None).round(
|
|
1049
|
+
"s"
|
|
1050
|
+
) - pd.Timedelta(days=days)
|
|
1000
1051
|
return self.files[lambda x: x.timestamp < time_then]
|
|
1001
1052
|
|
|
1002
1053
|
@property
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
daplapath/__init__.py,sha256=Qdpwhl8y3-i_42-4-KVT-sPQtJqwWRENLFxROZ_rfbU,86
|
|
2
|
+
daplapath/path.py,sha256=kGfYhBgA0eZLq26wWpM7XUbh1-xsN-KdamXsevXwbu8,56058
|
|
3
|
+
daplapath-2.0.11.dist-info/LICENSE.md,sha256=hxspefYgWP3U6OZFhCifqWMI5ksnKzgFxNKgQnG7Ozc,1074
|
|
4
|
+
daplapath-2.0.11.dist-info/METADATA,sha256=NRXrbquGmzccuj6XsFbvPZfyJZrt-ANfG-BsjO8Mb7U,14698
|
|
5
|
+
daplapath-2.0.11.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
6
|
+
daplapath-2.0.11.dist-info/RECORD,,
|
daplapath-2.0.9.dist-info/RECORD
DELETED
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
daplapath/__init__.py,sha256=Qdpwhl8y3-i_42-4-KVT-sPQtJqwWRENLFxROZ_rfbU,86
|
|
2
|
-
daplapath/path.py,sha256=8XFAPMmiDKxX_XLOORFJPmNf5BIkyf6drMdG--CwI94,54190
|
|
3
|
-
daplapath-2.0.9.dist-info/LICENSE.md,sha256=hxspefYgWP3U6OZFhCifqWMI5ksnKzgFxNKgQnG7Ozc,1074
|
|
4
|
-
daplapath-2.0.9.dist-info/METADATA,sha256=jM7z7Rgzfh2gAe6rgqA_WY0EVjr3KsDOYCDo0JcyvJY,14697
|
|
5
|
-
daplapath-2.0.9.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
6
|
-
daplapath-2.0.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|