daplapath 2.0.1__tar.gz → 2.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {daplapath-2.0.1 → daplapath-2.0.2}/PKG-INFO +1 -1
- {daplapath-2.0.1 → daplapath-2.0.2}/daplapath/path.py +26 -5
- {daplapath-2.0.1 → daplapath-2.0.2}/pyproject.toml +1 -1
- {daplapath-2.0.1 → daplapath-2.0.2}/LICENSE.md +0 -0
- {daplapath-2.0.1 → daplapath-2.0.2}/README.md +0 -0
- {daplapath-2.0.1 → daplapath-2.0.2}/daplapath/__init__.py +0 -0
|
@@ -51,12 +51,11 @@ class Config:
|
|
|
51
51
|
class LocalFileSystem:
|
|
52
52
|
"""Mimics GCS's FileSystem but using standard library (os, glob, shutil)."""
|
|
53
53
|
|
|
54
|
-
@classmethod
|
|
54
|
+
@staticmethod
|
|
55
55
|
def glob(
|
|
56
|
-
cls,
|
|
57
56
|
path: str,
|
|
58
57
|
recursive: bool = True,
|
|
59
|
-
detail: bool =
|
|
58
|
+
detail: bool = False,
|
|
60
59
|
include_hidden: bool = False,
|
|
61
60
|
**kwargs,
|
|
62
61
|
) -> list[dict] | list[str]:
|
|
@@ -65,7 +64,7 @@ class LocalFileSystem:
|
|
|
65
64
|
)
|
|
66
65
|
|
|
67
66
|
if not detail:
|
|
68
|
-
return relevant_paths
|
|
67
|
+
return list(relevant_paths)
|
|
69
68
|
with ThreadPoolExecutor() as executor:
|
|
70
69
|
return list(executor.map(get_file_info, relevant_paths))
|
|
71
70
|
|
|
@@ -428,6 +427,8 @@ class Path(str, _PathBase):
|
|
|
428
427
|
"""
|
|
429
428
|
if not isinstance(period, (str, int)):
|
|
430
429
|
raise TypeError(f"'period' should be string or int. Got {type(period)}")
|
|
430
|
+
if str(period) == self.period:
|
|
431
|
+
return self
|
|
431
432
|
return self.with_periods(period)
|
|
432
433
|
|
|
433
434
|
def with_periods(self, from_period: str, to_period: str | None = None) -> "Path":
|
|
@@ -464,7 +465,9 @@ class Path(str, _PathBase):
|
|
|
464
465
|
parent = f"{self.parent}/" if self.parent != "." else ""
|
|
465
466
|
|
|
466
467
|
return self.__class__(
|
|
467
|
-
f"{parent}{stem}{period_string}{version_string}{self.suffix}"
|
|
468
|
+
f"{parent}{stem}{period_string}{version_string}{self.suffix}".replace(
|
|
469
|
+
"".join(self.periods), period_string.strip(self._period_prefix)
|
|
470
|
+
)
|
|
468
471
|
)
|
|
469
472
|
|
|
470
473
|
@property
|
|
@@ -553,6 +556,12 @@ class Path(str, _PathBase):
|
|
|
553
556
|
except KeyError:
|
|
554
557
|
return read_nrows(file, 1).columns
|
|
555
558
|
|
|
559
|
+
@property
|
|
560
|
+
def schema(self) -> pyarrow.Schema:
|
|
561
|
+
"""Data types of the file's columns."""
|
|
562
|
+
with self.open("rb") as file:
|
|
563
|
+
return pq.read_schema(file)
|
|
564
|
+
|
|
556
565
|
@property
|
|
557
566
|
def dtypes(self) -> pd.Series:
|
|
558
567
|
"""Data types of the file's columns."""
|
|
@@ -628,6 +637,10 @@ class Path(str, _PathBase):
|
|
|
628
637
|
"""File size in terabytes."""
|
|
629
638
|
return self.kb / 1_000_000_000
|
|
630
639
|
|
|
640
|
+
@property
|
|
641
|
+
def partition_root(self) -> "Path":
|
|
642
|
+
return self.split(".parquet")[0] + ".parquet"
|
|
643
|
+
|
|
631
644
|
def is_dir(self) -> bool:
|
|
632
645
|
try:
|
|
633
646
|
return self.file_system.isdir(self)
|
|
@@ -842,6 +855,10 @@ class PathSeries(pd.Series, _PathBase):
|
|
|
842
855
|
"""Select only the files in the Series."""
|
|
843
856
|
return self[self.is_file()]
|
|
844
857
|
|
|
858
|
+
@property
|
|
859
|
+
def partition_root(self) -> "PathSeries":
|
|
860
|
+
return self.files.apply(lambda x: x.partition_root).drop_duplicates()
|
|
861
|
+
|
|
845
862
|
@property
|
|
846
863
|
def dirs(self) -> "PathSeries":
|
|
847
864
|
"""Select only the directories in the Series."""
|
|
@@ -1469,6 +1486,10 @@ def as_str(obj) -> str:
|
|
|
1469
1486
|
raise TypeError(type(obj))
|
|
1470
1487
|
|
|
1471
1488
|
|
|
1489
|
+
def paths_are_equal(path1: Path | str, path2: Path | str) -> bool:
|
|
1490
|
+
return Path(path1).parts == Path(path2).parts
|
|
1491
|
+
|
|
1492
|
+
|
|
1472
1493
|
def sort_by_period(paths: Iterable[str]) -> Iterable[str]:
|
|
1473
1494
|
try:
|
|
1474
1495
|
periods = [pd.Timestamp(path.period) for path in paths]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|