daplapath 2.0.0__tar.gz → 2.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {daplapath-2.0.0 → daplapath-2.0.2}/PKG-INFO +1 -1
- {daplapath-2.0.0 → daplapath-2.0.2}/daplapath/path.py +27 -5
- {daplapath-2.0.0 → daplapath-2.0.2}/pyproject.toml +1 -1
- {daplapath-2.0.0 → daplapath-2.0.2}/LICENSE.md +0 -0
- {daplapath-2.0.0 → daplapath-2.0.2}/README.md +0 -0
- {daplapath-2.0.0 → daplapath-2.0.2}/daplapath/__init__.py +0 -0
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
1
2
|
from dataclasses import dataclass
|
|
2
3
|
import functools
|
|
3
4
|
from collections.abc import Iterable
|
|
@@ -50,12 +51,11 @@ class Config:
|
|
|
50
51
|
class LocalFileSystem:
|
|
51
52
|
"""Mimics GCS's FileSystem but using the standard library (os, glob, shutil)."""
|
|
52
53
|
|
|
53
|
-
@
|
|
54
|
+
@staticmethod
|
|
54
55
|
def glob(
|
|
55
|
-
cls,
|
|
56
56
|
path: str,
|
|
57
57
|
recursive: bool = True,
|
|
58
|
-
detail: bool =
|
|
58
|
+
detail: bool = False,
|
|
59
59
|
include_hidden: bool = False,
|
|
60
60
|
**kwargs,
|
|
61
61
|
) -> list[dict] | list[str]:
|
|
@@ -64,7 +64,7 @@ class LocalFileSystem:
|
|
|
64
64
|
)
|
|
65
65
|
|
|
66
66
|
if not detail:
|
|
67
|
-
return relevant_paths
|
|
67
|
+
return list(relevant_paths)
|
|
68
68
|
with ThreadPoolExecutor() as executor:
|
|
69
69
|
return list(executor.map(get_file_info, relevant_paths))
|
|
70
70
|
|
|
@@ -427,6 +427,8 @@ class Path(str, _PathBase):
|
|
|
427
427
|
"""
|
|
428
428
|
if not isinstance(period, (str, int)):
|
|
429
429
|
raise TypeError(f"'period' should be string or int. Got {type(period)}")
|
|
430
|
+
if str(period) == self.period:
|
|
431
|
+
return self
|
|
430
432
|
return self.with_periods(period)
|
|
431
433
|
|
|
432
434
|
def with_periods(self, from_period: str, to_period: str | None = None) -> "Path":
|
|
@@ -463,7 +465,9 @@ class Path(str, _PathBase):
|
|
|
463
465
|
parent = f"{self.parent}/" if self.parent != "." else ""
|
|
464
466
|
|
|
465
467
|
return self.__class__(
|
|
466
|
-
f"{parent}{stem}{period_string}{version_string}{self.suffix}"
|
|
468
|
+
f"{parent}{stem}{period_string}{version_string}{self.suffix}".replace(
|
|
469
|
+
"".join(self.periods), period_string.strip(self._period_prefix)
|
|
470
|
+
)
|
|
467
471
|
)
|
|
468
472
|
|
|
469
473
|
@property
|
|
@@ -552,6 +556,12 @@ class Path(str, _PathBase):
|
|
|
552
556
|
except KeyError:
|
|
553
557
|
return read_nrows(file, 1).columns
|
|
554
558
|
|
|
559
|
+
@property
|
|
560
|
+
def schema(self) -> pyarrow.Schema:
|
|
561
|
+
"""Schema (column names and data types) of the Parquet file."""
|
|
562
|
+
with self.open("rb") as file:
|
|
563
|
+
return pq.read_schema(file)
|
|
564
|
+
|
|
555
565
|
@property
|
|
556
566
|
def dtypes(self) -> pd.Series:
|
|
557
567
|
"""Data types of the file's columns."""
|
|
@@ -627,6 +637,10 @@ class Path(str, _PathBase):
|
|
|
627
637
|
"""File size in terabytes."""
|
|
628
638
|
return self.kb / 1_000_000_000
|
|
629
639
|
|
|
640
|
+
@property
|
|
641
|
+
def partition_root(self) -> "Path":
|
|
642
|
+
return self.split(".parquet")[0] + ".parquet"
|
|
643
|
+
|
|
630
644
|
def is_dir(self) -> bool:
|
|
631
645
|
try:
|
|
632
646
|
return self.file_system.isdir(self)
|
|
@@ -841,6 +855,10 @@ class PathSeries(pd.Series, _PathBase):
|
|
|
841
855
|
"""Select only the files in the Series."""
|
|
842
856
|
return self[self.is_file()]
|
|
843
857
|
|
|
858
|
+
@property
|
|
859
|
+
def partition_root(self) -> "PathSeries":
|
|
860
|
+
return self.files.apply(lambda x: x.partition_root).drop_duplicates()
|
|
861
|
+
|
|
844
862
|
@property
|
|
845
863
|
def dirs(self) -> "PathSeries":
|
|
846
864
|
"""Select only the directories in the Series."""
|
|
@@ -1468,6 +1486,10 @@ def as_str(obj) -> str:
|
|
|
1468
1486
|
raise TypeError(type(obj))
|
|
1469
1487
|
|
|
1470
1488
|
|
|
1489
|
+
def paths_are_equal(path1: Path | str, path2: Path | str) -> bool:
|
|
1490
|
+
return Path(path1).parts == Path(path2).parts
|
|
1491
|
+
|
|
1492
|
+
|
|
1471
1493
|
def sort_by_period(paths: Iterable[str]) -> Iterable[str]:
|
|
1472
1494
|
try:
|
|
1473
1495
|
periods = [pd.Timestamp(path.period) for path in paths]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|