daplapath 2.0.1__tar.gz → 2.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: daplapath
3
- Version: 2.0.1
3
+ Version: 2.0.2
4
4
  Summary: A pathlib.Path class for dapla
5
5
  License: MIT
6
6
  Author: ort
@@ -51,12 +51,11 @@ class Config:
51
51
  class LocalFileSystem:
52
52
  """Mimics GCS's FileSystem but using standard library (os, glob, shutil)."""
53
53
 
54
- @classmethod
54
+ @staticmethod
55
55
  def glob(
56
- cls,
57
56
  path: str,
58
57
  recursive: bool = True,
59
- detail: bool = True,
58
+ detail: bool = False,
60
59
  include_hidden: bool = False,
61
60
  **kwargs,
62
61
  ) -> list[dict] | list[str]:
@@ -65,7 +64,7 @@ class LocalFileSystem:
65
64
  )
66
65
 
67
66
  if not detail:
68
- return relevant_paths
67
+ return list(relevant_paths)
69
68
  with ThreadPoolExecutor() as executor:
70
69
  return list(executor.map(get_file_info, relevant_paths))
71
70
 
@@ -428,6 +427,8 @@ class Path(str, _PathBase):
428
427
  """
429
428
  if not isinstance(period, (str, int)):
430
429
  raise TypeError(f"'period' should be string or int. Got {type(period)}")
430
+ if str(period) == self.period:
431
+ return self
431
432
  return self.with_periods(period)
432
433
 
433
434
  def with_periods(self, from_period: str, to_period: str | None = None) -> "Path":
@@ -464,7 +465,9 @@ class Path(str, _PathBase):
464
465
  parent = f"{self.parent}/" if self.parent != "." else ""
465
466
 
466
467
  return self.__class__(
467
- f"{parent}{stem}{period_string}{version_string}{self.suffix}"
468
+ f"{parent}{stem}{period_string}{version_string}{self.suffix}".replace(
469
+ "".join(self.periods), period_string.strip(self._period_prefix)
470
+ )
468
471
  )
469
472
 
470
473
  @property
@@ -553,6 +556,12 @@ class Path(str, _PathBase):
553
556
  except KeyError:
554
557
  return read_nrows(file, 1).columns
555
558
 
559
+ @property
560
+ def schema(self) -> pyarrow.Schema:
561
+ """Data types of the file's columns, as a pyarrow Schema."""
562
+ with self.open("rb") as file:
563
+ return pq.read_schema(file)
564
+
556
565
  @property
557
566
  def dtypes(self) -> pd.Series:
558
567
  """Data types of the file's columns."""
@@ -628,6 +637,10 @@ class Path(str, _PathBase):
628
637
  """File size in terabytes."""
629
638
  return self.kb / 1_000_000_000
630
639
 
640
+ @property
641
+ def partition_root(self) -> "Path":
642
+ return self.split(".parquet")[0] + ".parquet"
643
+
631
644
  def is_dir(self) -> bool:
632
645
  try:
633
646
  return self.file_system.isdir(self)
@@ -842,6 +855,10 @@ class PathSeries(pd.Series, _PathBase):
842
855
  """Select only the files in the Series."""
843
856
  return self[self.is_file()]
844
857
 
858
+ @property
859
+ def partition_root(self) -> "PathSeries":
860
+ return self.files.apply(lambda x: x.partition_root).drop_duplicates()
861
+
845
862
  @property
846
863
  def dirs(self) -> "PathSeries":
847
864
  """Select only the directories in the Series."""
@@ -1469,6 +1486,10 @@ def as_str(obj) -> str:
1469
1486
  raise TypeError(type(obj))
1470
1487
 
1471
1488
 
1489
+ def paths_are_equal(path1: Path | str, path2: Path | str) -> bool:
1490
+ return Path(path1).parts == Path(path2).parts
1491
+
1492
+
1472
1493
  def sort_by_period(paths: Iterable[str]) -> Iterable[str]:
1473
1494
  try:
1474
1495
  periods = [pd.Timestamp(path.period) for path in paths]
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "daplapath"
3
- version = "2.0.1"
3
+ version = "2.0.2"
4
4
  description = "A pathlib.Path class for dapla"
5
5
  authors = ["ort <ort@ssb.no>"]
6
6
  license = "MIT"
File without changes
File without changes