daplapath 2.0.0__tar.gz → 2.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: daplapath
3
- Version: 2.0.0
3
+ Version: 2.0.2
4
4
  Summary: A pathlib.Path class for dapla
5
5
  License: MIT
6
6
  Author: ort
@@ -1,3 +1,4 @@
1
+ from __future__ import annotations
1
2
  from dataclasses import dataclass
2
3
  import functools
3
4
  from collections.abc import Iterable
@@ -50,12 +51,11 @@ class Config:
50
51
  class LocalFileSystem:
51
52
  """Mimics GCS's FileSystem but using standard library (os, glob, shutil)."""
52
53
 
53
- @classmethod
54
+ @staticmethod
54
55
  def glob(
55
- cls,
56
56
  path: str,
57
57
  recursive: bool = True,
58
- detail: bool = True,
58
+ detail: bool = False,
59
59
  include_hidden: bool = False,
60
60
  **kwargs,
61
61
  ) -> list[dict] | list[str]:
@@ -64,7 +64,7 @@ class LocalFileSystem:
64
64
  )
65
65
 
66
66
  if not detail:
67
- return relevant_paths
67
+ return list(relevant_paths)
68
68
  with ThreadPoolExecutor() as executor:
69
69
  return list(executor.map(get_file_info, relevant_paths))
70
70
 
@@ -427,6 +427,8 @@ class Path(str, _PathBase):
427
427
  """
428
428
  if not isinstance(period, (str, int)):
429
429
  raise TypeError(f"'period' should be string or int. Got {type(period)}")
430
+ if str(period) == self.period:
431
+ return self
430
432
  return self.with_periods(period)
431
433
 
432
434
  def with_periods(self, from_period: str, to_period: str | None = None) -> "Path":
@@ -463,7 +465,9 @@ class Path(str, _PathBase):
463
465
  parent = f"{self.parent}/" if self.parent != "." else ""
464
466
 
465
467
  return self.__class__(
466
- f"{parent}{stem}{period_string}{version_string}{self.suffix}"
468
+ f"{parent}{stem}{period_string}{version_string}{self.suffix}".replace(
469
+ "".join(self.periods), period_string.strip(self._period_prefix)
470
+ )
467
471
  )
468
472
 
469
473
  @property
@@ -552,6 +556,12 @@ class Path(str, _PathBase):
552
556
  except KeyError:
553
557
  return read_nrows(file, 1).columns
554
558
 
559
+ @property
560
+ def schema(self) -> pyarrow.Schema:
561
  """Data types of the file's columns."""
562
+ with self.open("rb") as file:
563
+ return pq.read_schema(file)
564
+
555
565
  @property
556
566
  def dtypes(self) -> pd.Series:
557
567
  """Data types of the file's columns."""
@@ -627,6 +637,10 @@ class Path(str, _PathBase):
627
637
  """File size in terabytes."""
628
638
  return self.kb / 1_000_000_000
629
639
 
640
+ @property
641
+ def partition_root(self) -> "Path":
642
+ return self.split(".parquet")[0] + ".parquet"
643
+
630
644
  def is_dir(self) -> bool:
631
645
  try:
632
646
  return self.file_system.isdir(self)
@@ -841,6 +855,10 @@ class PathSeries(pd.Series, _PathBase):
841
855
  """Select only the files in the Series."""
842
856
  return self[self.is_file()]
843
857
 
858
+ @property
859
+ def partition_root(self) -> "PathSeries":
860
+ return self.files.apply(lambda x: x.partition_root).drop_duplicates()
861
+
844
862
  @property
845
863
  def dirs(self) -> "PathSeries":
846
864
  """Select only the directories in the Series."""
@@ -1468,6 +1486,10 @@ def as_str(obj) -> str:
1468
1486
  raise TypeError(type(obj))
1469
1487
 
1470
1488
 
1489
+ def paths_are_equal(path1: Path | str, path2: Path | str) -> bool:
1490
+ return Path(path1).parts == Path(path2).parts
1491
+
1492
+
1471
1493
  def sort_by_period(paths: Iterable[str]) -> Iterable[str]:
1472
1494
  try:
1473
1495
  periods = [pd.Timestamp(path.period) for path in paths]
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "daplapath"
3
- version = "2.0.0"
3
+ version = "2.0.2"
4
4
  description = "A pathlib.Path class for dapla"
5
5
  authors = ["ort <ort@ssb.no>"]
6
6
  license = "MIT"
File without changes
File without changes