daplapath 2.1.2__tar.gz → 2.1.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {daplapath-2.1.2 → daplapath-2.1.4}/PKG-INFO +1 -1
- daplapath-2.1.4/daplapath/__init__.py +1 -0
- {daplapath-2.1.2 → daplapath-2.1.4}/daplapath/path.py +222 -142
- {daplapath-2.1.2 → daplapath-2.1.4}/pyproject.toml +1 -1
- daplapath-2.1.2/daplapath/__init__.py +0 -3
- {daplapath-2.1.2 → daplapath-2.1.4}/LICENSE.md +0 -0
- {daplapath-2.1.2 → daplapath-2.1.4}/README.md +0 -0
daplapath-2.1.4/daplapath/__init__.py
@@ -0,0 +1 @@
+from .path import LocalFileSystem, Path, PathSeries, config
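The new `__init__.py` re-exports the package's public names from `daplapath.path`, so they can be imported from the package root. A minimal usage sketch (the bucket path below is made up):

```python
# Hypothetical example of the new top-level import surface.
from daplapath import LocalFileSystem, Path, PathSeries, config

p = Path("ssb-demo-data-produkt-prod/inndata/file_p2024_v1.parquet")  # made-up path
print(isinstance(p, str))  # True: Path subclasses str
print(type(config.fs))     # LocalFileSystem outside Dapla, MyGCSFileSystem inside
```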
{daplapath-2.1.2 → daplapath-2.1.4}/daplapath/path.py
@@ -1,30 +1,30 @@
 from __future__ import annotations
-
+
+import datetime
 import functools
-from collections.abc import Iterable
-from concurrent.futures import ThreadPoolExecutor
 import glob
+import inspect
+import io
+import itertools
 import json
+import os
 import pathlib
-from pathlib import PurePosixPath, PurePath
 import re
-import io
-import os
 import shutil
-from
-import
-import
+from collections.abc import Callable, Iterable
+from concurrent.futures import ThreadPoolExecutor
+from dataclasses import dataclass
+from pathlib import PurePath
+from typing import Any
 
-from fsspec.spec import AbstractFileSystem
-import datetime
 import numpy as np
 import pandas as pd
 import pandas.io.formats.format as fmt
-from pandas.api.types import is_dict_like
 import pyarrow
-import pyarrow.parquet as pq
 import pyarrow.dataset as ds
-
+import pyarrow.parquet as pq
+from fsspec.spec import AbstractFileSystem
+from pandas.api.types import is_dict_like
 
 try:
     import gcsfs
@@ -48,10 +48,18 @@ PERIOD_PREFIX = "_p"
 INDEX_NAMES = ["timestamp", "mb", "type"]
 
 
-@dataclass
+@dataclass(slots=True)
 class Config:
-
-
+    fs: Callable
+    team: str | None
+    env: str
+    default_protocol: str = "gs"
+    bucket_pattern: str = "{default_protocol}://ssb-{team}-data-{bucket}-prod"
+
+    def __getitem__(self, key: str) -> Any:
+        if not hasattr(self, key):
+            raise KeyError(key)
+        return getattr(self, key)
 
 
 class LocalFileSystem(AbstractFileSystem):
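`Config` is now a slotted dataclass whose instances also support dict-style lookup. A self-contained sketch of the same pattern on Python 3.10+ (the `fs=dict` placeholder is arbitrary, not the library's real default):

```python
from dataclasses import dataclass
from typing import Any, Callable


@dataclass(slots=True)
class Config:
    fs: Callable
    team: str | None
    env: str
    default_protocol: str = "gs"

    def __getitem__(self, key: str) -> Any:
        # Mirror attribute access so config["team"] == config.team.
        if not hasattr(self, key):
            raise KeyError(key)
        return getattr(self, key)


cfg = Config(fs=dict, team="demo", env="prod")
assert cfg["team"] == cfg.team == "demo"
# slots=True: assigning an unknown attribute raises AttributeError.
```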
@@ -72,7 +80,7 @@ class LocalFileSystem(AbstractFileSystem):
         if not detail:
             return list(relevant_paths)
         with ThreadPoolExecutor() as executor:
-            return
+            return {x["name"]: x for x in executor.map(get_file_info, relevant_paths)}
 
     @classmethod
     def ls(cls, path: str, detail: bool = False, **kwargs):
@@ -109,6 +117,10 @@ class LocalFileSystem(AbstractFileSystem):
     def rm_file(path: str, *args, **kwargs) -> None:
         return os.remove(path, *args, **kwargs)
 
+    @staticmethod
+    def rm(path: str, *args, **kwargs) -> None:
+        return os.remove(path, *args, **kwargs)
+
     @staticmethod
     def rmdir(path: str, *args, **kwargs) -> None:
         return shutil.rmtree(path, *args, **kwargs)
@@ -139,9 +151,19 @@ class MyGCSFileSystem(gcsfs.GCSFileSystem):
 
 
 if any("dapla" in key.lower() for key in os.environ) and "gcsfs" in locals():
-
+    _fs = MyGCSFileSystem()
 else:
-
+    _fs = LocalFileSystem()
+
+config = Config(
+    fs=_fs,
+    team=os.environ.get("DAPLA_GROUP_CONTEXT", "")
+    .replace("-developers", "")
+    .replace("-data-admins", "")
+    or None,
+    env=os.environ.get("DAPLA_ENVIRONMENT", "prod").lower(),
+)
+del _fs
 
 
 class Tree:
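The module-level `config` is built from environment variables at import time. A standalone sketch of the same derivation with made-up values:

```python
import os

# Example values only; on Dapla these are set by the platform.
os.environ["DAPLA_GROUP_CONTEXT"] = "demo-team-developers"
os.environ["DAPLA_ENVIRONMENT"] = "TEST"

team = (
    os.environ.get("DAPLA_GROUP_CONTEXT", "")
    .replace("-developers", "")
    .replace("-data-admins", "")
    or None
)
env = os.environ.get("DAPLA_ENVIRONMENT", "prod").lower()
print(team, env)  # demo-team test
```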
@@ -177,12 +199,12 @@ class _PathBase:
     _period_prefix: str = PERIOD_PREFIX
 
     @staticmethod
-    def
+    def set_config(pat: str, value: Any) -> None:
         """Change config variable."""
-        setattr(
+        setattr(config, pat, value)
 
     @property
-    def
+    def _fs_constructor(self) -> Callable | type:
         """Can be overridden in subclass.
 
         Must return a function or a class that, when called,
@@ -191,13 +213,13 @@ class _PathBase:
         The 'info' method should return a dict like with at least the keys
         'updated', 'size', 'name' and 'type'.
         """
-        return
+        return config.fs
 
 
 class Path(str, _PathBase):
     """Path object that works like a string, with methods for working with the GCS file system."""
 
-
+    _fs_attrs: set[str] = {
         "info",
         "isdir",
         "open",
@@ -214,35 +236,71 @@ class Path(str, _PathBase):
         return PathSeries
 
     @staticmethod
-    def _standardize_path(path: str |
+    def _standardize_path(path: str | PurePath) -> str:
         """Make sure delimiter is '/' and path ends without '/'."""
-        return (
-            str(path)
-            .replace("\\", "/")
-            .replace(r"\"", "/")
-            .replace("//", "/")
-            .rstrip("/")
-        )
+        return str(path).replace("\\", "/").replace(r"\"", "/")
 
-    def __new__(cls, gcs_path: str |
+    def __new__(cls, gcs_path: str | os.PathLike | None = None, fs=None):
         """Construct Path with '/' as delimiter."""
         gcs_path = cls._standardize_path(gcs_path or "")
         obj = super().__new__(cls, gcs_path)
-
-
+        if fs is not None:
+            obj._fs = fs
+        elif gcs_path.startswith("/buckets"):
+            obj._fs = LocalFileSystem()
+        else:
+            obj._fs = config.fs
         return obj
 
     def buckets_path(self) -> "Path":
-        if self.startswith("/buckets"):
-            return self
-
-        root = self.parts[0]
-        bucket = root.split("-data-")[-1].split("-prod")[0]
-
         try:
-
-        except
-
+            protocol, _ = str(self).split("://")
+        except ValueError:
+            protocol = ""
+        root, *subdirs = str(self).replace(f"{protocol}://", "").split("/")
+        bucket = root.split("-data-")[-1].split(f"-{config.env}")[0]
+        if config.team in root:
+            new_root = "/buckets"
+            team = config.team
+        else:
+            team = root.split("-data-")[0]
+            team = team.lstrip(team.split("-")[0]).strip("-")
+            bucket = bucket.replace("delt-", "")
+            new_root = f"/buckets/shared/{team}"
+
+        subdirs = "/".join(subdirs).strip("/")
+        if subdirs:
+            return self.__class__(f"{new_root}/{bucket}/{subdirs}", self.fs)
+        else:
+            return self.__class__(f"{new_root}/{bucket}", self.fs)
+
+    def gs_path(self) -> "Path":
+        if not str(self).startswith("/buckets/"):
+            raise ValueError(
+                f"Can only convert paths starting with '/buckets/' to GCS path. Got {self}"
+            )
+        if "/shared/" in str(self):
+            team, bucket, *subdirs = str(self).split("/shared/")[1].split("/")
+            bucket = "delt-" + bucket
+        elif not config.team:
+            raise ValueError(
+                "Must set config.team (hint: from daplapath import config; config.team = 'name')"
+            )
+        else:
+            team = config.team
+            bucket, *subdirs = str(self).replace("/buckets/", "").split("/")
+
+        gspath = (
+            config.bucket_pattern.format(
+                team=team,
+                bucket=bucket,
+                env=config.env,
+                default_protocol=config.default_protocol,
+            )
+            + "/"
+            + "/".join(subdirs)
+        )
+        return self.__class__(gspath, self.fs)
 
     def tree(
         self,
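The new `buckets_path()` and `gs_path()` convert between `gs://` bucket URLs and `/buckets/...` mount-style paths. A rough sketch of the intended round trip, assuming `config.env == "prod"` and a made-up team and bucket:

```python
from daplapath import Path, config

config.team = "demo"  # gs_path() needs a team for paths not under /buckets/shared/

local_style = Path("/buckets/produkt/inndata/file_p2024_v1.parquet")
gs_style = local_style.gs_path()
print(gs_style)
# expected: gs://ssb-demo-data-produkt-prod/inndata/file_p2024_v1.parquet
print(gs_style.buckets_path())
# expected to round-trip back to /buckets/produkt/inndata/file_p2024_v1.parquet
```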
@@ -274,7 +332,6 @@ class Path(str, _PathBase):
         self, pattern: str | None = None, recursive: bool = True, **kwargs
     ) -> "PathSeries":
         """Create PathSeries of files/directories that match the pattern."""
-
         recursive = kwargs.get("recurse_symlinks", recursive)
 
         if pattern:
@@ -290,17 +347,17 @@
 
         kwargs["detail"] = True
 
-        if "recursive" in get_arguments(self.
+        if "recursive" in get_arguments(self.fs.glob):
             kwargs["recursive"] = recursive
         else:
-            # try to set to non-recursive if
+            # try to set to non-recursive if fs.glob allows argument 'maxdepth'
            kwargs["maxdepth"] = None if recursive else 1
 
         try:
-            info: list[dict] | dict = self.
+            info: list[dict] | dict = self.fs.glob(pattern, **kwargs)
         except TypeError:
             kwargs.pop("maxdepth", None)
-            info: list[dict] | dict = self.
+            info: list[dict] | dict = self.fs.glob(pattern, **kwargs)
 
         if isinstance(info, dict):
             # file system can return single dict if only one file path
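The glob wrapper now inspects `self.fs.glob` to decide whether to pass `recursive` or fall back to `maxdepth`. `get_arguments` itself is not shown in this diff; the helper below is an assumed stand-in built on `inspect.signature`:

```python
import inspect


def get_arguments(func) -> set[str]:
    # Assumed stand-in: the parameter names a callable accepts.
    return set(inspect.signature(func).parameters)


def glob_a(pattern, recursive=True): ...
def glob_b(pattern, maxdepth=None): ...


print("recursive" in get_arguments(glob_a))  # True  -> pass recursive=...
print("recursive" in get_arguments(glob_b))  # False -> fall back to maxdepth=...
```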
@@ -324,10 +381,17 @@ class Path(str, _PathBase):
         """
         return self.glob("**", recursive=recursive, **kwargs)
 
+    def unlink(self, missing_ok: bool = False) -> None:
+        if not self.exists():
+            if not missing_ok:
+                raise FileNotFoundError(str(self))
+            return
+        return self.fs.rm(recursive=False)
+
     def rmdir(self) -> None:
         files = self.glob("**").files
         with ThreadPoolExecutor() as executor:
-            list(executor.map(self.
+            list(executor.map(self.fs.rm_file, files))
 
     def cp(self, destination: "Path | str") -> "Path":
         return self._cp_or_mv(destination, "cp")
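The added `unlink()` follows the `pathlib.Path.unlink(missing_ok=...)` contract: a missing file raises `FileNotFoundError` unless `missing_ok=True`. A daplapath-independent illustration of that contract using `pathlib` itself:

```python
import pathlib
import tempfile

tmp = pathlib.Path(tempfile.mkdtemp()) / "example.txt"
tmp.write_text("hello")

tmp.unlink()                 # removes the file
tmp.unlink(missing_ok=True)  # second call is a silent no-op
try:
    tmp.unlink()             # default missing_ok=False raises
except FileNotFoundError:
    print("raised as expected")
```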
@@ -337,28 +401,25 @@ class Path(str, _PathBase):
         out_path = self._cp_or_mv(destination, "mv")
         if was_dir:
             try:
-                self.
+                self.fs.rmdir(str(self))
             except (FileNotFoundError, NotADirectoryError):
                 pass
         return out_path
 
-    def read_text(self, *args, **kwargs):
-        return self._path.read_text(*args, **kwargs)
-
     def versions(self, include_versionless: bool = False) -> "PathSeries":
         """Returns a PathSeries of all versions of the file."""
-        files_in_folder: Iterable[Path] = self.parent.glob(
+        files_in_folder: Iterable[Path] = self.parent.glob(
+            f"*{self.suffix}", recursive=False
+        )
 
         if self.version_number:
-            start, _
+            start, *_ = re.split(self._version_pattern, self.name)
         else:
-            start
+            start = self.stem
 
         # create boolean mask. With numpy to make it work with both pandas and list
         arr = np.array(files_in_folder)
-        is_version_of_this_file =
-            np_str_endswith(arr, end)
-        )
+        is_version_of_this_file = np_str_contains(arr, start)
         if not include_versionless:
             is_version_of_this_file &= np_str_matches(arr, self._version_pattern)
 
@@ -382,7 +443,7 @@ class Path(str, _PathBase):
         Lists files in the parent directory with the same versionless stem
         and selects the one with the highest version number.
 
-        Returns
+        Returns:
         -------
         A Path.
         """
@@ -405,11 +466,11 @@ class Path(str, _PathBase):
             Minutes needed between the timestamp of the current highest
             numbered version.
 
-        Returns
+        Returns:
         ------
         A Path with a new version number.
 
-        Raises
+        Raises:
         ------
         ValueError:
             If the method is run before the timeout period is up.
@@ -425,7 +486,7 @@ class Path(str, _PathBase):
         time_should_be_at_least = pd.Timestamp.now(tz="Europe/Oslo").replace(
             tzinfo=None
         ).round("s") - pd.Timedelta(minutes=timeout)
-        if timestamp > time_should_be_at_least:
+        if timestamp is not None and timestamp > time_should_be_at_least:
             raise ValueError(
                 f"Latest version of the file was updated {timestamp[0]}, which "
                 f"is less than the timeout period of {timeout} minutes. "
@@ -439,7 +500,7 @@ class Path(str, _PathBase):
     def with_version(self, version: int | None) -> "Path":
         """Replace the Path's version number, if any, with a new version number.
 
-        Examples
+        Examples:
         --------
         >>> Path('file.parquet').with_version(1)
         'file_v1.parquet'
@@ -456,13 +517,13 @@ class Path(str, _PathBase):
         self, include_versionless: bool = False
     ) -> "PathSeries":
         """Returns a PathSeries of all periods of the file."""
-        files_in_folder: Iterable[Path] = self.parent.glob(
+        files_in_folder: Iterable[Path] = self.parent.glob(
+            f"*{self.suffix}", recursive=False
+        )
 
         # create boolean mask. With numpy to make it work with both pandas and list
         arr = np.array(files_in_folder)
-        is_version_of_this_file = (
-            np_str_contains(arr, self.periodless_stem)
-        ) & np_str_endswith(arr, self.suffix)
+        is_version_of_this_file = np_str_contains(arr, self.periodless_stem)
         if not include_versionless:
             is_version_of_this_file &= np_str_matches(arr, self._version_pattern)
 
@@ -486,12 +547,12 @@ class Path(str, _PathBase):
         Lists files in the parent directory with the same
         versionless and periodless stem and selects the path that sorts last.
 
-        Raises
+        Raises:
         ------
         ValueError: If there is mismatch in period patterns, e.g. if one
             path has the period "2020-01-01" and one path has "2021".
 
-        Returns
+        Returns:
         -------
         A Path.
         """
@@ -500,14 +561,14 @@ class Path(str, _PathBase):
                 include_versionless=False
             )
             sorted_paths = sort_by_period(period_paths)
-            return
+            return list(sorted_paths)[-1]
         except (IndexError, StopIteration) as e:
             raise FileNotFoundError(self) from e
 
     def with_period(self, period: str) -> "Path":
         """Replace the Path's period, if any, with a new periods.
 
-        Examples
+        Examples:
         --------
         >>> Path('file_v1.parquet').with_period("2024-01-01")
         'file_p2024-01-01_v1.parquet'
@@ -523,7 +584,7 @@ class Path(str, _PathBase):
     def with_periods(self, from_period: str, to_period: str | None = None) -> "Path":
         """Replace the Path's period, if any, with one or two new periods.
 
-        Examples
+        Examples:
         --------
         >>> Path('file_v1.parquet').with_periods("2024-01-01")
         'file_p2024-01-01_v1.parquet'
@@ -591,46 +652,57 @@ class Path(str, _PathBase):
     @property
     def periodless_stem(self) -> str:
         """Return the file stem before the period pattern."""
-        return str(re.sub(f"{self._period_pattern}.*", "", self.
+        return str(re.sub(f"{self._period_pattern}.*", "", self.stem))
 
     @property
     def versionless_stem(self) -> str:
         """Return the file stem before the version pattern."""
-        return self._new(re.split(self._version_pattern, self.
+        return self._new(re.split(self._version_pattern, self.name)[0]).stem
 
     @property
     def parent(self) -> "Path":
         """Parent path."""
-        return self._new(self.
+        return self._new("/".join(self.split("/")[:-1]))
 
     @property
     def parents(self) -> "list[Path]":
         """Parent path."""
-
+        no_protocol = self.split("://")[-1]
+        return [
+            self._new("/".join(no_protocol.split("/")[:i]))
+            for i in range(no_protocol.count("/"))
+        ][::-1]
 
     @property
     def name(self) -> str:
         """Final part of the path."""
-        return self.
+        return self.split("/")[-1]
 
     @property
     def stem(self) -> str:
         """File name without the suffix"""
-        return self.
+        return self.split("/")[-1].replace(self.suffix, "")
 
     @property
     def parts(self) -> tuple[str]:
-
+        no_protocol = self.split("://")[-1]
+        return tuple(no_protocol.split("/"))
 
     @property
     def suffix(self) -> str:
         """Final file path suffix."""
-
+        name = self.name
+        if "." not in name:
+            return ""
+        return "." + (name).split(".")[-1]
 
     @property
     def suffixes(self) -> list[str]:
         """File path suffixes, if multiple."""
-
+        name = self.name
+        if "." not in name:
+            return []
+        return ["." + suff for suff in (name).split(".")[1:]]
 
     @property
     def index_column_names(self) -> list[str]:
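These properties are now plain string arithmetic on the path itself. A self-contained sketch of the same logic on a made-up GCS path:

```python
path = "gs://ssb-demo-data-produkt-prod/inndata/file_p2024_v1.parquet"

name = path.split("/")[-1]                       # 'file_p2024_v1.parquet'
suffix = "." + name.split(".")[-1] if "." in name else ""
stem = name.replace(suffix, "")                  # 'file_p2024_v1'
no_protocol = path.split("://")[-1]
parts = tuple(no_protocol.split("/"))

print(name, suffix, stem)
print(parts[0])  # the bucket: 'ssb-demo-data-produkt-prod'
```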
@@ -691,9 +763,14 @@ class Path(str, _PathBase):
         try:
             info = self._info
         except AttributeError:
-            info = self.
+            info = self.fs.info(self)
             self._info = info
-
+        try:
+            return _get_timestamps(info["updated"])
+        except Exception as e:
+            if not self.exists() or self.is_dir():
+                return None
+            raise e
 
     @property
     def type(self) -> str:
@@ -705,7 +782,7 @@ class Path(str, _PathBase):
         try:
             info = self._info
         except AttributeError:
-            info = self.
+            info = self.fs.info(self)
             self._info = info
         return info["size"]
 
@@ -770,29 +847,29 @@ class Path(str, _PathBase):
         return self.isdir()
 
     def with_suffix(self, suffix: str):
-        return self._new(self.
+        return self._new(self.replace(self.suffix, suffix))
 
     def with_name(self, new_name: str):
-        return self._new(self.
+        return self._new(self.replace(self.name, new_name))
 
-    def with_stem(self,
-        return self._new(self.
+    def with_stem(self, new_stem: str):
+        return self._new(self.replace(self.stem, new_stem))
 
     @property
-    def
-        if self.
-            self.
-        return self.
+    def fs(self):
+        if self._fs is None:
+            self._fs = self._fs_constructor()
+        return self._fs
 
-    @
-    def
-        self.
-        return self.
+    @fs.setter
+    def fs(self, val):
+        self._fs = val
+        return self._fs
 
     def __truediv__(self, other: str | os.PathLike | PurePath) -> "Path":
         """Append a string or Path to the path with a forward slash.
 
-        Example
+        Example:
         -------
         >>> folder = 'ssb-areal-data-delt-kart-prod/analyse_data/klargjorte-data/2023'
         >>> file_path = folder / "ABAS_kommune_flate_p2023_v1.parquet"
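The `fs` property resolves the file system lazily and can be overridden by assignment (or via the `fs=` argument to `__new__`). A daplapath-independent stand-in showing the same property pattern:

```python
class DemoPath:
    """Stand-in illustrating the lazy fs property pattern from the hunk above."""

    def __init__(self, fs=None):
        self._fs = fs

    def _fs_constructor(self):
        return {"backend": "default"}  # stand-in for config.fs

    @property
    def fs(self):
        if self._fs is None:           # built lazily on first access
            self._fs = self._fs_constructor()
        return self._fs

    @fs.setter
    def fs(self, val):                 # or injected explicitly
        self._fs = val


p = DemoPath()
print(p.fs)                  # {'backend': 'default'}
p.fs = {"backend": "custom"}
print(p.fs)                  # {'backend': 'custom'}
```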
@@ -807,13 +884,12 @@ class Path(str, _PathBase):
         return self._new(f"{self}/{as_str(other)}")
 
     def __getattribute__(self, name):
-        """
+        """Stackoverflow hack to ensure we return Path when using string methods.
 
         It works for all but the string magigmethods, importantly __add__.
         """
-
         # skip magic methods
-        if name not in dir(str) or name.startswith("__") and name.endswith("__"):
+        if name not in dir(str) or (name.startswith("__") and name.endswith("__")):
             return super().__getattribute__(name)
 
         def method(self, *args, **kwargs):
@@ -832,35 +908,35 @@ class Path(str, _PathBase):
         return method.__get__(self)
 
     def __getattr__(self, attr: str) -> Any:
-        """Get
+        """Get fs attribute."""
         error_message = f"{self.__class__.__name__} has no attribute '{attr}'"
         if attr.startswith("_"):
             raise AttributeError(error_message)
-        if attr not in self.
+        if attr not in self._fs_attrs:
             raise AttributeError(error_message)
-        return functools.partial(getattr(self.
+        return functools.partial(getattr(self.fs, attr), self)
 
     def __fspath__(self) -> str:
         return str(self)
 
     def __dir__(self) -> list[str]:
-        return list(sorted({x for x in dir(Path)} | self.
+        return list(sorted({x for x in dir(Path)} | self._fs_attrs))
 
     def _iterable_constructor(self, info: list[dict], **kwargs) -> "PathSeries":
         series: pd.Series = _get_paths_and_index(info).apply(self.__class__)
         for path in series:
-            path.
+            path._fs = self._fs
         return self._iterable_type(series, **kwargs)
 
     def _new(self, new_path: str | Path) -> "Path":
-        return self.__class__(new_path, self.
+        return self.__class__(new_path, self.fs)
 
     def _cp_or_mv(self, destination: "Path | str", attr: str) -> "Path":
-        func: Callable = getattr(self.
+        func: Callable = getattr(self.fs, attr)
         try:
             func(self, destination)
         except FileNotFoundError:
-            destination = self.__class__(destination)
+            destination = self.__class__(destination, self.fs)
             sources = list(self.glob("**").files)
             destinations = [path.replace(self, destination) for path in sources]
             with ThreadPoolExecutor() as executor:
@@ -921,7 +997,7 @@ class PathSeries(pd.Series, _PathBase):
     names: Series
         The names of the file paths.
 
-    Methods
+    Methods:
     -------
     tree():
        con
@@ -961,18 +1037,24 @@ class PathSeries(pd.Series, _PathBase):
             data is not None
             and len(data)
             and not (
-
-
-
-
+                (
+                    isinstance(data, pd.Series)
+                    and len(data.index.names) == len(self._index_names)
+                )
+                or (
+                    isinstance(index, pd.MultiIndex)
+                    and len(index.names) == len(self._index_names)
+                )
                 # dict with e.g. tuple keys, turned into MultiIndex
-                or
-
+                or (
+                    is_dict_like(data)
+                    and all(len(key) == len(self._index_names) for key in data.keys())
+                )
             )
         )
         if should_construct_index:
-
-            data = _get_paths_and_index([
+            fs = kwargs.get("fs", self._fs_constructor())
+            data = _get_paths_and_index([fs.info(path) for path in data])
 
         super().__init__(data, index=index, **kwargs)
 
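The rewritten check decides whether the incoming data already carries the expected index levels (`INDEX_NAMES = ["timestamp", "mb", "type"]`). A standalone sketch of the dict-with-tuple-keys branch, with made-up values:

```python
import pandas as pd
from pandas.api.types import is_dict_like

index_names = ["timestamp", "mb", "type"]
data = {
    ("2024-01-01", 1.2, "file"): "a.parquet",
    ("2024-01-02", 3.4, "file"): "b.parquet",
}

if is_dict_like(data) and all(len(key) == len(index_names) for key in data.keys()):
    series = pd.Series(data)           # tuple keys become a MultiIndex
    series.index.names = index_names
    print(series)
```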
@@ -1338,7 +1420,7 @@ def _pathseries_constructor_with_fallback(
     max_parts: int | None = 2,
     path_series_type: type | None = None,
     **kwargs,
-) ->
+) -> PathSeries | pd.Series:
     path_series_type = path_series_type or PathSeries
 
     kwargs["name"] = kwargs.pop("name", "path")
@@ -1372,7 +1454,7 @@ def _pathseries_constructor_with_fallback(
     return series
 
 
-def _dataframe_constructor(data=None, index=None, **kwargs) ->
+def _dataframe_constructor(data=None, index=None, **kwargs) -> pd.DataFrame:
     data.name = "path"
     return pd.DataFrame(data, index=index, **kwargs)
 
@@ -1396,16 +1478,15 @@ def split_path_and_make_copyable_html(
         split: Text pattern to split the path on. Defaults to "/".
         display_prefix: The text to display instead of the parent directory. Defaults to ".../".
 
-    Returns
+    Returns:
     -------
     A string that holds the HTML and JavaScript code to be passed to IPython.display.display.
     """
-
-
-function copyToClipboard(text, event) {{
+    copy_to_clipboard_js = """<script>
+function copyToClipboard(text, event) {
     event.preventDefault();
     navigator.clipboard.writeText(text)
-    .then(() => {
+    .then(() => {
         const alertBox = document.createElement('div');
         const selection = window.getSelection();
 
@@ -1418,14 +1499,14 @@ function copyToClipboard(text, event) {{
         alertBox.innerHTML = 'Copied to clipboard';
         document.body.appendChild(alertBox);
 
-        setTimeout(function() {
+        setTimeout(function() {
             alertBox.style.display = 'none';
-        }
-        }
-    .catch(err => {
+        }, 1500); // 1.5 seconds
+    })
+    .catch(err => {
         console.error('Could not copy text: ', err);
-    }
-    }
+    });
+}
 </script>"""
 
     if split is not None:
@@ -1640,28 +1721,28 @@ def get_schema(file) -> pyarrow.Schema:
     # try:
     #     return ds.dataset(file).schema
     # except (TypeError, FileNotFoundError) as e:
-        if not hasattr(file, "
+        if not hasattr(file, "fs"):
             raise e
 
-
+        fs = file.fs
 
         def _get_schema(path):
            try:
                 return pq.read_schema(path)
             except FileNotFoundError as e:
                 try:
-                    with
+                    with fs.open(path, "rb") as f:
                         return pq.read_schema(f)
                 except Exception as e2:
                     raise e2.__class__(f"{e2}. {path}") from e
 
-        child_paths =
+        child_paths = fs.glob(file + "/**/*.parquet")
         if not len(child_paths):
             raise e.__class__(f"{e}: {file}") from e
 
         with ThreadPoolExecutor() as executor:
             schemas: list[pyarrow.Schema] = list(
-                executor.map(_get_schema,
+                executor.map(_get_schema, fs.glob(file + "/**/*.parquet"))
             )
             if not schemas:
                 raise ValueError(f"Couldn't find any schemas among {child_paths}.") from e
@@ -1740,15 +1821,14 @@ def sort_by_period(paths: Iterable[str]) -> Iterable[str]:
     except ValueError:
         # select last period
         periods = [pd.Timestamp(next(iter(reversed(path.periods)))) for path in paths]
-        combined = list(zip(periods, range(len(paths)),
+        combined = list(zip(periods, paths, list(range(len(paths))), strict=True))
     combined.sort()
-    indices: list[int] = [x[
+    indices: list[int] = [x[2] for x in combined]
     try:
         return paths.iloc[indices]
     except AttributeError:
-        return paths.__class__([x[
+        return paths.__class__([x[1] for x in combined])
 
 
 np_str_contains: Callable = np.vectorize(str.__contains__)
-np_str_endswith: Callable = np.vectorize(str.endswith)
 np_str_matches: Callable = np.vectorize(lambda txt, pat: bool(re.search(pat, txt)))
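The fixed fallback in `sort_by_period` sorts `(period, path, position)` triples with `strict=True`, then uses the positions for `.iloc` or the paths themselves for plain lists. A standalone sketch with made-up file names (Python 3.10+ for `strict=`):

```python
import pandas as pd

paths = ["file_p2021_v1.parquet", "file_p2019_v1.parquet", "file_p2020_v1.parquet"]
periods = [pd.Timestamp("2021"), pd.Timestamp("2019"), pd.Timestamp("2020")]

combined = list(zip(periods, paths, range(len(paths)), strict=True))
combined.sort()
indices = [x[2] for x in combined]   # positions, for PathSeries.iloc
ordered = [x[1] for x in combined]   # fallback for plain lists
print(indices)  # [1, 2, 0]
print(ordered)
```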
{daplapath-2.1.2 → daplapath-2.1.4}/LICENSE.md: file without changes
{daplapath-2.1.2 → daplapath-2.1.4}/README.md: file without changes