PyPI - daplapath - Versions diffs - 2.0.8__tar.gz → 2.0.9__tar.gz - Mend

daplapath 2.0.8.tar.gz → 2.0.9.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

{daplapath-2.0.8 → daplapath-2.0.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: daplapath
-Version: 2.0.8
+Version: 2.0.9
 Summary: A pathlib.Path class for dapla
 License: MIT
 Author: ort

{daplapath-2.0.8 → daplapath-2.0.9}/daplapath/path.py RENAMED Viewed

@@ -599,7 +599,7 @@ class Path(str, _PathBase):
     @property
     def index_column_names(self) -> list[str]:
-        return _get_index_cols(self.schema)
+        return _get_index_cols(self.schema, self)
     @property
     def columns(self) -> pd.Index:
@@ -612,7 +612,7 @@ class Path(str, _PathBase):
             ]
         except (KeyError, TypeError):
             names = schema.names
-        index_cols = _get_index_cols(schema)
+        index_cols = _get_index_cols(schema, self)
         return pd.Index(names).difference(index_cols)
     @property
@@ -621,16 +621,14 @@ class Path(str, _PathBase):
         try:
             with self.open("rb") as file:
                 return get_schema(file)
-        except (
-            Exception
-        ):  # (PermissionError, FileNotFoundError, TypeError, IsADirectoryError):
+        except Exception:
             return get_schema(self)
     @property
     def dtypes(self) -> pd.Series:
         """Date types of the file's columns."""
         schema = self.schema
-        index_cols = _get_index_cols(schema)
+        index_cols = _get_index_cols(schema, self)
         return pd.Series(schema.types, index=schema.names).loc[
             lambda x: ~x.index.isin(index_cols)
         ]
@@ -641,9 +639,7 @@ class Path(str, _PathBase):
         try:
             with self.open("rb") as file:
                 return get_shape(file)
-        except (
-            Exception
-        ):  # (PermissionError, FileNotFoundError, TypeError, IsADirectoryError):
+        except Exception:
             return get_shape(self)
     @property
@@ -1517,8 +1513,11 @@ def get_path_tree(
     return tree
-def _get_index_cols(schema: pyarrow.Schema) -> list[str]:
-    cols = json.loads(schema.metadata[b"pandas"])["index_columns"]
+def _get_index_cols(schema: pyarrow.Schema, path_or_file: str | Path) -> list[str]:
+    try:
+        cols = json.loads(schema.metadata[b"pandas"])["index_columns"]
+    except KeyError as e:
+        raise KeyError(f"{e}. For {type(path_or_file)}: {path_or_file}")
     return [x for x in cols if not isinstance(x, dict)]
@@ -1570,13 +1569,21 @@ def get_schema(file) -> pyarrow.Schema:
                 except Exception as e2:
                     raise e2.__class__(f"{e2}. {path}") from e
+        child_paths = file_system.glob(file + "/**/*.parquet")
+        if not len(child_paths):
+            raise e.__class__(f"{e}: {file}") from e
         with ThreadPoolExecutor() as executor:
-            return pyarrow.unify_schemas(
-                list(
-                    executor.map(_get_schema, file_system.glob(file + "/**/*.parquet"))
-                ),
-                promote_options="permissive",
+            schemas: list[pyarrow.Schema] = list(
+                executor.map(_get_schema, file_system.glob(file + "/**/*.parquet"))
             )
+        if not schemas:
+            raise ValueError(f"Couldn't find any schemas among {child_paths}.") from e
+        return pyarrow.unify_schemas(
+            schemas,
+            promote_options="permissive",
+        )
 def get_num_rows(file):
@@ -1599,7 +1606,7 @@ def get_num_rows(file):
 def get_shape(file) -> tuple[int, int]:
     schema = get_schema(file)
-    index_cols = _get_index_cols(schema)
+    index_cols = _get_index_cols(schema, file)
     ncol: int = sum(name not in index_cols for name in schema.names)
     nrow: int = get_num_rows(file)
     return nrow, ncol

{daplapath-2.0.8 → daplapath-2.0.9}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "daplapath"
-version = "2.0.8"
+version = "2.0.9"
 description = "A pathlib.Path class for dapla"
 authors = ["ort <ort@ssb.no>"]
 license = "MIT"

{daplapath-2.0.8 → daplapath-2.0.9}/LICENSE.md RENAMED Viewed

File without changes

{daplapath-2.0.8 → daplapath-2.0.9}/README.md RENAMED Viewed

File without changes

{daplapath-2.0.8 → daplapath-2.0.9}/daplapath/__init__.py RENAMED Viewed

File without changes

daplapath 2.0.8__tar.gz → 2.0.9__tar.gz

daplapath 2.0.8.tar.gz → 2.0.9.tar.gz