patito 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- patito/__init__.py +12 -6
- patito/database.py +658 -0
- patito/duckdb.py +153 -186
- patito/polars.py +52 -45
- patito/pydantic.py +99 -88
- patito/sql.py +2 -3
- patito/validators.py +87 -1
- patito/xdg.py +22 -0
- {patito-0.4.3.dist-info → patito-0.5.0.dist-info}/LICENSE +1 -0
- {patito-0.4.3.dist-info → patito-0.5.0.dist-info}/METADATA +18 -17
- patito-0.5.0.dist-info/RECORD +14 -0
- {patito-0.4.3.dist-info → patito-0.5.0.dist-info}/WHEEL +1 -1
- patito-0.4.3.dist-info/RECORD +0 -12
patito/validators.py
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
"""Module for validating datastructures with respect to model specifications."""
|
|
2
2
|
from __future__ import annotations
|
|
3
3
|
|
|
4
|
+
import sys
|
|
4
5
|
from typing import TYPE_CHECKING, Type, Union, cast
|
|
5
6
|
|
|
6
7
|
import polars as pl
|
|
8
|
+
from typing_extensions import get_args, get_origin
|
|
7
9
|
|
|
8
10
|
from patito.exceptions import (
|
|
9
11
|
ColumnDTypeError,
|
|
@@ -15,6 +17,13 @@ from patito.exceptions import (
|
|
|
15
17
|
ValidationError,
|
|
16
18
|
)
|
|
17
19
|
|
|
20
|
+
# Origins that identify a union annotation. ``typing.Union`` is always
# recognised; ``types.UnionType`` (the ``X | Y`` syntax) can only be imported
# on Python 3.10 and later, so it is added conditionally.
if sys.version_info < (3, 10):  # pragma: no cover
    UNION_TYPES = (Union,)
else:  # pragma: no cover
    from types import UnionType  # pyright: ignore

    UNION_TYPES = (Union, UnionType)
|
|
26
|
+
|
|
18
27
|
try:
|
|
19
28
|
import pandas as pd
|
|
20
29
|
|
|
@@ -44,6 +53,44 @@ VALID_POLARS_TYPES = {
|
|
|
44
53
|
}
|
|
45
54
|
|
|
46
55
|
|
|
56
|
+
def _is_optional(type_annotation: Type) -> bool:
    """
    Tell whether a type annotation is nullable at its outermost level.

    Args:
        type_annotation: The type annotation to inspect.

    Returns:
        True if the annotation's origin is one of ``UNION_TYPES`` and
        ``NoneType`` is among its type arguments, otherwise False.
    """
    # Anything that is not a union at the top level cannot be an Optional.
    if get_origin(type_annotation) not in UNION_TYPES:
        return False

    none_type = type(None)
    return none_type in get_args(type_annotation)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _dewrap_optional(type_annotation: Type) -> Type:
    """
    Return the inner, wrapped type of an Optional.

    Is a no-op for non-Optional types.

    Args:
        type_annotation: The type annotation to be dewrapped.

    Returns:
        The input type, but with the outermost Optional removed. If the
        annotation is a union of ``None`` and exactly one other type, that
        type is returned. If several non-None members remain, the union of
        all of them is returned so that no member is silently dropped.
    """
    if not _is_optional(type_annotation):
        return type_annotation

    remaining = tuple(
        member
        for member in get_args(type_annotation)
        if member is not type(None)  # noqa: E721
    )
    if len(remaining) == 1:
        return remaining[0]

    # Optional[Union[A, B]] is flattened to Union[A, B, None]; rebuild the
    # union of the non-None members rather than keeping only the first one.
    return Union[remaining]  # type: ignore[return-value]
|
|
92
|
+
|
|
93
|
+
|
|
47
94
|
def _find_errors( # noqa: C901
|
|
48
95
|
dataframe: pl.DataFrame,
|
|
49
96
|
schema: Type[Model],
|
|
@@ -99,6 +146,45 @@ def _find_errors( # noqa: C901
|
|
|
99
146
|
)
|
|
100
147
|
)
|
|
101
148
|
|
|
149
|
+
for column, dtype in schema.dtypes.items():
|
|
150
|
+
if not isinstance(dtype, pl.List):
|
|
151
|
+
continue
|
|
152
|
+
|
|
153
|
+
annotation = schema.__annotations__[column] # type: ignore[unreachable]
|
|
154
|
+
|
|
155
|
+
# Retrieve the annotation of the list itself,
|
|
156
|
+
# dewrapping any potential Optional[...]
|
|
157
|
+
list_type = _dewrap_optional(annotation)
|
|
158
|
+
|
|
159
|
+
# Check if the list items themselves should be considered nullable
|
|
160
|
+
item_type = get_args(list_type)[0]
|
|
161
|
+
if _is_optional(item_type):
|
|
162
|
+
continue
|
|
163
|
+
|
|
164
|
+
num_missing_values = (
|
|
165
|
+
dataframe.lazy()
|
|
166
|
+
.select(column)
|
|
167
|
+
# Remove those rows that do not contain lists at all
|
|
168
|
+
.filter(pl.col(column).is_not_null())
|
|
169
|
+
# Convert lists of N items to N individual rows
|
|
170
|
+
.explode(column)
|
|
171
|
+
# Calculate how many nulls are present in lists
|
|
172
|
+
.filter(pl.col(column).is_null())
|
|
173
|
+
.collect()
|
|
174
|
+
.height
|
|
175
|
+
)
|
|
176
|
+
if num_missing_values != 0:
|
|
177
|
+
errors.append(
|
|
178
|
+
ErrorWrapper(
|
|
179
|
+
MissingValuesError(
|
|
180
|
+
f"{num_missing_values} missing "
|
|
181
|
+
f"{'value' if num_missing_values == 1 else 'values'} "
|
|
182
|
+
f"in lists"
|
|
183
|
+
),
|
|
184
|
+
loc=column,
|
|
185
|
+
)
|
|
186
|
+
)
|
|
187
|
+
|
|
102
188
|
# Check if any column has a wrong dtype
|
|
103
189
|
valid_dtypes = schema.valid_dtypes
|
|
104
190
|
dataframe_datatypes = dict(zip(dataframe.columns, dataframe.dtypes))
|
|
@@ -189,7 +275,7 @@ def _find_errors( # noqa: C901
|
|
|
189
275
|
)
|
|
190
276
|
if "_" in constraints.meta.root_names():
|
|
191
277
|
# An underscore is an alias for the current field
|
|
192
|
-
illegal_rows = dataframe.
|
|
278
|
+
illegal_rows = dataframe.with_columns(
|
|
193
279
|
pl.col(column_name).alias("_")
|
|
194
280
|
).filter(constraints)
|
|
195
281
|
else:
|
patito/xdg.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Module implementing the XDG directory standard."""
|
|
2
|
+
import os
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def cache_home(application: Optional[str] = None) -> Path:
    """
    Return path to directory containing user-specific non-essential data files.

    The base directory is read from the ``XDG_CACHE_HOME`` environment
    variable. If the variable is unset or empty, ``~/.cache`` is used instead.
    The returned directory is created if it does not already exist.

    Args:
        application: An optional name of an application for which to return an
            application-specific cache directory for.

    Returns:
        A path object pointing to a directory to store cache files.
    """
    # ``or`` rather than a ``get`` default: an empty XDG_CACHE_HOME must fall
    # back to the default instead of resolving to the working directory.
    base = os.environ.get("XDG_CACHE_HOME") or "~/.cache"

    # ``resolve()`` does not expand "~"; without ``expanduser()`` a literal
    # "~" directory would be created under the current working directory.
    path = Path(base).expanduser().resolve()
    if application:
        path = path / application
    path.mkdir(exist_ok=True, parents=True)
    return path
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
MIT License
|
|
2
2
|
|
|
3
3
|
Copyright (c) 2022 Oda Group Holding AS
|
|
4
|
+
Copyright (c) 2023 Jakob Gerhard Martinussen and contributors
|
|
4
5
|
|
|
5
6
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
7
|
of this software and associated documentation files (the "Software"), to deal
|
|
@@ -1,33 +1,34 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: patito
|
|
3
|
-
Version: 0.4.3
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: A dataframe modelling library built on top of polars and pydantic.
|
|
5
5
|
Home-page: https://github.com/kolonialno/patito
|
|
6
6
|
License: MIT
|
|
7
7
|
Keywords: validation,dataframe
|
|
8
8
|
Author: Jakob Gerhard Martinussen
|
|
9
9
|
Author-email: jakobgm@gmail.com
|
|
10
|
-
Requires-Python: >=3.
|
|
10
|
+
Requires-Python: >=3.8,<4.0
|
|
11
11
|
Classifier: License :: OSI Approved :: MIT License
|
|
12
12
|
Classifier: Programming Language :: Python :: 3
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.7
|
|
15
13
|
Classifier: Programming Language :: Python :: 3.8
|
|
16
14
|
Classifier: Programming Language :: Python :: 3.9
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Provides-Extra: caching
|
|
17
18
|
Provides-Extra: docs
|
|
18
19
|
Provides-Extra: duckdb
|
|
19
20
|
Provides-Extra: pandas
|
|
20
|
-
Requires-Dist: Sphinx; extra == "docs"
|
|
21
|
-
Requires-Dist: duckdb (>=0.
|
|
22
|
-
Requires-Dist:
|
|
23
|
-
Requires-Dist:
|
|
24
|
-
Requires-Dist:
|
|
25
|
-
Requires-Dist: pyarrow (>=5.0.0); (python_version >= "3.8" and python_version < "4.0") and (extra == "duckdb")
|
|
21
|
+
Requires-Dist: Sphinx (<7) ; extra == "docs"
|
|
22
|
+
Requires-Dist: duckdb (>=0.6.0) ; (python_version >= "3.8" and python_version < "4.0") and (extra == "duckdb")
|
|
23
|
+
Requires-Dist: pandas ; (python_version >= "3.8" and python_version < "4.0") and (extra == "pandas")
|
|
24
|
+
Requires-Dist: polars (>=0.18.3)
|
|
25
|
+
Requires-Dist: pyarrow (>=5.0.0) ; (python_version >= "3.8" and python_version < "4.0") and (extra == "caching" or extra == "duckdb")
|
|
26
26
|
Requires-Dist: pydantic (>=1.7.0)
|
|
27
|
-
Requires-Dist: sphinx-autobuild; extra == "docs"
|
|
28
|
-
Requires-Dist: sphinx-autodoc-typehints; extra == "docs"
|
|
29
|
-
Requires-Dist: sphinx-rtd-theme; extra == "docs"
|
|
30
|
-
Requires-Dist:
|
|
27
|
+
Requires-Dist: sphinx-autobuild ; extra == "docs"
|
|
28
|
+
Requires-Dist: sphinx-autodoc-typehints ; extra == "docs"
|
|
29
|
+
Requires-Dist: sphinx-rtd-theme ; extra == "docs"
|
|
30
|
+
Requires-Dist: sphinx-toolbox ; extra == "docs"
|
|
31
|
+
Requires-Dist: sphinxcontrib-mermaid ; extra == "docs"
|
|
31
32
|
Requires-Dist: typing-extensions
|
|
32
33
|
Project-URL: Documentation, https://patito.readthedocs.io
|
|
33
34
|
Project-URL: Repository, https://github.com/kolonialno/patito
|
|
@@ -284,7 +285,7 @@ class Product(pt.Model):
|
|
|
284
285
|
@property
|
|
285
286
|
def url(self) -> str:
|
|
286
287
|
return (
|
|
287
|
-
"https://
|
|
288
|
+
"https://example.com/no/products/"
|
|
288
289
|
f"{self.product_id}-"
|
|
289
290
|
f"{self.name.lower().replace(' ', '-')}"
|
|
290
291
|
)
|
|
@@ -302,7 +303,7 @@ products = pl.DataFrame(
|
|
|
302
303
|
milk_row = products.filter(pl.col("product_id") == 1)
|
|
303
304
|
milk = Product.from_row(milk_row)
|
|
304
305
|
print(milk.url)
|
|
305
|
-
# https://
|
|
306
|
+
# https://example.com/no/products/1-skimmed-milk
|
|
306
307
|
```
|
|
307
308
|
|
|
308
309
|
If you "connect" the `Product` model with the `DataFrame` by the use of `patito.DataFrame.set_model()`, or alternatively by using `Product.DataFrame` directly, you can use the `.get()` method in order to filter the data frame down to a single row _and_ cast it to the respective model class:
|
|
@@ -317,6 +318,6 @@ products = Product.DataFrame(
|
|
|
317
318
|
)
|
|
318
319
|
milk = products.get(pl.col("product_id") == 1)
|
|
319
320
|
print(milk.url)
|
|
320
|
-
# https://
|
|
321
|
+
# https://example.com/no/products/1-skimmed-milk
|
|
321
322
|
```
|
|
322
323
|
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
patito/__init__.py,sha256=J-p3aBiyNGdsyHxViEsP1C6bE-OxtxAul7xSA1xXpIE,1169
|
|
2
|
+
patito/_docs.py,sha256=bobkmo8-RRdz80_KY53y_i1Gcp1WWTH5-D5ZHGidpok,161
|
|
3
|
+
patito/database.py,sha256=IaxbsmyQMzL3KzvJUq0tgVGUxdr55KEv-wd1X673U2o,27917
|
|
4
|
+
patito/duckdb.py,sha256=MnC4C7m3epXODz_eYBLqb7dTAcOh2o1Y8BViPoPKTaA,111015
|
|
5
|
+
patito/exceptions.py,sha256=4WuaQJoc5wLUehhBRQPvL59LVFLcn_K806Z9fg0KBss,1262
|
|
6
|
+
patito/polars.py,sha256=oFS8In6cUiKQu-QclkJ1Egxbf2HF2SlbTF76bcyE4Vo,26161
|
|
7
|
+
patito/pydantic.py,sha256=HKQ6dfeeJCq1xaVJbqfo5UpF-WI_jBEaDRAy2tLnvtM,54832
|
|
8
|
+
patito/sql.py,sha256=_xmvVfC1kUUq2d8KTBPExJmfZ9ec6uoMfZv52naFFkY,3218
|
|
9
|
+
patito/validators.py,sha256=D5hdCfAc1rv15FwQILSmaZNZ5j88QfYUYZ37GMwgvQU,10659
|
|
10
|
+
patito/xdg.py,sha256=3EcUGcYBBUX2Secegajb4DB2QgAHNPs6oi0tMsL--UQ,686
|
|
11
|
+
patito-0.5.0.dist-info/LICENSE,sha256=3bc4YyuF0e5nd59E3CsR8QM1Ua7pqKfC9DD1LVBVMs4,1139
|
|
12
|
+
patito-0.5.0.dist-info/METADATA,sha256=K16UBKdqYnZfDmVdVqUuMc3AqknsaAmWfgljNVg0q_g,14562
|
|
13
|
+
patito-0.5.0.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
|
|
14
|
+
patito-0.5.0.dist-info/RECORD,,
|
patito-0.4.3.dist-info/RECORD
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
patito/__init__.py,sha256=BSTuaflPBdcoJKJTMGDAa7fvoJVduWb28N6Fnw_Gpns,1079
|
|
2
|
-
patito/_docs.py,sha256=bobkmo8-RRdz80_KY53y_i1Gcp1WWTH5-D5ZHGidpok,161
|
|
3
|
-
patito/duckdb.py,sha256=Kxkn5Z4Rl_9KthO_zzbYCEhRtoGtV5T0xGueV6qgoZw,113446
|
|
4
|
-
patito/exceptions.py,sha256=4WuaQJoc5wLUehhBRQPvL59LVFLcn_K806Z9fg0KBss,1262
|
|
5
|
-
patito/polars.py,sha256=Ds_pH302wtWTDN78mmDdUrHn5K906L6H66GnpKrMGDo,26335
|
|
6
|
-
patito/pydantic.py,sha256=x9gNcS29_nZaT_TUqsvMZm5UZwWZGGMFOe6WGvSdgvs,54976
|
|
7
|
-
patito/sql.py,sha256=HhNGhYwIPTJOs08fMEAX6PUjeIKuzI2IB_cjOJknpJg,3249
|
|
8
|
-
patito/validators.py,sha256=YhDMgua8c7Hs97pmscX3vIs-kqzq3vrAjAomYR42IfI,8061
|
|
9
|
-
patito-0.4.3.dist-info/LICENSE,sha256=0LkhXGFNXIt9gW0zs6_9YZynb4DDt3OZaRXz7DpD0qQ,1077
|
|
10
|
-
patito-0.4.3.dist-info/WHEEL,sha256=DA86_h4QwwzGeRoz62o1svYt5kGEXpoUTuTtwzoTb30,83
|
|
11
|
-
patito-0.4.3.dist-info/METADATA,sha256=Bhj5XsAbskyczUNkQzUYroXjbgsFooqaLLEabVnZx-8,14500
|
|
12
|
-
patito-0.4.3.dist-info/RECORD,,
|