patito 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- patito/_pydantic/column_info.py +101 -46
- patito/_pydantic/dtypes/dtypes.py +15 -14
- patito/_pydantic/dtypes/utils.py +29 -26
- patito/_pydantic/repr.py +7 -15
- patito/_pydantic/schema.py +10 -9
- patito/exceptions.py +11 -16
- patito/polars.py +113 -54
- patito/pydantic.py +97 -88
- patito/validators.py +111 -71
- {patito-0.7.0.dist-info → patito-0.8.0.dist-info}/METADATA +5 -3
- patito-0.8.0.dist-info/RECORD +17 -0
- {patito-0.7.0.dist-info → patito-0.8.0.dist-info}/WHEEL +1 -1
- patito-0.7.0.dist-info/RECORD +0 -17
- {patito-0.7.0.dist-info → patito-0.8.0.dist-info}/LICENSE +0 -0
patito/_pydantic/column_info.py
CHANGED
|
@@ -1,27 +1,90 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import io
|
|
3
4
|
import json
|
|
4
|
-
from typing import
|
|
5
|
-
Any,
|
|
6
|
-
Dict,
|
|
7
|
-
Optional,
|
|
8
|
-
Sequence,
|
|
9
|
-
Type,
|
|
10
|
-
TypeVar,
|
|
11
|
-
Union,
|
|
12
|
-
)
|
|
5
|
+
from typing import Annotated, Optional, Union
|
|
13
6
|
|
|
14
7
|
import polars as pl
|
|
8
|
+
from polars.datatypes import * # noqa: F403 # type: ignore
|
|
15
9
|
from polars.datatypes import DataType, DataTypeClass
|
|
16
|
-
from
|
|
10
|
+
from polars.exceptions import ComputeError
|
|
11
|
+
from pydantic import BaseModel, BeforeValidator, field_serializer
|
|
17
12
|
|
|
18
|
-
|
|
13
|
+
|
|
14
|
+
def dtype_deserializer(dtype: str | DataTypeClass | DataType | None):
    """Turn a serialized dtype back into a polars dtype.

    Args:
        dtype: A polars dtype (class or instance), the string form of a
            dtype, the literal string ``"null"``, or ``None``.

    Returns:
        The input unchanged when it is already a polars dtype, ``None`` for
        ``None`` / ``"null"``, otherwise whatever evaluating the string as a
        Python expression yields.
    """
    # Already a dtype: nothing to decode.
    if isinstance(dtype, (DataTypeClass, DataType)):
        return dtype

    # Both the JSON-ish "null" marker and a real None mean "no dtype".
    if dtype == "null" or dtype is None:
        return None

    # NOTE(review): ``eval`` runs arbitrary Python. This is only safe while the
    # string comes from a trusted source (e.g. this package's own serializer);
    # confirm no externally supplied schema can reach this call.
    return eval(dtype)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def expr_deserializer(
    expr: str | pl.Expr | list[pl.Expr] | None,
) -> pl.Expr | list[pl.Expr] | None:
    """Deserialize a polars expression or list thereof from json.

    This is applied both during deserialization and validation, so it has to
    accept values that are already expressions as well as their JSON form.

    Args:
        expr: ``None``, a polars expression, a list (returned unchanged), the
            literal string ``"null"``, a JSON array whose items are serialized
            expressions, or a single serialized expression.

    Returns:
        ``None`` for ``None`` / ``"null"``, the input itself when it is
        already an expression or a list, otherwise the deserialized
        expression(s).

    Raises:
        ValueError: If ``expr`` is an empty string or of an unsupported type.
    """
    if expr is None:
        return None

    if isinstance(expr, str):
        if expr == "null":
            return None
        if not expr:
            # Indexing ``expr[0]`` on an empty string used to surface as a
            # bare IndexError; report it like any other undecodable value.
            raise ValueError("An empty string can not be deserialized.")
        # A leading "[" marks a JSON array of individually serialized exprs.
        if expr.startswith("["):
            return [
                pl.Expr.deserialize(io.StringIO(item), format="json")
                for item in json.loads(expr)
            ]
        return pl.Expr.deserialize(io.StringIO(expr), format="json")

    # Already-built expressions and lists are passed through untouched.
    if isinstance(expr, (pl.Expr, list)):
        return expr

    raise ValueError(f"{expr} can not be deserialized.")
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def expr_or_col_name_deserializer(expr: str | pl.Expr | None) -> pl.Expr | str | None:
    """Deserialize a polars expression or column name from json.

    This is applied both during deserialization and validation.

    Args:
        expr: ``None``, a polars expression, the literal string ``"null"``,
            a serialized expression, a JSON-encoded column name, or a plain
            column name.

    Returns:
        ``None`` for ``None`` / ``"null"``; the expression when the string is
        a serialized expression; the decoded column name when the string is a
        JSON string; otherwise the string itself as a literal column name.

    Raises:
        ValueError: If ``expr`` is of an unsupported type.
    """
    if expr is None:
        return None

    # Lists are returned unchanged, as before, although the signature does not
    # advertise them. NOTE(review): confirm whether any caller relies on this.
    if isinstance(expr, (pl.Expr, list)):
        return expr

    if not isinstance(expr, str):
        raise ValueError(f"{expr} can not be deserialized.")

    if expr == "null":
        return None

    try:
        return pl.Expr.deserialize(io.StringIO(expr), format="json")
    except ComputeError:
        # Not a serialized expression; fall through and treat as column name.
        pass

    try:
        decoded = json.loads(expr)
    except json.JSONDecodeError:
        # Column name has been passed literally
        # to ColumnInfo(derived_from="foo")
        return expr

    # Only a JSON *string* is a serialized column name. A literal name such as
    # "123" or "true" is valid JSON too, but decoding it would hand back an
    # int/bool instead of the column name the caller supplied.
    return decoded if isinstance(decoded, str) else expr
|
|
19
81
|
|
|
20
82
|
|
|
21
83
|
class ColumnInfo(BaseModel, arbitrary_types_allowed=True):
|
|
22
84
|
"""patito-side model for storing column metadata.
|
|
23
85
|
|
|
24
86
|
Args:
|
|
87
|
+
allow_missing (bool): Column may be missing.
|
|
25
88
|
constraints (Union[polars.Expression, List[polars.Expression]): A single
|
|
26
89
|
constraint or list of constraints, expressed as a polars expression objects.
|
|
27
90
|
All rows must satisfy the given constraint. You can refer to the given column
|
|
@@ -34,10 +97,20 @@ class ColumnInfo(BaseModel, arbitrary_types_allowed=True):
|
|
|
34
97
|
|
|
35
98
|
"""
|
|
36
99
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
100
|
+
allow_missing: Optional[bool] = None # noqa: UP007
|
|
101
|
+
dtype: Annotated[
|
|
102
|
+
Optional[Union[DataTypeClass, DataType]], # noqa: UP007
|
|
103
|
+
BeforeValidator(dtype_deserializer),
|
|
104
|
+
] = None
|
|
105
|
+
constraints: Annotated[
|
|
106
|
+
Optional[Union[pl.Expr, list[pl.Expr]]], # noqa: UP007
|
|
107
|
+
BeforeValidator(expr_deserializer),
|
|
108
|
+
] = None
|
|
109
|
+
derived_from: Annotated[
|
|
110
|
+
Optional[Union[str, pl.Expr]], # noqa: UP007
|
|
111
|
+
BeforeValidator(expr_or_col_name_deserializer),
|
|
112
|
+
] = None
|
|
113
|
+
unique: Optional[bool] = None # noqa : UP007
|
|
41
114
|
|
|
42
115
|
def __repr__(self) -> str:
|
|
43
116
|
"""Print only Field attributes whose values are not default (mainly None)."""
|
|
@@ -56,39 +129,21 @@ class ColumnInfo(BaseModel, arbitrary_types_allowed=True):
|
|
|
56
129
|
return f"ColumnInfo({string})"
|
|
57
130
|
|
|
58
131
|
@field_serializer("constraints", "derived_from")
|
|
59
|
-
def
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
return [self._serialize_expr(c) for c in exprs]
|
|
132
|
+
def expr_serializer(self, expr: None | pl.Expr | list[pl.Expr]):
|
|
133
|
+
"""Converts polars expr to json."""
|
|
134
|
+
if expr is None:
|
|
135
|
+
return "null"
|
|
136
|
+
elif isinstance(expr, str):
|
|
137
|
+
return json.dumps(expr)
|
|
138
|
+
elif isinstance(expr, list):
|
|
139
|
+
return json.dumps([e.meta.serialize(format="json") for e in expr])
|
|
68
140
|
else:
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
def _serialize_expr(self, expr: pl.Expr) -> Dict:
|
|
72
|
-
if isinstance(expr, pl.Expr):
|
|
73
|
-
return json.loads(
|
|
74
|
-
expr.meta.serialize(format="json")
|
|
75
|
-
) # can we access the dictionary directly?
|
|
76
|
-
else:
|
|
77
|
-
raise ValueError(f"Invalid type for expr: {type(expr)}")
|
|
141
|
+
return expr.meta.serialize(format="json")
|
|
78
142
|
|
|
79
143
|
@field_serializer("dtype")
|
|
80
|
-
def
|
|
81
|
-
"""
|
|
82
|
-
|
|
83
|
-
References:
|
|
84
|
-
[1] https://stackoverflow.com/questions/76572310/how-to-serialize-deserialize-polars-datatypes
|
|
85
|
-
"""
|
|
144
|
+
def dtype_serializer(self, dtype: DataTypeClass | DataType | None) -> str:
|
|
145
|
+
"""Converts polars dtype to json."""
|
|
86
146
|
if dtype is None:
|
|
87
|
-
return
|
|
88
|
-
elif isinstance(dtype, DataTypeClass) or isinstance(dtype, DataType):
|
|
89
|
-
return parse_composite_dtype(dtype)
|
|
147
|
+
return "null"
|
|
90
148
|
else:
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
CI = TypeVar("CI", bound=Type[ColumnInfo])
|
|
149
|
+
return str(dtype)
|
|
@@ -1,21 +1,22 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from collections.abc import Mapping
|
|
3
4
|
from functools import cache, reduce
|
|
4
5
|
from operator import and_
|
|
5
|
-
from typing import TYPE_CHECKING, Any
|
|
6
|
+
from typing import TYPE_CHECKING, Any
|
|
6
7
|
|
|
7
8
|
import polars as pl
|
|
8
9
|
from polars.datatypes import DataType, DataTypeClass
|
|
9
10
|
from polars.datatypes.group import DataTypeGroup
|
|
10
11
|
from pydantic import TypeAdapter
|
|
11
12
|
|
|
13
|
+
from patito._pydantic.column_info import ColumnInfo
|
|
12
14
|
from patito._pydantic.dtypes.utils import (
|
|
13
15
|
PT_BASE_SUPPORTED_DTYPES,
|
|
14
16
|
PydanticBaseType,
|
|
15
17
|
_pyd_type_to_default_dtype,
|
|
16
18
|
_pyd_type_to_valid_dtypes,
|
|
17
19
|
_without_optional,
|
|
18
|
-
dtype_from_string,
|
|
19
20
|
)
|
|
20
21
|
from patito._pydantic.repr import display_as_type
|
|
21
22
|
|
|
@@ -25,8 +26,8 @@ if TYPE_CHECKING:
|
|
|
25
26
|
|
|
26
27
|
@cache
|
|
27
28
|
def valid_dtypes_for_model(
|
|
28
|
-
cls:
|
|
29
|
-
) -> Mapping[str,
|
|
29
|
+
cls: type[ModelType],
|
|
30
|
+
) -> Mapping[str, frozenset[DataTypeClass]]:
|
|
30
31
|
return {
|
|
31
32
|
column: (
|
|
32
33
|
DtypeResolver(cls.model_fields[column].annotation).valid_polars_dtypes()
|
|
@@ -39,7 +40,7 @@ def valid_dtypes_for_model(
|
|
|
39
40
|
|
|
40
41
|
@cache
|
|
41
42
|
def default_dtypes_for_model(
|
|
42
|
-
cls:
|
|
43
|
+
cls: type[ModelType],
|
|
43
44
|
) -> dict[str, DataType]:
|
|
44
45
|
default_dtypes: dict[str, DataType] = {}
|
|
45
46
|
for column in cls.columns:
|
|
@@ -57,7 +58,7 @@ def default_dtypes_for_model(
|
|
|
57
58
|
def validate_polars_dtype(
|
|
58
59
|
annotation: type[Any] | None,
|
|
59
60
|
dtype: DataType | DataTypeClass | None,
|
|
60
|
-
column:
|
|
61
|
+
column: str | None = None,
|
|
61
62
|
) -> None:
|
|
62
63
|
"""Check that the polars dtype is valid for the given annotation. Raises ValueError if not.
|
|
63
64
|
|
|
@@ -84,7 +85,7 @@ def validate_polars_dtype(
|
|
|
84
85
|
|
|
85
86
|
|
|
86
87
|
def validate_annotation(
|
|
87
|
-
annotation: type[Any] | Any | None, column:
|
|
88
|
+
annotation: type[Any] | Any | None, column: str | None = None
|
|
88
89
|
) -> None:
|
|
89
90
|
"""Check that the provided annotation has polars/patito support (we can resolve it to a default dtype). Raises ValueError if not.
|
|
90
91
|
|
|
@@ -129,7 +130,7 @@ class DtypeResolver:
|
|
|
129
130
|
|
|
130
131
|
def _valid_polars_dtypes_for_schema(
|
|
131
132
|
self,
|
|
132
|
-
schema:
|
|
133
|
+
schema: dict,
|
|
133
134
|
) -> DataTypeGroup:
|
|
134
135
|
valid_type_sets = []
|
|
135
136
|
if "anyOf" in schema:
|
|
@@ -146,7 +147,7 @@ class DtypeResolver:
|
|
|
146
147
|
|
|
147
148
|
def _pydantic_subschema_to_valid_polars_types(
|
|
148
149
|
self,
|
|
149
|
-
props:
|
|
150
|
+
props: dict,
|
|
150
151
|
) -> DataTypeGroup:
|
|
151
152
|
if "type" not in props:
|
|
152
153
|
if "enum" in props:
|
|
@@ -189,7 +190,7 @@ class DtypeResolver:
|
|
|
189
190
|
PydanticBaseType(pyd_type), props.get("format"), props.get("enum")
|
|
190
191
|
)
|
|
191
192
|
|
|
192
|
-
def _default_polars_dtype_for_schema(self, schema:
|
|
193
|
+
def _default_polars_dtype_for_schema(self, schema: dict) -> DataType | None:
|
|
193
194
|
if "anyOf" in schema:
|
|
194
195
|
if len(schema["anyOf"]) == 2: # look for optionals first
|
|
195
196
|
schema = _without_optional(schema)
|
|
@@ -205,12 +206,12 @@ class DtypeResolver:
|
|
|
205
206
|
|
|
206
207
|
def _pydantic_subschema_to_default_dtype(
|
|
207
208
|
self,
|
|
208
|
-
props:
|
|
209
|
+
props: dict,
|
|
209
210
|
) -> DataType | None:
|
|
210
211
|
if "column_info" in props: # user has specified in patito model
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
dtype = dtype() if isinstance(dtype, DataTypeClass) else dtype
|
|
212
|
+
ci = ColumnInfo.model_validate_json(props["column_info"])
|
|
213
|
+
if ci.dtype is not None:
|
|
214
|
+
dtype = ci.dtype() if isinstance(ci.dtype, DataTypeClass) else ci.dtype
|
|
214
215
|
return dtype
|
|
215
216
|
if "type" not in props:
|
|
216
217
|
if "enum" in props:
|
patito/_pydantic/dtypes/utils.py
CHANGED
|
@@ -1,15 +1,11 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import sys
|
|
4
|
+
from collections.abc import Sequence
|
|
4
5
|
from enum import Enum
|
|
5
6
|
from typing import (
|
|
6
7
|
Any,
|
|
7
|
-
Dict,
|
|
8
|
-
List,
|
|
9
|
-
Optional,
|
|
10
|
-
Sequence,
|
|
11
8
|
Union,
|
|
12
|
-
cast,
|
|
13
9
|
get_args,
|
|
14
10
|
get_origin,
|
|
15
11
|
)
|
|
@@ -23,9 +19,6 @@ from polars.datatypes.group import (
|
|
|
23
19
|
INTEGER_DTYPES,
|
|
24
20
|
DataTypeGroup,
|
|
25
21
|
)
|
|
26
|
-
from polars.polars import (
|
|
27
|
-
dtype_str_repr, # TODO: this is a rust function, can we implement our own string parser for Time/Duration/Datetime?
|
|
28
|
-
)
|
|
29
22
|
|
|
30
23
|
PYTHON_TO_PYDANTIC_TYPES = {
|
|
31
24
|
str: "string",
|
|
@@ -90,32 +83,42 @@ def is_optional(type_annotation: type[Any] | Any | None) -> bool:
|
|
|
90
83
|
)
|
|
91
84
|
|
|
92
85
|
|
|
86
|
+
def unwrap_optional(type_annotation: type[Any] | Any) -> type:
    """Strip the outermost ``Optional`` from a type annotation.

    Annotations that are not Optional are handed back untouched.

    Args:
        type_annotation: The annotation to unwrap.

    Returns:
        The first argument of the annotation that is not ``NoneType`` when
        the annotation is Optional, otherwise the annotation itself.

    """
    if not is_optional(type_annotation):
        return type_annotation

    non_none_members = (
        member
        for member in get_args(type_annotation)
        if member is not type(None)  # noqa: E721
    )
    return next(non_none_members)  # pragma: no cover
|
|
107
|
+
|
|
108
|
+
|
|
93
109
|
def parse_composite_dtype(dtype: DataTypeClass | DataType) -> str:
|
|
94
110
|
"""For serialization, converts polars dtype to string representation."""
|
|
95
|
-
|
|
96
|
-
if dtype == pl.Struct or isinstance(dtype, pl.Struct):
|
|
97
|
-
raise NotImplementedError("Structs not yet supported by patito")
|
|
98
|
-
if not isinstance(dtype, pl.List) or isinstance(dtype, pl.Array):
|
|
99
|
-
raise NotImplementedError(
|
|
100
|
-
f"Unsupported nested dtype: {dtype} of type {type(dtype)}"
|
|
101
|
-
)
|
|
102
|
-
if dtype.inner is None:
|
|
103
|
-
return convert.DataTypeMappings.DTYPE_TO_FFINAME[dtype.base_type()]
|
|
104
|
-
return f"{convert.DataTypeMappings.DTYPE_TO_FFINAME[dtype.base_type()]}[{parse_composite_dtype(dtype.inner)}]"
|
|
105
|
-
elif dtype.is_temporal():
|
|
106
|
-
return cast(str, dtype_str_repr(dtype))
|
|
107
|
-
else:
|
|
108
|
-
return convert.DataTypeMappings.DTYPE_TO_FFINAME[dtype]
|
|
111
|
+
return str(dtype)
|
|
109
112
|
|
|
110
113
|
|
|
111
|
-
def dtype_from_string(v: str) ->
|
|
114
|
+
def dtype_from_string(v: str) -> DataTypeClass | DataType | None:
    """Look up the polars dtype for a string, for deserialization.

    Delegates entirely to ``convert.dtype_short_repr_to_dtype``.

    Args:
        v: String form of the dtype to resolve.

    Returns:
        Whatever ``convert.dtype_short_repr_to_dtype`` returns for ``v``.
    """
    # TODO test all dtypes
    resolved = convert.dtype_short_repr_to_dtype(v)
    return resolved
|
|
115
118
|
|
|
116
119
|
|
|
117
120
|
def _pyd_type_to_valid_dtypes(
|
|
118
|
-
pyd_type: PydanticBaseType, string_format:
|
|
121
|
+
pyd_type: PydanticBaseType, string_format: str | None, enum: list[str] | None
|
|
119
122
|
) -> DataTypeGroup:
|
|
120
123
|
if enum is not None:
|
|
121
124
|
_validate_enum_values(pyd_type, enum)
|
|
@@ -142,7 +145,7 @@ def _pyd_type_to_valid_dtypes(
|
|
|
142
145
|
|
|
143
146
|
|
|
144
147
|
def _pyd_type_to_default_dtype(
|
|
145
|
-
pyd_type: PydanticBaseType, string_format:
|
|
148
|
+
pyd_type: PydanticBaseType, string_format: str | None, enum: list[str] | None
|
|
146
149
|
) -> DataTypeClass | DataType:
|
|
147
150
|
if enum is not None:
|
|
148
151
|
_validate_enum_values(pyd_type, enum)
|
|
@@ -208,7 +211,7 @@ def _pyd_string_format_to_default_dtype(
|
|
|
208
211
|
raise NotImplementedError
|
|
209
212
|
|
|
210
213
|
|
|
211
|
-
def _without_optional(schema:
|
|
214
|
+
def _without_optional(schema: dict) -> dict:
|
|
212
215
|
if "anyOf" in schema:
|
|
213
216
|
for sub_props in schema["anyOf"]:
|
|
214
217
|
if "type" in sub_props and sub_props["type"] == "null":
|
patito/_pydantic/repr.py
CHANGED
|
@@ -1,26 +1,23 @@
|
|
|
1
1
|
import sys
|
|
2
2
|
import types
|
|
3
3
|
import typing
|
|
4
|
+
from collections.abc import Generator, Iterable, Sequence
|
|
4
5
|
from typing import (
|
|
5
6
|
Any,
|
|
6
7
|
Callable,
|
|
7
|
-
Generator,
|
|
8
|
-
Iterable,
|
|
9
8
|
Literal,
|
|
10
9
|
Optional,
|
|
11
|
-
Sequence,
|
|
12
|
-
Tuple,
|
|
13
|
-
Type,
|
|
14
10
|
Union,
|
|
15
11
|
get_args,
|
|
16
12
|
get_origin,
|
|
17
13
|
)
|
|
14
|
+
from typing import GenericAlias as TypingGenericAlias # type: ignore
|
|
18
15
|
|
|
19
16
|
if typing.TYPE_CHECKING:
|
|
20
|
-
Loc =
|
|
21
|
-
ReprArgs = Sequence[
|
|
17
|
+
Loc = tuple[Union[int, str], ...]
|
|
18
|
+
ReprArgs = Sequence[tuple[Optional[str], Any]]
|
|
22
19
|
RichReprResult = Iterable[
|
|
23
|
-
Union[Any,
|
|
20
|
+
Union[Any, tuple[Any], tuple[str, Any], tuple[str, Any, Any]]
|
|
24
21
|
]
|
|
25
22
|
|
|
26
23
|
try:
|
|
@@ -30,15 +27,10 @@ except ImportError:
|
|
|
30
27
|
|
|
31
28
|
typing_base = _TypingBase
|
|
32
29
|
|
|
33
|
-
if sys.version_info < (3, 9):
|
|
34
|
-
# python < 3.9 does not have GenericAlias (list[int], tuple[str, ...] and so on)
|
|
35
|
-
TypingGenericAlias = ()
|
|
36
|
-
else:
|
|
37
|
-
from typing import GenericAlias as TypingGenericAlias # type: ignore
|
|
38
30
|
|
|
39
31
|
if sys.version_info < (3, 10):
|
|
40
32
|
|
|
41
|
-
def origin_is_union(tp: Optional[
|
|
33
|
+
def origin_is_union(tp: Optional[type[Any]]) -> bool:
|
|
42
34
|
return tp is typing.Union
|
|
43
35
|
|
|
44
36
|
WithArgsTypes = (TypingGenericAlias,)
|
|
@@ -58,7 +50,7 @@ class Representation:
|
|
|
58
50
|
of objects.
|
|
59
51
|
"""
|
|
60
52
|
|
|
61
|
-
__slots__:
|
|
53
|
+
__slots__: tuple[str, ...] = tuple()
|
|
62
54
|
|
|
63
55
|
def __repr_args__(self) -> "ReprArgs":
|
|
64
56
|
"""Returns the attributes to show in __str__, __repr__, and __pretty__ this is generally overridden.
|
patito/_pydantic/schema.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from collections.abc import Mapping
|
|
3
4
|
from functools import cache
|
|
4
|
-
from typing import TYPE_CHECKING, Any,
|
|
5
|
+
from typing import TYPE_CHECKING, Any, get_args
|
|
5
6
|
|
|
6
7
|
from pydantic.fields import FieldInfo
|
|
7
8
|
|
|
@@ -13,7 +14,7 @@ if TYPE_CHECKING:
|
|
|
13
14
|
|
|
14
15
|
|
|
15
16
|
@cache
|
|
16
|
-
def schema_for_model(cls:
|
|
17
|
+
def schema_for_model(cls: type[ModelType]) -> dict[str, dict[str, Any]]:
|
|
17
18
|
"""Return schema properties where definition references have been resolved.
|
|
18
19
|
|
|
19
20
|
Returns:
|
|
@@ -46,27 +47,27 @@ def schema_for_model(cls: Type[ModelType]) -> Dict[str, Dict[str, Any]]:
|
|
|
46
47
|
|
|
47
48
|
|
|
48
49
|
@cache
|
|
49
|
-
def column_infos_for_model(cls:
|
|
50
|
+
def column_infos_for_model(cls: type[ModelType]) -> Mapping[str, ColumnInfo]:
|
|
50
51
|
fields = cls.model_fields
|
|
51
52
|
|
|
52
53
|
def get_column_info(field: FieldInfo) -> ColumnInfo:
|
|
53
54
|
if field.json_schema_extra is None:
|
|
54
|
-
return
|
|
55
|
+
return ColumnInfo()
|
|
55
56
|
elif callable(field.json_schema_extra):
|
|
56
57
|
raise NotImplementedError(
|
|
57
58
|
"Callable json_schema_extra not supported by patito."
|
|
58
59
|
)
|
|
59
|
-
return
|
|
60
|
+
return ColumnInfo.model_validate_json(field.json_schema_extra["column_info"])
|
|
60
61
|
|
|
61
62
|
return {k: get_column_info(v) for k, v in fields.items()}
|
|
62
63
|
|
|
63
64
|
|
|
64
65
|
def _append_field_info_to_props(
|
|
65
|
-
field_info:
|
|
66
|
+
field_info: dict[str, Any],
|
|
66
67
|
field_name: str,
|
|
67
|
-
model_schema:
|
|
68
|
-
required:
|
|
69
|
-
) ->
|
|
68
|
+
model_schema: dict[str, Any],
|
|
69
|
+
required: bool | None = None,
|
|
70
|
+
) -> dict[str, Any]:
|
|
70
71
|
if "$ref" in field_info: # TODO onto runtime append
|
|
71
72
|
definition = model_schema["$defs"][field_info["$ref"]]
|
|
72
73
|
if "enum" in definition and "type" not in definition:
|
patito/exceptions.py
CHANGED
|
@@ -1,15 +1,10 @@
|
|
|
1
1
|
"""Exceptions used by patito."""
|
|
2
2
|
|
|
3
|
+
from collections.abc import Generator, Sequence
|
|
3
4
|
from typing import (
|
|
4
5
|
TYPE_CHECKING,
|
|
5
6
|
Any,
|
|
6
|
-
Dict,
|
|
7
|
-
Generator,
|
|
8
|
-
List,
|
|
9
7
|
Optional,
|
|
10
|
-
Sequence,
|
|
11
|
-
Tuple,
|
|
12
|
-
Type,
|
|
13
8
|
TypedDict,
|
|
14
9
|
Union,
|
|
15
10
|
)
|
|
@@ -19,7 +14,7 @@ from patito._pydantic.repr import Representation
|
|
|
19
14
|
if TYPE_CHECKING:
|
|
20
15
|
from pydantic import BaseModel
|
|
21
16
|
|
|
22
|
-
Loc =
|
|
17
|
+
Loc = tuple[Union[int, str], ...]
|
|
23
18
|
|
|
24
19
|
class _ErrorDictRequired(TypedDict):
|
|
25
20
|
loc: Loc
|
|
@@ -27,7 +22,7 @@ if TYPE_CHECKING:
|
|
|
27
22
|
type: str
|
|
28
23
|
|
|
29
24
|
class ErrorDict(_ErrorDictRequired, total=False):
|
|
30
|
-
ctx:
|
|
25
|
+
ctx: dict[str, Any]
|
|
31
26
|
|
|
32
27
|
from patito._pydantic.repr import ReprArgs
|
|
33
28
|
|
|
@@ -67,13 +62,13 @@ class DataFrameValidationError(Representation, ValueError):
|
|
|
67
62
|
|
|
68
63
|
__slots__ = "raw_errors", "model", "_error_cache"
|
|
69
64
|
|
|
70
|
-
def __init__(self, errors: Sequence[ErrorList], model:
|
|
65
|
+
def __init__(self, errors: Sequence[ErrorList], model: type["BaseModel"]) -> None:
|
|
71
66
|
"""Create a dataframe validation error."""
|
|
72
67
|
self.raw_errors = errors
|
|
73
68
|
self.model = model
|
|
74
|
-
self._error_cache: Optional[
|
|
69
|
+
self._error_cache: Optional[list[ErrorDict]] = None
|
|
75
70
|
|
|
76
|
-
def errors(self) ->
|
|
71
|
+
def errors(self) -> list["ErrorDict"]:
|
|
77
72
|
"""Get list of errors."""
|
|
78
73
|
if self._error_cache is None:
|
|
79
74
|
self._error_cache = list(flatten_errors(self.raw_errors))
|
|
@@ -93,7 +88,7 @@ class DataFrameValidationError(Representation, ValueError):
|
|
|
93
88
|
return [("model", self.model.__name__), ("errors", self.errors())]
|
|
94
89
|
|
|
95
90
|
|
|
96
|
-
def display_errors(errors:
|
|
91
|
+
def display_errors(errors: list["ErrorDict"]) -> str:
|
|
97
92
|
return "\n".join(
|
|
98
93
|
f'{_display_error_loc(e)}\n {e["msg"]} ({_display_error_type_and_ctx(e)})'
|
|
99
94
|
for e in errors
|
|
@@ -142,7 +137,7 @@ def error_dict(exc: Exception, loc: "Loc") -> "ErrorDict":
|
|
|
142
137
|
else:
|
|
143
138
|
msg = str(exc)
|
|
144
139
|
|
|
145
|
-
d:
|
|
140
|
+
d: ErrorDict = {"loc": loc, "msg": msg, "type": type_}
|
|
146
141
|
|
|
147
142
|
if ctx:
|
|
148
143
|
d["ctx"] = ctx
|
|
@@ -150,10 +145,10 @@ def error_dict(exc: Exception, loc: "Loc") -> "ErrorDict":
|
|
|
150
145
|
return d
|
|
151
146
|
|
|
152
147
|
|
|
153
|
-
_EXC_TYPE_CACHE:
|
|
148
|
+
_EXC_TYPE_CACHE: dict[type[Exception], str] = {}
|
|
154
149
|
|
|
155
150
|
|
|
156
|
-
def get_exc_type(cls:
|
|
151
|
+
def get_exc_type(cls: type[Exception]) -> str:
|
|
157
152
|
# slightly more efficient than using lru_cache since we don't need to worry about the cache filling up
|
|
158
153
|
try:
|
|
159
154
|
return _EXC_TYPE_CACHE[cls]
|
|
@@ -163,7 +158,7 @@ def get_exc_type(cls: Type[Exception]) -> str:
|
|
|
163
158
|
return r
|
|
164
159
|
|
|
165
160
|
|
|
166
|
-
def _get_exc_type(cls:
|
|
161
|
+
def _get_exc_type(cls: type[Exception]) -> str:
|
|
167
162
|
if issubclass(cls, AssertionError):
|
|
168
163
|
return "assertion_error"
|
|
169
164
|
|