patito 0.7.0__tar.gz → 0.8.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {patito-0.7.0 → patito-0.8.2}/PKG-INFO +4 -3
- {patito-0.7.0 → patito-0.8.2}/README.md +1 -1
- {patito-0.7.0 → patito-0.8.2}/pyproject.toml +22 -14
- patito-0.8.2/src/patito/_pydantic/column_info.py +149 -0
- {patito-0.7.0 → patito-0.8.2}/src/patito/_pydantic/dtypes/dtypes.py +44 -36
- {patito-0.7.0 → patito-0.8.2}/src/patito/_pydantic/dtypes/utils.py +30 -27
- {patito-0.7.0 → patito-0.8.2}/src/patito/_pydantic/repr.py +7 -15
- {patito-0.7.0 → patito-0.8.2}/src/patito/_pydantic/schema.py +10 -9
- {patito-0.7.0 → patito-0.8.2}/src/patito/exceptions.py +11 -16
- {patito-0.7.0 → patito-0.8.2}/src/patito/polars.py +191 -118
- {patito-0.7.0 → patito-0.8.2}/src/patito/pydantic.py +108 -95
- {patito-0.7.0 → patito-0.8.2}/src/patito/validators.py +111 -71
- patito-0.7.0/src/patito/_pydantic/column_info.py +0 -94
- {patito-0.7.0 → patito-0.8.2}/LICENSE +0 -0
- {patito-0.7.0 → patito-0.8.2}/src/patito/__init__.py +0 -0
- {patito-0.7.0 → patito-0.8.2}/src/patito/_docs.py +0 -0
- {patito-0.7.0 → patito-0.8.2}/src/patito/_pydantic/__init__.py +0 -0
- {patito-0.7.0 → patito-0.8.2}/src/patito/_pydantic/dtypes/__init__.py +0 -0
{patito-0.7.0 → patito-0.8.2}/PKG-INFO +4 -3

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: patito
-Version: 0.7.0
+Version: 0.8.2
 Summary: A dataframe modelling library built on top of polars and pydantic.
 Home-page: https://github.com/JakobGM/patito
 License: MIT
@@ -13,12 +13,13 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
 Provides-Extra: caching
 Provides-Extra: docs
 Provides-Extra: pandas
 Requires-Dist: Sphinx (<7) ; extra == "docs"
 Requires-Dist: pandas ; extra == "pandas"
-Requires-Dist: polars (>=1.
+Requires-Dist: polars (>=1.10.0)
 Requires-Dist: pyarrow (>=5.0.0) ; extra == "caching"
 Requires-Dist: pydantic (>=2.7.0)
 Requires-Dist: sphinx-autobuild ; extra == "docs"
@@ -74,7 +75,7 @@ pip install patito
 
 ## Documentation
 
-The full documentation of
+The full documentation of Patito can be found [here](https://patito.readthedocs.io).
 
 ## 👮 Data validation
 
{patito-0.7.0 → patito-0.8.2}/pyproject.toml +22 -14

@@ -1,8 +1,8 @@
 [tool.poetry]
 name = "patito"
-version = "0.7.0"
+version = "0.8.2"
 description = "A dataframe modelling library built on top of polars and pydantic."
-authors = ["Jakob Gerhard Martinussen <jakobgm@gmail.com>"]
+authors = ["Jakob Gerhard Martinussen <jakobgm@gmail.com>", "Thomas Aarholt <thomasaarholt@gmail.com>"]
 license = "MIT"
 readme = "README.md"
 homepage = "https://github.com/JakobGM/patito"
@@ -13,7 +13,7 @@ keywords = ["validation", "dataframe"]
 [tool.poetry.dependencies]
 python = ">=3.9"
 pydantic = ">=2.7.0"
-polars = ">=1.
+polars = ">=1.10.0"
 # Required for typing.get_args backports in python3.9 and 3.10
 typing-extensions = "*"
 pandas = {version = "*", optional = true}
@@ -41,13 +41,8 @@ docs = [
 
 [tool.poetry.group.dev.dependencies]
 ruff = ">=0.2.1"
+pre-commit = "^3.8.0"
 coverage = {version = "*", extras = ["toml"]}
-flake8 = "3.9.2"
-flake8-annotations = { version = "*", python = ">=3.9,<4.0" }
-flake8-bandit = "*"
-flake8-black = "*"
-flake8-bugbear = "*"
-flake8-isort = "*"
 pyright = ">=1.1.239"
 pytest = ">=7.1.2"
 pytest-cov = ">=3.0.0"
@@ -94,11 +89,19 @@ exclude_lines = [
 fail_under = 99.64
 show_missing = true
 
-[tool.isort]
-profile = "black"
-
 [tool.pyright]
-
+typeCheckingMode = "basic"
+venvPath = "."
+venv = ".venv"
+pythonVersion = "3.9"
+
+exclude = [
+    ".venv",
+    "noxfile.py",
+    "**/node_modules",
+    "**/__pycache__",
+    "**/.*"
+]
 
 [tool.mypy]
 warn_unused_configs = true
@@ -114,6 +117,9 @@ allow_redefinition = true
 show_error_codes = true
 exclude = [
     "noxfile.py",
+    "**/node_modules",
+    "**/__pycache__",
+    "**/.*"
 ]
 
 
@@ -122,10 +128,12 @@ module = ["tests.test_validators"]
 warn_unused_ignores = false
 
 [tool.ruff]
+target-version = "py39"
 extend-exclude= ["tests/__init__.py"]
 
 [tool.ruff.lint]
-select = ["E4", "E7", "E9", "F", "I", "B", "D"]
+select = ["E4", "E7", "E9", "F", "I", "B", "D", "UP"]
+ignore = ["UP007"]
 
 [tool.ruff.lint.pydocstyle]
 convention = "google"
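Note on the ruff changes above: `UP` enables the pyupgrade rules, while `UP007` (the rule that rewrites `typing.Optional[X]`/`typing.Union[X, Y]` to PEP 604 `X | Y`) is explicitly ignored. A plausible reason, sketched below, is that the package still supports Python 3.9, where PEP 604 unions are not evaluable at runtime, and pydantic evaluates field annotations when building models (the model here is illustrative):

```python
from typing import Optional

from pydantic import BaseModel


class Example(BaseModel):
    # UP007 would rewrite this annotation to `int | None`; evaluating
    # `int | None` raises TypeError on Python 3.9, so pydantic models
    # keep the typing.Optional spelling and the rule is ignored.
    value: Optional[int] = None
```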
patito-0.8.2/src/patito/_pydantic/column_info.py +149 -0

@@ -0,0 +1,149 @@
+from __future__ import annotations
+
+import io
+import json
+from typing import Annotated, Optional, Union
+
+import polars as pl
+from polars.datatypes import *  # noqa: F403 # type: ignore
+from polars.datatypes import DataType, DataTypeClass
+from polars.exceptions import ComputeError
+from pydantic import BaseModel, BeforeValidator, field_serializer
+
+
+def dtype_deserializer(dtype: str | DataTypeClass | DataType | None):
+    """Deserialize a dtype from json."""
+    if isinstance(dtype, DataTypeClass) or isinstance(dtype, DataType):
+        return dtype
+    else:
+        if dtype == "null" or dtype is None:
+            return None
+        else:
+            return eval(dtype)
+
+
+def expr_deserializer(
+    expr: str | pl.Expr | list[pl.Expr] | None,
+) -> pl.Expr | list[pl.Expr] | None:
+    """Deserialize a polars expression or list thereof from json.
+
+    This is applied both during deserialization and validation.
+    """
+    if expr is None:
+        return None
+    elif isinstance(expr, pl.Expr):
+        return expr
+    elif isinstance(expr, list):
+        return expr
+    elif isinstance(expr, str):
+        if expr == "null":
+            return None
+        # can be either a list of expr or expr
+        elif expr[0] == "[":
+            return [
+                pl.Expr.deserialize(io.StringIO(e), format="json")
+                for e in json.loads(expr)
+            ]
+        else:
+            return pl.Expr.deserialize(io.StringIO(expr), format="json")
+    else:
+        raise ValueError(f"{expr} can not be deserialized.")
+
+
+def expr_or_col_name_deserializer(expr: str | pl.Expr | None) -> pl.Expr | str | None:
+    """Deserialize a polars expression or column name from json.
+
+    This is applied both during deserialization and validation.
+    """
+    if expr is None:
+        return None
+    elif isinstance(expr, pl.Expr):
+        return expr
+    elif isinstance(expr, list):
+        return expr
+    elif isinstance(expr, str):
+        # Default behaviour
+        if expr == "null":
+            return None
+        else:
+            try:
+                return pl.Expr.deserialize(io.StringIO(expr), format="json")
+            except ComputeError:
+                try:
+                    # Column name is being deserialized
+                    return json.loads(expr)
+                except json.JSONDecodeError:
+                    # Column name has been passed literally
+                    # to ColumnInfo(derived_from="foo")
+                    return expr
+    else:
+        raise ValueError(f"{expr} can not be deserialized.")
+
+
+class ColumnInfo(BaseModel, arbitrary_types_allowed=True):
+    """patito-side model for storing column metadata.
+
+    Args:
+        allow_missing (bool): Column may be missing.
+        constraints (Union[polars.Expression, List[polars.Expression]): A single
+            constraint or list of constraints, expressed as a polars expression objects.
+            All rows must satisfy the given constraint. You can refer to the given column
+            with ``pt.field``, which will automatically be replaced with
+            ``polars.col(<field_name>)`` before evaluation.
+        derived_from (Union[str, polars.Expr]): used to mark fields that are meant to be derived from other fields. Users can specify a polars expression that will be called to derive the column value when `pt.DataFrame.derive` is called.
+        dtype (polars.datatype.DataType): The given dataframe column must have the given
+            polars dtype, for instance ``polars.UInt64`` or ``pl.Float32``.
+        unique (bool): All row values must be unique.
+
+    """
+
+    allow_missing: Optional[bool] = None
+    dtype: Annotated[
+        Optional[Union[DataTypeClass, DataType]],
+        BeforeValidator(dtype_deserializer),
+    ] = None
+    constraints: Annotated[
+        Optional[Union[pl.Expr, list[pl.Expr]]],
+        BeforeValidator(expr_deserializer),
+    ] = None
+    derived_from: Annotated[
+        Optional[Union[str, pl.Expr]],
+        BeforeValidator(expr_or_col_name_deserializer),
+    ] = None
+    unique: Optional[bool] = None
+
+    def __repr__(self) -> str:
+        """Print only Field attributes whose values are not default (mainly None)."""
+        not_default_field = {
+            field: getattr(self, field)
+            for field in self.model_fields
+            if getattr(self, field) is not self.model_fields[field].default
+        }
+
+        string = ""
+        for field, value in not_default_field.items():
+            string += f"{field}={value}, "
+        if string:
+            # remove trailing comma and space
+            string = string[:-2]
+        return f"ColumnInfo({string})"
+
+    @field_serializer("constraints", "derived_from")
+    def expr_serializer(self, expr: None | pl.Expr | list[pl.Expr]):
+        """Converts polars expr to json."""
+        if expr is None:
+            return "null"
+        elif isinstance(expr, str):
+            return json.dumps(expr)
+        elif isinstance(expr, list):
+            return json.dumps([e.meta.serialize(format="json") for e in expr])
+        else:
+            return expr.meta.serialize(format="json")
+
+    @field_serializer("dtype")
+    def dtype_serializer(self, dtype: DataTypeClass | DataType | None) -> str:
+        """Converts polars dtype to json."""
+        if dtype is None:
+            return "null"
+        else:
+            return str(dtype)
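Taken together, the `field_serializer`s and `BeforeValidator`s above make `ColumnInfo` round-trippable through JSON: live `pl.Expr` and dtype objects are emitted as JSON strings and rebuilt on validation. A minimal sketch of that round trip, assuming only the module as defined above:

```python
import polars as pl

from patito._pydantic.column_info import ColumnInfo

# Column metadata holding a live polars expression and dtype.
info = ColumnInfo(dtype=pl.UInt32, constraints=pl.col("age") >= 0)

# The field serializers emit the expression and dtype as JSON strings...
payload = info.model_dump_json()

# ...and the BeforeValidators rebuild live objects on the way back in.
restored = ColumnInfo.model_validate_json(payload)
assert restored.dtype == pl.UInt32
assert isinstance(restored.constraints, pl.Expr)
```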
{patito-0.7.0 → patito-0.8.2}/src/patito/_pydantic/dtypes/dtypes.py +44 -36

@@ -1,21 +1,22 @@
 from __future__ import annotations
 
+from collections.abc import Mapping
 from functools import cache, reduce
-from operator import
-from typing import TYPE_CHECKING, Any
+from operator import or_
+from typing import TYPE_CHECKING, Any
 
 import polars as pl
 from polars.datatypes import DataType, DataTypeClass
 from polars.datatypes.group import DataTypeGroup
 from pydantic import TypeAdapter
 
+from patito._pydantic.column_info import ColumnInfo
 from patito._pydantic.dtypes.utils import (
     PT_BASE_SUPPORTED_DTYPES,
     PydanticBaseType,
     _pyd_type_to_default_dtype,
     _pyd_type_to_valid_dtypes,
     _without_optional,
-    dtype_from_string,
 )
 from patito._pydantic.repr import display_as_type
 
@@ -25,8 +26,8 @@ if TYPE_CHECKING:
 
 @cache
 def valid_dtypes_for_model(
-    cls:
-) -> Mapping[str,
+    cls: type[ModelType],
+) -> Mapping[str, frozenset[DataTypeClass]]:
     return {
         column: (
             DtypeResolver(cls.model_fields[column].annotation).valid_polars_dtypes()
@@ -39,7 +40,7 @@ def valid_dtypes_for_model(
 
 @cache
 def default_dtypes_for_model(
-    cls:
+    cls: type[ModelType],
 ) -> dict[str, DataType]:
     default_dtypes: dict[str, DataType] = {}
     for column in cls.columns:
@@ -57,7 +58,7 @@ def default_dtypes_for_model(
 def validate_polars_dtype(
     annotation: type[Any] | None,
     dtype: DataType | DataTypeClass | None,
-    column:
+    column: str | None = None,
 ) -> None:
     """Check that the polars dtype is valid for the given annotation. Raises ValueError if not.
 
@@ -84,7 +85,7 @@ def validate_polars_dtype(
 
 
 def validate_annotation(
-    annotation: type[Any] | Any | None, column:
+    annotation: type[Any] | Any | None, column: str | None = None
 ) -> None:
     """Check that the provided annotation has polars/patito support (we can resolve it to a default dtype). Raises ValueError if not.
 
@@ -114,7 +115,8 @@ def validate_annotation(
 class DtypeResolver:
     def __init__(self, annotation: Any | None):
         self.annotation = annotation
-
+        # mode='serialization' allows nested models with structs, see #86
+        self.schema = TypeAdapter(annotation).json_schema(mode="serialization")
         self.defs = self.schema.get("$defs", {})
 
     def valid_polars_dtypes(self) -> DataTypeGroup:
@@ -129,7 +131,7 @@ class DtypeResolver:
 
     def _valid_polars_dtypes_for_schema(
         self,
-        schema:
+        schema: dict,
     ) -> DataTypeGroup:
         valid_type_sets = []
         if "anyOf" in schema:
@@ -142,11 +144,11 @@ class DtypeResolver:
             valid_type_sets.append(
                 self._pydantic_subschema_to_valid_polars_types(schema)
             )
-        return reduce(
+        return reduce(or_, valid_type_sets) if valid_type_sets else DataTypeGroup([])
 
     def _pydantic_subschema_to_valid_polars_types(
         self,
-        props:
+        props: dict,
     ) -> DataTypeGroup:
         if "type" not in props:
             if "enum" in props:
@@ -158,6 +160,7 @@ class DtypeResolver:
                 self.defs[props["$ref"].split("/")[-1]]
             )
             return DataTypeGroup([])
+
         pyd_type = props.get("type")
         if pyd_type == "array":
             if "items" not in props:
@@ -168,28 +171,27 @@ class DtypeResolver:
             return DataTypeGroup(
                 [pl.List(dtype) for dtype in item_dtypes], match_base_type=False
             )
+
         elif pyd_type == "object":
             if "properties" not in props:
                 return DataTypeGroup([])
             object_props = props["properties"]
+            struct_fields: list[pl.Field] = []
+            for name, sub_props in object_props.items():
+                dtype = self._default_polars_dtype_for_schema(sub_props)
+                assert dtype is not None
+                struct_fields.append(pl.Field(name, dtype))
             return DataTypeGroup(
-                [
-                    pl.Struct(
-                        [
-                            pl.Field(
-                                name, self._default_polars_dtype_for_schema(sub_props)
-                            )
-                            for name, sub_props in object_props.items()
-                        ]
-                    )
-                ],
+                [pl.Struct(struct_fields)],
                 match_base_type=False,
             )  # for structs, return only the default dtype set to avoid combinatoric issues
         return _pyd_type_to_valid_dtypes(
             PydanticBaseType(pyd_type), props.get("format"), props.get("enum")
         )
 
-    def _default_polars_dtype_for_schema(
+    def _default_polars_dtype_for_schema(
+        self, schema: dict[str, Any]
+    ) -> DataType | None:
         if "anyOf" in schema:
             if len(schema["anyOf"]) == 2:  # look for optionals first
                 schema = _without_optional(schema)
@@ -205,13 +207,14 @@ class DtypeResolver:
 
     def _pydantic_subschema_to_default_dtype(
         self,
-        props:
+        props: dict[str, Any],
     ) -> DataType | None:
         if "column_info" in props:  # user has specified in patito model
-
-
-            dtype = dtype() if isinstance(dtype, DataTypeClass) else dtype
+            ci = ColumnInfo.model_validate_json(props["column_info"])
+            if ci.dtype is not None:
+                dtype = ci.dtype() if isinstance(ci.dtype, DataTypeClass) else ci.dtype
             return dtype
+
         if "type" not in props:
             if "enum" in props:
                 raise TypeError("Mixed type enums not supported by patito.")
@@ -222,10 +225,12 @@ class DtypeResolver:
                 self.defs[props["$ref"].split("/")[-1]]
             )
             return None
+
         pyd_type = props.get("type")
         if pyd_type == "numeric":
             pyd_type = "number"
-
+
+        elif pyd_type == "array":
             if "items" not in props:
                 raise NotImplementedError(
                     "Unexpected error processing pydantic schema. Please file an issue."
@@ -235,18 +240,21 @@ class DtypeResolver:
             if inner_default_type is None:
                 return None
            return pl.List(inner_default_type)
-
+
+        elif pyd_type == "object":  # these are structs
             if "properties" not in props:
                 raise NotImplementedError(
                     "dictionaries not currently supported by patito"
                 )
-            object_props = props["properties"]
-
-
-
-
-
-
+            object_props: dict[str, dict[str, str]] = props["properties"]
+            struct_fields: list[pl.Field] = []
+
+            for name, sub_props in object_props.items():
+                dtype = self._default_polars_dtype_for_schema(sub_props)
+                assert dtype is not None
+                struct_fields.append(pl.Field(name, dtype))
+            return pl.Struct(struct_fields)
+
         return _pyd_type_to_default_dtype(
             PydanticBaseType(pyd_type), props.get("format"), props.get("enum")
         )
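The visible theme in this file is struct support: the schema is now generated with `mode="serialization"`, and "object" subschemas are folded into `pl.Struct` fields. A rough sketch of what that enables for nested models (the models are illustrative, and `Model.dtypes` is assumed to be patito's accessor for resolved default dtypes):

```python
import patito as pt


class Inner(pt.Model):
    name: str
    value: int


class Outer(pt.Model):
    # A nested model surfaces as an "object" subschema, which
    # _default_polars_dtype_for_schema now maps to a struct dtype,
    # roughly Struct({"name": String, "value": Int64}).
    inner: Inner


print(Outer.dtypes)
```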
{patito-0.7.0 → patito-0.8.2}/src/patito/_pydantic/dtypes/utils.py +30 -27

@@ -1,15 +1,11 @@
 from __future__ import annotations
 
 import sys
+from collections.abc import Sequence
 from enum import Enum
 from typing import (
     Any,
-    Dict,
-    List,
-    Optional,
-    Sequence,
     Union,
-    cast,
     get_args,
     get_origin,
 )
@@ -23,9 +19,6 @@ from polars.datatypes.group import (
     INTEGER_DTYPES,
     DataTypeGroup,
 )
-from polars.polars import (
-    dtype_str_repr,  # TODO: this is a rust function, can we implement our own string parser for Time/Duration/Datetime?
-)
 
 PYTHON_TO_PYDANTIC_TYPES = {
     str: "string",
@@ -90,38 +83,48 @@ def is_optional(type_annotation: type[Any] | Any | None) -> bool:
     )
 
 
+def unwrap_optional(type_annotation: type[Any] | Any) -> type:
+    """Return the inner, wrapped type of an Optional.
+
+    Is a no-op for non-Optional types.
+
+    Args:
+        type_annotation: The type annotation to be dewrapped.
+
+    Returns:
+        The input type, but with the outermost Optional removed.
+
+    """
+    return (
+        next(  # pragma: no cover
+            valid_type
+            for valid_type in get_args(type_annotation)
+            if valid_type is not type(None)  # noqa: E721
+        )
+        if is_optional(type_annotation)
+        else type_annotation
+    )
+
+
 def parse_composite_dtype(dtype: DataTypeClass | DataType) -> str:
     """For serialization, converts polars dtype to string representation."""
-
-        if dtype == pl.Struct or isinstance(dtype, pl.Struct):
-            raise NotImplementedError("Structs not yet supported by patito")
-        if not isinstance(dtype, pl.List) or isinstance(dtype, pl.Array):
-            raise NotImplementedError(
-                f"Unsupported nested dtype: {dtype} of type {type(dtype)}"
-            )
-        if dtype.inner is None:
-            return convert.DataTypeMappings.DTYPE_TO_FFINAME[dtype.base_type()]
-        return f"{convert.DataTypeMappings.DTYPE_TO_FFINAME[dtype.base_type()]}[{parse_composite_dtype(dtype.inner)}]"
-    elif dtype.is_temporal():
-        return cast(str, dtype_str_repr(dtype))
-    else:
-        return convert.DataTypeMappings.DTYPE_TO_FFINAME[dtype]
+    return str(dtype)
 
 
-def dtype_from_string(v: str) ->
+def dtype_from_string(v: str) -> DataTypeClass | DataType | None:
     """For deserialization."""
     # TODO test all dtypes
     return convert.dtype_short_repr_to_dtype(v)
 
 
 def _pyd_type_to_valid_dtypes(
-    pyd_type: PydanticBaseType, string_format:
+    pyd_type: PydanticBaseType, string_format: str | None, enum: list[str] | None
 ) -> DataTypeGroup:
     if enum is not None:
         _validate_enum_values(pyd_type, enum)
         return DataTypeGroup([pl.Enum(enum), pl.String], match_base_type=False)
     if pyd_type.value == "integer":
-        return DataTypeGroup(INTEGER_DTYPES
+        return DataTypeGroup(INTEGER_DTYPES)
     elif pyd_type.value == "number":
         return (
             FLOAT_DTYPES
@@ -142,7 +145,7 @@ def _pyd_type_to_valid_dtypes(
 
 
 def _pyd_type_to_default_dtype(
-    pyd_type: PydanticBaseType, string_format:
+    pyd_type: PydanticBaseType, string_format: str | None, enum: list[str] | None
 ) -> DataTypeClass | DataType:
     if enum is not None:
         _validate_enum_values(pyd_type, enum)
@@ -208,7 +211,7 @@ def _pyd_string_format_to_default_dtype(
         raise NotImplementedError
 
 
-def _without_optional(schema:
+def _without_optional(schema: dict) -> dict:
     if "anyOf" in schema:
         for sub_props in schema["anyOf"]:
             if "type" in sub_props and sub_props["type"] == "null":
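Two small behaviours worth pinning down from the changes above: `parse_composite_dtype` now defers entirely to polars' own `str(dtype)` representation, and the new `unwrap_optional` helper strips the outermost `Optional` while passing every other annotation through untouched. A quick sketch of the helper, assuming the import path above:

```python
from typing import Optional, Union

from patito._pydantic.dtypes.utils import unwrap_optional

# Non-Optional annotations pass through unchanged.
assert unwrap_optional(int) is int

# Optional annotations are unwrapped to their inner type,
# whichever spelling is used.
assert unwrap_optional(Optional[str]) is str
assert unwrap_optional(Union[str, None]) is str
```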
{patito-0.7.0 → patito-0.8.2}/src/patito/_pydantic/repr.py +7 -15

@@ -1,26 +1,23 @@
 import sys
 import types
 import typing
+from collections.abc import Generator, Iterable, Sequence
 from typing import (
     Any,
     Callable,
-    Generator,
-    Iterable,
     Literal,
     Optional,
-    Sequence,
-    Tuple,
-    Type,
     Union,
     get_args,
     get_origin,
 )
+from typing import GenericAlias as TypingGenericAlias  # type: ignore
 
 if typing.TYPE_CHECKING:
-    Loc =
-    ReprArgs = Sequence[
+    Loc = tuple[Union[int, str], ...]
+    ReprArgs = Sequence[tuple[Optional[str], Any]]
     RichReprResult = Iterable[
-        Union[Any,
+        Union[Any, tuple[Any], tuple[str, Any], tuple[str, Any, Any]]
     ]
 
 try:
@@ -30,15 +27,10 @@ except ImportError:
 
     typing_base = _TypingBase
 
-if sys.version_info < (3, 9):
-    # python < 3.9 does not have GenericAlias (list[int], tuple[str, ...] and so on)
-    TypingGenericAlias = ()
-else:
-    from typing import GenericAlias as TypingGenericAlias  # type: ignore
 
 if sys.version_info < (3, 10):
 
-    def origin_is_union(tp: Optional[
+    def origin_is_union(tp: Optional[type[Any]]) -> bool:
         return tp is typing.Union
 
     WithArgsTypes = (TypingGenericAlias,)
@@ -58,7 +50,7 @@ class Representation:
     of objects.
     """
 
-    __slots__:
+    __slots__: tuple[str, ...] = tuple()
 
     def __repr_args__(self) -> "ReprArgs":
         """Returns the attributes to show in __str__, __repr__, and __pretty__ this is generally overridden.