patito 0.6.2__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {patito-0.6.2 → patito-0.8.0}/PKG-INFO +6 -4
- {patito-0.6.2 → patito-0.8.0}/README.md +1 -1
- {patito-0.6.2 → patito-0.8.0}/pyproject.toml +23 -15
- patito-0.8.0/src/patito/_pydantic/column_info.py +149 -0
- {patito-0.6.2 → patito-0.8.0}/src/patito/_pydantic/dtypes/dtypes.py +19 -15
- {patito-0.6.2 → patito-0.8.0}/src/patito/_pydantic/dtypes/utils.py +32 -28
- {patito-0.6.2 → patito-0.8.0}/src/patito/_pydantic/repr.py +7 -15
- {patito-0.6.2 → patito-0.8.0}/src/patito/_pydantic/schema.py +10 -9
- {patito-0.6.2 → patito-0.8.0}/src/patito/exceptions.py +11 -16
- {patito-0.6.2 → patito-0.8.0}/src/patito/polars.py +124 -65
- {patito-0.6.2 → patito-0.8.0}/src/patito/pydantic.py +98 -89
- {patito-0.6.2 → patito-0.8.0}/src/patito/validators.py +111 -71
- patito-0.6.2/src/patito/_pydantic/column_info.py +0 -94
- {patito-0.6.2 → patito-0.8.0}/LICENSE +0 -0
- {patito-0.6.2 → patito-0.8.0}/src/patito/__init__.py +0 -0
- {patito-0.6.2 → patito-0.8.0}/src/patito/_docs.py +0 -0
- {patito-0.6.2 → patito-0.8.0}/src/patito/_pydantic/__init__.py +0 -0
- {patito-0.6.2 → patito-0.8.0}/src/patito/_pydantic/dtypes/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: patito
|
|
3
|
-
Version: 0.6.2
|
|
3
|
+
Version: 0.8.0
|
|
4
4
|
Summary: A dataframe modelling library built on top of polars and pydantic.
|
|
5
5
|
Home-page: https://github.com/JakobGM/patito
|
|
6
6
|
License: MIT
|
|
@@ -13,14 +13,16 @@ Classifier: Programming Language :: Python :: 3
|
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.9
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.10
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
17
|
Provides-Extra: caching
|
|
17
18
|
Provides-Extra: docs
|
|
18
19
|
Provides-Extra: pandas
|
|
19
20
|
Requires-Dist: Sphinx (<7) ; extra == "docs"
|
|
20
21
|
Requires-Dist: pandas ; extra == "pandas"
|
|
21
|
-
Requires-Dist: polars (>=
|
|
22
|
+
Requires-Dist: polars (>=1.10.0)
|
|
23
|
+
Requires-Dist: pre-commit (>=3.8.0,<4.0.0)
|
|
22
24
|
Requires-Dist: pyarrow (>=5.0.0) ; extra == "caching"
|
|
23
|
-
Requires-Dist: pydantic (>=2.
|
|
25
|
+
Requires-Dist: pydantic (>=2.7.0)
|
|
24
26
|
Requires-Dist: sphinx-autobuild ; extra == "docs"
|
|
25
27
|
Requires-Dist: sphinx-autodoc-typehints ; extra == "docs"
|
|
26
28
|
Requires-Dist: sphinx-rtd-theme ; extra == "docs"
|
|
@@ -74,7 +76,7 @@ pip install patito
|
|
|
74
76
|
|
|
75
77
|
## Documentation
|
|
76
78
|
|
|
77
|
-
The full documentation of
|
|
79
|
+
The full documentation of Patito can be found [here](https://patito.readthedocs.io).
|
|
78
80
|
|
|
79
81
|
## 👮 Data validation
|
|
80
82
|
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "patito"
|
|
3
|
-
version = "0.6.2"
|
|
3
|
+
version = "0.8.0"
|
|
4
4
|
description = "A dataframe modelling library built on top of polars and pydantic."
|
|
5
|
-
authors = ["Jakob Gerhard Martinussen <jakobgm@gmail.com>"]
|
|
5
|
+
authors = ["Jakob Gerhard Martinussen <jakobgm@gmail.com>", "Thomas Aarholt <thomasaarholt@gmail.com>"]
|
|
6
6
|
license = "MIT"
|
|
7
7
|
readme = "README.md"
|
|
8
8
|
homepage = "https://github.com/JakobGM/patito"
|
|
@@ -12,8 +12,8 @@ keywords = ["validation", "dataframe"]
|
|
|
12
12
|
|
|
13
13
|
[tool.poetry.dependencies]
|
|
14
14
|
python = ">=3.9"
|
|
15
|
-
pydantic = ">=2.
|
|
16
|
-
polars = ">=
|
|
15
|
+
pydantic = ">=2.7.0"
|
|
16
|
+
polars = ">=1.10.0"
|
|
17
17
|
# Required for typing.get_args backports in python3.9 and 3.10
|
|
18
18
|
typing-extensions = "*"
|
|
19
19
|
pandas = {version = "*", optional = true}
|
|
@@ -25,6 +25,7 @@ sphinx-autobuild = {version = "*", optional = true}
|
|
|
25
25
|
sphinx-autodoc-typehints = {version = "*", optional = true}
|
|
26
26
|
sphinx-toolbox = {version = "*", optional = true}
|
|
27
27
|
sphinxcontrib-mermaid = {version = "*", optional = true}
|
|
28
|
+
pre-commit = "^3.8.0"
|
|
28
29
|
|
|
29
30
|
[tool.poetry.extras]
|
|
30
31
|
# The pyarrow.parquet module is required for writing parquet caches to disk
|
|
@@ -42,12 +43,6 @@ docs = [
|
|
|
42
43
|
[tool.poetry.group.dev.dependencies]
|
|
43
44
|
ruff = ">=0.2.1"
|
|
44
45
|
coverage = {version = "*", extras = ["toml"]}
|
|
45
|
-
flake8 = "3.9.2"
|
|
46
|
-
flake8-annotations = { version = "*", python = ">=3.9,<4.0" }
|
|
47
|
-
flake8-bandit = "*"
|
|
48
|
-
flake8-black = "*"
|
|
49
|
-
flake8-bugbear = "*"
|
|
50
|
-
flake8-isort = "*"
|
|
51
46
|
pyright = ">=1.1.239"
|
|
52
47
|
pytest = ">=7.1.2"
|
|
53
48
|
pytest-cov = ">=3.0.0"
|
|
@@ -59,6 +54,7 @@ types-setuptools = ">=57.4.14"
|
|
|
59
54
|
pandas-stubs = ">=1.2.0"
|
|
60
55
|
codecov = "^2.1.12"
|
|
61
56
|
blackdoc = "*"
|
|
57
|
+
ipykernel = "^6.29.4"
|
|
62
58
|
|
|
63
59
|
|
|
64
60
|
[tool.poetry.group.docs.dependencies]
|
|
@@ -93,11 +89,19 @@ exclude_lines = [
|
|
|
93
89
|
fail_under = 99.64
|
|
94
90
|
show_missing = true
|
|
95
91
|
|
|
96
|
-
[tool.isort]
|
|
97
|
-
profile = "black"
|
|
98
|
-
|
|
99
92
|
[tool.pyright]
|
|
100
|
-
|
|
93
|
+
typeCheckingMode = "basic"
|
|
94
|
+
venvPath = "."
|
|
95
|
+
venv = ".venv"
|
|
96
|
+
pythonVersion = "3.9"
|
|
97
|
+
|
|
98
|
+
exclude = [
|
|
99
|
+
".venv",
|
|
100
|
+
"noxfile.py",
|
|
101
|
+
"**/node_modules",
|
|
102
|
+
"**/__pycache__",
|
|
103
|
+
"**/.*"
|
|
104
|
+
]
|
|
101
105
|
|
|
102
106
|
[tool.mypy]
|
|
103
107
|
warn_unused_configs = true
|
|
@@ -113,6 +117,9 @@ allow_redefinition = true
|
|
|
113
117
|
show_error_codes = true
|
|
114
118
|
exclude = [
|
|
115
119
|
"noxfile.py",
|
|
120
|
+
"**/node_modules",
|
|
121
|
+
"**/__pycache__",
|
|
122
|
+
"**/.*"
|
|
116
123
|
]
|
|
117
124
|
|
|
118
125
|
|
|
@@ -121,10 +128,11 @@ module = ["tests.test_validators"]
|
|
|
121
128
|
warn_unused_ignores = false
|
|
122
129
|
|
|
123
130
|
[tool.ruff]
|
|
131
|
+
target-version = "py39"
|
|
124
132
|
extend-exclude= ["tests/__init__.py"]
|
|
125
133
|
|
|
126
134
|
[tool.ruff.lint]
|
|
127
|
-
select = ["E4", "E7", "E9", "F", "I", "B", "D"]
|
|
135
|
+
select = ["E4", "E7", "E9", "F", "I", "B", "D", "UP"]
|
|
128
136
|
|
|
129
137
|
[tool.ruff.lint.pydocstyle]
|
|
130
138
|
convention = "google"
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import io
|
|
4
|
+
import json
|
|
5
|
+
from typing import Annotated, Optional, Union
|
|
6
|
+
|
|
7
|
+
import polars as pl
|
|
8
|
+
from polars.datatypes import * # noqa: F403 # type: ignore
|
|
9
|
+
from polars.datatypes import DataType, DataTypeClass
|
|
10
|
+
from polars.exceptions import ComputeError
|
|
11
|
+
from pydantic import BaseModel, BeforeValidator, field_serializer
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def dtype_deserializer(dtype: str | DataTypeClass | DataType | None):
|
|
15
|
+
"""Deserialize a dtype from json."""
|
|
16
|
+
if isinstance(dtype, DataTypeClass) or isinstance(dtype, DataType):
|
|
17
|
+
return dtype
|
|
18
|
+
else:
|
|
19
|
+
if dtype == "null" or dtype is None:
|
|
20
|
+
return None
|
|
21
|
+
else:
|
|
22
|
+
return eval(dtype)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def expr_deserializer(
|
|
26
|
+
expr: str | pl.Expr | list[pl.Expr] | None,
|
|
27
|
+
) -> pl.Expr | list[pl.Expr] | None:
|
|
28
|
+
"""Deserialize a polars expression or list thereof from json.
|
|
29
|
+
|
|
30
|
+
This is applied both during deserialization and validation.
|
|
31
|
+
"""
|
|
32
|
+
if expr is None:
|
|
33
|
+
return None
|
|
34
|
+
elif isinstance(expr, pl.Expr):
|
|
35
|
+
return expr
|
|
36
|
+
elif isinstance(expr, list):
|
|
37
|
+
return expr
|
|
38
|
+
elif isinstance(expr, str):
|
|
39
|
+
if expr == "null":
|
|
40
|
+
return None
|
|
41
|
+
# can be either a list of expr or expr
|
|
42
|
+
elif expr[0] == "[":
|
|
43
|
+
return [
|
|
44
|
+
pl.Expr.deserialize(io.StringIO(e), format="json")
|
|
45
|
+
for e in json.loads(expr)
|
|
46
|
+
]
|
|
47
|
+
else:
|
|
48
|
+
return pl.Expr.deserialize(io.StringIO(expr), format="json")
|
|
49
|
+
else:
|
|
50
|
+
raise ValueError(f"{expr} can not be deserialized.")
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def expr_or_col_name_deserializer(expr: str | pl.Expr | None) -> pl.Expr | str | None:
|
|
54
|
+
"""Deserialize a polars expression or column name from json.
|
|
55
|
+
|
|
56
|
+
This is applied both during deserialization and validation.
|
|
57
|
+
"""
|
|
58
|
+
if expr is None:
|
|
59
|
+
return None
|
|
60
|
+
elif isinstance(expr, pl.Expr):
|
|
61
|
+
return expr
|
|
62
|
+
elif isinstance(expr, list):
|
|
63
|
+
return expr
|
|
64
|
+
elif isinstance(expr, str):
|
|
65
|
+
# Default behaviour
|
|
66
|
+
if expr == "null":
|
|
67
|
+
return None
|
|
68
|
+
else:
|
|
69
|
+
try:
|
|
70
|
+
return pl.Expr.deserialize(io.StringIO(expr), format="json")
|
|
71
|
+
except ComputeError:
|
|
72
|
+
try:
|
|
73
|
+
# Column name is being deserialized
|
|
74
|
+
return json.loads(expr)
|
|
75
|
+
except json.JSONDecodeError:
|
|
76
|
+
# Column name has been passed literally
|
|
77
|
+
# to ColumnInfo(derived_from="foo")
|
|
78
|
+
return expr
|
|
79
|
+
else:
|
|
80
|
+
raise ValueError(f"{expr} can not be deserialized.")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class ColumnInfo(BaseModel, arbitrary_types_allowed=True):
|
|
84
|
+
"""patito-side model for storing column metadata.
|
|
85
|
+
|
|
86
|
+
Args:
|
|
87
|
+
allow_missing (bool): Column may be missing.
|
|
88
|
+
constraints (Union[polars.Expression, List[polars.Expression]): A single
|
|
89
|
+
constraint or list of constraints, expressed as a polars expression objects.
|
|
90
|
+
All rows must satisfy the given constraint. You can refer to the given column
|
|
91
|
+
with ``pt.field``, which will automatically be replaced with
|
|
92
|
+
``polars.col(<field_name>)`` before evaluation.
|
|
93
|
+
derived_from (Union[str, polars.Expr]): used to mark fields that are meant to be derived from other fields. Users can specify a polars expression that will be called to derive the column value when `pt.DataFrame.derive` is called.
|
|
94
|
+
dtype (polars.datatype.DataType): The given dataframe column must have the given
|
|
95
|
+
polars dtype, for instance ``polars.UInt64`` or ``pl.Float32``.
|
|
96
|
+
unique (bool): All row values must be unique.
|
|
97
|
+
|
|
98
|
+
"""
|
|
99
|
+
|
|
100
|
+
allow_missing: Optional[bool] = None # noqa: UP007
|
|
101
|
+
dtype: Annotated[
|
|
102
|
+
Optional[Union[DataTypeClass, DataType]], # noqa: UP007
|
|
103
|
+
BeforeValidator(dtype_deserializer),
|
|
104
|
+
] = None
|
|
105
|
+
constraints: Annotated[
|
|
106
|
+
Optional[Union[pl.Expr, list[pl.Expr]]], # noqa: UP007
|
|
107
|
+
BeforeValidator(expr_deserializer),
|
|
108
|
+
] = None
|
|
109
|
+
derived_from: Annotated[
|
|
110
|
+
Optional[Union[str, pl.Expr]], # noqa: UP007
|
|
111
|
+
BeforeValidator(expr_or_col_name_deserializer),
|
|
112
|
+
] = None
|
|
113
|
+
unique: Optional[bool] = None # noqa : UP007
|
|
114
|
+
|
|
115
|
+
def __repr__(self) -> str:
|
|
116
|
+
"""Print only Field attributes whose values are not default (mainly None)."""
|
|
117
|
+
not_default_field = {
|
|
118
|
+
field: getattr(self, field)
|
|
119
|
+
for field in self.model_fields
|
|
120
|
+
if getattr(self, field) is not self.model_fields[field].default
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
string = ""
|
|
124
|
+
for field, value in not_default_field.items():
|
|
125
|
+
string += f"{field}={value}, "
|
|
126
|
+
if string:
|
|
127
|
+
# remove trailing comma and space
|
|
128
|
+
string = string[:-2]
|
|
129
|
+
return f"ColumnInfo({string})"
|
|
130
|
+
|
|
131
|
+
@field_serializer("constraints", "derived_from")
|
|
132
|
+
def expr_serializer(self, expr: None | pl.Expr | list[pl.Expr]):
|
|
133
|
+
"""Converts polars expr to json."""
|
|
134
|
+
if expr is None:
|
|
135
|
+
return "null"
|
|
136
|
+
elif isinstance(expr, str):
|
|
137
|
+
return json.dumps(expr)
|
|
138
|
+
elif isinstance(expr, list):
|
|
139
|
+
return json.dumps([e.meta.serialize(format="json") for e in expr])
|
|
140
|
+
else:
|
|
141
|
+
return expr.meta.serialize(format="json")
|
|
142
|
+
|
|
143
|
+
@field_serializer("dtype")
|
|
144
|
+
def dtype_serializer(self, dtype: DataTypeClass | DataType | None) -> str:
|
|
145
|
+
"""Converts polars dtype to json."""
|
|
146
|
+
if dtype is None:
|
|
147
|
+
return "null"
|
|
148
|
+
else:
|
|
149
|
+
return str(dtype)
|
|
@@ -1,20 +1,22 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from collections.abc import Mapping
|
|
3
4
|
from functools import cache, reduce
|
|
4
5
|
from operator import and_
|
|
5
|
-
from typing import TYPE_CHECKING, Any
|
|
6
|
+
from typing import TYPE_CHECKING, Any
|
|
6
7
|
|
|
7
8
|
import polars as pl
|
|
8
|
-
from polars.datatypes import DataType, DataTypeClass
|
|
9
|
+
from polars.datatypes import DataType, DataTypeClass
|
|
10
|
+
from polars.datatypes.group import DataTypeGroup
|
|
9
11
|
from pydantic import TypeAdapter
|
|
10
12
|
|
|
13
|
+
from patito._pydantic.column_info import ColumnInfo
|
|
11
14
|
from patito._pydantic.dtypes.utils import (
|
|
12
15
|
PT_BASE_SUPPORTED_DTYPES,
|
|
13
16
|
PydanticBaseType,
|
|
14
17
|
_pyd_type_to_default_dtype,
|
|
15
18
|
_pyd_type_to_valid_dtypes,
|
|
16
19
|
_without_optional,
|
|
17
|
-
dtype_from_string,
|
|
18
20
|
)
|
|
19
21
|
from patito._pydantic.repr import display_as_type
|
|
20
22
|
|
|
@@ -24,8 +26,8 @@ if TYPE_CHECKING:
|
|
|
24
26
|
|
|
25
27
|
@cache
|
|
26
28
|
def valid_dtypes_for_model(
|
|
27
|
-
cls:
|
|
28
|
-
) -> Mapping[str,
|
|
29
|
+
cls: type[ModelType],
|
|
30
|
+
) -> Mapping[str, frozenset[DataTypeClass]]:
|
|
29
31
|
return {
|
|
30
32
|
column: (
|
|
31
33
|
DtypeResolver(cls.model_fields[column].annotation).valid_polars_dtypes()
|
|
@@ -38,7 +40,7 @@ def valid_dtypes_for_model(
|
|
|
38
40
|
|
|
39
41
|
@cache
|
|
40
42
|
def default_dtypes_for_model(
|
|
41
|
-
cls:
|
|
43
|
+
cls: type[ModelType],
|
|
42
44
|
) -> dict[str, DataType]:
|
|
43
45
|
default_dtypes: dict[str, DataType] = {}
|
|
44
46
|
for column in cls.columns:
|
|
@@ -56,7 +58,7 @@ def default_dtypes_for_model(
|
|
|
56
58
|
def validate_polars_dtype(
|
|
57
59
|
annotation: type[Any] | None,
|
|
58
60
|
dtype: DataType | DataTypeClass | None,
|
|
59
|
-
column:
|
|
61
|
+
column: str | None = None,
|
|
60
62
|
) -> None:
|
|
61
63
|
"""Check that the polars dtype is valid for the given annotation. Raises ValueError if not.
|
|
62
64
|
|
|
@@ -83,7 +85,7 @@ def validate_polars_dtype(
|
|
|
83
85
|
|
|
84
86
|
|
|
85
87
|
def validate_annotation(
|
|
86
|
-
annotation: type[Any] | Any | None, column:
|
|
88
|
+
annotation: type[Any] | Any | None, column: str | None = None
|
|
87
89
|
) -> None:
|
|
88
90
|
"""Check that the provided annotation has polars/patito support (we can resolve it to a default dtype). Raises ValueError if not.
|
|
89
91
|
|
|
@@ -128,7 +130,7 @@ class DtypeResolver:
|
|
|
128
130
|
|
|
129
131
|
def _valid_polars_dtypes_for_schema(
|
|
130
132
|
self,
|
|
131
|
-
schema:
|
|
133
|
+
schema: dict,
|
|
132
134
|
) -> DataTypeGroup:
|
|
133
135
|
valid_type_sets = []
|
|
134
136
|
if "anyOf" in schema:
|
|
@@ -145,7 +147,7 @@ class DtypeResolver:
|
|
|
145
147
|
|
|
146
148
|
def _pydantic_subschema_to_valid_polars_types(
|
|
147
149
|
self,
|
|
148
|
-
props:
|
|
150
|
+
props: dict,
|
|
149
151
|
) -> DataTypeGroup:
|
|
150
152
|
if "type" not in props:
|
|
151
153
|
if "enum" in props:
|
|
@@ -188,7 +190,7 @@ class DtypeResolver:
|
|
|
188
190
|
PydanticBaseType(pyd_type), props.get("format"), props.get("enum")
|
|
189
191
|
)
|
|
190
192
|
|
|
191
|
-
def _default_polars_dtype_for_schema(self, schema:
|
|
193
|
+
def _default_polars_dtype_for_schema(self, schema: dict) -> DataType | None:
|
|
192
194
|
if "anyOf" in schema:
|
|
193
195
|
if len(schema["anyOf"]) == 2: # look for optionals first
|
|
194
196
|
schema = _without_optional(schema)
|
|
@@ -204,12 +206,12 @@ class DtypeResolver:
|
|
|
204
206
|
|
|
205
207
|
def _pydantic_subschema_to_default_dtype(
|
|
206
208
|
self,
|
|
207
|
-
props:
|
|
209
|
+
props: dict,
|
|
208
210
|
) -> DataType | None:
|
|
209
211
|
if "column_info" in props: # user has specified in patito model
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
dtype = dtype() if isinstance(dtype, DataTypeClass) else dtype
|
|
212
|
+
ci = ColumnInfo.model_validate_json(props["column_info"])
|
|
213
|
+
if ci.dtype is not None:
|
|
214
|
+
dtype = ci.dtype() if isinstance(ci.dtype, DataTypeClass) else ci.dtype
|
|
213
215
|
return dtype
|
|
214
216
|
if "type" not in props:
|
|
215
217
|
if "enum" in props:
|
|
@@ -222,6 +224,8 @@ class DtypeResolver:
|
|
|
222
224
|
)
|
|
223
225
|
return None
|
|
224
226
|
pyd_type = props.get("type")
|
|
227
|
+
if pyd_type == "numeric":
|
|
228
|
+
pyd_type = "number"
|
|
225
229
|
if pyd_type == "array":
|
|
226
230
|
if "items" not in props:
|
|
227
231
|
raise NotImplementedError(
|
|
@@ -1,29 +1,23 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import sys
|
|
4
|
+
from collections.abc import Sequence
|
|
4
5
|
from enum import Enum
|
|
5
6
|
from typing import (
|
|
6
7
|
Any,
|
|
7
|
-
Dict,
|
|
8
|
-
List,
|
|
9
|
-
Optional,
|
|
10
|
-
Sequence,
|
|
11
8
|
Union,
|
|
12
|
-
cast,
|
|
13
9
|
get_args,
|
|
14
10
|
get_origin,
|
|
15
11
|
)
|
|
16
12
|
|
|
17
13
|
import polars as pl
|
|
18
|
-
from polars.datatypes import DataType, DataTypeClass,
|
|
19
|
-
from polars.datatypes.
|
|
14
|
+
from polars.datatypes import DataType, DataTypeClass, convert
|
|
15
|
+
from polars.datatypes.group import (
|
|
20
16
|
DATETIME_DTYPES,
|
|
21
17
|
DURATION_DTYPES,
|
|
22
18
|
FLOAT_DTYPES,
|
|
23
19
|
INTEGER_DTYPES,
|
|
24
|
-
|
|
25
|
-
from polars.polars import (
|
|
26
|
-
dtype_str_repr, # TODO: this is a rust function, can we implement our own string parser for Time/Duration/Datetime?
|
|
20
|
+
DataTypeGroup,
|
|
27
21
|
)
|
|
28
22
|
|
|
29
23
|
PYTHON_TO_PYDANTIC_TYPES = {
|
|
@@ -89,32 +83,42 @@ def is_optional(type_annotation: type[Any] | Any | None) -> bool:
|
|
|
89
83
|
)
|
|
90
84
|
|
|
91
85
|
|
|
86
|
+
def unwrap_optional(type_annotation: type[Any] | Any) -> type:
|
|
87
|
+
"""Return the inner, wrapped type of an Optional.
|
|
88
|
+
|
|
89
|
+
Is a no-op for non-Optional types.
|
|
90
|
+
|
|
91
|
+
Args:
|
|
92
|
+
type_annotation: The type annotation to be dewrapped.
|
|
93
|
+
|
|
94
|
+
Returns:
|
|
95
|
+
The input type, but with the outermost Optional removed.
|
|
96
|
+
|
|
97
|
+
"""
|
|
98
|
+
return (
|
|
99
|
+
next( # pragma: no cover
|
|
100
|
+
valid_type
|
|
101
|
+
for valid_type in get_args(type_annotation)
|
|
102
|
+
if valid_type is not type(None) # noqa: E721
|
|
103
|
+
)
|
|
104
|
+
if is_optional(type_annotation)
|
|
105
|
+
else type_annotation
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
|
|
92
109
|
def parse_composite_dtype(dtype: DataTypeClass | DataType) -> str:
|
|
93
110
|
"""For serialization, converts polars dtype to string representation."""
|
|
94
|
-
|
|
95
|
-
if dtype == pl.Struct or isinstance(dtype, pl.Struct):
|
|
96
|
-
raise NotImplementedError("Structs not yet supported by patito")
|
|
97
|
-
if not isinstance(dtype, pl.List) or isinstance(dtype, pl.Array):
|
|
98
|
-
raise NotImplementedError(
|
|
99
|
-
f"Unsupported nested dtype: {dtype} of type {type(dtype)}"
|
|
100
|
-
)
|
|
101
|
-
if dtype.inner is None:
|
|
102
|
-
return convert.DataTypeMappings.DTYPE_TO_FFINAME[dtype.base_type()]
|
|
103
|
-
return f"{convert.DataTypeMappings.DTYPE_TO_FFINAME[dtype.base_type()]}[{parse_composite_dtype(dtype.inner)}]"
|
|
104
|
-
elif dtype in pl.TEMPORAL_DTYPES:
|
|
105
|
-
return cast(str, dtype_str_repr(dtype))
|
|
106
|
-
else:
|
|
107
|
-
return convert.DataTypeMappings.DTYPE_TO_FFINAME[dtype]
|
|
111
|
+
return str(dtype)
|
|
108
112
|
|
|
109
113
|
|
|
110
|
-
def dtype_from_string(v: str) ->
|
|
114
|
+
def dtype_from_string(v: str) -> DataTypeClass | DataType | None:
|
|
111
115
|
"""For deserialization."""
|
|
112
116
|
# TODO test all dtypes
|
|
113
117
|
return convert.dtype_short_repr_to_dtype(v)
|
|
114
118
|
|
|
115
119
|
|
|
116
120
|
def _pyd_type_to_valid_dtypes(
|
|
117
|
-
pyd_type: PydanticBaseType, string_format:
|
|
121
|
+
pyd_type: PydanticBaseType, string_format: str | None, enum: list[str] | None
|
|
118
122
|
) -> DataTypeGroup:
|
|
119
123
|
if enum is not None:
|
|
120
124
|
_validate_enum_values(pyd_type, enum)
|
|
@@ -141,7 +145,7 @@ def _pyd_type_to_valid_dtypes(
|
|
|
141
145
|
|
|
142
146
|
|
|
143
147
|
def _pyd_type_to_default_dtype(
|
|
144
|
-
pyd_type: PydanticBaseType, string_format:
|
|
148
|
+
pyd_type: PydanticBaseType, string_format: str | None, enum: list[str] | None
|
|
145
149
|
) -> DataTypeClass | DataType:
|
|
146
150
|
if enum is not None:
|
|
147
151
|
_validate_enum_values(pyd_type, enum)
|
|
@@ -207,7 +211,7 @@ def _pyd_string_format_to_default_dtype(
|
|
|
207
211
|
raise NotImplementedError
|
|
208
212
|
|
|
209
213
|
|
|
210
|
-
def _without_optional(schema:
|
|
214
|
+
def _without_optional(schema: dict) -> dict:
|
|
211
215
|
if "anyOf" in schema:
|
|
212
216
|
for sub_props in schema["anyOf"]:
|
|
213
217
|
if "type" in sub_props and sub_props["type"] == "null":
|
|
@@ -1,26 +1,23 @@
|
|
|
1
1
|
import sys
|
|
2
2
|
import types
|
|
3
3
|
import typing
|
|
4
|
+
from collections.abc import Generator, Iterable, Sequence
|
|
4
5
|
from typing import (
|
|
5
6
|
Any,
|
|
6
7
|
Callable,
|
|
7
|
-
Generator,
|
|
8
|
-
Iterable,
|
|
9
8
|
Literal,
|
|
10
9
|
Optional,
|
|
11
|
-
Sequence,
|
|
12
|
-
Tuple,
|
|
13
|
-
Type,
|
|
14
10
|
Union,
|
|
15
11
|
get_args,
|
|
16
12
|
get_origin,
|
|
17
13
|
)
|
|
14
|
+
from typing import GenericAlias as TypingGenericAlias # type: ignore
|
|
18
15
|
|
|
19
16
|
if typing.TYPE_CHECKING:
|
|
20
|
-
Loc =
|
|
21
|
-
ReprArgs = Sequence[
|
|
17
|
+
Loc = tuple[Union[int, str], ...]
|
|
18
|
+
ReprArgs = Sequence[tuple[Optional[str], Any]]
|
|
22
19
|
RichReprResult = Iterable[
|
|
23
|
-
Union[Any,
|
|
20
|
+
Union[Any, tuple[Any], tuple[str, Any], tuple[str, Any, Any]]
|
|
24
21
|
]
|
|
25
22
|
|
|
26
23
|
try:
|
|
@@ -30,15 +27,10 @@ except ImportError:
|
|
|
30
27
|
|
|
31
28
|
typing_base = _TypingBase
|
|
32
29
|
|
|
33
|
-
if sys.version_info < (3, 9):
|
|
34
|
-
# python < 3.9 does not have GenericAlias (list[int], tuple[str, ...] and so on)
|
|
35
|
-
TypingGenericAlias = ()
|
|
36
|
-
else:
|
|
37
|
-
from typing import GenericAlias as TypingGenericAlias # type: ignore
|
|
38
30
|
|
|
39
31
|
if sys.version_info < (3, 10):
|
|
40
32
|
|
|
41
|
-
def origin_is_union(tp: Optional[Type[Any]]) -> bool:
|
|
33
|
+
def origin_is_union(tp: Optional[type[Any]]) -> bool:
|
|
42
34
|
return tp is typing.Union
|
|
43
35
|
|
|
44
36
|
WithArgsTypes = (TypingGenericAlias,)
|
|
@@ -58,7 +50,7 @@ class Representation:
|
|
|
58
50
|
of objects.
|
|
59
51
|
"""
|
|
60
52
|
|
|
61
|
-
__slots__:
|
|
53
|
+
__slots__: tuple[str, ...] = tuple()
|
|
62
54
|
|
|
63
55
|
def __repr_args__(self) -> "ReprArgs":
|
|
64
56
|
"""Returns the attributes to show in __str__, __repr__, and __pretty__ this is generally overridden.
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from collections.abc import Mapping
|
|
3
4
|
from functools import cache
|
|
4
|
-
from typing import TYPE_CHECKING, Any,
|
|
5
|
+
from typing import TYPE_CHECKING, Any, get_args
|
|
5
6
|
|
|
6
7
|
from pydantic.fields import FieldInfo
|
|
7
8
|
|
|
@@ -13,7 +14,7 @@ if TYPE_CHECKING:
|
|
|
13
14
|
|
|
14
15
|
|
|
15
16
|
@cache
|
|
16
|
-
def schema_for_model(cls: Type[ModelType]) -> Dict[str, Dict[str, Any]]:
|
|
17
|
+
def schema_for_model(cls: type[ModelType]) -> dict[str, dict[str, Any]]:
|
|
17
18
|
"""Return schema properties where definition references have been resolved.
|
|
18
19
|
|
|
19
20
|
Returns:
|
|
@@ -46,27 +47,27 @@ def schema_for_model(cls: Type[ModelType]) -> Dict[str, Dict[str, Any]]:
|
|
|
46
47
|
|
|
47
48
|
|
|
48
49
|
@cache
|
|
49
|
-
def column_infos_for_model(cls:
|
|
50
|
+
def column_infos_for_model(cls: type[ModelType]) -> Mapping[str, ColumnInfo]:
|
|
50
51
|
fields = cls.model_fields
|
|
51
52
|
|
|
52
53
|
def get_column_info(field: FieldInfo) -> ColumnInfo:
|
|
53
54
|
if field.json_schema_extra is None:
|
|
54
|
-
return
|
|
55
|
+
return ColumnInfo()
|
|
55
56
|
elif callable(field.json_schema_extra):
|
|
56
57
|
raise NotImplementedError(
|
|
57
58
|
"Callable json_schema_extra not supported by patito."
|
|
58
59
|
)
|
|
59
|
-
return
|
|
60
|
+
return ColumnInfo.model_validate_json(field.json_schema_extra["column_info"])
|
|
60
61
|
|
|
61
62
|
return {k: get_column_info(v) for k, v in fields.items()}
|
|
62
63
|
|
|
63
64
|
|
|
64
65
|
def _append_field_info_to_props(
|
|
65
|
-
field_info:
|
|
66
|
+
field_info: dict[str, Any],
|
|
66
67
|
field_name: str,
|
|
67
|
-
model_schema:
|
|
68
|
-
required:
|
|
69
|
-
) ->
|
|
68
|
+
model_schema: dict[str, Any],
|
|
69
|
+
required: bool | None = None,
|
|
70
|
+
) -> dict[str, Any]:
|
|
70
71
|
if "$ref" in field_info: # TODO onto runtime append
|
|
71
72
|
definition = model_schema["$defs"][field_info["$ref"]]
|
|
72
73
|
if "enum" in definition and "type" not in definition:
|