patito 0.6.2__tar.gz → 0.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {patito-0.6.2 → patito-0.7.0}/PKG-INFO +3 -3
- {patito-0.6.2 → patito-0.7.0}/pyproject.toml +4 -3
- {patito-0.6.2 → patito-0.7.0}/src/patito/_pydantic/column_info.py +1 -1
- {patito-0.6.2 → patito-0.7.0}/src/patito/_pydantic/dtypes/dtypes.py +4 -1
- {patito-0.6.2 → patito-0.7.0}/src/patito/_pydantic/dtypes/utils.py +5 -4
- {patito-0.6.2 → patito-0.7.0}/src/patito/polars.py +11 -11
- {patito-0.6.2 → patito-0.7.0}/src/patito/pydantic.py +1 -1
- {patito-0.6.2 → patito-0.7.0}/LICENSE +0 -0
- {patito-0.6.2 → patito-0.7.0}/README.md +0 -0
- {patito-0.6.2 → patito-0.7.0}/src/patito/__init__.py +0 -0
- {patito-0.6.2 → patito-0.7.0}/src/patito/_docs.py +0 -0
- {patito-0.6.2 → patito-0.7.0}/src/patito/_pydantic/__init__.py +0 -0
- {patito-0.6.2 → patito-0.7.0}/src/patito/_pydantic/dtypes/__init__.py +0 -0
- {patito-0.6.2 → patito-0.7.0}/src/patito/_pydantic/repr.py +0 -0
- {patito-0.6.2 → patito-0.7.0}/src/patito/_pydantic/schema.py +0 -0
- {patito-0.6.2 → patito-0.7.0}/src/patito/exceptions.py +0 -0
- {patito-0.6.2 → patito-0.7.0}/src/patito/validators.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: patito
|
|
3
|
-
Version: 0.6.2
|
|
3
|
+
Version: 0.7.0
|
|
4
4
|
Summary: A dataframe modelling library built on top of polars and pydantic.
|
|
5
5
|
Home-page: https://github.com/JakobGM/patito
|
|
6
6
|
License: MIT
|
|
@@ -18,9 +18,9 @@ Provides-Extra: docs
|
|
|
18
18
|
Provides-Extra: pandas
|
|
19
19
|
Requires-Dist: Sphinx (<7) ; extra == "docs"
|
|
20
20
|
Requires-Dist: pandas ; extra == "pandas"
|
|
21
|
-
Requires-Dist: polars (>=0.
|
|
21
|
+
Requires-Dist: polars (>=1.0.0)
|
|
22
22
|
Requires-Dist: pyarrow (>=5.0.0) ; extra == "caching"
|
|
23
|
-
Requires-Dist: pydantic (>=2.
|
|
23
|
+
Requires-Dist: pydantic (>=2.7.0)
|
|
24
24
|
Requires-Dist: sphinx-autobuild ; extra == "docs"
|
|
25
25
|
Requires-Dist: sphinx-autodoc-typehints ; extra == "docs"
|
|
26
26
|
Requires-Dist: sphinx-rtd-theme ; extra == "docs"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "patito"
|
|
3
|
-
version = "0.6.2"
|
|
3
|
+
version = "0.7.0"
|
|
4
4
|
description = "A dataframe modelling library built on top of polars and pydantic."
|
|
5
5
|
authors = ["Jakob Gerhard Martinussen <jakobgm@gmail.com>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -12,8 +12,8 @@ keywords = ["validation", "dataframe"]
|
|
|
12
12
|
|
|
13
13
|
[tool.poetry.dependencies]
|
|
14
14
|
python = ">=3.9"
|
|
15
|
-
pydantic = ">=2.
|
|
16
|
-
polars = ">=0.
|
|
15
|
+
pydantic = ">=2.7.0"
|
|
16
|
+
polars = ">=1.0.0"
|
|
17
17
|
# Required for typing.get_args backports in python3.9 and 3.10
|
|
18
18
|
typing-extensions = "*"
|
|
19
19
|
pandas = {version = "*", optional = true}
|
|
@@ -59,6 +59,7 @@ types-setuptools = ">=57.4.14"
|
|
|
59
59
|
pandas-stubs = ">=1.2.0"
|
|
60
60
|
codecov = "^2.1.12"
|
|
61
61
|
blackdoc = "*"
|
|
62
|
+
ipykernel = "^6.29.4"
|
|
62
63
|
|
|
63
64
|
|
|
64
65
|
[tool.poetry.group.docs.dependencies]
|
|
@@ -71,7 +71,7 @@ class ColumnInfo(BaseModel, arbitrary_types_allowed=True):
|
|
|
71
71
|
def _serialize_expr(self, expr: pl.Expr) -> Dict:
|
|
72
72
|
if isinstance(expr, pl.Expr):
|
|
73
73
|
return json.loads(
|
|
74
|
-
expr.meta.serialize(
|
|
74
|
+
expr.meta.serialize(format="json")
|
|
75
75
|
) # can we access the dictionary directly?
|
|
76
76
|
else:
|
|
77
77
|
raise ValueError(f"Invalid type for expr: {type(expr)}")
|
|
@@ -5,7 +5,8 @@ from operator import and_
|
|
|
5
5
|
from typing import TYPE_CHECKING, Any, Dict, FrozenSet, Mapping, Optional, Type
|
|
6
6
|
|
|
7
7
|
import polars as pl
|
|
8
|
-
from polars.datatypes import DataType, DataTypeClass
|
|
8
|
+
from polars.datatypes import DataType, DataTypeClass
|
|
9
|
+
from polars.datatypes.group import DataTypeGroup
|
|
9
10
|
from pydantic import TypeAdapter
|
|
10
11
|
|
|
11
12
|
from patito._pydantic.dtypes.utils import (
|
|
@@ -222,6 +223,8 @@ class DtypeResolver:
|
|
|
222
223
|
)
|
|
223
224
|
return None
|
|
224
225
|
pyd_type = props.get("type")
|
|
226
|
+
if pyd_type == "numeric":
|
|
227
|
+
pyd_type = "number"
|
|
225
228
|
if pyd_type == "array":
|
|
226
229
|
if "items" not in props:
|
|
227
230
|
raise NotImplementedError(
|
|
@@ -15,12 +15,13 @@ from typing import (
|
|
|
15
15
|
)
|
|
16
16
|
|
|
17
17
|
import polars as pl
|
|
18
|
-
from polars.datatypes import DataType, DataTypeClass,
|
|
19
|
-
from polars.datatypes.
|
|
18
|
+
from polars.datatypes import DataType, DataTypeClass, convert
|
|
19
|
+
from polars.datatypes.group import (
|
|
20
20
|
DATETIME_DTYPES,
|
|
21
21
|
DURATION_DTYPES,
|
|
22
22
|
FLOAT_DTYPES,
|
|
23
23
|
INTEGER_DTYPES,
|
|
24
|
+
DataTypeGroup,
|
|
24
25
|
)
|
|
25
26
|
from polars.polars import (
|
|
26
27
|
dtype_str_repr, # TODO: this is a rust function, can we implement our own string parser for Time/Duration/Datetime?
|
|
@@ -91,7 +92,7 @@ def is_optional(type_annotation: type[Any] | Any | None) -> bool:
|
|
|
91
92
|
|
|
92
93
|
def parse_composite_dtype(dtype: DataTypeClass | DataType) -> str:
|
|
93
94
|
"""For serialization, converts polars dtype to string representation."""
|
|
94
|
-
if dtype
|
|
95
|
+
if dtype.is_nested():
|
|
95
96
|
if dtype == pl.Struct or isinstance(dtype, pl.Struct):
|
|
96
97
|
raise NotImplementedError("Structs not yet supported by patito")
|
|
97
98
|
if not isinstance(dtype, pl.List) or isinstance(dtype, pl.Array):
|
|
@@ -101,7 +102,7 @@ def parse_composite_dtype(dtype: DataTypeClass | DataType) -> str:
|
|
|
101
102
|
if dtype.inner is None:
|
|
102
103
|
return convert.DataTypeMappings.DTYPE_TO_FFINAME[dtype.base_type()]
|
|
103
104
|
return f"{convert.DataTypeMappings.DTYPE_TO_FFINAME[dtype.base_type()]}[{parse_composite_dtype(dtype.inner)}]"
|
|
104
|
-
elif dtype
|
|
105
|
+
elif dtype.is_temporal():
|
|
105
106
|
return cast(str, dtype_str_repr(dtype))
|
|
106
107
|
else:
|
|
107
108
|
return convert.DataTypeMappings.DTYPE_TO_FFINAME[dtype]
|
|
@@ -20,7 +20,7 @@ from typing import (
|
|
|
20
20
|
)
|
|
21
21
|
|
|
22
22
|
import polars as pl
|
|
23
|
-
from polars.
|
|
23
|
+
from polars._typing import IntoExpr
|
|
24
24
|
from pydantic import AliasChoices, AliasPath, create_model
|
|
25
25
|
|
|
26
26
|
from patito._pydantic.column_info import ColumnInfo
|
|
@@ -130,7 +130,7 @@ class LazyFrame(pl.LazyFrame, Generic[ModelType]):
|
|
|
130
130
|
"""
|
|
131
131
|
derived_columns = []
|
|
132
132
|
props = self.model._schema_properties()
|
|
133
|
-
original_columns = set(self.
|
|
133
|
+
original_columns = set(self.collect_schema())
|
|
134
134
|
to_derive = self.model.derived_columns if columns is None else columns
|
|
135
135
|
for column_name in to_derive:
|
|
136
136
|
if column_name not in derived_columns:
|
|
@@ -193,15 +193,15 @@ class LazyFrame(pl.LazyFrame, Generic[ModelType]):
|
|
|
193
193
|
|
|
194
194
|
def to_expr(va: str | AliasPath | AliasChoices) -> Optional[pl.Expr]:
|
|
195
195
|
if isinstance(va, str):
|
|
196
|
-
return pl.col(va) if va in self.
|
|
196
|
+
return pl.col(va) if va in self.collect_schema() else None
|
|
197
197
|
elif isinstance(va, AliasPath):
|
|
198
198
|
if len(va.path) != 2 or not isinstance(va.path[1], int):
|
|
199
199
|
raise NotImplementedError(
|
|
200
200
|
f"TODO figure out how this AliasPath behaves ({va})"
|
|
201
201
|
)
|
|
202
202
|
return (
|
|
203
|
-
pl.col(va.path[0]).list.get(va.path[1])
|
|
204
|
-
if va.path[0] in self.
|
|
203
|
+
pl.col(va.path[0]).list.get(va.path[1], null_on_oob=True)
|
|
204
|
+
if va.path[0] in self.collect_schema()
|
|
205
205
|
else None
|
|
206
206
|
)
|
|
207
207
|
elif isinstance(va, AliasChoices):
|
|
@@ -224,7 +224,7 @@ class LazyFrame(pl.LazyFrame, Generic[ModelType]):
|
|
|
224
224
|
exprs.append(pl.col(name))
|
|
225
225
|
else:
|
|
226
226
|
expr = to_expr(field_info.validation_alias)
|
|
227
|
-
if name in self.
|
|
227
|
+
if name in self.collect_schema().names():
|
|
228
228
|
if expr is None:
|
|
229
229
|
exprs.append(pl.col(name))
|
|
230
230
|
else:
|
|
@@ -278,9 +278,9 @@ class LazyFrame(pl.LazyFrame, Generic[ModelType]):
|
|
|
278
278
|
properties = self.model._schema_properties()
|
|
279
279
|
valid_dtypes = self.model.valid_dtypes
|
|
280
280
|
default_dtypes = self.model.dtypes
|
|
281
|
-
columns = columns or self.
|
|
281
|
+
columns = columns or self.collect_schema().names()
|
|
282
282
|
exprs = []
|
|
283
|
-
for column, current_dtype in
|
|
283
|
+
for column, current_dtype in self.collect_schema().items():
|
|
284
284
|
if (column not in columns) or (column not in properties):
|
|
285
285
|
exprs.append(pl.col(column))
|
|
286
286
|
elif "dtype" in properties[column]:
|
|
@@ -865,7 +865,7 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
|
|
|
865
865
|
# └─────┴─────┘
|
|
866
866
|
|
|
867
867
|
"""
|
|
868
|
-
kwargs.setdefault("
|
|
868
|
+
kwargs.setdefault("schema_overrides", cls.model.dtypes)
|
|
869
869
|
has_header = kwargs.get("has_header", True)
|
|
870
870
|
if not has_header and "columns" not in kwargs:
|
|
871
871
|
kwargs.setdefault("new_columns", cls.model.columns)
|
|
@@ -877,9 +877,9 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
|
|
|
877
877
|
field_name: alias_func(field_name)
|
|
878
878
|
for field_name in cls.model.model_fields
|
|
879
879
|
}
|
|
880
|
-
kwargs["
|
|
880
|
+
kwargs["schema_overrides"] = {
|
|
881
881
|
fields_to_cols.get(field, field): dtype
|
|
882
|
-
for field, dtype in kwargs["
|
|
882
|
+
for field, dtype in kwargs["schema_overrides"].items()
|
|
883
883
|
}
|
|
884
884
|
# TODO: other forms of alias setting like in Field
|
|
885
885
|
df = cls.model.DataFrame._from_pydf(pl.read_csv(*args, **kwargs)._df)
|
|
@@ -1200,7 +1200,7 @@ class Model(BaseModel, metaclass=ModelMetaclass):
|
|
|
1200
1200
|
field_type = Optional[field_type]
|
|
1201
1201
|
new_fields[new_field_name] = (field_type, field_definition[1])
|
|
1202
1202
|
return create_model( # type: ignore
|
|
1203
|
-
|
|
1203
|
+
model_name,
|
|
1204
1204
|
__base__=Model,
|
|
1205
1205
|
**new_fields,
|
|
1206
1206
|
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|