patito 0.6.2__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: patito
- Version: 0.6.2
+ Version: 0.7.0
  Summary: A dataframe modelling library built on top of polars and pydantic.
  Home-page: https://github.com/JakobGM/patito
  License: MIT
@@ -18,9 +18,9 @@ Provides-Extra: docs
  Provides-Extra: pandas
  Requires-Dist: Sphinx (<7) ; extra == "docs"
  Requires-Dist: pandas ; extra == "pandas"
- Requires-Dist: polars (>=0.20.1)
+ Requires-Dist: polars (>=1.0.0)
  Requires-Dist: pyarrow (>=5.0.0) ; extra == "caching"
- Requires-Dist: pydantic (>=2.4.1)
+ Requires-Dist: pydantic (>=2.7.0)
  Requires-Dist: sphinx-autobuild ; extra == "docs"
  Requires-Dist: sphinx-autodoc-typehints ; extra == "docs"
  Requires-Dist: sphinx-rtd-theme ; extra == "docs"
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "patito"
- version = "0.6.2"
+ version = "0.7.0"
  description = "A dataframe modelling library built on top of polars and pydantic."
  authors = ["Jakob Gerhard Martinussen <jakobgm@gmail.com>"]
  license = "MIT"
@@ -12,8 +12,8 @@ keywords = ["validation", "dataframe"]

  [tool.poetry.dependencies]
  python = ">=3.9"
- pydantic = ">=2.4.1"
- polars = ">=0.20.1"
+ pydantic = ">=2.7.0"
+ polars = ">=1.0.0"
  # Required for typing.get_args backports in python3.9 and 3.10
  typing-extensions = "*"
  pandas = {version = "*", optional = true}
@@ -59,6 +59,7 @@ types-setuptools = ">=57.4.14"
  pandas-stubs = ">=1.2.0"
  codecov = "^2.1.12"
  blackdoc = "*"
+ ipykernel = "^6.29.4"


  [tool.poetry.group.docs.dependencies]
@@ -71,7 +71,7 @@ class ColumnInfo(BaseModel, arbitrary_types_allowed=True):
      def _serialize_expr(self, expr: pl.Expr) -> Dict:
          if isinstance(expr, pl.Expr):
              return json.loads(
-                 expr.meta.serialize(None)
+                 expr.meta.serialize(format="json")
              )  # can we access the dictionary directly?
          else:
              raise ValueError(f"Invalid type for expr: {type(expr)}")
@@ -5,7 +5,8 @@ from operator import and_
  from typing import TYPE_CHECKING, Any, Dict, FrozenSet, Mapping, Optional, Type

  import polars as pl
- from polars.datatypes import DataType, DataTypeClass, DataTypeGroup
+ from polars.datatypes import DataType, DataTypeClass
+ from polars.datatypes.group import DataTypeGroup
  from pydantic import TypeAdapter

  from patito._pydantic.dtypes.utils import (
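
DataTypeGroup now lives in the polars.datatypes.group submodule rather than in polars.datatypes itself. A short sketch of the relocated import, assuming polars >= 1.0:

    import polars as pl
    from polars.datatypes import DataType, DataTypeClass  # unchanged location
    from polars.datatypes.group import DataTypeGroup      # moved in polars 1.0

    # DataTypeGroup behaves like a frozenset of dtypes:
    integer_like = DataTypeGroup([pl.Int32, pl.Int64])
    assert pl.Int64 in integer_like
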
@@ -222,6 +223,8 @@ class DtypeResolver:
          )
          return None
      pyd_type = props.get("type")
+     if pyd_type == "numeric":
+         pyd_type = "number"
      if pyd_type == "array":
          if "items" not in props:
              raise NotImplementedError(
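
JSON Schema's standard keyword for floating-point values is "number"; the added branch normalizes the non-standard "numeric" spelling before the type is dispatched. A quick check of what pydantic 2 itself emits for numeric Python types:

    from pydantic import TypeAdapter

    assert TypeAdapter(float).json_schema() == {"type": "number"}
    assert TypeAdapter(int).json_schema() == {"type": "integer"}
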
@@ -15,12 +15,13 @@ from typing import (
  )

  import polars as pl
- from polars.datatypes import DataType, DataTypeClass, DataTypeGroup, convert
- from polars.datatypes.constants import (
+ from polars.datatypes import DataType, DataTypeClass, convert
+ from polars.datatypes.group import (
      DATETIME_DTYPES,
      DURATION_DTYPES,
      FLOAT_DTYPES,
      INTEGER_DTYPES,
+     DataTypeGroup,
  )
  from polars.polars import (
      dtype_str_repr,  # TODO: this is a rust function, can we implement our own string parser for Time/Duration/Datetime?
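
The dtype-group constants moved alongside DataTypeGroup, from polars.datatypes.constants to polars.datatypes.group. A membership-check sketch, assuming polars >= 1.0:

    import polars as pl
    from polars.datatypes.group import FLOAT_DTYPES, INTEGER_DTYPES

    assert pl.Int32 in INTEGER_DTYPES
    assert pl.Float64 in FLOAT_DTYPES
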
@@ -91,7 +92,7 @@ def is_optional(type_annotation: type[Any] | Any | None) -> bool:

  def parse_composite_dtype(dtype: DataTypeClass | DataType) -> str:
      """For serialization, converts polars dtype to string representation."""
-     if dtype in pl.NESTED_DTYPES:
+     if dtype.is_nested():
          if dtype == pl.Struct or isinstance(dtype, pl.Struct):
              raise NotImplementedError("Structs not yet supported by patito")
          if not isinstance(dtype, pl.List) or isinstance(dtype, pl.Array):
@@ -101,7 +102,7 @@ def parse_composite_dtype(dtype: DataTypeClass | DataType) -> str:
          if dtype.inner is None:
              return convert.DataTypeMappings.DTYPE_TO_FFINAME[dtype.base_type()]
          return f"{convert.DataTypeMappings.DTYPE_TO_FFINAME[dtype.base_type()]}[{parse_composite_dtype(dtype.inner)}]"
-     elif dtype in pl.TEMPORAL_DTYPES:
+     elif dtype.is_temporal():
          return cast(str, dtype_str_repr(dtype))
      else:
          return convert.DataTypeMappings.DTYPE_TO_FFINAME[dtype]
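
polars 1.0 dropped the top-level pl.NESTED_DTYPES and pl.TEMPORAL_DTYPES sets in favor of methods on the dtype itself. A sketch of the method-based checks, assuming polars >= 1.0:

    import polars as pl

    assert pl.List(pl.Int64).is_nested()
    assert not pl.Int64.is_nested()
    assert pl.Datetime("us").is_temporal()
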
@@ -20,7 +20,7 @@ from typing import (
  )

  import polars as pl
- from polars.type_aliases import IntoExpr
+ from polars._typing import IntoExpr
  from pydantic import AliasChoices, AliasPath, create_model

  from patito._pydantic.column_info import ColumnInfo
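
The type-alias module was renamed from polars.type_aliases to polars._typing in polars 1.0. It is private, so it may move again in future releases; a typing-only import at least keeps it off the runtime path. A sketch with a hypothetical helper:

    from typing import TYPE_CHECKING

    import polars as pl

    if TYPE_CHECKING:
        from polars._typing import IntoExpr  # private module; renamed in 1.0

    def select_one(df: pl.DataFrame, column: "IntoExpr") -> pl.DataFrame:
        # IntoExpr covers column names, expressions, and other expression inputs.
        return df.select(column)
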
@@ -130,7 +130,7 @@ class LazyFrame(pl.LazyFrame, Generic[ModelType]):
          """
          derived_columns = []
          props = self.model._schema_properties()
-         original_columns = set(self.columns)
+         original_columns = set(self.collect_schema())
          to_derive = self.model.derived_columns if columns is None else columns
          for column_name in to_derive:
              if column_name not in derived_columns:
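
In polars 1.0, reading LazyFrame.columns resolves the schema implicitly and emits a PerformanceWarning; collect_schema() is the explicit replacement. Its result is a dict-like Schema, so the membership and name lookups used throughout these hunks all work on it. A small sketch, assuming polars >= 1.0:

    import polars as pl

    lf = pl.LazyFrame({"a": [1, 2], "b": ["x", "y"]})
    schema = lf.collect_schema()  # resolve the schema once, explicitly

    assert "a" in schema                 # membership, as in set(self.columns)
    assert schema.names() == ["a", "b"]  # ordered column names
    assert schema["a"] == pl.Int64       # dtype lookup by column name
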
@@ -193,15 +193,15 @@ class LazyFrame(pl.LazyFrame, Generic[ModelType]):

          def to_expr(va: str | AliasPath | AliasChoices) -> Optional[pl.Expr]:
              if isinstance(va, str):
-                 return pl.col(va) if va in self.columns else None
+                 return pl.col(va) if va in self.collect_schema() else None
              elif isinstance(va, AliasPath):
                  if len(va.path) != 2 or not isinstance(va.path[1], int):
                      raise NotImplementedError(
                          f"TODO figure out how this AliasPath behaves ({va})"
                      )
                  return (
-                     pl.col(va.path[0]).list.get(va.path[1])
-                     if va.path[0] in self.columns
+                     pl.col(va.path[0]).list.get(va.path[1], null_on_oob=True)
+                     if va.path[0] in self.collect_schema()
                      else None
                  )
              elif isinstance(va, AliasChoices):
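
list.get also changed in polars 1.0: out-of-bounds indices now raise by default, and null_on_oob=True restores the old null-filling behavior that this alias resolution relies on. A sketch:

    import polars as pl

    df = pl.DataFrame({"path": [[1, 2], [3]]})
    # The second row has no index 1; with null_on_oob=True it becomes
    # null instead of raising an error.
    df.select(pl.col("path").list.get(1, null_on_oob=True))
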
@@ -224,7 +224,7 @@ class LazyFrame(pl.LazyFrame, Generic[ModelType]):
                  exprs.append(pl.col(name))
              else:
                  expr = to_expr(field_info.validation_alias)
-                 if name in self.columns:
+                 if name in self.collect_schema().names():
                      if expr is None:
                          exprs.append(pl.col(name))
                      else:
@@ -278,9 +278,9 @@ class LazyFrame(pl.LazyFrame, Generic[ModelType]):
          properties = self.model._schema_properties()
          valid_dtypes = self.model.valid_dtypes
          default_dtypes = self.model.dtypes
-         columns = columns or self.columns
+         columns = columns or self.collect_schema().names()
          exprs = []
-         for column, current_dtype in zip(self.columns, self.dtypes):
+         for column, current_dtype in self.collect_schema().items():
              if (column not in columns) or (column not in properties):
                  exprs.append(pl.col(column))
              elif "dtype" in properties[column]:
@@ -865,7 +865,7 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
          # └─────┴─────┘

          """
-         kwargs.setdefault("dtypes", cls.model.dtypes)
+         kwargs.setdefault("schema_overrides", cls.model.dtypes)
          has_header = kwargs.get("has_header", True)
          if not has_header and "columns" not in kwargs:
              kwargs.setdefault("new_columns", cls.model.columns)
@@ -877,9 +877,9 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
              field_name: alias_func(field_name)
              for field_name in cls.model.model_fields
          }
-         kwargs["dtypes"] = {
+         kwargs["schema_overrides"] = {
              fields_to_cols.get(field, field): dtype
-             for field, dtype in kwargs["dtypes"].items()
+             for field, dtype in kwargs["schema_overrides"].items()
          }
          # TODO: other forms of alias setting like in Field
          df = cls.model.DataFrame._from_pydf(pl.read_csv(*args, **kwargs)._df)
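
pl.read_csv renamed its per-column dtype mapping from dtypes to schema_overrides in polars 1.0, which is why the kwargs are rewritten under the new name. A sketch of the new keyword ("data.csv" is a hypothetical file):

    import polars as pl

    df = pl.read_csv("data.csv", schema_overrides={"price": pl.Float32})
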
@@ -1200,7 +1200,7 @@ class Model(BaseModel, metaclass=ModelMetaclass):
              field_type = Optional[field_type]
          new_fields[new_field_name] = (field_type, field_definition[1])
      return create_model(  # type: ignore
-         __model_name=model_name,
+         model_name,
          __base__=Model,
          **new_fields,
      )
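
pydantic's create_model takes the model name as its first positional argument; newer pydantic 2.x releases deprecate passing it via the __model_name keyword. A minimal sketch with a hypothetical model, assuming pydantic >= 2.7:

    from pydantic import create_model

    # Field definitions are (type, default) tuples; ... marks a required field.
    Point = create_model("Point", x=(int, ...), y=(int, 0))
    assert Point(x=1).y == 0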