patito 0.8.0__tar.gz → 0.8.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: patito
-Version: 0.8.0
+Version: 0.8.3
 Summary: A dataframe modelling library built on top of polars and pydantic.
 Home-page: https://github.com/JakobGM/patito
 License: MIT
@@ -20,7 +20,6 @@ Provides-Extra: pandas
 Requires-Dist: Sphinx (<7) ; extra == "docs"
 Requires-Dist: pandas ; extra == "pandas"
 Requires-Dist: polars (>=1.10.0)
-Requires-Dist: pre-commit (>=3.8.0,<4.0.0)
 Requires-Dist: pyarrow (>=5.0.0) ; extra == "caching"
 Requires-Dist: pydantic (>=2.7.0)
 Requires-Dist: sphinx-autobuild ; extra == "docs"
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "patito"
-version = "0.8.0"
+version = "0.8.3"
 description = "A dataframe modelling library built on top of polars and pydantic."
 authors = ["Jakob Gerhard Martinussen <jakobgm@gmail.com>", "Thomas Aarholt <thomasaarholt@gmail.com>"]
 license = "MIT"
@@ -25,7 +25,6 @@ sphinx-autobuild = {version = "*", optional = true}
 sphinx-autodoc-typehints = {version = "*", optional = true}
 sphinx-toolbox = {version = "*", optional = true}
 sphinxcontrib-mermaid = {version = "*", optional = true}
-pre-commit = "^3.8.0"
 
 [tool.poetry.extras]
 # The pyarrow.parquet module is required for writing parquet caches to disk
@@ -42,6 +41,7 @@ docs = [
 
 [tool.poetry.group.dev.dependencies]
 ruff = ">=0.2.1"
+pre-commit = "^3.8.0"
 coverage = {version = "*", extras = ["toml"]}
 pyright = ">=1.1.239"
 pytest = ">=7.1.2"
@@ -133,6 +133,7 @@ extend-exclude= ["tests/__init__.py"]
 
 [tool.ruff.lint]
 select = ["E4", "E7", "E9", "F", "I", "B", "D", "UP"]
+ignore = ["UP007"]
 
 [tool.ruff.lint.pydocstyle]
 convention = "google"
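
Note: the new `ignore = ["UP007"]` entry replaces the per-line `# noqa: UP007` suppressions that the next hunk removes from ColumnInfo. UP007 is ruff's pyupgrade rule preferring PEP 604 unions, i.e. it would rewrite annotations such as:

    from typing import Optional

    allow_missing: Optional[bool] = None  # flagged by UP007
    # into the PEP 604 spelling:
    allow_missing: bool | None = None

Disabling the rule globally keeps the `Optional[...]`/`Union[...]` spellings, likely because these annotations are evaluated at runtime by pydantic and the `X | Y` form is unavailable there on Python 3.9.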
@@ -97,20 +97,20 @@ class ColumnInfo(BaseModel, arbitrary_types_allowed=True):
 
     """
 
-    allow_missing: Optional[bool] = None  # noqa: UP007
+    allow_missing: Optional[bool] = None
     dtype: Annotated[
-        Optional[Union[DataTypeClass, DataType]],  # noqa: UP007
+        Optional[Union[DataTypeClass, DataType]],
         BeforeValidator(dtype_deserializer),
     ] = None
     constraints: Annotated[
-        Optional[Union[pl.Expr, list[pl.Expr]]],  # noqa: UP007
+        Optional[Union[pl.Expr, list[pl.Expr]]],
         BeforeValidator(expr_deserializer),
     ] = None
    derived_from: Annotated[
-        Optional[Union[str, pl.Expr]],  # noqa: UP007
+        Optional[Union[str, pl.Expr]],
         BeforeValidator(expr_or_col_name_deserializer),
     ] = None
-    unique: Optional[bool] = None  # noqa : UP007
+    unique: Optional[bool] = None
 
     def __repr__(self) -> str:
         """Print only Field attributes whose values are not default (mainly None)."""
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 from collections.abc import Mapping
 from functools import cache, reduce
-from operator import and_
+from operator import or_
 from typing import TYPE_CHECKING, Any
 
 import polars as pl
@@ -115,7 +115,8 @@ def validate_annotation(
 class DtypeResolver:
     def __init__(self, annotation: Any | None):
         self.annotation = annotation
-        self.schema = TypeAdapter(annotation).json_schema()
+        # mode='serialization' allows nested models with structs, see #86
+        self.schema = TypeAdapter(annotation).json_schema(mode="serialization")
         self.defs = self.schema.get("$defs", {})
 
     def valid_polars_dtypes(self) -> DataTypeGroup:
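
For context, pydantic's `TypeAdapter.json_schema` can generate the schema in either validation or serialization mode; the comment above points to issue #86, where nested models only map onto polars structs correctly with the latter. A minimal pydantic v2 sketch (model names are illustrative):

    from pydantic import BaseModel, TypeAdapter

    class Inner(BaseModel):
        x: int

    class Outer(BaseModel):
        inner: Inner

    # Same call as in DtypeResolver.__init__ above:
    schema = TypeAdapter(Outer).json_schema(mode="serialization")
    # The serialization-mode schema describes the model's output form,
    # which is what the resolver walks when building pl.Struct dtypes.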
@@ -143,7 +144,7 @@ class DtypeResolver:
             valid_type_sets.append(
                 self._pydantic_subschema_to_valid_polars_types(schema)
             )
-        return reduce(and_, valid_type_sets) if valid_type_sets else DataTypeGroup([])
+        return reduce(or_, valid_type_sets) if valid_type_sets else DataTypeGroup([])
 
     def _pydantic_subschema_to_valid_polars_types(
         self,
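
Together with the `from operator import or_` change above, this turns the reduction over per-subschema dtype sets from an intersection into a union: a field annotated with a union type should accept dtypes that are valid for *any* branch, whereas intersecting disjoint sets (e.g. integer vs. string dtypes) yielded an empty set. A minimal sketch with plain sets:

    from functools import reduce
    from operator import and_, or_

    valid_type_sets = [{"Int64", "Int32"}, {"String"}]
    reduce(and_, valid_type_sets)  # set(): old behaviour, nothing validates
    reduce(or_, valid_type_sets)   # {"Int64", "Int32", "String"}: new behaviour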
@@ -159,6 +160,7 @@ class DtypeResolver:
                     self.defs[props["$ref"].split("/")[-1]]
                 )
             return DataTypeGroup([])
+
         pyd_type = props.get("type")
         if pyd_type == "array":
             if "items" not in props:
@@ -169,28 +171,27 @@ class DtypeResolver:
             return DataTypeGroup(
                 [pl.List(dtype) for dtype in item_dtypes], match_base_type=False
             )
+
         elif pyd_type == "object":
             if "properties" not in props:
                 return DataTypeGroup([])
             object_props = props["properties"]
+            struct_fields: list[pl.Field] = []
+            for name, sub_props in object_props.items():
+                dtype = self._default_polars_dtype_for_schema(sub_props)
+                assert dtype is not None
+                struct_fields.append(pl.Field(name, dtype))
             return DataTypeGroup(
-                [
-                    pl.Struct(
-                        [
-                            pl.Field(
-                                name, self._default_polars_dtype_for_schema(sub_props)
-                            )
-                            for name, sub_props in object_props.items()
-                        ]
-                    )
-                ],
+                [pl.Struct(struct_fields)],
                 match_base_type=False,
             )  # for structs, return only the default dtype set to avoid combinatoric issues
         return _pyd_type_to_valid_dtypes(
             PydanticBaseType(pyd_type), props.get("format"), props.get("enum")
         )
 
-    def _default_polars_dtype_for_schema(self, schema: dict) -> DataType | None:
+    def _default_polars_dtype_for_schema(
+        self, schema: dict[str, Any]
+    ) -> DataType | None:
         if "anyOf" in schema:
             if len(schema["anyOf"]) == 2:  # look for optionals first
                 schema = _without_optional(schema)
@@ -206,13 +207,14 @@ class DtypeResolver:
 
     def _pydantic_subschema_to_default_dtype(
         self,
-        props: dict,
+        props: dict[str, Any],
     ) -> DataType | None:
         if "column_info" in props:  # user has specified in patito model
             ci = ColumnInfo.model_validate_json(props["column_info"])
             if ci.dtype is not None:
                 dtype = ci.dtype() if isinstance(ci.dtype, DataTypeClass) else ci.dtype
                 return dtype
+
         if "type" not in props:
             if "enum" in props:
                 raise TypeError("Mixed type enums not supported by patito.")
@@ -223,10 +225,12 @@ class DtypeResolver:
                     self.defs[props["$ref"].split("/")[-1]]
                 )
             return None
+
         pyd_type = props.get("type")
         if pyd_type == "numeric":
             pyd_type = "number"
-        if pyd_type == "array":
+
+        elif pyd_type == "array":
             if "items" not in props:
                 raise NotImplementedError(
                     "Unexpected error processing pydantic schema. Please file an issue."
@@ -236,18 +240,21 @@ class DtypeResolver:
             if inner_default_type is None:
                 return None
             return pl.List(inner_default_type)
-        elif pyd_type == "object":
+
+        elif pyd_type == "object":  # these are structs
             if "properties" not in props:
                 raise NotImplementedError(
                     "dictionaries not currently supported by patito"
                 )
-            object_props = props["properties"]
-            return pl.Struct(
-                [
-                    pl.Field(name, self._default_polars_dtype_for_schema(sub_props))
-                    for name, sub_props in object_props.items()
-                ]
-            )
+            object_props: dict[str, dict[str, str]] = props["properties"]
+            struct_fields: list[pl.Field] = []
+
+            for name, sub_props in object_props.items():
+                dtype = self._default_polars_dtype_for_schema(sub_props)
+                assert dtype is not None
+                struct_fields.append(pl.Field(name, dtype))
+            return pl.Struct(struct_fields)
+
         return _pyd_type_to_default_dtype(
             PydanticBaseType(pyd_type), props.get("format"), props.get("enum")
         )
@@ -124,7 +124,7 @@ def _pyd_type_to_valid_dtypes(
         _validate_enum_values(pyd_type, enum)
         return DataTypeGroup([pl.Enum(enum), pl.String], match_base_type=False)
     if pyd_type.value == "integer":
-        return DataTypeGroup(INTEGER_DTYPES | FLOAT_DTYPES)
+        return DataTypeGroup(INTEGER_DTYPES)
     elif pyd_type.value == "number":
         return (
             FLOAT_DTYPES
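
This narrows what validates for integer-annotated fields: previously a column with a float dtype was accepted for an `int` field, while after this change only integer dtypes pass. A hedged sketch of the expected user-visible effect (not a test from the package):

    import patito as pt
    import polars as pl

    class Model(pt.Model):
        n: int

    df = pt.DataFrame({"n": pl.Series([1.0, 2.0], dtype=pl.Float64)})
    df.set_model(Model).validate()  # now expected to raise: Float64 is no
                                    # longer a valid dtype for an `int` field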
@@ -54,33 +54,63 @@ class LazyFrame(pl.LazyFrame, Generic[ModelType]):
 
     model: type[ModelType]
 
-    @classmethod
-    def _construct_lazyframe_model_class(
-        cls: type[LDF], model: type[ModelType] | None
-    ) -> type[LazyFrame[ModelType]]:
-        """Return custom LazyFrame sub-class where LazyFrame.model is set.
+    def set_model(self, model: type[OtherModelType]) -> LazyFrame[OtherModelType]:
+        """Associate a given patito ``Model`` with the dataframe.
 
-        Can be used to construct a LazyFrame class where
-        DataFrame.set_model(model) is implicitly invoked at collection.
+        The model schema is used by methods that depend on a model being associated with
+        the given dataframe such as :ref:`DataFrame.validate() <DataFrame.validate>`
+        and :ref:`DataFrame.get() <DataFrame.get>`.
+
+        ``DataFrame(...).set_model(Model)`` is equivalent with ``Model.DataFrame(...)``.
 
         Args:
-            model: A patito model which should be used to validate the final dataframe.
-                If None is provided, the regular LazyFrame class will be returned.
+            model (Model): Sub-class of ``patito.Model`` declaring the schema of the
+                dataframe.
 
         Returns:
-            A custom LazyFrame model class where LazyFrame.model has been correctly
-            "hard-coded" to the given model.
+            DataFrame[Model]: Returns the same dataframe, but with an attached model
+                that is required for certain model-specific dataframe methods to work.
 
-        """
-        if model is None:
-            return cls
+        Examples:
+            >>> from typing_extensions import Literal
+            >>> import patito as pt
+            >>> import polars as pl
+            >>> class SchoolClass(pt.Model):
+            ...     year: int = pt.Field(dtype=pl.UInt16)
+            ...     letter: Literal["A", "B"] = pt.Field(dtype=pl.Categorical)
+            ...
+            >>> classes = pt.DataFrame(
+            ...     {"year": [1, 1, 2, 2], "letter": list("ABAB")}
+            ... ).set_model(SchoolClass)
+            >>> classes
+            shape: (4, 2)
+            ┌──────┬────────┐
+            │ year ┆ letter │
+            │ ---  ┆ ---    │
+            │ i64  ┆ str    │
+            ╞══════╪════════╡
+            │ 1    ┆ A      │
+            │ 1    ┆ B      │
+            │ 2    ┆ A      │
+            │ 2    ┆ B      │
+            └──────┴────────┘
+            >>> casted_classes = classes.cast()
+            >>> casted_classes
+            shape: (4, 2)
+            ┌──────┬────────┐
+            │ year ┆ letter │
+            │ ---  ┆ ---    │
+            │ u16  ┆ cat    │
+            ╞══════╪════════╡
+            │ 1    ┆ A      │
+            │ 1    ┆ B      │
+            │ 2    ┆ A      │
+            │ 2    ┆ B      │
+            └──────┴────────┘
+            >>> casted_classes.validate()
 
-        new_class = type(
-            f"{model.__name__}LazyFrame",
-            (cls,),
-            {"model": model},
-        )
-        return new_class
+        """
+        return model.LazyFrame._from_pyldf(self._ldf)  # type: ignore
 
     def collect(
         self,
@@ -93,12 +123,11 @@ class LazyFrame(pl.LazyFrame, Generic[ModelType]):
             parameters.
         """
         background = kwargs.pop("background", False)
-        df = super().collect(*args, background=background, **kwargs)
+        df: pl.DataFrame = super().collect(*args, background=background, **kwargs)
+        df = DataFrame(df)
         if getattr(self, "model", False):
-            cls = DataFrame._construct_dataframe_model_class(model=self.model)
-        else:
-            cls = DataFrame
-        return cls._from_pydf(df._df)
+            df = df.set_model(self.model)
+        return df
 
     def derive(self: LDF, columns: list[str] | None = None) -> LDF:
         """Populate columns which have ``pt.Field(derived_from=...)`` definitions.
@@ -213,7 +242,7 @@ class LazyFrame(pl.LazyFrame, Generic[ModelType]):
                 f"TODO figure out how this AliasPath behaves ({va})"
             )
         return (
-            pl.col(va.path[0]).list.get(va.path[1], null_on_oob=True)
+            pl.col(str(va.path[0])).list.get(va.path[1], null_on_oob=True)
             if va.path[0] in self.collect_schema()
             else None
         )
@@ -307,7 +336,10 @@ class LazyFrame(pl.LazyFrame, Generic[ModelType]):
     @classmethod
     def from_existing(cls: type[LDF], lf: pl.LazyFrame) -> LDF:
         """Construct a patito.DataFrame object from an existing polars.DataFrame object."""
-        return cls.model.LazyFrame._from_pyldf(lf._ldf).cast()
+        if getattr(cls, "model", False):
+            return cls.model.LazyFrame._from_pyldf(super().lazy()._ldf)  # type: ignore
+
+        return LazyFrame._from_pyldf(lf._ldf)  # type: ignore
 
 
 class DataFrame(pl.DataFrame, Generic[ModelType]):
@@ -341,30 +373,6 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
 
     model: type[ModelType]
 
-    @classmethod
-    def _construct_dataframe_model_class(
-        cls: type[DF], model: type[OtherModelType]
-    ) -> type[DataFrame[OtherModelType]]:
-        """Return custom DataFrame sub-class where DataFrame.model is set.
-
-        Can be used to construct a DataFrame class where
-        DataFrame.set_model(model) is implicitly invoked at instantiation.
-
-        Args:
-            model: A patito model which should be used to validate the dataframe.
-
-        Returns:
-            A custom DataFrame model class where DataFrame._model has been correctly
-            "hard-coded" to the given model.
-
-        """
-        new_class = type(
-            f"{model.model_json_schema()['title']}DataFrame",
-            (cls,),
-            {"model": model},
-        )
-        return new_class
-
     def lazy(self: DataFrame[ModelType]) -> LazyFrame[ModelType]:
         """Convert DataFrame into LazyFrame.
 
@@ -374,15 +382,12 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
             A new LazyFrame object.
 
         """
-        lazyframe_class: LazyFrame[ModelType] = (
-            LazyFrame._construct_lazyframe_model_class(
-                model=getattr(self, "model", None)
-            )
-        )  # type: ignore
-        ldf = lazyframe_class._from_pyldf(super().lazy()._ldf)
-        return ldf
+        if getattr(self, "model", False):
+            return self.model.LazyFrame._from_pyldf(super().lazy()._ldf)  # type: ignore
+
+        return LazyFrame._from_pyldf(super().lazy()._ldf)  # type: ignore
 
-    def set_model(self, model):  # type: ignore[no-untyped-def]  # noqa: ANN001, ANN201
+    def set_model(self, model: type[OtherModelType]) -> DataFrame[OtherModelType]:
         """Associate a given patito ``Model`` with the dataframe.
 
         The model schema is used by methods that depend on a model being associated with
@@ -438,11 +443,7 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
         >>> casted_classes.validate()
 
         """
-        cls = self._construct_dataframe_model_class(model=model)
-        return cast(
-            DataFrame[model],
-            cls._from_pydf(self._df),
-        )
+        return model.DataFrame(self._df)
 
     def unalias(self: DF) -> DF:
         """Un-aliases column names using information from pydantic validation_alias.
@@ -503,7 +504,6 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
     def drop(
         self: DF,
         columns: str | Collection[str] | None = None,
-        *more_columns: str,
     ) -> DF:
         """Drop one or more columns from the dataframe.
 
@@ -515,7 +515,6 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
             columns: A single column string name, or list of strings, indicating
                 which columns to drop. If not specified, all columns *not*
                 specified by the associated dataframe model will be dropped.
-            more_columns: Additional named columns to drop.
 
         Returns:
             DataFrame[Model]: New dataframe without the specified columns.
@@ -538,7 +537,9 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
 
         """
         if columns is not None:
-            return self._from_pydf(super().drop(columns)._df)
+            # I get a single null row if I try to use super() here, so go via
+            # pl.DataFrame instead.
+            return self._from_pydf(pl.DataFrame(self._df).drop(columns)._df)
         else:
             return self.drop(list(set(self.columns) - set(self.model.columns)))
 
@@ -705,7 +706,7 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
                 )
                 for column, default_value in self.model.defaults.items()
             ]
-        ).set_model(self.model)
+        ).set_model(self.model)  # type: ignore
 
     def get(self, predicate: pl.Expr | None = None) -> ModelType:
         """Fetch the single row that matches the given polars predicate.
@@ -815,7 +816,7 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
         >>> class Product(pt.Model):
         ...     product_id: int = pt.Field(unique=True)
         ...     price: float
-        ...
+
         >>> df = pt.DataFrame({"product_id": [1, 2], "price": [10., 20.]})
        >>> df = df.set_model(Product)
        >>> for product in df.iter_models():
@@ -833,10 +834,23 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
 
         df = self.validate(drop_superfluous_columns=True) if validate_df else self
 
-        def _iter_models(_df: DF) -> Iterator[ModelType]:
-            for idx in range(_df.height):
-                yield self.model.from_row(_df[idx], validate=validate_model)
-
+        def _iter_models_with_validate(
+            _df: DataFrame[ModelType],
+        ) -> Iterator[ModelType]:
+            for row in _df.iter_rows(named=True):
+                yield self.model(**row)
+
+        def _iter_models_without_validate(
+            _df: DataFrame[ModelType],
+        ) -> Iterator[ModelType]:
+            for row in _df.iter_rows(named=True):
+                yield self.model.model_construct(**row)
+
+        _iter_models = (
+            _iter_models_with_validate
+            if validate_model
+            else _iter_models_without_validate
+        )
         return ModelGenerator(_iter_models(df))
 
     def _pydantic_model(self) -> type[Model]:
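
The split replaces `Model.from_row` with a plain row loop: with validation each row goes through normal pydantic construction, without it `model_construct` skips validation entirely. The underlying pydantic v2 behaviour, as a minimal sketch:

    from pydantic import BaseModel

    class Row(BaseModel):
        x: int

    Row(x="1")                  # validates and coerces: x == 1
    Row.model_construct(x="1")  # no validation: x stays the string "1", fast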
@@ -76,27 +76,31 @@ class ModelMetaclass(PydanticModelMetaclass):
 
         """
         super().__init__(name, bases, clsdict, **kwargs)
-        # Add a custom subclass of patito.DataFrame to the model class,
-        # where .set_model() has been implicitly set.
-        cls.DataFrame = DataFrame._construct_dataframe_model_class(
-            model=cls,  # type: ignore
+        NewDataFrame = type(
+            f"{cls.__name__}DataFrame",
+            (DataFrame,),
+            {"model": cls},
         )
-        # Similarly for LazyFrame
-        cls.LazyFrame = LazyFrame._construct_lazyframe_model_class(
-            model=cls,  # type: ignore
+        cls.DataFrame: type[DataFrame[cls]] = NewDataFrame  # type: ignore
+
+        NewLazyFrame = type(
+            f"{cls.__name__}LazyFrame",
+            (LazyFrame,),
+            {"model": cls},
         )
+        cls.LazyFrame: type[LazyFrame[cls]] = NewLazyFrame  # type: ignore
 
     def __hash__(self) -> int:
         """Return hash of the model class."""
         return super().__hash__()
 
     @property
-    def column_infos(cls: type[ModelType]) -> Mapping[str, ColumnInfo]:
+    def column_infos(cls: type[Model]) -> Mapping[str, ColumnInfo]:
         """Return column information for the model."""
         return column_infos_for_model(cls)
 
     @property
-    def model_schema(cls: type[ModelType]) -> Mapping[str, Mapping[str, Any]]:
+    def model_schema(cls: type[Model]) -> Mapping[str, Mapping[str, Any]]:
         """Return schema properties where definition references have been resolved.
 
         Returns:
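
The inlined three-argument `type()` calls do exactly what the removed `_construct_*_model_class` helpers did: create a per-model subclass with the `model` attribute baked in, once, at class-definition time. A standalone sketch of the mechanism (illustrative names):

    class Base:
        model = None

    # Equivalent to `class UserDataFrame(Base): model = "User"`
    UserDataFrame = type("UserDataFrame", (Base,), {"model": "User"})
    assert UserDataFrame.model == "User"
    assert issubclass(UserDataFrame, Base)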
@@ -112,7 +116,7 @@ class ModelMetaclass(PydanticModelMetaclass):
         return schema_for_model(cls)
 
     @property
-    def columns(cls: type[ModelType]) -> list[str]:
+    def columns(cls: type[Model]) -> list[str]:
         """Return the name of the dataframe columns specified by the fields of the model.
 
         Returns:
@@ -131,7 +135,7 @@ class ModelMetaclass(PydanticModelMetaclass):
         return list(cls.model_fields.keys())
 
     @property
-    def dtypes(cls: type[ModelType]) -> dict[str, DataTypeClass | DataType]:
+    def dtypes(cls: type[Model]) -> dict[str, DataTypeClass | DataType]:
         """Return the polars dtypes of the dataframe.
 
         Unless Field(dtype=...) is specified, the highest signed column dtype
@@ -155,7 +159,7 @@ class ModelMetaclass(PydanticModelMetaclass):
 
     @property
     def valid_dtypes(
-        cls: type[ModelType],
+        cls: type[Model],
     ) -> Mapping[str, frozenset[DataTypeClass | DataType]]:
         """Return a list of polars dtypes which Patito considers valid for each field.
 
@@ -172,7 +176,7 @@ class ModelMetaclass(PydanticModelMetaclass):
         return valid_dtypes_for_model(cls)
 
     @property
-    def defaults(cls: type[ModelType]) -> dict[str, Any]:
+    def defaults(cls: type[Model]) -> dict[str, Any]:
         """Return default field values specified on the model.
 
         Returns:
@@ -197,7 +201,7 @@ class ModelMetaclass(PydanticModelMetaclass):
         }
 
     @property
-    def non_nullable_columns(cls: type[ModelType]) -> set[str]:
+    def non_nullable_columns(cls: type[Model]) -> set[str]:
         """Return names of those columns that are non-nullable in the schema.
 
         Returns:
@@ -226,7 +230,7 @@ class ModelMetaclass(PydanticModelMetaclass):
         )
 
     @property
-    def nullable_columns(cls: type[ModelType]) -> set[str]:
+    def nullable_columns(cls: type[Model]) -> set[str]:
         """Return names of those columns that are nullable in the schema.
 
         Returns:
@@ -248,7 +252,7 @@ class ModelMetaclass(PydanticModelMetaclass):
         return set(cls.columns) - cls.non_nullable_columns
 
     @property
-    def unique_columns(cls: type[ModelType]) -> set[str]:
+    def unique_columns(cls: type[Model]) -> set[str]:
         """Return columns with uniqueness constraint.
 
         Returns:
@@ -271,7 +275,7 @@ class ModelMetaclass(PydanticModelMetaclass):
         return {column for column in cls.columns if infos[column].unique}
 
     @property
-    def derived_columns(cls: type[ModelType]) -> set[str]:
+    def derived_columns(cls: type[Model]) -> set[str]:
         """Return set of columns which are derived from other columns."""
         infos = cls.column_infos
         return {
@@ -301,6 +301,7 @@ def _find_errors(  # noqa: C901
 
         dataframe_tmp = (
             dataframe_tmp.select(column_name)
+            .filter(pl.col(column_name).list.len() > 0)
             .explode(column_name)
             .unnest(column_name)
         )
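
The added filter skips empty lists before exploding: in polars, exploding an empty list produces a null row, and unnesting that null struct then surfaces spurious all-null rows in the error check. A minimal polars illustration:

    import polars as pl

    df = pl.DataFrame({"a": [[{"x": 1}], []]})
    df.select("a").explode("a").unnest("a")
    # -> two rows, the second all-null (from the empty list)
    df.select("a").filter(pl.col("a").list.len() > 0).explode("a").unnest("a")
    # -> only the genuine row survives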