patito 0.7.0__py3-none-any.whl → 0.8.2__py3-none-any.whl

This diff compares the contents of two publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in the public registry.
patito/exceptions.py CHANGED
@@ -1,15 +1,10 @@
 """Exceptions used by patito."""
 
+from collections.abc import Generator, Sequence
 from typing import (
     TYPE_CHECKING,
     Any,
-    Dict,
-    Generator,
-    List,
     Optional,
-    Sequence,
-    Tuple,
-    Type,
     TypedDict,
     Union,
 )
@@ -19,7 +14,7 @@ from patito._pydantic.repr import Representation
 if TYPE_CHECKING:
     from pydantic import BaseModel
 
-    Loc = Tuple[Union[int, str], ...]
+    Loc = tuple[Union[int, str], ...]
 
     class _ErrorDictRequired(TypedDict):
         loc: Loc
@@ -27,7 +22,7 @@ if TYPE_CHECKING:
         type: str
 
     class ErrorDict(_ErrorDictRequired, total=False):
-        ctx: Dict[str, Any]
+        ctx: dict[str, Any]
 
     from patito._pydantic.repr import ReprArgs
 
@@ -67,13 +62,13 @@ class DataFrameValidationError(Representation, ValueError):
 
     __slots__ = "raw_errors", "model", "_error_cache"
 
-    def __init__(self, errors: Sequence[ErrorList], model: Type["BaseModel"]) -> None:
+    def __init__(self, errors: Sequence[ErrorList], model: type["BaseModel"]) -> None:
         """Create a dataframe validation error."""
         self.raw_errors = errors
         self.model = model
-        self._error_cache: Optional[List["ErrorDict"]] = None
+        self._error_cache: Optional[list[ErrorDict]] = None
 
-    def errors(self) -> List["ErrorDict"]:
+    def errors(self) -> list["ErrorDict"]:
         """Get list of errors."""
         if self._error_cache is None:
             self._error_cache = list(flatten_errors(self.raw_errors))
@@ -93,7 +88,7 @@ class DataFrameValidationError(Representation, ValueError):
         return [("model", self.model.__name__), ("errors", self.errors())]
 
 
-def display_errors(errors: List["ErrorDict"]) -> str:
+def display_errors(errors: list["ErrorDict"]) -> str:
     return "\n".join(
         f'{_display_error_loc(e)}\n {e["msg"]} ({_display_error_type_and_ctx(e)})'
         for e in errors
@@ -142,7 +137,7 @@ def error_dict(exc: Exception, loc: "Loc") -> "ErrorDict":
     else:
         msg = str(exc)
 
-    d: "ErrorDict" = {"loc": loc, "msg": msg, "type": type_}
+    d: ErrorDict = {"loc": loc, "msg": msg, "type": type_}
 
     if ctx:
         d["ctx"] = ctx
@@ -150,10 +145,10 @@ def error_dict(exc: Exception, loc: "Loc") -> "ErrorDict":
     return d
 
 
-_EXC_TYPE_CACHE: Dict[Type[Exception], str] = {}
+_EXC_TYPE_CACHE: dict[type[Exception], str] = {}
 
 
-def get_exc_type(cls: Type[Exception]) -> str:
+def get_exc_type(cls: type[Exception]) -> str:
     # slightly more efficient than using lru_cache since we don't need to worry about the cache filling up
     try:
         return _EXC_TYPE_CACHE[cls]
@@ -163,7 +158,7 @@ def get_exc_type(cls: Type[Exception]) -> str:
         return r
 
 
-def _get_exc_type(cls: Type[Exception]) -> str:
+def _get_exc_type(cls: type[Exception]) -> str:
     if issubclass(cls, AssertionError):
         return "assertion_error"
 
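Note (not part of the diff): the typing changes above only modernize the annotations; the error API itself is unchanged. A minimal sketch of how the ErrorDict entries exposed by DataFrameValidationError.errors() look in practice, using an illustrative Product model and data:

```python
import patito as pt
from patito.exceptions import DataFrameValidationError


class Product(pt.Model):
    product_id: int = pt.Field(unique=True)
    price: float


try:
    # The duplicated product_id violates the unique=True constraint.
    Product.DataFrame({"product_id": [1, 1], "price": [10.0, 20.0]}).validate()
except DataFrameValidationError as exc:
    for error in exc.errors():
        # Each entry follows the ErrorDict shape: "loc", "msg", "type",
        # plus an optional "ctx" mapping with extra context.
        print(error["loc"], error["msg"], error["type"])
```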
patito/polars.py CHANGED
@@ -2,20 +2,13 @@
 
 from __future__ import annotations
 
+from collections.abc import Collection, Iterable, Iterator, Sequence
 from typing import (
     TYPE_CHECKING,
     Any,
-    Collection,
-    Dict,
     Generic,
-    Iterable,
     Literal,
-    Optional,
-    Sequence,
-    Tuple,
-    Type,
     TypeVar,
-    Union,
     cast,
 )
 
@@ -31,63 +24,110 @@ if TYPE_CHECKING:
 
     from patito.pydantic import Model
 
-
 DF = TypeVar("DF", bound="DataFrame")
 LDF = TypeVar("LDF", bound="LazyFrame")
 ModelType = TypeVar("ModelType", bound="Model")
 OtherModelType = TypeVar("OtherModelType", bound="Model")
+T = TypeVar("T")
+
+
+class ModelGenerator(Iterator[ModelType], Generic[ModelType]):
+    """An iterator that can be converted to a list."""
+
+    def __init__(self, iterator: Iterator[ModelType]) -> None:
+        """Construct a ModelGenerator from an iterator."""
+        self._iterator = iterator
+
+    def to_list(self) -> list[ModelType]:
+        """Convert iterator to list."""
+        return list(self)
+
+    def __next__(self) -> ModelType:  # noqa: D105
+        return next(self._iterator)
+
+    def __iter__(self) -> Iterator[ModelType]:  # noqa: D105
+        return self
 
 
 class LazyFrame(pl.LazyFrame, Generic[ModelType]):
     """LazyFrame class associated to DataFrame."""
 
-    model: Type[ModelType]
+    model: type[ModelType]
 
-    @classmethod
-    def _construct_lazyframe_model_class(
-        cls: Type[LDF], model: Optional[Type[ModelType]]
-    ) -> Type[LazyFrame[ModelType]]:
-        """Return custom LazyFrame sub-class where LazyFrame.model is set.
+    def set_model(self, model: type[OtherModelType]) -> LazyFrame[OtherModelType]:
+        """Associate a given patito ``Model`` with the dataframe.
 
-        Can be used to construct a LazyFrame class where
-        DataFrame.set_model(model) is implicitly invoked at collection.
+        The model schema is used by methods that depend on a model being associated with
+        the given dataframe such as :ref:`DataFrame.validate() <DataFrame.validate>`
+        and :ref:`DataFrame.get() <DataFrame.get>`.
+
+        ``DataFrame(...).set_model(Model)`` is equivalent with ``Model.DataFrame(...)``.
 
         Args:
-            model: A patito model which should be used to validate the final dataframe.
-                If None is provided, the regular LazyFrame class will be returned.
+            model (Model): Sub-class of ``patito.Model`` declaring the schema of the
+                dataframe.
 
         Returns:
-            A custom LazyFrame model class where LazyFrame.model has been correctly
-            "hard-coded" to the given model.
+            DataFrame[Model]: Returns the same dataframe, but with an attached model
+                that is required for certain model-specific dataframe methods to work.
 
-        """
-        if model is None:
-            return cls
+        Examples:
+            >>> from typing_extensions import Literal
+            >>> import patito as pt
+            >>> import polars as pl
+            >>> class SchoolClass(pt.Model):
+            ...     year: int = pt.Field(dtype=pl.UInt16)
+            ...     letter: Literal["A", "B"] = pt.Field(dtype=pl.Categorical)
+            ...
+            >>> classes = pt.DataFrame(
+            ...     {"year": [1, 1, 2, 2], "letter": list("ABAB")}
+            ... ).set_model(SchoolClass)
+            >>> classes
+            shape: (4, 2)
+            ┌──────┬────────┐
+            │ year ┆ letter │
+            │ ---  ┆ ---    │
+            │ i64  ┆ str    │
+            ╞══════╪════════╡
+            │ 1    ┆ A      │
+            │ 1    ┆ B      │
+            │ 2    ┆ A      │
+            │ 2    ┆ B      │
+            └──────┴────────┘
+            >>> casted_classes = classes.cast()
+            >>> casted_classes
+            shape: (4, 2)
+            ┌──────┬────────┐
+            │ year ┆ letter │
+            │ ---  ┆ ---    │
+            │ u16  ┆ cat    │
+            ╞══════╪════════╡
+            │ 1    ┆ A      │
+            │ 1    ┆ B      │
+            │ 2    ┆ A      │
+            │ 2    ┆ B      │
+            └──────┴────────┘
+            >>> casted_classes.validate()
 
-        new_class = type(
-            f"{model.__name__}LazyFrame",
-            (cls,),
-            {"model": model},
-        )
-        return new_class
+        """
+        return model.LazyFrame._from_pyldf(self._ldf)  # type: ignore
 
     def collect(
         self,
         *args,
         **kwargs,
-    ) -> "DataFrame[ModelType]":  # noqa: DAR101, DAR201
+    ) -> DataFrame[ModelType]:  # noqa: DAR101, DAR201
         """Collect into a DataFrame.
 
         See documentation of polars.DataFrame.collect for full description of
         parameters.
         """
         background = kwargs.pop("background", False)
-        df = super().collect(*args, background=background, **kwargs)
+        df: pl.DataFrame = super().collect(*args, background=background, **kwargs)
+        df = DataFrame(df)
         if getattr(self, "model", False):
-            cls = DataFrame._construct_dataframe_model_class(model=self.model)
-        else:
-            cls = DataFrame
-        return cls._from_pydf(df._df)
+            df = df.set_model(self.model)
+        return df
 
     def derive(self: LDF, columns: list[str] | None = None) -> LDF:
         """Populate columns which have ``pt.Field(derived_from=...)`` definitions.
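Note (not part of the diff): the dynamically generated LazyFrame subclass is gone; LazyFrame.set_model() and the reworked collect() now carry the model through a lazy round trip. A minimal sketch under that assumption, with an illustrative model and data:

```python
import patito as pt
import polars as pl


class SchoolClass(pt.Model):
    year: int = pt.Field(dtype=pl.UInt16)
    letter: str


validated = (
    pt.DataFrame({"year": [1, 1, 2], "letter": ["A", "B", "A"]})
    .set_model(SchoolClass)  # DataFrame[SchoolClass]
    .lazy()                  # LazyFrame[SchoolClass], model kept
    .cast()                  # casts "year" to pl.UInt16 per the model
    .collect()               # DataFrame[SchoolClass], model re-attached
    .validate()
)
```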
@@ -148,33 +188,35 @@ class LazyFrame(pl.LazyFrame, Generic[ModelType]):
 
     def _derive_column(
         self,
-        df: LDF,
+        lf: LDF,
         column_name: str,
-        column_infos: Dict[str, ColumnInfo],
-    ) -> Tuple[LDF, Sequence[str]]:
+        column_infos: dict[str, ColumnInfo],
+    ) -> tuple[LDF, Sequence[str]]:
         if (
             column_infos.get(column_name, None) is None
             or column_infos[column_name].derived_from is None
         ):
-            return df, []
+            return lf, []
+
         derived_from = column_infos[column_name].derived_from
         dtype = self.model.dtypes[column_name]
         derived_columns = []
+
         if isinstance(derived_from, str):
-            df = df.with_columns(pl.col(derived_from).cast(dtype).alias(column_name))
+            lf = lf.with_columns(pl.col(derived_from).cast(dtype).alias(column_name))
         elif isinstance(derived_from, pl.Expr):
             root_cols = derived_from.meta.root_names()
             while root_cols:
                 root_col = root_cols.pop()
-                df, _derived_columns = self._derive_column(df, root_col, column_infos)
+                lf, _derived_columns = self._derive_column(lf, root_col, column_infos)
                 derived_columns.extend(_derived_columns)
-            df = df.with_columns(derived_from.cast(dtype).alias(column_name))
+            lf = lf.with_columns(derived_from.cast(dtype).alias(column_name))
         else:
             raise TypeError(
                 "Can not derive dataframe column from type " f"{type(derived_from)}."
             )
         derived_columns.append(column_name)
-        return df, derived_columns
+        return lf, derived_columns
 
     def unalias(self: LDF) -> LDF:
         """Un-aliases column names using information from pydantic validation_alias.
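Note (not part of the diff): _derive_column() resolves pt.Field(derived_from=...) definitions, accepting either a source column name or a polars expression and casting the result to the model's declared dtype. An illustrative sketch of the behaviour it implements (model and columns are hypothetical):

```python
import patito as pt
import polars as pl


class Order(pt.Model):
    price: float
    quantity: int
    # derived_from may be a column name (str) or a polars expression.
    total: float = pt.Field(derived_from=pl.col("price") * pl.col("quantity"))


orders = (
    Order.DataFrame({"price": [10.0, 20.0], "quantity": [2, 3]})
    .lazy()
    .derive()   # populates "total" from the expression, cast to Float64
    .collect()
)
```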
@@ -191,7 +233,7 @@ class LazyFrame(pl.LazyFrame, Generic[ModelType]):
             return self
         exprs = []
 
-        def to_expr(va: str | AliasPath | AliasChoices) -> Optional[pl.Expr]:
+        def to_expr(va: str | AliasPath | AliasChoices) -> pl.Expr | None:
             if isinstance(va, str):
                 return pl.col(va) if va in self.collect_schema() else None
             elif isinstance(va, AliasPath):
@@ -200,12 +242,12 @@ class LazyFrame(pl.LazyFrame, Generic[ModelType]):
                         f"TODO figure out how this AliasPath behaves ({va})"
                     )
                 return (
-                    pl.col(va.path[0]).list.get(va.path[1], null_on_oob=True)
+                    pl.col(str(va.path[0])).list.get(va.path[1], null_on_oob=True)
                     if va.path[0] in self.collect_schema()
                     else None
                 )
             elif isinstance(va, AliasChoices):
-                local_expr: Optional[pl.Expr] = None
+                local_expr: pl.Expr | None = None
                 for choice in va.choices:
                     if (part := to_expr(choice)) is not None:
                         local_expr = (
@@ -235,7 +277,7 @@ class LazyFrame(pl.LazyFrame, Generic[ModelType]):
         return self.select(exprs)
 
     def cast(
-        self: LDF, strict: bool = False, columns: Optional[Sequence[str]] = None
+        self: LDF, strict: bool = False, columns: Sequence[str] | None = None
     ) -> LDF:
         """Cast columns to `dtypes` specified by the associated Patito model.
 
@@ -292,9 +334,12 @@ class LazyFrame(pl.LazyFrame, Generic[ModelType]):
         return self.with_columns(exprs)
 
     @classmethod
-    def from_existing(cls: Type[LDF], lf: pl.LazyFrame) -> LDF:
+    def from_existing(cls: type[LDF], lf: pl.LazyFrame) -> LDF:
         """Construct a patito.DataFrame object from an existing polars.DataFrame object."""
-        return cls.model.LazyFrame._from_pyldf(lf._ldf).cast()
+        if getattr(cls, "model", False):
+            return cls.model.LazyFrame._from_pyldf(super().lazy()._ldf)  # type: ignore
+
+        return LazyFrame._from_pyldf(lf._ldf)  # type: ignore
 
 
 class DataFrame(pl.DataFrame, Generic[ModelType]):
@@ -326,31 +371,7 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
     :ref:`Product.validate <DataFrame.validate>`.
     """
 
-    model: Type[ModelType]
-
-    @classmethod
-    def _construct_dataframe_model_class(
-        cls: Type[DF], model: Type[OtherModelType]
-    ) -> Type[DataFrame[OtherModelType]]:
-        """Return custom DataFrame sub-class where DataFrame.model is set.
-
-        Can be used to construct a DataFrame class where
-        DataFrame.set_model(model) is implicitly invoked at instantiation.
-
-        Args:
-            model: A patito model which should be used to validate the dataframe.
-
-        Returns:
-            A custom DataFrame model class where DataFrame._model has been correctly
-            "hard-coded" to the given model.
-
-        """
-        new_class = type(
-            f"{model.model_json_schema()['title']}DataFrame",
-            (cls,),
-            {"model": model},
-        )
-        return new_class
+    model: type[ModelType]
 
     def lazy(self: DataFrame[ModelType]) -> LazyFrame[ModelType]:
         """Convert DataFrame into LazyFrame.
@@ -361,15 +382,12 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
             A new LazyFrame object.
 
         """
-        lazyframe_class: LazyFrame[ModelType] = (
-            LazyFrame._construct_lazyframe_model_class(
-                model=getattr(self, "model", None)
-            )
-        )  # type: ignore
-        ldf = lazyframe_class._from_pyldf(super().lazy()._ldf)
-        return ldf
+        if getattr(self, "model", False):
+            return self.model.LazyFrame._from_pyldf(super().lazy()._ldf)  # type: ignore
+
+        return LazyFrame._from_pyldf(super().lazy()._ldf)  # type: ignore
 
-    def set_model(self, model):  # type: ignore[no-untyped-def] # noqa: ANN001, ANN201
+    def set_model(self, model: type[OtherModelType]) -> DataFrame[OtherModelType]:
         """Associate a given patito ``Model`` with the dataframe.
 
         The model schema is used by methods that depend on a model being associated with
@@ -425,11 +443,7 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
             >>> casted_classes.validate()
 
         """
-        cls = self._construct_dataframe_model_class(model=model)
-        return cast(
-            DataFrame[model],
-            cls._from_pydf(self._df),
-        )
+        return model.DataFrame(self._df)
 
     def unalias(self: DF) -> DF:
         """Un-aliases column names using information from pydantic validation_alias.
@@ -445,7 +459,7 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
         return self.lazy().unalias().collect()
 
     def cast(
-        self: DF, strict: bool = False, columns: Optional[Sequence[str]] = None
+        self: DF, strict: bool = False, columns: Sequence[str] | None = None
     ) -> DF:
         """Cast columns to `dtypes` specified by the associated Patito model.
 
@@ -489,8 +503,7 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
 
     def drop(
         self: DF,
-        columns: Optional[Union[str, Collection[str]]] = None,
-        *more_columns: str,
+        columns: str | Collection[str] | None = None,
     ) -> DF:
         """Drop one or more columns from the dataframe.
 
@@ -502,7 +515,6 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
             columns: A single column string name, or list of strings, indicating
                 which columns to drop. If not specified, all columns *not*
                 specified by the associated dataframe model will be dropped.
-            more_columns: Additional named columns to drop.
 
         Returns:
             DataFrame[Model]: New dataframe without the specified columns.
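Note (not part of the diff): with *more_columns gone, drop() takes a single optional argument; called without arguments it removes every column the attached model does not declare. A small illustrative sketch:

```python
import patito as pt


class Product(pt.Model):
    product_id: int
    price: float


df = Product.DataFrame(
    {"product_id": [1, 2], "price": [10.0, 20.0], "comment": ["a", "b"]}
)
print(df.drop().columns)  # ["product_id", "price"]; "comment" is dropped
```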
@@ -525,27 +537,29 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
 
         """
         if columns is not None:
-            return self._from_pydf(super().drop(columns)._df)
+            # I get a single null row if I try to use super() here, so go via
+            # pl.DataFrame instead.
+            return self._from_pydf(pl.DataFrame(self._df).drop(columns)._df)
         else:
             return self.drop(list(set(self.columns) - set(self.model.columns)))
 
-    def validate(self, columns: Optional[Sequence[str]] = None, **kwargs: Any):
+    def validate(self, columns: Sequence[str] | None = None, **kwargs: Any):
         """Validate the schema and content of the dataframe.
 
         You must invoke ``.set_model()`` before invoking ``.validate()`` in order
         to specify how the dataframe should be validated.
 
         Returns:
-            DataFrame[Model]: The original dataframe, if correctly validated.
+            DataFrame[Model]: The original patito dataframe, if correctly validated.
 
         Raises:
+            patito.exceptions.DataFrameValidationError: If the dataframe does not match the
+                specified schema.
+
             TypeError: If ``DataFrame.set_model()`` has not been invoked prior to
                 validation. Note that ``patito.Model.DataFrame`` automatically invokes
                 ``DataFrame.set_model()`` for you.
 
-            patito.exceptions.DataFrameValidationError: If the dataframe does not match the
-                specified schema.
-
         Examples:
             >>> import patito as pt
 
@@ -623,13 +637,12 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
 
     def fill_null(
         self: DF,
-        value: Optional[Any] = None,
-        strategy: Optional[
-            Literal[
-                "forward", "backward", "min", "max", "mean", "zero", "one", "defaults"
-            ]
-        ] = None,
-        limit: Optional[int] = None,
+        value: Any | None = None,
+        strategy: Literal[
+            "forward", "backward", "min", "max", "mean", "zero", "one", "defaults"
+        ]
+        | None = None,
+        limit: int | None = None,
         matches_supertype: bool = True,
     ) -> DF:
         """Fill null values using a filling strategy, literal, or ``Expr``.
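Note (not part of the diff): the "defaults" strategy listed in the signature fills nulls with each field's default value from the attached model, cast to the declared dtype; the guard selecting that strategy sits outside this hunk, so the sketch below is an assumption based on the surrounding code:

```python
import patito as pt


class Product(pt.Model):
    product_id: int
    price: float = 0.0  # default picked up by strategy="defaults"


df = Product.DataFrame({"product_id": [1, 2], "price": [10.0, None]})
filled = df.fill_null(strategy="defaults")
print(filled["price"].to_list())  # expected: [10.0, 0.0]
```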
@@ -689,14 +702,13 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
                         pl.lit(default_value, self.model.dtypes[column])
                     )
                     if column in self.columns
-                    else pl.Series(column, [default_value], self.model.dtypes[column])
-                )  # NOTE: hack to get around polars bug https://github.com/pola-rs/polars/issues/13602
-                # else pl.lit(default_value, self.model.dtypes[column]).alias(column)
+                    else pl.lit(default_value, self.model.dtypes[column]).alias(column)
+                )
                 for column, default_value in self.model.defaults.items()
             ]
-        ).set_model(self.model)
+        ).set_model(self.model)  # type: ignore
 
-    def get(self, predicate: Optional[pl.Expr] = None) -> ModelType:
+    def get(self, predicate: pl.Expr | None = None) -> ModelType:
         """Fetch the single row that matches the given polars predicate.
 
         If you expect a data frame to already consist of one single row,
@@ -778,7 +790,70 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
         else:
             return self._pydantic_model().from_row(row)  # type: ignore
 
-    def _pydantic_model(self) -> Type[Model]:
+    def iter_models(
+        self, validate_df: bool = True, validate_model: bool = False
+    ) -> ModelGenerator[ModelType]:
+        """Iterate over all rows in the dataframe as pydantic models.
+
+        Args:
+            validate_df: If set to ``True``, the dataframe will be validated before
+                making models out of each row. If set to ``False``, beware that columns
+                need to be the exact same as the model fields.
+            validate_model: If set to ``True``, each model will be validated when
+                constructing. Disabled by default since df validation should cover this case.
+
+        Yields:
+            Model: A pydantic-derived model representing the given row. .to_list() can be
+                used to convert the iterator to a list.
+
+        Raises:
+            TypeError: If ``DataFrame.set_model()`` has not been invoked prior to
+                iteration.
+
+        Example:
+            >>> import patito as pt
+            >>> import polars as pl
+            >>> class Product(pt.Model):
+            ...     product_id: int = pt.Field(unique=True)
+            ...     price: float
+
+            >>> df = pt.DataFrame({"product_id": [1, 2], "price": [10., 20.]})
+            >>> df = df.set_model(Product)
+            >>> for product in df.iter_models():
+            ...     print(product)
+            ...
+            Product(product_id=1, price=10.0)
+            Product(product_id=2, price=20.0)
+
+        """
+        if not hasattr(self, "model"):
+            raise TypeError(
+                f"You must invoke {self.__class__.__name__}.set_model() "
+                f"before invoking {self.__class__.__name__}.iter_models()."
+            )
+
+        df = self.validate(drop_superfluous_columns=True) if validate_df else self
+
+        def _iter_models_with_validate(
+            _df: DataFrame[ModelType],
+        ) -> Iterator[ModelType]:
+            for row in _df.iter_rows(named=True):
+                yield self.model(**row)
+
+        def _iter_models_without_validate(
+            _df: DataFrame[ModelType],
+        ) -> Iterator[ModelType]:
+            for row in _df.iter_rows(named=True):
+                yield self.model.model_construct(**row)
+
+        _iter_models = (
+            _iter_models_with_validate
+            if validate_model
+            else _iter_models_without_validate
+        )
+        return ModelGenerator(_iter_models(df))
+
+    def _pydantic_model(self) -> type[Model]:
         """Dynamically construct patito model compliant with dataframe.
 
         Returns:
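Note (not part of the diff): a short usage sketch for the new iter_models(), which returns the ModelGenerator added earlier in this file; it can be consumed lazily or materialised with to_list() (model and data are illustrative):

```python
import patito as pt


class Product(pt.Model):
    product_id: int = pt.Field(unique=True)
    price: float


df = pt.DataFrame({"product_id": [1, 2], "price": [10.0, 20.0]}).set_model(Product)

products = df.iter_models().to_list()  # [Product(product_id=1, price=10.0), ...]
first = next(df.iter_models())         # ModelGenerator is itself an iterator
```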
@@ -790,7 +865,7 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
 
         pydantic_annotations = {column: (Any, ...) for column in self.columns}
         return cast(
-            Type[Model],
+            type[Model],
             create_model(  # type: ignore
                 "UntypedRow",
                 __base__=Model,
@@ -804,7 +879,7 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
 
     @classmethod
    def read_csv(  # type: ignore[no-untyped-def]
-        cls: Type[DF],
+        cls: type[DF],
        *args,  # noqa: ANN002
        **kwargs,  # noqa: ANN003
    ) -> DF:
@@ -888,15 +963,13 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
     # --- Type annotation overrides ---
     def filter(  # noqa: D102
         self: DF,
-        predicate: Union[
-            pl.Expr, str, pl.Series, list[bool], np.ndarray[Any, Any], bool
-        ],
+        predicate: pl.Expr | str | pl.Series | list[bool] | np.ndarray[Any, Any] | bool,
     ) -> DF:
         return cast(DF, super().filter(predicate))
 
     def select(  # noqa: D102
         self: DF,
-        *exprs: Union[IntoExpr, Iterable[IntoExpr]],
+        *exprs: IntoExpr | Iterable[IntoExpr],
         **named_exprs: IntoExpr,
     ) -> DF:
         return cast(  # pyright: ignore[redundant-cast]
@@ -905,7 +978,7 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
 
     def with_columns(  # noqa: D102
         self: DF,
-        *exprs: Union[IntoExpr, Iterable[IntoExpr]],
+        *exprs: IntoExpr | Iterable[IntoExpr],
         **named_exprs: IntoExpr,
     ) -> DF:
         return cast(DF, super().with_columns(*exprs, **named_exprs))