patito 0.6.1__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
patito/pydantic.py CHANGED
@@ -83,7 +83,6 @@ class ModelMetaclass(PydanticModelMetaclass, Generic[CI]):
83
83
  """Construct new patito model.
84
84
 
85
85
  Args:
86
- ----
87
86
  name: Name of model class.
88
87
  bases: Tuple of superclasses.
89
88
  clsdict: Dictionary containing class properties.
@@ -114,14 +113,12 @@ class ModelMetaclass(PydanticModelMetaclass, Generic[CI]):
114
113
  def model_schema(cls: Type[ModelType]) -> Mapping[str, Mapping[str, Any]]:
115
114
  """Return schema properties where definition references have been resolved.
116
115
 
117
- Returns
118
- -------
116
+ Returns:
119
117
  Field information as a dictionary where the keys are field names and the
120
118
  values are dictionaries containing metadata information about the field
121
119
  itself.
122
120
 
123
- Raises
124
- ------
121
+ Raises:
125
122
  TypeError: if a field is annotated with an enum where the values are of
126
123
  different types.
127
124
 
@@ -133,11 +130,9 @@ class ModelMetaclass(PydanticModelMetaclass, Generic[CI]):
133
130
  """Return the name of the dataframe columns specified by the fields of the model.
134
131
 
135
132
  Returns:
136
- -------
137
133
  List of column names.
138
134
 
139
135
  Example:
140
- -------
141
136
  >>> import patito as pt
142
137
  >>> class Product(pt.Model):
143
138
  ... name: str
@@ -157,11 +152,9 @@ class ModelMetaclass(PydanticModelMetaclass, Generic[CI]):
157
152
  is chosen for integer and float columns.
158
153
 
159
154
  Returns:
160
- -------
161
155
  A dictionary mapping string column names to polars dtype classes.
162
156
 
163
157
  Example:
164
- -------
165
158
  >>> import patito as pt
166
159
  >>> class Product(pt.Model):
167
160
  ... name: str
@@ -182,12 +175,10 @@ class ModelMetaclass(PydanticModelMetaclass, Generic[CI]):
182
175
 
183
176
  The first item of each list is the default dtype chosen by Patito.
184
177
 
185
- Returns
186
- -------
178
+ Returns:
187
179
  A dictionary mapping each column string name to a list of valid dtypes.
188
180
 
189
- Raises
190
- ------
181
+ Raises:
191
182
  NotImplementedError: If one or more model fields are annotated with types
192
183
  not compatible with polars.
193
184
 
@@ -199,11 +190,9 @@ class ModelMetaclass(PydanticModelMetaclass, Generic[CI]):
199
190
  """Return default field values specified on the model.
200
191
 
201
192
  Returns:
202
- -------
203
193
  Dictionary containing fields with their respective default values.
204
194
 
205
195
  Example:
206
- -------
207
196
  >>> from typing_extensions import Literal
208
197
  >>> import patito as pt
209
198
  >>> class Product(pt.Model):
@@ -226,11 +215,9 @@ class ModelMetaclass(PydanticModelMetaclass, Generic[CI]):
226
215
  """Return names of those columns that are non-nullable in the schema.
227
216
 
228
217
  Returns:
229
- -------
230
218
  Set of column name strings.
231
219
 
232
220
  Example:
233
- -------
234
221
  >>> from typing import Optional
235
222
  >>> import patito as pt
236
223
  >>> class MyModel(pt.Model):
@@ -257,11 +244,9 @@ class ModelMetaclass(PydanticModelMetaclass, Generic[CI]):
257
244
  """Return names of those columns that are nullable in the schema.
258
245
 
259
246
  Returns:
260
- -------
261
247
  Set of column name strings.
262
248
 
263
249
  Example:
264
- -------
265
250
  >>> from typing import Optional
266
251
  >>> import patito as pt
267
252
  >>> class MyModel(pt.Model):
@@ -281,11 +266,9 @@ class ModelMetaclass(PydanticModelMetaclass, Generic[CI]):
281
266
  """Return columns with uniqueness constraint.
282
267
 
283
268
  Returns:
284
- -------
285
269
  Set of column name strings.
286
270
 
287
271
  Example:
288
- -------
289
272
  >>> from typing import Optional
290
273
  >>> import patito as pt
291
274
 
@@ -335,20 +318,16 @@ class Model(BaseModel, metaclass=ModelMetaclass):
335
318
  """Represent a single data frame row as a Patito model.
336
319
 
337
320
  Args:
338
- ----
339
321
  row: A dataframe, either polars or pandas, consisting of a single row.
340
322
  validate: If ``False``, skip pydantic validation of the given row data.
341
323
 
342
324
  Returns:
343
- -------
344
325
  Model: A patito model representing the given row data.
345
326
 
346
327
  Raises:
347
- ------
348
328
  TypeError: If the given type is neither a pandas nor a polars DataFrame.
349
329
 
350
330
  Example:
351
- -------
352
331
  >>> import patito as pt
353
332
  >>> import polars as pl
354
333
 
@@ -387,24 +366,20 @@ class Model(BaseModel, metaclass=ModelMetaclass):
387
366
  """Construct model from a single polars row.
388
367
 
389
368
  Args:
390
- ----
391
369
  dataframe: A polars dataframe consisting of one single row.
392
370
  validate: If ``True``, run the pydantic validators. If ``False``, pydantic
393
371
  will not cast any types in the resulting object.
394
372
 
395
373
  Returns:
396
- -------
397
374
  Model: A pydantic model object representing the given polars row.
398
375
 
399
376
  Raises:
400
- ------
401
377
  TypeError: If the provided ``dataframe`` argument is not of type
402
378
  ``polars.DataFrame``.
403
379
  ValueError: If the given ``dataframe`` argument does not consist of exactly
404
380
  one row.
405
381
 
406
382
  Example:
407
- -------
408
383
  >>> import patito as pt
409
384
  >>> import polars as pl
410
385
 
@@ -447,28 +422,31 @@ class Model(BaseModel, metaclass=ModelMetaclass):
447
422
  cls,
448
423
  dataframe: Union["pd.DataFrame", pl.DataFrame],
449
424
  columns: Optional[Sequence[str]] = None,
425
+ allow_missing_columns: bool = False,
426
+ allow_superfluous_columns: bool = False,
450
427
  **kwargs,
451
428
  ) -> None:
452
429
  """Validate the schema and content of the given dataframe.
453
430
 
454
431
  Args:
455
- ----
456
432
  dataframe: Polars DataFrame to be validated.
457
433
  columns: Optional list of columns to validate. If not provided, all columns
458
434
  of the dataframe will be validated.
435
+ allow_missing_columns: If True, missing columns will not be considered an error.
436
+ allow_superfluous_columns: If True, additional columns will not be considered an error.
459
437
  **kwargs: Additional keyword arguments to be passed to the validation function.
460
438
 
439
+ Returns:
440
+ ``None``:
441
+
461
442
  Raises:
462
- ------
463
443
  patito.exceptions.DataFrameValidationError: If the given dataframe does not match
464
444
  the given schema.
465
445
 
466
446
  Examples:
467
- --------
468
447
  >>> import patito as pt
469
448
  >>> import polars as pl
470
449
 
471
-
472
450
  >>> class Product(pt.Model):
473
451
  ... product_id: int = pt.Field(unique=True)
474
452
  ... temperature_zone: Literal["dry", "cold", "frozen"]
@@ -495,7 +473,14 @@ class Model(BaseModel, metaclass=ModelMetaclass):
495
473
  Rows with invalid values: {'oven'}. (type=value_error.rowvalue)
496
474
 
497
475
  """
498
- validate(dataframe=dataframe, columns=columns, schema=cls, **kwargs)
476
+ validate(
477
+ dataframe=dataframe,
478
+ schema=cls,
479
+ columns=columns,
480
+ allow_missing_columns=allow_missing_columns,
481
+ allow_superfluous_columns=allow_superfluous_columns,
482
+ **kwargs,
483
+ )
499
484
 
500
485
  @classmethod
501
486
  def example_value( # noqa: C901
@@ -506,19 +491,16 @@ class Model(BaseModel, metaclass=ModelMetaclass):
506
491
  """Return a valid example value for the given model field.
507
492
 
508
493
  Args:
509
- ----
510
494
  field: Field name identifier.
495
+ properties: Pydantic v2-style properties dict
511
496
 
512
497
  Returns:
513
- -------
514
498
  A single value which is consistent with the given field definition.
515
499
 
516
500
  Raises:
517
- ------
518
501
  NotImplementedError: If the given field has no example generator.
519
502
 
520
503
  Example:
521
- -------
522
504
  >>> from typing import Literal
523
505
  >>> import patito as pt
524
506
 
@@ -680,22 +662,18 @@ class Model(BaseModel, metaclass=ModelMetaclass):
680
662
  The first item of ``typing.Literal`` annotations are used for dummy values.
681
663
 
682
664
  Args:
683
- ----
684
665
  **kwargs: Provide explicit values for any fields which should `not` be
685
666
  filled with dummy data.
686
667
 
687
668
  Returns:
688
- -------
689
669
  Model: A pydantic model object filled with dummy data for all unspecified
690
670
  model fields.
691
671
 
692
672
  Raises:
693
- ------
694
673
  TypeError: If one or more of the provided keyword arguments do not match any
695
674
  fields on the model.
696
675
 
697
676
  Example:
698
- -------
699
677
  >>> from typing import Literal
700
678
  >>> import patito as pt
701
679
 
@@ -735,7 +713,6 @@ class Model(BaseModel, metaclass=ModelMetaclass):
735
713
  the iterable arguments.
736
714
 
737
715
  Args:
738
- ----
739
716
  data: Data to populate the dummy dataframe with. If
740
717
  not a dict, column names must also be provided.
741
718
  columns: Ignored if data is a dict. If
@@ -743,17 +720,14 @@ class Model(BaseModel, metaclass=ModelMetaclass):
743
720
  resulting dataframe. Defaults to None.
744
721
 
745
722
  Returns:
746
- -------
747
723
  A pandas DataFrame filled with dummy example data.
748
724
 
749
725
  Raises:
750
- ------
751
726
  ImportError: If pandas has not been installed. You should install
752
727
  patito[pandas] in order to integrate patito with pandas.
753
728
  TypeError: If column names have not been specified in the input data.
754
729
 
755
730
  Example:
756
- -------
757
731
  >>> from typing import Literal
758
732
  >>> import patito as pt
759
733
 
@@ -807,7 +781,6 @@ class Model(BaseModel, metaclass=ModelMetaclass):
807
781
  This constructor accepts the same data format as polars.DataFrame.
808
782
 
809
783
  Args:
810
- ----
811
784
  data: Data to populate the dummy dataframe with. If given as an iterable of
812
785
  values, then column names must also be provided. If not provided at all,
813
786
  a dataframe with a single row populated with dummy data is provided.
@@ -816,17 +789,14 @@ class Model(BaseModel, metaclass=ModelMetaclass):
816
789
  column names in the resulting dataframe. Defaults to None.
817
790
 
818
791
  Returns:
819
- -------
820
792
  A polars dataframe where all unspecified columns have been filled with dummy
821
793
  data which should pass model validation.
822
794
 
823
795
  Raises:
824
- ------
825
796
  TypeError: If one or more of the model fields are not mappable to polars
826
797
  column dtype equivalents.
827
798
 
828
799
  Example:
829
- -------
830
800
  >>> from typing import Literal
831
801
  >>> import patito as pt
832
802
 
@@ -914,17 +884,14 @@ class Model(BaseModel, metaclass=ModelMetaclass):
914
884
  SQL operation making all the columns of the right table nullable.
915
885
 
916
886
  Args:
917
- ----
918
887
  other: Another patito Model class.
919
888
  how: The type of SQL Join operation.
920
889
 
921
890
  Returns:
922
- -------
923
891
  A new model type compatible with the resulting schema produced by the given
924
892
  join operation.
925
893
 
926
894
  Examples:
927
- --------
928
895
  >>> class A(Model):
929
896
  ... a: int
930
897
  ...
@@ -979,19 +946,15 @@ class Model(BaseModel, metaclass=ModelMetaclass):
979
946
  """Create a new model consisting of only a subset of the model fields.
980
947
 
981
948
  Args:
982
- ----
983
949
  fields: A single field name as a string or a collection of strings.
984
950
 
985
951
  Returns:
986
- -------
987
952
  A new model containing only the fields specified by ``fields``.
988
953
 
989
954
  Raises:
990
- ------
991
955
  ValueError: If one or more non-existent fields are selected.
992
956
 
993
957
  Example:
994
- -------
995
958
  >>> class MyModel(Model):
996
959
  ... a: int
997
960
  ... b: int
@@ -1025,16 +988,13 @@ class Model(BaseModel, metaclass=ModelMetaclass):
1025
988
  """Return a new model where one or more fields are excluded.
1026
989
 
1027
990
  Args:
1028
- ----
1029
991
  name: A single string field name, or a list of such field names,
1030
992
  which will be dropped.
1031
993
 
1032
994
  Returns:
1033
- -------
1034
995
  New model class where the given fields have been removed.
1035
996
 
1036
997
  Examples:
1037
- --------
1038
998
  >>> class MyModel(Model):
1039
999
  ... a: int
1040
1000
  ... b: int
@@ -1067,15 +1027,12 @@ class Model(BaseModel, metaclass=ModelMetaclass):
1067
1027
  """Return a new model where all field names have been prefixed.
1068
1028
 
1069
1029
  Args:
1070
- ----
1071
1030
  prefix: String prefix to add to all field names.
1072
1031
 
1073
1032
  Returns:
1074
- -------
1075
1033
  New model class with all the same fields only prefixed with the given prefix.
1076
1034
 
1077
1035
  Example:
1078
- -------
1079
1036
  >>> class MyModel(Model):
1080
1037
  ... a: int
1081
1038
  ... b: int
@@ -1096,16 +1053,13 @@ class Model(BaseModel, metaclass=ModelMetaclass):
1096
1053
  """Return a new model where all field names have been suffixed.
1097
1054
 
1098
1055
  Args:
1099
- ----
1100
1056
  suffix: String suffix to add to all field names.
1101
1057
 
1102
1058
  Returns:
1103
- -------
1104
1059
  New model class with all the same fields only suffixed with the given
1105
1060
  suffix.
1106
1061
 
1107
1062
  Example:
1108
- -------
1109
1063
  >>> class MyModel(Model):
1110
1064
  ... a: int
1111
1065
  ... b: int
@@ -1126,20 +1080,16 @@ class Model(BaseModel, metaclass=ModelMetaclass):
1126
1080
  """Return a new model class where the specified fields have been renamed.
1127
1081
 
1128
1082
  Args:
1129
- ----
1130
1083
  mapping: A dictionary where the keys are the old field names
1131
1084
  and the values are the new names.
1132
1085
 
1133
1086
  Returns:
1134
- -------
1135
1087
  A new model class where the given fields have been renamed.
1136
1088
 
1137
1089
  Raises:
1138
- ------
1139
1090
  ValueError: If non-existent fields are renamed.
1140
1091
 
1141
1092
  Example:
1142
- -------
1143
1093
  >>> class MyModel(Model):
1144
1094
  ... a: int
1145
1095
  ... b: int
@@ -1173,7 +1123,6 @@ class Model(BaseModel, metaclass=ModelMetaclass):
1173
1123
  """Return a new model class where the given fields have been added.
1174
1124
 
1175
1125
  Args:
1176
- ----
1177
1126
  **field_definitions: the keywords are of the form:
1178
1127
  ``field_name=(field_type, field_default)``.
1179
1128
  Specify ``...`` if no default value is provided.
@@ -1181,12 +1130,10 @@ class Model(BaseModel, metaclass=ModelMetaclass):
1181
1130
  integer field named ``"column_name"``.
1182
1131
 
1183
1132
  Returns:
1184
- -------
1185
1133
  A new model with all the original fields and the additional field
1186
1134
  definitions.
1187
1135
 
1188
1136
  Example:
1189
- -------
1190
1137
  >>> class MyModel(Model):
1191
1138
  ... a: int
1192
1139
  ...
@@ -1225,7 +1172,6 @@ class Model(BaseModel, metaclass=ModelMetaclass):
1225
1172
  """Derive a new model with new field definitions.
1226
1173
 
1227
1174
  Args:
1228
- ----
1229
1175
  model_name: Name of new model class.
1230
1176
  field_mapping: A mapping where the keys represent field names and the values
1231
1177
  represent field definitions. String field definitions are used as
@@ -1234,7 +1180,6 @@ class Model(BaseModel, metaclass=ModelMetaclass):
1234
1180
  pydantic.create_model.
1235
1181
 
1236
1182
  Returns:
1237
- -------
1238
1183
  A new model class derived from the model type of self.
1239
1184
 
1240
1185
  """
@@ -1255,7 +1200,7 @@ class Model(BaseModel, metaclass=ModelMetaclass):
1255
1200
  field_type = Optional[field_type]
1256
1201
  new_fields[new_field_name] = (field_type, field_definition[1])
1257
1202
  return create_model( # type: ignore
1258
- __model_name=model_name,
1203
+ model_name,
1259
1204
  __base__=Model,
1260
1205
  **new_fields,
1261
1206
  )
@@ -1293,44 +1238,31 @@ class Model(BaseModel, metaclass=ModelMetaclass):
1293
1238
  FIELD_KWARGS = getfullargspec(fields.Field)
1294
1239
 
1295
1240
 
1241
+ # Helper function for patito Field.
1242
+
1243
+
1296
1244
  def FieldCI(
1297
1245
  column_info: Type[ColumnInfo], *args: Any, **kwargs: Any
1298
1246
  ) -> Any: # annotate with Any to make the downstream type annotations happy
1299
- ci = column_info(**kwargs)
1300
- for field in ci.model_fields_set:
1301
- kwargs.pop(field)
1302
- if kwargs.pop("modern_kwargs_only", True):
1303
- for kwarg in kwargs:
1304
- if kwarg not in FIELD_KWARGS.kwonlyargs and kwarg not in FIELD_KWARGS.args:
1305
- raise ValueError(
1306
- f"unexpected kwarg {kwarg}={kwargs[kwarg]}. Add modern_kwargs_only=False to ignore"
1307
- )
1308
- return fields.Field(
1309
- *args,
1310
- json_schema_extra={"column_info": ci},
1311
- **kwargs,
1312
- )
1313
-
1314
-
1315
- Field = partial(FieldCI, column_info=ColumnInfo)
1316
-
1317
-
1318
- class FieldDoc:
1319
1247
  """Annotate model field with additional type and validation information.
1320
1248
 
1321
- This class is built on ``pydantic.Field`` and you can find its full documentation
1322
- `here <https://pydantic-docs.helpmanual.io/usage/schema/#field-customization>`_.
1249
+ This class is built on ``pydantic.Field`` and you can find the list of parameters
1250
+ in the `API reference <https://docs.pydantic.dev/latest/api/fields/>`_.
1323
1251
  Patito adds additional parameters which are used when validating dataframes,
1324
- these are documented here.
1252
+ these are documented here along with the main parameters which can be used for
1253
+ validation. Pydantic's `usage documentation <https://docs.pydantic.dev/latest/concepts/fields/>`_
1254
+ can be read with the below examples.
1325
1255
 
1326
1256
  Args:
1327
- ----
1257
+ column_info (Type[ColumnInfo]): ColumnInfo object to pass args to.
1328
1258
  constraints (Union[polars.Expression, List[polars.Expression]]): A single
1329
1259
  constraint or list of constraints, expressed as polars expression objects.
1330
1260
  All rows must satisfy the given constraint. You can refer to the given column
1331
1261
  with ``pt.field``, which will automatically be replaced with
1332
1262
  ``polars.col(<field_name>)`` before evaluation.
1333
- derived_from (Union[str, polars.Expr]): used to mark fields that are meant to be derived from other fields. Users can specify a polars expression that will be called to derive the column value when `pt.DataFrame.derive` is called.
1263
+ derived_from (Union[str, polars.Expr]): used to mark fields that are meant to be
1264
+ derived from other fields. Users can specify a polars expression that will
1265
+ be called to derive the column value when `pt.DataFrame.derive` is called.
1334
1266
  dtype (polars.datatype.DataType): The given dataframe column must have the given
1335
1267
  polars dtype, for instance ``polars.UInt64`` or ``pl.Float32``.
1336
1268
  unique (bool): All row values must be unique.
@@ -1344,14 +1276,14 @@ class FieldDoc:
1344
1276
  regex (str): UTF-8 string column must match regex pattern for all row values.
1345
1277
  min_length (int): Minimum length of all string values in a UTF-8 column.
1346
1278
  max_length (int): Maximum length of all string values in a UTF-8 column.
1279
+ args (Any): additional arguments to pass to pydantic's field.
1280
+ kwargs (Any): additional keyword arguments to pass to pydantic's field.
1347
1281
 
1348
1282
  Return:
1349
- ------
1350
- FieldInfo: Object used to represent additional constraints put upon the given
1351
- field.
1283
+ `FieldInfo <https://docs.pydantic.dev/latest/api/fields/#pydantic.fields.FieldInfo>`_:
1284
+ Object used to represent additional constraints put upon the given field.
1352
1285
 
1353
1286
  Examples:
1354
- --------
1355
1287
  >>> import patito as pt
1356
1288
  >>> import polars as pl
1357
1289
  >>> class Product(pt.Model):
@@ -1381,6 +1313,20 @@ class FieldDoc:
1381
1313
  Polars dtype Int64 does not match model field type. (type=type_error.columndtype)
1382
1314
 
1383
1315
  """
1316
+ ci = column_info(**kwargs)
1317
+ for field in ci.model_fields_set:
1318
+ kwargs.pop(field)
1319
+ if kwargs.pop("modern_kwargs_only", True):
1320
+ for kwarg in kwargs:
1321
+ if kwarg not in FIELD_KWARGS.kwonlyargs and kwarg not in FIELD_KWARGS.args:
1322
+ raise ValueError(
1323
+ f"unexpected kwarg {kwarg}={kwargs[kwarg]}. Add modern_kwargs_only=False to ignore"
1324
+ )
1325
+ return fields.Field(
1326
+ *args,
1327
+ json_schema_extra={"column_info": ci},
1328
+ **kwargs,
1329
+ )
1384
1330
 
1385
1331
 
1386
- Field.__doc__ = FieldDoc.__doc__
1332
+ Field = partial(FieldCI, column_info=ColumnInfo)