patito 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- patito/__init__.py +12 -6
- patito/database.py +658 -0
- patito/duckdb.py +153 -186
- patito/polars.py +52 -45
- patito/pydantic.py +99 -88
- patito/sql.py +2 -3
- patito/validators.py +87 -1
- patito/xdg.py +22 -0
- {patito-0.4.3.dist-info → patito-0.5.0.dist-info}/LICENSE +1 -0
- {patito-0.4.3.dist-info → patito-0.5.0.dist-info}/METADATA +18 -17
- patito-0.5.0.dist-info/RECORD +14 -0
- {patito-0.4.3.dist-info → patito-0.5.0.dist-info}/WHEEL +1 -1
- patito-0.4.3.dist-info/RECORD +0 -12
patito/polars.py
CHANGED
|
@@ -4,10 +4,10 @@ from __future__ import annotations
|
|
|
4
4
|
from typing import (
|
|
5
5
|
TYPE_CHECKING,
|
|
6
6
|
Any,
|
|
7
|
+
Collection,
|
|
7
8
|
Generic,
|
|
8
|
-
|
|
9
|
+
Iterable,
|
|
9
10
|
Optional,
|
|
10
|
-
Sequence,
|
|
11
11
|
Type,
|
|
12
12
|
TypeVar,
|
|
13
13
|
Union,
|
|
@@ -15,6 +15,7 @@ from typing import (
|
|
|
15
15
|
)
|
|
16
16
|
|
|
17
17
|
import polars as pl
|
|
18
|
+
from polars.type_aliases import IntoExpr
|
|
18
19
|
from pydantic import create_model
|
|
19
20
|
from typing_extensions import Literal
|
|
20
21
|
|
|
@@ -71,9 +72,10 @@ class LazyFrame(pl.LazyFrame, Generic[ModelType]):
|
|
|
71
72
|
predicate_pushdown: bool = True,
|
|
72
73
|
projection_pushdown: bool = True,
|
|
73
74
|
simplify_expression: bool = True,
|
|
74
|
-
string_cache: bool = False,
|
|
75
75
|
no_optimization: bool = False,
|
|
76
76
|
slice_pushdown: bool = True,
|
|
77
|
+
common_subplan_elimination: bool = True,
|
|
78
|
+
streaming: bool = False,
|
|
77
79
|
) -> "DataFrame[ModelType]": # noqa: DAR101, DAR201
|
|
78
80
|
"""
|
|
79
81
|
Collect into a DataFrame.
|
|
@@ -86,9 +88,10 @@ class LazyFrame(pl.LazyFrame, Generic[ModelType]):
|
|
|
86
88
|
predicate_pushdown=predicate_pushdown,
|
|
87
89
|
projection_pushdown=projection_pushdown,
|
|
88
90
|
simplify_expression=simplify_expression,
|
|
89
|
-
string_cache=string_cache,
|
|
90
91
|
no_optimization=no_optimization,
|
|
91
92
|
slice_pushdown=slice_pushdown,
|
|
93
|
+
common_subplan_elimination=common_subplan_elimination,
|
|
94
|
+
streaming=streaming,
|
|
92
95
|
)
|
|
93
96
|
if getattr(self, "model", False):
|
|
94
97
|
cls = DataFrame._construct_dataframe_model_class(model=self.model)
|
|
@@ -207,11 +210,8 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
|
|
|
207
210
|
│ i64 ┆ str │
|
|
208
211
|
╞══════╪════════╡
|
|
209
212
|
│ 1 ┆ A │
|
|
210
|
-
├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
|
|
211
213
|
│ 1 ┆ B │
|
|
212
|
-
├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
|
|
213
214
|
│ 2 ┆ A │
|
|
214
|
-
├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
|
|
215
215
|
│ 2 ┆ B │
|
|
216
216
|
└──────┴────────┘
|
|
217
217
|
>>> casted_classes = classes.cast()
|
|
@@ -223,11 +223,8 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
|
|
|
223
223
|
│ u16 ┆ cat │
|
|
224
224
|
╞══════╪════════╡
|
|
225
225
|
│ 1 ┆ A │
|
|
226
|
-
├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
|
|
227
226
|
│ 1 ┆ B │
|
|
228
|
-
├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
|
|
229
227
|
│ 2 ┆ A │
|
|
230
|
-
├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
|
|
231
228
|
│ 2 ┆ B │
|
|
232
229
|
└──────┴────────┘
|
|
233
230
|
>>> casted_classes.validate()
|
|
@@ -290,7 +287,11 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
|
|
|
290
287
|
columns.append(pl.col(column).cast(default_dtypes[column]))
|
|
291
288
|
return self.with_columns(columns)
|
|
292
289
|
|
|
293
|
-
def drop(
|
|
290
|
+
def drop(
|
|
291
|
+
self: DF,
|
|
292
|
+
columns: Optional[Union[str, Collection[str]]] = None,
|
|
293
|
+
*more_columns: str,
|
|
294
|
+
) -> DF:
|
|
294
295
|
"""
|
|
295
296
|
Drop one or more columns from the dataframe.
|
|
296
297
|
|
|
@@ -299,9 +300,10 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
|
|
|
299
300
|
:ref:`DataFrame.set_model <DataFrame.set_model>`, are dropped.
|
|
300
301
|
|
|
301
302
|
Args:
|
|
302
|
-
|
|
303
|
+
columns: A single column string name, or list of strings, indicating
|
|
303
304
|
which columns to drop. If not specified, all columns *not*
|
|
304
305
|
specified by the associated dataframe model will be dropped.
|
|
306
|
+
more_columns: Additional named columns to drop.
|
|
305
307
|
|
|
306
308
|
Returns:
|
|
307
309
|
DataFrame[Model]: New dataframe without the specified columns.
|
|
@@ -319,13 +321,12 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
|
|
|
319
321
|
│ i64 │
|
|
320
322
|
╞══════════╡
|
|
321
323
|
│ 1 │
|
|
322
|
-
├╌╌╌╌╌╌╌╌╌╌┤
|
|
323
324
|
│ 2 │
|
|
324
325
|
└──────────┘
|
|
325
326
|
|
|
326
327
|
"""
|
|
327
|
-
if
|
|
328
|
-
return super().drop(
|
|
328
|
+
if columns is not None:
|
|
329
|
+
return self._from_pydf(super().drop(columns)._df)
|
|
329
330
|
else:
|
|
330
331
|
return self.drop(list(set(self.columns) - set(self.model.columns)))
|
|
331
332
|
|
|
@@ -416,7 +417,6 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
|
|
|
416
417
|
│ i64 ┆ i64 ┆ i64 │
|
|
417
418
|
╞═════╪═════╪════════════╡
|
|
418
419
|
│ 1 ┆ 1 ┆ 2 │
|
|
419
|
-
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
|
420
420
|
│ 2 ┆ 2 ┆ 4 │
|
|
421
421
|
└─────┴─────┴────────────┘
|
|
422
422
|
"""
|
|
@@ -426,11 +426,11 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
|
|
|
426
426
|
derived_from = props["derived_from"]
|
|
427
427
|
dtype = self.model.dtypes[column_name]
|
|
428
428
|
if isinstance(derived_from, str):
|
|
429
|
-
df = df.
|
|
429
|
+
df = df.with_columns(
|
|
430
430
|
pl.col(derived_from).cast(dtype).alias(column_name)
|
|
431
431
|
)
|
|
432
432
|
elif isinstance(derived_from, pl.Expr):
|
|
433
|
-
df = df.
|
|
433
|
+
df = df.with_columns(derived_from.cast(dtype).alias(column_name))
|
|
434
434
|
else:
|
|
435
435
|
raise TypeError(
|
|
436
436
|
"Can not derive dataframe column from type "
|
|
@@ -447,6 +447,7 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
|
|
|
447
447
|
]
|
|
448
448
|
] = None,
|
|
449
449
|
limit: Optional[int] = None,
|
|
450
|
+
matches_supertype: bool = True,
|
|
450
451
|
) -> DF:
|
|
451
452
|
"""
|
|
452
453
|
Fill null values using a filling strategy, literal, or ``Expr``.
|
|
@@ -461,6 +462,8 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
|
|
|
461
462
|
provided.
|
|
462
463
|
limit: The number of consecutive null values to forward/backward fill.
|
|
463
464
|
Only valid if ``strategy`` is ``"forward"`` or ``"backward"``.
|
|
465
|
+
matches_supertype: Fill all matching supertype of the fill ``value``.
|
|
466
|
+
|
|
464
467
|
|
|
465
468
|
Returns:
|
|
466
469
|
DataFrame[Model]: A new dataframe with nulls filled in according to the
|
|
@@ -483,13 +486,18 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
|
|
|
483
486
|
│ str ┆ i64 │
|
|
484
487
|
╞════════╪═══════╡
|
|
485
488
|
│ apple ┆ 10 │
|
|
486
|
-
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
|
487
489
|
│ banana ┆ 19 │
|
|
488
490
|
└────────┴───────┘
|
|
489
491
|
"""
|
|
490
492
|
if strategy != "defaults": # pragma: no cover
|
|
491
|
-
return cast( #
|
|
492
|
-
DF,
|
|
493
|
+
return cast( # pyright: ignore[redundant-cast]
|
|
494
|
+
DF,
|
|
495
|
+
super().fill_null(
|
|
496
|
+
value=value,
|
|
497
|
+
strategy=strategy,
|
|
498
|
+
limit=limit,
|
|
499
|
+
matches_supertype=matches_supertype,
|
|
500
|
+
),
|
|
493
501
|
)
|
|
494
502
|
return self.with_columns(
|
|
495
503
|
[
|
|
@@ -596,7 +604,7 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
|
|
|
596
604
|
create_model( # type: ignore
|
|
597
605
|
"UntypedRow",
|
|
598
606
|
__base__=Model,
|
|
599
|
-
**pydantic_annotations,
|
|
607
|
+
**pydantic_annotations, # pyright: ignore
|
|
600
608
|
),
|
|
601
609
|
)
|
|
602
610
|
|
|
@@ -651,15 +659,17 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
|
|
|
651
659
|
... b: str = pt.Field(derived_from="source_of_b")
|
|
652
660
|
...
|
|
653
661
|
>>> csv_file = io.StringIO("a,source_of_b\n1,1")
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
│
|
|
660
|
-
|
|
661
|
-
│
|
|
662
|
-
|
|
662
|
+
|
|
663
|
+
|
|
664
|
+
# >>> CSVModel.DataFrame.read_csv(csv_file).drop()
|
|
665
|
+
# shape: (1, 2)
|
|
666
|
+
# ┌─────┬─────┐
|
|
667
|
+
# │ a ┆ b │
|
|
668
|
+
# │ --- ┆ --- │
|
|
669
|
+
# │ f64 ┆ str │
|
|
670
|
+
# ╞═════╪═════╡
|
|
671
|
+
# │ 1.0 ┆ 1 │
|
|
672
|
+
# └─────┴─────┘
|
|
663
673
|
"""
|
|
664
674
|
kwargs.setdefault("dtypes", cls.model.dtypes)
|
|
665
675
|
if not kwargs.get("has_header", True) and "columns" not in kwargs:
|
|
@@ -670,27 +680,24 @@ class DataFrame(pl.DataFrame, Generic[ModelType]):
|
|
|
670
680
|
# --- Type annotation overrides ---
|
|
671
681
|
def filter( # noqa: D102
|
|
672
682
|
self: DF,
|
|
673
|
-
predicate: Union[
|
|
683
|
+
predicate: Union[
|
|
684
|
+
pl.Expr, str, pl.Series, list[bool], np.ndarray[Any, Any], bool
|
|
685
|
+
],
|
|
674
686
|
) -> DF:
|
|
675
687
|
return cast(DF, super().filter(predicate=predicate))
|
|
676
688
|
|
|
677
689
|
def select( # noqa: D102
|
|
678
690
|
self: DF,
|
|
679
|
-
exprs: Union[
|
|
691
|
+
*exprs: Union[IntoExpr, Iterable[IntoExpr]],
|
|
692
|
+
**named_exprs: IntoExpr,
|
|
680
693
|
) -> DF:
|
|
681
|
-
return cast(
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
return cast(DF, super().with_column(column=column))
|
|
694
|
+
return cast( # pyright: ignore[redundant-cast]
|
|
695
|
+
DF, super().select(*exprs, **named_exprs)
|
|
696
|
+
)
|
|
685
697
|
|
|
686
698
|
def with_columns( # noqa: D102
|
|
687
699
|
self: DF,
|
|
688
|
-
exprs: Union[
|
|
689
|
-
|
|
690
|
-
pl.Series,
|
|
691
|
-
Sequence[Union[pl.Expr, pl.Series]],
|
|
692
|
-
None,
|
|
693
|
-
] = None,
|
|
694
|
-
**named_exprs: Union[pl.Expr, pl.Series],
|
|
700
|
+
*exprs: Union[IntoExpr, Iterable[IntoExpr]],
|
|
701
|
+
**named_exprs: IntoExpr,
|
|
695
702
|
) -> DF:
|
|
696
|
-
return cast(DF, super().with_columns(exprs
|
|
703
|
+
return cast(DF, super().with_columns(*exprs, **named_exprs))
|
patito/pydantic.py
CHANGED
|
@@ -19,6 +19,7 @@ from typing import (
|
|
|
19
19
|
)
|
|
20
20
|
|
|
21
21
|
import polars as pl
|
|
22
|
+
from polars.datatypes import PolarsDataType
|
|
22
23
|
from pydantic import BaseConfig, BaseModel, Field, create_model # noqa: F401
|
|
23
24
|
from pydantic.main import ModelMetaclass as PydanticModelMetaclass
|
|
24
25
|
from typing_extensions import Literal, get_args
|
|
@@ -110,7 +111,7 @@ class ModelMetaclass(PydanticModelMetaclass):
|
|
|
110
111
|
|
|
111
112
|
@property
|
|
112
113
|
def dtypes( # type: ignore
|
|
113
|
-
cls: Type[ModelType],
|
|
114
|
+
cls: Type[ModelType], # pyright: ignore
|
|
114
115
|
) -> dict[str, Type[pl.DataType]]:
|
|
115
116
|
"""
|
|
116
117
|
Return the polars dtypes of the dataframe.
|
|
@@ -129,18 +130,16 @@ class ModelMetaclass(PydanticModelMetaclass):
|
|
|
129
130
|
... price: float
|
|
130
131
|
...
|
|
131
132
|
>>> Product.dtypes
|
|
132
|
-
{'name':
|
|
133
|
-
'ideal_temperature': <class 'polars.datatypes.Int64'>, \
|
|
134
|
-
'price': <class 'polars.datatypes.Float64'>}
|
|
133
|
+
{'name': Utf8, 'ideal_temperature': Int64, 'price': Float64}
|
|
135
134
|
"""
|
|
136
135
|
return {
|
|
137
136
|
column: valid_dtypes[0] for column, valid_dtypes in cls.valid_dtypes.items()
|
|
138
137
|
}
|
|
139
138
|
|
|
140
139
|
@property
|
|
141
|
-
def valid_dtypes( # type: ignore
|
|
142
|
-
cls: Type[ModelType],
|
|
143
|
-
) -> dict[str, List[
|
|
140
|
+
def valid_dtypes( # type: ignore
|
|
141
|
+
cls: Type[ModelType], # pyright: ignore
|
|
142
|
+
) -> dict[str, List[Union[pl.PolarsDataType, pl.List]]]:
|
|
144
143
|
"""
|
|
145
144
|
Return a list of polars dtypes which Patito considers valid for each field.
|
|
146
145
|
|
|
@@ -164,82 +163,91 @@ class ModelMetaclass(PydanticModelMetaclass):
|
|
|
164
163
|
... float_column: float
|
|
165
164
|
...
|
|
166
165
|
>>> pprint(MyModel.valid_dtypes)
|
|
167
|
-
{'bool_column': [
|
|
168
|
-
'float_column': [
|
|
169
|
-
|
|
170
|
-
'
|
|
171
|
-
<class 'polars.datatypes.Int32'>,
|
|
172
|
-
<class 'polars.datatypes.Int16'>,
|
|
173
|
-
<class 'polars.datatypes.Int8'>,
|
|
174
|
-
<class 'polars.datatypes.UInt64'>,
|
|
175
|
-
<class 'polars.datatypes.UInt32'>,
|
|
176
|
-
<class 'polars.datatypes.UInt16'>,
|
|
177
|
-
<class 'polars.datatypes.UInt8'>],
|
|
178
|
-
'str_column': [<class 'polars.datatypes.Utf8'>]}
|
|
166
|
+
{'bool_column': [Boolean],
|
|
167
|
+
'float_column': [Float64, Float32],
|
|
168
|
+
'int_column': [Int64, Int32, Int16, Int8, UInt64, UInt32, UInt16, UInt8],
|
|
169
|
+
'str_column': [Utf8]}
|
|
179
170
|
"""
|
|
180
171
|
valid_dtypes = {}
|
|
181
172
|
for column, props in cls._schema_properties().items():
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
173
|
+
column_dtypes: List[Union[PolarsDataType, pl.List]]
|
|
174
|
+
if props.get("type") == "array":
|
|
175
|
+
array_props = props["items"]
|
|
176
|
+
item_dtypes = cls._valid_dtypes(props=array_props)
|
|
177
|
+
if item_dtypes is None:
|
|
178
|
+
raise NotImplementedError(
|
|
179
|
+
f"No valid dtype mapping found for column '{column}'."
|
|
180
|
+
)
|
|
181
|
+
column_dtypes = [pl.List(dtype) for dtype in item_dtypes]
|
|
182
|
+
else:
|
|
183
|
+
column_dtypes = cls._valid_dtypes(props=props) # pyright: ignore
|
|
184
|
+
|
|
185
|
+
if column_dtypes is None:
|
|
189
186
|
raise NotImplementedError(
|
|
190
187
|
f"No valid dtype mapping found for column '{column}'."
|
|
191
188
|
)
|
|
192
|
-
|
|
193
|
-
valid_dtypes[column] = [
|
|
194
|
-
pl.Int64,
|
|
195
|
-
pl.Int32,
|
|
196
|
-
pl.Int16,
|
|
197
|
-
pl.Int8,
|
|
198
|
-
pl.UInt64,
|
|
199
|
-
pl.UInt32,
|
|
200
|
-
pl.UInt16,
|
|
201
|
-
pl.UInt8,
|
|
202
|
-
]
|
|
203
|
-
elif props["type"] == "number":
|
|
204
|
-
if props.get("format") == "time-delta":
|
|
205
|
-
valid_dtypes[column] = [
|
|
206
|
-
pl.Duration,
|
|
207
|
-
] # pyright: reportPrivateImportUsage=false
|
|
208
|
-
else:
|
|
209
|
-
valid_dtypes[column] = [pl.Float64, pl.Float32]
|
|
210
|
-
elif props["type"] == "boolean":
|
|
211
|
-
valid_dtypes[column] = [
|
|
212
|
-
pl.Boolean,
|
|
213
|
-
]
|
|
214
|
-
elif props["type"] == "string":
|
|
215
|
-
string_format = props.get("format")
|
|
216
|
-
if string_format is None:
|
|
217
|
-
valid_dtypes[column] = [
|
|
218
|
-
pl.Utf8,
|
|
219
|
-
]
|
|
220
|
-
elif string_format == "date":
|
|
221
|
-
valid_dtypes[column] = [
|
|
222
|
-
pl.Date,
|
|
223
|
-
]
|
|
224
|
-
# TODO: Find out why this branch is not being hit
|
|
225
|
-
elif string_format == "date-time": # pragma: no cover
|
|
226
|
-
valid_dtypes[column] = [
|
|
227
|
-
pl.Datetime,
|
|
228
|
-
]
|
|
229
|
-
elif props["type"] == "null":
|
|
230
|
-
valid_dtypes[column] = [
|
|
231
|
-
pl.Null,
|
|
232
|
-
]
|
|
233
|
-
else: # pragma: no cover
|
|
234
|
-
raise NotImplementedError(
|
|
235
|
-
f"No valid dtype mapping found for column '{column}'"
|
|
236
|
-
)
|
|
189
|
+
valid_dtypes[column] = column_dtypes
|
|
237
190
|
|
|
238
191
|
return valid_dtypes
|
|
239
192
|
|
|
193
|
+
@staticmethod
|
|
194
|
+
def _valid_dtypes( # noqa: C901
|
|
195
|
+
props: Dict,
|
|
196
|
+
) -> Optional[List[pl.PolarsDataType]]:
|
|
197
|
+
"""
|
|
198
|
+
Map schema property to list of valid polars data types.
|
|
199
|
+
|
|
200
|
+
Args:
|
|
201
|
+
props: Dictionary value retrieved from BaseModel._schema_properties().
|
|
202
|
+
|
|
203
|
+
Returns:
|
|
204
|
+
List of valid dtypes. None if no mapping exists.
|
|
205
|
+
"""
|
|
206
|
+
if "dtype" in props:
|
|
207
|
+
return [
|
|
208
|
+
props["dtype"],
|
|
209
|
+
]
|
|
210
|
+
elif "enum" in props and props["type"] == "string":
|
|
211
|
+
return [pl.Categorical, pl.Utf8]
|
|
212
|
+
elif "type" not in props:
|
|
213
|
+
return None
|
|
214
|
+
elif props["type"] == "integer":
|
|
215
|
+
return [
|
|
216
|
+
pl.Int64,
|
|
217
|
+
pl.Int32,
|
|
218
|
+
pl.Int16,
|
|
219
|
+
pl.Int8,
|
|
220
|
+
pl.UInt64,
|
|
221
|
+
pl.UInt32,
|
|
222
|
+
pl.UInt16,
|
|
223
|
+
pl.UInt8,
|
|
224
|
+
]
|
|
225
|
+
elif props["type"] == "number":
|
|
226
|
+
if props.get("format") == "time-delta":
|
|
227
|
+
return [pl.Duration]
|
|
228
|
+
else:
|
|
229
|
+
return [pl.Float64, pl.Float32]
|
|
230
|
+
elif props["type"] == "boolean":
|
|
231
|
+
return [pl.Boolean]
|
|
232
|
+
elif props["type"] == "string":
|
|
233
|
+
string_format = props.get("format")
|
|
234
|
+
if string_format is None:
|
|
235
|
+
return [pl.Utf8]
|
|
236
|
+
elif string_format == "date":
|
|
237
|
+
return [pl.Date]
|
|
238
|
+
# TODO: Find out why this branch is not being hit
|
|
239
|
+
elif string_format == "date-time": # pragma: no cover
|
|
240
|
+
return [pl.Datetime]
|
|
241
|
+
else:
|
|
242
|
+
return None # pragma: no cover
|
|
243
|
+
elif props["type"] == "null":
|
|
244
|
+
return [pl.Null]
|
|
245
|
+
else: # pragma: no cover
|
|
246
|
+
return None
|
|
247
|
+
|
|
240
248
|
@property
|
|
241
249
|
def valid_sql_types( # type: ignore # noqa: C901
|
|
242
|
-
cls: Type[ModelType],
|
|
250
|
+
cls: Type[ModelType], # pyright: ignore
|
|
243
251
|
) -> dict[str, List["DuckDBSQLType"]]:
|
|
244
252
|
"""
|
|
245
253
|
Return a list of DuckDB SQL types which Patito considers valid for each field.
|
|
@@ -302,7 +310,7 @@ class ModelMetaclass(PydanticModelMetaclass):
|
|
|
302
310
|
from patito.duckdb import _enum_type_name
|
|
303
311
|
|
|
304
312
|
# fmt: off
|
|
305
|
-
valid_dtypes[column] = [
|
|
313
|
+
valid_dtypes[column] = [ # pyright: ignore
|
|
306
314
|
_enum_type_name(field_properties=props), # type: ignore
|
|
307
315
|
"VARCHAR", "CHAR", "BPCHAR", "TEXT", "STRING",
|
|
308
316
|
]
|
|
@@ -374,7 +382,7 @@ class ModelMetaclass(PydanticModelMetaclass):
|
|
|
374
382
|
|
|
375
383
|
@property
|
|
376
384
|
def sql_types( # type: ignore
|
|
377
|
-
cls: Type[ModelType],
|
|
385
|
+
cls: Type[ModelType], # pyright: ignore
|
|
378
386
|
) -> dict[str, str]:
|
|
379
387
|
"""
|
|
380
388
|
Return compatible DuckDB SQL types for all model fields.
|
|
@@ -405,7 +413,7 @@ class ModelMetaclass(PydanticModelMetaclass):
|
|
|
405
413
|
|
|
406
414
|
@property
|
|
407
415
|
def defaults( # type: ignore
|
|
408
|
-
cls: Type[ModelType],
|
|
416
|
+
cls: Type[ModelType], # pyright: ignore
|
|
409
417
|
) -> dict[str, Any]:
|
|
410
418
|
"""
|
|
411
419
|
Return default field values specified on the model.
|
|
@@ -432,7 +440,7 @@ class ModelMetaclass(PydanticModelMetaclass):
|
|
|
432
440
|
|
|
433
441
|
@property
|
|
434
442
|
def non_nullable_columns( # type: ignore
|
|
435
|
-
cls: Type[ModelType], # pyright:
|
|
443
|
+
cls: Type[ModelType], # pyright: ignore
|
|
436
444
|
) -> set[str]:
|
|
437
445
|
"""
|
|
438
446
|
Return names of those columns that are non-nullable in the schema.
|
|
@@ -456,7 +464,7 @@ class ModelMetaclass(PydanticModelMetaclass):
|
|
|
456
464
|
|
|
457
465
|
@property
|
|
458
466
|
def nullable_columns( # type: ignore
|
|
459
|
-
cls: Type[ModelType], # pyright:
|
|
467
|
+
cls: Type[ModelType], # pyright: ignore
|
|
460
468
|
) -> set[str]:
|
|
461
469
|
"""
|
|
462
470
|
Return names of those columns that are nullable in the schema.
|
|
@@ -480,7 +488,7 @@ class ModelMetaclass(PydanticModelMetaclass):
|
|
|
480
488
|
|
|
481
489
|
@property
|
|
482
490
|
def unique_columns( # type: ignore
|
|
483
|
-
cls: Type[ModelType],
|
|
491
|
+
cls: Type[ModelType], # pyright: ignore
|
|
484
492
|
) -> set[str]:
|
|
485
493
|
"""
|
|
486
494
|
Return columns with uniqueness constraint.
|
|
@@ -529,14 +537,18 @@ class Model(BaseModel, metaclass=ModelMetaclass):
|
|
|
529
537
|
|
|
530
538
|
defaults: ClassVar[Dict[str, Any]]
|
|
531
539
|
|
|
532
|
-
@classmethod
|
|
540
|
+
@classmethod # type: ignore[misc]
|
|
533
541
|
@property
|
|
534
|
-
def DataFrame(
|
|
542
|
+
def DataFrame(
|
|
543
|
+
cls: Type[ModelType],
|
|
544
|
+
) -> Type[DataFrame[ModelType]]: # pyright: ignore # noqa
|
|
535
545
|
"""Return DataFrame class where DataFrame.set_model() is set to self."""
|
|
536
546
|
|
|
537
|
-
@classmethod
|
|
547
|
+
@classmethod # type: ignore[misc]
|
|
538
548
|
@property
|
|
539
|
-
def LazyFrame(
|
|
549
|
+
def LazyFrame(
|
|
550
|
+
cls: Type[ModelType],
|
|
551
|
+
) -> Type[LazyFrame[ModelType]]: # pyright: ignore
|
|
540
552
|
"""Return DataFrame class where DataFrame.set_model() is set to self."""
|
|
541
553
|
|
|
542
554
|
@classmethod
|
|
@@ -570,7 +582,7 @@ class Model(BaseModel, metaclass=ModelMetaclass):
|
|
|
570
582
|
|
|
571
583
|
>>> df = pl.DataFrame(
|
|
572
584
|
... [["1", "product name", "1.22"]],
|
|
573
|
-
...
|
|
585
|
+
... schema=["product_id", "name", "price"],
|
|
574
586
|
... )
|
|
575
587
|
>>> Product.from_row(df)
|
|
576
588
|
Product(product_id=1, name='product name', price=1.22)
|
|
@@ -582,7 +594,7 @@ class Model(BaseModel, metaclass=ModelMetaclass):
|
|
|
582
594
|
elif _PANDAS_AVAILABLE and isinstance(row, pd.DataFrame):
|
|
583
595
|
dataframe = pl.DataFrame._from_pandas(row)
|
|
584
596
|
elif _PANDAS_AVAILABLE and isinstance(row, pd.Series): # type: ignore[unreachable]
|
|
585
|
-
return cls(**dict(row.
|
|
597
|
+
return cls(**dict(row.items())) # type: ignore[unreachable]
|
|
586
598
|
else:
|
|
587
599
|
raise TypeError(f"{cls.__name__}.from_row not implemented for {type(row)}.")
|
|
588
600
|
return cls._from_polars(dataframe=dataframe, validate=validate)
|
|
@@ -622,7 +634,7 @@ class Model(BaseModel, metaclass=ModelMetaclass):
|
|
|
622
634
|
|
|
623
635
|
>>> df = pl.DataFrame(
|
|
624
636
|
... [["1", "product name", "1.22"]],
|
|
625
|
-
...
|
|
637
|
+
... schema=["product_id", "name", "price"],
|
|
626
638
|
... )
|
|
627
639
|
>>> Product._from_polars(df)
|
|
628
640
|
Product(product_id=1, name='product name', price=1.22)
|
|
@@ -977,7 +989,6 @@ class Model(BaseModel, metaclass=ModelMetaclass):
|
|
|
977
989
|
│ str ┆ cat ┆ i64 │
|
|
978
990
|
╞═══════════╪══════════════════╪════════════╡
|
|
979
991
|
│ product A ┆ dry ┆ 0 │
|
|
980
|
-
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
|
|
981
992
|
│ product B ┆ dry ┆ 1 │
|
|
982
993
|
└───────────┴──────────────────┴────────────┘
|
|
983
994
|
"""
|
|
@@ -1461,8 +1472,8 @@ class FieldDoc:
|
|
|
1461
1472
|
product_id
|
|
1462
1473
|
2 rows with duplicated values. (type=value_error.rowvalue)
|
|
1463
1474
|
price
|
|
1464
|
-
Polars dtype
|
|
1465
|
-
|
|
1475
|
+
Polars dtype Int64 does not match model field type. \
|
|
1476
|
+
(type=type_error.columndtype)
|
|
1466
1477
|
brand_color
|
|
1467
1478
|
2 rows with out of bound values. (type=value_error.rowvalue)
|
|
1468
1479
|
"""
|
patito/sql.py
CHANGED
|
@@ -45,7 +45,7 @@ class Case:
|
|
|
45
45
|
|
|
46
46
|
Examples:
|
|
47
47
|
>>> import patito as pt
|
|
48
|
-
>>> db = pt.Database()
|
|
48
|
+
>>> db = pt.duckdb.Database()
|
|
49
49
|
>>> relation = db.to_relation("select 1 as a union select 2 as a")
|
|
50
50
|
>>> case_statement = pt.sql.Case(
|
|
51
51
|
... on_column="a",
|
|
@@ -53,7 +53,7 @@ class Case:
|
|
|
53
53
|
... default="three",
|
|
54
54
|
... as_column="b",
|
|
55
55
|
... )
|
|
56
|
-
>>> relation.select(f"*, {case_statement}").to_df()
|
|
56
|
+
>>> relation.select(f"*, {case_statement}").order(by="a").to_df()
|
|
57
57
|
shape: (2, 2)
|
|
58
58
|
┌─────┬─────┐
|
|
59
59
|
│ a ┆ b │
|
|
@@ -61,7 +61,6 @@ class Case:
|
|
|
61
61
|
│ i64 ┆ str │
|
|
62
62
|
╞═════╪═════╡
|
|
63
63
|
│ 1 ┆ one │
|
|
64
|
-
├╌╌╌╌╌┼╌╌╌╌╌┤
|
|
65
64
|
│ 2 ┆ two │
|
|
66
65
|
└─────┴─────┘
|
|
67
66
|
"""
|