patito 0.6.1__tar.gz → 0.6.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,13 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: patito
3
- Version: 0.6.1
3
+ Version: 0.6.2
4
4
  Summary: A dataframe modelling library built on top of polars and pydantic.
5
- Home-page: https://github.com/kolonialno/patito
5
+ Home-page: https://github.com/JakobGM/patito
6
6
  License: MIT
7
7
  Keywords: validation,dataframe
8
8
  Author: Jakob Gerhard Martinussen
9
9
  Author-email: jakobgm@gmail.com
10
- Requires-Python: >=3.9,<4.0
10
+ Requires-Python: >=3.9
11
11
  Classifier: License :: OSI Approved :: MIT License
12
12
  Classifier: Programming Language :: Python :: 3
13
13
  Classifier: Programming Language :: Python :: 3.9
@@ -17,10 +17,10 @@ Provides-Extra: caching
17
17
  Provides-Extra: docs
18
18
  Provides-Extra: pandas
19
19
  Requires-Dist: Sphinx (<7) ; extra == "docs"
20
- Requires-Dist: pandas ; (python_version >= "3.9" and python_version < "4.0") and (extra == "pandas")
20
+ Requires-Dist: pandas ; extra == "pandas"
21
21
  Requires-Dist: polars (>=0.20.1)
22
- Requires-Dist: pyarrow (>=5.0.0) ; (python_version >= "3.9" and python_version < "4.0") and (extra == "caching")
23
- Requires-Dist: pydantic (>=2.0.0)
22
+ Requires-Dist: pyarrow (>=5.0.0) ; extra == "caching"
23
+ Requires-Dist: pydantic (>=2.4.1)
24
24
  Requires-Dist: sphinx-autobuild ; extra == "docs"
25
25
  Requires-Dist: sphinx-autodoc-typehints ; extra == "docs"
26
26
  Requires-Dist: sphinx-rtd-theme ; extra == "docs"
@@ -28,7 +28,7 @@ Requires-Dist: sphinx-toolbox ; extra == "docs"
28
28
  Requires-Dist: sphinxcontrib-mermaid ; extra == "docs"
29
29
  Requires-Dist: typing-extensions
30
30
  Project-URL: Documentation, https://patito.readthedocs.io
31
- Project-URL: Repository, https://github.com/kolonialno/patito
31
+ Project-URL: Repository, https://github.com/JakobGM/patito
32
32
  Description-Content-Type: text/markdown
33
33
 
34
34
  # <center><img height="30px" src="https://em-content.zobj.net/thumbs/120/samsung/78/duck_1f986.png"> Patito</center>
@@ -63,7 +63,6 @@ These schema can be used for:
63
63
  🧪 Easy generation of valid mock data frames for tests.\
64
64
  🐍 Retrieve and represent singular rows in an object-oriented manner.\
65
65
  🧠 Provide a single source of truth for the core data models in your code base. \
66
- 🦆 Integration with DuckDB for running flexible SQL queries.
67
66
 
68
67
  Patito has first-class support for [polars](https://github.com/pola-rs/polars), a _"blazingly fast DataFrames library written in Rust"_.
69
68
 
@@ -73,16 +72,6 @@ Patito has first-class support for [polars]("https://github.com/pola-rs/polars")
73
72
  pip install patito
74
73
  ```
75
74
 
76
- #### DuckDB Integration
77
-
78
- Patito can also integrate with [DuckDB](https://duckdb.org/).
79
- In order to enable this integration you must explicitly specify it during installation:
80
-
81
- ```sh
82
- pip install 'patito[duckdb]'
83
- ```
84
-
85
-
86
75
  ## Documentation
87
76
 
88
77
  The full documentation of Patito can be found [here](https://patito.readthedocs.io).
@@ -93,7 +82,7 @@ Patito allows you to specify the type of each column in your dataframe by creati
93
82
 
94
83
  ```py
95
84
  # models.py
96
- from typing import Literal, Optional
85
+ from typing import Literal
97
86
 
98
87
  import patito as pt
99
88
 
@@ -118,7 +107,7 @@ df = pl.DataFrame(
118
107
  )
119
108
  try:
120
109
  Product.validate(df)
121
- except pt.ValidationError as exc:
110
+ except pt.exceptions.DataFrameValidationError as exc:
122
111
  print(exc)
123
112
  # 3 validation errors for Product
124
113
  # is_for_sale
@@ -164,7 +153,7 @@ def num_products_for_sale(products: pl.DataFrame) -> int:
164
153
  return products.filter(pl.col("is_for_sale")).height
165
154
  ```
166
155
 
167
- The following test would fail with a `patito.ValidationError`:
156
+ The following test would fail with a `patito.exceptions.DataFrameValidationError`:
168
157
 
169
158
  ```py
170
159
  def test_num_products_for_sale():
@@ -30,7 +30,6 @@ These schema can be used for:
30
30
  🧪 Easy generation of valid mock data frames for tests.\
31
31
  🐍 Retrieve and represent singular rows in an object-oriented manner.\
32
32
  🧠 Provide a single source of truth for the core data models in your code base. \
33
- 🦆 Integration with DuckDB for running flexible SQL queries.
34
33
 
35
34
  Patito has first-class support for [polars](https://github.com/pola-rs/polars), a _"blazingly fast DataFrames library written in Rust"_.
36
35
 
@@ -40,16 +39,6 @@ Patito has first-class support for [polars]("https://github.com/pola-rs/polars")
40
39
  pip install patito
41
40
  ```
42
41
 
43
- #### DuckDB Integration
44
-
45
- Patito can also integrate with [DuckDB](https://duckdb.org/).
46
- In order to enable this integration you must explicitly specify it during installation:
47
-
48
- ```sh
49
- pip install 'patito[duckdb]'
50
- ```
51
-
52
-
53
42
  ## Documentation
54
43
 
55
44
  The full documentation of Patito can be found [here](https://patito.readthedocs.io).
@@ -60,7 +49,7 @@ Patito allows you to specify the type of each column in your dataframe by creati
60
49
 
61
50
  ```py
62
51
  # models.py
63
- from typing import Literal, Optional
52
+ from typing import Literal
64
53
 
65
54
  import patito as pt
66
55
 
@@ -85,7 +74,7 @@ df = pl.DataFrame(
85
74
  )
86
75
  try:
87
76
  Product.validate(df)
88
- except pt.ValidationError as exc:
77
+ except pt.exceptions.DataFrameValidationError as exc:
89
78
  print(exc)
90
79
  # 3 validation errors for Product
91
80
  # is_for_sale
@@ -131,7 +120,7 @@ def num_products_for_sale(products: pl.DataFrame) -> int:
131
120
  return products.filter(pl.col("is_for_sale")).height
132
121
  ```
133
122
 
134
- The following test would fail with a `patito.ValidationError`:
123
+ The following test would fail with a `patito.exceptions.DataFrameValidationError`:
135
124
 
136
125
  ```py
137
126
  def test_num_products_for_sale():
@@ -1,23 +1,23 @@
1
1
  [tool.poetry]
2
2
  name = "patito"
3
- version = "0.6.1"
3
+ version = "0.6.2"
4
4
  description = "A dataframe modelling library built on top of polars and pydantic."
5
5
  authors = ["Jakob Gerhard Martinussen <jakobgm@gmail.com>"]
6
6
  license = "MIT"
7
7
  readme = "README.md"
8
- homepage = "https://github.com/kolonialno/patito"
9
- repository = "https://github.com/kolonialno/patito"
8
+ homepage = "https://github.com/JakobGM/patito"
9
+ repository = "https://github.com/JakobGM/patito"
10
10
  documentation = "https://patito.readthedocs.io"
11
11
  keywords = ["validation", "dataframe"]
12
12
 
13
13
  [tool.poetry.dependencies]
14
- python = "^3.9"
15
- pydantic = ">=2.0.0"
14
+ python = ">=3.9"
15
+ pydantic = ">=2.4.1"
16
16
  polars = ">=0.20.1"
17
- # Required for typing.Literal in python3.7
17
+ # Required for typing.get_args backports in python3.9 and 3.10
18
18
  typing-extensions = "*"
19
- pandas = {version = "*", optional = true, python = "^3.9"}
20
- pyarrow = {version = ">=5.0.0", optional = true, python = "^3.9"}
19
+ pandas = {version = "*", optional = true}
20
+ pyarrow = {version = ">=5.0.0", optional = true}
21
21
  # Optional docs dependencies
22
22
  Sphinx = {version = "<7", optional = true}
23
23
  sphinx-rtd-theme = {version = "*", optional = true}
@@ -43,7 +43,7 @@ docs = [
43
43
  ruff = ">=0.2.1"
44
44
  coverage = {version = "*", extras = ["toml"]}
45
45
  flake8 = "3.9.2"
46
- flake8-annotations = "*"
46
+ flake8-annotations = { version = "*", python = ">=3.9,<4.0" }
47
47
  flake8-bandit = "*"
48
48
  flake8-black = "*"
49
49
  flake8-bugbear = "*"
@@ -51,7 +51,7 @@ flake8-isort = "*"
51
51
  pyright = ">=1.1.239"
52
52
  pytest = ">=7.1.2"
53
53
  pytest-cov = ">=3.0.0"
54
- pytest-watcher = ">=0.2.3"
54
+ pytest-watcher = { version = ">=0.2.3", python = ">=3.9,<4.0" }
55
55
  xdoctest = ">=1.0.0"
56
56
 
57
57
  mypy = ">=0.950"
@@ -115,31 +115,16 @@ exclude = [
115
115
  "noxfile.py",
116
116
  ]
117
117
 
118
- [[tool.mypy.overrides]]
119
- module = ["tests.*", "noxfile"]
120
- allow_untyped_defs = true
121
- check_untyped_defs = true
122
- disallow_incomplete_defs = false
123
- # TODO: Go through and remove those we want to check in tests
124
- disable_error_code = [
125
- "var-annotated",
126
- "override",
127
- "attr-defined",
128
- "call-arg",
129
- "type-var",
130
- "misc",
131
- "arg-type",
132
- "assignment",
133
- ]
134
118
 
135
119
  [[tool.mypy.overrides]]
136
120
  module = ["tests.test_validators"]
137
121
  warn_unused_ignores = false
138
122
 
123
+ [tool.ruff]
124
+ extend-exclude= ["tests/__init__.py"]
125
+
139
126
  [tool.ruff.lint]
140
127
  select = ["E4", "E7", "E9", "F", "I", "B", "D"]
141
- ignore = []
142
128
 
143
- # Allow fix for all enabled rules (when `--fix`) is provided.
144
- fixable = ["ALL"]
145
- unfixable = []
129
+ [tool.ruff.lint.pydocstyle]
130
+ convention = "google"
@@ -1,4 +1,5 @@
1
1
  """Patito, a data-modelling library built on top of polars and pydantic."""
2
+
2
3
  from polars import Expr, Series, col
3
4
 
4
5
  from patito import exceptions
@@ -1,2 +1,3 @@
1
1
  """Ugly workaround for Sphinx + autodoc + ModelMetaclass + classproperty."""
2
+
2
3
  from patito.pydantic import ModelMetaclass as Model # noqa: F401, pragma: no cover
File without changes
@@ -19,10 +19,9 @@ from patito._pydantic.dtypes import parse_composite_dtype
19
19
 
20
20
 
21
21
  class ColumnInfo(BaseModel, arbitrary_types_allowed=True):
22
- """patito-side model for storing column metadata
22
+ """patito-side model for storing column metadata.
23
23
 
24
24
  Args:
25
- ----
26
25
  constraints (Union[polars.Expression, List[polars.Expression]]): A single
27
26
  constraint or list of constraints, expressed as polars expression objects.
28
27
  All rows must satisfy the given constraint. You can refer to the given column
@@ -40,6 +39,22 @@ class ColumnInfo(BaseModel, arbitrary_types_allowed=True):
40
39
  derived_from: Optional[Union[str, pl.Expr]] = None
41
40
  unique: Optional[bool] = None
42
41
 
42
+ def __repr__(self) -> str:
43
+ """Print only Field attributes whose values are not default (mainly None)."""
44
+ not_default_field = {
45
+ field: getattr(self, field)
46
+ for field in self.model_fields
47
+ if getattr(self, field) is not self.model_fields[field].default
48
+ }
49
+
50
+ string = ""
51
+ for field, value in not_default_field.items():
52
+ string += f"{field}={value}, "
53
+ if string:
54
+ # remove trailing comma and space
55
+ string = string[:-2]
56
+ return f"ColumnInfo({string})"
57
+
43
58
  @field_serializer("constraints", "derived_from")
44
59
  def serialize_exprs(self, exprs: str | pl.Expr | Sequence[pl.Expr] | None) -> Any:
45
60
  if exprs is None:
@@ -56,17 +71,17 @@ class ColumnInfo(BaseModel, arbitrary_types_allowed=True):
56
71
  def _serialize_expr(self, expr: pl.Expr) -> Dict:
57
72
  if isinstance(expr, pl.Expr):
58
73
  return json.loads(
59
- expr.meta.write_json(None)
74
+ expr.meta.serialize(None)
60
75
  ) # can we access the dictionary directly?
61
76
  else:
62
77
  raise ValueError(f"Invalid type for expr: {type(expr)}")
63
78
 
64
79
  @field_serializer("dtype")
65
80
  def serialize_dtype(self, dtype: DataTypeClass | DataType | None) -> Any:
66
- """References
67
- ----------
68
- [1] https://stackoverflow.com/questions/76572310/how-to-serialize-deserialize-polars-datatypes
81
+ """Serialize a polars dtype.
69
82
 
83
+ References:
84
+ [1] https://stackoverflow.com/questions/76572310/how-to-serialize-deserialize-polars-datatypes
70
85
  """
71
86
  if dtype is None:
72
87
  return None
@@ -39,24 +39,17 @@ def valid_dtypes_for_model(
39
39
  @cache
40
40
  def default_dtypes_for_model(
41
41
  cls: Type[ModelType],
42
- ) -> dict[str, DataTypeClass | DataType]:
43
- default_dtypes = {}
42
+ ) -> dict[str, DataType]:
43
+ default_dtypes: dict[str, DataType] = {}
44
44
  for column in cls.columns:
45
- dtype = cls.column_infos[column].dtype
45
+ dtype = (
46
+ cls.column_infos[column].dtype
47
+ or DtypeResolver(cls.model_fields[column].annotation).default_polars_dtype()
48
+ )
46
49
  if dtype is None:
47
- default_dtype = DtypeResolver(
48
- cls.model_fields[column].annotation
49
- ).default_polars_dtype()
50
- if default_dtype is None:
51
- raise ValueError(
52
- f"Unable to find a default dtype for column `{column}`"
53
- )
54
- else:
55
- default_dtypes[column] = default_dtype
56
- else:
57
- default_dtypes[column] = (
58
- dtype if isinstance(dtype, DataType) else dtype()
59
- ) # if dtype is not instantiated, instantiate it
50
+ raise ValueError(f"Unable to find a default dtype for column `{column}`")
51
+
52
+ default_dtypes[column] = dtype if isinstance(dtype, DataType) else dtype()
60
53
  return default_dtypes
61
54
 
62
55
 
@@ -68,7 +61,6 @@ def validate_polars_dtype(
68
61
  """Check that the polars dtype is valid for the given annotation. Raises ValueError if not.
69
62
 
70
63
  Args:
71
- ----
72
64
  annotation (type[Any] | None): python type annotation
73
65
  dtype (DataType | DataTypeClass | None): polars dtype
74
66
  column (Optional[str], optional): column name. Defaults to None.
@@ -96,7 +88,6 @@ def validate_annotation(
96
88
  """Check that the provided annotation has polars/patito support (we can resolve it to a default dtype). Raises ValueError if not.
97
89
 
98
90
  Args:
99
- ----
100
91
  annotation (type[Any] | None): python type annotation
101
92
  column (Optional[str], optional): column name. Defaults to None.
102
93
 
@@ -130,9 +121,9 @@ class DtypeResolver:
130
121
  return PT_BASE_SUPPORTED_DTYPES
131
122
  return self._valid_polars_dtypes_for_schema(self.schema)
132
123
 
133
- def default_polars_dtype(self) -> DataTypeClass | DataType | None:
124
+ def default_polars_dtype(self) -> DataType | None:
134
125
  if self.annotation == Any:
135
- return pl.String
126
+ return pl.String()
136
127
  return self._default_polars_dtype_for_schema(self.schema)
137
128
 
138
129
  def _valid_polars_dtypes_for_schema(
@@ -197,9 +188,7 @@ class DtypeResolver:
197
188
  PydanticBaseType(pyd_type), props.get("format"), props.get("enum")
198
189
  )
199
190
 
200
- def _default_polars_dtype_for_schema(
201
- self, schema: Dict
202
- ) -> DataTypeClass | DataType | None:
191
+ def _default_polars_dtype_for_schema(self, schema: Dict) -> DataType | None:
203
192
  if "anyOf" in schema:
204
193
  if len(schema["anyOf"]) == 2: # look for optionals first
205
194
  schema = _without_optional(schema)
@@ -216,10 +205,12 @@ class DtypeResolver:
216
205
  def _pydantic_subschema_to_default_dtype(
217
206
  self,
218
207
  props: Dict,
219
- ) -> DataTypeClass | DataType | None:
208
+ ) -> DataType | None:
220
209
  if "column_info" in props: # user has specified in patito model
221
210
  if props["column_info"]["dtype"] is not None:
222
- return dtype_from_string(props["column_info"]["dtype"])
211
+ dtype = dtype_from_string(props["column_info"]["dtype"])
212
+ dtype = dtype() if isinstance(dtype, DataTypeClass) else dtype
213
+ return dtype
223
214
  if "type" not in props:
224
215
  if "enum" in props:
225
216
  raise TypeError("Mixed type enums not supported by patito.")
@@ -78,11 +78,9 @@ def is_optional(type_annotation: type[Any] | Any | None) -> bool:
78
78
  """Return True if the given type annotation is an Optional annotation.
79
79
 
80
80
  Args:
81
- ----
82
81
  type_annotation: The type annotation to be checked.
83
82
 
84
83
  Returns:
85
- -------
86
84
  True if the outermost type is Optional.
87
85
 
88
86
  """
@@ -92,7 +90,7 @@ def is_optional(type_annotation: type[Any] | Any | None) -> bool:
92
90
 
93
91
 
94
92
  def parse_composite_dtype(dtype: DataTypeClass | DataType) -> str:
95
- """For serialization, converts polars dtype to string representation"""
93
+ """For serialization, converts polars dtype to string representation."""
96
94
  if dtype in pl.NESTED_DTYPES:
97
95
  if dtype == pl.Struct or isinstance(dtype, pl.Struct):
98
96
  raise NotImplementedError("Structs not yet supported by patito")
@@ -110,7 +108,7 @@ def parse_composite_dtype(dtype: DataTypeClass | DataType) -> str:
110
108
 
111
109
 
112
110
  def dtype_from_string(v: str) -> Optional[Union[DataTypeClass, DataType]]:
113
- """For deserialization"""
111
+ """For deserialization."""
114
112
  # TODO test all dtypes
115
113
  return convert.dtype_short_repr_to_dtype(v)
116
114
 
@@ -82,7 +82,7 @@ class Representation:
82
82
  def __pretty__(
83
83
  self, fmt: Callable[[Any], Any], **kwargs: Any
84
84
  ) -> Generator[Any, None, None]:
85
- """Used by devtools (https://python-devtools.helpmanual.io/) to provide a human readable representations of objects"""
85
+ """Used by devtools (https://python-devtools.helpmanual.io/) to provide human-readable representations of objects."""
86
86
  yield self.__repr_name__() + "("
87
87
  yield 1
88
88
  for name, value in self.__repr_args__():
@@ -101,7 +101,7 @@ class Representation:
101
101
  return f'{self.__repr_name__()}({self.__repr_str__(", ")})'
102
102
 
103
103
  def __rich_repr__(self) -> "RichReprResult":
104
- """Get fields for Rich library"""
104
+ """Get fields for Rich library."""
105
105
  for name, field_repr in self.__repr_args__():
106
106
  if name is None:
107
107
  yield field_repr
@@ -16,14 +16,12 @@ if TYPE_CHECKING:
16
16
  def schema_for_model(cls: Type[ModelType]) -> Dict[str, Dict[str, Any]]:
17
17
  """Return schema properties where definition references have been resolved.
18
18
 
19
- Returns
20
- -------
19
+ Returns:
21
20
  Field information as a dictionary where the keys are field names and the
22
21
  values are dictionaries containing metadata information about the field
23
22
  itself.
24
23
 
25
- Raises
26
- ------
24
+ Raises:
27
25
  TypeError: if a field is annotated with an enum where the values are of
28
26
  different types.
29
27
 
@@ -1,3 +1,5 @@
1
+ """Exceptions used by patito."""
2
+
1
3
  from typing import (
2
4
  TYPE_CHECKING,
3
5
  Any,
@@ -34,19 +36,24 @@ __all__ = "ErrorWrapper", "DataFrameValidationError"
34
36
 
35
37
 
36
38
  class ErrorWrapper(Representation):
39
+ """Error handler for nicely accumulating errors."""
40
+
37
41
  __slots__ = "exc", "_loc"
38
42
 
39
43
  def __init__(self, exc: Exception, loc: Union[str, "Loc"]) -> None:
44
+ """Wrap an error in an ErrorWrapper."""
40
45
  self.exc = exc
41
46
  self._loc = loc
42
47
 
43
48
  def loc_tuple(self) -> "Loc":
49
+ """Represent error as tuple."""
44
50
  if isinstance(self._loc, tuple):
45
51
  return self._loc
46
52
  else:
47
53
  return (self._loc,)
48
54
 
49
55
  def __repr_args__(self) -> "ReprArgs":
56
+ """Pydantic repr."""
50
57
  return [("exc", self.exc), ("loc", self.loc_tuple())]
51
58
 
52
59
 
@@ -56,19 +63,24 @@ ErrorList = Union[Sequence[Any], ErrorWrapper]
56
63
 
57
64
 
58
65
  class DataFrameValidationError(Representation, ValueError):
66
+ """Parent error for DataFrame validation errors."""
67
+
59
68
  __slots__ = "raw_errors", "model", "_error_cache"
60
69
 
61
70
  def __init__(self, errors: Sequence[ErrorList], model: Type["BaseModel"]) -> None:
71
+ """Create a dataframe validation error."""
62
72
  self.raw_errors = errors
63
73
  self.model = model
64
74
  self._error_cache: Optional[List["ErrorDict"]] = None
65
75
 
66
76
  def errors(self) -> List["ErrorDict"]:
77
+ """Get list of errors."""
67
78
  if self._error_cache is None:
68
79
  self._error_cache = list(flatten_errors(self.raw_errors))
69
80
  return self._error_cache
70
81
 
71
82
  def __str__(self) -> str:
83
+ """String representation of error."""
72
84
  errors = self.errors()
73
85
  no_errors = len(errors)
74
86
  return (
@@ -77,6 +89,7 @@ class DataFrameValidationError(Representation, ValueError):
77
89
  )
78
90
 
79
91
  def __repr_args__(self) -> "ReprArgs":
92
+ """Pydantic repr."""
80
93
  return [("model", self.model.__name__), ("errors", self.errors())]
81
94
 
82
95