patito 0.5.1__tar.gz → 0.6.2__tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
@@ -1,29 +1,26 @@
  Metadata-Version: 2.1
  Name: patito
- Version: 0.5.1
+ Version: 0.6.2
  Summary: A dataframe modelling library built on top of polars and pydantic.
- Home-page: https://github.com/kolonialno/patito
+ Home-page: https://github.com/JakobGM/patito
  License: MIT
  Keywords: validation,dataframe
  Author: Jakob Gerhard Martinussen
  Author-email: jakobgm@gmail.com
- Requires-Python: >=3.8,<4.0
+ Requires-Python: >=3.9
  Classifier: License :: OSI Approved :: MIT License
  Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.8
  Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Provides-Extra: caching
  Provides-Extra: docs
- Provides-Extra: duckdb
  Provides-Extra: pandas
  Requires-Dist: Sphinx (<7) ; extra == "docs"
- Requires-Dist: duckdb (>=0.6.0) ; (python_version >= "3.8" and python_version < "4.0") and (extra == "duckdb")
- Requires-Dist: pandas ; (python_version >= "3.8" and python_version < "4.0") and (extra == "pandas")
- Requires-Dist: polars (>=0.18.7)
- Requires-Dist: pyarrow (>=5.0.0) ; (python_version >= "3.8" and python_version < "4.0") and (extra == "caching" or extra == "duckdb")
- Requires-Dist: pydantic (>=1.7.0,<2.0.0)
+ Requires-Dist: pandas ; extra == "pandas"
+ Requires-Dist: polars (>=0.20.1)
+ Requires-Dist: pyarrow (>=5.0.0) ; extra == "caching"
+ Requires-Dist: pydantic (>=2.4.1)
  Requires-Dist: sphinx-autobuild ; extra == "docs"
  Requires-Dist: sphinx-autodoc-typehints ; extra == "docs"
  Requires-Dist: sphinx-rtd-theme ; extra == "docs"
@@ -31,10 +28,10 @@ Requires-Dist: sphinx-toolbox ; extra == "docs"
  Requires-Dist: sphinxcontrib-mermaid ; extra == "docs"
  Requires-Dist: typing-extensions
  Project-URL: Documentation, https://patito.readthedocs.io
- Project-URL: Repository, https://github.com/kolonialno/patito
+ Project-URL: Repository, https://github.com/JakobGM/patito
  Description-Content-Type: text/markdown

- # <center><img height="30px" src="https://emojipedia-us.s3.dualstack.us-west-1.amazonaws.com/thumbs/120/samsung/78/duck_1f986.png"> Patito<center>
+ # <center><img height="30px" src="https://em-content.zobj.net/thumbs/120/samsung/78/duck_1f986.png"> Patito<center>

  <p align="center">
  <em>
@@ -66,7 +63,6 @@ These schema can be used for:
  🧪 Easy generation of valid mock data frames for tests.\
  🐍 Retrieve and represent singular rows in an object-oriented manner.\
  🧠 Provide a single source of truth for the core data models in your code base. \
- 🦆 Integration with DuckDB for running flexible SQL queries.

  Patito has first-class support for [polars]("https://github.com/pola-rs/polars"), a _"blazingly fast DataFrames library written in Rust"_.

@@ -76,16 +72,6 @@ Patito has first-class support for [polars]("https://github.com/pola-rs/polars")
  pip install patito
  ```

- #### DuckDB Integration
-
- Patito can also integrate with [DuckDB](https://duckdb.org/).
- In order to enable this integration you must explicitly specify it during installation:
-
- ```sh
- pip install 'patito[duckdb]'
- ```
-
-
  ## Documentation

  The full documentation of Patio can be found [here](https://patito.readthedocs.io).
@@ -96,7 +82,7 @@ Patito allows you to specify the type of each column in your dataframe by creati

  ```py
  # models.py
- from typing import Literal, Optional
+ from typing import Literal

  import patito as pt

@@ -121,7 +107,7 @@ df = pl.DataFrame(
  )
  try:
  Product.validate(df)
- except pt.ValidationError as exc:
+ except pt.exceptions.DataFrameValidationError as exc:
  print(exc)
  # 3 validation errors for Product
  # is_for_sale
@@ -167,7 +153,7 @@ def num_products_for_sale(products: pl.DataFrame) -> int:
  return products.filter(pl.col("is_for_sale")).height
  ```

- The following test would fail with a `patito.ValidationError`:
+ The following test would fail with a `patito.exceptions.DataFrameValidationError`:

  ```py
  def test_num_products_for_sale():
@@ -1,4 +1,4 @@
- # <center><img height="30px" src="https://emojipedia-us.s3.dualstack.us-west-1.amazonaws.com/thumbs/120/samsung/78/duck_1f986.png"> Patito<center>
+ # <center><img height="30px" src="https://em-content.zobj.net/thumbs/120/samsung/78/duck_1f986.png"> Patito<center>

  <p align="center">
  <em>
@@ -30,7 +30,6 @@ These schema can be used for:
  🧪 Easy generation of valid mock data frames for tests.\
  🐍 Retrieve and represent singular rows in an object-oriented manner.\
  🧠 Provide a single source of truth for the core data models in your code base. \
- 🦆 Integration with DuckDB for running flexible SQL queries.

  Patito has first-class support for [polars]("https://github.com/pola-rs/polars"), a _"blazingly fast DataFrames library written in Rust"_.

@@ -40,16 +39,6 @@ Patito has first-class support for [polars]("https://github.com/pola-rs/polars")
  pip install patito
  ```

- #### DuckDB Integration
-
- Patito can also integrate with [DuckDB](https://duckdb.org/).
- In order to enable this integration you must explicitly specify it during installation:
-
- ```sh
- pip install 'patito[duckdb]'
- ```
-
-
  ## Documentation

  The full documentation of Patio can be found [here](https://patito.readthedocs.io).
@@ -60,7 +49,7 @@ Patito allows you to specify the type of each column in your dataframe by creati

  ```py
  # models.py
- from typing import Literal, Optional
+ from typing import Literal

  import patito as pt

@@ -85,7 +74,7 @@ df = pl.DataFrame(
  )
  try:
  Product.validate(df)
- except pt.ValidationError as exc:
+ except pt.exceptions.DataFrameValidationError as exc:
  print(exc)
  # 3 validation errors for Product
  # is_for_sale
@@ -131,7 +120,7 @@ def num_products_for_sale(products: pl.DataFrame) -> int:
  return products.filter(pl.col("is_for_sale")).height
  ```

- The following test would fail with a `patito.ValidationError`:
+ The following test would fail with a `patito.exceptions.DataFrameValidationError`:

  ```py
  def test_num_products_for_sale():
@@ -1,24 +1,23 @@
  [tool.poetry]
  name = "patito"
- version = "0.5.1"
+ version = "0.6.2"
  description = "A dataframe modelling library built on top of polars and pydantic."
  authors = ["Jakob Gerhard Martinussen <jakobgm@gmail.com>"]
  license = "MIT"
  readme = "README.md"
- homepage = "https://github.com/kolonialno/patito"
- repository = "https://github.com/kolonialno/patito"
+ homepage = "https://github.com/JakobGM/patito"
+ repository = "https://github.com/JakobGM/patito"
  documentation = "https://patito.readthedocs.io"
  keywords = ["validation", "dataframe"]

  [tool.poetry.dependencies]
- python = "^3.8"
- pydantic = "^1.7.0"
- polars = ">=0.18.7"
- # Required for typing.Literal in python3.7
+ python = ">=3.9"
+ pydantic = ">=2.4.1"
+ polars = ">=0.20.1"
+ # Required for typing.get_args backports in python3.9 and 3.10
  typing-extensions = "*"
- pandas = {version = "*", optional = true, python = "^3.8"}
- duckdb = {version = ">=0.6.0", optional = true, python = "^3.8"}
- pyarrow = {version = ">=5.0.0", optional = true, python = "^3.8"}
+ pandas = {version = "*", optional = true}
+ pyarrow = {version = ">=5.0.0", optional = true}
  # Optional docs dependencies
  Sphinx = {version = "<7", optional = true}
  sphinx-rtd-theme = {version = "*", optional = true}
@@ -30,7 +29,6 @@ sphinxcontrib-mermaid = {version = "*", optional = true}
  [tool.poetry.extras]
  # The pyarrow.parquet module is required for writing parquet caches to disk
  caching = ["pyarrow"]
- duckdb = ["duckdb", "pyarrow"]
  pandas = ["pandas"]
  docs = [
  "Sphinx",
@@ -42,19 +40,18 @@ docs = [
  ]

  [tool.poetry.group.dev.dependencies]
- black = ">=22.3.0"
+ ruff = ">=0.2.1"
  coverage = {version = "*", extras = ["toml"]}
  flake8 = "3.9.2"
- flake8-annotations = "*"
+ flake8-annotations = { version = "*", python = ">=3.9,<4.0" }
  flake8-bandit = "*"
  flake8-black = "*"
  flake8-bugbear = "*"
  flake8-isort = "*"
- isort = "*"
  pyright = ">=1.1.239"
  pytest = ">=7.1.2"
  pytest-cov = ">=3.0.0"
- pytest-watcher = ">=0.2.3"
+ pytest-watcher = { version = ">=0.2.3", python = ">=3.9,<4.0" }
  xdoctest = ">=1.0.0"

  mypy = ">=0.950"
@@ -63,6 +60,10 @@ pandas-stubs = ">=1.2.0"
  codecov = "^2.1.12"
  blackdoc = "*"

+
+ [tool.poetry.group.docs.dependencies]
+ nox = "^2023.4.22"
+
  [build-system]
  requires = ["poetry-core>=1.0.0"]
  build-backend = "poetry.core.masonry.api"
@@ -114,23 +115,16 @@ exclude = [
  "noxfile.py",
  ]

- [[tool.mypy.overrides]]
- module = ["tests.*", "noxfile"]
- allow_untyped_defs = true
- check_untyped_defs = true
- disallow_incomplete_defs = false
- # TODO: Go through and remove those we want to check in tests
- disable_error_code = [
- "var-annotated",
- "override",
- "attr-defined",
- "call-arg",
- "type-var",
- "misc",
- "arg-type",
- "assignment",
- ]

  [[tool.mypy.overrides]]
  module = ["tests.test_validators"]
  warn_unused_ignores = false
+
+ [tool.ruff]
+ extend-exclude= ["tests/__init__.py"]
+
+ [tool.ruff.lint]
+ select = ["E4", "E7", "E9", "F", "I", "B", "D"]
+
+ [tool.ruff.lint.pydocstyle]
+ convention = "google"
@@ -1,47 +1,28 @@
  """Patito, a data-modelling library built on top of polars and pydantic."""
+
  from polars import Expr, Series, col

- from patito import exceptions, sql
- from patito.exceptions import ValidationError
+ from patito import exceptions
+ from patito.exceptions import DataFrameValidationError
  from patito.polars import DataFrame, LazyFrame
  from patito.pydantic import Field, Model

  _CACHING_AVAILABLE = False
- _DUCKDB_AVAILABLE = False
  field = col("_")
  __all__ = [
  "DataFrame",
+ "DataFrameValidationError",
  "Expr",
  "Field",
  "LazyFrame",
  "Model",
  "Series",
- "ValidationError",
  "_CACHING_AVAILABLE",
- "_DUCKDB_AVAILABLE",
  "col",
  "exceptions",
  "field",
- "sql",
  ]

- try:
- from patito import duckdb
-
- _DUCKDB_AVAILABLE = True
- __all__ += ["duckdb"]
- except ImportError: # pragma: no cover
- pass
-
- try:
- from patito.database import Database
-
- _CACHING_AVAILABLE = True
- __all__ += ["Database"]
- except ImportError:
- pass
-
-
  try:
  from importlib.metadata import PackageNotFoundError, version
  except ImportError: # pragma: no cover
@@ -1,2 +1,3 @@
  """Ugly workaround for Sphinx + autodoc + ModelMetaclass + classproperty."""
+
  from patito.pydantic import ModelMetaclass as Model # noqa: F401, pragma: no cover

File without changes
@@ -0,0 +1,94 @@
+ from __future__ import annotations
+
+ import json
+ from typing import (
+     Any,
+     Dict,
+     Optional,
+     Sequence,
+     Type,
+     TypeVar,
+     Union,
+ )
+
+ import polars as pl
+ from polars.datatypes import DataType, DataTypeClass
+ from pydantic import BaseModel, field_serializer
+
+ from patito._pydantic.dtypes import parse_composite_dtype
+
+
+ class ColumnInfo(BaseModel, arbitrary_types_allowed=True):
+     """patito-side model for storing column metadata.
+
+     Args:
+         constraints (Union[polars.Expression, List[polars.Expression]): A single
+             constraint or list of constraints, expressed as a polars expression objects.
+             All rows must satisfy the given constraint. You can refer to the given column
+             with ``pt.field``, which will automatically be replaced with
+             ``polars.col(<field_name>)`` before evaluation.
+         derived_from (Union[str, polars.Expr]): used to mark fields that are meant to be derived from other fields. Users can specify a polars expression that will be called to derive the column value when `pt.DataFrame.derive` is called.
+         dtype (polars.datatype.DataType): The given dataframe column must have the given
+             polars dtype, for instance ``polars.UInt64`` or ``pl.Float32``.
+         unique (bool): All row values must be unique.
+
+     """
+
+     dtype: Optional[Union[DataTypeClass, DataType]] = None
+     constraints: Optional[Union[pl.Expr, Sequence[pl.Expr]]] = None
+     derived_from: Optional[Union[str, pl.Expr]] = None
+     unique: Optional[bool] = None
+
+     def __repr__(self) -> str:
+         """Print only Field attributes whose values are not default (mainly None)."""
+         not_default_field = {
+             field: getattr(self, field)
+             for field in self.model_fields
+             if getattr(self, field) is not self.model_fields[field].default
+         }
+
+         string = ""
+         for field, value in not_default_field.items():
+             string += f"{field}={value}, "
+         if string:
+             # remove trailing comma and space
+             string = string[:-2]
+         return f"ColumnInfo({string})"
+
+     @field_serializer("constraints", "derived_from")
+     def serialize_exprs(self, exprs: str | pl.Expr | Sequence[pl.Expr] | None) -> Any:
+         if exprs is None:
+             return None
+         elif isinstance(exprs, str):
+             return exprs
+         elif isinstance(exprs, pl.Expr):
+             return self._serialize_expr(exprs)
+         elif isinstance(exprs, Sequence):
+             return [self._serialize_expr(c) for c in exprs]
+         else:
+             raise ValueError(f"Invalid type for exprs: {type(exprs)}")
+
+     def _serialize_expr(self, expr: pl.Expr) -> Dict:
+         if isinstance(expr, pl.Expr):
+             return json.loads(
+                 expr.meta.serialize(None)
+             ) # can we access the dictionary directly?
+         else:
+             raise ValueError(f"Invalid type for expr: {type(expr)}")
+
+     @field_serializer("dtype")
+     def serialize_dtype(self, dtype: DataTypeClass | DataType | None) -> Any:
+         """Serialize a polars dtype.
+
+         References:
+             [1] https://stackoverflow.com/questions/76572310/how-to-serialize-deserialize-polars-datatypes
+         """
+         if dtype is None:
+             return None
+         elif isinstance(dtype, DataTypeClass) or isinstance(dtype, DataType):
+             return parse_composite_dtype(dtype)
+         else:
+             raise ValueError(f"Invalid type for dtype: {type(dtype)}")
+
+
+ CI = TypeVar("CI", bound=Type[ColumnInfo])
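The new `ColumnInfo` model added above stores per-column metadata (dtype, constraints, derived_from, unique) as a pydantic model. A minimal usage sketch follows; note that this diff does not show the new file's location in the package, so the import path below is an assumption, and the example values are illustrative.

```py
# Sketch only: the module path is a guess; this diff does not name the new file.
import polars as pl

from patito._pydantic.column_info import ColumnInfo  # hypothetical import path

# Column metadata: a polars dtype, a uniqueness flag, and a constraint expression.
info = ColumnInfo(
    dtype=pl.UInt32,
    unique=True,
    constraints=pl.col("price") > 0,
)

# __repr__ only prints attributes that differ from their defaults (mainly None).
print(repr(info))
```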
@@ -0,0 +1,25 @@
+ from patito._pydantic.dtypes.dtypes import (
+     DtypeResolver,
+     default_dtypes_for_model,
+     valid_dtypes_for_model,
+     validate_annotation,
+     validate_polars_dtype,
+ )
+ from patito._pydantic.dtypes.utils import (
+     PYTHON_TO_PYDANTIC_TYPES,
+     dtype_from_string,
+     is_optional,
+     parse_composite_dtype,
+ )
+
+ __all__ = [
+     "DtypeResolver",
+     "validate_annotation",
+     "validate_polars_dtype",
+     "parse_composite_dtype",
+     "dtype_from_string",
+     "valid_dtypes_for_model",
+     "default_dtypes_for_model",
+     "PYTHON_TO_PYDANTIC_TYPES",
+     "is_optional",
+ ]
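The most visible user-facing change in this diff is the exception rename from `pt.ValidationError` to `pt.exceptions.DataFrameValidationError` (alongside the pydantic `>=2.4.1` and polars `>=0.20.1` bumps). Below is a minimal sketch of catching the renamed exception in 0.6.x, based on the README changes shown above; the `Product` fields here are illustrative and not taken verbatim from the diff.

```py
# Minimal sketch, assuming patito 0.6.x with pydantic v2 installed.
from typing import Literal

import patito as pt
import polars as pl


class Product(pt.Model):
    # Illustrative fields; the full README model is not shown in this diff.
    product_id: int = pt.Field(unique=True)
    temperature_zone: Literal["dry", "cold", "frozen"]


df = pl.DataFrame({"product_id": [1, 1], "temperature_zone": ["dry", "oven"]})
try:
    Product.validate(df)
except pt.exceptions.DataFrameValidationError as exc:  # was pt.ValidationError in 0.5.x
    print(exc)
```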