patito 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
patito/validators.py CHANGED
@@ -1,9 +1,11 @@
1
1
  """Module for validating datastructures with respect to model specifications."""
2
2
  from __future__ import annotations
3
3
 
4
+ import sys
4
5
  from typing import TYPE_CHECKING, Type, Union, cast
5
6
 
6
7
  import polars as pl
8
+ from typing_extensions import get_args, get_origin
7
9
 
8
10
  from patito.exceptions import (
9
11
  ColumnDTypeError,
@@ -15,6 +17,13 @@ from patito.exceptions import (
15
17
  ValidationError,
16
18
  )
17
19
 
20
+ if sys.version_info >= (3, 10): # pragma: no cover
21
+ from types import UnionType # pyright: ignore
22
+
23
+ UNION_TYPES = (Union, UnionType)
24
+ else:
25
+ UNION_TYPES = (Union,) # pragma: no cover
26
+
18
27
  try:
19
28
  import pandas as pd
20
29
 
@@ -44,6 +53,44 @@ VALID_POLARS_TYPES = {
44
53
  }
45
54
 
46
55
 
56
def _is_optional(type_annotation: Type) -> bool:
    """
    Tell whether a type annotation is an Optional, i.e. a union including None.

    Only the outermost layer of the annotation is inspected; nested
    optionals (for example the item type of a list) are not considered.

    Args:
        type_annotation: The type annotation to be checked.

    Returns:
        True if the origin of the annotation is one of ``UNION_TYPES`` and
        ``NoneType`` is among its arguments, otherwise False.
    """
    # Anything that is not a union at the top level can never be Optional.
    if get_origin(type_annotation) not in UNION_TYPES:
        return False

    none_type = type(None)
    return none_type in get_args(type_annotation)
69
+
70
+
71
def _dewrap_optional(type_annotation: Type) -> Type:
    """
    Strip the outermost Optional from a type annotation.

    Annotations that are not Optional are returned unchanged.

    Args:
        type_annotation: The type annotation to be dewrapped.

    Returns:
        The first non-None member of the Optional, or the input annotation
        itself when it is not an Optional.
    """
    # Guard clause: nothing to unwrap for non-Optional annotations.
    if not _is_optional(type_annotation):
        return type_annotation

    none_type = type(None)
    union_members = get_args(type_annotation)
    # Pick the first union member that is not NoneType.
    return next(  # pragma: no cover
        member for member in union_members if member is not none_type
    )
92
+
93
+
47
94
  def _find_errors( # noqa: C901
48
95
  dataframe: pl.DataFrame,
49
96
  schema: Type[Model],
@@ -99,6 +146,45 @@ def _find_errors( # noqa: C901
99
146
  )
100
147
  )
101
148
 
149
+ for column, dtype in schema.dtypes.items():
150
+ if not isinstance(dtype, pl.List):
151
+ continue
152
+
153
+ annotation = schema.__annotations__[column] # type: ignore[unreachable]
154
+
155
+ # Retrieve the annotation of the list itself,
156
+ # dewrapping any potential Optional[...]
157
+ list_type = _dewrap_optional(annotation)
158
+
159
+ # Check if the list items themselves should be considered nullable
160
+ item_type = get_args(list_type)[0]
161
+ if _is_optional(item_type):
162
+ continue
163
+
164
+ num_missing_values = (
165
+ dataframe.lazy()
166
+ .select(column)
167
+ # Remove those rows that do not contain lists at all
168
+ .filter(pl.col(column).is_not_null())
169
+ # Convert lists of N items to N individual rows
170
+ .explode(column)
171
+ # Calculate how many nulls are present in lists
172
+ .filter(pl.col(column).is_null())
173
+ .collect()
174
+ .height
175
+ )
176
+ if num_missing_values != 0:
177
+ errors.append(
178
+ ErrorWrapper(
179
+ MissingValuesError(
180
+ f"{num_missing_values} missing "
181
+ f"{'value' if num_missing_values == 1 else 'values'} "
182
+ f"in lists"
183
+ ),
184
+ loc=column,
185
+ )
186
+ )
187
+
102
188
  # Check if any column has a wrong dtype
103
189
  valid_dtypes = schema.valid_dtypes
104
190
  dataframe_datatypes = dict(zip(dataframe.columns, dataframe.dtypes))
@@ -189,7 +275,7 @@ def _find_errors( # noqa: C901
189
275
  )
190
276
  if "_" in constraints.meta.root_names():
191
277
  # An underscore is an alias for the current field
192
- illegal_rows = dataframe.with_column(
278
+ illegal_rows = dataframe.with_columns(
193
279
  pl.col(column_name).alias("_")
194
280
  ).filter(constraints)
195
281
  else:
patito/xdg.py ADDED
@@ -0,0 +1,22 @@
1
+ """Module implementing the XDG directory standard."""
2
+ import os
3
+ from pathlib import Path
4
+ from typing import Optional
5
+
6
+
7
def cache_home(application: Optional[str] = None) -> Path:
    """
    Return path to directory containing user-specific non-essential data files.

    The base directory is read from the ``XDG_CACHE_HOME`` environment
    variable. When the variable is unset or empty, ``~/.cache`` is used
    instead. The returned directory is created if it does not already exist.

    Args:
        application: An optional name of an application for which to return
            an application-specific cache directory.

    Returns:
        A path object pointing to a directory to store cache files.
    """
    # An empty XDG_CACHE_HOME is treated like an unset one, so the default
    # applies instead of silently resolving to the current working directory.
    base_directory = os.environ.get("XDG_CACHE_HOME") or "~/.cache"

    # Path.resolve() does not expand "~"; without expanduser() the default
    # would become a literal "~" directory below the current working directory.
    path = Path(base_directory).expanduser().resolve()
    if application:
        path = path / application
    path.mkdir(exist_ok=True, parents=True)
    return path
@@ -1,6 +1,7 @@
1
1
  MIT License
2
2
 
3
3
  Copyright (c) 2022 Oda Group Holding AS
4
+ Copyright (c) 2023 Jakob Gerhard Martinussen and contributors
4
5
 
5
6
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
7
  of this software and associated documentation files (the "Software"), to deal
@@ -1,33 +1,34 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: patito
3
- Version: 0.4.3
3
+ Version: 0.5.0
4
4
  Summary: A dataframe modelling library built on top of polars and pydantic.
5
5
  Home-page: https://github.com/kolonialno/patito
6
6
  License: MIT
7
7
  Keywords: validation,dataframe
8
8
  Author: Jakob Gerhard Martinussen
9
9
  Author-email: jakobgm@gmail.com
10
- Requires-Python: >=3.7,<4.0
10
+ Requires-Python: >=3.8,<4.0
11
11
  Classifier: License :: OSI Approved :: MIT License
12
12
  Classifier: Programming Language :: Python :: 3
13
- Classifier: Programming Language :: Python :: 3.10
14
- Classifier: Programming Language :: Python :: 3.7
15
13
  Classifier: Programming Language :: Python :: 3.8
16
14
  Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Provides-Extra: caching
17
18
  Provides-Extra: docs
18
19
  Provides-Extra: duckdb
19
20
  Provides-Extra: pandas
20
- Requires-Dist: Sphinx; extra == "docs"
21
- Requires-Dist: duckdb (>=0.5.0); (python_version >= "3.8" and python_version < "4.0") and (extra == "duckdb")
22
- Requires-Dist: importlib-metadata; python_version < "3.8"
23
- Requires-Dist: pandas; (python_version >= "3.8" and python_version < "4.0") and (extra == "pandas")
24
- Requires-Dist: polars (>=0.14.0)
25
- Requires-Dist: pyarrow (>=5.0.0); (python_version >= "3.8" and python_version < "4.0") and (extra == "duckdb")
21
+ Requires-Dist: Sphinx (<7) ; extra == "docs"
22
+ Requires-Dist: duckdb (>=0.6.0) ; (python_version >= "3.8" and python_version < "4.0") and (extra == "duckdb")
23
+ Requires-Dist: pandas ; (python_version >= "3.8" and python_version < "4.0") and (extra == "pandas")
24
+ Requires-Dist: polars (>=0.18.3)
25
+ Requires-Dist: pyarrow (>=5.0.0) ; (python_version >= "3.8" and python_version < "4.0") and (extra == "caching" or extra == "duckdb")
26
26
  Requires-Dist: pydantic (>=1.7.0)
27
- Requires-Dist: sphinx-autobuild; extra == "docs"
28
- Requires-Dist: sphinx-autodoc-typehints; extra == "docs"
29
- Requires-Dist: sphinx-rtd-theme; extra == "docs"
30
- Requires-Dist: sphinxcontrib-mermaid; extra == "docs"
27
+ Requires-Dist: sphinx-autobuild ; extra == "docs"
28
+ Requires-Dist: sphinx-autodoc-typehints ; extra == "docs"
29
+ Requires-Dist: sphinx-rtd-theme ; extra == "docs"
30
+ Requires-Dist: sphinx-toolbox ; extra == "docs"
31
+ Requires-Dist: sphinxcontrib-mermaid ; extra == "docs"
31
32
  Requires-Dist: typing-extensions
32
33
  Project-URL: Documentation, https://patito.readthedocs.io
33
34
  Project-URL: Repository, https://github.com/kolonialno/patito
@@ -284,7 +285,7 @@ class Product(pt.Model):
284
285
  @property
285
286
  def url(self) -> str:
286
287
  return (
287
- "https://oda.com/no/products/"
288
+ "https://example.com/no/products/"
288
289
  f"{self.product_id}-"
289
290
  f"{self.name.lower().replace(' ', '-')}"
290
291
  )
@@ -302,7 +303,7 @@ products = pl.DataFrame(
302
303
  milk_row = products.filter(pl.col("product_id") == 1)
303
304
  milk = Product.from_row(milk_row)
304
305
  print(milk.url)
305
- # https://oda.com/no/products/1-skimmed-milk
306
+ # https://example.com/no/products/1-skimmed-milk
306
307
  ```
307
308
 
308
309
  If you "connect" the `Product` model with the `DataFrame` by the use of `patito.DataFrame.set_model()`, or alternatively by using `Product.DataFrame` directly, you can use the `.get()` method in order to filter the data frame down to a single row _and_ cast it to the respective model class:
@@ -317,6 +318,6 @@ products = Product.DataFrame(
317
318
  )
318
319
  milk = products.get(pl.col("product_id") == 1)
319
320
  print(milk.url)
320
- # https://oda.com/no/products/1-skimmed-milk
321
+ # https://example.com/no/products/1-skimmed-milk
321
322
  ```
322
323
 
@@ -0,0 +1,14 @@
1
+ patito/__init__.py,sha256=J-p3aBiyNGdsyHxViEsP1C6bE-OxtxAul7xSA1xXpIE,1169
2
+ patito/_docs.py,sha256=bobkmo8-RRdz80_KY53y_i1Gcp1WWTH5-D5ZHGidpok,161
3
+ patito/database.py,sha256=IaxbsmyQMzL3KzvJUq0tgVGUxdr55KEv-wd1X673U2o,27917
4
+ patito/duckdb.py,sha256=MnC4C7m3epXODz_eYBLqb7dTAcOh2o1Y8BViPoPKTaA,111015
5
+ patito/exceptions.py,sha256=4WuaQJoc5wLUehhBRQPvL59LVFLcn_K806Z9fg0KBss,1262
6
+ patito/polars.py,sha256=oFS8In6cUiKQu-QclkJ1Egxbf2HF2SlbTF76bcyE4Vo,26161
7
+ patito/pydantic.py,sha256=HKQ6dfeeJCq1xaVJbqfo5UpF-WI_jBEaDRAy2tLnvtM,54832
8
+ patito/sql.py,sha256=_xmvVfC1kUUq2d8KTBPExJmfZ9ec6uoMfZv52naFFkY,3218
9
+ patito/validators.py,sha256=D5hdCfAc1rv15FwQILSmaZNZ5j88QfYUYZ37GMwgvQU,10659
10
+ patito/xdg.py,sha256=3EcUGcYBBUX2Secegajb4DB2QgAHNPs6oi0tMsL--UQ,686
11
+ patito-0.5.0.dist-info/LICENSE,sha256=3bc4YyuF0e5nd59E3CsR8QM1Ua7pqKfC9DD1LVBVMs4,1139
12
+ patito-0.5.0.dist-info/METADATA,sha256=K16UBKdqYnZfDmVdVqUuMc3AqknsaAmWfgljNVg0q_g,14562
13
+ patito-0.5.0.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
14
+ patito-0.5.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry 1.0.8
2
+ Generator: poetry-core 1.6.1
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,12 +0,0 @@
1
- patito/__init__.py,sha256=BSTuaflPBdcoJKJTMGDAa7fvoJVduWb28N6Fnw_Gpns,1079
2
- patito/_docs.py,sha256=bobkmo8-RRdz80_KY53y_i1Gcp1WWTH5-D5ZHGidpok,161
3
- patito/duckdb.py,sha256=Kxkn5Z4Rl_9KthO_zzbYCEhRtoGtV5T0xGueV6qgoZw,113446
4
- patito/exceptions.py,sha256=4WuaQJoc5wLUehhBRQPvL59LVFLcn_K806Z9fg0KBss,1262
5
- patito/polars.py,sha256=Ds_pH302wtWTDN78mmDdUrHn5K906L6H66GnpKrMGDo,26335
6
- patito/pydantic.py,sha256=x9gNcS29_nZaT_TUqsvMZm5UZwWZGGMFOe6WGvSdgvs,54976
7
- patito/sql.py,sha256=HhNGhYwIPTJOs08fMEAX6PUjeIKuzI2IB_cjOJknpJg,3249
8
- patito/validators.py,sha256=YhDMgua8c7Hs97pmscX3vIs-kqzq3vrAjAomYR42IfI,8061
9
- patito-0.4.3.dist-info/LICENSE,sha256=0LkhXGFNXIt9gW0zs6_9YZynb4DDt3OZaRXz7DpD0qQ,1077
10
- patito-0.4.3.dist-info/WHEEL,sha256=DA86_h4QwwzGeRoz62o1svYt5kGEXpoUTuTtwzoTb30,83
11
- patito-0.4.3.dist-info/METADATA,sha256=Bhj5XsAbskyczUNkQzUYroXjbgsFooqaLLEabVnZx-8,14500
12
- patito-0.4.3.dist-info/RECORD,,