pointblank 0.9.4__py3-none-any.whl → 0.9.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pointblank/data/api-docs.txt +20 -5
- pointblank/schema.py +8 -1
- pointblank/validate.py +174 -4
- {pointblank-0.9.4.dist-info → pointblank-0.9.5.dist-info}/METADATA +5 -3
- {pointblank-0.9.4.dist-info → pointblank-0.9.5.dist-info}/RECORD +8 -8
- {pointblank-0.9.4.dist-info → pointblank-0.9.5.dist-info}/WHEEL +1 -1
- {pointblank-0.9.4.dist-info → pointblank-0.9.5.dist-info}/licenses/LICENSE +0 -0
- {pointblank-0.9.4.dist-info → pointblank-0.9.5.dist-info}/top_level.txt +0 -0
pointblank/data/api-docs.txt
CHANGED
|
@@ -8837,7 +8837,7 @@ assistant(model: 'str', data: 'FrameT | Any | None' = None, tbl_name: 'str | Non
|
|
|
8837
8837
|
Pandas DataFrame, the availability of Ibis is not needed.
|
|
8838
8838
|
|
|
8839
8839
|
|
|
8840
|
-
load_dataset(dataset: "Literal['small_table', 'game_revenue', 'nycflights']" = 'small_table', tbl_type: "Literal['polars', 'pandas', 'duckdb']" = 'polars') -> 'FrameT | Any'
|
|
8840
|
+
load_dataset(dataset: "Literal['small_table', 'game_revenue', 'nycflights', 'global_sales']" = 'small_table', tbl_type: "Literal['polars', 'pandas', 'duckdb']" = 'polars') -> 'FrameT | Any'
|
|
8841
8841
|
|
|
8842
8842
|
Load a dataset hosted in the library as specified table type.
|
|
8843
8843
|
|
|
@@ -8851,7 +8851,7 @@ load_dataset(dataset: "Literal['small_table', 'game_revenue', 'nycflights']" = '
|
|
|
8851
8851
|
----------
|
|
8852
8852
|
dataset
|
|
8853
8853
|
The name of the dataset to load. Current options are `"small_table"`, `"game_revenue"`,
|
|
8854
|
-
and `"nycflights"`.
|
|
8854
|
+
`"nycflights"`, and `"global_sales"`.
|
|
8855
8855
|
tbl_type
|
|
8856
8856
|
The type of table to generate from the dataset. The named options are `"polars"`,
|
|
8857
8857
|
`"pandas"`, and `"duckdb"`.
|
|
@@ -8873,6 +8873,8 @@ load_dataset(dataset: "Literal['small_table', 'game_revenue', 'nycflights']" = '
|
|
|
8873
8873
|
they purchased, ads viewed, and the revenue generated.
|
|
8874
8874
|
- `"nycflights"`: A dataset with 336,776 rows and 18 columns. This dataset provides information
|
|
8875
8875
|
about flights departing from New York City airports (JFK, LGA, or EWR) in 2013.
|
|
8876
|
+
- `"global_sales"`: A dataset with 50,000 rows and 20 columns. Provides information about
|
|
8877
|
+
global sales of products across different regions and countries.
|
|
8876
8878
|
|
|
8877
8879
|
Supported DataFrame Types
|
|
8878
8880
|
-------------------------
|
|
@@ -8884,10 +8886,10 @@ load_dataset(dataset: "Literal['small_table', 'game_revenue', 'nycflights']" = '
|
|
|
8884
8886
|
|
|
8885
8887
|
Examples
|
|
8886
8888
|
--------
|
|
8887
|
-
Load the `"small_table"` dataset as a Polars DataFrame by calling `load_dataset()` with
|
|
8888
|
-
|
|
8889
|
+
Load the `"small_table"` dataset as a Polars DataFrame by calling `load_dataset()` with
|
|
8890
|
+
`dataset="small_table"` and `tbl_type="polars"`:
|
|
8889
8891
|
|
|
8890
|
-
Note that the `"small_table"` dataset is a
|
|
8892
|
+
Note that the `"small_table"` dataset is a Polars DataFrame and using the
|
|
8891
8893
|
[`preview()`](`pointblank.preview`) function will display the table in an HTML viewing
|
|
8892
8894
|
environment.
|
|
8893
8895
|
|
|
@@ -8915,6 +8917,19 @@ load_dataset(dataset: "Literal['small_table', 'game_revenue', 'nycflights']" = '
|
|
|
8915
8917
|
The `"nycflights"` dataset is a large dataset with 336,776 rows and 18 columns. This dataset is
|
|
8916
8918
|
truly a real-world dataset and provides information about flights originating from New York City
|
|
8917
8919
|
airports in 2013.
|
|
8920
|
+
|
|
8921
|
+
Finally, the `"global_sales"` dataset can be loaded as a Polars table by specifying the dataset
|
|
8922
|
+
name. Since `tbl_type=` is set to `"polars"` by default, we don't need to specify it:
|
|
8923
|
+
|
|
8924
|
+
```python
|
|
8925
|
+
global_sales = pb.load_dataset(dataset="global_sales")
|
|
8926
|
+
|
|
8927
|
+
pb.preview(global_sales)
|
|
8928
|
+
```
|
|
8929
|
+
|
|
8930
|
+
The `"global_sales"` dataset is a large dataset with 50,000 rows and 20 columns. Each record
|
|
8931
|
+
describes the sales of a particular product to a customer located in one of three global
|
|
8932
|
+
regions: North America, Europe, or Asia.
|
|
8918
8933
|
|
|
8919
8934
|
|
|
8920
8935
|
|
pointblank/schema.py
CHANGED
|
@@ -728,7 +728,14 @@ class Schema:
|
|
|
728
728
|
return new_schema
|
|
729
729
|
|
|
730
730
|
def __str__(self):
|
|
731
|
-
|
|
731
|
+
formatted_columns = []
|
|
732
|
+
for col in self.columns:
|
|
733
|
+
if len(col) == 1: # Only column name provided (no data type)
|
|
734
|
+
formatted_columns.append(f" {col[0]}: <ANY>")
|
|
735
|
+
else: # Both column name and data type provided
|
|
736
|
+
formatted_columns.append(f" {col[0]}: {col[1]}")
|
|
737
|
+
|
|
738
|
+
return "Pointblank Schema\n" + "\n".join(formatted_columns)
|
|
732
739
|
|
|
733
740
|
def __repr__(self):
|
|
734
741
|
return f"Schema(columns={self.columns})"
|
pointblank/validate.py
CHANGED
|
@@ -8031,7 +8031,7 @@ class Validate:
|
|
|
8031
8031
|
|
|
8032
8032
|
After interrogation is complete, the `Validate` object will have gathered information, and
|
|
8033
8033
|
we can use methods like [`n_passed()`](`pointblank.Validate.n_passed`),
|
|
8034
|
-
[`f_failed()`](`pointblank.Validate.f_failed`)
|
|
8034
|
+
[`f_failed()`](`pointblank.Validate.f_failed`), etc., to understand how the table performed
|
|
8035
8035
|
against the validation plan. A visual representation of the validation results can be viewed
|
|
8036
8036
|
by printing the `Validate` object; this will display the validation table in an HTML viewing
|
|
8037
8037
|
environment.
|
|
@@ -8772,6 +8772,10 @@ class Validate:
|
|
|
8772
8772
|
assertion made is printed in the `AssertionError` message if a failure occurs, ensuring
|
|
8773
8773
|
some details are preserved.
|
|
8774
8774
|
|
|
8775
|
+
If the validation has not yet been interrogated, this method will automatically call
|
|
8776
|
+
[`interrogate()`](`pointblank.Validate.interrogate`) with default parameters before checking
|
|
8777
|
+
for passing tests.
|
|
8778
|
+
|
|
8775
8779
|
Raises
|
|
8776
8780
|
-------
|
|
8777
8781
|
AssertionError
|
|
@@ -8781,8 +8785,9 @@ class Validate:
|
|
|
8781
8785
|
--------
|
|
8782
8786
|
In the example below, we'll use a simple Polars DataFrame with three columns (`a`, `b`, and
|
|
8783
8787
|
`c`). There will be three validation steps, and the second step will have a failing test
|
|
8784
|
-
unit (the value `10` isn't less than `9`).
|
|
8785
|
-
|
|
8788
|
+
unit (the value `10` isn't less than `9`). The `assert_passing()` method is used to assert
|
|
8789
|
+
that all validation steps passed perfectly, automatically performing the interrogation if
|
|
8790
|
+
needed.
|
|
8786
8791
|
|
|
8787
8792
|
```{python}
|
|
8788
8793
|
#| error: True
|
|
@@ -8803,12 +8808,16 @@ class Validate:
|
|
|
8803
8808
|
.col_vals_gt(columns="a", value=0)
|
|
8804
8809
|
.col_vals_lt(columns="b", value=9) # this assertion is false
|
|
8805
8810
|
.col_vals_in_set(columns="c", set=["a", "b"])
|
|
8806
|
-
.interrogate()
|
|
8807
8811
|
)
|
|
8808
8812
|
|
|
8813
|
+
# No need to call [`interrogate()`](`pointblank.Validate.interrogate`) explicitly
|
|
8809
8814
|
validation.assert_passing()
|
|
8810
8815
|
```
|
|
8811
8816
|
"""
|
|
8817
|
+
# Check if validation has been interrogated
|
|
8818
|
+
if not hasattr(self, "time_start") or self.time_start is None:
|
|
8819
|
+
# Auto-interrogate with default parameters
|
|
8820
|
+
self.interrogate()
|
|
8812
8821
|
|
|
8813
8822
|
if not self.all_passed():
|
|
8814
8823
|
failed_steps = [
|
|
@@ -8821,6 +8830,167 @@ class Validate:
|
|
|
8821
8830
|
)
|
|
8822
8831
|
raise AssertionError(msg)
|
|
8823
8832
|
|
|
8833
|
+
def assert_below_threshold(
|
|
8834
|
+
self, level: str = "warning", i: int = None, message: str = None
|
|
8835
|
+
) -> None:
|
|
8836
|
+
"""
|
|
8837
|
+
Raise an `AssertionError` if validation steps exceed a specified threshold level.
|
|
8838
|
+
|
|
8839
|
+
The `assert_below_threshold()` method checks whether validation steps' failure rates are
|
|
8840
|
+
below a given threshold level (`"warning"`, `"error"`, or `"critical"`). This is
|
|
8841
|
+
particularly useful in automated testing environments where you want to ensure your data
|
|
8842
|
+
quality meets minimum standards before proceeding.
|
|
8843
|
+
|
|
8844
|
+
If any validation step exceeds the specified threshold level, an `AssertionError` will be
|
|
8845
|
+
raised with details about which steps failed. If the validation has not yet been
|
|
8846
|
+
interrogated, this method will automatically call
|
|
8847
|
+
[`interrogate()`](`pointblank.Validate.interrogate`) with default parameters.
|
|
8848
|
+
|
|
8849
|
+
Parameters
|
|
8850
|
+
----------
|
|
8851
|
+
level
|
|
8852
|
+
The threshold level to check against, which could be any of `"warning"` (the default),
|
|
8853
|
+
`"error"`, or `"critical"`. An `AssertionError` will be raised if any validation step
|
|
8854
|
+
exceeds this level.
|
|
8855
|
+
i
|
|
8856
|
+
Specific validation step number(s) to check. Can be provided as a single integer or a
|
|
8857
|
+
list of integers. If `None` (the default), all steps are checked.
|
|
8858
|
+
message
|
|
8859
|
+
Custom error message to use if assertion fails. If `None`, a default message will be
|
|
8860
|
+
generated that lists the specific steps that exceeded the threshold.
|
|
8861
|
+
|
|
8862
|
+
Returns
|
|
8863
|
+
-------
|
|
8864
|
+
None
|
|
8865
|
+
|
|
8866
|
+
Raises
|
|
8867
|
+
------
|
|
8868
|
+
AssertionError
|
|
8869
|
+
If any specified validation step exceeds the given threshold level.
|
|
8870
|
+
ValueError
|
|
8871
|
+
If an invalid threshold level is provided.
|
|
8872
|
+
|
|
8873
|
+
Examples
|
|
8874
|
+
--------
|
|
8875
|
+
```{python}
|
|
8876
|
+
#| echo: false
|
|
8877
|
+
#| output: false
|
|
8878
|
+
import pointblank as pb
|
|
8879
|
+
pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
|
|
8880
|
+
```
|
|
8881
|
+
Below are some examples of how to use the `assert_below_threshold()` method. First, we'll
|
|
8882
|
+
create a simple Polars DataFrame with two columns (`a` and `b`).
|
|
8883
|
+
|
|
8884
|
+
```{python}
|
|
8885
|
+
import polars as pl
|
|
8886
|
+
|
|
8887
|
+
tbl = pl.DataFrame({
|
|
8888
|
+
"a": [7, 4, 9, 7, 12],
|
|
8889
|
+
"b": [9, 8, 10, 5, 10]
|
|
8890
|
+
})
|
|
8891
|
+
```
|
|
8892
|
+
|
|
8893
|
+
Then a validation plan will be created with thresholds (`warning=0.1`, `error=0.2`,
|
|
8894
|
+
`critical=0.3`). After interrogating, we display the validation report table:
|
|
8895
|
+
|
|
8896
|
+
```{python}
|
|
8897
|
+
import pointblank as pb
|
|
8898
|
+
|
|
8899
|
+
validation = (
|
|
8900
|
+
pb.Validate(data=tbl, thresholds=(0.1, 0.2, 0.3))
|
|
8901
|
+
.col_vals_gt(columns="a", value=5) # 1 failing test unit
|
|
8902
|
+
.col_vals_lt(columns="b", value=10) # 2 failing test units
|
|
8903
|
+
.interrogate()
|
|
8904
|
+
)
|
|
8905
|
+
|
|
8906
|
+
validation
|
|
8907
|
+
```
|
|
8908
|
+
|
|
8909
|
+
Using `assert_below_threshold(level="warning")` will raise an `AssertionError` if any step
|
|
8910
|
+
exceeds the 'warning' threshold:
|
|
8911
|
+
|
|
8912
|
+
```{python}
|
|
8913
|
+
try:
|
|
8914
|
+
validation.assert_below_threshold(level="warning")
|
|
8915
|
+
except AssertionError as e:
|
|
8916
|
+
print(f"Assertion failed: {e}")
|
|
8917
|
+
```
|
|
8918
|
+
|
|
8919
|
+
Check a specific step against the 'critical' threshold using the `i=` parameter:
|
|
8920
|
+
|
|
8921
|
+
```{python}
|
|
8922
|
+
validation.assert_below_threshold(level="critical", i=1) # Won't raise an error
|
|
8923
|
+
```
|
|
8924
|
+
|
|
8925
|
+
As the first step is below the 'critical' threshold (it exceeds the 'warning' and 'error'
|
|
8926
|
+
thresholds), no error is raised and nothing is printed.
|
|
8927
|
+
|
|
8928
|
+
We can also provide a custom error message with the `message=` parameter. Let's try that
|
|
8929
|
+
here:
|
|
8930
|
+
|
|
8931
|
+
```{python}
|
|
8932
|
+
try:
|
|
8933
|
+
validation.assert_below_threshold(
|
|
8934
|
+
level="error",
|
|
8935
|
+
message="Data quality too low for processing!"
|
|
8936
|
+
)
|
|
8937
|
+
except AssertionError as e:
|
|
8938
|
+
print(f"Custom error: {e}")
|
|
8939
|
+
```
|
|
8940
|
+
|
|
8941
|
+
See Also
|
|
8942
|
+
--------
|
|
8943
|
+
- [`warning()`](`pointblank.Validate.warning`): Get the 'warning' status for each validation
|
|
8944
|
+
step
|
|
8945
|
+
- [`error()`](`pointblank.Validate.error`): Get the 'error' status for each validation step
|
|
8946
|
+
- [`critical()`](`pointblank.Validate.critical`): Get the 'critical' status for each
|
|
8947
|
+
validation step
|
|
8948
|
+
- [`assert_passing()`](`pointblank.Validate.assert_passing`): Assert all validations pass
|
|
8949
|
+
completely
|
|
8950
|
+
"""
|
|
8951
|
+
# Check if validation has been interrogated
|
|
8952
|
+
if not hasattr(self, "time_start") or self.time_start is None:
|
|
8953
|
+
# Auto-interrogate with default parameters
|
|
8954
|
+
self.interrogate()
|
|
8955
|
+
|
|
8956
|
+
# Validate the level parameter
|
|
8957
|
+
level = level.lower()
|
|
8958
|
+
if level not in ["warning", "error", "critical"]:
|
|
8959
|
+
raise ValueError(
|
|
8960
|
+
f"Invalid threshold level: {level}. Must be one of 'warning', 'error', or 'critical'."
|
|
8961
|
+
)
|
|
8962
|
+
|
|
8963
|
+
# Get the threshold status using the appropriate method
|
|
8964
|
+
if level == "warning":
|
|
8965
|
+
status = self.warning(i=i)
|
|
8966
|
+
elif level == "error":
|
|
8967
|
+
status = self.error(i=i)
|
|
8968
|
+
elif level == "critical":
|
|
8969
|
+
status = self.critical(i=i)
|
|
8970
|
+
|
|
8971
|
+
# Find any steps that exceeded the threshold
|
|
8972
|
+
failures = []
|
|
8973
|
+
for step_num, exceeded in status.items():
|
|
8974
|
+
if exceeded:
|
|
8975
|
+
# Get the step's description
|
|
8976
|
+
validation_step = self.validation_info[step_num - 1]
|
|
8977
|
+
step_descriptor = (
|
|
8978
|
+
validation_step.autobrief
|
|
8979
|
+
if hasattr(validation_step, "autobrief") and validation_step.autobrief
|
|
8980
|
+
else f"Validation step {step_num}"
|
|
8981
|
+
)
|
|
8982
|
+
failures.append(f"Step {step_num}: {step_descriptor}")
|
|
8983
|
+
|
|
8984
|
+
# If any failures were found, raise an AssertionError
|
|
8985
|
+
if failures:
|
|
8986
|
+
if message:
|
|
8987
|
+
msg = message
|
|
8988
|
+
else:
|
|
8989
|
+
msg = f"The following steps exceeded the {level} threshold level:\n" + "\n".join(
|
|
8990
|
+
failures
|
|
8991
|
+
)
|
|
8992
|
+
raise AssertionError(msg)
|
|
8993
|
+
|
|
8824
8994
|
def n(self, i: int | list[int] | None = None, scalar: bool = False) -> dict[int, int] | int:
|
|
8825
8995
|
"""
|
|
8826
8996
|
Provides a dictionary of the number of test units for each validation step.
|
|
{pointblank-0.9.4.dist-info → pointblank-0.9.5.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pointblank
|
|
3
|
-
Version: 0.9.4
|
|
3
|
+
Version: 0.9.5
|
|
4
4
|
Summary: Find out if your data is what you think it is.
|
|
5
5
|
Author-email: Richard Iannone <riannone@me.com>
|
|
6
6
|
License: MIT License
|
|
@@ -103,7 +103,7 @@ _Data validation made beautiful and powerful_
|
|
|
103
103
|
|
|
104
104
|
</div>
|
|
105
105
|
|
|
106
|
-
<div align="
|
|
106
|
+
<div align="center">
|
|
107
107
|
<a href="translations/README.fr.md">Français</a> |
|
|
108
108
|
<a href="translations/README.de.md">Deutsch</a> |
|
|
109
109
|
<a href="translations/README.it.md">Italiano</a> |
|
|
@@ -112,7 +112,9 @@ _Data validation made beautiful and powerful_
|
|
|
112
112
|
<a href="translations/README.nl.md">Nederlands</a> |
|
|
113
113
|
<a href="translations/README.zh-CN.md">简体中文</a> |
|
|
114
114
|
<a href="translations/README.ja.md">日本語</a> |
|
|
115
|
-
<a href="translations/README.ko.md">한국어</a>
|
|
115
|
+
<a href="translations/README.ko.md">한국어</a> |
|
|
116
|
+
<a href="translations/README.hi.md">हिन्दी</a> |
|
|
117
|
+
<a href="translations/README.ar.md">العربية</a>
|
|
116
118
|
</div>
|
|
117
119
|
|
|
118
120
|
## What is Pointblank?
|
|
{pointblank-0.9.4.dist-info → pointblank-0.9.5.dist-info}/RECORD
CHANGED
|
@@ -12,11 +12,11 @@ pointblank/assistant.py,sha256=ZIQJKTy9rDwq_Wmr1FMp0J7Q3ekxSgF3_tK0p4PTEUM,14850
|
|
|
12
12
|
pointblank/column.py,sha256=LumGbnterw5VM7-2-7Za3jdlug1VVS9a3TOH0Y1E5eg,76548
|
|
13
13
|
pointblank/datascan.py,sha256=rRz0hR81uTgd1e9OfLdfsNYXRk8vcpE8PW8exu-GJoE,47697
|
|
14
14
|
pointblank/draft.py,sha256=cusr4fBiNncCKIOU8UwvJcvkBeBuUnqH_UfYp9dtNss,15777
|
|
15
|
-
pointblank/schema.py,sha256=
|
|
15
|
+
pointblank/schema.py,sha256=nHkOXykPw7mTmVGjT67hjx13iKySZ5xsfVgPUQV0yCM,44588
|
|
16
16
|
pointblank/tf.py,sha256=8o_8m4i01teulEe3-YYMotSNf3tImjBMInsvdjSAO5Q,8844
|
|
17
17
|
pointblank/thresholds.py,sha256=cweex25DwBPrsvPW12pRoaTQnwFpUUwqTdHyFJXTnN0,25760
|
|
18
|
-
pointblank/validate.py,sha256=
|
|
19
|
-
pointblank/data/api-docs.txt,sha256=
|
|
18
|
+
pointblank/validate.py,sha256=DfTChQcLyaJFNLdjkG3jQAsY7GtLvTHSbxkzKusG9I4,615287
|
|
19
|
+
pointblank/data/api-docs.txt,sha256=Sk2ePat_ngz3tAizQVSo7uG_fInv638HFLmM6041osM,482808
|
|
20
20
|
pointblank/data/game_revenue-duckdb.zip,sha256=tKIVx48OGLYGsQPS3h5AjA2Nyq_rfEpLCjBiFUWhagU,35880
|
|
21
21
|
pointblank/data/game_revenue.zip,sha256=7c9EvHLyi93CHUd4p3dM4CZ-GucFCtXKSPxgLojL32U,33749
|
|
22
22
|
pointblank/data/global_sales-duckdb.zip,sha256=2ok_cvJ1ZuSkXnw0R6_OkKYRTWhJ-jJEMq2VYsv5fqY,1336390
|
|
@@ -26,8 +26,8 @@ pointblank/data/nycflights.zip,sha256=yVjbUaKUz2LydSdF9cABuir0VReHBBgV7shiNWSd0m
|
|
|
26
26
|
pointblank/data/polars-api-docs.txt,sha256=KGcS-BOtUs9zgpkWfXD-GFdFh4O_zjdkpX7msHjztLg,198045
|
|
27
27
|
pointblank/data/small_table-duckdb.zip,sha256=BhTaZ2CRS4-9Z1uVhOU6HggvW3XCar7etMznfENIcOc,2028
|
|
28
28
|
pointblank/data/small_table.zip,sha256=lmFb90Nb-v5X559Ikjg31YLAXuRyMkD9yLRElkXPMzQ,472
|
|
29
|
-
pointblank-0.9.
|
|
30
|
-
pointblank-0.9.
|
|
31
|
-
pointblank-0.9.
|
|
32
|
-
pointblank-0.9.
|
|
33
|
-
pointblank-0.9.
|
|
29
|
+
pointblank-0.9.5.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
|
|
30
|
+
pointblank-0.9.5.dist-info/METADATA,sha256=8SHBgMHqrX9T2cMOfa_cQMDw60NbCmMB1xLgrwWk5vw,14857
|
|
31
|
+
pointblank-0.9.5.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
|
|
32
|
+
pointblank-0.9.5.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
|
|
33
|
+
pointblank-0.9.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|