pointblank 0.9.2__py3-none-any.whl → 0.9.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pointblank/_typing.py +35 -24
- pointblank/data/api-docs.txt +20 -5
- pointblank/data/global_sales-duckdb.zip +0 -0
- pointblank/data/global_sales.zip +0 -0
- pointblank/schema.py +8 -1
- pointblank/validate.py +242 -13
- {pointblank-0.9.2.dist-info → pointblank-0.9.5.dist-info}/METADATA +5 -3
- {pointblank-0.9.2.dist-info → pointblank-0.9.5.dist-info}/RECORD +11 -9
- {pointblank-0.9.2.dist-info → pointblank-0.9.5.dist-info}/WHEEL +1 -1
- {pointblank-0.9.2.dist-info → pointblank-0.9.5.dist-info}/licenses/LICENSE +0 -0
- {pointblank-0.9.2.dist-info → pointblank-0.9.5.dist-info}/top_level.txt +0 -0
pointblank/_typing.py
CHANGED
|
@@ -1,26 +1,37 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
SegmentSpec
|
|
3
|
+
import sys
|
|
4
|
+
from typing import List, Tuple, Union
|
|
5
|
+
|
|
6
|
+
# Check Python version for TypeAlias support
|
|
7
|
+
if sys.version_info >= (3, 10):
|
|
8
|
+
from typing import TypeAlias
|
|
9
|
+
|
|
10
|
+
# Python 3.10+ style type aliases
|
|
11
|
+
AbsoluteBounds: TypeAlias = Tuple[int, int]
|
|
12
|
+
RelativeBounds: TypeAlias = Tuple[float, float]
|
|
13
|
+
Tolerance: TypeAlias = Union[int, float, AbsoluteBounds, RelativeBounds]
|
|
14
|
+
SegmentValue: TypeAlias = Union[str, List[str]]
|
|
15
|
+
SegmentTuple: TypeAlias = Tuple[str, SegmentValue]
|
|
16
|
+
SegmentItem: TypeAlias = Union[str, SegmentTuple]
|
|
17
|
+
SegmentSpec: TypeAlias = Union[str, SegmentTuple, List[SegmentItem]]
|
|
18
|
+
else:
|
|
19
|
+
# Python 3.8 and 3.9 compatible type aliases
|
|
20
|
+
AbsoluteBounds = Tuple[int, int]
|
|
21
|
+
RelativeBounds = Tuple[float, float]
|
|
22
|
+
Tolerance = Union[int, float, AbsoluteBounds, RelativeBounds]
|
|
23
|
+
SegmentValue = Union[str, List[str]]
|
|
24
|
+
SegmentTuple = Tuple[str, SegmentValue]
|
|
25
|
+
SegmentItem = Union[str, SegmentTuple]
|
|
26
|
+
SegmentSpec = Union[str, SegmentTuple, List[SegmentItem]]
|
|
27
|
+
|
|
28
|
+
# Add docstrings for better IDE support
|
|
29
|
+
AbsoluteBounds.__doc__ = "Absolute bounds (i.e., plus or minus)"
|
|
30
|
+
RelativeBounds.__doc__ = "Relative bounds (i.e., plus or minus some percent)"
|
|
31
|
+
Tolerance.__doc__ = "Tolerance (i.e., the allowed deviation)"
|
|
32
|
+
SegmentValue.__doc__ = "Value(s) that can be used in a segment tuple"
|
|
33
|
+
SegmentTuple.__doc__ = "(column, value(s)) format for segments"
|
|
34
|
+
SegmentItem.__doc__ = "Individual segment item (string or tuple)"
|
|
35
|
+
SegmentSpec.__doc__ = (
|
|
36
|
+
"Full segment specification options (i.e., all options for segment specification)"
|
|
37
|
+
)
|
pointblank/data/api-docs.txt
CHANGED
|
@@ -8837,7 +8837,7 @@ assistant(model: 'str', data: 'FrameT | Any | None' = None, tbl_name: 'str | Non
|
|
|
8837
8837
|
Pandas DataFrame, the availability of Ibis is not needed.
|
|
8838
8838
|
|
|
8839
8839
|
|
|
8840
|
-
load_dataset(dataset: "Literal['small_table', 'game_revenue', 'nycflights']" = 'small_table', tbl_type: "Literal['polars', 'pandas', 'duckdb']" = 'polars') -> 'FrameT | Any'
|
|
8840
|
+
load_dataset(dataset: "Literal['small_table', 'game_revenue', 'nycflights', 'global_sales']" = 'small_table', tbl_type: "Literal['polars', 'pandas', 'duckdb']" = 'polars') -> 'FrameT | Any'
|
|
8841
8841
|
|
|
8842
8842
|
Load a dataset hosted in the library as specified table type.
|
|
8843
8843
|
|
|
@@ -8851,7 +8851,7 @@ load_dataset(dataset: "Literal['small_table', 'game_revenue', 'nycflights']" = '
|
|
|
8851
8851
|
----------
|
|
8852
8852
|
dataset
|
|
8853
8853
|
The name of the dataset to load. Current options are `"small_table"`, `"game_revenue"`,
|
|
8854
|
-
and `"
|
|
8854
|
+
`"nycflights"`, and `"global_sales"`.
|
|
8855
8855
|
tbl_type
|
|
8856
8856
|
The type of table to generate from the dataset. The named options are `"polars"`,
|
|
8857
8857
|
`"pandas"`, and `"duckdb"`.
|
|
@@ -8873,6 +8873,8 @@ load_dataset(dataset: "Literal['small_table', 'game_revenue', 'nycflights']" = '
|
|
|
8873
8873
|
they purchased, ads viewed, and the revenue generated.
|
|
8874
8874
|
- `"nycflights"`: A dataset with 336,776 rows and 18 columns. This dataset provides information
|
|
8875
8875
|
about flights departing from New York City airports (JFK, LGA, or EWR) in 2013.
|
|
8876
|
+
- `"global_sales"`: A dataset with 50,000 rows and 20 columns. Provides information about
|
|
8877
|
+
global sales of products across different regions and countries.
|
|
8876
8878
|
|
|
8877
8879
|
Supported DataFrame Types
|
|
8878
8880
|
-------------------------
|
|
@@ -8884,10 +8886,10 @@ load_dataset(dataset: "Literal['small_table', 'game_revenue', 'nycflights']" = '
|
|
|
8884
8886
|
|
|
8885
8887
|
Examples
|
|
8886
8888
|
--------
|
|
8887
|
-
Load the `"small_table"` dataset as a Polars DataFrame by calling `load_dataset()` with
|
|
8888
|
-
|
|
8889
|
+
Load the `"small_table"` dataset as a Polars DataFrame by calling `load_dataset()` with
|
|
8890
|
+
`dataset="small_table"` and `tbl_type="polars"`:
|
|
8889
8891
|
|
|
8890
|
-
Note that the `"small_table"` dataset is a
|
|
8892
|
+
Note that the `"small_table"` dataset is a Polars DataFrame and using the
|
|
8891
8893
|
[`preview()`](`pointblank.preview`) function will display the table in an HTML viewing
|
|
8892
8894
|
environment.
|
|
8893
8895
|
|
|
@@ -8915,6 +8917,19 @@ load_dataset(dataset: "Literal['small_table', 'game_revenue', 'nycflights']" = '
|
|
|
8915
8917
|
The `"nycflights"` dataset is a large dataset with 336,776 rows and 18 columns. This dataset is
|
|
8916
8918
|
truly a real-world dataset and provides information about flights originating from New York City
|
|
8917
8919
|
airports in 2013.
|
|
8920
|
+
|
|
8921
|
+
Finally, the `"global_sales"` dataset can be loaded as a Polars table by specifying the dataset
|
|
8922
|
+
name. Since `tbl_type=` is set to `"polars"` by default, we don't need to specify it:
|
|
8923
|
+
|
|
8924
|
+
```python
|
|
8925
|
+
global_sales = pb.load_dataset(dataset="global_sales")
|
|
8926
|
+
|
|
8927
|
+
pb.preview(global_sales)
|
|
8928
|
+
```
|
|
8929
|
+
|
|
8930
|
+
The `"global_sales"` dataset is a large dataset with 50,000 rows and 20 columns. Each record
|
|
8931
|
+
describes the sales of a particular product to a customer located in one of three global
|
|
8932
|
+
regions: North America, Europe, or Asia.
|
|
8918
8933
|
|
|
8919
8934
|
|
|
8920
8935
|
|
|
Binary file
|
|
Binary file
|
pointblank/schema.py
CHANGED
|
@@ -728,7 +728,14 @@ class Schema:
|
|
|
728
728
|
return new_schema
|
|
729
729
|
|
|
730
730
|
def __str__(self):
|
|
731
|
-
|
|
731
|
+
formatted_columns = []
|
|
732
|
+
for col in self.columns:
|
|
733
|
+
if len(col) == 1: # Only column name provided (no data type)
|
|
734
|
+
formatted_columns.append(f" {col[0]}: <ANY>")
|
|
735
|
+
else: # Both column name and data type provided
|
|
736
|
+
formatted_columns.append(f" {col[0]}: {col[1]}")
|
|
737
|
+
|
|
738
|
+
return "Pointblank Schema\n" + "\n".join(formatted_columns)
|
|
732
739
|
|
|
733
740
|
def __repr__(self):
|
|
734
741
|
return f"Schema(columns={self.columns})"
|
pointblank/validate.py
CHANGED
|
@@ -385,7 +385,7 @@ def config(
|
|
|
385
385
|
|
|
386
386
|
|
|
387
387
|
def load_dataset(
|
|
388
|
-
dataset: Literal["small_table", "game_revenue", "nycflights"] = "small_table",
|
|
388
|
+
dataset: Literal["small_table", "game_revenue", "nycflights", "global_sales"] = "small_table",
|
|
389
389
|
tbl_type: Literal["polars", "pandas", "duckdb"] = "polars",
|
|
390
390
|
) -> FrameT | Any:
|
|
391
391
|
"""
|
|
@@ -401,7 +401,7 @@ def load_dataset(
|
|
|
401
401
|
----------
|
|
402
402
|
dataset
|
|
403
403
|
The name of the dataset to load. Current options are `"small_table"`, `"game_revenue"`,
|
|
404
|
-
and `"
|
|
404
|
+
`"nycflights"`, and `"global_sales"`.
|
|
405
405
|
tbl_type
|
|
406
406
|
The type of table to generate from the dataset. The named options are `"polars"`,
|
|
407
407
|
`"pandas"`, and `"duckdb"`.
|
|
@@ -423,6 +423,8 @@ def load_dataset(
|
|
|
423
423
|
they purchased, ads viewed, and the revenue generated.
|
|
424
424
|
- `"nycflights"`: A dataset with 336,776 rows and 18 columns. This dataset provides information
|
|
425
425
|
about flights departing from New York City airports (JFK, LGA, or EWR) in 2013.
|
|
426
|
+
- `"global_sales"`: A dataset with 50,000 rows and 20 columns. Provides information about
|
|
427
|
+
global sales of products across different regions and countries.
|
|
426
428
|
|
|
427
429
|
Supported DataFrame Types
|
|
428
430
|
-------------------------
|
|
@@ -434,18 +436,18 @@ def load_dataset(
|
|
|
434
436
|
|
|
435
437
|
Examples
|
|
436
438
|
--------
|
|
437
|
-
Load the `"small_table"` dataset as a Polars DataFrame by calling `load_dataset()` with
|
|
438
|
-
|
|
439
|
+
Load the `"small_table"` dataset as a Polars DataFrame by calling `load_dataset()` with
|
|
440
|
+
`dataset="small_table"` and `tbl_type="polars"`:
|
|
439
441
|
|
|
440
442
|
```{python}
|
|
441
443
|
import pointblank as pb
|
|
442
444
|
|
|
443
|
-
small_table = pb.load_dataset()
|
|
445
|
+
small_table = pb.load_dataset(dataset="small_table", tbl_type="polars")
|
|
444
446
|
|
|
445
447
|
pb.preview(small_table)
|
|
446
448
|
```
|
|
447
449
|
|
|
448
|
-
Note that the `"small_table"` dataset is a
|
|
450
|
+
Note that the `"small_table"` dataset is a Polars DataFrame and using the
|
|
449
451
|
[`preview()`](`pointblank.preview`) function will display the table in an HTML viewing
|
|
450
452
|
environment.
|
|
451
453
|
|
|
@@ -473,10 +475,23 @@ def load_dataset(
|
|
|
473
475
|
The `"nycflights"` dataset is a large dataset with 336,776 rows and 18 columns. This dataset is
|
|
474
476
|
truly a real-world dataset and provides information about flights originating from New York City
|
|
475
477
|
airports in 2013.
|
|
478
|
+
|
|
479
|
+
Finally, the `"global_sales"` dataset can be loaded as a Polars table by specifying the dataset
|
|
480
|
+
name. Since `tbl_type=` is set to `"polars"` by default, we don't need to specify it:
|
|
481
|
+
|
|
482
|
+
```{python}
|
|
483
|
+
global_sales = pb.load_dataset(dataset="global_sales")
|
|
484
|
+
|
|
485
|
+
pb.preview(global_sales)
|
|
486
|
+
```
|
|
487
|
+
|
|
488
|
+
The `"global_sales"` dataset is a large dataset with 50,000 rows and 20 columns. Each record
|
|
489
|
+
describes the sales of a particular product to a customer located in one of three global
|
|
490
|
+
regions: North America, Europe, or Asia.
|
|
476
491
|
"""
|
|
477
492
|
|
|
478
493
|
# Raise an error if the dataset is not from the list of provided datasets
|
|
479
|
-
if dataset not in ["small_table", "game_revenue", "nycflights"]:
|
|
494
|
+
if dataset not in ["small_table", "game_revenue", "nycflights", "global_sales"]:
|
|
480
495
|
raise ValueError(
|
|
481
496
|
f"The dataset name `{dataset}` is not valid. Choose one of the following:\n"
|
|
482
497
|
"- `small_table`\n"
|
|
@@ -518,6 +533,7 @@ def load_dataset(
|
|
|
518
533
|
"small_table": ["date_time", "date"],
|
|
519
534
|
"game_revenue": ["session_start", "time", "start_day"],
|
|
520
535
|
"nycflights": [],
|
|
536
|
+
"global_sales": ["timestamp"],
|
|
521
537
|
}
|
|
522
538
|
|
|
523
539
|
dataset = pd.read_csv(data_path, parse_dates=parse_date_columns[dataset])
|
|
@@ -8015,7 +8031,7 @@ class Validate:
|
|
|
8015
8031
|
|
|
8016
8032
|
After interrogation is complete, the `Validate` object will have gathered information, and
|
|
8017
8033
|
we can use methods like [`n_passed()`](`pointblank.Validate.n_passed`),
|
|
8018
|
-
[`f_failed()`](`pointblank.Validate.f_failed`)
|
|
8034
|
+
[`f_failed()`](`pointblank.Validate.f_failed`), etc., to understand how the table performed
|
|
8019
8035
|
against the validation plan. A visual representation of the validation results can be viewed
|
|
8020
8036
|
by printing the `Validate` object; this will display the validation table in an HTML viewing
|
|
8021
8037
|
environment.
|
|
@@ -8142,6 +8158,7 @@ class Validate:
|
|
|
8142
8158
|
inclusive = validation.inclusive
|
|
8143
8159
|
na_pass = validation.na_pass
|
|
8144
8160
|
threshold = validation.thresholds
|
|
8161
|
+
segment = validation.segments
|
|
8145
8162
|
|
|
8146
8163
|
assertion_method = ASSERTION_TYPE_METHOD_MAP[assertion_type]
|
|
8147
8164
|
assertion_category = METHOD_CATEGORY_MAP[assertion_method]
|
|
@@ -8149,7 +8166,14 @@ class Validate:
|
|
|
8149
8166
|
|
|
8150
8167
|
# Process the `brief` text for the validation step by including template variables to
|
|
8151
8168
|
# the user-supplied text
|
|
8152
|
-
validation.brief = _process_brief(
|
|
8169
|
+
validation.brief = _process_brief(
|
|
8170
|
+
brief=validation.brief,
|
|
8171
|
+
step=validation.i,
|
|
8172
|
+
col=column,
|
|
8173
|
+
values=value,
|
|
8174
|
+
thresholds=threshold,
|
|
8175
|
+
segment=segment,
|
|
8176
|
+
)
|
|
8153
8177
|
|
|
8154
8178
|
# Generate the autobrief description for the validation step; it's important to perform
|
|
8155
8179
|
# that here since text components like the column and the value(s) have been resolved
|
|
@@ -8748,6 +8772,10 @@ class Validate:
|
|
|
8748
8772
|
assertion made is printed in the `AssertionError` message if a failure occurs, ensuring
|
|
8749
8773
|
some details are preserved.
|
|
8750
8774
|
|
|
8775
|
+
If the validation has not yet been interrogated, this method will automatically call
|
|
8776
|
+
[`interrogate()`](`pointblank.Validate.interrogate`) with default parameters before checking
|
|
8777
|
+
for passing tests.
|
|
8778
|
+
|
|
8751
8779
|
Raises
|
|
8752
8780
|
-------
|
|
8753
8781
|
AssertionError
|
|
@@ -8757,8 +8785,9 @@ class Validate:
|
|
|
8757
8785
|
--------
|
|
8758
8786
|
In the example below, we'll use a simple Polars DataFrame with three columns (`a`, `b`, and
|
|
8759
8787
|
`c`). There will be three validation steps, and the second step will have a failing test
|
|
8760
|
-
unit (the value `10` isn't less than `9`).
|
|
8761
|
-
|
|
8788
|
+
unit (the value `10` isn't less than `9`). The `assert_passing()` method is used to assert
|
|
8789
|
+
that all validation steps passed perfectly, automatically performing the interrogation if
|
|
8790
|
+
needed.
|
|
8762
8791
|
|
|
8763
8792
|
```{python}
|
|
8764
8793
|
#| error: True
|
|
@@ -8779,12 +8808,16 @@ class Validate:
|
|
|
8779
8808
|
.col_vals_gt(columns="a", value=0)
|
|
8780
8809
|
.col_vals_lt(columns="b", value=9) # this assertion is false
|
|
8781
8810
|
.col_vals_in_set(columns="c", set=["a", "b"])
|
|
8782
|
-
.interrogate()
|
|
8783
8811
|
)
|
|
8784
8812
|
|
|
8813
|
+
# No need to call `interrogate()` explicitly; `assert_passing()` runs it if needed
|
|
8785
8814
|
validation.assert_passing()
|
|
8786
8815
|
```
|
|
8787
8816
|
"""
|
|
8817
|
+
# Check if validation has been interrogated
|
|
8818
|
+
if not hasattr(self, "time_start") or self.time_start is None:
|
|
8819
|
+
# Auto-interrogate with default parameters
|
|
8820
|
+
self.interrogate()
|
|
8788
8821
|
|
|
8789
8822
|
if not self.all_passed():
|
|
8790
8823
|
failed_steps = [
|
|
@@ -8797,6 +8830,167 @@ class Validate:
|
|
|
8797
8830
|
)
|
|
8798
8831
|
raise AssertionError(msg)
|
|
8799
8832
|
|
|
8833
|
+
def assert_below_threshold(
|
|
8834
|
+
self, level: str = "warning", i: int = None, message: str = None
|
|
8835
|
+
) -> None:
|
|
8836
|
+
"""
|
|
8837
|
+
Raise an `AssertionError` if validation steps exceed a specified threshold level.
|
|
8838
|
+
|
|
8839
|
+
The `assert_below_threshold()` method checks whether validation steps' failure rates are
|
|
8840
|
+
below a given threshold level (`"warning"`, `"error"`, or `"critical"`). This is
|
|
8841
|
+
particularly useful in automated testing environments where you want to ensure your data
|
|
8842
|
+
quality meets minimum standards before proceeding.
|
|
8843
|
+
|
|
8844
|
+
If any validation step exceeds the specified threshold level, an `AssertionError` will be
|
|
8845
|
+
raised with details about which steps failed. If the validation has not yet been
|
|
8846
|
+
interrogated, this method will automatically call
|
|
8847
|
+
[`interrogate()`](`pointblank.Validate.interrogate`) with default parameters.
|
|
8848
|
+
|
|
8849
|
+
Parameters
|
|
8850
|
+
----------
|
|
8851
|
+
level
|
|
8852
|
+
The threshold level to check against, which could be any of `"warning"` (the default),
|
|
8853
|
+
`"error"`, or `"critical"`. An `AssertionError` will be raised if any validation step
|
|
8854
|
+
exceeds this level.
|
|
8855
|
+
i
|
|
8856
|
+
Specific validation step number(s) to check. Can be provided as a single integer or a
|
|
8857
|
+
list of integers. If `None` (the default), all steps are checked.
|
|
8858
|
+
message
|
|
8859
|
+
Custom error message to use if assertion fails. If `None`, a default message will be
|
|
8860
|
+
generated that lists the specific steps that exceeded the threshold.
|
|
8861
|
+
|
|
8862
|
+
Returns
|
|
8863
|
+
-------
|
|
8864
|
+
None
|
|
8865
|
+
|
|
8866
|
+
Raises
|
|
8867
|
+
------
|
|
8868
|
+
AssertionError
|
|
8869
|
+
If any specified validation step exceeds the given threshold level.
|
|
8870
|
+
ValueError
|
|
8871
|
+
If an invalid threshold level is provided.
|
|
8872
|
+
|
|
8873
|
+
Examples
|
|
8874
|
+
--------
|
|
8875
|
+
```{python}
|
|
8876
|
+
#| echo: false
|
|
8877
|
+
#| output: false
|
|
8878
|
+
import pointblank as pb
|
|
8879
|
+
pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
|
|
8880
|
+
```
|
|
8881
|
+
Below are some examples of how to use the `assert_below_threshold()` method. First, we'll
|
|
8882
|
+
create a simple Polars DataFrame with two columns (`a` and `b`).
|
|
8883
|
+
|
|
8884
|
+
```{python}
|
|
8885
|
+
import polars as pl
|
|
8886
|
+
|
|
8887
|
+
tbl = pl.DataFrame({
|
|
8888
|
+
"a": [7, 4, 9, 7, 12],
|
|
8889
|
+
"b": [9, 8, 10, 5, 10]
|
|
8890
|
+
})
|
|
8891
|
+
```
|
|
8892
|
+
|
|
8893
|
+
Then a validation plan will be created with thresholds (`warning=0.1`, `error=0.2`,
|
|
8894
|
+
`critical=0.3`). After interrogating, we display the validation report table:
|
|
8895
|
+
|
|
8896
|
+
```{python}
|
|
8897
|
+
import pointblank as pb
|
|
8898
|
+
|
|
8899
|
+
validation = (
|
|
8900
|
+
pb.Validate(data=tbl, thresholds=(0.1, 0.2, 0.3))
|
|
8901
|
+
.col_vals_gt(columns="a", value=5) # 1 failing test unit
|
|
8902
|
+
.col_vals_lt(columns="b", value=10) # 2 failing test units
|
|
8903
|
+
.interrogate()
|
|
8904
|
+
)
|
|
8905
|
+
|
|
8906
|
+
validation
|
|
8907
|
+
```
|
|
8908
|
+
|
|
8909
|
+
Using `assert_below_threshold(level="warning")` will raise an `AssertionError` if any step
|
|
8910
|
+
exceeds the 'warning' threshold:
|
|
8911
|
+
|
|
8912
|
+
```{python}
|
|
8913
|
+
try:
|
|
8914
|
+
validation.assert_below_threshold(level="warning")
|
|
8915
|
+
except AssertionError as e:
|
|
8916
|
+
print(f"Assertion failed: {e}")
|
|
8917
|
+
```
|
|
8918
|
+
|
|
8919
|
+
Check a specific step against the 'critical' threshold using the `i=` parameter:
|
|
8920
|
+
|
|
8921
|
+
```{python}
|
|
8922
|
+
validation.assert_below_threshold(level="critical", i=1) # Won't raise an error
|
|
8923
|
+
```
|
|
8924
|
+
|
|
8925
|
+
As the first step is below the 'critical' threshold (it exceeds the 'warning' and 'error'
|
|
8926
|
+
thresholds), no error is raised and nothing is printed.
|
|
8927
|
+
|
|
8928
|
+
We can also provide a custom error message with the `message=` parameter. Let's try that
|
|
8929
|
+
here:
|
|
8930
|
+
|
|
8931
|
+
```{python}
|
|
8932
|
+
try:
|
|
8933
|
+
validation.assert_below_threshold(
|
|
8934
|
+
level="error",
|
|
8935
|
+
message="Data quality too low for processing!"
|
|
8936
|
+
)
|
|
8937
|
+
except AssertionError as e:
|
|
8938
|
+
print(f"Custom error: {e}")
|
|
8939
|
+
```
|
|
8940
|
+
|
|
8941
|
+
See Also
|
|
8942
|
+
--------
|
|
8943
|
+
- [`warning()`](`pointblank.Validate.warning`): Get the 'warning' status for each validation
|
|
8944
|
+
step
|
|
8945
|
+
- [`error()`](`pointblank.Validate.error`): Get the 'error' status for each validation step
|
|
8946
|
+
- [`critical()`](`pointblank.Validate.critical`): Get the 'critical' status for each
|
|
8947
|
+
validation step
|
|
8948
|
+
- [`assert_passing()`](`pointblank.Validate.assert_passing`): Assert all validations pass
|
|
8949
|
+
completely
|
|
8950
|
+
"""
|
|
8951
|
+
# Check if validation has been interrogated
|
|
8952
|
+
if not hasattr(self, "time_start") or self.time_start is None:
|
|
8953
|
+
# Auto-interrogate with default parameters
|
|
8954
|
+
self.interrogate()
|
|
8955
|
+
|
|
8956
|
+
# Validate the level parameter
|
|
8957
|
+
level = level.lower()
|
|
8958
|
+
if level not in ["warning", "error", "critical"]:
|
|
8959
|
+
raise ValueError(
|
|
8960
|
+
f"Invalid threshold level: {level}. Must be one of 'warning', 'error', or 'critical'."
|
|
8961
|
+
)
|
|
8962
|
+
|
|
8963
|
+
# Get the threshold status using the appropriate method
|
|
8964
|
+
if level == "warning":
|
|
8965
|
+
status = self.warning(i=i)
|
|
8966
|
+
elif level == "error":
|
|
8967
|
+
status = self.error(i=i)
|
|
8968
|
+
elif level == "critical":
|
|
8969
|
+
status = self.critical(i=i)
|
|
8970
|
+
|
|
8971
|
+
# Find any steps that exceeded the threshold
|
|
8972
|
+
failures = []
|
|
8973
|
+
for step_num, exceeded in status.items():
|
|
8974
|
+
if exceeded:
|
|
8975
|
+
# Get the step's description
|
|
8976
|
+
validation_step = self.validation_info[step_num - 1]
|
|
8977
|
+
step_descriptor = (
|
|
8978
|
+
validation_step.autobrief
|
|
8979
|
+
if hasattr(validation_step, "autobrief") and validation_step.autobrief
|
|
8980
|
+
else f"Validation step {step_num}"
|
|
8981
|
+
)
|
|
8982
|
+
failures.append(f"Step {step_num}: {step_descriptor}")
|
|
8983
|
+
|
|
8984
|
+
# If any failures were found, raise an AssertionError
|
|
8985
|
+
if failures:
|
|
8986
|
+
if message:
|
|
8987
|
+
msg = message
|
|
8988
|
+
else:
|
|
8989
|
+
msg = f"The following steps exceeded the {level} threshold level:\n" + "\n".join(
|
|
8990
|
+
failures
|
|
8991
|
+
)
|
|
8992
|
+
raise AssertionError(msg)
|
|
8993
|
+
|
|
8800
8994
|
def n(self, i: int | list[int] | None = None, scalar: bool = False) -> dict[int, int] | int:
|
|
8801
8995
|
"""
|
|
8802
8996
|
Provides a dictionary of the number of test units for each validation step.
|
|
@@ -11629,7 +11823,14 @@ def _string_date_dttm_conversion(value: any) -> any:
|
|
|
11629
11823
|
return value
|
|
11630
11824
|
|
|
11631
11825
|
|
|
11632
|
-
def _process_brief(
|
|
11826
|
+
def _process_brief(
|
|
11827
|
+
brief: str | None,
|
|
11828
|
+
step: int,
|
|
11829
|
+
col: str | list[str] | None,
|
|
11830
|
+
values: any | None,
|
|
11831
|
+
thresholds: any | None,
|
|
11832
|
+
segment: any | None,
|
|
11833
|
+
) -> str:
|
|
11633
11834
|
# If there is no brief, return `None`
|
|
11634
11835
|
if brief is None:
|
|
11635
11836
|
return None
|
|
@@ -11649,6 +11850,34 @@ def _process_brief(brief: str | None, step: int, col: str | list[str] | None) ->
|
|
|
11649
11850
|
brief = brief.replace("{col}", col)
|
|
11650
11851
|
brief = brief.replace("{column}", col)
|
|
11651
11852
|
|
|
11853
|
+
if values is not None:
|
|
11854
|
+
# If the value is a list, then join the values into a comma-separated string
|
|
11855
|
+
if isinstance(values, list):
|
|
11856
|
+
values = ", ".join([str(v) for v in values])
|
|
11857
|
+
|
|
11858
|
+
brief = brief.replace("{value}", str(values))
|
|
11859
|
+
|
|
11860
|
+
if thresholds is not None:
|
|
11861
|
+
# Get the string representation of thresholds in the form of:
|
|
11862
|
+
# "W: 0.20 / E: 0.40 / C: 1.00"
|
|
11863
|
+
|
|
11864
|
+
warning_val = thresholds._get_threshold_value(level="warning")
|
|
11865
|
+
error_val = thresholds._get_threshold_value(level="error")
|
|
11866
|
+
critical_val = thresholds._get_threshold_value(level="critical")
|
|
11867
|
+
|
|
11868
|
+
thresholds_fmt = f"W: {warning_val} / E: {error_val} / C: {critical_val}"
|
|
11869
|
+
|
|
11870
|
+
brief = brief.replace("{thresholds}", thresholds_fmt)
|
|
11871
|
+
|
|
11872
|
+
if segment is not None:
|
|
11873
|
+
# The segment is always a tuple of the form ("{column}", "{value}")
|
|
11874
|
+
|
|
11875
|
+
segment_fmt = f"{segment[0]} / {segment[1]}"
|
|
11876
|
+
|
|
11877
|
+
brief = brief.replace("{segment}", segment_fmt)
|
|
11878
|
+
brief = brief.replace("{segment_column}", segment[0])
|
|
11879
|
+
brief = brief.replace("{segment_value}", segment[1])
|
|
11880
|
+
|
|
11652
11881
|
return brief
|
|
11653
11882
|
|
|
11654
11883
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pointblank
|
|
3
|
-
Version: 0.9.
|
|
3
|
+
Version: 0.9.5
|
|
4
4
|
Summary: Find out if your data is what you think it is.
|
|
5
5
|
Author-email: Richard Iannone <riannone@me.com>
|
|
6
6
|
License: MIT License
|
|
@@ -103,7 +103,7 @@ _Data validation made beautiful and powerful_
|
|
|
103
103
|
|
|
104
104
|
</div>
|
|
105
105
|
|
|
106
|
-
<div align="
|
|
106
|
+
<div align="center">
|
|
107
107
|
<a href="translations/README.fr.md">Français</a> |
|
|
108
108
|
<a href="translations/README.de.md">Deutsch</a> |
|
|
109
109
|
<a href="translations/README.it.md">Italiano</a> |
|
|
@@ -112,7 +112,9 @@ _Data validation made beautiful and powerful_
|
|
|
112
112
|
<a href="translations/README.nl.md">Nederlands</a> |
|
|
113
113
|
<a href="translations/README.zh-CN.md">简体中文</a> |
|
|
114
114
|
<a href="translations/README.ja.md">日本語</a> |
|
|
115
|
-
<a href="translations/README.ko.md">한국어</a>
|
|
115
|
+
<a href="translations/README.ko.md">한국어</a> |
|
|
116
|
+
<a href="translations/README.hi.md">हिन्दी</a> |
|
|
117
|
+
<a href="translations/README.ar.md">العربية</a>
|
|
116
118
|
</div>
|
|
117
119
|
|
|
118
120
|
## What is Pointblank?
|
|
@@ -3,7 +3,7 @@ pointblank/_constants.py,sha256=D4HF0NrNAd-mdb88gZ6VatkRYfVX-9gC6C7TOQjjAw4,8112
|
|
|
3
3
|
pointblank/_constants_docs.py,sha256=JBmtt16zTYQ-zaM4ElLExtKs-dKlnN553Ys2ML1Y1C8,2099
|
|
4
4
|
pointblank/_constants_translations.py,sha256=HXcCYmKoMjoaFv-Ym4UWv3AsIVXik2zDyAy7xvTvv0Y,186710
|
|
5
5
|
pointblank/_interrogation.py,sha256=U4GQ8Ik5rP75BYBkmunBvHKwf3XvLPHcUx18JwiBQZI,89422
|
|
6
|
-
pointblank/_typing.py,sha256=
|
|
6
|
+
pointblank/_typing.py,sha256=aItbCbzhbzqjK3lCbL27ltRyXoAH1c3-U6xQdRzg-lU,1594
|
|
7
7
|
pointblank/_utils.py,sha256=CsuUYXNzox-Nc5CjQNhyy2XnmnvYJVJrS5cZxklzIFo,24745
|
|
8
8
|
pointblank/_utils_check_args.py,sha256=rFEc1nbCN8ftsQQWVjCNWmQ2QmUDxkfgmoJclrZeTLs,5489
|
|
9
9
|
pointblank/_utils_html.py,sha256=sTcmnBljkPjRZF1hbpoHl4HmnXOazsA91gC9iWVIrRk,2848
|
|
@@ -12,20 +12,22 @@ pointblank/assistant.py,sha256=ZIQJKTy9rDwq_Wmr1FMp0J7Q3ekxSgF3_tK0p4PTEUM,14850
|
|
|
12
12
|
pointblank/column.py,sha256=LumGbnterw5VM7-2-7Za3jdlug1VVS9a3TOH0Y1E5eg,76548
|
|
13
13
|
pointblank/datascan.py,sha256=rRz0hR81uTgd1e9OfLdfsNYXRk8vcpE8PW8exu-GJoE,47697
|
|
14
14
|
pointblank/draft.py,sha256=cusr4fBiNncCKIOU8UwvJcvkBeBuUnqH_UfYp9dtNss,15777
|
|
15
|
-
pointblank/schema.py,sha256=
|
|
15
|
+
pointblank/schema.py,sha256=nHkOXykPw7mTmVGjT67hjx13iKySZ5xsfVgPUQV0yCM,44588
|
|
16
16
|
pointblank/tf.py,sha256=8o_8m4i01teulEe3-YYMotSNf3tImjBMInsvdjSAO5Q,8844
|
|
17
17
|
pointblank/thresholds.py,sha256=cweex25DwBPrsvPW12pRoaTQnwFpUUwqTdHyFJXTnN0,25760
|
|
18
|
-
pointblank/validate.py,sha256=
|
|
19
|
-
pointblank/data/api-docs.txt,sha256=
|
|
18
|
+
pointblank/validate.py,sha256=DfTChQcLyaJFNLdjkG3jQAsY7GtLvTHSbxkzKusG9I4,615287
|
|
19
|
+
pointblank/data/api-docs.txt,sha256=Sk2ePat_ngz3tAizQVSo7uG_fInv638HFLmM6041osM,482808
|
|
20
20
|
pointblank/data/game_revenue-duckdb.zip,sha256=tKIVx48OGLYGsQPS3h5AjA2Nyq_rfEpLCjBiFUWhagU,35880
|
|
21
21
|
pointblank/data/game_revenue.zip,sha256=7c9EvHLyi93CHUd4p3dM4CZ-GucFCtXKSPxgLojL32U,33749
|
|
22
|
+
pointblank/data/global_sales-duckdb.zip,sha256=2ok_cvJ1ZuSkXnw0R6_OkKYRTWhJ-jJEMq2VYsv5fqY,1336390
|
|
23
|
+
pointblank/data/global_sales.zip,sha256=JeUnR1apKQ35PPwEcvTKCEIEiYeYQtoGmYjmzbz99DM,2138604
|
|
22
24
|
pointblank/data/nycflights-duckdb.zip,sha256=GQrHO9tp7d9cNGFNSbA9EKF19MLf6t2wZE0U9-hIKow,5293077
|
|
23
25
|
pointblank/data/nycflights.zip,sha256=yVjbUaKUz2LydSdF9cABuir0VReHBBgV7shiNWSd0mU,7828965
|
|
24
26
|
pointblank/data/polars-api-docs.txt,sha256=KGcS-BOtUs9zgpkWfXD-GFdFh4O_zjdkpX7msHjztLg,198045
|
|
25
27
|
pointblank/data/small_table-duckdb.zip,sha256=BhTaZ2CRS4-9Z1uVhOU6HggvW3XCar7etMznfENIcOc,2028
|
|
26
28
|
pointblank/data/small_table.zip,sha256=lmFb90Nb-v5X559Ikjg31YLAXuRyMkD9yLRElkXPMzQ,472
|
|
27
|
-
pointblank-0.9.
|
|
28
|
-
pointblank-0.9.
|
|
29
|
-
pointblank-0.9.
|
|
30
|
-
pointblank-0.9.
|
|
31
|
-
pointblank-0.9.
|
|
29
|
+
pointblank-0.9.5.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
|
|
30
|
+
pointblank-0.9.5.dist-info/METADATA,sha256=8SHBgMHqrX9T2cMOfa_cQMDw60NbCmMB1xLgrwWk5vw,14857
|
|
31
|
+
pointblank-0.9.5.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
|
|
32
|
+
pointblank-0.9.5.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
|
|
33
|
+
pointblank-0.9.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|