pointblank 0.9.5__py3-none-any.whl → 0.9.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pointblank/_constants.py +2 -0
- pointblank/_utils.py +2 -0
- pointblank/actions.py +3 -3
- pointblank/column.py +4 -4
- pointblank/data/api-docs.txt +251 -13
- pointblank/thresholds.py +2 -2
- pointblank/validate.py +169 -5
- {pointblank-0.9.5.dist-info → pointblank-0.9.6.dist-info}/METADATA +3 -1
- {pointblank-0.9.5.dist-info → pointblank-0.9.6.dist-info}/RECORD +12 -12
- {pointblank-0.9.5.dist-info → pointblank-0.9.6.dist-info}/WHEEL +1 -1
- {pointblank-0.9.5.dist-info → pointblank-0.9.6.dist-info}/licenses/LICENSE +0 -0
- {pointblank-0.9.5.dist-info → pointblank-0.9.6.dist-info}/top_level.txt +0 -0
pointblank/_constants.py
CHANGED
|
@@ -109,6 +109,7 @@ ROW_BASED_VALIDATION_TYPES = [
|
|
|
109
109
|
]
|
|
110
110
|
|
|
111
111
|
IBIS_BACKENDS = [
|
|
112
|
+
"bigquery",
|
|
112
113
|
"databricks",
|
|
113
114
|
"duckdb",
|
|
114
115
|
"memtable",
|
|
@@ -165,6 +166,7 @@ TABLE_TYPE_STYLES = {
|
|
|
165
166
|
"parquet": {"background": "#3F9FF9", "text": "#FFFFFF", "label": "Parquet"},
|
|
166
167
|
"memtable": {"background": "#2C3E50", "text": "#FFFFFF", "label": "Ibis memtable"},
|
|
167
168
|
"mssql": {"background": "#E2E2E2", "text": "#222222", "label": "MSSQL"},
|
|
169
|
+
"bigquery": {"background": "#4285F4", "text": "#FFFFFF", "label": "BigQuery"},
|
|
168
170
|
"pyspark": {"background": "#E66F21", "text": "#FFFFFF", "label": "Spark DataFrame"},
|
|
169
171
|
"databricks": {"background": "#FF3621", "text": "#FFFFFF", "label": "Databricks"},
|
|
170
172
|
}
|
pointblank/_utils.py
CHANGED
|
@@ -514,6 +514,8 @@ def _get_api_text() -> str:
|
|
|
514
514
|
"Validate.get_data_extracts",
|
|
515
515
|
"Validate.all_passed",
|
|
516
516
|
"Validate.assert_passing",
|
|
517
|
+
"Validate.assert_below_threshold",
|
|
518
|
+
"Validate.above_threshold",
|
|
517
519
|
"Validate.n",
|
|
518
520
|
"Validate.n_passed",
|
|
519
521
|
"Validate.n_failed",
|
pointblank/actions.py
CHANGED
|
@@ -216,7 +216,7 @@ def send_slack_notification(
|
|
|
216
216
|
thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
|
|
217
217
|
actions=pb.Actions(critical=notify_slack),
|
|
218
218
|
)
|
|
219
|
-
.col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}
|
|
219
|
+
.col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
|
|
220
220
|
.col_vals_gt(columns="item_revenue", value=0.05)
|
|
221
221
|
.col_vals_gt(columns="session_duration", value=15)
|
|
222
222
|
.interrogate()
|
|
@@ -248,7 +248,7 @@ def send_slack_notification(
|
|
|
248
248
|
thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
|
|
249
249
|
final_actions=pb.FinalActions(notify_slack),
|
|
250
250
|
)
|
|
251
|
-
.col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}
|
|
251
|
+
.col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
|
|
252
252
|
.col_vals_gt(columns="item_revenue", value=0.05)
|
|
253
253
|
.col_vals_gt(columns="session_duration", value=15)
|
|
254
254
|
.interrogate()
|
|
@@ -316,7 +316,7 @@ def send_slack_notification(
|
|
|
316
316
|
actions=pb.Actions(default=notify_slack),
|
|
317
317
|
final_actions=pb.FinalActions(notify_slack),
|
|
318
318
|
)
|
|
319
|
-
.col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}
|
|
319
|
+
.col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
|
|
320
320
|
.col_vals_gt(columns="item_revenue", value=0.05)
|
|
321
321
|
.col_vals_gt(columns="session_duration", value=15)
|
|
322
322
|
.interrogate()
|
pointblank/column.py
CHANGED
|
@@ -1007,7 +1007,7 @@ def matches(pattern: str, case_sensitive: bool = False) -> Matches:
|
|
|
1007
1007
|
`[rev_01, rev_02, profit_01, profit_02, age]`
|
|
1008
1008
|
|
|
1009
1009
|
and you want to validate columns that have two digits at the end of the name, you can use
|
|
1010
|
-
`columns=matches(r"
|
|
1010
|
+
`columns=matches(r"[0-9]{2}$")`. This will select the `rev_01`, `rev_02`, `profit_01`, and
|
|
1011
1011
|
`profit_02` columns.
|
|
1012
1012
|
|
|
1013
1013
|
There will be a validation step created for every resolved column. Note that if there aren't any
|
|
@@ -1061,7 +1061,7 @@ def matches(pattern: str, case_sensitive: bool = False) -> Matches:
|
|
|
1061
1061
|
[`col()`](`pointblank.col`) function, like this:
|
|
1062
1062
|
|
|
1063
1063
|
```python
|
|
1064
|
-
col(matches(r"
|
|
1064
|
+
col(matches(r"^[0-9]{5}") & ends_with("_id"))
|
|
1065
1065
|
```
|
|
1066
1066
|
|
|
1067
1067
|
There are four operators that can be used to compose column selectors:
|
|
@@ -1107,7 +1107,7 @@ def matches(pattern: str, case_sensitive: bool = False) -> Matches:
|
|
|
1107
1107
|
|
|
1108
1108
|
validation = (
|
|
1109
1109
|
pb.Validate(data=tbl)
|
|
1110
|
-
.col_vals_regex(columns=pb.matches("id|identifier"), pattern=r"ID
|
|
1110
|
+
.col_vals_regex(columns=pb.matches("id|identifier"), pattern=r"ID[0-9]{4}")
|
|
1111
1111
|
.interrogate()
|
|
1112
1112
|
)
|
|
1113
1113
|
|
|
@@ -1115,7 +1115,7 @@ def matches(pattern: str, case_sensitive: bool = False) -> Matches:
|
|
|
1115
1115
|
```
|
|
1116
1116
|
|
|
1117
1117
|
From the results of the validation table we get two validation steps, one for `id_old` and one
|
|
1118
|
-
for `new_identifier`. The values in both columns all match the pattern `"ID
|
|
1118
|
+
for `new_identifier`. The values in both columns all match the pattern `"ID[0-9]{4}"`.
|
|
1119
1119
|
|
|
1120
1120
|
We can also use the `matches()` function in combination with other column selectors (within
|
|
1121
1121
|
[`col()`](`pointblank.col`)) to create more complex column selection criteria (i.e., to select
|
pointblank/data/api-docs.txt
CHANGED
|
@@ -107,6 +107,11 @@ Validate(data: 'FrameT | Any', tbl_name: 'str | None' = None, label: 'str | None
|
|
|
107
107
|
- MySQL table (`"mysql"`)*
|
|
108
108
|
- PostgreSQL table (`"postgresql"`)*
|
|
109
109
|
- SQLite table (`"sqlite"`)*
|
|
110
|
+
- Microsoft SQL Server table (`"mssql"`)*
|
|
111
|
+
- Snowflake table (`"snowflake"`)*
|
|
112
|
+
- Databricks table (`"databricks"`)*
|
|
113
|
+
- PySpark table (`"pyspark"`)*
|
|
114
|
+
- BigQuery table (`"bigquery"`)*
|
|
110
115
|
- Parquet table (`"parquet"`)*
|
|
111
116
|
|
|
112
117
|
The table types marked with an asterisk need to be prepared as Ibis tables (with type of
|
|
@@ -580,7 +585,7 @@ Actions(warning: 'str | Callable | list[str | Callable] | None' = None, error: '
|
|
|
580
585
|
thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
|
|
581
586
|
actions=pb.Actions(critical="Major data quality issue found in step {step}."),
|
|
582
587
|
)
|
|
583
|
-
.col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}
|
|
588
|
+
.col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
|
|
584
589
|
.col_vals_gt(columns="item_revenue", value=0.05)
|
|
585
590
|
.col_vals_gt(columns="session_duration", value=15)
|
|
586
591
|
.interrogate()
|
|
@@ -610,7 +615,7 @@ Actions(warning: 'str | Callable | list[str | Callable] | None' = None, error: '
|
|
|
610
615
|
data=pb.load_dataset(dataset="game_revenue", tbl_type="duckdb"),
|
|
611
616
|
thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
|
|
612
617
|
)
|
|
613
|
-
.col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}
|
|
618
|
+
.col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
|
|
614
619
|
.col_vals_gt(columns="item_revenue", value=0.05)
|
|
615
620
|
.col_vals_gt(
|
|
616
621
|
columns="session_duration",
|
|
@@ -6231,7 +6236,7 @@ matches(pattern: 'str', case_sensitive: 'bool' = False) -> 'Matches'
|
|
|
6231
6236
|
`[rev_01, rev_02, profit_01, profit_02, age]`
|
|
6232
6237
|
|
|
6233
6238
|
and you want to validate columns that have two digits at the end of the name, you can use
|
|
6234
|
-
`columns=matches(r"
|
|
6239
|
+
`columns=matches(r"[0-9]{2}$")`. This will select the `rev_01`, `rev_02`, `profit_01`, and
|
|
6235
6240
|
`profit_02` columns.
|
|
6236
6241
|
|
|
6237
6242
|
There will be a validation step created for every resolved column. Note that if there aren't any
|
|
@@ -6285,7 +6290,7 @@ matches(pattern: 'str', case_sensitive: 'bool' = False) -> 'Matches'
|
|
|
6285
6290
|
[`col()`](`pointblank.col`) function, like this:
|
|
6286
6291
|
|
|
6287
6292
|
```python
|
|
6288
|
-
col(matches(r"
|
|
6293
|
+
col(matches(r"^[0-9]{5}") & ends_with("_id"))
|
|
6289
6294
|
```
|
|
6290
6295
|
|
|
6291
6296
|
There are four operators that can be used to compose column selectors:
|
|
@@ -6324,7 +6329,7 @@ matches(pattern: 'str', case_sensitive: 'bool' = False) -> 'Matches'
|
|
|
6324
6329
|
|
|
6325
6330
|
validation = (
|
|
6326
6331
|
pb.Validate(data=tbl)
|
|
6327
|
-
.col_vals_regex(columns=pb.matches("id|identifier"), pattern=r"ID
|
|
6332
|
+
.col_vals_regex(columns=pb.matches("id|identifier"), pattern=r"ID[0-9]{4}")
|
|
6328
6333
|
.interrogate()
|
|
6329
6334
|
)
|
|
6330
6335
|
|
|
@@ -6332,7 +6337,7 @@ matches(pattern: 'str', case_sensitive: 'bool' = False) -> 'Matches'
|
|
|
6332
6337
|
```
|
|
6333
6338
|
|
|
6334
6339
|
From the results of the validation table we get two validation steps, one for `id_old` and one
|
|
6335
|
-
for `new_identifier`. The values in both columns all match the pattern `"ID
|
|
6340
|
+
for `new_identifier`. The values in both columns all match the pattern `"ID[0-9]{4}"`.
|
|
6336
6341
|
|
|
6337
6342
|
We can also use the `matches()` function in combination with other column selectors (within
|
|
6338
6343
|
[`col()`](`pointblank.col`)) to create more complex column selection criteria (i.e., to select
|
|
@@ -6875,7 +6880,7 @@ interrogate(self, collect_extracts: 'bool' = True, collect_tbl_checked: 'bool' =
|
|
|
6875
6880
|
|
|
6876
6881
|
After interrogation is complete, the `Validate` object will have gathered information, and
|
|
6877
6882
|
we can use methods like [`n_passed()`](`pointblank.Validate.n_passed`),
|
|
6878
|
-
[`f_failed()`](`pointblank.Validate.f_failed`)
|
|
6883
|
+
[`f_failed()`](`pointblank.Validate.f_failed`), etc., to understand how the table performed
|
|
6879
6884
|
against the validation plan. A visual representation of the validation results can be viewed
|
|
6880
6885
|
by printing the `Validate` object; this will display the validation table in an HTML viewing
|
|
6881
6886
|
environment.
|
|
@@ -7578,6 +7583,10 @@ assert_passing(self) -> 'None'
|
|
|
7578
7583
|
assertion made is printed in the `AssertionError` message if a failure occurs, ensuring
|
|
7579
7584
|
some details are preserved.
|
|
7580
7585
|
|
|
7586
|
+
If the validation has not yet been interrogated, this method will automatically call
|
|
7587
|
+
[`interrogate()`](`pointblank.Validate.interrogate`) with default parameters before checking
|
|
7588
|
+
for passing tests.
|
|
7589
|
+
|
|
7581
7590
|
Raises
|
|
7582
7591
|
-------
|
|
7583
7592
|
AssertionError
|
|
@@ -7587,8 +7596,9 @@ assert_passing(self) -> 'None'
|
|
|
7587
7596
|
--------
|
|
7588
7597
|
In the example below, we'll use a simple Polars DataFrame with three columns (`a`, `b`, and
|
|
7589
7598
|
`c`). There will be three validation steps, and the second step will have a failing test
|
|
7590
|
-
unit (the value `10` isn't less than `9`).
|
|
7591
|
-
|
|
7599
|
+
unit (the value `10` isn't less than `9`). The `assert_passing()` method is used to assert
|
|
7600
|
+
that all validation steps passed perfectly, automatically performing the interrogation if
|
|
7601
|
+
needed.
|
|
7592
7602
|
|
|
7593
7603
|
```python
|
|
7594
7604
|
#| error: True
|
|
@@ -7609,13 +7619,221 @@ assert_passing(self) -> 'None'
|
|
|
7609
7619
|
.col_vals_gt(columns="a", value=0)
|
|
7610
7620
|
.col_vals_lt(columns="b", value=9) # this assertion is false
|
|
7611
7621
|
.col_vals_in_set(columns="c", set=["a", "b"])
|
|
7612
|
-
.interrogate()
|
|
7613
7622
|
)
|
|
7614
7623
|
|
|
7624
|
+
# No need to call `interrogate()` explicitly
|
|
7615
7625
|
validation.assert_passing()
|
|
7616
7626
|
```
|
|
7617
7627
|
|
|
7618
7628
|
|
|
7629
|
+
assert_below_threshold(self, level: 'str' = 'warning', i: 'int | None' = None, message: 'str | None' = None) -> 'None'
|
|
7630
|
+
|
|
7631
|
+
Raise an `AssertionError` if validation steps exceed a specified threshold level.
|
|
7632
|
+
|
|
7633
|
+
The `assert_below_threshold()` method checks whether validation steps' failure rates are
|
|
7634
|
+
below a given threshold level (`"warning"`, `"error"`, or `"critical"`). This is
|
|
7635
|
+
particularly useful in automated testing environments where you want to ensure your data
|
|
7636
|
+
quality meets minimum standards before proceeding.
|
|
7637
|
+
|
|
7638
|
+
If any validation step exceeds the specified threshold level, an `AssertionError` will be
|
|
7639
|
+
raised with details about which steps failed. If the validation has not yet been
|
|
7640
|
+
interrogated, this method will automatically call
|
|
7641
|
+
[`interrogate()`](`pointblank.Validate.interrogate`) with default parameters.
|
|
7642
|
+
|
|
7643
|
+
Parameters
|
|
7644
|
+
----------
|
|
7645
|
+
level
|
|
7646
|
+
The threshold level to check against, which could be any of `"warning"` (the default),
|
|
7647
|
+
`"error"`, or `"critical"`. An `AssertionError` will be raised if any validation step
|
|
7648
|
+
exceeds this level.
|
|
7649
|
+
i
|
|
7650
|
+
Specific validation step number(s) to check. Can be provided as a single integer or a
|
|
7651
|
+
list of integers. If `None` (the default), all steps are checked.
|
|
7652
|
+
message
|
|
7653
|
+
Custom error message to use if assertion fails. If `None`, a default message will be
|
|
7654
|
+
generated that lists the specific steps that exceeded the threshold.
|
|
7655
|
+
|
|
7656
|
+
Returns
|
|
7657
|
+
-------
|
|
7658
|
+
None
|
|
7659
|
+
|
|
7660
|
+
Raises
|
|
7661
|
+
------
|
|
7662
|
+
AssertionError
|
|
7663
|
+
If any specified validation step exceeds the given threshold level.
|
|
7664
|
+
ValueError
|
|
7665
|
+
If an invalid threshold level is provided.
|
|
7666
|
+
|
|
7667
|
+
Examples
|
|
7668
|
+
--------
|
|
7669
|
+
Below are some examples of how to use the `assert_below_threshold()` method. First, we'll
|
|
7670
|
+
create a simple Polars DataFrame with two columns (`a` and `b`).
|
|
7671
|
+
|
|
7672
|
+
```python
|
|
7673
|
+
import polars as pl
|
|
7674
|
+
|
|
7675
|
+
tbl = pl.DataFrame({
|
|
7676
|
+
"a": [7, 4, 9, 7, 12],
|
|
7677
|
+
"b": [9, 8, 10, 5, 10]
|
|
7678
|
+
})
|
|
7679
|
+
```
|
|
7680
|
+
|
|
7681
|
+
Then a validation plan will be created with thresholds (`warning=0.1`, `error=0.2`,
|
|
7682
|
+
`critical=0.3`). After interrogating, we display the validation report table:
|
|
7683
|
+
|
|
7684
|
+
```python
|
|
7685
|
+
import pointblank as pb
|
|
7686
|
+
|
|
7687
|
+
validation = (
|
|
7688
|
+
pb.Validate(data=tbl, thresholds=(0.1, 0.2, 0.3))
|
|
7689
|
+
.col_vals_gt(columns="a", value=5) # 1 failing test unit
|
|
7690
|
+
.col_vals_lt(columns="b", value=10) # 2 failing test units
|
|
7691
|
+
.interrogate()
|
|
7692
|
+
)
|
|
7693
|
+
|
|
7694
|
+
validation
|
|
7695
|
+
```
|
|
7696
|
+
|
|
7697
|
+
Using `assert_below_threshold(level="warning")` will raise an `AssertionError` if any step
|
|
7698
|
+
exceeds the 'warning' threshold (both steps do here, so that call would raise).
|
|
7699
|
+
|
|
7700
|
+
Check a specific step against the 'critical' threshold using the `i=` parameter:
|
|
7701
|
+
|
|
7702
|
+
```python
|
|
7703
|
+
validation.assert_below_threshold(level="critical", i=1) # Won't raise an error
|
|
7704
|
+
```
|
|
7705
|
+
|
|
7706
|
+
As the first step is below the 'critical' threshold (it exceeds the 'warning' and 'error'
|
|
7707
|
+
thresholds), no error is raised and nothing is printed.
|
|
7708
|
+
|
|
7709
|
+
We can also provide a custom error message with the `message=` parameter. Let's try that
|
|
7710
|
+
here:
|
|
7711
|
+
|
|
7712
|
+
```python
|
|
7713
|
+
try:
|
|
7714
|
+
validation.assert_below_threshold(
|
|
7715
|
+
level="error",
|
|
7716
|
+
message="Data quality too low for processing!"
|
|
7717
|
+
)
|
|
7718
|
+
except AssertionError as e:
|
|
7719
|
+
print(f"Custom error: {e}")
|
|
7720
|
+
```
|
|
7721
|
+
|
|
7722
|
+
See Also
|
|
7723
|
+
--------
|
|
7724
|
+
- [`warning()`](`pointblank.Validate.warning`): get the 'warning' status for each validation
|
|
7725
|
+
step
|
|
7726
|
+
- [`error()`](`pointblank.Validate.error`): get the 'error' status for each validation step
|
|
7727
|
+
- [`critical()`](`pointblank.Validate.critical`): get the 'critical' status for each
|
|
7728
|
+
validation step
|
|
7729
|
+
- [`assert_passing()`](`pointblank.Validate.assert_passing`): assert all validations pass
|
|
7730
|
+
completely
|
|
7731
|
+
|
|
7732
|
+
|
|
7733
|
+
above_threshold(self, level: 'str' = 'warning', i: 'int | None' = None) -> 'bool'
|
|
7734
|
+
|
|
7735
|
+
Check if any validation steps exceed a specified threshold level.
|
|
7736
|
+
|
|
7737
|
+
The `above_threshold()` method checks whether validation steps exceed a given threshold
|
|
7738
|
+
level. This provides a non-exception-based alternative to
|
|
7739
|
+
[`assert_below_threshold()`](`pointblank.Validate.assert_below_threshold`) for conditional
|
|
7740
|
+
workflow control based on validation results.
|
|
7741
|
+
|
|
7742
|
+
This method is useful in scenarios where you want to check if any validation steps failed
|
|
7743
|
+
beyond a certain threshold without raising an exception, allowing for more flexible
|
|
7744
|
+
programmatic responses to validation issues.
|
|
7745
|
+
|
|
7746
|
+
Parameters
|
|
7747
|
+
----------
|
|
7748
|
+
level
|
|
7749
|
+
The threshold level to check against. Valid options are: `"warning"` (the least severe
|
|
7750
|
+
threshold level), `"error"` (the middle severity threshold level), and `"critical"` (the
|
|
7751
|
+
most severe threshold level). The default is `"warning"`.
|
|
7752
|
+
i
|
|
7753
|
+
Specific validation step number(s) to check. If a single integer, checks only that step.
|
|
7754
|
+
If a list of integers, checks all specified steps. If `None` (the default), checks all
|
|
7755
|
+
validation steps. Step numbers are 1-based (first step is `1`, not `0`).
|
|
7756
|
+
|
|
7757
|
+
Returns
|
|
7758
|
+
-------
|
|
7759
|
+
bool
|
|
7760
|
+
`True` if any of the specified validation steps exceed the given threshold level,
|
|
7761
|
+
`False` otherwise.
|
|
7762
|
+
|
|
7763
|
+
Raises
|
|
7764
|
+
------
|
|
7765
|
+
ValueError
|
|
7766
|
+
If an invalid threshold level is provided.
|
|
7767
|
+
|
|
7768
|
+
Examples
|
|
7769
|
+
--------
|
|
7770
|
+
Below are some examples of how to use the `above_threshold()` method. First, we'll create a
|
|
7771
|
+
simple Polars DataFrame with a single column (`values`), assigned to `tbl` (its construction is not shown here).
|
|
7772
|
+
|
|
7773
|
+
Then a validation plan will be created with thresholds (`warning=0.1`, `error=0.2`,
|
|
7774
|
+
`critical=0.3`). After interrogating, we display the validation report table:
|
|
7775
|
+
|
|
7776
|
+
```python
|
|
7777
|
+
import pointblank as pb
|
|
7778
|
+
|
|
7779
|
+
validation = (
|
|
7780
|
+
pb.Validate(data=tbl, thresholds=(0.1, 0.2, 0.3))
|
|
7781
|
+
.col_vals_gt(columns="values", value=0)
|
|
7782
|
+
.col_vals_lt(columns="values", value=10)
|
|
7783
|
+
.col_vals_between(columns="values", left=0, right=5)
|
|
7784
|
+
.interrogate()
|
|
7785
|
+
)
|
|
7786
|
+
|
|
7787
|
+
validation
|
|
7788
|
+
```
|
|
7789
|
+
|
|
7790
|
+
Let's check if any steps exceed the 'warning' threshold with the `above_threshold()` method.
|
|
7791
|
+
A message will be printed if that's the case:
|
|
7792
|
+
|
|
7793
|
+
```python
|
|
7794
|
+
if validation.above_threshold(level="warning"):
|
|
7795
|
+
print("Some steps have exceeded the warning threshold")
|
|
7796
|
+
```
|
|
7797
|
+
|
|
7798
|
+
Check only steps 2 and 3 against the 'error' threshold by using the `i=` argument:
|
|
7799
|
+
|
|
7800
|
+
```python
|
|
7801
|
+
if validation.above_threshold(level="error", i=[2, 3]):
|
|
7802
|
+
print("Steps 2 and/or 3 have exceeded the error threshold")
|
|
7803
|
+
```
|
|
7804
|
+
|
|
7805
|
+
You can use this in a workflow to conditionally trigger processes. Here's a snippet of how
|
|
7806
|
+
you might use this in a function:
|
|
7807
|
+
|
|
7808
|
+
```python
|
|
7809
|
+
def process_data(validation_obj):
|
|
7810
|
+
# Only continue processing if validation passes critical thresholds
|
|
7811
|
+
if not validation_obj.above_threshold(level="critical"):
|
|
7812
|
+
# Continue with processing
|
|
7813
|
+
print("Data meets critical quality thresholds, proceeding...")
|
|
7814
|
+
return True
|
|
7815
|
+
else:
|
|
7816
|
+
# Log failure and stop processing
|
|
7817
|
+
print("Data fails critical quality checks, aborting...")
|
|
7818
|
+
return False
|
|
7819
|
+
```
|
|
7820
|
+
|
|
7821
|
+
Note that this is just a suggestion for how to implement conditional workflow processes. You
|
|
7822
|
+
should adapt this pattern to your specific requirements, which might include different
|
|
7823
|
+
threshold levels, custom logging mechanisms, or integration with your organization's data
|
|
7824
|
+
pipelines and notification systems.
|
|
7825
|
+
|
|
7826
|
+
See Also
|
|
7827
|
+
--------
|
|
7828
|
+
- [`assert_below_threshold()`](`pointblank.Validate.assert_below_threshold`): a similar
|
|
7829
|
+
method that raises an exception if thresholds are exceeded
|
|
7830
|
+
- [`warning()`](`pointblank.Validate.warning`): get the 'warning' status for each validation
|
|
7831
|
+
step
|
|
7832
|
+
- [`error()`](`pointblank.Validate.error`): get the 'error' status for each validation step
|
|
7833
|
+
- [`critical()`](`pointblank.Validate.critical`): get the 'critical' status for each
|
|
7834
|
+
validation step
|
|
7835
|
+
|
|
7836
|
+
|
|
7619
7837
|
n(self, i: 'int | list[int] | None' = None, scalar: 'bool' = False) -> 'dict[int, int] | int'
|
|
7620
7838
|
|
|
7621
7839
|
Provides a dictionary of the number of test units for each validation step.
|
|
@@ -8504,6 +8722,11 @@ preview(data: 'FrameT | Any', columns_subset: 'str | list[str] | Column | None'
|
|
|
8504
8722
|
- MySQL table (`"mysql"`)*
|
|
8505
8723
|
- PostgreSQL table (`"postgresql"`)*
|
|
8506
8724
|
- SQLite table (`"sqlite"`)*
|
|
8725
|
+
- Microsoft SQL Server table (`"mssql"`)*
|
|
8726
|
+
- Snowflake table (`"snowflake"`)*
|
|
8727
|
+
- Databricks table (`"databricks"`)*
|
|
8728
|
+
- PySpark table (`"pyspark"`)*
|
|
8729
|
+
- BigQuery table (`"bigquery"`)*
|
|
8507
8730
|
- Parquet table (`"parquet"`)*
|
|
8508
8731
|
|
|
8509
8732
|
The table types marked with an asterisk need to be prepared as Ibis tables (with type of
|
|
@@ -8672,6 +8895,11 @@ missing_vals_tbl(data: 'FrameT | Any') -> 'GT'
|
|
|
8672
8895
|
- MySQL table (`"mysql"`)*
|
|
8673
8896
|
- PostgreSQL table (`"postgresql"`)*
|
|
8674
8897
|
- SQLite table (`"sqlite"`)*
|
|
8898
|
+
- Microsoft SQL Server table (`"mssql"`)*
|
|
8899
|
+
- Snowflake table (`"snowflake"`)*
|
|
8900
|
+
- Databricks table (`"databricks"`)*
|
|
8901
|
+
- PySpark table (`"pyspark"`)*
|
|
8902
|
+
- BigQuery table (`"bigquery"`)*
|
|
8675
8903
|
- Parquet table (`"parquet"`)*
|
|
8676
8904
|
|
|
8677
8905
|
The table types marked with an asterisk need to be prepared as Ibis tables (with type of
|
|
@@ -8971,6 +9199,11 @@ get_column_count(data: 'FrameT | Any') -> 'int'
|
|
|
8971
9199
|
- MySQL table (`"mysql"`)*
|
|
8972
9200
|
- PostgreSQL table (`"postgresql"`)*
|
|
8973
9201
|
- SQLite table (`"sqlite"`)*
|
|
9202
|
+
- Microsoft SQL Server table (`"mssql"`)*
|
|
9203
|
+
- Snowflake table (`"snowflake"`)*
|
|
9204
|
+
- Databricks table (`"databricks"`)*
|
|
9205
|
+
- PySpark table (`"pyspark"`)*
|
|
9206
|
+
- BigQuery table (`"bigquery"`)*
|
|
8974
9207
|
- Parquet table (`"parquet"`)*
|
|
8975
9208
|
|
|
8976
9209
|
The table types marked with an asterisk need to be prepared as Ibis tables (with type of
|
|
@@ -9028,6 +9261,11 @@ get_row_count(data: 'FrameT | Any') -> 'int'
|
|
|
9028
9261
|
- MySQL table (`"mysql"`)*
|
|
9029
9262
|
- PostgreSQL table (`"postgresql"`)*
|
|
9030
9263
|
- SQLite table (`"sqlite"`)*
|
|
9264
|
+
- Microsoft SQL Server table (`"mssql"`)*
|
|
9265
|
+
- Snowflake table (`"snowflake"`)*
|
|
9266
|
+
- Databricks table (`"databricks"`)*
|
|
9267
|
+
- PySpark table (`"pyspark"`)*
|
|
9268
|
+
- BigQuery table (`"bigquery"`)*
|
|
9031
9269
|
- Parquet table (`"parquet"`)*
|
|
9032
9270
|
|
|
9033
9271
|
The table types marked with an asterisk need to be prepared as Ibis tables (with type of
|
|
@@ -9467,7 +9705,7 @@ send_slack_notification(webhook_url: 'str | None' = None, step_msg: 'str | None'
|
|
|
9467
9705
|
thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
|
|
9468
9706
|
actions=pb.Actions(critical=notify_slack),
|
|
9469
9707
|
)
|
|
9470
|
-
.col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}
|
|
9708
|
+
.col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
|
|
9471
9709
|
.col_vals_gt(columns="item_revenue", value=0.05)
|
|
9472
9710
|
.col_vals_gt(columns="session_duration", value=15)
|
|
9473
9711
|
.interrogate()
|
|
@@ -9499,7 +9737,7 @@ send_slack_notification(webhook_url: 'str | None' = None, step_msg: 'str | None'
|
|
|
9499
9737
|
thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
|
|
9500
9738
|
final_actions=pb.FinalActions(notify_slack),
|
|
9501
9739
|
)
|
|
9502
|
-
.col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}
|
|
9740
|
+
.col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
|
|
9503
9741
|
.col_vals_gt(columns="item_revenue", value=0.05)
|
|
9504
9742
|
.col_vals_gt(columns="session_duration", value=15)
|
|
9505
9743
|
.interrogate()
|
|
@@ -9567,7 +9805,7 @@ send_slack_notification(webhook_url: 'str | None' = None, step_msg: 'str | None'
|
|
|
9567
9805
|
actions=pb.Actions(default=notify_slack),
|
|
9568
9806
|
final_actions=pb.FinalActions(notify_slack),
|
|
9569
9807
|
)
|
|
9570
|
-
.col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}
|
|
9808
|
+
.col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
|
|
9571
9809
|
.col_vals_gt(columns="item_revenue", value=0.05)
|
|
9572
9810
|
.col_vals_gt(columns="session_duration", value=15)
|
|
9573
9811
|
.interrogate()
|
pointblank/thresholds.py
CHANGED
|
@@ -404,7 +404,7 @@ class Actions:
|
|
|
404
404
|
thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
|
|
405
405
|
actions=pb.Actions(critical="Major data quality issue found in step {step}."),
|
|
406
406
|
)
|
|
407
|
-
.col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}
|
|
407
|
+
.col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
|
|
408
408
|
.col_vals_gt(columns="item_revenue", value=0.05)
|
|
409
409
|
.col_vals_gt(columns="session_duration", value=15)
|
|
410
410
|
.interrogate()
|
|
@@ -434,7 +434,7 @@ class Actions:
|
|
|
434
434
|
data=pb.load_dataset(dataset="game_revenue", tbl_type="duckdb"),
|
|
435
435
|
thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
|
|
436
436
|
)
|
|
437
|
-
.col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}
|
|
437
|
+
.col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
|
|
438
438
|
.col_vals_gt(columns="item_revenue", value=0.05)
|
|
439
439
|
.col_vals_gt(
|
|
440
440
|
columns="session_duration",
|
pointblank/validate.py
CHANGED
|
@@ -636,6 +636,11 @@ def preview(
|
|
|
636
636
|
- MySQL table (`"mysql"`)*
|
|
637
637
|
- PostgreSQL table (`"postgresql"`)*
|
|
638
638
|
- SQLite table (`"sqlite"`)*
|
|
639
|
+
- Microsoft SQL Server table (`"mssql"`)*
|
|
640
|
+
- Snowflake table (`"snowflake"`)*
|
|
641
|
+
- Databricks table (`"databricks"`)*
|
|
642
|
+
- PySpark table (`"pyspark"`)*
|
|
643
|
+
- BigQuery table (`"bigquery"`)*
|
|
639
644
|
- Parquet table (`"parquet"`)*
|
|
640
645
|
|
|
641
646
|
The table types marked with an asterisk need to be prepared as Ibis tables (with type of
|
|
@@ -1134,6 +1139,11 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
|
|
|
1134
1139
|
- MySQL table (`"mysql"`)*
|
|
1135
1140
|
- PostgreSQL table (`"postgresql"`)*
|
|
1136
1141
|
- SQLite table (`"sqlite"`)*
|
|
1142
|
+
- Microsoft SQL Server table (`"mssql"`)*
|
|
1143
|
+
- Snowflake table (`"snowflake"`)*
|
|
1144
|
+
- Databricks table (`"databricks"`)*
|
|
1145
|
+
- PySpark table (`"pyspark"`)*
|
|
1146
|
+
- BigQuery table (`"bigquery"`)*
|
|
1137
1147
|
- Parquet table (`"parquet"`)*
|
|
1138
1148
|
|
|
1139
1149
|
The table types marked with an asterisk need to be prepared as Ibis tables (with type of
|
|
@@ -1663,6 +1673,11 @@ def get_column_count(data: FrameT | Any) -> int:
|
|
|
1663
1673
|
- MySQL table (`"mysql"`)*
|
|
1664
1674
|
- PostgreSQL table (`"postgresql"`)*
|
|
1665
1675
|
- SQLite table (`"sqlite"`)*
|
|
1676
|
+
- Microsoft SQL Server table (`"mssql"`)*
|
|
1677
|
+
- Snowflake table (`"snowflake"`)*
|
|
1678
|
+
- Databricks table (`"databricks"`)*
|
|
1679
|
+
- PySpark table (`"pyspark"`)*
|
|
1680
|
+
- BigQuery table (`"bigquery"`)*
|
|
1666
1681
|
- Parquet table (`"parquet"`)*
|
|
1667
1682
|
|
|
1668
1683
|
The table types marked with an asterisk need to be prepared as Ibis tables (with type of
|
|
@@ -1741,6 +1756,11 @@ def get_row_count(data: FrameT | Any) -> int:
|
|
|
1741
1756
|
- MySQL table (`"mysql"`)*
|
|
1742
1757
|
- PostgreSQL table (`"postgresql"`)*
|
|
1743
1758
|
- SQLite table (`"sqlite"`)*
|
|
1759
|
+
- Microsoft SQL Server table (`"mssql"`)*
|
|
1760
|
+
- Snowflake table (`"snowflake"`)*
|
|
1761
|
+
- Databricks table (`"databricks"`)*
|
|
1762
|
+
- PySpark table (`"pyspark"`)*
|
|
1763
|
+
- BigQuery table (`"bigquery"`)*
|
|
1744
1764
|
- Parquet table (`"parquet"`)*
|
|
1745
1765
|
|
|
1746
1766
|
The table types marked with an asterisk need to be prepared as Ibis tables (with type of
|
|
@@ -2007,6 +2027,11 @@ class Validate:
|
|
|
2007
2027
|
- MySQL table (`"mysql"`)*
|
|
2008
2028
|
- PostgreSQL table (`"postgresql"`)*
|
|
2009
2029
|
- SQLite table (`"sqlite"`)*
|
|
2030
|
+
- Microsoft SQL Server table (`"mssql"`)*
|
|
2031
|
+
- Snowflake table (`"snowflake"`)*
|
|
2032
|
+
- Databricks table (`"databricks"`)*
|
|
2033
|
+
- PySpark table (`"pyspark"`)*
|
|
2034
|
+
- BigQuery table (`"bigquery"`)*
|
|
2010
2035
|
- Parquet table (`"parquet"`)*
|
|
2011
2036
|
|
|
2012
2037
|
The table types marked with an asterisk need to be prepared as Ibis tables (with type of
|
|
@@ -8831,7 +8856,7 @@ class Validate:
|
|
|
8831
8856
|
raise AssertionError(msg)
|
|
8832
8857
|
|
|
8833
8858
|
def assert_below_threshold(
|
|
8834
|
-
self, level: str = "warning", i: int = None, message: str = None
|
|
8859
|
+
self, level: str = "warning", i: int | None = None, message: str | None = None
|
|
8835
8860
|
) -> None:
|
|
8836
8861
|
"""
|
|
8837
8862
|
Raise an `AssertionError` if validation steps exceed a specified threshold level.
|
|
@@ -8940,12 +8965,12 @@ class Validate:
|
|
|
8940
8965
|
|
|
8941
8966
|
See Also
|
|
8942
8967
|
--------
|
|
8943
|
-
- [`warning()`](`pointblank.Validate.warning`):
|
|
8968
|
+
- [`warning()`](`pointblank.Validate.warning`): get the 'warning' status for each validation
|
|
8944
8969
|
step
|
|
8945
|
-
- [`error()`](`pointblank.Validate.error`):
|
|
8946
|
-
- [`critical()`](`pointblank.Validate.critical`):
|
|
8970
|
+
- [`error()`](`pointblank.Validate.error`): get the 'error' status for each validation step
|
|
8971
|
+
- [`critical()`](`pointblank.Validate.critical`): get the 'critical' status for each
|
|
8947
8972
|
validation step
|
|
8948
|
-
- [`assert_passing()`](`pointblank.Validate.assert_passing`):
|
|
8973
|
+
- [`assert_passing()`](`pointblank.Validate.assert_passing`): assert all validations pass
|
|
8949
8974
|
completely
|
|
8950
8975
|
"""
|
|
8951
8976
|
# Check if validation has been interrogated
|
|
@@ -8991,6 +9016,145 @@ class Validate:
|
|
|
8991
9016
|
)
|
|
8992
9017
|
raise AssertionError(msg)
|
|
8993
9018
|
|
|
9019
|
+
def above_threshold(self, level: str = "warning", i: int | None = None) -> bool:
|
|
9020
|
+
"""
|
|
9021
|
+
Check if any validation steps exceed a specified threshold level.
|
|
9022
|
+
|
|
9023
|
+
The `above_threshold()` method checks whether validation steps exceed a given threshold
|
|
9024
|
+
level. This provides a non-exception-based alternative to
|
|
9025
|
+
[`assert_below_threshold()`](`pointblank.Validate.assert_below_threshold`) for conditional
|
|
9026
|
+
workflow control based on validation results.
|
|
9027
|
+
|
|
9028
|
+
This method is useful in scenarios where you want to check if any validation steps failed
|
|
9029
|
+
beyond a certain threshold without raising an exception, allowing for more flexible
|
|
9030
|
+
programmatic responses to validation issues.
|
|
9031
|
+
|
|
9032
|
+
Parameters
|
|
9033
|
+
----------
|
|
9034
|
+
level
|
|
9035
|
+
The threshold level to check against. Valid options are: `"warning"` (the least severe
|
|
9036
|
+
threshold level), `"error"` (the middle severity threshold level), and `"critical"` (the
|
|
9037
|
+
most severe threshold level). The default is `"warning"`.
|
|
9038
|
+
i
|
|
9039
|
+
Specific validation step number(s) to check. If a single integer, checks only that step.
|
|
9040
|
+
If a list of integers, checks all specified steps. If `None` (the default), checks all
|
|
9041
|
+
validation steps. Step numbers are 1-based (first step is `1`, not `0`).
|
|
9042
|
+
|
|
9043
|
+
Returns
|
|
9044
|
+
-------
|
|
9045
|
+
bool
|
|
9046
|
+
`True` if any of the specified validation steps exceed the given threshold level,
|
|
9047
|
+
`False` otherwise.
|
|
9048
|
+
|
|
9049
|
+
Raises
|
|
9050
|
+
------
|
|
9051
|
+
ValueError
|
|
9052
|
+
If an invalid threshold level is provided.
|
|
9053
|
+
|
|
9054
|
+
Examples
|
|
9055
|
+
--------
|
|
9056
|
+
```{python}
|
|
9057
|
+
#| echo: false
|
|
9058
|
+
#| output: false
|
|
9059
|
+
import pointblank as pb
|
|
9060
|
+
pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
|
|
9061
|
+
```
|
|
9062
|
+
Below are some examples of how to use the `above_threshold()` method. First, we'll create a
|
|
9063
|
+
simple Polars DataFrame with a single column (`values`).
|
|
9064
|
+
|
|
9065
|
+
```{python}
|
|
9066
|
+
import polars as pl
|
|
9067
|
+
|
|
9068
|
+
tbl = pl.DataFrame({
|
|
9069
|
+
"values": [1, 2, 3, 4, 5, 0, -1]
|
|
9070
|
+
})
|
|
9071
|
+
```
|
|
9072
|
+
|
|
9073
|
+
Then a validation plan will be created with thresholds (`warning=0.1`, `error=0.2`,
|
|
9074
|
+
`critical=0.3`). After interrogating, we display the validation report table:
|
|
9075
|
+
|
|
9076
|
+
```{python}
|
|
9077
|
+
import pointblank as pb
|
|
9078
|
+
|
|
9079
|
+
validation = (
|
|
9080
|
+
pb.Validate(data=tbl, thresholds=(0.1, 0.2, 0.3))
|
|
9081
|
+
.col_vals_gt(columns="values", value=0)
|
|
9082
|
+
.col_vals_lt(columns="values", value=10)
|
|
9083
|
+
.col_vals_between(columns="values", left=0, right=5)
|
|
9084
|
+
.interrogate()
|
|
9085
|
+
)
|
|
9086
|
+
|
|
9087
|
+
validation
|
|
9088
|
+
```
|
|
9089
|
+
|
|
9090
|
+
Let's check if any steps exceed the 'warning' threshold with the `above_threshold()` method.
|
|
9091
|
+
A message will be printed if that's the case:
|
|
9092
|
+
|
|
9093
|
+
```{python}
|
|
9094
|
+
if validation.above_threshold(level="warning"):
|
|
9095
|
+
print("Some steps have exceeded the warning threshold")
|
|
9096
|
+
```
|
|
9097
|
+
|
|
9098
|
+
Check if only steps 2 and 3 exceed the 'error' threshold through use of the `i=` argument:
|
|
9099
|
+
|
|
9100
|
+
```{python}
|
|
9101
|
+
if validation.above_threshold(level="error", i=[2, 3]):
|
|
9102
|
+
print("Steps 2 and/or 3 have exceeded the error threshold")
|
|
9103
|
+
```
|
|
9104
|
+
|
|
9105
|
+
You can use this in a workflow to conditionally trigger processes. Here's a snippet of how
|
|
9106
|
+
you might use this in a function:
|
|
9107
|
+
|
|
9108
|
+
```python
|
|
9109
|
+
def process_data(validation_obj):
|
|
9110
|
+
# Only continue processing if validation passes critical thresholds
|
|
9111
|
+
if not validation_obj.above_threshold(level="critical"):
|
|
9112
|
+
# Continue with processing
|
|
9113
|
+
print("Data meets critical quality thresholds, proceeding...")
|
|
9114
|
+
return True
|
|
9115
|
+
else:
|
|
9116
|
+
# Log failure and stop processing
|
|
9117
|
+
print("Data fails critical quality checks, aborting...")
|
|
9118
|
+
return False
|
|
9119
|
+
```
|
|
9120
|
+
|
|
9121
|
+
Note that this is just a suggestion for how to implement conditional workflow processes. You
|
|
9122
|
+
should adapt this pattern to your specific requirements, which might include different
|
|
9123
|
+
threshold levels, custom logging mechanisms, or integration with your organization's data
|
|
9124
|
+
pipelines and notification systems.
|
|
9125
|
+
|
|
9126
|
+
See Also
|
|
9127
|
+
--------
|
|
9128
|
+
- [`assert_below_threshold()`](`pointblank.Validate.assert_below_threshold`): a similar
|
|
9129
|
+
method that raises an exception if thresholds are exceeded
|
|
9130
|
+
- [`warning()`](`pointblank.Validate.warning`): get the 'warning' status for each validation
|
|
9131
|
+
step
|
|
9132
|
+
- [`error()`](`pointblank.Validate.error`): get the 'error' status for each validation step
|
|
9133
|
+
- [`critical()`](`pointblank.Validate.critical`): get the 'critical' status for each
|
|
9134
|
+
validation step
|
|
9135
|
+
"""
|
|
9136
|
+
# Ensure validation has been run
|
|
9137
|
+
if not hasattr(self, "time_start") or self.time_start is None:
|
|
9138
|
+
return False
|
|
9139
|
+
|
|
9140
|
+
# Validate the level parameter
|
|
9141
|
+
level = level.lower()
|
|
9142
|
+
if level not in ["warning", "error", "critical"]:
|
|
9143
|
+
raise ValueError(
|
|
9144
|
+
f"Invalid threshold level: {level}. Must be one of 'warning', 'error', or 'critical'."
|
|
9145
|
+
)
|
|
9146
|
+
|
|
9147
|
+
# Get the threshold status using the appropriate method
|
|
9148
|
+
if level == "warning":
|
|
9149
|
+
status = self.warning(i=i)
|
|
9150
|
+
elif level == "error":
|
|
9151
|
+
status = self.error(i=i)
|
|
9152
|
+
elif level == "critical":
|
|
9153
|
+
status = self.critical(i=i)
|
|
9154
|
+
|
|
9155
|
+
# Return True if any steps exceeded the threshold
|
|
9156
|
+
return any(status.values())
|
|
9157
|
+
|
|
8994
9158
|
def n(self, i: int | list[int] | None = None, scalar: bool = False) -> dict[int, int] | int:
|
|
8995
9159
|
"""
|
|
8996
9160
|
Provides a dictionary of the number of test units for each validation step.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pointblank
|
|
3
|
-
Version: 0.9.5
|
|
3
|
+
Version: 0.9.6
|
|
4
4
|
Summary: Find out if your data is what you think it is.
|
|
5
5
|
Author-email: Richard Iannone <riannone@me.com>
|
|
6
6
|
License: MIT License
|
|
@@ -55,6 +55,8 @@ Requires-Dist: chatlas>=0.3.0; extra == "generate"
|
|
|
55
55
|
Requires-Dist: anthropic[bedrock]>=0.45.2; extra == "generate"
|
|
56
56
|
Requires-Dist: openai>=1.63.0; extra == "generate"
|
|
57
57
|
Requires-Dist: shiny>=1.3.0; extra == "generate"
|
|
58
|
+
Provides-Extra: bigquery
|
|
59
|
+
Requires-Dist: ibis-framework[bigquery]>=9.5.0; extra == "bigquery"
|
|
58
60
|
Provides-Extra: databricks
|
|
59
61
|
Requires-Dist: ibis-framework[databricks]>=9.5.0; extra == "databricks"
|
|
60
62
|
Provides-Extra: duckdb
|
|
@@ -1,22 +1,22 @@
|
|
|
1
1
|
pointblank/__init__.py,sha256=uHrX-ARZOhvWogXXqKV65RO2DXdYLZNCD1oNcm8hE6o,1585
|
|
2
|
-
pointblank/_constants.py,sha256=
|
|
2
|
+
pointblank/_constants.py,sha256=YeQVYpSkdQ8v7D8ZJnG-M75zqAH3yJuDzzjwWC2I-d8,81227
|
|
3
3
|
pointblank/_constants_docs.py,sha256=JBmtt16zTYQ-zaM4ElLExtKs-dKlnN553Ys2ML1Y1C8,2099
|
|
4
4
|
pointblank/_constants_translations.py,sha256=HXcCYmKoMjoaFv-Ym4UWv3AsIVXik2zDyAy7xvTvv0Y,186710
|
|
5
5
|
pointblank/_interrogation.py,sha256=U4GQ8Ik5rP75BYBkmunBvHKwf3XvLPHcUx18JwiBQZI,89422
|
|
6
6
|
pointblank/_typing.py,sha256=aItbCbzhbzqjK3lCbL27ltRyXoAH1c3-U6xQdRzg-lU,1594
|
|
7
|
-
pointblank/_utils.py,sha256=
|
|
7
|
+
pointblank/_utils.py,sha256=BoIwMEZYBwPEe5xGku1vSmkgAeGgnA4_bQ4MDeYFGrc,24824
|
|
8
8
|
pointblank/_utils_check_args.py,sha256=rFEc1nbCN8ftsQQWVjCNWmQ2QmUDxkfgmoJclrZeTLs,5489
|
|
9
9
|
pointblank/_utils_html.py,sha256=sTcmnBljkPjRZF1hbpoHl4HmnXOazsA91gC9iWVIrRk,2848
|
|
10
|
-
pointblank/actions.py,sha256=
|
|
10
|
+
pointblank/actions.py,sha256=D6o9B2_ES9PNQg9HZwREacrrt-3A5bhdrBkL1UXz__s,18281
|
|
11
11
|
pointblank/assistant.py,sha256=ZIQJKTy9rDwq_Wmr1FMp0J7Q3ekxSgF3_tK0p4PTEUM,14850
|
|
12
|
-
pointblank/column.py,sha256=
|
|
12
|
+
pointblank/column.py,sha256=_FJjpjv760D1p6YGgqbwmKYktouG7AJ2A9uIMYQBTYA,76560
|
|
13
13
|
pointblank/datascan.py,sha256=rRz0hR81uTgd1e9OfLdfsNYXRk8vcpE8PW8exu-GJoE,47697
|
|
14
14
|
pointblank/draft.py,sha256=cusr4fBiNncCKIOU8UwvJcvkBeBuUnqH_UfYp9dtNss,15777
|
|
15
15
|
pointblank/schema.py,sha256=nHkOXykPw7mTmVGjT67hjx13iKySZ5xsfVgPUQV0yCM,44588
|
|
16
16
|
pointblank/tf.py,sha256=8o_8m4i01teulEe3-YYMotSNf3tImjBMInsvdjSAO5Q,8844
|
|
17
|
-
pointblank/thresholds.py,sha256=
|
|
18
|
-
pointblank/validate.py,sha256=
|
|
19
|
-
pointblank/data/api-docs.txt,sha256=
|
|
17
|
+
pointblank/thresholds.py,sha256=mybeLzTVdmN04NLKoV-jiSBXsWknwHO0Gox0ttVN_MU,25766
|
|
18
|
+
pointblank/validate.py,sha256=dM5U41me38atNDt1Llzv08gdUcnYyvWoHycQPpctidg,621961
|
|
19
|
+
pointblank/data/api-docs.txt,sha256=6cdUIYdVy2XfGRLNNxtcGTaxu2WX4EXEeICayOvJCTs,492756
|
|
20
20
|
pointblank/data/game_revenue-duckdb.zip,sha256=tKIVx48OGLYGsQPS3h5AjA2Nyq_rfEpLCjBiFUWhagU,35880
|
|
21
21
|
pointblank/data/game_revenue.zip,sha256=7c9EvHLyi93CHUd4p3dM4CZ-GucFCtXKSPxgLojL32U,33749
|
|
22
22
|
pointblank/data/global_sales-duckdb.zip,sha256=2ok_cvJ1ZuSkXnw0R6_OkKYRTWhJ-jJEMq2VYsv5fqY,1336390
|
|
@@ -26,8 +26,8 @@ pointblank/data/nycflights.zip,sha256=yVjbUaKUz2LydSdF9cABuir0VReHBBgV7shiNWSd0m
|
|
|
26
26
|
pointblank/data/polars-api-docs.txt,sha256=KGcS-BOtUs9zgpkWfXD-GFdFh4O_zjdkpX7msHjztLg,198045
|
|
27
27
|
pointblank/data/small_table-duckdb.zip,sha256=BhTaZ2CRS4-9Z1uVhOU6HggvW3XCar7etMznfENIcOc,2028
|
|
28
28
|
pointblank/data/small_table.zip,sha256=lmFb90Nb-v5X559Ikjg31YLAXuRyMkD9yLRElkXPMzQ,472
|
|
29
|
-
pointblank-0.9.5.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
|
|
30
|
-
pointblank-0.9.
|
|
31
|
-
pointblank-0.9.
|
|
32
|
-
pointblank-0.9.5.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
|
|
33
|
-
pointblank-0.9.5.dist-info/RECORD,,
|
|
29
|
+
pointblank-0.9.6.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
|
|
30
|
+
pointblank-0.9.6.dist-info/METADATA,sha256=_BocxWcU0_AXIiMGBPcxsd9VwrD8uGXjXpjE16hUhVw,14950
|
|
31
|
+
pointblank-0.9.6.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
|
|
32
|
+
pointblank-0.9.6.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
|
|
33
|
+
pointblank-0.9.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|