pointblank 0.9.5__py3-none-any.whl → 0.9.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pointblank/_constants.py CHANGED
@@ -109,6 +109,7 @@ ROW_BASED_VALIDATION_TYPES = [
109
109
  ]
110
110
 
111
111
  IBIS_BACKENDS = [
112
+ "bigquery",
112
113
  "databricks",
113
114
  "duckdb",
114
115
  "memtable",
@@ -165,6 +166,7 @@ TABLE_TYPE_STYLES = {
165
166
  "parquet": {"background": "#3F9FF9", "text": "#FFFFFF", "label": "Parquet"},
166
167
  "memtable": {"background": "#2C3E50", "text": "#FFFFFF", "label": "Ibis memtable"},
167
168
  "mssql": {"background": "#E2E2E2", "text": "#222222", "label": "MSSQL"},
169
+ "bigquery": {"background": "#4285F4", "text": "#FFFFFF", "label": "BigQuery"},
168
170
  "pyspark": {"background": "#E66F21", "text": "#FFFFFF", "label": "Spark DataFrame"},
169
171
  "databricks": {"background": "#FF3621", "text": "#FFFFFF", "label": "Databricks"},
170
172
  }
pointblank/_utils.py CHANGED
@@ -514,6 +514,8 @@ def _get_api_text() -> str:
514
514
  "Validate.get_data_extracts",
515
515
  "Validate.all_passed",
516
516
  "Validate.assert_passing",
517
+ "Validate.assert_below_threshold",
518
+ "Validate.above_threshold",
517
519
  "Validate.n",
518
520
  "Validate.n_passed",
519
521
  "Validate.n_failed",
pointblank/actions.py CHANGED
@@ -216,7 +216,7 @@ def send_slack_notification(
216
216
  thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
217
217
  actions=pb.Actions(critical=notify_slack),
218
218
  )
219
- .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}\d{3}")
219
+ .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
220
220
  .col_vals_gt(columns="item_revenue", value=0.05)
221
221
  .col_vals_gt(columns="session_duration", value=15)
222
222
  .interrogate()
@@ -248,7 +248,7 @@ def send_slack_notification(
248
248
  thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
249
249
  final_actions=pb.FinalActions(notify_slack),
250
250
  )
251
- .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}\d{3}")
251
+ .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
252
252
  .col_vals_gt(columns="item_revenue", value=0.05)
253
253
  .col_vals_gt(columns="session_duration", value=15)
254
254
  .interrogate()
@@ -316,7 +316,7 @@ def send_slack_notification(
316
316
  actions=pb.Actions(default=notify_slack),
317
317
  final_actions=pb.FinalActions(notify_slack),
318
318
  )
319
- .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}\d{3}")
319
+ .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
320
320
  .col_vals_gt(columns="item_revenue", value=0.05)
321
321
  .col_vals_gt(columns="session_duration", value=15)
322
322
  .interrogate()
pointblank/column.py CHANGED
@@ -1007,7 +1007,7 @@ def matches(pattern: str, case_sensitive: bool = False) -> Matches:
1007
1007
  `[rev_01, rev_02, profit_01, profit_02, age]`
1008
1008
 
1009
1009
  and you want to validate columns that have two digits at the end of the name, you can use
1010
- `columns=matches(r"\d{2}$")`. This will select the `rev_01`, `rev_02`, `profit_01`, and
1010
+ `columns=matches(r"[0-9]{2}$")`. This will select the `rev_01`, `rev_02`, `profit_01`, and
1011
1011
  `profit_02` columns.
1012
1012
 
1013
1013
  There will be a validation step created for every resolved column. Note that if there aren't any
@@ -1061,7 +1061,7 @@ def matches(pattern: str, case_sensitive: bool = False) -> Matches:
1061
1061
  [`col()`](`pointblank.col`) function, like this:
1062
1062
 
1063
1063
  ```python
1064
- col(matches(r"^\d{5}") & ends_with("_id"))
1064
+ col(matches(r"^[0-9]{5}") & ends_with("_id"))
1065
1065
  ```
1066
1066
 
1067
1067
  There are four operators that can be used to compose column selectors:
@@ -1107,7 +1107,7 @@ def matches(pattern: str, case_sensitive: bool = False) -> Matches:
1107
1107
 
1108
1108
  validation = (
1109
1109
  pb.Validate(data=tbl)
1110
- .col_vals_regex(columns=pb.matches("id|identifier"), pattern=r"ID\d{4}")
1110
+ .col_vals_regex(columns=pb.matches("id|identifier"), pattern=r"ID[0-9]{4}")
1111
1111
  .interrogate()
1112
1112
  )
1113
1113
 
@@ -1115,7 +1115,7 @@ def matches(pattern: str, case_sensitive: bool = False) -> Matches:
1115
1115
  ```
1116
1116
 
1117
1117
  From the results of the validation table we get two validation steps, one for `id_old` and one
1118
- for `new_identifier`. The values in both columns all match the pattern `"ID\d{4}"`.
1118
+ for `new_identifier`. The values in both columns all match the pattern `"ID[0-9]{4}"`.
1119
1119
 
1120
1120
  We can also use the `matches()` function in combination with other column selectors (within
1121
1121
  [`col()`](`pointblank.col`)) to create more complex column selection criteria (i.e., to select
@@ -107,6 +107,11 @@ Validate(data: 'FrameT | Any', tbl_name: 'str | None' = None, label: 'str | None
107
107
  - MySQL table (`"mysql"`)*
108
108
  - PostgreSQL table (`"postgresql"`)*
109
109
  - SQLite table (`"sqlite"`)*
110
+ - Microsoft SQL Server table (`"mssql"`)*
111
+ - Snowflake table (`"snowflake"`)*
112
+ - Databricks table (`"databricks"`)*
113
+ - PySpark table (`"pyspark"`)*
114
+ - BigQuery table (`"bigquery"`)*
110
115
  - Parquet table (`"parquet"`)*
111
116
 
112
117
  The table types marked with an asterisk need to be prepared as Ibis tables (with type of
@@ -580,7 +585,7 @@ Actions(warning: 'str | Callable | list[str | Callable] | None' = None, error: '
580
585
  thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
581
586
  actions=pb.Actions(critical="Major data quality issue found in step {step}."),
582
587
  )
583
- .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}\d{3}")
588
+ .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
584
589
  .col_vals_gt(columns="item_revenue", value=0.05)
585
590
  .col_vals_gt(columns="session_duration", value=15)
586
591
  .interrogate()
@@ -610,7 +615,7 @@ Actions(warning: 'str | Callable | list[str | Callable] | None' = None, error: '
610
615
  data=pb.load_dataset(dataset="game_revenue", tbl_type="duckdb"),
611
616
  thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
612
617
  )
613
- .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}\d{3}")
618
+ .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
614
619
  .col_vals_gt(columns="item_revenue", value=0.05)
615
620
  .col_vals_gt(
616
621
  columns="session_duration",
@@ -6231,7 +6236,7 @@ matches(pattern: 'str', case_sensitive: 'bool' = False) -> 'Matches'
6231
6236
  `[rev_01, rev_02, profit_01, profit_02, age]`
6232
6237
 
6233
6238
  and you want to validate columns that have two digits at the end of the name, you can use
6234
- `columns=matches(r"\d{2}$")`. This will select the `rev_01`, `rev_02`, `profit_01`, and
6239
+ `columns=matches(r"[0-9]{2}$")`. This will select the `rev_01`, `rev_02`, `profit_01`, and
6235
6240
  `profit_02` columns.
6236
6241
 
6237
6242
  There will be a validation step created for every resolved column. Note that if there aren't any
@@ -6285,7 +6290,7 @@ matches(pattern: 'str', case_sensitive: 'bool' = False) -> 'Matches'
6285
6290
  [`col()`](`pointblank.col`) function, like this:
6286
6291
 
6287
6292
  ```python
6288
- col(matches(r"^\d{5}") & ends_with("_id"))
6293
+ col(matches(r"^[0-9]{5}") & ends_with("_id"))
6289
6294
  ```
6290
6295
 
6291
6296
  There are four operators that can be used to compose column selectors:
@@ -6324,7 +6329,7 @@ matches(pattern: 'str', case_sensitive: 'bool' = False) -> 'Matches'
6324
6329
 
6325
6330
  validation = (
6326
6331
  pb.Validate(data=tbl)
6327
- .col_vals_regex(columns=pb.matches("id|identifier"), pattern=r"ID\d{4}")
6332
+ .col_vals_regex(columns=pb.matches("id|identifier"), pattern=r"ID[0-9]{4}")
6328
6333
  .interrogate()
6329
6334
  )
6330
6335
 
@@ -6332,7 +6337,7 @@ matches(pattern: 'str', case_sensitive: 'bool' = False) -> 'Matches'
6332
6337
  ```
6333
6338
 
6334
6339
  From the results of the validation table we get two validation steps, one for `id_old` and one
6335
- for `new_identifier`. The values in both columns all match the pattern `"ID\d{4}"`.
6340
+ for `new_identifier`. The values in both columns all match the pattern `"ID[0-9]{4}"`.
6336
6341
 
6337
6342
  We can also use the `matches()` function in combination with other column selectors (within
6338
6343
  [`col()`](`pointblank.col`)) to create more complex column selection criteria (i.e., to select
@@ -6875,7 +6880,7 @@ interrogate(self, collect_extracts: 'bool' = True, collect_tbl_checked: 'bool' =
6875
6880
 
6876
6881
  After interrogation is complete, the `Validate` object will have gathered information, and
6877
6882
  we can use methods like [`n_passed()`](`pointblank.Validate.n_passed`),
6878
- [`f_failed()`](`pointblank.Validate.f_failed`)`, etc., to understand how the table performed
6883
+ [`f_failed()`](`pointblank.Validate.f_failed`), etc., to understand how the table performed
6879
6884
  against the validation plan. A visual representation of the validation results can be viewed
6880
6885
  by printing the `Validate` object; this will display the validation table in an HTML viewing
6881
6886
  environment.
@@ -7578,6 +7583,10 @@ assert_passing(self) -> 'None'
7578
7583
  assertion made is printed in the `AssertionError` message if a failure occurs, ensuring
7579
7584
  some details are preserved.
7580
7585
 
7586
+ If the validation has not yet been interrogated, this method will automatically call
7587
+ [`interrogate()`](`pointblank.Validate.interrogate`) with default parameters before checking
7588
+ for passing tests.
7589
+
7581
7590
  Raises
7582
7591
  -------
7583
7592
  AssertionError
@@ -7587,8 +7596,9 @@ assert_passing(self) -> 'None'
7587
7596
  --------
7588
7597
  In the example below, we'll use a simple Polars DataFrame with three columns (`a`, `b`, and
7589
7598
  `c`). There will be three validation steps, and the second step will have a failing test
7590
- unit (the value `10` isn't less than `9`). After interrogation, the `assert_passing()`
7591
- method is used to assert that all validation steps passed perfectly.
7599
+ unit (the value `10` isn't less than `9`). The `assert_passing()` method is used to assert
7600
+ that all validation steps passed perfectly, automatically performing the interrogation if
7601
+ needed.
7592
7602
 
7593
7603
  ```python
7594
7604
  #| error: True
@@ -7609,13 +7619,221 @@ assert_passing(self) -> 'None'
7609
7619
  .col_vals_gt(columns="a", value=0)
7610
7620
  .col_vals_lt(columns="b", value=9) # this assertion is false
7611
7621
  .col_vals_in_set(columns="c", set=["a", "b"])
7612
- .interrogate()
7613
7622
  )
7614
7623
 
7624
+ # No need to call `interrogate()` explicitly; `assert_passing()` runs it if needed
7615
7625
  validation.assert_passing()
7616
7626
  ```
7617
7627
 
7618
7628
 
7629
+ assert_below_threshold(self, level: 'str' = 'warning', i: 'int | None' = None, message: 'str | None' = None) -> 'None'
7630
+
7631
+ Raise an `AssertionError` if validation steps exceed a specified threshold level.
7632
+
7633
+ The `assert_below_threshold()` method checks whether validation steps' failure rates are
7634
+ below a given threshold level (`"warning"`, `"error"`, or `"critical"`). This is
7635
+ particularly useful in automated testing environments where you want to ensure your data
7636
+ quality meets minimum standards before proceeding.
7637
+
7638
+ If any validation step exceeds the specified threshold level, an `AssertionError` will be
7639
+ raised with details about which steps failed. If the validation has not yet been
7640
+ interrogated, this method will automatically call
7641
+ [`interrogate()`](`pointblank.Validate.interrogate`) with default parameters.
7642
+
7643
+ Parameters
7644
+ ----------
7645
+ level
7646
+ The threshold level to check against, which could be any of `"warning"` (the default),
7647
+ `"error"`, or `"critical"`. An `AssertionError` will be raised if any validation step
7648
+ exceeds this level.
7649
+ i
7650
+ Specific validation step number(s) to check. Can be provided as a single integer or a
7651
+ list of integers. If `None` (the default), all steps are checked.
7652
+ message
7653
+ Custom error message to use if assertion fails. If `None`, a default message will be
7654
+ generated that lists the specific steps that exceeded the threshold.
7655
+
7656
+ Returns
7657
+ -------
7658
+ None
7659
+
7660
+ Raises
7661
+ ------
7662
+ AssertionError
7663
+ If any specified validation step exceeds the given threshold level.
7664
+ ValueError
7665
+ If an invalid threshold level is provided.
7666
+
7667
+ Examples
7668
+ --------
7669
+ Below are some examples of how to use the `assert_below_threshold()` method. First, we'll
7670
+ create a simple Polars DataFrame with two columns (`a` and `b`).
7671
+
7672
+ ```python
7673
+ import polars as pl
7674
+
7675
+ tbl = pl.DataFrame({
7676
+ "a": [7, 4, 9, 7, 12],
7677
+ "b": [9, 8, 10, 5, 10]
7678
+ })
7679
+ ```
7680
+
7681
+ Then a validation plan will be created with thresholds (`warning=0.1`, `error=0.2`,
7682
+ `critical=0.3`). After interrogating, we display the validation report table:
7683
+
7684
+ ```python
7685
+ import pointblank as pb
7686
+
7687
+ validation = (
7688
+ pb.Validate(data=tbl, thresholds=(0.1, 0.2, 0.3))
7689
+ .col_vals_gt(columns="a", value=5) # 1 failing test unit
7690
+ .col_vals_lt(columns="b", value=10) # 2 failing test units
7691
+ .interrogate()
7692
+ )
7693
+
7694
+ validation
7695
+ ```
7696
+
7697
+ Using `assert_below_threshold(level="warning")` will raise an `AssertionError` if any step
7698
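+ exceeds the 'warning' threshold. Here both steps exceed it (1 of 5 and 2 of 5 test units fail, against `warning=0.1`), so that call would raise.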
7699
+
7700
+ Check a specific step against the 'critical' threshold using the `i=` parameter:
7701
+
7702
+ ```python
7703
+ validation.assert_below_threshold(level="critical", i=1) # Won't raise an error
7704
+ ```
7705
+
7706
+ As the first step is below the 'critical' threshold (it exceeds the 'warning' and 'error'
7707
+ thresholds), no error is raised and nothing is printed.
7708
+
7709
+ We can also provide a custom error message with the `message=` parameter. Let's try that
7710
+ here:
7711
+
7712
+ ```python
7713
+ try:
7714
+ validation.assert_below_threshold(
7715
+ level="error",
7716
+ message="Data quality too low for processing!"
7717
+ )
7718
+ except AssertionError as e:
7719
+ print(f"Custom error: {e}")
7720
+ ```
7721
+
7722
+ See Also
7723
+ --------
7724
+ - [`warning()`](`pointblank.Validate.warning`): get the 'warning' status for each validation
7725
+ step
7726
+ - [`error()`](`pointblank.Validate.error`): get the 'error' status for each validation step
7727
+ - [`critical()`](`pointblank.Validate.critical`): get the 'critical' status for each
7728
+ validation step
7729
+ - [`assert_passing()`](`pointblank.Validate.assert_passing`): assert all validations pass
7730
+ completely
7731
+
7732
+
7733
+ above_threshold(self, level: 'str' = 'warning', i: 'int | None' = None) -> 'bool'
7734
+
7735
+ Check if any validation steps exceed a specified threshold level.
7736
+
7737
+ The `above_threshold()` method checks whether validation steps exceed a given threshold
7738
+ level. This provides a non-exception-based alternative to
7739
+ [`assert_below_threshold()`](`pointblank.Validate.assert_below_threshold`) for conditional
7740
+ workflow control based on validation results.
7741
+
7742
+ This method is useful in scenarios where you want to check if any validation steps failed
7743
+ beyond a certain threshold without raising an exception, allowing for more flexible
7744
+ programmatic responses to validation issues.
7745
+
7746
+ Parameters
7747
+ ----------
7748
+ level
7749
+ The threshold level to check against. Valid options are: `"warning"` (the least severe
7750
+ threshold level), `"error"` (the middle severity threshold level), and `"critical"` (the
7751
+ most severe threshold level). The default is `"warning"`.
7752
+ i
7753
+ Specific validation step number(s) to check. If a single integer, checks only that step.
7754
+ If a list of integers, checks all specified steps. If `None` (the default), checks all
7755
+ validation steps. Step numbers are 1-based (first step is `1`, not `0`).
7756
+
7757
+ Returns
7758
+ -------
7759
+ bool
7760
+ `True` if any of the specified validation steps exceed the given threshold level,
7761
+ `False` otherwise (including when the validation has not yet been interrogated).
7762
+
7763
+ Raises
7764
+ ------
7765
+ ValueError
7766
+ If an invalid threshold level is provided.
7767
+
7768
+ Examples
7769
+ --------
7770
+ Below are some examples of how to use the `above_threshold()` method. First, we'll create a
7771
+ simple Polars DataFrame with a single column (`values`), e.g., `tbl = pl.DataFrame({"values": [1, 2, 3, 4, 5, 0, -1]})` after `import polars as pl`.
7772
+
7773
+ Then a validation plan will be created with thresholds (`warning=0.1`, `error=0.2`,
7774
+ `critical=0.3`). After interrogating, we display the validation report table:
7775
+
7776
+ ```python
7777
+ import pointblank as pb
7778
+
7779
+ validation = (
7780
+ pb.Validate(data=tbl, thresholds=(0.1, 0.2, 0.3))
7781
+ .col_vals_gt(columns="values", value=0)
7782
+ .col_vals_lt(columns="values", value=10)
7783
+ .col_vals_between(columns="values", left=0, right=5)
7784
+ .interrogate()
7785
+ )
7786
+
7787
+ validation
7788
+ ```
7789
+
7790
+ Let's check if any steps exceed the 'warning' threshold with the `above_threshold()` method.
7791
+ A message will be printed if that's the case:
7792
+
7793
+ ```python
7794
+ if validation.above_threshold(level="warning"):
7795
+ print("Some steps have exceeded the warning threshold")
7796
+ ```
7797
+
7798
+ Restrict the check to steps 2 and 3 with the `i=` argument to see whether either exceeds the 'error' threshold:
7799
+
7800
+ ```python
7801
+ if validation.above_threshold(level="error", i=[2, 3]):
7802
+ print("Steps 2 and/or 3 have exceeded the error threshold")
7803
+ ```
7804
+
7805
+ You can use this in a workflow to conditionally trigger processes. Here's a snippet of how
7806
+ you might use this in a function:
7807
+
7808
+ ```python
7809
+ def process_data(validation_obj):
7810
+ # Only continue processing if validation passes critical thresholds
7811
+ if not validation_obj.above_threshold(level="critical"):
7812
+ # Continue with processing
7813
+ print("Data meets critical quality thresholds, proceeding...")
7814
+ return True
7815
+ else:
7816
+ # Log failure and stop processing
7817
+ print("Data fails critical quality checks, aborting...")
7818
+ return False
7819
+ ```
7820
+
7821
+ Note that this is just a suggestion for how to implement conditional workflow processes. You
7822
+ should adapt this pattern to your specific requirements, which might include different
7823
+ threshold levels, custom logging mechanisms, or integration with your organization's data
7824
+ pipelines and notification systems.
7825
+
7826
+ See Also
7827
+ --------
7828
+ - [`assert_below_threshold()`](`pointblank.Validate.assert_below_threshold`): a similar
7829
+ method that raises an exception if thresholds are exceeded
7830
+ - [`warning()`](`pointblank.Validate.warning`): get the 'warning' status for each validation
7831
+ step
7832
+ - [`error()`](`pointblank.Validate.error`): get the 'error' status for each validation step
7833
+ - [`critical()`](`pointblank.Validate.critical`): get the 'critical' status for each
7834
+ validation step
7835
+
7836
+
7619
7837
  n(self, i: 'int | list[int] | None' = None, scalar: 'bool' = False) -> 'dict[int, int] | int'
7620
7838
 
7621
7839
  Provides a dictionary of the number of test units for each validation step.
@@ -8504,6 +8722,11 @@ preview(data: 'FrameT | Any', columns_subset: 'str | list[str] | Column | None'
8504
8722
  - MySQL table (`"mysql"`)*
8505
8723
  - PostgreSQL table (`"postgresql"`)*
8506
8724
  - SQLite table (`"sqlite"`)*
8725
+ - Microsoft SQL Server table (`"mssql"`)*
8726
+ - Snowflake table (`"snowflake"`)*
8727
+ - Databricks table (`"databricks"`)*
8728
+ - PySpark table (`"pyspark"`)*
8729
+ - BigQuery table (`"bigquery"`)*
8507
8730
  - Parquet table (`"parquet"`)*
8508
8731
 
8509
8732
  The table types marked with an asterisk need to be prepared as Ibis tables (with type of
@@ -8672,6 +8895,11 @@ missing_vals_tbl(data: 'FrameT | Any') -> 'GT'
8672
8895
  - MySQL table (`"mysql"`)*
8673
8896
  - PostgreSQL table (`"postgresql"`)*
8674
8897
  - SQLite table (`"sqlite"`)*
8898
+ - Microsoft SQL Server table (`"mssql"`)*
8899
+ - Snowflake table (`"snowflake"`)*
8900
+ - Databricks table (`"databricks"`)*
8901
+ - PySpark table (`"pyspark"`)*
8902
+ - BigQuery table (`"bigquery"`)*
8675
8903
  - Parquet table (`"parquet"`)*
8676
8904
 
8677
8905
  The table types marked with an asterisk need to be prepared as Ibis tables (with type of
@@ -8971,6 +9199,11 @@ get_column_count(data: 'FrameT | Any') -> 'int'
8971
9199
  - MySQL table (`"mysql"`)*
8972
9200
  - PostgreSQL table (`"postgresql"`)*
8973
9201
  - SQLite table (`"sqlite"`)*
9202
+ - Microsoft SQL Server table (`"mssql"`)*
9203
+ - Snowflake table (`"snowflake"`)*
9204
+ - Databricks table (`"databricks"`)*
9205
+ - PySpark table (`"pyspark"`)*
9206
+ - BigQuery table (`"bigquery"`)*
8974
9207
  - Parquet table (`"parquet"`)*
8975
9208
 
8976
9209
  The table types marked with an asterisk need to be prepared as Ibis tables (with type of
@@ -9028,6 +9261,11 @@ get_row_count(data: 'FrameT | Any') -> 'int'
9028
9261
  - MySQL table (`"mysql"`)*
9029
9262
  - PostgreSQL table (`"postgresql"`)*
9030
9263
  - SQLite table (`"sqlite"`)*
9264
+ - Microsoft SQL Server table (`"mssql"`)*
9265
+ - Snowflake table (`"snowflake"`)*
9266
+ - Databricks table (`"databricks"`)*
9267
+ - PySpark table (`"pyspark"`)*
9268
+ - BigQuery table (`"bigquery"`)*
9031
9269
  - Parquet table (`"parquet"`)*
9032
9270
 
9033
9271
  The table types marked with an asterisk need to be prepared as Ibis tables (with type of
@@ -9467,7 +9705,7 @@ send_slack_notification(webhook_url: 'str | None' = None, step_msg: 'str | None'
9467
9705
  thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
9468
9706
  actions=pb.Actions(critical=notify_slack),
9469
9707
  )
9470
- .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}\d{3}")
9708
+ .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
9471
9709
  .col_vals_gt(columns="item_revenue", value=0.05)
9472
9710
  .col_vals_gt(columns="session_duration", value=15)
9473
9711
  .interrogate()
@@ -9499,7 +9737,7 @@ send_slack_notification(webhook_url: 'str | None' = None, step_msg: 'str | None'
9499
9737
  thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
9500
9738
  final_actions=pb.FinalActions(notify_slack),
9501
9739
  )
9502
- .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}\d{3}")
9740
+ .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
9503
9741
  .col_vals_gt(columns="item_revenue", value=0.05)
9504
9742
  .col_vals_gt(columns="session_duration", value=15)
9505
9743
  .interrogate()
@@ -9567,7 +9805,7 @@ send_slack_notification(webhook_url: 'str | None' = None, step_msg: 'str | None'
9567
9805
  actions=pb.Actions(default=notify_slack),
9568
9806
  final_actions=pb.FinalActions(notify_slack),
9569
9807
  )
9570
- .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}\d{3}")
9808
+ .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
9571
9809
  .col_vals_gt(columns="item_revenue", value=0.05)
9572
9810
  .col_vals_gt(columns="session_duration", value=15)
9573
9811
  .interrogate()
pointblank/thresholds.py CHANGED
@@ -404,7 +404,7 @@ class Actions:
404
404
  thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
405
405
  actions=pb.Actions(critical="Major data quality issue found in step {step}."),
406
406
  )
407
- .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}\d{3}")
407
+ .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
408
408
  .col_vals_gt(columns="item_revenue", value=0.05)
409
409
  .col_vals_gt(columns="session_duration", value=15)
410
410
  .interrogate()
@@ -434,7 +434,7 @@ class Actions:
434
434
  data=pb.load_dataset(dataset="game_revenue", tbl_type="duckdb"),
435
435
  thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
436
436
  )
437
- .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}\d{3}")
437
+ .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
438
438
  .col_vals_gt(columns="item_revenue", value=0.05)
439
439
  .col_vals_gt(
440
440
  columns="session_duration",
pointblank/validate.py CHANGED
@@ -636,6 +636,11 @@ def preview(
636
636
  - MySQL table (`"mysql"`)*
637
637
  - PostgreSQL table (`"postgresql"`)*
638
638
  - SQLite table (`"sqlite"`)*
639
+ - Microsoft SQL Server table (`"mssql"`)*
640
+ - Snowflake table (`"snowflake"`)*
641
+ - Databricks table (`"databricks"`)*
642
+ - PySpark table (`"pyspark"`)*
643
+ - BigQuery table (`"bigquery"`)*
639
644
  - Parquet table (`"parquet"`)*
640
645
 
641
646
  The table types marked with an asterisk need to be prepared as Ibis tables (with type of
@@ -1134,6 +1139,11 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
1134
1139
  - MySQL table (`"mysql"`)*
1135
1140
  - PostgreSQL table (`"postgresql"`)*
1136
1141
  - SQLite table (`"sqlite"`)*
1142
+ - Microsoft SQL Server table (`"mssql"`)*
1143
+ - Snowflake table (`"snowflake"`)*
1144
+ - Databricks table (`"databricks"`)*
1145
+ - PySpark table (`"pyspark"`)*
1146
+ - BigQuery table (`"bigquery"`)*
1137
1147
  - Parquet table (`"parquet"`)*
1138
1148
 
1139
1149
  The table types marked with an asterisk need to be prepared as Ibis tables (with type of
@@ -1663,6 +1673,11 @@ def get_column_count(data: FrameT | Any) -> int:
1663
1673
  - MySQL table (`"mysql"`)*
1664
1674
  - PostgreSQL table (`"postgresql"`)*
1665
1675
  - SQLite table (`"sqlite"`)*
1676
+ - Microsoft SQL Server table (`"mssql"`)*
1677
+ - Snowflake table (`"snowflake"`)*
1678
+ - Databricks table (`"databricks"`)*
1679
+ - PySpark table (`"pyspark"`)*
1680
+ - BigQuery table (`"bigquery"`)*
1666
1681
  - Parquet table (`"parquet"`)*
1667
1682
 
1668
1683
  The table types marked with an asterisk need to be prepared as Ibis tables (with type of
@@ -1741,6 +1756,11 @@ def get_row_count(data: FrameT | Any) -> int:
1741
1756
  - MySQL table (`"mysql"`)*
1742
1757
  - PostgreSQL table (`"postgresql"`)*
1743
1758
  - SQLite table (`"sqlite"`)*
1759
+ - Microsoft SQL Server table (`"mssql"`)*
1760
+ - Snowflake table (`"snowflake"`)*
1761
+ - Databricks table (`"databricks"`)*
1762
+ - PySpark table (`"pyspark"`)*
1763
+ - BigQuery table (`"bigquery"`)*
1744
1764
  - Parquet table (`"parquet"`)*
1745
1765
 
1746
1766
  The table types marked with an asterisk need to be prepared as Ibis tables (with type of
@@ -2007,6 +2027,11 @@ class Validate:
2007
2027
  - MySQL table (`"mysql"`)*
2008
2028
  - PostgreSQL table (`"postgresql"`)*
2009
2029
  - SQLite table (`"sqlite"`)*
2030
+ - Microsoft SQL Server table (`"mssql"`)*
2031
+ - Snowflake table (`"snowflake"`)*
2032
+ - Databricks table (`"databricks"`)*
2033
+ - PySpark table (`"pyspark"`)*
2034
+ - BigQuery table (`"bigquery"`)*
2010
2035
  - Parquet table (`"parquet"`)*
2011
2036
 
2012
2037
  The table types marked with an asterisk need to be prepared as Ibis tables (with type of
@@ -8831,7 +8856,7 @@ class Validate:
8831
8856
  raise AssertionError(msg)
8832
8857
 
8833
8858
  def assert_below_threshold(
8834
- self, level: str = "warning", i: int = None, message: str = None
8859
+ self, level: str = "warning", i: int | None = None, message: str | None = None
8835
8860
  ) -> None:
8836
8861
  """
8837
8862
  Raise an `AssertionError` if validation steps exceed a specified threshold level.
@@ -8940,12 +8965,12 @@ class Validate:
8940
8965
 
8941
8966
  See Also
8942
8967
  --------
8943
- - [`warning()`](`pointblank.Validate.warning`): Get the 'warning' status for each validation
8968
+ - [`warning()`](`pointblank.Validate.warning`): get the 'warning' status for each validation
8944
8969
  step
8945
- - [`error()`](`pointblank.Validate.error`): Get the 'error' status for each validation step
8946
- - [`critical()`](`pointblank.Validate.critical`): Get the 'critical' status for each
8970
+ - [`error()`](`pointblank.Validate.error`): get the 'error' status for each validation step
8971
+ - [`critical()`](`pointblank.Validate.critical`): get the 'critical' status for each
8947
8972
  validation step
8948
- - [`assert_passing()`](`pointblank.Validate.assert_passing`): Assert all validations pass
8973
+ - [`assert_passing()`](`pointblank.Validate.assert_passing`): assert all validations pass
8949
8974
  completely
8950
8975
  """
8951
8976
  # Check if validation has been interrogated
@@ -8991,6 +9016,145 @@ class Validate:
8991
9016
  )
8992
9017
  raise AssertionError(msg)
8993
9018
 
9019
+ def above_threshold(self, level: str = "warning", i: int | None = None) -> bool:
9020
+ """
9021
+ Check if any validation steps exceed a specified threshold level.
9022
+
9023
+ The `above_threshold()` method checks whether validation steps exceed a given threshold
9024
+ level. This provides a non-exception-based alternative to
9025
+ [`assert_below_threshold()`](`pointblank.Validate.assert_below_threshold`) for conditional
9026
+ workflow control based on validation results.
9027
+
9028
+ This method is useful in scenarios where you want to check if any validation steps failed
9029
+ beyond a certain threshold without raising an exception, allowing for more flexible
9030
+ programmatic responses to validation issues.
9031
+
9032
+ Parameters
9033
+ ----------
9034
+ level
9035
+ The threshold level to check against. Valid options are: `"warning"` (the least severe
9036
+ threshold level), `"error"` (the middle severity threshold level), and `"critical"` (the
9037
+ most severe threshold level). The default is `"warning"`.
9038
+ i
9039
+ Specific validation step number(s) to check. If a single integer, checks only that step.
9040
+ If a list of integers, checks all specified steps. If `None` (the default), checks all
9041
+ validation steps. Step numbers are 1-based (first step is `1`, not `0`).
9042
+
9043
+ Returns
9044
+ -------
9045
+ bool
9046
+ `True` if any of the specified validation steps exceed the given threshold level,
9047
+ `False` otherwise (including when the validation has not yet been interrogated).
9048
+
9049
+ Raises
9050
+ ------
9051
+ ValueError
9052
+ If an invalid threshold level is provided.
9053
+
9054
+ Examples
9055
+ --------
9056
+ ```{python}
9057
+ #| echo: false
9058
+ #| output: false
9059
+ import pointblank as pb
9060
+ pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
9061
+ ```
9062
+ Below are some examples of how to use the `above_threshold()` method. First, we'll create a
9063
+ simple Polars DataFrame with a single column (`values`).
9064
+
9065
+ ```{python}
9066
+ import polars as pl
9067
+
9068
+ tbl = pl.DataFrame({
9069
+ "values": [1, 2, 3, 4, 5, 0, -1]
9070
+ })
9071
+ ```
9072
+
9073
+ Then a validation plan will be created with thresholds (`warning=0.1`, `error=0.2`,
9074
+ `critical=0.3`). After interrogating, we display the validation report table:
9075
+
9076
+ ```{python}
9077
+ import pointblank as pb
9078
+
9079
+ validation = (
9080
+ pb.Validate(data=tbl, thresholds=(0.1, 0.2, 0.3))
9081
+ .col_vals_gt(columns="values", value=0)
9082
+ .col_vals_lt(columns="values", value=10)
9083
+ .col_vals_between(columns="values", left=0, right=5)
9084
+ .interrogate()
9085
+ )
9086
+
9087
+ validation
9088
+ ```
9089
+
9090
+ Let's check if any steps exceed the 'warning' threshold with the `above_threshold()` method.
9091
+ A message will be printed if that's the case:
9092
+
9093
+ ```{python}
9094
+ if validation.above_threshold(level="warning"):
9095
+ print("Some steps have exceeded the warning threshold")
9096
+ ```
9097
+
9098
+ Restrict the check to steps 2 and 3 with the `i=` argument to see whether either exceeds the 'error' threshold:
9099
+
9100
+ ```{python}
9101
+ if validation.above_threshold(level="error", i=[2, 3]):
9102
+ print("Steps 2 and/or 3 have exceeded the error threshold")
9103
+ ```
9104
+
9105
+ You can use this in a workflow to conditionally trigger processes. Here's a snippet of how
9106
+ you might use this in a function:
9107
+
9108
+ ```python
9109
+ def process_data(validation_obj):
9110
+ # Only continue processing if validation passes critical thresholds
9111
+ if not validation_obj.above_threshold(level="critical"):
9112
+ # Continue with processing
9113
+ print("Data meets critical quality thresholds, proceeding...")
9114
+ return True
9115
+ else:
9116
+ # Log failure and stop processing
9117
+ print("Data fails critical quality checks, aborting...")
9118
+ return False
9119
+ ```
9120
+
9121
+ Note that this is just a suggestion for how to implement conditional workflow processes. You
9122
+ should adapt this pattern to your specific requirements, which might include different
9123
+ threshold levels, custom logging mechanisms, or integration with your organization's data
9124
+ pipelines and notification systems.
9125
+
9126
+ See Also
9127
+ --------
9128
+ - [`assert_below_threshold()`](`pointblank.Validate.assert_below_threshold`): a similar
9129
+ method that raises an exception if thresholds are exceeded
9130
+ - [`warning()`](`pointblank.Validate.warning`): get the 'warning' status for each validation
9131
+ step
9132
+ - [`error()`](`pointblank.Validate.error`): get the 'error' status for each validation step
9133
+ - [`critical()`](`pointblank.Validate.critical`): get the 'critical' status for each
9134
+ validation step
9135
+ """
9136
+ # Ensure validation has been run
9137
+ if not hasattr(self, "time_start") or self.time_start is None:
9138
+ return False
9139
+
9140
+ # Validate the level parameter
9141
+ level = level.lower()
9142
+ if level not in ["warning", "error", "critical"]:
9143
+ raise ValueError(
9144
+ f"Invalid threshold level: {level}. Must be one of 'warning', 'error', or 'critical'."
9145
+ )
9146
+
9147
+ # Get the threshold status using the appropriate method
9148
+ if level == "warning":
9149
+ status = self.warning(i=i)
9150
+ elif level == "error":
9151
+ status = self.error(i=i)
9152
+ elif level == "critical":
9153
+ status = self.critical(i=i)
9154
+
9155
+ # Return True if any steps exceeded the threshold
9156
+ return any(status.values())
9157
+
8994
9158
  def n(self, i: int | list[int] | None = None, scalar: bool = False) -> dict[int, int] | int:
8995
9159
  """
8996
9160
  Provides a dictionary of the number of test units for each validation step.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pointblank
3
- Version: 0.9.5
3
+ Version: 0.9.6
4
4
  Summary: Find out if your data is what you think it is.
5
5
  Author-email: Richard Iannone <riannone@me.com>
6
6
  License: MIT License
@@ -55,6 +55,8 @@ Requires-Dist: chatlas>=0.3.0; extra == "generate"
55
55
  Requires-Dist: anthropic[bedrock]>=0.45.2; extra == "generate"
56
56
  Requires-Dist: openai>=1.63.0; extra == "generate"
57
57
  Requires-Dist: shiny>=1.3.0; extra == "generate"
58
+ Provides-Extra: bigquery
59
+ Requires-Dist: ibis-framework[bigquery]>=9.5.0; extra == "bigquery"
58
60
  Provides-Extra: databricks
59
61
  Requires-Dist: ibis-framework[databricks]>=9.5.0; extra == "databricks"
60
62
  Provides-Extra: duckdb
@@ -1,22 +1,22 @@
1
1
  pointblank/__init__.py,sha256=uHrX-ARZOhvWogXXqKV65RO2DXdYLZNCD1oNcm8hE6o,1585
2
- pointblank/_constants.py,sha256=D4HF0NrNAd-mdb88gZ6VatkRYfVX-9gC6C7TOQjjAw4,81128
2
+ pointblank/_constants.py,sha256=YeQVYpSkdQ8v7D8ZJnG-M75zqAH3yJuDzzjwWC2I-d8,81227
3
3
  pointblank/_constants_docs.py,sha256=JBmtt16zTYQ-zaM4ElLExtKs-dKlnN553Ys2ML1Y1C8,2099
4
4
  pointblank/_constants_translations.py,sha256=HXcCYmKoMjoaFv-Ym4UWv3AsIVXik2zDyAy7xvTvv0Y,186710
5
5
  pointblank/_interrogation.py,sha256=U4GQ8Ik5rP75BYBkmunBvHKwf3XvLPHcUx18JwiBQZI,89422
6
6
  pointblank/_typing.py,sha256=aItbCbzhbzqjK3lCbL27ltRyXoAH1c3-U6xQdRzg-lU,1594
7
- pointblank/_utils.py,sha256=CsuUYXNzox-Nc5CjQNhyy2XnmnvYJVJrS5cZxklzIFo,24745
7
+ pointblank/_utils.py,sha256=BoIwMEZYBwPEe5xGku1vSmkgAeGgnA4_bQ4MDeYFGrc,24824
8
8
  pointblank/_utils_check_args.py,sha256=rFEc1nbCN8ftsQQWVjCNWmQ2QmUDxkfgmoJclrZeTLs,5489
9
9
  pointblank/_utils_html.py,sha256=sTcmnBljkPjRZF1hbpoHl4HmnXOazsA91gC9iWVIrRk,2848
10
- pointblank/actions.py,sha256=ilk__kbQiS4ieJp-4dM7SDGuobQihUxLyS5ahgiP7qE,18272
10
+ pointblank/actions.py,sha256=D6o9B2_ES9PNQg9HZwREacrrt-3A5bhdrBkL1UXz__s,18281
11
11
  pointblank/assistant.py,sha256=ZIQJKTy9rDwq_Wmr1FMp0J7Q3ekxSgF3_tK0p4PTEUM,14850
12
- pointblank/column.py,sha256=LumGbnterw5VM7-2-7Za3jdlug1VVS9a3TOH0Y1E5eg,76548
12
+ pointblank/column.py,sha256=_FJjpjv760D1p6YGgqbwmKYktouG7AJ2A9uIMYQBTYA,76560
13
13
  pointblank/datascan.py,sha256=rRz0hR81uTgd1e9OfLdfsNYXRk8vcpE8PW8exu-GJoE,47697
14
14
  pointblank/draft.py,sha256=cusr4fBiNncCKIOU8UwvJcvkBeBuUnqH_UfYp9dtNss,15777
15
15
  pointblank/schema.py,sha256=nHkOXykPw7mTmVGjT67hjx13iKySZ5xsfVgPUQV0yCM,44588
16
16
  pointblank/tf.py,sha256=8o_8m4i01teulEe3-YYMotSNf3tImjBMInsvdjSAO5Q,8844
17
- pointblank/thresholds.py,sha256=cweex25DwBPrsvPW12pRoaTQnwFpUUwqTdHyFJXTnN0,25760
18
- pointblank/validate.py,sha256=DfTChQcLyaJFNLdjkG3jQAsY7GtLvTHSbxkzKusG9I4,615287
19
- pointblank/data/api-docs.txt,sha256=Sk2ePat_ngz3tAizQVSo7uG_fInv638HFLmM6041osM,482808
17
+ pointblank/thresholds.py,sha256=mybeLzTVdmN04NLKoV-jiSBXsWknwHO0Gox0ttVN_MU,25766
18
+ pointblank/validate.py,sha256=dM5U41me38atNDt1Llzv08gdUcnYyvWoHycQPpctidg,621961
19
+ pointblank/data/api-docs.txt,sha256=6cdUIYdVy2XfGRLNNxtcGTaxu2WX4EXEeICayOvJCTs,492756
20
20
  pointblank/data/game_revenue-duckdb.zip,sha256=tKIVx48OGLYGsQPS3h5AjA2Nyq_rfEpLCjBiFUWhagU,35880
21
21
  pointblank/data/game_revenue.zip,sha256=7c9EvHLyi93CHUd4p3dM4CZ-GucFCtXKSPxgLojL32U,33749
22
22
  pointblank/data/global_sales-duckdb.zip,sha256=2ok_cvJ1ZuSkXnw0R6_OkKYRTWhJ-jJEMq2VYsv5fqY,1336390
@@ -26,8 +26,8 @@ pointblank/data/nycflights.zip,sha256=yVjbUaKUz2LydSdF9cABuir0VReHBBgV7shiNWSd0m
26
26
  pointblank/data/polars-api-docs.txt,sha256=KGcS-BOtUs9zgpkWfXD-GFdFh4O_zjdkpX7msHjztLg,198045
27
27
  pointblank/data/small_table-duckdb.zip,sha256=BhTaZ2CRS4-9Z1uVhOU6HggvW3XCar7etMznfENIcOc,2028
28
28
  pointblank/data/small_table.zip,sha256=lmFb90Nb-v5X559Ikjg31YLAXuRyMkD9yLRElkXPMzQ,472
29
- pointblank-0.9.5.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
30
- pointblank-0.9.5.dist-info/METADATA,sha256=8SHBgMHqrX9T2cMOfa_cQMDw60NbCmMB1xLgrwWk5vw,14857
31
- pointblank-0.9.5.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
32
- pointblank-0.9.5.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
33
- pointblank-0.9.5.dist-info/RECORD,,
29
+ pointblank-0.9.6.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
30
+ pointblank-0.9.6.dist-info/METADATA,sha256=_BocxWcU0_AXIiMGBPcxsd9VwrD8uGXjXpjE16hUhVw,14950
31
+ pointblank-0.9.6.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
32
+ pointblank-0.9.6.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
33
+ pointblank-0.9.6.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.7.1)
2
+ Generator: setuptools (80.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5