pointblank 0.9.4-py3-none-any.whl → 0.9.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
pointblank/_constants.py CHANGED
@@ -109,6 +109,7 @@ ROW_BASED_VALIDATION_TYPES = [
109
109
  ]
110
110
 
111
111
  IBIS_BACKENDS = [
112
+ "bigquery",
112
113
  "databricks",
113
114
  "duckdb",
114
115
  "memtable",
@@ -165,6 +166,7 @@ TABLE_TYPE_STYLES = {
165
166
  "parquet": {"background": "#3F9FF9", "text": "#FFFFFF", "label": "Parquet"},
166
167
  "memtable": {"background": "#2C3E50", "text": "#FFFFFF", "label": "Ibis memtable"},
167
168
  "mssql": {"background": "#E2E2E2", "text": "#222222", "label": "MSSQL"},
169
+ "bigquery": {"background": "#4285F4", "text": "#FFFFFF", "label": "BigQuery"},
168
170
  "pyspark": {"background": "#E66F21", "text": "#FFFFFF", "label": "Spark DataFrame"},
169
171
  "databricks": {"background": "#FF3621", "text": "#FFFFFF", "label": "Databricks"},
170
172
  }
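The two `"bigquery"` additions above register BigQuery as a recognized Ibis backend and give it a labeled style in validation report headers. As a rough, illustrative sketch of what this enables (assuming the new `bigquery` extra is installed, e.g. `pip install "pointblank[bigquery]"`, and that an Ibis BigQuery connection is available; the project, dataset, table, and column names below are placeholders):

```python
import ibis
import pointblank as pb

# Hypothetical connection details: substitute your own GCP project and dataset.
con = ibis.bigquery.connect(project_id="my-project", dataset_id="my_dataset")
events = con.table("events")  # an Ibis table backed by BigQuery

validation = (
    pb.Validate(data=events, tbl_name="events", label="BigQuery-backed validation")
    .col_vals_not_null(columns="event_id")
    .interrogate()
)
```

With the style entry in place, the report should label such a table as "BigQuery" rather than falling back to a generic backend label.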
pointblank/_utils.py CHANGED
@@ -514,6 +514,8 @@ def _get_api_text() -> str:
514
514
  "Validate.get_data_extracts",
515
515
  "Validate.all_passed",
516
516
  "Validate.assert_passing",
517
+ "Validate.assert_below_threshold",
518
+ "Validate.above_threshold",
517
519
  "Validate.n",
518
520
  "Validate.n_passed",
519
521
  "Validate.n_failed",
pointblank/actions.py CHANGED
@@ -216,7 +216,7 @@ def send_slack_notification(
216
216
  thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
217
217
  actions=pb.Actions(critical=notify_slack),
218
218
  )
219
- .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}\d{3}")
219
+ .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
220
220
  .col_vals_gt(columns="item_revenue", value=0.05)
221
221
  .col_vals_gt(columns="session_duration", value=15)
222
222
  .interrogate()
@@ -248,7 +248,7 @@ def send_slack_notification(
248
248
  thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
249
249
  final_actions=pb.FinalActions(notify_slack),
250
250
  )
251
- .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}\d{3}")
251
+ .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
252
252
  .col_vals_gt(columns="item_revenue", value=0.05)
253
253
  .col_vals_gt(columns="session_duration", value=15)
254
254
  .interrogate()
@@ -316,7 +316,7 @@ def send_slack_notification(
316
316
  actions=pb.Actions(default=notify_slack),
317
317
  final_actions=pb.FinalActions(notify_slack),
318
318
  )
319
- .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}\d{3}")
319
+ .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
320
320
  .col_vals_gt(columns="item_revenue", value=0.05)
321
321
  .col_vals_gt(columns="session_duration", value=15)
322
322
  .interrogate()
pointblank/column.py CHANGED
@@ -1007,7 +1007,7 @@ def matches(pattern: str, case_sensitive: bool = False) -> Matches:
1007
1007
  `[rev_01, rev_02, profit_01, profit_02, age]`
1008
1008
 
1009
1009
  and you want to validate columns that have two digits at the end of the name, you can use
1010
- `columns=matches(r"\d{2}$")`. This will select the `rev_01`, `rev_02`, `profit_01`, and
1010
+ `columns=matches(r"[0-9]{2}$")`. This will select the `rev_01`, `rev_02`, `profit_01`, and
1011
1011
  `profit_02` columns.
1012
1012
 
1013
1013
  There will be a validation step created for every resolved column. Note that if there aren't any
@@ -1061,7 +1061,7 @@ def matches(pattern: str, case_sensitive: bool = False) -> Matches:
1061
1061
  [`col()`](`pointblank.col`) function, like this:
1062
1062
 
1063
1063
  ```python
1064
- col(matches(r"^\d{5}") & ends_with("_id"))
1064
+ col(matches(r"^[0-9]{5}") & ends_with("_id"))
1065
1065
  ```
1066
1066
 
1067
1067
  There are four operators that can be used to compose column selectors:
@@ -1107,7 +1107,7 @@ def matches(pattern: str, case_sensitive: bool = False) -> Matches:
1107
1107
 
1108
1108
  validation = (
1109
1109
  pb.Validate(data=tbl)
1110
- .col_vals_regex(columns=pb.matches("id|identifier"), pattern=r"ID\d{4}")
1110
+ .col_vals_regex(columns=pb.matches("id|identifier"), pattern=r"ID[0-9]{4}")
1111
1111
  .interrogate()
1112
1112
  )
1113
1113
 
@@ -1115,7 +1115,7 @@ def matches(pattern: str, case_sensitive: bool = False) -> Matches:
1115
1115
  ```
1116
1116
 
1117
1117
  From the results of the validation table we get two validation steps, one for `id_old` and one
1118
- for `new_identifier`. The values in both columns all match the pattern `"ID\d{4}"`.
1118
+ for `new_identifier`. The values in both columns all match the pattern `"ID[0-9]{4}"`.
1119
1119
 
1120
1120
  We can also use the `matches()` function in combination with other column selectors (within
1121
1121
  [`col()`](`pointblank.col`)) to create more complex column selection criteria (i.e., to select
pointblank/data/api-docs.txt CHANGED
@@ -107,6 +107,11 @@ Validate(data: 'FrameT | Any', tbl_name: 'str | None' = None, label: 'str | None
107
107
  - MySQL table (`"mysql"`)*
108
108
  - PostgreSQL table (`"postgresql"`)*
109
109
  - SQLite table (`"sqlite"`)*
110
+ - Microsoft SQL Server table (`"mssql"`)*
111
+ - Snowflake table (`"snowflake"`)*
112
+ - Databricks table (`"databricks"`)*
113
+ - PySpark table (`"pyspark"`)*
114
+ - BigQuery table (`"bigquery"`)*
110
115
  - Parquet table (`"parquet"`)*
111
116
 
112
117
  The table types marked with an asterisk need to be prepared as Ibis tables (with type of
@@ -580,7 +585,7 @@ Actions(warning: 'str | Callable | list[str | Callable] | None' = None, error: '
580
585
  thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
581
586
  actions=pb.Actions(critical="Major data quality issue found in step {step}."),
582
587
  )
583
- .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}\d{3}")
588
+ .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
584
589
  .col_vals_gt(columns="item_revenue", value=0.05)
585
590
  .col_vals_gt(columns="session_duration", value=15)
586
591
  .interrogate()
@@ -610,7 +615,7 @@ Actions(warning: 'str | Callable | list[str | Callable] | None' = None, error: '
610
615
  data=pb.load_dataset(dataset="game_revenue", tbl_type="duckdb"),
611
616
  thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
612
617
  )
613
- .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}\d{3}")
618
+ .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
614
619
  .col_vals_gt(columns="item_revenue", value=0.05)
615
620
  .col_vals_gt(
616
621
  columns="session_duration",
@@ -6231,7 +6236,7 @@ matches(pattern: 'str', case_sensitive: 'bool' = False) -> 'Matches'
6231
6236
  `[rev_01, rev_02, profit_01, profit_02, age]`
6232
6237
 
6233
6238
  and you want to validate columns that have two digits at the end of the name, you can use
6234
- `columns=matches(r"\d{2}$")`. This will select the `rev_01`, `rev_02`, `profit_01`, and
6239
+ `columns=matches(r"[0-9]{2}$")`. This will select the `rev_01`, `rev_02`, `profit_01`, and
6235
6240
  `profit_02` columns.
6236
6241
 
6237
6242
  There will be a validation step created for every resolved column. Note that if there aren't any
@@ -6285,7 +6290,7 @@ matches(pattern: 'str', case_sensitive: 'bool' = False) -> 'Matches'
6285
6290
  [`col()`](`pointblank.col`) function, like this:
6286
6291
 
6287
6292
  ```python
6288
- col(matches(r"^\d{5}") & ends_with("_id"))
6293
+ col(matches(r"^[0-9]{5}") & ends_with("_id"))
6289
6294
  ```
6290
6295
 
6291
6296
  There are four operators that can be used to compose column selectors:
@@ -6324,7 +6329,7 @@ matches(pattern: 'str', case_sensitive: 'bool' = False) -> 'Matches'
6324
6329
 
6325
6330
  validation = (
6326
6331
  pb.Validate(data=tbl)
6327
- .col_vals_regex(columns=pb.matches("id|identifier"), pattern=r"ID\d{4}")
6332
+ .col_vals_regex(columns=pb.matches("id|identifier"), pattern=r"ID[0-9]{4}")
6328
6333
  .interrogate()
6329
6334
  )
6330
6335
 
@@ -6332,7 +6337,7 @@ matches(pattern: 'str', case_sensitive: 'bool' = False) -> 'Matches'
6332
6337
  ```
6333
6338
 
6334
6339
  From the results of the validation table we get two validation steps, one for `id_old` and one
6335
- for `new_identifier`. The values in both columns all match the pattern `"ID\d{4}"`.
6340
+ for `new_identifier`. The values in both columns all match the pattern `"ID[0-9]{4}"`.
6336
6341
 
6337
6342
  We can also use the `matches()` function in combination with other column selectors (within
6338
6343
  [`col()`](`pointblank.col`)) to create more complex column selection criteria (i.e., to select
@@ -6875,7 +6880,7 @@ interrogate(self, collect_extracts: 'bool' = True, collect_tbl_checked: 'bool' =
6875
6880
 
6876
6881
  After interrogation is complete, the `Validate` object will have gathered information, and
6877
6882
  we can use methods like [`n_passed()`](`pointblank.Validate.n_passed`),
6878
- [`f_failed()`](`pointblank.Validate.f_failed`)`, etc., to understand how the table performed
6883
+ [`f_failed()`](`pointblank.Validate.f_failed`), etc., to understand how the table performed
6879
6884
  against the validation plan. A visual representation of the validation results can be viewed
6880
6885
  by printing the `Validate` object; this will display the validation table in an HTML viewing
6881
6886
  environment.
@@ -7578,6 +7583,10 @@ assert_passing(self) -> 'None'
7578
7583
  assertion made is printed in the `AssertionError` message if a failure occurs, ensuring
7579
7584
  some details are preserved.
7580
7585
 
7586
+ If the validation has not yet been interrogated, this method will automatically call
7587
+ [`interrogate()`](`pointblank.Validate.interrogate`) with default parameters before checking
7588
+ for passing tests.
7589
+
7581
7590
  Raises
7582
7591
  -------
7583
7592
  AssertionError
@@ -7587,8 +7596,9 @@ assert_passing(self) -> 'None'
7587
7596
  --------
7588
7597
  In the example below, we'll use a simple Polars DataFrame with three columns (`a`, `b`, and
7589
7598
  `c`). There will be three validation steps, and the second step will have a failing test
7590
- unit (the value `10` isn't less than `9`). After interrogation, the `assert_passing()`
7591
- method is used to assert that all validation steps passed perfectly.
7599
+ unit (the value `10` isn't less than `9`). The `assert_passing()` method is used to assert
7600
+ that all validation steps passed perfectly, automatically performing the interrogation if
7601
+ needed.
7592
7602
 
7593
7603
  ```python
7594
7604
  #| error: True
@@ -7609,13 +7619,221 @@ assert_passing(self) -> 'None'
7609
7619
  .col_vals_gt(columns="a", value=0)
7610
7620
  .col_vals_lt(columns="b", value=9) # this assertion is false
7611
7621
  .col_vals_in_set(columns="c", set=["a", "b"])
7612
- .interrogate()
7613
7622
  )
7614
7623
 
7624
+ # No need to call [`interrogate()`](`pointblank.Validate.interrogate`) explicitly
7615
7625
  validation.assert_passing()
7616
7626
  ```
7617
7627
 
7618
7628
 
7629
+ assert_below_threshold(self, level: 'str' = 'warning', i: 'int | None' = None, message: 'str | None' = None) -> 'None'
7630
+
7631
+ Raise an `AssertionError` if validation steps exceed a specified threshold level.
7632
+
7633
+ The `assert_below_threshold()` method checks whether validation steps' failure rates are
7634
+ below a given threshold level (`"warning"`, `"error"`, or `"critical"`). This is
7635
+ particularly useful in automated testing environments where you want to ensure your data
7636
+ quality meets minimum standards before proceeding.
7637
+
7638
+ If any validation step exceeds the specified threshold level, an `AssertionError` will be
7639
+ raised with details about which steps failed. If the validation has not yet been
7640
+ interrogated, this method will automatically call
7641
+ [`interrogate()`](`pointblank.Validate.interrogate`) with default parameters.
7642
+
7643
+ Parameters
7644
+ ----------
7645
+ level
7646
+ The threshold level to check against, which could be any of `"warning"` (the default),
7647
+ `"error"`, or `"critical"`. An `AssertionError` will be raised if any validation step
7648
+ exceeds this level.
7649
+ i
7650
+ Specific validation step number(s) to check. Can be provided as a single integer or a
7651
+ list of integers. If `None` (the default), all steps are checked.
7652
+ message
7653
+ Custom error message to use if assertion fails. If `None`, a default message will be
7654
+ generated that lists the specific steps that exceeded the threshold.
7655
+
7656
+ Returns
7657
+ -------
7658
+ None
7659
+
7660
+ Raises
7661
+ ------
7662
+ AssertionError
7663
+ If any specified validation step exceeds the given threshold level.
7664
+ ValueError
7665
+ If an invalid threshold level is provided.
7666
+
7667
+ Examples
7668
+ --------
7669
+ Below are some examples of how to use the `assert_below_threshold()` method. First, we'll
7670
+ create a simple Polars DataFrame with two columns (`a` and `b`).
7671
+
7672
+ ```python
7673
+ import polars as pl
7674
+
7675
+ tbl = pl.DataFrame({
7676
+ "a": [7, 4, 9, 7, 12],
7677
+ "b": [9, 8, 10, 5, 10]
7678
+ })
7679
+ ```
7680
+
7681
+ Then a validation plan will be created with thresholds (`warning=0.1`, `error=0.2`,
7682
+ `critical=0.3`). After interrogating, we display the validation report table:
7683
+
7684
+ ```python
7685
+ import pointblank as pb
7686
+
7687
+ validation = (
7688
+ pb.Validate(data=tbl, thresholds=(0.1, 0.2, 0.3))
7689
+ .col_vals_gt(columns="a", value=5) # 1 failing test unit
7690
+ .col_vals_lt(columns="b", value=10) # 2 failing test units
7691
+ .interrogate()
7692
+ )
7693
+
7694
+ validation
7695
+ ```
7696
+
7697
+ Using `assert_below_threshold(level="warning")` will raise an `AssertionError` if any step
7698
+ exceeds the 'warning' threshold:
7699
+
7700
+ Check a specific step against the 'critical' threshold using the `i=` parameter:
7701
+
7702
+ ```python
7703
+ validation.assert_below_threshold(level="critical", i=1) # Won't raise an error
7704
+ ```
7705
+
7706
+ As the first step is below the 'critical' threshold (it exceeds the 'warning' and 'error'
7707
+ thresholds), no error is raised and nothing is printed.
7708
+
7709
+ We can also provide a custom error message with the `message=` parameter. Let's try that
7710
+ here:
7711
+
7712
+ ```python
7713
+ try:
7714
+ validation.assert_below_threshold(
7715
+ level="error",
7716
+ message="Data quality too low for processing!"
7717
+ )
7718
+ except AssertionError as e:
7719
+ print(f"Custom error: {e}")
7720
+ ```
7721
+
7722
+ See Also
7723
+ --------
7724
+ - [`warning()`](`pointblank.Validate.warning`): get the 'warning' status for each validation
7725
+ step
7726
+ - [`error()`](`pointblank.Validate.error`): get the 'error' status for each validation step
7727
+ - [`critical()`](`pointblank.Validate.critical`): get the 'critical' status for each
7728
+ validation step
7729
+ - [`assert_passing()`](`pointblank.Validate.assert_passing`): assert all validations pass
7730
+ completely
7731
+
7732
+
7733
+ above_threshold(self, level: 'str' = 'warning', i: 'int | None' = None) -> 'bool'
7734
+
7735
+ Check if any validation steps exceed a specified threshold level.
7736
+
7737
+ The `above_threshold()` method checks whether validation steps exceed a given threshold
7738
+ level. This provides a non-exception-based alternative to
7739
+ [`assert_below_threshold()`](`pointblank.Validate.assert_below_threshold`) for conditional
7740
+ workflow control based on validation results.
7741
+
7742
+ This method is useful in scenarios where you want to check if any validation steps failed
7743
+ beyond a certain threshold without raising an exception, allowing for more flexible
7744
+ programmatic responses to validation issues.
7745
+
7746
+ Parameters
7747
+ ----------
7748
+ level
7749
+ The threshold level to check against. Valid options are: `"warning"` (the least severe
7750
+ threshold level), `"error"` (the middle severity threshold level), and `"critical"` (the
7751
+ most severe threshold level). The default is `"warning"`.
7752
+ i
7753
+ Specific validation step number(s) to check. If a single integer, checks only that step.
7754
+ If a list of integers, checks all specified steps. If `None` (the default), checks all
7755
+ validation steps. Step numbers are 1-based (first step is `1`, not `0`).
7756
+
7757
+ Returns
7758
+ -------
7759
+ bool
7760
+ `True` if any of the specified validation steps exceed the given threshold level,
7761
+ `False` otherwise.
7762
+
7763
+ Raises
7764
+ ------
7765
+ ValueError
7766
+ If an invalid threshold level is provided.
7767
+
7768
+ Examples
7769
+ --------
7770
+ Below are some examples of how to use the `above_threshold()` method. First, we'll create a
7771
+ simple Polars DataFrame with a single column (`values`).
7772
+
7773
+ Then a validation plan will be created with thresholds (`warning=0.1`, `error=0.2`,
7774
+ `critical=0.3`). After interrogating, we display the validation report table:
7775
+
7776
+ ```python
7777
+ import pointblank as pb
7778
+
7779
+ validation = (
7780
+ pb.Validate(data=tbl, thresholds=(0.1, 0.2, 0.3))
7781
+ .col_vals_gt(columns="values", value=0)
7782
+ .col_vals_lt(columns="values", value=10)
7783
+ .col_vals_between(columns="values", left=0, right=5)
7784
+ .interrogate()
7785
+ )
7786
+
7787
+ validation
7788
+ ```
7789
+
7790
+ Let's check if any steps exceed the 'warning' threshold with the `above_threshold()` method.
7791
+ A message will be printed if that's the case:
7792
+
7793
+ ```python
7794
+ if validation.above_threshold(level="warning"):
7795
+ print("Some steps have exceeded the warning threshold")
7796
+ ```
7797
+
7798
+ Check if only steps 2 and 3 exceed the 'error' threshold through use of the `i=` argument:
7799
+
7800
+ ```python
7801
+ if validation.above_threshold(level="error", i=[2, 3]):
7802
+ print("Steps 2 and/or 3 have exceeded the error threshold")
7803
+ ```
7804
+
7805
+ You can use this in a workflow to conditionally trigger processes. Here's a snippet of how
7806
+ you might use this in a function:
7807
+
7808
+ ```python
7809
+ def process_data(validation_obj):
7810
+ # Only continue processing if validation passes critical thresholds
7811
+ if not validation_obj.above_threshold(level="critical"):
7812
+ # Continue with processing
7813
+ print("Data meets critical quality thresholds, proceeding...")
7814
+ return True
7815
+ else:
7816
+ # Log failure and stop processing
7817
+ print("Data fails critical quality checks, aborting...")
7818
+ return False
7819
+ ```
7820
+
7821
+ Note that this is just a suggestion for how to implement conditional workflow processes. You
7822
+ should adapt this pattern to your specific requirements, which might include different
7823
+ threshold levels, custom logging mechanisms, or integration with your organization's data
7824
+ pipelines and notification systems.
7825
+
7826
+ See Also
7827
+ --------
7828
+ - [`assert_below_threshold()`](`pointblank.Validate.assert_below_threshold`): a similar
7829
+ method that raises an exception if thresholds are exceeded
7830
+ - [`warning()`](`pointblank.Validate.warning`): get the 'warning' status for each validation
7831
+ step
7832
+ - [`error()`](`pointblank.Validate.error`): get the 'error' status for each validation step
7833
+ - [`critical()`](`pointblank.Validate.critical`): get the 'critical' status for each
7834
+ validation step
7835
+
7836
+
7619
7837
  n(self, i: 'int | list[int] | None' = None, scalar: 'bool' = False) -> 'dict[int, int] | int'
7620
7838
 
7621
7839
  Provides a dictionary of the number of test units for each validation step.
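The new `assert_below_threshold()` and `above_threshold()` entries documented above are aimed at CI-style quality gates. A hedged sketch of how they might be wired into a test suite (the test name and the tiny dataset are placeholders, not part of the package):

```python
import polars as pl
import pointblank as pb

def test_orders_meet_error_threshold():
    # One failing test unit out of four (25%), which crosses the 0.2 'error' threshold
    tbl = pl.DataFrame({"qty": [1, 2, 3, 0]})

    validation = (
        pb.Validate(data=tbl, thresholds=(0.1, 0.2, 0.3))
        .col_vals_gt(columns="qty", value=0)
    )

    # Per the docs above, interrogate() is called automatically if it hasn't been
    # run yet; the AssertionError raised here would fail this test.
    validation.assert_below_threshold(level="error")
```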
@@ -8504,6 +8722,11 @@ preview(data: 'FrameT | Any', columns_subset: 'str | list[str] | Column | None'
8504
8722
  - MySQL table (`"mysql"`)*
8505
8723
  - PostgreSQL table (`"postgresql"`)*
8506
8724
  - SQLite table (`"sqlite"`)*
8725
+ - Microsoft SQL Server table (`"mssql"`)*
8726
+ - Snowflake table (`"snowflake"`)*
8727
+ - Databricks table (`"databricks"`)*
8728
+ - PySpark table (`"pyspark"`)*
8729
+ - BigQuery table (`"bigquery"`)*
8507
8730
  - Parquet table (`"parquet"`)*
8508
8731
 
8509
8732
  The table types marked with an asterisk need to be prepared as Ibis tables (with type of
@@ -8672,6 +8895,11 @@ missing_vals_tbl(data: 'FrameT | Any') -> 'GT'
8672
8895
  - MySQL table (`"mysql"`)*
8673
8896
  - PostgreSQL table (`"postgresql"`)*
8674
8897
  - SQLite table (`"sqlite"`)*
8898
+ - Microsoft SQL Server table (`"mssql"`)*
8899
+ - Snowflake table (`"snowflake"`)*
8900
+ - Databricks table (`"databricks"`)*
8901
+ - PySpark table (`"pyspark"`)*
8902
+ - BigQuery table (`"bigquery"`)*
8675
8903
  - Parquet table (`"parquet"`)*
8676
8904
 
8677
8905
  The table types marked with an asterisk need to be prepared as Ibis tables (with type of
@@ -8837,7 +9065,7 @@ assistant(model: 'str', data: 'FrameT | Any | None' = None, tbl_name: 'str | Non
8837
9065
  Pandas DataFrame, the availability of Ibis is not needed.
8838
9066
 
8839
9067
 
8840
- load_dataset(dataset: "Literal['small_table', 'game_revenue', 'nycflights']" = 'small_table', tbl_type: "Literal['polars', 'pandas', 'duckdb']" = 'polars') -> 'FrameT | Any'
9068
+ load_dataset(dataset: "Literal['small_table', 'game_revenue', 'nycflights', 'global_sales']" = 'small_table', tbl_type: "Literal['polars', 'pandas', 'duckdb']" = 'polars') -> 'FrameT | Any'
8841
9069
 
8842
9070
  Load a dataset hosted in the library as specified table type.
8843
9071
 
@@ -8851,7 +9079,7 @@ load_dataset(dataset: "Literal['small_table', 'game_revenue', 'nycflights']" = '
8851
9079
  ----------
8852
9080
  dataset
8853
9081
  The name of the dataset to load. Current options are `"small_table"`, `"game_revenue"`,
8854
- and `"nycflights"`.
9082
+ `"nycflights"`, and `"global_sales"`.
8855
9083
  tbl_type
8856
9084
  The type of table to generate from the dataset. The named options are `"polars"`,
8857
9085
  `"pandas"`, and `"duckdb"`.
@@ -8873,6 +9101,8 @@ load_dataset(dataset: "Literal['small_table', 'game_revenue', 'nycflights']" = '
8873
9101
  they purchased, ads viewed, and the revenue generated.
8874
9102
  - `"nycflights"`: A dataset with 336,776 rows and 18 columns. This dataset provides information
8875
9103
  about flights departing from New York City airports (JFK, LGA, or EWR) in 2013.
9104
+ - `"global_sales"`: A dataset with 50,000 rows and 20 columns. Provides information about
9105
+ global sales of products across different regions and countries.
8876
9106
 
8877
9107
  Supported DataFrame Types
8878
9108
  -------------------------
@@ -8884,10 +9114,10 @@ load_dataset(dataset: "Literal['small_table', 'game_revenue', 'nycflights']" = '
8884
9114
 
8885
9115
  Examples
8886
9116
  --------
8887
- Load the `"small_table"` dataset as a Polars DataFrame by calling `load_dataset()` with its
8888
- defaults:
9117
+ Load the `"small_table"` dataset as a Polars DataFrame by calling `load_dataset()` with
9118
+ `dataset="small_table"` and `tbl_type="polars"`:
8889
9119
 
8890
- Note that the `"small_table"` dataset is a simple Polars DataFrame and using the
9120
+ Note that the `"small_table"` dataset is a Polars DataFrame and using the
8891
9121
  [`preview()`](`pointblank.preview`) function will display the table in an HTML viewing
8892
9122
  environment.
8893
9123
 
@@ -8915,6 +9145,19 @@ load_dataset(dataset: "Literal['small_table', 'game_revenue', 'nycflights']" = '
8915
9145
  The `"nycflights"` dataset is a large dataset with 336,776 rows and 18 columns. This dataset is
8916
9146
  truly a real-world dataset and provides information about flights originating from New York City
8917
9147
  airports in 2013.
9148
+
9149
+ Finally, the `"global_sales"` dataset can be loaded as a Polars table by specifying the dataset
9150
+ name. Since `tbl_type=` is set to `"polars"` by default, we don't need to specify it:
9151
+
9152
+ ```python
9153
+ global_sales = pb.load_dataset(dataset="global_sales")
9154
+
9155
+ pb.preview(global_sales)
9156
+ ```
9157
+
9158
+ The `"global_sales"` dataset is a large dataset with 50,000 rows and 20 columns. Each record
9159
+ describes the sales of a particular product to a customer located in one of three global
9160
+ regions: North America, Europe, or Asia.
8918
9161
 
8919
9162
 
8920
9163
 
@@ -8956,6 +9199,11 @@ get_column_count(data: 'FrameT | Any') -> 'int'
8956
9199
  - MySQL table (`"mysql"`)*
8957
9200
  - PostgreSQL table (`"postgresql"`)*
8958
9201
  - SQLite table (`"sqlite"`)*
9202
+ - Microsoft SQL Server table (`"mssql"`)*
9203
+ - Snowflake table (`"snowflake"`)*
9204
+ - Databricks table (`"databricks"`)*
9205
+ - PySpark table (`"pyspark"`)*
9206
+ - BigQuery table (`"bigquery"`)*
8959
9207
  - Parquet table (`"parquet"`)*
8960
9208
 
8961
9209
  The table types marked with an asterisk need to be prepared as Ibis tables (with type of
@@ -9013,6 +9261,11 @@ get_row_count(data: 'FrameT | Any') -> 'int'
9013
9261
  - MySQL table (`"mysql"`)*
9014
9262
  - PostgreSQL table (`"postgresql"`)*
9015
9263
  - SQLite table (`"sqlite"`)*
9264
+ - Microsoft SQL Server table (`"mssql"`)*
9265
+ - Snowflake table (`"snowflake"`)*
9266
+ - Databricks table (`"databricks"`)*
9267
+ - PySpark table (`"pyspark"`)*
9268
+ - BigQuery table (`"bigquery"`)*
9016
9269
  - Parquet table (`"parquet"`)*
9017
9270
 
9018
9271
  The table types marked with an asterisk need to be prepared as Ibis tables (with type of
@@ -9452,7 +9705,7 @@ send_slack_notification(webhook_url: 'str | None' = None, step_msg: 'str | None'
9452
9705
  thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
9453
9706
  actions=pb.Actions(critical=notify_slack),
9454
9707
  )
9455
- .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}\d{3}")
9708
+ .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
9456
9709
  .col_vals_gt(columns="item_revenue", value=0.05)
9457
9710
  .col_vals_gt(columns="session_duration", value=15)
9458
9711
  .interrogate()
@@ -9484,7 +9737,7 @@ send_slack_notification(webhook_url: 'str | None' = None, step_msg: 'str | None'
9484
9737
  thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
9485
9738
  final_actions=pb.FinalActions(notify_slack),
9486
9739
  )
9487
- .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}\d{3}")
9740
+ .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
9488
9741
  .col_vals_gt(columns="item_revenue", value=0.05)
9489
9742
  .col_vals_gt(columns="session_duration", value=15)
9490
9743
  .interrogate()
@@ -9552,7 +9805,7 @@ send_slack_notification(webhook_url: 'str | None' = None, step_msg: 'str | None'
9552
9805
  actions=pb.Actions(default=notify_slack),
9553
9806
  final_actions=pb.FinalActions(notify_slack),
9554
9807
  )
9555
- .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}\d{3}")
9808
+ .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
9556
9809
  .col_vals_gt(columns="item_revenue", value=0.05)
9557
9810
  .col_vals_gt(columns="session_duration", value=15)
9558
9811
  .interrogate()
pointblank/schema.py CHANGED
@@ -728,7 +728,14 @@ class Schema:
728
728
  return new_schema
729
729
 
730
730
  def __str__(self):
731
- return "Pointblank Schema\n" + "\n".join([f" {col[0]}: {col[1]}" for col in self.columns])
731
+ formatted_columns = []
732
+ for col in self.columns:
733
+ if len(col) == 1: # Only column name provided (no data type)
734
+ formatted_columns.append(f" {col[0]}: <ANY>")
735
+ else: # Both column name and data type provided
736
+ formatted_columns.append(f" {col[0]}: {col[1]}")
737
+
738
+ return "Pointblank Schema\n" + "\n".join(formatted_columns)
732
739
 
733
740
  def __repr__(self):
734
741
  return f"Schema(columns={self.columns})"
pointblank/thresholds.py CHANGED
@@ -404,7 +404,7 @@ class Actions:
404
404
  thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
405
405
  actions=pb.Actions(critical="Major data quality issue found in step {step}."),
406
406
  )
407
- .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}\d{3}")
407
+ .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
408
408
  .col_vals_gt(columns="item_revenue", value=0.05)
409
409
  .col_vals_gt(columns="session_duration", value=15)
410
410
  .interrogate()
@@ -434,7 +434,7 @@ class Actions:
434
434
  data=pb.load_dataset(dataset="game_revenue", tbl_type="duckdb"),
435
435
  thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
436
436
  )
437
- .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}\d{3}")
437
+ .col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}[0-9]{3}")
438
438
  .col_vals_gt(columns="item_revenue", value=0.05)
439
439
  .col_vals_gt(
440
440
  columns="session_duration",
pointblank/validate.py CHANGED
@@ -636,6 +636,11 @@ def preview(
636
636
  - MySQL table (`"mysql"`)*
637
637
  - PostgreSQL table (`"postgresql"`)*
638
638
  - SQLite table (`"sqlite"`)*
639
+ - Microsoft SQL Server table (`"mssql"`)*
640
+ - Snowflake table (`"snowflake"`)*
641
+ - Databricks table (`"databricks"`)*
642
+ - PySpark table (`"pyspark"`)*
643
+ - BigQuery table (`"bigquery"`)*
639
644
  - Parquet table (`"parquet"`)*
640
645
 
641
646
  The table types marked with an asterisk need to be prepared as Ibis tables (with type of
@@ -1134,6 +1139,11 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
1134
1139
  - MySQL table (`"mysql"`)*
1135
1140
  - PostgreSQL table (`"postgresql"`)*
1136
1141
  - SQLite table (`"sqlite"`)*
1142
+ - Microsoft SQL Server table (`"mssql"`)*
1143
+ - Snowflake table (`"snowflake"`)*
1144
+ - Databricks table (`"databricks"`)*
1145
+ - PySpark table (`"pyspark"`)*
1146
+ - BigQuery table (`"bigquery"`)*
1137
1147
  - Parquet table (`"parquet"`)*
1138
1148
 
1139
1149
  The table types marked with an asterisk need to be prepared as Ibis tables (with type of
@@ -1663,6 +1673,11 @@ def get_column_count(data: FrameT | Any) -> int:
1663
1673
  - MySQL table (`"mysql"`)*
1664
1674
  - PostgreSQL table (`"postgresql"`)*
1665
1675
  - SQLite table (`"sqlite"`)*
1676
+ - Microsoft SQL Server table (`"mssql"`)*
1677
+ - Snowflake table (`"snowflake"`)*
1678
+ - Databricks table (`"databricks"`)*
1679
+ - PySpark table (`"pyspark"`)*
1680
+ - BigQuery table (`"bigquery"`)*
1666
1681
  - Parquet table (`"parquet"`)*
1667
1682
 
1668
1683
  The table types marked with an asterisk need to be prepared as Ibis tables (with type of
@@ -1741,6 +1756,11 @@ def get_row_count(data: FrameT | Any) -> int:
1741
1756
  - MySQL table (`"mysql"`)*
1742
1757
  - PostgreSQL table (`"postgresql"`)*
1743
1758
  - SQLite table (`"sqlite"`)*
1759
+ - Microsoft SQL Server table (`"mssql"`)*
1760
+ - Snowflake table (`"snowflake"`)*
1761
+ - Databricks table (`"databricks"`)*
1762
+ - PySpark table (`"pyspark"`)*
1763
+ - BigQuery table (`"bigquery"`)*
1744
1764
  - Parquet table (`"parquet"`)*
1745
1765
 
1746
1766
  The table types marked with an asterisk need to be prepared as Ibis tables (with type of
@@ -2007,6 +2027,11 @@ class Validate:
2007
2027
  - MySQL table (`"mysql"`)*
2008
2028
  - PostgreSQL table (`"postgresql"`)*
2009
2029
  - SQLite table (`"sqlite"`)*
2030
+ - Microsoft SQL Server table (`"mssql"`)*
2031
+ - Snowflake table (`"snowflake"`)*
2032
+ - Databricks table (`"databricks"`)*
2033
+ - PySpark table (`"pyspark"`)*
2034
+ - BigQuery table (`"bigquery"`)*
2010
2035
  - Parquet table (`"parquet"`)*
2011
2036
 
2012
2037
  The table types marked with an asterisk need to be prepared as Ibis tables (with type of
@@ -8031,7 +8056,7 @@ class Validate:
8031
8056
 
8032
8057
  After interrogation is complete, the `Validate` object will have gathered information, and
8033
8058
  we can use methods like [`n_passed()`](`pointblank.Validate.n_passed`),
8034
- [`f_failed()`](`pointblank.Validate.f_failed`)`, etc., to understand how the table performed
8059
+ [`f_failed()`](`pointblank.Validate.f_failed`), etc., to understand how the table performed
8035
8060
  against the validation plan. A visual representation of the validation results can be viewed
8036
8061
  by printing the `Validate` object; this will display the validation table in an HTML viewing
8037
8062
  environment.
@@ -8772,6 +8797,10 @@ class Validate:
8772
8797
  assertion made is printed in the `AssertionError` message if a failure occurs, ensuring
8773
8798
  some details are preserved.
8774
8799
 
8800
+ If the validation has not yet been interrogated, this method will automatically call
8801
+ [`interrogate()`](`pointblank.Validate.interrogate`) with default parameters before checking
8802
+ for passing tests.
8803
+
8775
8804
  Raises
8776
8805
  -------
8777
8806
  AssertionError
@@ -8781,8 +8810,9 @@ class Validate:
8781
8810
  --------
8782
8811
  In the example below, we'll use a simple Polars DataFrame with three columns (`a`, `b`, and
8783
8812
  `c`). There will be three validation steps, and the second step will have a failing test
8784
- unit (the value `10` isn't less than `9`). After interrogation, the `assert_passing()`
8785
- method is used to assert that all validation steps passed perfectly.
8813
+ unit (the value `10` isn't less than `9`). The `assert_passing()` method is used to assert
8814
+ that all validation steps passed perfectly, automatically performing the interrogation if
8815
+ needed.
8786
8816
 
8787
8817
  ```{python}
8788
8818
  #| error: True
@@ -8803,12 +8833,16 @@ class Validate:
8803
8833
  .col_vals_gt(columns="a", value=0)
8804
8834
  .col_vals_lt(columns="b", value=9) # this assertion is false
8805
8835
  .col_vals_in_set(columns="c", set=["a", "b"])
8806
- .interrogate()
8807
8836
  )
8808
8837
 
8838
+ # No need to call [`interrogate()`](`pointblank.Validate.interrogate`) explicitly
8809
8839
  validation.assert_passing()
8810
8840
  ```
8811
8841
  """
8842
+ # Check if validation has been interrogated
8843
+ if not hasattr(self, "time_start") or self.time_start is None:
8844
+ # Auto-interrogate with default parameters
8845
+ self.interrogate()
8812
8846
 
8813
8847
  if not self.all_passed():
8814
8848
  failed_steps = [
@@ -8821,6 +8855,306 @@ class Validate:
8821
8855
  )
8822
8856
  raise AssertionError(msg)
8823
8857
 
8858
+ def assert_below_threshold(
8859
+ self, level: str = "warning", i: int | None = None, message: str | None = None
8860
+ ) -> None:
8861
+ """
8862
+ Raise an `AssertionError` if validation steps exceed a specified threshold level.
8863
+
8864
+ The `assert_below_threshold()` method checks whether validation steps' failure rates are
8865
+ below a given threshold level (`"warning"`, `"error"`, or `"critical"`). This is
8866
+ particularly useful in automated testing environments where you want to ensure your data
8867
+ quality meets minimum standards before proceeding.
8868
+
8869
+ If any validation step exceeds the specified threshold level, an `AssertionError` will be
8870
+ raised with details about which steps failed. If the validation has not yet been
8871
+ interrogated, this method will automatically call
8872
+ [`interrogate()`](`pointblank.Validate.interrogate`) with default parameters.
8873
+
8874
+ Parameters
8875
+ ----------
8876
+ level
8877
+ The threshold level to check against, which could be any of `"warning"` (the default),
8878
+ `"error"`, or `"critical"`. An `AssertionError` will be raised if any validation step
8879
+ exceeds this level.
8880
+ i
8881
+ Specific validation step number(s) to check. Can be provided as a single integer or a
8882
+ list of integers. If `None` (the default), all steps are checked.
8883
+ message
8884
+ Custom error message to use if assertion fails. If `None`, a default message will be
8885
+ generated that lists the specific steps that exceeded the threshold.
8886
+
8887
+ Returns
8888
+ -------
8889
+ None
8890
+
8891
+ Raises
8892
+ ------
8893
+ AssertionError
8894
+ If any specified validation step exceeds the given threshold level.
8895
+ ValueError
8896
+ If an invalid threshold level is provided.
8897
+
8898
+ Examples
8899
+ --------
8900
+ ```{python}
8901
+ #| echo: false
8902
+ #| output: false
8903
+ import pointblank as pb
8904
+ pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
8905
+ ```
8906
+ Below are some examples of how to use the `assert_below_threshold()` method. First, we'll
8907
+ create a simple Polars DataFrame with two columns (`a` and `b`).
8908
+
8909
+ ```{python}
8910
+ import polars as pl
8911
+
8912
+ tbl = pl.DataFrame({
8913
+ "a": [7, 4, 9, 7, 12],
8914
+ "b": [9, 8, 10, 5, 10]
8915
+ })
8916
+ ```
8917
+
8918
+ Then a validation plan will be created with thresholds (`warning=0.1`, `error=0.2`,
8919
+ `critical=0.3`). After interrogating, we display the validation report table:
8920
+
8921
+ ```{python}
8922
+ import pointblank as pb
8923
+
8924
+ validation = (
8925
+ pb.Validate(data=tbl, thresholds=(0.1, 0.2, 0.3))
8926
+ .col_vals_gt(columns="a", value=5) # 1 failing test unit
8927
+ .col_vals_lt(columns="b", value=10) # 2 failing test units
8928
+ .interrogate()
8929
+ )
8930
+
8931
+ validation
8932
+ ```
8933
+
8934
+ Using `assert_below_threshold(level="warning")` will raise an `AssertionError` if any step
8935
+ exceeds the 'warning' threshold:
8936
+
8937
+ ```{python}
8938
+ try:
8939
+ validation.assert_below_threshold(level="warning")
8940
+ except AssertionError as e:
8941
+ print(f"Assertion failed: {e}")
8942
+ ```
8943
+
8944
+ Check a specific step against the 'critical' threshold using the `i=` parameter:
8945
+
8946
+ ```{python}
8947
+ validation.assert_below_threshold(level="critical", i=1) # Won't raise an error
8948
+ ```
8949
+
8950
+ As the first step is below the 'critical' threshold (it exceeds the 'warning' and 'error'
8951
+ thresholds), no error is raised and nothing is printed.
8952
+
8953
+ We can also provide a custom error message with the `message=` parameter. Let's try that
8954
+ here:
8955
+
8956
+ ```{python}
8957
+ try:
8958
+ validation.assert_below_threshold(
8959
+ level="error",
8960
+ message="Data quality too low for processing!"
8961
+ )
8962
+ except AssertionError as e:
8963
+ print(f"Custom error: {e}")
8964
+ ```
8965
+
8966
+ See Also
8967
+ --------
8968
+ - [`warning()`](`pointblank.Validate.warning`): get the 'warning' status for each validation
8969
+ step
8970
+ - [`error()`](`pointblank.Validate.error`): get the 'error' status for each validation step
8971
+ - [`critical()`](`pointblank.Validate.critical`): get the 'critical' status for each
8972
+ validation step
8973
+ - [`assert_passing()`](`pointblank.Validate.assert_passing`): assert all validations pass
8974
+ completely
8975
+ """
8976
+ # Check if validation has been interrogated
8977
+ if not hasattr(self, "time_start") or self.time_start is None:
8978
+ # Auto-interrogate with default parameters
8979
+ self.interrogate()
8980
+
8981
+ # Validate the level parameter
8982
+ level = level.lower()
8983
+ if level not in ["warning", "error", "critical"]:
8984
+ raise ValueError(
8985
+ f"Invalid threshold level: {level}. Must be one of 'warning', 'error', or 'critical'."
8986
+ )
8987
+
8988
+ # Get the threshold status using the appropriate method
8989
+ if level == "warning":
8990
+ status = self.warning(i=i)
8991
+ elif level == "error":
8992
+ status = self.error(i=i)
8993
+ elif level == "critical":
8994
+ status = self.critical(i=i)
8995
+
8996
+ # Find any steps that exceeded the threshold
8997
+ failures = []
8998
+ for step_num, exceeded in status.items():
8999
+ if exceeded:
9000
+ # Get the step's description
9001
+ validation_step = self.validation_info[step_num - 1]
9002
+ step_descriptor = (
9003
+ validation_step.autobrief
9004
+ if hasattr(validation_step, "autobrief") and validation_step.autobrief
9005
+ else f"Validation step {step_num}"
9006
+ )
9007
+ failures.append(f"Step {step_num}: {step_descriptor}")
9008
+
9009
+ # If any failures were found, raise an AssertionError
9010
+ if failures:
9011
+ if message:
9012
+ msg = message
9013
+ else:
9014
+ msg = f"The following steps exceeded the {level} threshold level:\n" + "\n".join(
9015
+ failures
9016
+ )
9017
+ raise AssertionError(msg)
9018
+
9019
+ def above_threshold(self, level: str = "warning", i: int | None = None) -> bool:
9020
+ """
9021
+ Check if any validation steps exceed a specified threshold level.
9022
+
9023
+ The `above_threshold()` method checks whether validation steps exceed a given threshold
9024
+ level. This provides a non-exception-based alternative to
9025
+ [`assert_below_threshold()`](`pointblank.Validate.assert_below_threshold`) for conditional
9026
+ workflow control based on validation results.
9027
+
9028
+ This method is useful in scenarios where you want to check if any validation steps failed
9029
+ beyond a certain threshold without raising an exception, allowing for more flexible
9030
+ programmatic responses to validation issues.
9031
+
9032
+ Parameters
9033
+ ----------
9034
+ level
9035
+ The threshold level to check against. Valid options are: `"warning"` (the least severe
9036
+ threshold level), `"error"` (the middle severity threshold level), and `"critical"` (the
9037
+ most severe threshold level). The default is `"warning"`.
9038
+ i
9039
+ Specific validation step number(s) to check. If a single integer, checks only that step.
9040
+ If a list of integers, checks all specified steps. If `None` (the default), checks all
9041
+ validation steps. Step numbers are 1-based (first step is `1`, not `0`).
9042
+
9043
+ Returns
9044
+ -------
9045
+ bool
9046
+ `True` if any of the specified validation steps exceed the given threshold level,
9047
+ `False` otherwise.
9048
+
9049
+ Raises
9050
+ ------
9051
+ ValueError
9052
+ If an invalid threshold level is provided.
9053
+
9054
+ Examples
9055
+ --------
9056
+ ```{python}
9057
+ #| echo: false
9058
+ #| output: false
9059
+ import pointblank as pb
9060
+ pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
9061
+ ```
9062
+ Below are some examples of how to use the `above_threshold()` method. First, we'll create a
9063
+ simple Polars DataFrame with a single column (`values`).
9064
+
9065
+ ```{python}
9066
+ import polars as pl
9067
+
9068
+ tbl = pl.DataFrame({
9069
+ "values": [1, 2, 3, 4, 5, 0, -1]
9070
+ })
9071
+ ```
9072
+
9073
+ Then a validation plan will be created with thresholds (`warning=0.1`, `error=0.2`,
9074
+ `critical=0.3`). After interrogating, we display the validation report table:
9075
+
9076
+ ```{python}
9077
+ import pointblank as pb
9078
+
9079
+ validation = (
9080
+ pb.Validate(data=tbl, thresholds=(0.1, 0.2, 0.3))
9081
+ .col_vals_gt(columns="values", value=0)
9082
+ .col_vals_lt(columns="values", value=10)
9083
+ .col_vals_between(columns="values", left=0, right=5)
9084
+ .interrogate()
9085
+ )
9086
+
9087
+ validation
9088
+ ```
9089
+
9090
+ Let's check if any steps exceed the 'warning' threshold with the `above_threshold()` method.
9091
+ A message will be printed if that's the case:
9092
+
9093
+ ```{python}
9094
+ if validation.above_threshold(level="warning"):
9095
+ print("Some steps have exceeded the warning threshold")
9096
+ ```
9097
+
9098
+ Check if only steps 2 and 3 exceed the 'error' threshold through use of the `i=` argument:
9099
+
9100
+ ```{python}
9101
+ if validation.above_threshold(level="error", i=[2, 3]):
9102
+ print("Steps 2 and/or 3 have exceeded the error threshold")
9103
+ ```
9104
+
9105
+ You can use this in a workflow to conditionally trigger processes. Here's a snippet of how
9106
+ you might use this in a function:
9107
+
9108
+ ```python
9109
+ def process_data(validation_obj):
9110
+ # Only continue processing if validation passes critical thresholds
9111
+ if not validation_obj.above_threshold(level="critical"):
9112
+ # Continue with processing
9113
+ print("Data meets critical quality thresholds, proceeding...")
9114
+ return True
9115
+ else:
9116
+ # Log failure and stop processing
9117
+ print("Data fails critical quality checks, aborting...")
9118
+ return False
9119
+ ```
9120
+
9121
+ Note that this is just a suggestion for how to implement conditional workflow processes. You
9122
+ should adapt this pattern to your specific requirements, which might include different
9123
+ threshold levels, custom logging mechanisms, or integration with your organization's data
9124
+ pipelines and notification systems.
9125
+
9126
+ See Also
9127
+ --------
9128
+ - [`assert_below_threshold()`](`pointblank.Validate.assert_below_threshold`): a similar
9129
+ method that raises an exception if thresholds are exceeded
9130
+ - [`warning()`](`pointblank.Validate.warning`): get the 'warning' status for each validation
9131
+ step
9132
+ - [`error()`](`pointblank.Validate.error`): get the 'error' status for each validation step
9133
+ - [`critical()`](`pointblank.Validate.critical`): get the 'critical' status for each
9134
+ validation step
9135
+ """
9136
+ # Ensure validation has been run
9137
+ if not hasattr(self, "time_start") or self.time_start is None:
9138
+ return False
9139
+
9140
+ # Validate the level parameter
9141
+ level = level.lower()
9142
+ if level not in ["warning", "error", "critical"]:
9143
+ raise ValueError(
9144
+ f"Invalid threshold level: {level}. Must be one of 'warning', 'error', or 'critical'."
9145
+ )
9146
+
9147
+ # Get the threshold status using the appropriate method
9148
+ if level == "warning":
9149
+ status = self.warning(i=i)
9150
+ elif level == "error":
9151
+ status = self.error(i=i)
9152
+ elif level == "critical":
9153
+ status = self.critical(i=i)
9154
+
9155
+ # Return True if any steps exceeded the threshold
9156
+ return any(status.values())
9157
+
8824
9158
  def n(self, i: int | list[int] | None = None, scalar: bool = False) -> dict[int, int] | int:
8825
9159
  """
8826
9160
  Provides a dictionary of the number of test units for each validation step.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pointblank
3
- Version: 0.9.4
3
+ Version: 0.9.6
4
4
  Summary: Find out if your data is what you think it is.
5
5
  Author-email: Richard Iannone <riannone@me.com>
6
6
  License: MIT License
@@ -55,6 +55,8 @@ Requires-Dist: chatlas>=0.3.0; extra == "generate"
55
55
  Requires-Dist: anthropic[bedrock]>=0.45.2; extra == "generate"
56
56
  Requires-Dist: openai>=1.63.0; extra == "generate"
57
57
  Requires-Dist: shiny>=1.3.0; extra == "generate"
58
+ Provides-Extra: bigquery
59
+ Requires-Dist: ibis-framework[bigquery]>=9.5.0; extra == "bigquery"
58
60
  Provides-Extra: databricks
59
61
  Requires-Dist: ibis-framework[databricks]>=9.5.0; extra == "databricks"
60
62
  Provides-Extra: duckdb
@@ -103,7 +105,7 @@ _Data validation made beautiful and powerful_
103
105
 
104
106
  </div>
105
107
 
106
- <div align="right">
108
+ <div align="center">
107
109
  <a href="translations/README.fr.md">Français</a> |
108
110
  <a href="translations/README.de.md">Deutsch</a> |
109
111
  <a href="translations/README.it.md">Italiano</a> |
@@ -112,7 +114,9 @@ _Data validation made beautiful and powerful_
112
114
  <a href="translations/README.nl.md">Nederlands</a> |
113
115
  <a href="translations/README.zh-CN.md">简体中文</a> |
114
116
  <a href="translations/README.ja.md">日本語</a> |
115
- <a href="translations/README.ko.md">한국어</a>
117
+ <a href="translations/README.ko.md">한국어</a> |
118
+ <a href="translations/README.hi.md">हिन्दी</a> |
119
+ <a href="translations/README.ar.md">العربية</a>
116
120
  </div>
117
121
 
118
122
  ## What is Pointblank?
@@ -1,22 +1,22 @@
1
1
  pointblank/__init__.py,sha256=uHrX-ARZOhvWogXXqKV65RO2DXdYLZNCD1oNcm8hE6o,1585
2
- pointblank/_constants.py,sha256=D4HF0NrNAd-mdb88gZ6VatkRYfVX-9gC6C7TOQjjAw4,81128
2
+ pointblank/_constants.py,sha256=YeQVYpSkdQ8v7D8ZJnG-M75zqAH3yJuDzzjwWC2I-d8,81227
3
3
  pointblank/_constants_docs.py,sha256=JBmtt16zTYQ-zaM4ElLExtKs-dKlnN553Ys2ML1Y1C8,2099
4
4
  pointblank/_constants_translations.py,sha256=HXcCYmKoMjoaFv-Ym4UWv3AsIVXik2zDyAy7xvTvv0Y,186710
5
5
  pointblank/_interrogation.py,sha256=U4GQ8Ik5rP75BYBkmunBvHKwf3XvLPHcUx18JwiBQZI,89422
6
6
  pointblank/_typing.py,sha256=aItbCbzhbzqjK3lCbL27ltRyXoAH1c3-U6xQdRzg-lU,1594
7
- pointblank/_utils.py,sha256=CsuUYXNzox-Nc5CjQNhyy2XnmnvYJVJrS5cZxklzIFo,24745
7
+ pointblank/_utils.py,sha256=BoIwMEZYBwPEe5xGku1vSmkgAeGgnA4_bQ4MDeYFGrc,24824
8
8
  pointblank/_utils_check_args.py,sha256=rFEc1nbCN8ftsQQWVjCNWmQ2QmUDxkfgmoJclrZeTLs,5489
9
9
  pointblank/_utils_html.py,sha256=sTcmnBljkPjRZF1hbpoHl4HmnXOazsA91gC9iWVIrRk,2848
10
- pointblank/actions.py,sha256=ilk__kbQiS4ieJp-4dM7SDGuobQihUxLyS5ahgiP7qE,18272
10
+ pointblank/actions.py,sha256=D6o9B2_ES9PNQg9HZwREacrrt-3A5bhdrBkL1UXz__s,18281
11
11
  pointblank/assistant.py,sha256=ZIQJKTy9rDwq_Wmr1FMp0J7Q3ekxSgF3_tK0p4PTEUM,14850
12
- pointblank/column.py,sha256=LumGbnterw5VM7-2-7Za3jdlug1VVS9a3TOH0Y1E5eg,76548
12
+ pointblank/column.py,sha256=_FJjpjv760D1p6YGgqbwmKYktouG7AJ2A9uIMYQBTYA,76560
13
13
  pointblank/datascan.py,sha256=rRz0hR81uTgd1e9OfLdfsNYXRk8vcpE8PW8exu-GJoE,47697
14
14
  pointblank/draft.py,sha256=cusr4fBiNncCKIOU8UwvJcvkBeBuUnqH_UfYp9dtNss,15777
15
- pointblank/schema.py,sha256=gzUCmtccO2v15MH2bo9uHUYjkKEEne1okQucxcH39pc,44291
15
+ pointblank/schema.py,sha256=nHkOXykPw7mTmVGjT67hjx13iKySZ5xsfVgPUQV0yCM,44588
16
16
  pointblank/tf.py,sha256=8o_8m4i01teulEe3-YYMotSNf3tImjBMInsvdjSAO5Q,8844
17
- pointblank/thresholds.py,sha256=cweex25DwBPrsvPW12pRoaTQnwFpUUwqTdHyFJXTnN0,25760
18
- pointblank/validate.py,sha256=9dIWFetyBm70f_Ps0UkroT1gO4b5qACGs8trhObKUHg,608551
19
- pointblank/data/api-docs.txt,sha256=jKjPSq6X_vU_RRSJAydnVc3C35WvTqNvu-lLKroVO4I,482044
17
+ pointblank/thresholds.py,sha256=mybeLzTVdmN04NLKoV-jiSBXsWknwHO0Gox0ttVN_MU,25766
18
+ pointblank/validate.py,sha256=dM5U41me38atNDt1Llzv08gdUcnYyvWoHycQPpctidg,621961
19
+ pointblank/data/api-docs.txt,sha256=6cdUIYdVy2XfGRLNNxtcGTaxu2WX4EXEeICayOvJCTs,492756
20
20
  pointblank/data/game_revenue-duckdb.zip,sha256=tKIVx48OGLYGsQPS3h5AjA2Nyq_rfEpLCjBiFUWhagU,35880
21
21
  pointblank/data/game_revenue.zip,sha256=7c9EvHLyi93CHUd4p3dM4CZ-GucFCtXKSPxgLojL32U,33749
22
22
  pointblank/data/global_sales-duckdb.zip,sha256=2ok_cvJ1ZuSkXnw0R6_OkKYRTWhJ-jJEMq2VYsv5fqY,1336390
@@ -26,8 +26,8 @@ pointblank/data/nycflights.zip,sha256=yVjbUaKUz2LydSdF9cABuir0VReHBBgV7shiNWSd0m
26
26
  pointblank/data/polars-api-docs.txt,sha256=KGcS-BOtUs9zgpkWfXD-GFdFh4O_zjdkpX7msHjztLg,198045
27
27
  pointblank/data/small_table-duckdb.zip,sha256=BhTaZ2CRS4-9Z1uVhOU6HggvW3XCar7etMznfENIcOc,2028
28
28
  pointblank/data/small_table.zip,sha256=lmFb90Nb-v5X559Ikjg31YLAXuRyMkD9yLRElkXPMzQ,472
29
- pointblank-0.9.4.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
30
- pointblank-0.9.4.dist-info/METADATA,sha256=TO7kSRz1e8_lhuqkF6st8ompJq-I0i5mevVfsCiHumU,14732
31
- pointblank-0.9.4.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
32
- pointblank-0.9.4.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
33
- pointblank-0.9.4.dist-info/RECORD,,
29
+ pointblank-0.9.6.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
30
+ pointblank-0.9.6.dist-info/METADATA,sha256=_BocxWcU0_AXIiMGBPcxsd9VwrD8uGXjXpjE16hUhVw,14950
31
+ pointblank-0.9.6.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
32
+ pointblank-0.9.6.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
33
+ pointblank-0.9.6.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.3.1)
2
+ Generator: setuptools (80.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5