pointblank 0.14.0__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pointblank/validate.py CHANGED
@@ -45,6 +45,7 @@ from pointblank._constants import (
  )
  from pointblank._constants_translations import (
  EXPECT_FAIL_TEXT,
+ NOTES_TEXT,
  STEP_REPORT_TEXT,
  VALIDATION_REPORT_TEXT,
  )
@@ -122,6 +123,7 @@ __all__ = [
  "write_file",
  "config",
  "connect_to_table",
+ "print_database_tables",
  "preview",
  "missing_vals_tbl",
  "get_action_metadata",
@@ -3699,6 +3701,10 @@ class _ValidationInfo:
  The time the validation step was processed. This is in the ISO 8601 format in UTC time.
  proc_duration_s
  The duration of processing for the validation step in seconds.
+ notes
+ An ordered dictionary of notes/footnotes associated with the validation step. Each entry
+ contains both 'markdown' and 'text' versions of the note content. The dictionary preserves
+ insertion order, ensuring notes appear in a consistent sequence in reports and logs.
  """

  # Validation plan
@@ -3736,10 +3742,224 @@ class _ValidationInfo:
  val_info: dict[str, any] | None = None
  time_processed: str | None = None
  proc_duration_s: float | None = None
+ notes: dict[str, dict[str, str]] | None = None

  def get_val_info(self) -> dict[str, any]:
  return self.val_info

+ def _add_note(self, key: str, markdown: str, text: str | None = None) -> None:
+ """
+ Add a note/footnote to the validation step.
+
+ This internal method adds a note entry to the validation step's notes dictionary.
+ Notes are displayed as footnotes in validation reports and included in log output.
+
+ Parameters
+ ----------
+ key
+ A unique identifier for the note. If a note with this key already exists, it will
+ be overwritten.
+ markdown
+ The note content formatted with Markdown. This version is used for display in
+ HTML reports and other rich text formats.
+ text
+ The note content as plain text. This version is used for log files and text-based
+ output. If not provided, the markdown version will be used (with markdown formatting
+ intact).
+
+ Examples
+ --------
+ ```python
+ # Add a note about evaluation failure
+ validation_info._add_note(
+ key="eval_error",
+ markdown="Column expression evaluation **failed**",
+ text="Column expression evaluation failed"
+ )
+
+ # Add a note about LLM response
+ validation_info._add_note(
+ key="llm_response",
+ markdown="LLM validation returned `200` passing rows",
+ text="LLM validation returned 200 passing rows"
+ )
+ ```
+ """
+ # Initialize notes dictionary if it doesn't exist
+ if self.notes is None:
+ self.notes = {}
+
+ # Use markdown as text if text is not provided
+ if text is None:
+ text = markdown
+
+ # Add the note entry
+ self.notes[key] = {"markdown": markdown, "text": text}
+
+ def _get_notes(self, format: str = "dict") -> dict[str, dict[str, str]] | list[str] | None:
+ """
+ Get notes associated with this validation step.
+
+ Parameters
+ ----------
+ format
+ The format to return notes in:
+ - `"dict"`: Returns the full notes dictionary (default)
+ - `"markdown"`: Returns a list of markdown-formatted note values
+ - `"text"`: Returns a list of plain text note values
+ - `"keys"`: Returns a list of note keys
+
+ Returns
+ -------
+ dict, list, or None
+ The notes in the requested format, or `None` if no notes exist.
+
+ Examples
+ --------
+ ```python
+ # Get all notes as dictionary
+ notes = validation_info._get_notes()
+ # Returns: {'key1': {'markdown': '...', 'text': '...'}, ...}
+
+ # Get just markdown versions
+ markdown_notes = validation_info._get_notes(format="markdown")
+ # Returns: ['First note with **emphasis**', 'Second note']
+
+ # Get just plain text versions
+ text_notes = validation_info._get_notes(format="text")
+ # Returns: ['First note with emphasis', 'Second note']
+
+ # Get just the keys
+ keys = validation_info._get_notes(format="keys")
+ # Returns: ['key1', 'key2']
+ ```
+ """
+ if self.notes is None:
+ return None
+
+ if format == "dict":
+ return self.notes
+ elif format == "markdown":
+ return [note["markdown"] for note in self.notes.values()]
+ elif format == "text":
+ return [note["text"] for note in self.notes.values()]
+ elif format == "keys":
+ return list(self.notes.keys())
+ else:
+ raise ValueError(
+ f"Invalid format '{format}'. Must be one of: 'dict', 'markdown', 'text', 'keys'"
+ )
+
+ def _get_note(self, key: str, format: str = "dict") -> dict[str, str] | str | None:
+ """
+ Get a specific note by its key.
+
+ Parameters
+ ----------
+ key
+ The unique identifier of the note to retrieve.
+ format
+ The format to return the note in:
+ - `"dict"`: Returns `{'markdown': '...', 'text': '...'}` (default)
+ - `"markdown"`: Returns just the markdown string
+ - `"text"`: Returns just the plain text string
+
+ Returns
+ -------
+ dict, str, or None
+ The note in the requested format, or `None` if the note doesn't exist.
+
+ Examples
+ --------
+ ```python
+ # Get a specific note as dictionary
+ note = validation_info._get_note("threshold_info")
+ # Returns: {'markdown': 'Using **default** thresholds', 'text': '...'}
+
+ # Get just the markdown version
+ markdown = validation_info._get_note("threshold_info", format="markdown")
+ # Returns: 'Using **default** thresholds'
+
+ # Get just the text version
+ text = validation_info._get_note("threshold_info", format="text")
+ # Returns: 'Using default thresholds'
+ ```
+ """
+ if self.notes is None or key not in self.notes:
+ return None
+
+ note = self.notes[key]
+
+ if format == "dict":
+ return note
+ elif format == "markdown":
+ return note["markdown"]
+ elif format == "text":
+ return note["text"]
+ else:
+ raise ValueError(
+ f"Invalid format '{format}'. Must be one of: 'dict', 'markdown', 'text'"
+ )
+
+ def _has_notes(self) -> bool:
+ """
+ Check if this validation step has any notes.
+
+ Returns
+ -------
+ bool
+ `True` if the validation step has notes, `False` otherwise.
+
+ Examples
+ --------
+ ```python
+ if validation_info._has_notes():
+ print("This step has notes")
+ ```
+ """
+ return self.notes is not None and len(self.notes) > 0
+
+
+ def _handle_connection_errors(e: Exception, connection_string: str) -> None:
+ """
+ Shared error handling for database connection failures.
+
+ Raises an appropriate ConnectionError with helpful messages based on the exception.
+ """
+
+ error_str = str(e).lower()
+ backend_install_map = {
+ "duckdb": "pip install 'ibis-framework[duckdb]'",
+ "postgresql": "pip install 'ibis-framework[postgres]'",
+ "postgres": "pip install 'ibis-framework[postgres]'",
+ "mysql": "pip install 'ibis-framework[mysql]'",
+ "sqlite": "pip install 'ibis-framework[sqlite]'",
+ "bigquery": "pip install 'ibis-framework[bigquery]'",
+ "snowflake": "pip install 'ibis-framework[snowflake]'",
+ }
+
+ # Check if this is a missing backend dependency
+ for backend, install_cmd in backend_install_map.items():
+ if backend in error_str and ("not found" in error_str or "no module" in error_str):
+ raise ConnectionError(
+ f"Missing {backend.upper()} backend for Ibis. Install it with:\n"
+ f" {install_cmd}\n\n"
+ f"Original error: {e}"
+ ) from e
+
+ # Generic connection error
+ raise ConnectionError( # pragma: no cover
+ f"Failed to connect using: {connection_string}\n"
+ f"Error: {e}\n\n"
+ f"Supported connection string formats:\n"
+ f"- DuckDB: 'duckdb:///path/to/file.ddb'\n"
+ f"- SQLite: 'sqlite:///path/to/file.db'\n"
+ f"- PostgreSQL: 'postgresql://user:pass@host:port/db'\n"
+ f"- MySQL: 'mysql://user:pass@host:port/db'\n"
+ f"- BigQuery: 'bigquery://project/dataset'\n"
+ f"- Snowflake: 'snowflake://user:pass@account/db/schema'"
+ ) from e
+

  def connect_to_table(connection_string: str) -> Any:
  """
@@ -3820,7 +4040,11 @@ def connect_to_table(connection_string: str) -> Any:
  pip install 'ibis-framework[duckdb]' # for DuckDB
  pip install 'ibis-framework[postgres]' # for PostgreSQL
  ```
+ See Also
+ --------
+ print_database_tables : List all available tables in a database for discovery
  """
+
  # Check if Ibis is available
  if not _is_lib_present(lib_name="ibis"):
  raise ImportError(
@@ -3834,14 +4058,10 @@ def connect_to_table(connection_string: str) -> Any:
  if "::" not in connection_string:
  # Try to connect to get available tables for helpful error message
  try:
- # Extract the base connection string (without table name)
  base_connection = connection_string
-
- # Connect to the database
  conn = ibis.connect(base_connection)

- # Get list of available tables
- try:
+ try: # pragma: no cover
  available_tables = conn.list_tables()
  except Exception: # pragma: no cover
  available_tables = []
@@ -3858,7 +4078,6 @@ def connect_to_table(connection_string: str) -> Any:
  f" {connection_string}::TABLE_NAME\n\n"
  f"Examples:\n"
  )
- # Add examples with first few table names
  for table in available_tables[:3]:
  error_msg += f" {connection_string}::{table}\n"
  else:
@@ -3873,43 +4092,8 @@ def connect_to_table(connection_string: str) -> Any:

  except Exception as e:
  if isinstance(e, ValueError):
- raise # Re-raise our custom ValueError
-
- # Check for backend-specific errors and provide installation guidance
- error_str = str(e).lower()
- backend_install_map = {
- "duckdb": "pip install 'ibis-framework[duckdb]'",
- "postgresql": "pip install 'ibis-framework[postgres]'",
- "postgres": "pip install 'ibis-framework[postgres]'",
- "mysql": "pip install 'ibis-framework[mysql]'",
- "sqlite": "pip install 'ibis-framework[sqlite]'",
- "bigquery": "pip install 'ibis-framework[bigquery]'",
- "snowflake": "pip install 'ibis-framework[snowflake]'",
- }
-
- # Check if this is a missing backend dependency
- for backend, install_cmd in backend_install_map.items(): # pragma: no cover
- if backend in error_str and ("not found" in error_str or "no module" in error_str):
- raise ConnectionError(
- f"Missing {backend.upper()} backend for Ibis. Install it with:\n"
- f" {install_cmd}\n\n"
- f"Original error: {e}\n\n"
- f"Supported connection string formats:\n"
- f"- DuckDB: 'duckdb:///path/to/file.ddb::table_name'\n"
- f"- SQLite: 'sqlite:///path/to/file.db::table_name'\n"
- f"- PostgreSQL: 'postgresql://user:pass@host:port/db::table_name'\n"
- f"- MySQL: 'mysql://user:pass@host:port/db::table_name'\n"
- f"- BigQuery: 'bigquery://project/dataset::table_name'\n"
- f"- Snowflake: 'snowflake://user:pass@account/db/schema::table_name'\n"
- f"\nNote: Use '::table_name' to specify the table within the database."
- ) from e
-
- # Generic connection error
- raise ConnectionError( # pragma: no cover
- f"Failed to connect to database using connection string: {connection_string}\n"
- f"Error: {e}\n\n"
- f"No table specified. Use the format: {connection_string}::TABLE_NAME"
- ) from e
+ raise
+ _handle_connection_errors(e, connection_string)

  # Split connection string and table name
  try:
@@ -3922,32 +4106,14 @@ def connect_to_table(connection_string: str) -> Any:
  conn = ibis.connect(base_connection)
  table = conn.table(table_name)
  return table
-
  except Exception as e:
- # Check for backend-specific errors and provide installation guidance
  error_str = str(e).lower()
- backend_install_map = {
- "duckdb": "pip install 'ibis-framework[duckdb]'",
- "postgresql": "pip install 'ibis-framework[postgres]'",
- "postgres": "pip install 'ibis-framework[postgres]'",
- "mysql": "pip install 'ibis-framework[mysql]'",
- "sqlite": "pip install 'ibis-framework[sqlite]'",
- "bigquery": "pip install 'ibis-framework[bigquery]'",
- "snowflake": "pip install 'ibis-framework[snowflake]'",
- }

- # Check if this is a missing backend dependency
- for backend, install_cmd in backend_install_map.items():
- if backend in error_str and ("not found" in error_str or "no module" in error_str):
- raise ConnectionError(
- f"Missing {backend.upper()} backend for Ibis. Install it with:\n"
- f" {install_cmd}\n\n"
- f"Original error: {e}"
- ) from e
-
- # Check if table doesn't exist
- if "table" in error_str and ("not found" in error_str or "does not exist" in error_str):
- # Try to get available tables for helpful message
+ # Check if this is a "table not found" error
+ if "table" in error_str and (
+ "not found" in error_str or "does not exist" in error_str or "not exist" in error_str
+ ):
+ # Try to get available tables for a helpful error message
  try: # pragma: no cover
  available_tables = conn.list_tables()
  if available_tables:
@@ -3955,23 +4121,79 @@ def connect_to_table(connection_string: str) -> Any:
  raise ValueError(
  f"Table '{table_name}' not found in database.\n\n"
  f"Available tables:\n{table_list}\n\n"
- f"Check the table name and try again with:\n"
- f" {base_connection}::CORRECT_TABLE_NAME"
- ) from e
- else:
- raise ValueError(
- f"Table '{table_name}' not found and no tables available in database."
+ f"Connection: {base_connection}"
  ) from e
+ except ValueError:
+ # Re-raise the table-specific ValueError
+ raise
  except Exception:
- raise ValueError(
- f"Table '{table_name}' not found in database. "
- f"Check the table name and connection string."
- ) from e
+ # If we can't list tables, just raise a simple error
+ pass
+
+ raise ValueError(
+ f"Table '{table_name}' not found in database.\n"
+ f"Connection: {base_connection}\n\n"
+ f"Original error: {e}"
+ ) from e
+
+ # For other errors, use the generic connection error handler
+ _handle_connection_errors(e, base_connection)
+
+
+ def print_database_tables(connection_string: str) -> list[str]:
+ """
+ List all tables in a database from a connection string.
+
+ The `print_database_tables()` function connects to a database and returns a list of all
+ available tables. This is particularly useful for discovering what tables exist in a database
+ before connecting to a specific table with `connect_to_table()`. The function automatically
+ filters out temporary Ibis tables (memtables) to show only user tables. It supports all database
+ backends available through Ibis, including DuckDB, SQLite, PostgreSQL, MySQL, BigQuery, and
+ Snowflake.
+
+ Parameters
+ ----------
+ connection_string
+ A database connection string *without* the `::table_name` suffix. Example:
+ `"duckdb:///path/to/database.ddb"`.
+
+ Returns
+ -------
+ list[str]
+ List of table names, excluding temporary Ibis tables.
+
+ See Also
+ --------
+ connect_to_table : Connect to a database table with full connection string documentation
+ """
+ # Check if connection string includes table specification (which is not allowed)
+ if "::" in connection_string:
+ raise ValueError(
+ "Connection string should not include table specification (::table_name).\n"
+ f"You've supplied: {connection_string}\n"
+ f"Expected format: 'duckdb:///path/to/database.ddb' (without ::table_name)"
+ )
+
+ # Check if Ibis is available
+ if not _is_lib_present(lib_name="ibis"):
+ raise ImportError(
+ "The Ibis library is not installed but is required for database connection strings.\n"
+ "Install it with: pip install 'ibis-framework[duckdb]' (or other backend as needed)"
+ )
+
+ import ibis
+
+ try:
+ # Connect to database
+ conn = ibis.connect(connection_string)
+ # Get all tables and filter out temporary Ibis tables
+ all_tables = conn.list_tables()
+ user_tables = [t for t in all_tables if "memtable" not in t]
+
+ return user_tables

- # Generic connection error
- raise ConnectionError(
- f"Failed to connect to table '{table_name}' using: {base_connection}\nError: {e}"
- ) from e
+ except Exception as e:
+ _handle_connection_errors(e, connection_string)


  @dataclass
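
The two public helpers pair naturally: list the tables in a database first, then attach to one of them. A usage sketch (the DuckDB file path and table names are hypothetical):

```python
import pointblank as pb

# List user tables in a database (temporary Ibis memtables are filtered out)
tables = pb.print_database_tables("duckdb:///data/warehouse.ddb")
# e.g. ['customers', 'orders']

try:
    # Append `::table_name` to the same connection string to attach to a table
    tbl = pb.connect_to_table(f"duckdb:///data/warehouse.ddb::{tables[0]}")
except ConnectionError as e:
    # Missing-backend and generic connection failures arrive with install hints
    print(e)
```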
@@ -4253,6 +4475,16 @@ class Validate:
  - Vietnamese (`"vi"`)
  - Indonesian (`"id"`)
  - Ukrainian (`"uk"`)
+ - Bulgarian (`"bg"`)
+ - Croatian (`"hr"`)
+ - Estonian (`"et"`)
+ - Hungarian (`"hu"`)
+ - Irish (`"ga"`)
+ - Latvian (`"lv"`)
+ - Lithuanian (`"lt"`)
+ - Maltese (`"mt"`)
+ - Slovak (`"sk"`)
+ - Slovenian (`"sl"`)
  - Hebrew (`"he"`)
  - Thai (`"th"`)
  - Persian (`"fa"`)
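
Each of the ten added languages plugs into the existing `lang=` argument of `Validate`. An illustrative one-liner using Slovak:

```python
import polars as pl
import pointblank as pb

# Report labels rendered in Slovak (`"sk"`), one of the newly added languages
validation = (
    pb.Validate(data=pl.DataFrame({"a": [1, 2, 3]}), lang="sk")
    .col_vals_not_null(columns="a")
    .interrogate()
)
```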
@@ -7718,9 +7950,12 @@ class Validate:

  return self

- def col_vals_null(
+ def col_vals_increasing(
  self,
  columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
+ allow_stationary: bool = False,
+ decreasing_tol: float | None = None,
+ na_pass: bool = False,
  pre: Callable | None = None,
  segments: SegmentSpec | None = None,
  thresholds: int | float | bool | tuple | dict | Thresholds = None,
@@ -7729,11 +7964,14 @@
  active: bool = True,
  ) -> Validate:
  """
- Validate whether values in a column are Null.
+ Are column data increasing by row?

- The `col_vals_null()` validation method checks whether column values in a table are Null.
- This validation will operate over the number of test units that is equal to the number
- of rows in the table.
+ The `col_vals_increasing()` validation method checks whether column values in a table are
+ increasing when moving down a table. There are options for allowing missing values in the
+ target column, allowing stationary phases (where consecutive values don't change), and even
+ one for allowing decreasing movements up to a certain threshold. This validation will
+ operate over the number of test units that is equal to the number of rows in the table
+ (determined after any `pre=` mutation has been applied).

  Parameters
  ----------
@@ -7742,6 +7980,20 @@
  [`col()`](`pointblank.col`) with column selectors to specify one or more columns. If
  multiple columns are supplied or resolved, there will be a separate validation step
  generated for each column.
+ allow_stationary
+ An option to allow pauses in increasing values. For example, if the values for the test
+ units are `[80, 82, 82, 85, 88]` then the third unit (`82`, appearing a second time)
+ would be marked as failing when `allow_stationary` is `False`. Using
+ `allow_stationary=True` will result in all the test units in `[80, 82, 82, 85, 88]`
+ being marked as passing.
+ decreasing_tol
+ An optional threshold value that allows for movement of numerical values in the negative
+ direction. By default, this is `None`, but using a numerical value will set the absolute
+ threshold of negative travel allowed across numerical test units. Note that setting a
+ value here also has the effect of setting `allow_stationary` to `True`.
+ na_pass
+ Should any encountered None, NA, or Null values be considered as passing test units? By
+ default, this is `False`. Set to `True` to pass test units with missing values.
  pre
  An optional preprocessing function or lambda to apply to the data table during
  interrogation. This function should take a table as input and return a modified table.
@@ -7778,89 +8030,6 @@ class Validate:
  Validate
  The `Validate` object with the added validation step.

- Preprocessing
- -------------
- The `pre=` argument allows for a preprocessing function or lambda to be applied to the data
- table during interrogation. This function should take a table as input and return a modified
- table. This is useful for performing any necessary transformations or filtering on the data
- before the validation step is applied.
-
- The preprocessing function can be any callable that takes a table as input and returns a
- modified table. For example, you could use a lambda function to filter the table based on
- certain criteria or to apply a transformation to the data. Note that you can refer to
- a column via `columns=` that is expected to be present in the transformed table, but may not
- exist in the table before preprocessing. Regarding the lifetime of the transformed table, it
- only exists during the validation step and is not stored in the `Validate` object or used in
- subsequent validation steps.
-
- Segmentation
- ------------
- The `segments=` argument allows for the segmentation of a validation step into multiple
- segments. This is useful for applying the same validation step to different subsets of the
- data. The segmentation can be done based on a single column or specific fields within a
- column.
-
- Providing a single column name will result in a separate validation step for each unique
- value in that column. For example, if you have a column called `"region"` with values
- `"North"`, `"South"`, and `"East"`, the validation step will be applied separately to each
- region.
-
- Alternatively, you can provide a tuple that specifies a column name and its corresponding
- values to segment on. For example, if you have a column called `"date"` and you want to
- segment on only specific dates, you can provide a tuple like
- `("date", ["2023-01-01", "2023-01-02"])`. Any other values in the column will be disregarded
- (i.e., no validation steps will be created for them).
-
- A list with a combination of column names and tuples can be provided as well. This allows
- for more complex segmentation scenarios. The following inputs are both valid:
-
- ```
- # Segments from all unique values in the `region` column
- # and specific dates in the `date` column
- segments=["region", ("date", ["2023-01-01", "2023-01-02"])]
-
- # Segments from all unique values in the `region` and `date` columns
- segments=["region", "date"]
- ```
-
- The segmentation is performed during interrogation, and the resulting validation steps will
- be numbered sequentially. Each segment will have its own validation step, and the results
- will be reported separately. This allows for a more granular analysis of the data and helps
- identify issues within specific segments.
-
- Importantly, the segmentation process will be performed after any preprocessing of the data
- table. Because of this, one can conceivably use the `pre=` argument to generate a column
- that can be used for segmentation. For example, you could create a new column called
- `"segment"` through use of `pre=` and then use that column for segmentation.
-
- Thresholds
- ----------
- The `thresholds=` parameter is used to set the failure-condition levels for the validation
- step. If they are set here at the step level, these thresholds will override any thresholds
- set at the global level in `Validate(thresholds=...)`.
-
- There are three threshold levels: 'warning', 'error', and 'critical'. The threshold values
- can either be set as a proportion failing of all test units (a value between `0` to `1`),
- or, the absolute number of failing test units (as integer that's `1` or greater).
-
- Thresholds can be defined using one of these input schemes:
-
- 1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
- thresholds)
- 2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1` is
- the 'error' level, and position `2` is the 'critical' level
- 3. create a dictionary of 1-3 value entries; the valid keys: are 'warning', 'error', and
- 'critical'
- 4. a single integer/float value denoting absolute number or fraction of failing test units
- for the 'warning' level only
-
- If the number of failing test units exceeds set thresholds, the validation step will be
- marked as 'warning', 'error', or 'critical'. All of the threshold levels don't need to be
- set, you're free to set any combination of them.
-
- Aside from reporting failure conditions, thresholds can be used to determine the actions to
- take for each level of failure (using the `actions=` parameter).
-
  Examples
  --------
  ```{python}
@@ -7869,8 +8038,9 @@
  import pointblank as pb
  pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
  ```
- For the examples here, we'll use a simple Polars DataFrame with two numeric columns (`a` and
- `b`). The table is shown below:
+
+ For the examples here, we'll use a simple Polars DataFrame with three numeric columns
+ (`a`, `b`, and `c`). The table is shown below:

  ```{python}
  import pointblank as pb
@@ -7878,52 +8048,490 @@ class Validate:

  tbl = pl.DataFrame(
  {
- "a": [None, None, None, None],
- "b": [None, 2, None, 9],
+ "a": [1, 2, 3, 4, 5, 6],
+ "b": [1, 2, 2, 3, 4, 5],
+ "c": [1, 2, 1, 3, 4, 5],
  }
- ).with_columns(pl.col("a").cast(pl.Int64))
+ )

  pb.preview(tbl)
  ```

- Let's validate that values in column `a` are all Null values. We'll determine if this
- validation had any failing test units (there are four test units, one for each row).
+ Let's validate that values in column `a` are increasing. We'll determine if this validation
+ had any failing test units (there are six test units, one for each row).

  ```{python}
  validation = (
  pb.Validate(data=tbl)
- .col_vals_null(columns="a")
+ .col_vals_increasing(columns="a")
  .interrogate()
  )

  validation
  ```

- Printing the `validation` object shows the validation table in an HTML viewing environment.
- The validation table shows the single entry that corresponds to the validation step created
- by using `col_vals_null()`. All test units passed, and there are no failing test units.
-
- Now, let's use that same set of values for a validation on column `b`.
+ The validation passed as all values in column `a` are increasing. Now let's check column
+ `b` which has a stationary value:

  ```{python}
  validation = (
  pb.Validate(data=tbl)
- .col_vals_null(columns="b")
+ .col_vals_increasing(columns="b")
  .interrogate()
  )

  validation
  ```

- The validation table reports two failing test units. The specific failing cases are for the
- two non-Null values in column `b`.
- """
- assertion_type = _get_fn_name()
+ This validation fails at the third row because the value `2` is repeated. If we want to
+ allow stationary values, we can use `allow_stationary=True`:

- _check_column(column=columns)
- _check_pre(pre=pre)
- # TODO: add check for segments
- # _check_segments(segments=segments)
+ ```{python}
+ validation = (
+ pb.Validate(data=tbl)
+ .col_vals_increasing(columns="b", allow_stationary=True)
+ .interrogate()
+ )
+
+ validation
+ ```
+ """
+ assertion_type = "col_vals_increasing"
+
+ # Determine threshold to use (global or local) and normalize a local `thresholds=` value
+ thresholds = (
+ self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
+ )
+
+ # If `columns` is a ColumnSelector or Narwhals selector, call `col()` on it to later
+ # resolve the columns
+ if isinstance(columns, (ColumnSelector, nw.selectors.Selector)):
+ columns = col(columns)
+
+ # If `columns` is Column value or a string, place it in a list for iteration
+ if isinstance(columns, (Column, str)):
+ columns = [columns]
+
+ # Determine brief to use (global or local) and transform any shorthands of `brief=`
+ brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
+
+ # Iterate over the columns and create a validation step for each
+ for column in columns:
+ val_info = _ValidationInfo(
+ assertion_type=assertion_type,
+ column=column,
+ values="",
+ na_pass=na_pass,
+ pre=pre,
+ segments=segments,
+ thresholds=thresholds,
+ actions=actions,
+ brief=brief,
+ active=active,
+ val_info={
+ "allow_stationary": allow_stationary,
+ "decreasing_tol": decreasing_tol if decreasing_tol else 0.0,
+ },
+ )
+
+ self._add_validation(validation_info=val_info)
+
+ return self
+
+ def col_vals_decreasing(
+ self,
+ columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
+ allow_stationary: bool = False,
+ increasing_tol: float | None = None,
+ na_pass: bool = False,
+ pre: Callable | None = None,
+ segments: SegmentSpec | None = None,
+ thresholds: int | float | bool | tuple | dict | Thresholds = None,
+ actions: Actions | None = None,
+ brief: str | bool | None = None,
+ active: bool = True,
+ ) -> Validate:
+ """
+ Are column data decreasing by row?
+
+ The `col_vals_decreasing()` validation method checks whether column values in a table are
+ decreasing when moving down a table. There are options for allowing missing values in the
+ target column, allowing stationary phases (where consecutive values don't change), and even
+ one for allowing increasing movements up to a certain threshold. This validation will
+ operate over the number of test units that is equal to the number of rows in the table
+ (determined after any `pre=` mutation has been applied).
+
+ Parameters
+ ----------
+ columns
+ A single column or a list of columns to validate. Can also use
+ [`col()`](`pointblank.col`) with column selectors to specify one or more columns. If
+ multiple columns are supplied or resolved, there will be a separate validation step
+ generated for each column.
+ allow_stationary
+ An option to allow pauses in decreasing values. For example, if the values for the test
+ units are `[88, 85, 85, 82, 80]` then the third unit (`85`, appearing a second time)
+ would be marked as failing when `allow_stationary` is `False`. Using
+ `allow_stationary=True` will result in all the test units in `[88, 85, 85, 82, 80]`
+ being marked as passing.
+ increasing_tol
+ An optional threshold value that allows for movement of numerical values in the positive
+ direction. By default, this is `None`, but using a numerical value will set the absolute
+ threshold of positive travel allowed across numerical test units. Note that setting a
+ value here also has the effect of setting `allow_stationary` to `True`.
+ na_pass
+ Should any encountered None, NA, or Null values be considered as passing test units? By
+ default, this is `False`. Set to `True` to pass test units with missing values.
+ pre
+ An optional preprocessing function or lambda to apply to the data table during
+ interrogation. This function should take a table as input and return a modified table.
+ Have a look at the *Preprocessing* section for more information on how to use this
+ argument.
+ segments
+ An optional directive on segmentation, which serves to split a validation step into
+ multiple (one step per segment). Can be a single column name, a tuple that specifies a
+ column name and its corresponding values to segment on, or a combination of both
+ (provided as a list). Read the *Segmentation* section for usage information.
+ thresholds
+ Set threshold failure levels for reporting and reacting to exceedances of the levels.
+ The thresholds are set at the step level and will override any global thresholds set in
+ `Validate(thresholds=...)`. The default is `None`, which means that no thresholds will
+ be set locally and global thresholds (if any) will take effect. Look at the *Thresholds*
+ section for information on how to set threshold levels.
+ actions
+ Optional actions to take when the validation step(s) meets or exceeds any set threshold
+ levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
+ define the actions.
+ brief
+ An optional brief description of the validation step that will be displayed in the
+ reporting table. You can use the templating elements like `"{step}"` to insert
+ the step number, or `"{auto}"` to include an automatically generated brief. If `True`
+ the entire brief will be automatically generated. If `None` (the default) then there
+ won't be a brief.
+ active
+ A boolean value indicating whether the validation step should be active. Using `False`
+ will make the validation step inactive (still reporting its presence and keeping indexes
+ for the steps unchanged).
+
+ Returns
+ -------
+ Validate
+ The `Validate` object with the added validation step.
+
+ Examples
+ --------
+ ```{python}
+ #| echo: false
+ #| output: false
+ import pointblank as pb
+ pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
+ ```
+
+ For the examples here, we'll use a simple Polars DataFrame with three numeric columns
+ (`a`, `b`, and `c`). The table is shown below:
+
+ ```{python}
+ import pointblank as pb
+ import polars as pl
+
+ tbl = pl.DataFrame(
+ {
+ "a": [6, 5, 4, 3, 2, 1],
+ "b": [5, 4, 4, 3, 2, 1],
+ "c": [5, 4, 5, 3, 2, 1],
+ }
+ )
+
+ pb.preview(tbl)
+ ```
+
+ Let's validate that values in column `a` are decreasing. We'll determine if this validation
+ had any failing test units (there are six test units, one for each row).
+
+ ```{python}
+ validation = (
+ pb.Validate(data=tbl)
+ .col_vals_decreasing(columns="a")
+ .interrogate()
+ )
+
+ validation
+ ```
+
+ The validation passed as all values in column `a` are decreasing. Now let's check column
+ `b` which has a stationary value:
+
+ ```{python}
+ validation = (
+ pb.Validate(data=tbl)
+ .col_vals_decreasing(columns="b")
+ .interrogate()
+ )
+
+ validation
+ ```
+
+ This validation fails at the third row because the value `4` is repeated. If we want to
+ allow stationary values, we can use `allow_stationary=True`:
+
+ ```{python}
+ validation = (
+ pb.Validate(data=tbl)
+ .col_vals_decreasing(columns="b", allow_stationary=True)
+ .interrogate()
+ )
+
+ validation
+ ```
+ """
+ assertion_type = "col_vals_decreasing"
+
+ # Determine threshold to use (global or local) and normalize a local `thresholds=` value
+ thresholds = (
+ self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
+ )
+
+ # If `columns` is a ColumnSelector or Narwhals selector, call `col()` on it to later
+ # resolve the columns
+ if isinstance(columns, (ColumnSelector, nw.selectors.Selector)):
+ columns = col(columns)
+
+ # If `columns` is Column value or a string, place it in a list for iteration
+ if isinstance(columns, (Column, str)):
+ columns = [columns]
+
+ # Determine brief to use (global or local) and transform any shorthands of `brief=`
+ brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
+
+ # Iterate over the columns and create a validation step for each
+ for column in columns:
+ val_info = _ValidationInfo(
+ assertion_type=assertion_type,
+ column=column,
+ values="",
+ na_pass=na_pass,
+ pre=pre,
+ segments=segments,
+ thresholds=thresholds,
+ actions=actions,
+ brief=brief,
+ active=active,
+ val_info={
+ "allow_stationary": allow_stationary,
+ "increasing_tol": increasing_tol if increasing_tol else 0.0,
+ },
+ )
+
+ self._add_validation(validation_info=val_info)
+
+ return self
+
+ def col_vals_null(
+ self,
+ columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
+ pre: Callable | None = None,
+ segments: SegmentSpec | None = None,
+ thresholds: int | float | bool | tuple | dict | Thresholds = None,
+ actions: Actions | None = None,
+ brief: str | bool | None = None,
+ active: bool = True,
+ ) -> Validate:
+ """
+ Validate whether values in a column are Null.
+
+ The `col_vals_null()` validation method checks whether column values in a table are Null.
+ This validation will operate over the number of test units that is equal to the number
+ of rows in the table.
+
+ Parameters
+ ----------
+ columns
+ A single column or a list of columns to validate. Can also use
+ [`col()`](`pointblank.col`) with column selectors to specify one or more columns. If
+ multiple columns are supplied or resolved, there will be a separate validation step
+ generated for each column.
+ pre
+ An optional preprocessing function or lambda to apply to the data table during
+ interrogation. This function should take a table as input and return a modified table.
+ Have a look at the *Preprocessing* section for more information on how to use this
+ argument.
+ segments
+ An optional directive on segmentation, which serves to split a validation step into
+ multiple (one step per segment). Can be a single column name, a tuple that specifies a
+ column name and its corresponding values to segment on, or a combination of both
+ (provided as a list). Read the *Segmentation* section for usage information.
+ thresholds
+ Set threshold failure levels for reporting and reacting to exceedances of the levels.
+ The thresholds are set at the step level and will override any global thresholds set in
+ `Validate(thresholds=...)`. The default is `None`, which means that no thresholds will
+ be set locally and global thresholds (if any) will take effect. Look at the *Thresholds*
+ section for information on how to set threshold levels.
+ actions
+ Optional actions to take when the validation step(s) meets or exceeds any set threshold
+ levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
+ define the actions.
+ brief
+ An optional brief description of the validation step that will be displayed in the
+ reporting table. You can use the templating elements like `"{step}"` to insert
+ the step number, or `"{auto}"` to include an automatically generated brief. If `True`
+ the entire brief will be automatically generated. If `None` (the default) then there
+ won't be a brief.
+ active
+ A boolean value indicating whether the validation step should be active. Using `False`
+ will make the validation step inactive (still reporting its presence and keeping indexes
+ for the steps unchanged).
+
+ Returns
+ -------
+ Validate
+ The `Validate` object with the added validation step.
+
+ Preprocessing
+ -------------
+ The `pre=` argument allows for a preprocessing function or lambda to be applied to the data
+ table during interrogation. This function should take a table as input and return a modified
+ table. This is useful for performing any necessary transformations or filtering on the data
+ before the validation step is applied.
+
+ The preprocessing function can be any callable that takes a table as input and returns a
+ modified table. For example, you could use a lambda function to filter the table based on
+ certain criteria or to apply a transformation to the data. Note that you can refer to
+ a column via `columns=` that is expected to be present in the transformed table, but may not
+ exist in the table before preprocessing. Regarding the lifetime of the transformed table, it
+ only exists during the validation step and is not stored in the `Validate` object or used in
+ subsequent validation steps.
+
+ Segmentation
+ ------------
+ The `segments=` argument allows for the segmentation of a validation step into multiple
+ segments. This is useful for applying the same validation step to different subsets of the
+ data. The segmentation can be done based on a single column or specific fields within a
+ column.
+
+ Providing a single column name will result in a separate validation step for each unique
+ value in that column. For example, if you have a column called `"region"` with values
+ `"North"`, `"South"`, and `"East"`, the validation step will be applied separately to each
+ region.
+
+ Alternatively, you can provide a tuple that specifies a column name and its corresponding
+ values to segment on. For example, if you have a column called `"date"` and you want to
+ segment on only specific dates, you can provide a tuple like
+ `("date", ["2023-01-01", "2023-01-02"])`. Any other values in the column will be disregarded
+ (i.e., no validation steps will be created for them).
+
+ A list with a combination of column names and tuples can be provided as well. This allows
+ for more complex segmentation scenarios. The following inputs are both valid:
+
+ ```
+ # Segments from all unique values in the `region` column
+ # and specific dates in the `date` column
+ segments=["region", ("date", ["2023-01-01", "2023-01-02"])]
+
+ # Segments from all unique values in the `region` and `date` columns
+ segments=["region", "date"]
+ ```
+
+ The segmentation is performed during interrogation, and the resulting validation steps will
+ be numbered sequentially. Each segment will have its own validation step, and the results
+ will be reported separately. This allows for a more granular analysis of the data and helps
+ identify issues within specific segments.
+
+ Importantly, the segmentation process will be performed after any preprocessing of the data
+ table. Because of this, one can conceivably use the `pre=` argument to generate a column
+ that can be used for segmentation. For example, you could create a new column called
+ `"segment"` through use of `pre=` and then use that column for segmentation.
+
+ Thresholds
+ ----------
+ The `thresholds=` parameter is used to set the failure-condition levels for the validation
+ step. If they are set here at the step level, these thresholds will override any thresholds
+ set at the global level in `Validate(thresholds=...)`.
+
+ There are three threshold levels: 'warning', 'error', and 'critical'. The threshold values
+ can either be set as a proportion failing of all test units (a value between `0` and `1`),
+ or, the absolute number of failing test units (as an integer that's `1` or greater).
+
+ Thresholds can be defined using one of these input schemes:
+
+ 1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
+ thresholds)
+ 2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1` is
+ the 'error' level, and position `2` is the 'critical' level
+ 3. create a dictionary of 1-3 value entries; the valid keys are 'warning', 'error', and
+ 'critical'
+ 4. a single integer/float value denoting absolute number or fraction of failing test units
+ for the 'warning' level only
+
+ If the number of failing test units exceeds set thresholds, the validation step will be
+ marked as 'warning', 'error', or 'critical'. All of the threshold levels don't need to be
+ set, you're free to set any combination of them.
+
+ Aside from reporting failure conditions, thresholds can be used to determine the actions to
+ take for each level of failure (using the `actions=` parameter).
+
+ Examples
+ --------
+ ```{python}
+ #| echo: false
+ #| output: false
+ import pointblank as pb
+ pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
+ ```
+ For the examples here, we'll use a simple Polars DataFrame with two numeric columns (`a` and
+ `b`). The table is shown below:
+
+ ```{python}
+ import pointblank as pb
+ import polars as pl
+
+ tbl = pl.DataFrame(
+ {
+ "a": [None, None, None, None],
+ "b": [None, 2, None, 9],
+ }
+ ).with_columns(pl.col("a").cast(pl.Int64))
+
+ pb.preview(tbl)
+ ```
+
+ Let's validate that values in column `a` are all Null values. We'll determine if this
+ validation had any failing test units (there are four test units, one for each row).
+
+ ```{python}
+ validation = (
+ pb.Validate(data=tbl)
+ .col_vals_null(columns="a")
+ .interrogate()
+ )
+
+ validation
+ ```
+
+ Printing the `validation` object shows the validation table in an HTML viewing environment.
+ The validation table shows the single entry that corresponds to the validation step created
+ by using `col_vals_null()`. All test units passed, and there are no failing test units.
+
+ Now, let's use that same set of values for a validation on column `b`.
+
+ ```{python}
+ validation = (
+ pb.Validate(data=tbl)
+ .col_vals_null(columns="b")
+ .interrogate()
+ )
+
+ validation
+ ```
+
+ The validation table reports two failing test units. The specific failing cases are for the
+ two non-Null values in column `b`.
+ """
+ assertion_type = _get_fn_name()
+
+ _check_column(column=columns)
+ _check_pre(pre=pre)
+ # TODO: add check for segments
+ # _check_segments(segments=segments)
  _check_thresholds(thresholds=thresholds)
  _check_boolean_input(param=active, param_name="active")
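
The examples above exercise `allow_stationary=` but not the tolerance arguments. Going by the parameter descriptions, a sketch of `decreasing_tol=` using column `c` from the increasing example, whose single dip (`2` to `1`) has magnitude 1:

```python
import polars as pl
import pointblank as pb

tbl = pl.DataFrame({"c": [1, 2, 1, 3, 4, 5]})

# The negative step of 1 falls within the tolerance, so the step should pass;
# per the docs, setting `decreasing_tol=` also implies `allow_stationary=True`
validation = (
    pb.Validate(data=tbl)
    .col_vals_increasing(columns="c", decreasing_tol=1.0)
    .interrogate()
)

validation
```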
7929
8537
 
@@ -8112,7 +8720,262 @@ class Validate:
8112
8720
  import pointblank as pb
8113
8721
  pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
8114
8722
  ```
8115
- For the examples here, we'll use a simple Polars DataFrame with two numeric columns (`a` and
8723
+ For the examples here, we'll use a simple Polars DataFrame with two numeric columns (`a` and
8724
+ `b`). The table is shown below:
8725
+
8726
+ ```{python}
8727
+ import pointblank as pb
8728
+ import polars as pl
8729
+
8730
+ tbl = pl.DataFrame(
8731
+ {
8732
+ "a": [4, 7, 2, 8],
8733
+ "b": [5, None, 1, None],
8734
+ }
8735
+ )
8736
+
8737
+ pb.preview(tbl)
8738
+ ```
8739
+
8740
+ Let's validate that none of the values in column `a` are Null values. We'll determine if
8741
+ this validation had any failing test units (there are four test units, one for each row).
8742
+
8743
+ ```{python}
8744
+ validation = (
8745
+ pb.Validate(data=tbl)
8746
+ .col_vals_not_null(columns="a")
8747
+ .interrogate()
8748
+ )
8749
+
8750
+ validation
8751
+ ```
8752
+
8753
+ Printing the `validation` object shows the validation table in an HTML viewing environment.
8754
+ The validation table shows the single entry that corresponds to the validation step created
8755
+ by using `col_vals_not_null()`. All test units passed, and there are no failing test units.
8756
+
8757
+ Now, let's use that same set of values for a validation on column `b`.
8758
+
8759
+ ```{python}
8760
+ validation = (
8761
+ pb.Validate(data=tbl)
8762
+ .col_vals_not_null(columns="b")
8763
+ .interrogate()
8764
+ )
8765
+
8766
+ validation
8767
+ ```
8768
+
8769
+ The validation table reports two failing test units. The specific failing cases are for the
8770
+ two Null values in column `b`.
8771
+ """
8772
+ assertion_type = _get_fn_name()
8773
+
8774
+ _check_column(column=columns)
8775
+ _check_pre(pre=pre)
8776
+ # TODO: add check for segments
8777
+ # _check_segments(segments=segments)
8778
+ _check_thresholds(thresholds=thresholds)
8779
+ _check_boolean_input(param=active, param_name="active")
8780
+
8781
+ # Determine threshold to use (global or local) and normalize a local `thresholds=` value
8782
+ thresholds = (
8783
+ self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
8784
+ )
8785
+
8786
+ # If `columns` is a ColumnSelector or Narwhals selector, call `col()` on it to later
8787
+ # resolve the columns
8788
+ if isinstance(columns, (ColumnSelector, nw.selectors.Selector)):
8789
+ columns = col(columns)
8790
+
8791
+ # If `columns` is Column value or a string, place it in a list for iteration
8792
+ if isinstance(columns, (Column, str)):
8793
+ columns = [columns]
8794
+
8795
+ # Determine brief to use (global or local) and transform any shorthands of `brief=`
8796
+ brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
8797
+
8798
+ # Iterate over the columns and create a validation step for each
8799
+ for column in columns:
8800
+ val_info = _ValidationInfo(
8801
+ assertion_type=assertion_type,
8802
+ column=column,
8803
+ pre=pre,
8804
+ segments=segments,
8805
+ thresholds=thresholds,
8806
+ actions=actions,
8807
+ brief=brief,
8808
+ active=active,
8809
+ )
8810
+
8811
+ self._add_validation(validation_info=val_info)
8812
+
8813
+ return self
8814
+
8815
+ def col_vals_regex(
8816
+ self,
8817
+ columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
8818
+ pattern: str,
8819
+ na_pass: bool = False,
8820
+ inverse: bool = False,
8821
+ pre: Callable | None = None,
8822
+ segments: SegmentSpec | None = None,
8823
+ thresholds: int | float | bool | tuple | dict | Thresholds = None,
8824
+ actions: Actions | None = None,
8825
+ brief: str | bool | None = None,
8826
+ active: bool = True,
8827
+ ) -> Validate:
8828
+ """
8829
+ Validate whether column values match a regular expression pattern.
8830
+
8831
+ The `col_vals_regex()` validation method checks whether column values in a table
8832
+ correspond to a `pattern=` matching expression. This validation will operate over the number
8833
+ of test units that is equal to the number of rows in the table (determined after any `pre=`
8834
+ mutation has been applied).
8835
+
8836
+ Parameters
8837
+ ----------
8838
+ columns
8839
+ A single column or a list of columns to validate. Can also use
8840
+ [`col()`](`pointblank.col`) with column selectors to specify one or more columns. If
8841
+ multiple columns are supplied or resolved, there will be a separate validation step
8842
+ generated for each column.
8843
+ pattern
8844
+ A regular expression pattern to compare against.
8845
+ na_pass
8846
+ Should any encountered None, NA, or Null values be considered as passing test units? By
8847
+ default, this is `False`. Set to `True` to pass test units with missing values.
8848
+ inverse
8849
+ Should the validation step be inverted? If `True`, then the expectation is that column
8850
+ values should *not* match the specified `pattern=` regex.
8851
+ pre
8852
+ An optional preprocessing function or lambda to apply to the data table during
8853
+ interrogation. This function should take a table as input and return a modified table.
8854
+ Have a look at the *Preprocessing* section for more information on how to use this
8855
+ argument.
8856
+ segments
8857
+ An optional directive on segmentation, which serves to split a validation step into
8858
+ multiple (one step per segment). Can be a single column name, a tuple that specifies a
8859
+ column name and its corresponding values to segment on, or a combination of both
8860
+ (provided as a list). Read the *Segmentation* section for usage information.
8861
+ thresholds
8862
+ Set threshold failure levels for reporting and reacting to exceedences of the levels.
8863
+ The thresholds are set at the step level and will override any global thresholds set in
8864
+ `Validate(thresholds=...)`. The default is `None`, which means that no thresholds will
8865
+ be set locally and global thresholds (if any) will take effect. Look at the *Thresholds*
8866
+ section for information on how to set threshold levels.
8867
+ actions
8868
+ Optional actions to take when the validation step(s) meets or exceeds any set threshold
8869
+ levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
8870
+ define the actions.
8871
+ brief
8872
+ An optional brief description of the validation step that will be displayed in the
8873
+ reporting table. You can use the templating elements like `"{step}"` to insert
8874
+ the step number, or `"{auto}"` to include an automatically generated brief. If `True`
8875
+ the entire brief will be automatically generated. If `None` (the default) then there
8876
+ won't be a brief.
8877
+ active
8878
+ A boolean value indicating whether the validation step should be active. Using `False`
8879
+ will make the validation step inactive (still reporting its presence and keeping indexes
8880
+ for the steps unchanged).
8881
+
8882
+ Returns
8883
+ -------
8884
+ Validate
8885
+ The `Validate` object with the added validation step.
8886
+
8887
+ Preprocessing
8888
+ -------------
8889
+ The `pre=` argument allows for a preprocessing function or lambda to be applied to the data
8890
+ table during interrogation. This function should take a table as input and return a modified
8891
+ table. This is useful for performing any necessary transformations or filtering on the data
8892
+ before the validation step is applied.
8893
+
8894
+ The preprocessing function can be any callable that takes a table as input and returns a
8895
+ modified table. For example, you could use a lambda function to filter the table based on
8896
+ certain criteria or to apply a transformation to the data. Note that you can refer to
8897
+ a column via `columns=` that is expected to be present in the transformed table, but may not
8898
+ exist in the table before preprocessing. Regarding the lifetime of the transformed table, it
8899
+ only exists during the validation step and is not stored in the `Validate` object or used in
8900
+ subsequent validation steps.
8901
+
8902
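A minimal sketch of the contract just described, assuming a Polars input (data and column names are illustrative): the callable receives the table and must return a table.

```python
import polars as pl
import pointblank as pb

tbl = pl.DataFrame({"code": ["rb-0343", "ra-0232", None, "rc-1343"]})

validation = (
    pb.Validate(data=tbl)
    .col_vals_regex(
        columns="code",
        pattern=r"r[a-z]-[0-9]{4}",
        # Drop Null rows before the regex check runs; the lambda takes a
        # table and returns a modified table
        pre=lambda df: df.filter(pl.col("code").is_not_null()),
    )
    .interrogate()
)
```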
+ Segmentation
8903
+ ------------
8904
+ The `segments=` argument allows for the segmentation of a validation step into multiple
8905
+ segments. This is useful for applying the same validation step to different subsets of the
8906
+ data. The segmentation can be done based on a single column or specific fields within a
8907
+ column.
8908
+
8909
+ Providing a single column name will result in a separate validation step for each unique
8910
+ value in that column. For example, if you have a column called `"region"` with values
8911
+ `"North"`, `"South"`, and `"East"`, the validation step will be applied separately to each
8912
+ region.
8913
+
8914
+ Alternatively, you can provide a tuple that specifies a column name and its corresponding
8915
+ values to segment on. For example, if you have a column called `"date"` and you want to
8916
+ segment on only specific dates, you can provide a tuple like
8917
+ `("date", ["2023-01-01", "2023-01-02"])`. Any other values in the column will be disregarded
8918
+ (i.e., no validation steps will be created for them).
8919
+
8920
+ A list with a combination of column names and tuples can be provided as well. This allows
8921
+ for more complex segmentation scenarios. The following inputs are both valid:
8922
+
8923
+ ```
8924
+ # Segments from all unique values in the `region` column
8925
+ # and specific dates in the `date` column
8926
+ segments=["region", ("date", ["2023-01-01", "2023-01-02"])]
8927
+
8928
+ # Segments from all unique values in the `region` and `date` columns
8929
+ segments=["region", "date"]
8930
+ ```
8931
+
8932
+ The segmentation is performed during interrogation, and the resulting validation steps will
8933
+ be numbered sequentially. Each segment will have its own validation step, and the results
8934
+ will be reported separately. This allows for a more granular analysis of the data and helps
8935
+ identify issues within specific segments.
8936
+
8937
+ Importantly, the segmentation process will be performed after any preprocessing of the data
8938
+ table. Because of this, one can conceivably use the `pre=` argument to generate a column
8939
+ that can be used for segmentation. For example, you could create a new column called
8940
+ `"segment"` through use of `pre=` and then use that column for segmentation.
8941
+
8942
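The `pre=`-then-`segments=` pattern mentioned above can be sketched as follows (hypothetical column names; a Polars input is assumed). The derived `"segment"` column exists only during interrogation, yet `segments=` can split on it:

```python
import polars as pl
import pointblank as pb

tbl = pl.DataFrame({
    "id": ["ra-0001", "rb-0002", "ra-0003", "rb-0004"],
    "amount": [10, 250, 35, 990],
})

validation = (
    pb.Validate(data=tbl)
    .col_vals_regex(
        columns="id",
        pattern=r"r[a-z]-[0-9]{4}",
        # Derive a "segment" column during preprocessing...
        pre=lambda df: df.with_columns(
            pl.when(pl.col("amount") > 100)
            .then(pl.lit("high"))
            .otherwise(pl.lit("low"))
            .alias("segment")
        ),
        # ...then split the step on it: one step per unique segment value
        segments="segment",
    )
    .interrogate()
)
```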
+ Thresholds
8943
+ ----------
8944
+ The `thresholds=` parameter is used to set the failure-condition levels for the validation
8945
+ step. If they are set here at the step level, these thresholds will override any thresholds
8946
+ set at the global level in `Validate(thresholds=...)`.
8947
+
8948
+ There are three threshold levels: 'warning', 'error', and 'critical'. The threshold values
8949
+ can either be set as a proportion of all test units failing (a value between `0` and `1`),
8950
+ or as the absolute number of failing test units (an integer that's `1` or greater).
8951
+
8952
+ Thresholds can be defined using one of these input schemes:
8953
+
8954
+ 1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
8955
+ thresholds)
8956
+ 2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1` is
8957
+ the 'error' level, and position `2` is the 'critical' level
8958
+ 3. create a dictionary of 1-3 value entries; the valid keys are 'warning', 'error', and
8959
+ 'critical'
8960
+ 4. a single integer/float value denoting the absolute number or fraction of failing test units
8961
+ for the 'warning' level only
8962
+
8963
+ If the number of failing test units exceeds set thresholds, the validation step will be
8964
+ marked as 'warning', 'error', or 'critical'. Not all of the threshold levels need to be
8965
+ set; you're free to set any combination of them.
8966
+
8967
+ Aside from reporting failure conditions, thresholds can be used to determine the actions to
8968
+ take for each level of failure (using the `actions=` parameter).
8969
+
8970
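The four input schemes listed above can be written out as in this sketch; any of these values can be passed to the step-level `thresholds=` argument or to `Validate(thresholds=...)`:

```python
import pointblank as pb

# 1. The Thresholds class: the most explicit form
t1 = pb.Thresholds(warning=0.05, error=0.10, critical=0.15)

# 2. A tuple of 1-3 values: positions map to warning, error, critical
t2 = (0.05, 0.10, 0.15)

# 3. A dictionary with any subset of the three keys
t3 = {"warning": 0.05, "critical": 0.15}

# 4. A single value: sets the 'warning' level only
t4 = 10  # ten failing test units
```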
+ Examples
8971
+ --------
8972
+ ```{python}
8973
+ #| echo: false
8974
+ #| output: false
8975
+ import pointblank as pb
8976
+ pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
8977
+ ```
8978
+ For the examples here, we'll use a simple Polars DataFrame with two string columns (`a` and
8116
8979
  `b`). The table is shown below:
8117
8980
 
8118
8981
  ```{python}
@@ -8121,21 +8984,22 @@ class Validate:
8121
8984
 
8122
8985
  tbl = pl.DataFrame(
8123
8986
  {
8124
- "a": [4, 7, 2, 8],
8125
- "b": [5, None, 1, None],
8987
+ "a": ["rb-0343", "ra-0232", "ry-0954", "rc-1343"],
8988
+ "b": ["ra-0628", "ra-583", "rya-0826", "rb-0735"],
8126
8989
  }
8127
8990
  )
8128
8991
 
8129
8992
  pb.preview(tbl)
8130
8993
  ```
8131
8994
 
8132
- Let's validate that none of the values in column `a` are Null values. We'll determine if
8133
- this validation had any failing test units (there are four test units, one for each row).
8995
+ Let's validate that all of the values in column `a` match a particular regex pattern. We'll
8996
+ determine if this validation had any failing test units (there are four test units, one for
8997
+ each row).
8134
8998
 
8135
8999
  ```{python}
8136
9000
  validation = (
8137
9001
  pb.Validate(data=tbl)
8138
- .col_vals_not_null(columns="a")
9002
+ .col_vals_regex(columns="a", pattern=r"r[a-z]-[0-9]{4}")
8139
9003
  .interrogate()
8140
9004
  )
8141
9005
 
@@ -8144,14 +9008,14 @@ class Validate:
8144
9008
 
8145
9009
  Printing the `validation` object shows the validation table in an HTML viewing environment.
8146
9010
  The validation table shows the single entry that corresponds to the validation step created
8147
- by using `col_vals_not_null()`. All test units passed, and there are no failing test units.
9011
+ by using `col_vals_regex()`. All test units passed, and there are no failing test units.
8148
9012
 
8149
- Now, let's use that same set of values for a validation on column `b`.
9013
+ Now, let's use the same regex for a validation on column `b`.
8150
9014
 
8151
9015
  ```{python}
8152
9016
  validation = (
8153
9017
  pb.Validate(data=tbl)
8154
- .col_vals_not_null(columns="b")
9018
+ .col_vals_regex(columns="b", pattern=r"r[a-z]-[0-9]{4}")
8155
9019
  .interrogate()
8156
9020
  )
8157
9021
 
@@ -8159,8 +9023,9 @@ class Validate:
8159
9023
  ```
8160
9024
 
8161
9025
  The validation table reports two failing test units. The specific failing cases are for the
8162
- two Null values in column `b`.
9026
+ string values of rows 2 and 3 in column `b`.
8163
9027
  """
9028
+
8164
9029
  assertion_type = _get_fn_name()
8165
9030
 
8166
9031
  _check_column(column=columns)
@@ -8168,6 +9033,8 @@ class Validate:
8168
9033
  # TODO: add check for segments
8169
9034
  # _check_segments(segments=segments)
8170
9035
  _check_thresholds(thresholds=thresholds)
9036
+ _check_boolean_input(param=na_pass, param_name="na_pass")
9037
+ _check_boolean_input(param=inverse, param_name="inverse")
8171
9038
  _check_boolean_input(param=active, param_name="active")
8172
9039
 
8173
9040
  # Determine threshold to use (global or local) and normalize a local `thresholds=` value
@@ -8187,11 +9054,16 @@ class Validate:
8187
9054
  # Determine brief to use (global or local) and transform any shorthands of `brief=`
8188
9055
  brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
8189
9056
 
9057
+ # Package up the `pattern=` and boolean params into a dictionary for later interrogation
9058
+ values = {"pattern": pattern, "inverse": inverse}
9059
+
8190
9060
  # Iterate over the columns and create a validation step for each
8191
9061
  for column in columns:
8192
9062
  val_info = _ValidationInfo(
8193
9063
  assertion_type=assertion_type,
8194
9064
  column=column,
9065
+ values=values,
9066
+ na_pass=na_pass,
8195
9067
  pre=pre,
8196
9068
  segments=segments,
8197
9069
  thresholds=thresholds,
@@ -8204,12 +9076,11 @@ class Validate:
8204
9076
 
8205
9077
  return self
8206
9078
 
8207
- def col_vals_regex(
9079
+ def col_vals_within_spec(
8208
9080
  self,
8209
9081
  columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
8210
- pattern: str,
9082
+ spec: str,
8211
9083
  na_pass: bool = False,
8212
- inverse: bool = False,
8213
9084
  pre: Callable | None = None,
8214
9085
  segments: SegmentSpec | None = None,
8215
9086
  thresholds: int | float | bool | tuple | dict | Thresholds = None,
@@ -8218,12 +9089,14 @@ class Validate:
8218
9089
  active: bool = True,
8219
9090
  ) -> Validate:
8220
9091
  """
8221
- Validate whether column values match a regular expression pattern.
9092
+ Validate whether column values fit within a specification.
8222
9093
 
8223
- The `col_vals_regex()` validation method checks whether column values in a table
8224
- correspond to a `pattern=` matching expression. This validation will operate over the number
8225
- of test units that is equal to the number of rows in the table (determined after any `pre=`
8226
- mutation has been applied).
9094
+ The `col_vals_within_spec()` validation method checks whether column values in a table
9095
+ correspond to a specification (`spec=`) type (details of which are available in the
9096
+ *Specifications* section). Specifications include common data types like email addresses,
9097
+ URLs, postal codes, vehicle identification numbers (VINs), International Bank Account
9098
+ Numbers (IBANs), and more. This validation will operate over the number of test units that
9099
+ is equal to the number of rows in the table.
8227
9100
 
8228
9101
  Parameters
8229
9102
  ----------
@@ -8232,14 +9105,13 @@ class Validate:
8232
9105
  [`col()`](`pointblank.col`) with column selectors to specify one or more columns. If
8233
9106
  multiple columns are supplied or resolved, there will be a separate validation step
8234
9107
  generated for each column.
8235
- pattern
8236
- A regular expression pattern to compare against.
9108
+ spec
9109
+ A specification string for defining the specification type. Examples are `"email"`,
9110
+ `"url"`, and `"postal_code[USA]"`. See the *Specifications* section for all available
9111
+ options.
8237
9112
  na_pass
8238
9113
  Should any encountered None, NA, or Null values be considered as passing test units? By
8239
9114
  default, this is `False`. Set to `True` to pass test units with missing values.
8240
- inverse
8241
- Should the validation step be inverted? If `True`, then the expectation is that column
8242
- values should *not* match the specified `pattern=` regex.
8243
9115
  pre
8244
9116
  An optional preprocessing function or lambda to apply to the data table during
8245
9117
  interrogation. This function should take a table as input and return a modified table.
@@ -8276,6 +9148,40 @@ class Validate:
8276
9148
  Validate
8277
9149
  The `Validate` object with the added validation step.
8278
9150
 
9151
+ Specifications
9152
+ --------------
9153
+ A specification type must be used with the `spec=` argument. This is a string-based keyword
9154
+ that corresponds to the type of data in the specified columns. The following keywords can
9155
+ be used:
9156
+
9157
+ - `"isbn"`: The International Standard Book Number (ISBN) is a unique numerical identifier
9158
+ for books. This keyword validates both 10-digit and 13-digit ISBNs.
9159
+
9160
+ - `"vin"`: A vehicle identification number (VIN) is a unique code used by the automotive
9161
+ industry to identify individual motor vehicles.
9162
+
9163
+ - `"postal_code[<country_code>]"`: A postal code (also known as postcodes, PIN, or ZIP
9164
+ codes) is a series of letters, digits, or both included in a postal address. Because the
9165
+ coding varies by country, a country code in either the 2-letter (ISO 3166-1 alpha-2) or
9166
+ 3-letter (ISO 3166-1 alpha-3) format needs to be supplied (e.g., `"postal_code[US]"` or
9167
+ `"postal_code[USA]"`). The keyword alias `"zip"` can be used for US ZIP codes.
9168
+
9169
+ - `"credit_card"`: A credit card number can be validated across a variety of issuers. The
9170
+ validation uses the Luhn algorithm.
9171
+
9172
+ - `"iban[<country_code>]"`: The International Bank Account Number (IBAN) is a system of
9173
+ identifying bank accounts across countries. Because the length and coding varies by
9174
+ country, a country code needs to be supplied (e.g., `"iban[DE]"` or `"iban[DEU]"`).
9175
+
9176
+ - `"swift"`: Business Identifier Codes (also known as SWIFT-BIC, BIC, or SWIFT code) are
9177
+ unique identifiers for financial and non-financial institutions.
9178
+
9179
+ - `"phone"`, `"email"`, `"url"`, `"ipv4"`, `"ipv6"`, `"mac"`: Phone numbers, email
9180
+ addresses, Internet URLs, IPv4 or IPv6 addresses, and MAC addresses can be validated with
9181
+ their respective keywords.
9182
+
9183
+ Only a single `spec=` value should be provided per function call.
9184
+
8279
9185
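For instance, a check of US ZIP codes could look like this sketch (the `postal` column name and data are illustrative):

```python
import polars as pl
import pointblank as pb

tbl = pl.DataFrame({"postal": ["99208", "30301", "not-a-zip", "02134"]})

validation = (
    pb.Validate(data=tbl)
    # The country code may be 2- or 3-letter; "zip" is an alias for US codes
    .col_vals_within_spec(columns="postal", spec="postal_code[US]")
    .interrogate()
)
```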
  Preprocessing
8280
9186
  -------------
8281
9187
  The `pre=` argument allows for a preprocessing function or lambda to be applied to the data
@@ -8367,8 +9273,9 @@ class Validate:
8367
9273
  import pointblank as pb
8368
9274
  pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
8369
9275
  ```
8370
- For the examples here, we'll use a simple Polars DataFrame with two string columns (`a` and
8371
- `b`). The table is shown below:
9276
+
9277
+ For the examples here, we'll use a simple Polars DataFrame with an email column. The table
9278
+ is shown below:
8372
9279
 
8373
9280
  ```{python}
8374
9281
  import pointblank as pb
@@ -8376,46 +9283,33 @@ class Validate:
8376
9283
 
8377
9284
  tbl = pl.DataFrame(
8378
9285
  {
8379
- "a": ["rb-0343", "ra-0232", "ry-0954", "rc-1343"],
8380
- "b": ["ra-0628", "ra-583", "rya-0826", "rb-0735"],
9286
+ "email": [
9287
+ "user@example.com",
9288
+ "admin@test.org",
9289
+ "invalid-email",
9290
+ "contact@company.co.uk",
9291
+ ],
8381
9292
  }
8382
9293
  )
8383
9294
 
8384
9295
  pb.preview(tbl)
8385
9296
  ```
8386
9297
 
8387
- Let's validate that all of the values in column `a` match a particular regex pattern. We'll
8388
- determine if this validation had any failing test units (there are four test units, one for
8389
- each row).
8390
-
8391
- ```{python}
8392
- validation = (
8393
- pb.Validate(data=tbl)
8394
- .col_vals_regex(columns="a", pattern=r"r[a-z]-[0-9]{4}")
8395
- .interrogate()
8396
- )
8397
-
8398
- validation
8399
- ```
8400
-
8401
- Printing the `validation` object shows the validation table in an HTML viewing environment.
8402
- The validation table shows the single entry that corresponds to the validation step created
8403
- by using `col_vals_regex()`. All test units passed, and there are no failing test units.
8404
-
8405
- Now, let's use the same regex for a validation on column `b`.
9298
+ Let's validate that all of the values in the `email` column are valid email addresses.
9299
+ We'll determine if this validation had any failing test units (there are four test units,
9300
+ one for each row).
8406
9301
 
8407
9302
  ```{python}
8408
9303
  validation = (
8409
9304
  pb.Validate(data=tbl)
8410
- .col_vals_regex(columns="b", pattern=r"r[a-z]-[0-9]{4}")
9305
+ .col_vals_within_spec(columns="email", spec="email")
8411
9306
  .interrogate()
8412
9307
  )
8413
9308
 
8414
9309
  validation
8415
9310
  ```
8416
9311
 
8417
- The validation table reports two failing test units. The specific failing cases are for the
8418
- string values of rows 1 and 2 in column `b`.
9312
+ The validation table shows that one test unit failed (the invalid email address in row 3).
8419
9313
  """
8420
9314
 
8421
9315
  assertion_type = _get_fn_name()
@@ -8426,7 +9320,6 @@ class Validate:
8426
9320
  # _check_segments(segments=segments)
8427
9321
  _check_thresholds(thresholds=thresholds)
8428
9322
  _check_boolean_input(param=na_pass, param_name="na_pass")
8429
- _check_boolean_input(param=inverse, param_name="inverse")
8430
9323
  _check_boolean_input(param=active, param_name="active")
8431
9324
 
8432
9325
  # Determine threshold to use (global or local) and normalize a local `thresholds=` value
@@ -8446,8 +9339,8 @@ class Validate:
8446
9339
  # Determine brief to use (global or local) and transform any shorthands of `brief=`
8447
9340
  brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
8448
9341
 
8449
- # Package up the `pattern=` and boolean params into a dictionary for later interrogation
8450
- values = {"pattern": pattern, "inverse": inverse}
9342
+ # Package up the `spec=` param into a dictionary for later interrogation
9343
+ values = {"spec": spec}
8451
9344
 
8452
9345
  # Iterate over the columns and create a validation step for each
8453
9346
  for column in columns:
@@ -9396,10 +10289,10 @@ class Validate:
9396
10289
  so try to include only the columns necessary for the validation.
9397
10290
  model
9398
10291
  The model to be used. This should be in the form of `provider:model` (e.g.,
9399
- `"anthropic:claude-3-5-sonnet-latest"`). Supported providers are `"anthropic"`,
9400
- `"openai"`, `"ollama"`, and `"bedrock"`. The model name should be the specific model to
9401
- be used from the provider. Model names are subject to change so consult the provider's
9402
- documentation for the most up-to-date model names.
10292
+ `"anthropic:claude-sonnet-4-5"`). Supported providers are `"anthropic"`, `"openai"`,
10293
+ `"ollama"`, and `"bedrock"`. The model name should be the specific model to be used from
10294
+ the provider. Model names are subject to change so consult the provider's documentation
10295
+ for the most up-to-date model names.
9403
10296
  batch_size
9404
10297
  Number of rows to process in each batch. Larger batches are more efficient but may hit
9405
10298
  API limits. Default is `1000`.
@@ -9551,13 +10444,6 @@ class Validate:
9551
10444
  - "Describe the quality of each row" (asks for description, not validation)
9552
10445
  - "How would you improve this data?" (asks for suggestions, not pass/fail)
9553
10446
 
9554
- Provider Setup
9555
- --------------
9556
- **OpenAI**: Set `OPENAI_API_KEY` environment variable or create `.env` file.
9557
- **Anthropic**: Set `ANTHROPIC_API_KEY` environment variable or create `.env` file.
9558
- **Ollama**: Ensure Ollama is running locally (default: http://localhost:11434).
9559
- **Bedrock**: Configure AWS credentials and region.
9560
-
9561
10447
  Performance Considerations
9562
10448
  --------------------------
9563
10449
  AI validation is significantly slower than traditional validation methods due to API calls
@@ -10344,8 +11230,277 @@ class Validate:
10344
11230
  if _is_value_a_df(count) or "ibis.expr.types.relations.Table" in str(type(count)):
10345
11231
  count = get_column_count(count)
10346
11232
 
10347
- # Package up the `count=` and boolean params into a dictionary for later interrogation
10348
- values = {"count": count, "inverse": inverse}
11233
+ # Package up the `count=` and boolean params into a dictionary for later interrogation
11234
+ values = {"count": count, "inverse": inverse}
11235
+
11236
+ # Determine brief to use (global or local) and transform any shorthands of `brief=`
11237
+ brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
11238
+
11239
+ val_info = _ValidationInfo(
11240
+ assertion_type=assertion_type,
11241
+ values=values,
11242
+ pre=pre,
11243
+ thresholds=thresholds,
11244
+ actions=actions,
11245
+ brief=brief,
11246
+ active=active,
11247
+ )
11248
+
11249
+ self._add_validation(validation_info=val_info)
11250
+
11251
+ return self
11252
+
11253
+ def tbl_match(
11254
+ self,
11255
+ tbl_compare: FrameT | Any,
11256
+ pre: Callable | None = None,
11257
+ thresholds: int | float | bool | tuple | dict | Thresholds = None,
11258
+ actions: Actions | None = None,
11259
+ brief: str | bool | None = None,
11260
+ active: bool = True,
11261
+ ) -> Validate:
11262
+ """
11263
+ Validate whether the target table matches a comparison table.
11264
+
11265
+ The `tbl_match()` method checks whether the target table's composition matches that of a
11266
+ comparison table. The validation performs a comprehensive comparison using progressively
11267
+ stricter checks (from least to most stringent):
11268
+
11269
+ 1. **Column count match**: both tables must have the same number of columns
11270
+ 2. **Row count match**: both tables must have the same number of rows
11271
+ 3. **Schema match (loose)**: column names and dtypes match (case-insensitive, any order)
11272
+ 4. **Schema match (order)**: columns in the correct order (case-insensitive names)
11273
+ 5. **Schema match (exact)**: column names match exactly (case-sensitive, correct order)
11274
+ 6. **Data match**: values in corresponding cells must be identical
11275
+
11276
+ This progressive approach helps identify exactly where tables differ. The validation will
11277
+ fail at the first check that doesn't pass, making it easier to diagnose mismatches. This
11278
+ validation operates over a single test unit (pass/fail for complete table match).
11279
+
11280
+ Parameters
11281
+ ----------
11282
+ tbl_compare
11283
+ The comparison table to validate against. This can be a DataFrame object (Polars or
11284
+ Pandas), an Ibis table object, or a callable that returns a table. If a callable is
11285
+ provided, it will be executed during interrogation to obtain the comparison table (see the sketch after this parameter list).
11286
+ pre
11287
+ An optional preprocessing function or lambda to apply to the data table during
11288
+ interrogation. This function should take a table as input and return a modified table.
11289
+ Have a look at the *Preprocessing* section for more information on how to use this
11290
+ argument.
11291
+ thresholds
11292
+ Set threshold failure levels for reporting and reacting to exceedances of the levels.
11293
+ The thresholds are set at the step level and will override any global thresholds set in
11294
+ `Validate(thresholds=...)`. The default is `None`, which means that no thresholds will
11295
+ be set locally and global thresholds (if any) will take effect. Look at the *Thresholds*
11296
+ section for information on how to set threshold levels.
11297
+ actions
11298
+ Optional actions to take when the validation step meets or exceeds any set threshold
11299
+ levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
11300
+ define the actions.
11301
+ brief
11302
+ An optional brief description of the validation step that will be displayed in the
11303
+ reporting table. You can use the templating elements like `"{step}"` to insert
11304
+ the step number, or `"{auto}"` to include an automatically generated brief. If `True`
11305
+ the entire brief will be automatically generated. If `None` (the default) then there
11306
+ won't be a brief.
11307
+ active
11308
+ A boolean value indicating whether the validation step should be active. Using `False`
11309
+ will make the validation step inactive (still reporting its presence and keeping indexes
11310
+ for the steps unchanged).
11311
+
11312
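A sketch of the callable form of `tbl_compare=` referenced in the parameter list above; the callable is only executed during interrogation, which is useful when the reference table is built lazily:

```python
import polars as pl
import pointblank as pb

validation = (
    pb.Validate(data=pl.DataFrame({"a": [1, 2, 3]}))
    # Deferred: the lambda runs at interrogation time to produce the
    # comparison table
    .tbl_match(tbl_compare=lambda: pl.DataFrame({"a": [1, 2, 3]}))
    .interrogate()
)
```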
+ Returns
11313
+ -------
11314
+ Validate
11315
+ The `Validate` object with the added validation step.
11316
+
11317
+ Preprocessing
11318
+ -------------
11319
+ The `pre=` argument allows for a preprocessing function or lambda to be applied to the data
11320
+ table during interrogation. This function should take a table as input and return a modified
11321
+ table. This is useful for performing any necessary transformations or filtering on the data
11322
+ before the validation step is applied.
11323
+
11324
+ The preprocessing function can be any callable that takes a table as input and returns a
11325
+ modified table. For example, you could use a lambda function to filter the table based on
11326
+ certain criteria or to apply a transformation to the data. Note that the same preprocessing
11327
+ is **not** applied to the comparison table; only the target table is preprocessed. Regarding
11328
+ the lifetime of the transformed table, it only exists during the validation step and is not
11329
+ stored in the `Validate` object or used in subsequent validation steps.
11330
+
11331
+ Thresholds
11332
+ ----------
11333
+ The `thresholds=` parameter is used to set the failure-condition levels for the validation
11334
+ step. If they are set here at the step level, these thresholds will override any thresholds
11335
+ set at the global level in `Validate(thresholds=...)`.
11336
+
11337
+ There are three threshold levels: 'warning', 'error', and 'critical'. The threshold values
11338
+ can either be set as a proportion of all test units failing (a value between `0` and `1`),
11339
+ or as the absolute number of failing test units (an integer that's `1` or greater).
11340
+
11341
+ Thresholds can be defined using one of these input schemes:
11342
+
11343
+ 1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
11344
+ thresholds)
11345
+ 2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1` is
11346
+ the 'error' level, and position `2` is the 'critical' level
11347
+ 3. create a dictionary of 1-3 value entries; the valid keys are 'warning', 'error', and
11348
+ 'critical'
11349
+ 4. a single integer/float value denoting the absolute number or fraction of failing test units
11350
+ for the 'warning' level only
11351
+
11352
+ If the number of failing test units exceeds set thresholds, the validation step will be
11353
+ marked as 'warning', 'error', or 'critical'. Not all of the threshold levels need to be
11354
+ set; you're free to set any combination of them.
11355
+
11356
+ Aside from reporting failure conditions, thresholds can be used to determine the actions to
11357
+ take for each level of failure (using the `actions=` parameter).
11358
+
11359
+ Cross-Backend Validation
11360
+ ------------------------
11361
+ The `tbl_match()` method supports **automatic backend coercion** when comparing tables from
11362
+ different backends (e.g., comparing a Polars DataFrame against a Pandas DataFrame, or
11363
+ comparing database tables from DuckDB/SQLite against in-memory DataFrames). When tables with
11364
+ different backends are detected, the comparison table is automatically converted to match the
11365
+ data table's backend before validation proceeds.
11366
+
11367
+ **Certified Backend Combinations:**
11368
+
11369
+ All combinations of the following backends have been tested and certified to work (in both
11370
+ directions):
11371
+
11372
+ - Pandas DataFrame
11373
+ - Polars DataFrame
11374
+ - DuckDB (native)
11375
+ - DuckDB (as Ibis table)
11376
+ - SQLite (via Ibis)
11377
+
11378
+ Note that database backends (DuckDB, SQLite, PostgreSQL, MySQL, Snowflake, BigQuery) are
11379
+ automatically materialized during validation:
11380
+
11381
+ - if comparing **against Polars**: materialized to Polars
11382
+ - if comparing **against Pandas**: materialized to Pandas
11383
+ - if **both tables are database backends**: both materialized to Polars
11384
+
11385
+ This ensures optimal performance and type consistency.
11386
+
11387
+ **Data Types That Work Best in Cross-Backend Validation:**
11388
+
11389
+ - numeric types: int, float columns (including proper NaN handling)
11390
+ - string types: text columns with consistent encodings
11391
+ - boolean types: True/False values
11392
+ - null values: `None` and `NaN` are treated as equivalent across backends
11393
+ - list columns: nested list structures (with basic types)
11394
+
11395
+ **Known Limitations:**
11396
+
11397
+ While many data types work well in cross-backend validation, there are some known
11398
+ limitations to be aware of:
11399
+
11400
+ - date/datetime types: When converting between Polars and Pandas, date objects may be
11401
+ represented differently. For example, `datetime.date` objects in Pandas may become
11402
+ `pd.Timestamp` objects when converted from Polars, leading to false mismatches. To work
11403
+ around this, ensure both tables use the same datetime representation before comparison.
11404
+ - custom types: User-defined types or complex nested structures may not convert cleanly
11405
+ between backends and could cause unexpected comparison failures.
11406
+ - categorical types: Categorical/factor columns may have different internal
11407
+ representations across backends.
11408
+ - timezone-aware datetimes: Timezone handling differs between backends and may cause
11409
+ comparison issues.
11410
+
11411
+ Here are some ideas to overcome such limitations:
11412
+
11413
+ - for date/datetime columns, consider using `pre=` preprocessing to normalize representations
11414
+ before comparison.
11415
+ - when working with custom types, manually convert tables to the same backend before using
11416
+ `tbl_match()`.
11417
+ - use the same datetime precision (e.g., milliseconds vs microseconds) in both tables.
11418
+
11419
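A cross-backend sketch (assuming both Polars and Pandas are installed): the Pandas comparison table is coerced to the Polars target's backend before the match checks run.

```python
import pandas as pd
import polars as pl
import pointblank as pb

tbl_pl = pl.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
tbl_pd = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})

validation = (
    pb.Validate(data=tbl_pl)
    # tbl_pd is converted to Polars automatically before comparison
    .tbl_match(tbl_compare=tbl_pd)
    .interrogate()
)
```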
+ Examples
11420
+ --------
11421
+ ```{python}
11422
+ #| echo: false
11423
+ #| output: false
11424
+ import pointblank as pb
11425
+ pb.config(report_incl_header=False, report_incl_footer=False)
11426
+ ```
11427
+
11428
+ For the examples here, we'll create two simple tables to demonstrate the `tbl_match()`
11429
+ validation.
11430
+
11431
+ ```{python}
11432
+ import pointblank as pb
11433
+ import polars as pl
11434
+
11435
+ # Create the first table
11436
+ tbl_1 = pl.DataFrame({
11437
+ "a": [1, 2, 3, 4],
11438
+ "b": ["w", "x", "y", "z"],
11439
+ "c": [4.0, 5.0, 6.0, 7.0]
11440
+ })
11441
+
11442
+ # Create an identical table
11443
+ tbl_2 = pl.DataFrame({
11444
+ "a": [1, 2, 3, 4],
11445
+ "b": ["w", "x", "y", "z"],
11446
+ "c": [4.0, 5.0, 6.0, 7.0]
11447
+ })
11448
+
11449
+ pb.preview(tbl_1)
11450
+ ```
11451
+
11452
+ Let's validate that `tbl_1` matches `tbl_2`. Since these tables are identical, the
11453
+ validation should pass.
11454
+
11455
+ ```{python}
11456
+ validation = (
11457
+ pb.Validate(data=tbl_1)
11458
+ .tbl_match(tbl_compare=tbl_2)
11459
+ .interrogate()
11460
+ )
11461
+
11462
+ validation
11463
+ ```
11464
+
11465
+ The validation table shows that the single test unit passed, indicating that the two tables
11466
+ match completely.
11467
+
11468
+ Now, let's create a table with a slight difference and see what happens.
11469
+
11470
+ ```{python}
11471
+ # Create a table with one different value
11472
+ tbl_3 = pl.DataFrame({
11473
+ "a": [1, 2, 3, 4],
11474
+ "b": ["w", "x", "y", "z"],
11475
+ "c": [4.0, 5.5, 6.0, 7.0] # Changed 5.0 to 5.5
11476
+ })
11477
+
11478
+ validation = (
11479
+ pb.Validate(data=tbl_1)
11480
+ .tbl_match(tbl_compare=tbl_3)
11481
+ .interrogate()
11482
+ )
11483
+
11484
+ validation
11485
+ ```
11486
+
11487
+ The validation table shows that the single test unit failed because the tables don't match
11488
+ (one value is different in column `c`).
11489
+ """
11490
+
11491
+ assertion_type = _get_fn_name()
11492
+
11493
+ _check_pre(pre=pre)
11494
+ _check_thresholds(thresholds=thresholds)
11495
+ _check_boolean_input(param=active, param_name="active")
11496
+
11497
+ # Determine threshold to use (global or local) and normalize a local `thresholds=` value
11498
+ thresholds = (
11499
+ self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
11500
+ )
11501
+
11502
+ # Package up the `tbl_compare` into a dictionary for later interrogation
11503
+ values = {"tbl_compare": tbl_compare}
10349
11504
 
10350
11505
  # Determine brief to use (global or local) and transform any shorthands of `brief=`
10351
11506
  brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
@@ -11275,11 +12430,14 @@ class Validate:
11275
12430
  "col_vals_le",
11276
12431
  "col_vals_null",
11277
12432
  "col_vals_not_null",
12433
+ "col_vals_increasing",
12434
+ "col_vals_decreasing",
11278
12435
  "col_vals_between",
11279
12436
  "col_vals_outside",
11280
12437
  "col_vals_in_set",
11281
12438
  "col_vals_not_in_set",
11282
12439
  "col_vals_regex",
12440
+ "col_vals_within_spec",
11283
12441
  ]:
11284
12442
  # Process table for column validation
11285
12443
  tbl = _column_test_prep(
@@ -11315,6 +12473,36 @@ class Validate:
11315
12473
  elif assertion_method == "not_null":
11316
12474
  results_tbl = interrogate_not_null(tbl=tbl, column=column)
11317
12475
 
12476
+ elif assertion_type == "col_vals_increasing":
12477
+ from pointblank._interrogation import interrogate_increasing
12478
+
12479
+ # Extract direction options from val_info
12480
+ allow_stationary = validation.val_info.get("allow_stationary", False)
12481
+ decreasing_tol = validation.val_info.get("decreasing_tol", 0.0)
12482
+
12483
+ results_tbl = interrogate_increasing(
12484
+ tbl=tbl,
12485
+ column=column,
12486
+ allow_stationary=allow_stationary,
12487
+ decreasing_tol=decreasing_tol,
12488
+ na_pass=na_pass,
12489
+ )
12490
+
12491
+ elif assertion_type == "col_vals_decreasing":
12492
+ from pointblank._interrogation import interrogate_decreasing
12493
+
12494
+ # Extract direction options from val_info
12495
+ allow_stationary = validation.val_info.get("allow_stationary", False)
12496
+ increasing_tol = validation.val_info.get("increasing_tol", 0.0)
12497
+
12498
+ results_tbl = interrogate_decreasing(
12499
+ tbl=tbl,
12500
+ column=column,
12501
+ allow_stationary=allow_stationary,
12502
+ increasing_tol=increasing_tol,
12503
+ na_pass=na_pass,
12504
+ )
12505
+
11318
12506
  elif assertion_type == "col_vals_between":
11319
12507
  results_tbl = interrogate_between(
11320
12508
  tbl=tbl,
@@ -11348,6 +12536,13 @@ class Validate:
11348
12536
  tbl=tbl, column=column, values=value, na_pass=na_pass
11349
12537
  )
11350
12538
 
12539
+ elif assertion_type == "col_vals_within_spec":
12540
+ from pointblank._interrogation import interrogate_within_spec
12541
+
12542
+ results_tbl = interrogate_within_spec(
12543
+ tbl=tbl, column=column, values=value, na_pass=na_pass
12544
+ )
12545
+
11351
12546
  elif assertion_type == "col_vals_expr":
11352
12547
  results_tbl = col_vals_expr(
11353
12548
  data_tbl=data_tbl_step, expr=value, tbl_type=tbl_type
@@ -11441,6 +12636,25 @@ class Validate:
11441
12636
 
11442
12637
  results_tbl = None
11443
12638
 
12639
+ elif assertion_type == "tbl_match":
12640
+ from pointblank._interrogation import tbl_match
12641
+
12642
+ # Get the comparison table (could be callable or actual table)
12643
+ tbl_compare = value["tbl_compare"]
12644
+
12645
+ # If tbl_compare is callable, execute it to get the table
12646
+ if callable(tbl_compare):
12647
+ tbl_compare = tbl_compare()
12648
+
12649
+ result_bool = tbl_match(data_tbl=data_tbl_step, tbl_compare=tbl_compare)
12650
+
12651
+ validation.all_passed = result_bool
12652
+ validation.n = 1
12653
+ validation.n_passed = int(result_bool)
12654
+ validation.n_failed = 1 - result_bool
12655
+
12656
+ results_tbl = None
12657
+
11444
12658
  elif assertion_type == "conjointly":
11445
12659
  results_tbl = conjointly_validation(
11446
12660
  data_tbl=data_tbl_step,
@@ -11563,6 +12777,33 @@ class Validate:
11563
12777
  ),
11564
12778
  )
11565
12779
 
12780
+ # Add note for local thresholds (if they differ from global thresholds)
12781
+ if threshold != self.thresholds:
12782
+ if threshold != Thresholds():
12783
+ # Local thresholds are set - generate threshold note
12784
+ threshold_note_html = _create_local_threshold_note_html(
12785
+ thresholds=threshold, locale=self.locale
12786
+ )
12787
+ threshold_note_text = _create_local_threshold_note_text(thresholds=threshold)
12788
+
12789
+ # Add the note to the validation step
12790
+ validation._add_note(
12791
+ key="local_thresholds",
12792
+ markdown=threshold_note_html,
12793
+ text=threshold_note_text,
12794
+ )
12795
+ elif self.thresholds != Thresholds():
12796
+ # Thresholds explicitly reset to empty when global thresholds exist
12797
+ reset_note_html = _create_threshold_reset_note_html(locale=self.locale)
12798
+ reset_note_text = _create_threshold_reset_note_text()
12799
+
12800
+ # Add the note to the validation step
12801
+ validation._add_note(
12802
+ key="local_threshold_reset",
12803
+ markdown=reset_note_html,
12804
+ text=reset_note_text,
12805
+ )
12806
+
11566
12807
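A sketch of how this branch surfaces to users (assuming Polars is installed): a step-level `thresholds=` override that differs from the global thresholds is recorded under the `"local_thresholds"` note key.

```python
import polars as pl
import pointblank as pb

validation = (
    pb.Validate(data=pl.DataFrame({"x": [1, 2, 3]}), thresholds=(0.1, 0.2))
    .col_vals_gt(columns="x", value=0, thresholds=(0.5,))  # local override
    .interrogate()
)

# The override is recorded as a footnote on step 1
print(validation.get_note(1, "local_thresholds", format="text"))
```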
  # If there is any threshold level that has been exceeded, then produce and
11567
12808
  # set the general failure text for the validation step
11568
12809
  if validation.warning or validation.error or validation.critical:
@@ -13058,11 +14299,15 @@ class Validate:
13058
14299
  - [`col_vals_outside()`](`pointblank.Validate.col_vals_outside`)
13059
14300
  - [`col_vals_in_set()`](`pointblank.Validate.col_vals_in_set`)
13060
14301
  - [`col_vals_not_in_set()`](`pointblank.Validate.col_vals_not_in_set`)
14302
+ - [`col_vals_increasing()`](`pointblank.Validate.col_vals_increasing`)
14303
+ - [`col_vals_decreasing()`](`pointblank.Validate.col_vals_decreasing`)
13061
14304
  - [`col_vals_null()`](`pointblank.Validate.col_vals_null`)
13062
14305
  - [`col_vals_not_null()`](`pointblank.Validate.col_vals_not_null`)
13063
14306
  - [`col_vals_regex()`](`pointblank.Validate.col_vals_regex`)
14307
+ - [`col_vals_within_spec()`](`pointblank.Validate.col_vals_within_spec`)
13064
14308
  - [`col_vals_expr()`](`pointblank.Validate.col_vals_expr`)
13065
14309
  - [`conjointly()`](`pointblank.Validate.conjointly`)
14310
+ - [`prompt()`](`pointblank.Validate.prompt`)
13066
14311
 
13067
14312
  An extracted row for these validation methods means that a test unit failed for that row in
13068
14313
  the validation step.
@@ -13501,6 +14746,151 @@ class Validate:
13501
14746
 
13502
14747
  return sundered_tbl
13503
14748
 
14749
+ def get_notes(
14750
+ self, i: int, format: str = "dict"
14751
+ ) -> dict[str, dict[str, str]] | list[str] | None:
14752
+ """
14753
+ Get notes from a validation step by its step number.
14754
+
14755
+ This is a convenience method that retrieves notes from a specific validation step using
14756
+ the step number (1-indexed). It provides easier access to step notes without having to
14757
+ navigate through the `validation_info` list.
14758
+
14759
+ Parameters
14760
+ ----------
14761
+ i
14762
+ The step number (1-indexed) to retrieve notes from. This corresponds to the step
14763
+ numbers shown in validation reports.
14764
+ format
14765
+ The format to return notes in:
14766
+ - `"dict"`: Returns the full notes dictionary (default)
14767
+ - `"markdown"`: Returns a list of markdown-formatted note values
14768
+ - `"text"`: Returns a list of plain text note values
14769
+ - `"keys"`: Returns a list of note keys
14770
+
14771
+ Returns
14772
+ -------
14773
+ dict, list, or None
14774
+ The notes in the requested format, or `None` if the step doesn't exist or has no notes.
14775
+
14776
+ Examples
14777
+ --------
14778
+ ```python
14779
+ import pointblank as pb
14780
+ import polars as pl
14781
+
14782
+ # Create validation with notes
14783
+ validation = pb.Validate(pl.DataFrame({"x": [1, 2, 3]}))
14784
+ validation.col_vals_gt(columns="x", value=0)
14785
+
14786
+ # Add a note to step 1
14787
+ validation.validation_info[0]._add_note(
14788
+ key="info",
14789
+ markdown="This is a **test** note",
14790
+ text="This is a test note"
14791
+ )
14792
+
14793
+ # Interrogate
14794
+ validation.interrogate()
14795
+
14796
+ # Get notes from step 1 using the step number
14797
+ notes = validation.get_notes(1)
14798
+ # Returns: {'info': {'markdown': 'This is a **test** note', 'text': '...'}}
14799
+
14800
+ # Get just the markdown versions
14801
+ markdown_notes = validation.get_notes(1, format="markdown")
14802
+ # Returns: ['This is a **test** note']
14803
+
14804
+ # Get just the keys
14805
+ keys = validation.get_notes(1, format="keys")
14806
+ # Returns: ['info']
14807
+ ```
14808
+ """
14809
+ # Validate step number
14810
+ if not isinstance(i, int) or i < 1:
14811
+ raise ValueError(f"Step number must be a positive integer, got: {i}")
14812
+
14813
+ # Find the validation step with the matching step number
14814
+ # Note: validation_info may contain multiple steps after segmentation,
14815
+ # so we need to find the one with the matching `i` value
14816
+ for validation in self.validation_info:
14817
+ if validation.i == i:
14818
+ return validation._get_notes(format=format)
14819
+
14820
+ # Step not found
14821
+ return None
14822
+
14823
+ def get_note(self, i: int, key: str, format: str = "dict") -> dict[str, str] | str | None:
14824
+ """
14825
+ Get a specific note from a validation step by its step number and note key.
14826
+
14827
+ This method retrieves a specific note from a validation step using the step number
14828
+ (1-indexed) and the note key. It provides easier access to individual notes without having
14829
+ to navigate through the `validation_info` list or retrieve all notes.
14830
+
14831
+ Parameters
14832
+ ----------
14833
+ i
14834
+ The step number (1-indexed) to retrieve the note from. This corresponds to the step
14835
+ numbers shown in validation reports.
14836
+ key
14837
+ The key of the note to retrieve.
14838
+ format
14839
+ The format to return the note in:
14840
+ - `"dict"`: Returns the note as a dictionary with 'markdown' and 'text' keys (default)
14841
+ - `"markdown"`: Returns just the markdown-formatted note value
14842
+ - `"text"`: Returns just the plain text note value
14843
+
14844
+ Returns
14845
+ -------
14846
+ dict, str, or None
14847
+ The note in the requested format, or `None` if the step or note doesn't exist.
14848
+
14849
+ Examples
14850
+ --------
14851
+ ```python
14852
+ import pointblank as pb
14853
+ import polars as pl
14854
+
14855
+ # Create validation with notes
14856
+ validation = pb.Validate(pl.DataFrame({"x": [1, 2, 3]}))
14857
+ validation.col_vals_gt(columns="x", value=0)
14858
+
14859
+ # Add a note to step 1
14860
+ validation.validation_info[0]._add_note(
14861
+ key="threshold_info",
14862
+ markdown="Using **default** thresholds",
14863
+ text="Using default thresholds"
14864
+ )
14865
+
14866
+ # Interrogate
14867
+ validation.interrogate()
14868
+
14869
+ # Get a specific note from step 1 using step number and key
14870
+ note = validation.get_note(1, "threshold_info")
14871
+ # Returns: {'markdown': 'Using **default** thresholds', 'text': '...'}
14872
+
14873
+ # Get just the markdown version
14874
+ markdown = validation.get_note(1, "threshold_info", format="markdown")
14875
+ # Returns: 'Using **default** thresholds'
14876
+
14877
+ # Get just the text version
14878
+ text = validation.get_note(1, "threshold_info", format="text")
14879
+ # Returns: 'Using default thresholds'
14880
+ ```
14881
+ """
14882
+ # Validate step number
14883
+ if not isinstance(i, int) or i < 1:
14884
+ raise ValueError(f"Step number must be a positive integer, got: {i}")
14885
+
14886
+ # Find the validation step with the matching step number
14887
+ for validation in self.validation_info:
14888
+ if validation.i == i:
14889
+ return validation._get_note(key=key, format=format)
14890
+
14891
+ # Step not found
14892
+ return None
14893
+
13504
14894
  def get_tabular_report(
13505
14895
  self, title: str | None = ":default:", incl_header: bool = None, incl_footer: bool = None
13506
14896
  ) -> GT:
@@ -13907,6 +15297,9 @@ class Validate:
13907
15297
  elif assertion_type[i] in ["col_vals_expr", "conjointly"]:
13908
15298
  values_upd.append("COLUMN EXPR")
13909
15299
 
15300
+ elif assertion_type[i] in ["col_vals_increasing", "col_vals_decreasing"]:
15301
+ values_upd.append("")
15302
+
13910
15303
  elif assertion_type[i] in ["row_count_match", "col_count_match"]:
13911
15304
  count = values[i]["count"]
13912
15305
  inverse = values[i]["inverse"]
@@ -13916,6 +15309,9 @@ class Validate:
13916
15309
 
13917
15310
  values_upd.append(str(count))
13918
15311
 
15312
+ elif assertion_type[i] in ["tbl_match"]:
15313
+ values_upd.append("EXTERNAL TABLE")
15314
+
13919
15315
  elif assertion_type[i] in ["specially"]:
13920
15316
  values_upd.append("EXPR")
13921
15317
 
@@ -13924,6 +15320,11 @@ class Validate:
13924
15320
 
13925
15321
  values_upd.append(str(pattern))
13926
15322
 
15323
+ elif assertion_type[i] in ["col_vals_within_spec"]:
15324
+ spec = value["spec"]
15325
+
15326
+ values_upd.append(str(spec))
15327
+
13927
15328
  elif assertion_type[i] in ["prompt"]: # pragma: no cover
13928
15329
  # For AI validation, show only the prompt, not the full config
13929
15330
  if isinstance(value, dict) and "prompt" in value: # pragma: no cover
@@ -14180,6 +15581,7 @@ class Validate:
14180
15581
  validation_info_dict.pop("label")
14181
15582
  validation_info_dict.pop("active")
14182
15583
  validation_info_dict.pop("all_passed")
15584
+ validation_info_dict.pop("notes")
14183
15585
 
14184
15586
  # If no interrogation performed, populate the `i` entry with a sequence of integers
14185
15587
  # from `1` to the number of validation steps
@@ -14364,8 +15766,14 @@ class Validate:
14364
15766
  gt_tbl = gt_tbl.tab_header(title=html(title_text), subtitle=html(combined_subtitle))
14365
15767
 
14366
15768
  if incl_footer:
15769
+ # Add table time as HTML source note
14367
15770
  gt_tbl = gt_tbl.tab_source_note(source_note=html(table_time))
14368
15771
 
15772
+ # Create notes markdown from validation steps and add as separate source note
15773
+ notes_markdown = _create_notes_html(self.validation_info)
15774
+ if notes_markdown:
15775
+ gt_tbl = gt_tbl.tab_source_note(source_note=md(notes_markdown))
15776
+
14369
15777
  # If the interrogation has not been performed, then style the table columns dealing with
14370
15778
  # interrogation data as grayed out
14371
15779
  if not interrogation_performed:
@@ -14473,11 +15881,15 @@ class Validate:
14473
15881
  - [`col_vals_outside()`](`pointblank.Validate.col_vals_outside`)
14474
15882
  - [`col_vals_in_set()`](`pointblank.Validate.col_vals_in_set`)
14475
15883
  - [`col_vals_not_in_set()`](`pointblank.Validate.col_vals_not_in_set`)
15884
+ - [`col_vals_increasing()`](`pointblank.Validate.col_vals_increasing`)
15885
+ - [`col_vals_decreasing()`](`pointblank.Validate.col_vals_decreasing`)
14476
15886
  - [`col_vals_null()`](`pointblank.Validate.col_vals_null`)
14477
15887
  - [`col_vals_not_null()`](`pointblank.Validate.col_vals_not_null`)
14478
15888
  - [`col_vals_regex()`](`pointblank.Validate.col_vals_regex`)
15889
+ - [`col_vals_within_spec()`](`pointblank.Validate.col_vals_within_spec`)
14479
15890
  - [`col_vals_expr()`](`pointblank.Validate.col_vals_expr`)
14480
15891
  - [`conjointly()`](`pointblank.Validate.conjointly`)
15892
+ - [`prompt()`](`pointblank.Validate.prompt`)
14481
15893
  - [`rows_complete()`](`pointblank.Validate.rows_complete`)
14482
15894
 
14483
15895
  The [`rows_distinct()`](`pointblank.Validate.rows_distinct`) validation step will produce a
@@ -16064,6 +17476,7 @@ def _validation_info_as_dict(validation_info: _ValidationInfo) -> dict:
16064
17476
  "critical",
16065
17477
  "extract",
16066
17478
  "proc_duration_s",
17479
+ "notes",
16067
17480
  ]
16068
17481
 
16069
17482
  # Filter the validation information to include only the selected fields
@@ -16407,6 +17820,14 @@ def _transform_assertion_str(
16407
17820
  # Use Markdown-to-HTML conversion to format the `brief_str` text
16408
17821
  brief_str = [commonmark.commonmark(x) for x in brief_str]
16409
17822
 
17823
+ # Add inline styles to <p> tags for proper rendering in all environments
17824
+ # In some sandboxed HTML environments (e.g., Streamlit), <p> tags don't inherit
17825
+ # font-size from parent divs, so we add inline styles directly to the <p> tags
17826
+ brief_str = [
17827
+ re.sub(r"<p>", r'<p style="font-size: inherit; margin: 0;">', x) if x.strip() else x
17828
+ for x in brief_str
17829
+ ]
17830
+
16410
17831
  # Obtain the number of characters contained in the assertion
16411
17832
  # string; this is important for sizing components appropriately
16412
17833
  assertion_type_nchar = [len(x) for x in assertion_str]
@@ -16535,6 +17956,86 @@ def _create_table_time_html(
16535
17956
  )
16536
17957
 
16537
17958
 
17959
+ def _create_notes_html(validation_info: list) -> str:
17960
+ """
17961
+ Create markdown text for validation notes/footnotes.
17962
+
17963
+ This function collects notes from all validation steps and formats them as footnotes
17964
+ for display in the report footer. Each note is prefixed with the step number in
17965
+ uppercase small caps bold formatting, and the note content is rendered as markdown.
17966
+
17967
+ Parameters
17968
+ ----------
17969
+ validation_info
17970
+ List of _ValidationInfo objects from which to extract notes.
17971
+
17972
+ Returns
17973
+ -------
17974
+ str
17975
+ Markdown string containing formatted footnotes, or empty string if no notes exist.
17976
+ """
17977
+ # Collect all notes from validation steps
17978
+ all_notes = []
17979
+ for step in validation_info:
17980
+ if step.notes:
17981
+ for key, content in step.notes.items():
17982
+ # Store note with step number for context
17983
+ all_notes.append(
17984
+ {
17985
+ "step": step.i,
17986
+ "key": key,
17987
+ "markdown": content["markdown"],
17988
+ "text": content["text"],
17989
+ }
17990
+ )
17991
+
17992
+ # If no notes, return empty string
17993
+ if not all_notes:
17994
+ return ""
17995
+
17996
+ # Build markdown for notes section
17997
+ # Start with a styled horizontal rule and bold "Notes" header
17998
+ notes_parts = [
17999
+ (
18000
+ "<hr style='border: none; border-top-width: 1px; border-top-style: dotted; "
18001
+ "border-top-color: #B5B5B5; margin-top: -3px; margin-bottom: 3px;'>"
18002
+ ),
18003
+ "<strong>Notes</strong>",
18004
+ "",
18005
+ ]
18006
+
18007
+ previous_step = None
18008
+ for note in all_notes:
18009
+ # Determine if this is the first note for this step
18010
+ is_first_for_step = note["step"] != previous_step
18011
+ previous_step = note["step"]
18012
+
18013
+ # Format step label with HTML for uppercase small caps bold
18014
+ # Use lighter color for subsequent notes of the same step
18015
+ step_color = "#333333" if is_first_for_step else "#999999"
18016
+ step_label = (
18017
+ f"<span style='font-variant: small-caps; font-weight: bold; font-size: smaller; "
18018
+ f"text-transform: uppercase; color: {step_color};'>Step {note['step']}</span>"
18019
+ )
18020
+
18021
+ # Format note key in monospaced font with smaller size
18022
+ note_key = f"<span style='font-family: \"IBM Plex Mono\", monospace; font-size: smaller;'>({note['key']})</span>"
18023
+
18024
+ # Combine step label, note key, and markdown content
18025
+ note_text = f"{step_label} {note_key} {note['markdown']}"
18026
+ notes_parts.append(note_text)
18027
+ notes_parts.append("") # Add blank line between notes
18028
+
18029
+ # Remove trailing blank line
18030
+ if notes_parts[-1] == "":
18031
+ notes_parts.pop()
18032
+
18033
+ # Join with newlines to create markdown text
18034
+ notes_markdown = "\n".join(notes_parts)
18035
+
18036
+ return notes_markdown
18037
+
18038
+
16538
18039
  def _create_label_html(label: str | None, start_time: str) -> str:
16539
18040
  if label is None:
16540
18041
  # Remove the decimal and everything beyond that
@@ -16619,60 +18120,93 @@ def _format_single_float_with_gt_custom(
16619
18120
  return formatted_values[0] # Return the single formatted value
16620
18121
 
16621
18122
 
18123
+ def _format_number_safe(
18124
+ value: float, decimals: int, drop_trailing_zeros: bool = False, locale: str = "en", df_lib=None
18125
+ ) -> str:
18126
+ """
18127
+ Safely format a float value with locale support.
18128
+
18129
+ Uses GT-based formatting when a DataFrame library is available, otherwise falls back to
18130
+ vals.fmt_number. This helper is used by threshold formatting functions.
18131
+ """
18132
+ if df_lib is not None and value is not None:
18133
+ # Use GT-based formatting to avoid Pandas dependency completely
18134
+ return _format_single_float_with_gt_custom(
18135
+ value,
18136
+ decimals=decimals,
18137
+ drop_trailing_zeros=drop_trailing_zeros,
18138
+ locale=locale,
18139
+ df_lib=df_lib,
18140
+ )
18141
+ else:
18142
+ # Fallback to the original behavior
18143
+ return fmt_number(
18144
+ value, decimals=decimals, drop_trailing_zeros=drop_trailing_zeros, locale=locale
18145
+ )[0] # pragma: no cover
18146
+
18147
+
18148
+ def _format_integer_safe(value: int, locale: str = "en", df_lib=None) -> str:
18149
+ """
18150
+ Safely format an integer value with locale support.
18151
+
18152
+ Uses GT-based formatting when a DataFrame library is available, otherwise falls back to
18153
+ vals.fmt_integer. This helper is used by threshold formatting functions.
18154
+ """
18155
+ if df_lib is not None and value is not None:
18156
+ # Use GT-based formatting to avoid Pandas dependency completely
18157
+ return _format_single_integer_with_gt(value, locale=locale, df_lib=df_lib)
18158
+ else:
18159
+ # Fallback to the original behavior
18160
+ return fmt_integer(value, locale=locale)[0]
18161
+
18162
+
16622
18163
  def _create_thresholds_html(thresholds: Thresholds, locale: str, df_lib=None) -> str:
16623
18164
  if thresholds == Thresholds():
16624
18165
  return ""
16625
18166
 
16626
- # Helper functions to format numbers safely
16627
- def _format_number_safe(value: float, decimals: int, drop_trailing_zeros: bool = False) -> str:
16628
- if df_lib is not None and value is not None:
16629
- # Use GT-based formatting to avoid Pandas dependency completely
16630
- return _format_single_float_with_gt_custom(
16631
- value,
16632
- decimals=decimals,
16633
- drop_trailing_zeros=drop_trailing_zeros,
16634
- locale=locale,
16635
- df_lib=df_lib,
16636
- )
16637
- else:
16638
- # Fallback to the original behavior
16639
- return fmt_number(
16640
- value, decimals=decimals, drop_trailing_zeros=drop_trailing_zeros, locale=locale
16641
- )[0] # pragma: no cover
16642
-
16643
- def _format_integer_safe(value: int) -> str:
16644
- if df_lib is not None and value is not None:
16645
- # Use GT-based formatting to avoid Pandas dependency completely
16646
- return _format_single_integer_with_gt(value, locale=locale, df_lib=df_lib)
16647
- else:
16648
- # Fallback to the original behavior
16649
- return fmt_integer(value, locale=locale)[0]
16650
-
16651
18167
  warning = (
16652
- _format_number_safe(thresholds.warning_fraction, decimals=3, drop_trailing_zeros=True)
18168
+ _format_number_safe(
18169
+ thresholds.warning_fraction,
18170
+ decimals=3,
18171
+ drop_trailing_zeros=True,
18172
+ locale=locale,
18173
+ df_lib=df_lib,
18174
+ )
16653
18175
  if thresholds.warning_fraction is not None
16654
18176
  else (
16655
- _format_integer_safe(thresholds.warning_count)
18177
+ _format_integer_safe(thresholds.warning_count, locale=locale, df_lib=df_lib)
16656
18178
  if thresholds.warning_count is not None
16657
18179
  else "&mdash;"
16658
18180
  )
16659
18181
  )
16660
18182
 
16661
18183
  error = (
16662
- _format_number_safe(thresholds.error_fraction, decimals=3, drop_trailing_zeros=True)
18184
+ _format_number_safe(
18185
+ thresholds.error_fraction,
18186
+ decimals=3,
18187
+ drop_trailing_zeros=True,
18188
+ locale=locale,
18189
+ df_lib=df_lib,
18190
+ )
16663
18191
  if thresholds.error_fraction is not None
16664
18192
  else (
16665
- _format_integer_safe(thresholds.error_count)
18193
+ _format_integer_safe(thresholds.error_count, locale=locale, df_lib=df_lib)
16666
18194
  if thresholds.error_count is not None
16667
18195
  else "&mdash;"
16668
18196
  )
16669
18197
  )
16670
18198
 
16671
18199
  critical = (
16672
- _format_number_safe(thresholds.critical_fraction, decimals=3, drop_trailing_zeros=True)
18200
+ _format_number_safe(
18201
+ thresholds.critical_fraction,
18202
+ decimals=3,
18203
+ drop_trailing_zeros=True,
18204
+ locale=locale,
18205
+ df_lib=df_lib,
18206
+ )
16673
18207
  if thresholds.critical_fraction is not None
16674
18208
  else (
16675
- _format_integer_safe(thresholds.critical_count)
18209
+ _format_integer_safe(thresholds.critical_count, locale=locale, df_lib=df_lib)
16676
18210
  if thresholds.critical_count is not None
16677
18211
  else "&mdash;"
16678
18212
  )
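All three severity levels above follow the same selection rule: a fraction-based threshold takes precedence, then an absolute count, then an em-dash placeholder. A simplified, locale-free sketch of that rule (the real code routes through `_format_number_safe` and `_format_integer_safe`):

```python
# Simplified sketch of the per-severity selection rule; locale handling and
# the GT-based formatters are deliberately elided.
def pick_threshold_label(fraction: float | None, count: int | None) -> str:
    if fraction is not None:
        # Fractions render with up to 3 decimals, trailing zeros dropped
        return f"{fraction:.3f}".rstrip("0").rstrip(".")
    if count is not None:
        return f"{count:,}"
    return "&mdash;"  # placeholder when the level is unset

assert pick_threshold_label(0.25, None) == "0.25"
assert pick_threshold_label(None, 1000) == "1,000"
assert pick_threshold_label(None, None) == "&mdash;"
```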
@@ -16718,6 +18252,187 @@ def _create_thresholds_html(thresholds: Thresholds, locale: str, df_lib=None) -> str:
16718
18252
  )
16719
18253
 
16720
18254
 
18255
+ def _create_local_threshold_note_html(thresholds: Thresholds, locale: str = "en") -> str:
18256
+ """
18257
+ Create a miniature HTML representation of local thresholds for display in notes.
18258
+
18259
+ This function generates a compact HTML representation of threshold values suitable for
18260
+ display in validation step notes/footnotes. It mirrors the visual style of the global
18261
+ thresholds shown in the report header, but in a more compact form.
18262
+
18263
+ Parameters
18264
+ ----------
18265
+ thresholds
18266
+ The Thresholds object containing the local threshold values.
18267
+ locale
18268
+ The locale to use for formatting numbers (default: "en").
18269
+
18270
+ Returns
18271
+ -------
18272
+ str
18273
+ HTML string containing the formatted threshold information.
18274
+ """
18275
+ if thresholds == Thresholds():
18276
+ return ""
18277
+
18278
+ # Get df_lib for formatting
18279
+ df_lib = None
18280
+ if _is_lib_present("polars"):
18281
+ import polars as pl
18282
+
18283
+ df_lib = pl
18284
+ elif _is_lib_present("pandas"):
18285
+ import pandas as pd
18286
+
18287
+ df_lib = pd
18288
+
18289
+ # Helper function to format threshold values using the shared formatting functions
18290
+ def _format_threshold_value(fraction: float | None, count: int | None) -> str:
18291
+ if fraction is not None:
18292
+ # Format as a fraction using locale-aware number formatting
18293
+ if fraction == 0:
18294
+ return "0"
18295
+ elif fraction < 0.01:
18296
+ # For very small fractions, show "<0.01" with locale formatting
18297
+ formatted = _format_number_safe(0.01, decimals=2, locale=locale, df_lib=df_lib)
18298
+ return f"&lt;{formatted}"
18299
+ else:
18300
+ # Use shared formatting function with drop_trailing_zeros
18301
+ formatted = _format_number_safe(
18302
+ fraction, decimals=2, drop_trailing_zeros=True, locale=locale, df_lib=df_lib
18303
+ )
18304
+ return formatted
18305
+ elif count is not None:
18306
+ # Format integer count using shared formatting function
18307
+ return _format_integer_safe(count, locale=locale, df_lib=df_lib)
18308
+ else:
18309
+ return "&mdash;"
18310
+
18311
+ warning = _format_threshold_value(thresholds.warning_fraction, thresholds.warning_count)
18312
+ error = _format_threshold_value(thresholds.error_fraction, thresholds.error_count)
18313
+ critical = _format_threshold_value(thresholds.critical_fraction, thresholds.critical_count)
18314
+
18315
+ warning_color = SEVERITY_LEVEL_COLORS["warning"]
18316
+ error_color = SEVERITY_LEVEL_COLORS["error"]
18317
+ critical_color = SEVERITY_LEVEL_COLORS["critical"]
18318
+
18319
+ # Build threshold parts with colored letters in monospace font
18320
+ threshold_parts = []
18321
+
18322
+ # Add warning threshold if set
18323
+ if thresholds.warning is not None:
18324
+ threshold_parts.append(
18325
+ f'<span style="color: {warning_color}; font-weight: bold;">W</span>:{warning}'
18326
+ )
18327
+
18328
+ # Add error threshold if set
18329
+ if thresholds.error is not None:
18330
+ threshold_parts.append(
18331
+ f'<span style="color: {error_color}; font-weight: bold;">E</span>:{error}'
18332
+ )
18333
+
18334
+ # Add critical threshold if set
18335
+ if thresholds.critical is not None:
18336
+ threshold_parts.append(
18337
+ f'<span style="color: {critical_color}; font-weight: bold;">C</span>:{critical}'
18338
+ )
18339
+
18340
+ # Join with "|" separator (only between multiple thresholds)
18341
+ thresholds_html = f'<span style="font-family: monospace;">{"|".join(threshold_parts)}</span>'
18342
+
18343
+ # Get localized text and format with threshold HTML
18344
+ localized_text = NOTES_TEXT["local_threshold"].get(locale, NOTES_TEXT["local_threshold"]["en"])
18345
+ note_html = localized_text.replace("{thresholds}", thresholds_html)
18346
+
18347
+ return note_html
18348
+
18349
+
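To make the output concrete, a hedged illustration of the markup this produces. The per-letter color spans come from SEVERITY_LEVEL_COLORS and the surrounding sentence from NOTES_TEXT, so the exact string will differ:

```python
# Illustrative only: Thresholds is public API, but the note builder is
# internal, so both the import path and the exact markup are assumptions.
from pointblank import Thresholds
from pointblank.validate import _create_local_threshold_note_html

t = Thresholds(warning=0.1, error=0.25, critical=0.35)
html = _create_local_threshold_note_html(t, locale="en")
# The threshold portion should resemble (per-letter color spans elided):
#   <span style="font-family: monospace;">W:0.1|E:0.25|C:0.35</span>
```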
18350
+ def _create_local_threshold_note_text(thresholds: Thresholds) -> str:
18351
+ """
18352
+ Create a plain text representation of local thresholds for display in logs.
18353
+
18354
+ This function generates a plain text representation of threshold values that is
18355
+ suitable for text-based output such as logs or the console.
18356
+
18357
+ Parameters
18358
+ ----------
18359
+ thresholds
18360
+ The Thresholds object containing the local threshold values.
18361
+
18362
+ Returns
18363
+ -------
18364
+ str
18365
+ Plain text string containing the formatted threshold information.
18366
+ """
18367
+ if thresholds == Thresholds():
18368
+ return ""
18369
+
18370
+ # Helper function to format threshold values
18371
+ def _format_threshold_value(fraction: float | None, count: int | None) -> str:
18372
+ if fraction is not None:
18373
+ if fraction == 0:
18374
+ return "0"
18375
+ elif fraction < 0.01:
18376
+ return "<0.01"
18377
+ else:
18378
+ return f"{fraction:.2f}".rstrip("0").rstrip(".")
18379
+ elif count is not None:
18380
+ return str(count)
18381
+ else:
18382
+ return "—"
18383
+
18384
+ parts = []
18385
+
18386
+ if thresholds.warning is not None:
18387
+ warning = _format_threshold_value(thresholds.warning_fraction, thresholds.warning_count)
18388
+ parts.append(f"W: {warning}")
18389
+
18390
+ if thresholds.error is not None:
18391
+ error = _format_threshold_value(thresholds.error_fraction, thresholds.error_count)
18392
+ parts.append(f"E: {error}")
18393
+
18394
+ if thresholds.critical is not None:
18395
+ critical = _format_threshold_value(thresholds.critical_fraction, thresholds.critical_count)
18396
+ parts.append(f"C: {critical}")
18397
+
18398
+ if parts:
18399
+ return "Step-specific thresholds set: " + ", ".join(parts)
18400
+ else:
18401
+ return ""
18402
+
18403
+
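A quick sketch of the expected plain-text output, including the `<0.01` clamp for very small fractions (same private-import caveat as above):

```python
from pointblank import Thresholds
from pointblank.validate import _create_local_threshold_note_text

# Assumption: a float below 1 is interpreted as a fraction and an
# integer as an absolute count, as elsewhere in Thresholds.
t = Thresholds(warning=0.005, error=10)
print(_create_local_threshold_note_text(t))
# -> "Step-specific thresholds set: W: <0.01, E: 10"
```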
18404
+ def _create_threshold_reset_note_html(locale: str = "en") -> str:
18405
+ """
18406
+ Create an HTML note for when thresholds are explicitly reset to empty.
18407
+
18408
+ Parameters
18409
+ ----------
18410
+ locale
18411
+ The locale string (e.g., 'en', 'fr').
18412
+
18413
+ Returns
18414
+ -------
18415
+ str
18416
+ HTML-formatted note text.
18417
+ """
18418
+ text = NOTES_TEXT.get("local_threshold_reset", {}).get(
18419
+ locale, NOTES_TEXT.get("local_threshold_reset", {}).get("en", "")
18420
+ )
18421
+ return text
18422
+
18423
+
18424
+ def _create_threshold_reset_note_text() -> str:
18425
+ """
18426
+ Create a plain text note for when thresholds are explicitly reset to empty.
18427
+
18428
+ Returns
18429
+ -------
18430
+ str
18431
+ Plain text note.
18432
+ """
18433
+ return "Global thresholds explicitly not used for this step."
18434
+
18435
+
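The HTML variant's lookup degrades gracefully: an unknown locale falls back to the `"en"` entry, and a missing `"local_threshold_reset"` key yields an empty string rather than a `KeyError`. A short sketch, again assuming the private import path:

```python
from pointblank.validate import _create_threshold_reset_note_html

print(_create_threshold_reset_note_html("en"))  # localized English note
print(_create_threshold_reset_note_html("xx"))  # unknown locale -> "en" text
```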
16721
18436
  def _step_report_row_based(
16722
18437
  assertion_type: str,
16723
18438
  i: int,