pointblank 0.15.0__py3-none-any.whl → 0.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pointblank/validate.py CHANGED
@@ -12,6 +12,7 @@ import tempfile
12
12
  import threading
13
13
  from dataclasses import dataclass
14
14
  from enum import Enum
15
+ from functools import partial
15
16
  from importlib.metadata import version
16
17
  from pathlib import Path
17
18
  from typing import TYPE_CHECKING, Any, Callable, Literal
@@ -45,6 +46,7 @@ from pointblank._constants import (
45
46
  )
46
47
  from pointblank._constants_translations import (
47
48
  EXPECT_FAIL_TEXT,
49
+ NOTES_TEXT,
48
50
  STEP_REPORT_TEXT,
49
51
  VALIDATION_REPORT_TEXT,
50
52
  )
@@ -53,6 +55,7 @@ from pointblank._interrogation import (
53
55
  SpeciallyValidation,
54
56
  col_count_match,
55
57
  col_exists,
58
+ col_pct_null,
56
59
  col_schema_match,
57
60
  col_vals_expr,
58
61
  conjointly_validation,
@@ -122,6 +125,7 @@ __all__ = [
122
125
  "write_file",
123
126
  "config",
124
127
  "connect_to_table",
128
+ "print_database_tables",
125
129
  "preview",
126
130
  "missing_vals_tbl",
127
131
  "get_action_metadata",
@@ -361,12 +365,16 @@ class PointblankConfig:
361
365
 
362
366
  report_incl_header: bool = True
363
367
  report_incl_footer: bool = True
368
+ report_incl_footer_timings: bool = True
369
+ report_incl_footer_notes: bool = True
364
370
  preview_incl_header: bool = True
365
371
 
366
372
  def __repr__(self):
367
373
  return (
368
374
  f"PointblankConfig(report_incl_header={self.report_incl_header}, "
369
375
  f"report_incl_footer={self.report_incl_footer}, "
376
+ f"report_incl_footer_timings={self.report_incl_footer_timings}, "
377
+ f"report_incl_footer_notes={self.report_incl_footer_notes}, "
370
378
  f"preview_incl_header={self.preview_incl_header})"
371
379
  )
372
380
 
@@ -378,6 +386,8 @@ global_config = PointblankConfig()
378
386
  def config(
379
387
  report_incl_header: bool = True,
380
388
  report_incl_footer: bool = True,
389
+ report_incl_footer_timings: bool = True,
390
+ report_incl_footer_notes: bool = True,
381
391
  preview_incl_header: bool = True,
382
392
  ) -> PointblankConfig:
383
393
  """
@@ -391,7 +401,13 @@ def config(
391
401
  threshold levels (if set).
392
402
  report_incl_footer
393
403
  Should the footer of the validation table report be displayed? The footer contains the
394
- starting and ending times of the interrogation.
404
+ starting and ending times of the interrogation and any notes added to validation steps.
405
+ report_incl_footer_timings
406
+ Controls whether the validation timing information (start time, duration, and end time)
407
+ should be displayed in the footer. Only applies when `report_incl_footer=True`.
408
+ report_incl_footer_notes
409
+ Controls whether the notes from validation steps should be displayed in the footer. Only
410
+ applies when `report_incl_footer=True`.
395
411
  preview_incl_header
396
412
  Whether the header should be present in any preview table (generated via the
397
413
  [`preview()`](`pointblank.preview`) function).
@@ -405,6 +421,8 @@ def config(
405
421
  global global_config
406
422
  global_config.report_incl_header = report_incl_header # pragma: no cover
407
423
  global_config.report_incl_footer = report_incl_footer # pragma: no cover
424
+ global_config.report_incl_footer_timings = report_incl_footer_timings # pragma: no cover
425
+ global_config.report_incl_footer_notes = report_incl_footer_notes # pragma: no cover
408
426
  global_config.preview_incl_header = preview_incl_header # pragma: no cover
409
427
 
410
428
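For orientation, here is a minimal usage sketch of the two new footer flags added to `config()` above; it assumes only the parameters shown in this diff and keeps all other options at their defaults.

```python
import pointblank as pb

# Keep the report footer, but hide the interrogation timings while still
# showing any notes attached to validation steps (sketch based on the new
# report_incl_footer_timings / report_incl_footer_notes parameters above).
pb.config(
    report_incl_footer=True,
    report_incl_footer_timings=False,
    report_incl_footer_notes=True,
)
```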
 
@@ -3918,6 +3936,47 @@ class _ValidationInfo:
3918
3936
  return self.notes is not None and len(self.notes) > 0
3919
3937
 
3920
3938
 
3939
+ def _handle_connection_errors(e: Exception, connection_string: str) -> None:
3940
+ """
3941
+ Shared error handling for database connection failures.
3942
+
3943
+ Raises appropriate ConnectionError with helpful messages based on the exception.
3944
+ """
3945
+
3946
+ error_str = str(e).lower()
3947
+ backend_install_map = {
3948
+ "duckdb": "pip install 'ibis-framework[duckdb]'",
3949
+ "postgresql": "pip install 'ibis-framework[postgres]'",
3950
+ "postgres": "pip install 'ibis-framework[postgres]'",
3951
+ "mysql": "pip install 'ibis-framework[mysql]'",
3952
+ "sqlite": "pip install 'ibis-framework[sqlite]'",
3953
+ "bigquery": "pip install 'ibis-framework[bigquery]'",
3954
+ "snowflake": "pip install 'ibis-framework[snowflake]'",
3955
+ }
3956
+
3957
+ # Check if this is a missing backend dependency
3958
+ for backend, install_cmd in backend_install_map.items():
3959
+ if backend in error_str and ("not found" in error_str or "no module" in error_str):
3960
+ raise ConnectionError(
3961
+ f"Missing {backend.upper()} backend for Ibis. Install it with:\n"
3962
+ f" {install_cmd}\n\n"
3963
+ f"Original error: {e}"
3964
+ ) from e
3965
+
3966
+ # Generic connection error
3967
+ raise ConnectionError( # pragma: no cover
3968
+ f"Failed to connect using: {connection_string}\n"
3969
+ f"Error: {e}\n\n"
3970
+ f"Supported connection string formats:\n"
3971
+ f"- DuckDB: 'duckdb:///path/to/file.ddb'\n"
3972
+ f"- SQLite: 'sqlite:///path/to/file.db'\n"
3973
+ f"- PostgreSQL: 'postgresql://user:pass@host:port/db'\n"
3974
+ f"- MySQL: 'mysql://user:pass@host:port/db'\n"
3975
+ f"- BigQuery: 'bigquery://project/dataset'\n"
3976
+ f"- Snowflake: 'snowflake://user:pass@account/db/schema'"
3977
+ ) from e
3978
+
3979
+
3921
3980
  def connect_to_table(connection_string: str) -> Any:
3922
3981
  """
3923
3982
  Connect to a database table using a connection string.
@@ -3997,7 +4056,11 @@ def connect_to_table(connection_string: str) -> Any:
3997
4056
  pip install 'ibis-framework[duckdb]' # for DuckDB
3998
4057
  pip install 'ibis-framework[postgres]' # for PostgreSQL
3999
4058
  ```
4059
+ See Also
4060
+ --------
4061
+ print_database_tables : List all available tables in a database for discovery
4000
4062
  """
4063
+
4001
4064
  # Check if Ibis is available
4002
4065
  if not _is_lib_present(lib_name="ibis"):
4003
4066
  raise ImportError(
@@ -4011,14 +4074,10 @@ def connect_to_table(connection_string: str) -> Any:
4011
4074
  if "::" not in connection_string:
4012
4075
  # Try to connect to get available tables for helpful error message
4013
4076
  try:
4014
- # Extract the base connection string (without table name)
4015
4077
  base_connection = connection_string
4016
-
4017
- # Connect to the database
4018
4078
  conn = ibis.connect(base_connection)
4019
4079
 
4020
- # Get list of available tables
4021
- try:
4080
+ try: # pragma: no cover
4022
4081
  available_tables = conn.list_tables()
4023
4082
  except Exception: # pragma: no cover
4024
4083
  available_tables = []
@@ -4035,7 +4094,6 @@ def connect_to_table(connection_string: str) -> Any:
4035
4094
  f" {connection_string}::TABLE_NAME\n\n"
4036
4095
  f"Examples:\n"
4037
4096
  )
4038
- # Add examples with first few table names
4039
4097
  for table in available_tables[:3]:
4040
4098
  error_msg += f" {connection_string}::{table}\n"
4041
4099
  else:
@@ -4050,43 +4108,8 @@ def connect_to_table(connection_string: str) -> Any:
4050
4108
 
4051
4109
  except Exception as e:
4052
4110
  if isinstance(e, ValueError):
4053
- raise # Re-raise our custom ValueError
4054
-
4055
- # Check for backend-specific errors and provide installation guidance
4056
- error_str = str(e).lower()
4057
- backend_install_map = {
4058
- "duckdb": "pip install 'ibis-framework[duckdb]'",
4059
- "postgresql": "pip install 'ibis-framework[postgres]'",
4060
- "postgres": "pip install 'ibis-framework[postgres]'",
4061
- "mysql": "pip install 'ibis-framework[mysql]'",
4062
- "sqlite": "pip install 'ibis-framework[sqlite]'",
4063
- "bigquery": "pip install 'ibis-framework[bigquery]'",
4064
- "snowflake": "pip install 'ibis-framework[snowflake]'",
4065
- }
4066
-
4067
- # Check if this is a missing backend dependency
4068
- for backend, install_cmd in backend_install_map.items(): # pragma: no cover
4069
- if backend in error_str and ("not found" in error_str or "no module" in error_str):
4070
- raise ConnectionError(
4071
- f"Missing {backend.upper()} backend for Ibis. Install it with:\n"
4072
- f" {install_cmd}\n\n"
4073
- f"Original error: {e}\n\n"
4074
- f"Supported connection string formats:\n"
4075
- f"- DuckDB: 'duckdb:///path/to/file.ddb::table_name'\n"
4076
- f"- SQLite: 'sqlite:///path/to/file.db::table_name'\n"
4077
- f"- PostgreSQL: 'postgresql://user:pass@host:port/db::table_name'\n"
4078
- f"- MySQL: 'mysql://user:pass@host:port/db::table_name'\n"
4079
- f"- BigQuery: 'bigquery://project/dataset::table_name'\n"
4080
- f"- Snowflake: 'snowflake://user:pass@account/db/schema::table_name'\n"
4081
- f"\nNote: Use '::table_name' to specify the table within the database."
4082
- ) from e
4083
-
4084
- # Generic connection error
4085
- raise ConnectionError( # pragma: no cover
4086
- f"Failed to connect to database using connection string: {connection_string}\n"
4087
- f"Error: {e}\n\n"
4088
- f"No table specified. Use the format: {connection_string}::TABLE_NAME"
4089
- ) from e
4111
+ raise
4112
+ _handle_connection_errors(e, connection_string)
4090
4113
 
4091
4114
  # Split connection string and table name
4092
4115
  try:
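As context for the refactor above (the consolidated `_handle_connection_errors()` helper), here is a minimal sketch of how the errors surface to a caller; the DuckDB file path and table name are hypothetical.

```python
import pointblank as pb

try:
    # The '::table_name' suffix selects the table within the database,
    # per the connection string format documented for connect_to_table().
    tbl = pb.connect_to_table("duckdb:///analytics.ddb::sales")
except ValueError as err:
    # Raised when no table is specified or the named table is not found;
    # the message lists available tables when they can be read.
    print(err)
except ConnectionError as err:
    # Raised by the shared handler for a missing Ibis backend or a
    # connection failure, with an install hint where applicable.
    print(err)
```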
@@ -4099,32 +4122,14 @@ def connect_to_table(connection_string: str) -> Any:
4099
4122
  conn = ibis.connect(base_connection)
4100
4123
  table = conn.table(table_name)
4101
4124
  return table
4102
-
4103
4125
  except Exception as e:
4104
- # Check for backend-specific errors and provide installation guidance
4105
4126
  error_str = str(e).lower()
4106
- backend_install_map = {
4107
- "duckdb": "pip install 'ibis-framework[duckdb]'",
4108
- "postgresql": "pip install 'ibis-framework[postgres]'",
4109
- "postgres": "pip install 'ibis-framework[postgres]'",
4110
- "mysql": "pip install 'ibis-framework[mysql]'",
4111
- "sqlite": "pip install 'ibis-framework[sqlite]'",
4112
- "bigquery": "pip install 'ibis-framework[bigquery]'",
4113
- "snowflake": "pip install 'ibis-framework[snowflake]'",
4114
- }
4115
-
4116
- # Check if this is a missing backend dependency
4117
- for backend, install_cmd in backend_install_map.items():
4118
- if backend in error_str and ("not found" in error_str or "no module" in error_str):
4119
- raise ConnectionError(
4120
- f"Missing {backend.upper()} backend for Ibis. Install it with:\n"
4121
- f" {install_cmd}\n\n"
4122
- f"Original error: {e}"
4123
- ) from e
4124
4127
 
4125
- # Check if table doesn't exist
4126
- if "table" in error_str and ("not found" in error_str or "does not exist" in error_str):
4127
- # Try to get available tables for helpful message
4128
+ # Check if this is a "table not found" error
4129
+ if "table" in error_str and (
4130
+ "not found" in error_str or "does not exist" in error_str or "not exist" in error_str
4131
+ ):
4132
+ # Try to get available tables for a helpful error message
4128
4133
  try: # pragma: no cover
4129
4134
  available_tables = conn.list_tables()
4130
4135
  if available_tables:
@@ -4132,23 +4137,79 @@ def connect_to_table(connection_string: str) -> Any:
4132
4137
  raise ValueError(
4133
4138
  f"Table '{table_name}' not found in database.\n\n"
4134
4139
  f"Available tables:\n{table_list}\n\n"
4135
- f"Check the table name and try again with:\n"
4136
- f" {base_connection}::CORRECT_TABLE_NAME"
4137
- ) from e
4138
- else:
4139
- raise ValueError(
4140
- f"Table '{table_name}' not found and no tables available in database."
4140
+ f"Connection: {base_connection}"
4141
4141
  ) from e
4142
+ except ValueError:
4143
+ # Re-raise the table-specific ValueError
4144
+ raise
4142
4145
  except Exception:
4143
- raise ValueError(
4144
- f"Table '{table_name}' not found in database. "
4145
- f"Check the table name and connection string."
4146
- ) from e
4146
+ # If we can't list tables, just raise a simple error
4147
+ pass
4148
+
4149
+ raise ValueError(
4150
+ f"Table '{table_name}' not found in database.\n"
4151
+ f"Connection: {base_connection}\n\n"
4152
+ f"Original error: {e}"
4153
+ ) from e
4154
+
4155
+ # For other errors, use the generic connection error handler
4156
+ _handle_connection_errors(e, base_connection)
4157
+
4158
+
4159
+ def print_database_tables(connection_string: str) -> list[str]:
4160
+ """
4161
+ List all tables in a database from a connection string.
4162
+
4163
+ The `print_database_tables()` function connects to a database and returns a list of all
4164
+ available tables. This is particularly useful for discovering what tables exist in a database
4165
+ before connecting to a specific table with `connect_to_table()`. The function automatically
4166
+ filters out temporary Ibis tables (memtables) to show only user tables. It supports all database
4167
+ backends available through Ibis, including DuckDB, SQLite, PostgreSQL, MySQL, BigQuery, and
4168
+ Snowflake.
4169
+
4170
+ Parameters
4171
+ ----------
4172
+ connection_string
4173
+ A database connection string *without* the `::table_name` suffix. Example:
4174
+ `"duckdb:///path/to/database.ddb"`.
4175
+
4176
+ Returns
4177
+ -------
4178
+ list[str]
4179
+ List of table names, excluding temporary Ibis tables.
4180
+
4181
+ See Also
4182
+ --------
4183
+ connect_to_table : Connect to a database table with full connection string documentation
4184
+ """
4185
+ # Check if connection string includes table specification (which is not allowed)
4186
+ if "::" in connection_string:
4187
+ raise ValueError(
4188
+ "Connection string should not include table specification (::table_name).\n"
4189
+ f"You've supplied: {connection_string}\n"
4190
+ f"Expected format: 'duckdb:///path/to/database.ddb' (without ::table_name)"
4191
+ )
4192
+
4193
+ # Check if Ibis is available
4194
+ if not _is_lib_present(lib_name="ibis"):
4195
+ raise ImportError(
4196
+ "The Ibis library is not installed but is required for database connection strings.\n"
4197
+ "Install it with: pip install 'ibis-framework[duckdb]' (or other backend as needed)"
4198
+ )
4199
+
4200
+ import ibis
4201
+
4202
+ try:
4203
+ # Connect to database
4204
+ conn = ibis.connect(connection_string)
4205
+ # Get all tables and filter out temporary Ibis tables
4206
+ all_tables = conn.list_tables()
4207
+ user_tables = [t for t in all_tables if "memtable" not in t]
4147
4208
 
4148
- # Generic connection error
4149
- raise ConnectionError(
4150
- f"Failed to connect to table '{table_name}' using: {base_connection}\nError: {e}"
4151
- ) from e
4209
+ return user_tables
4210
+
4211
+ except Exception as e:
4212
+ _handle_connection_errors(e, connection_string)
4152
4213
 
4153
4214
 
4154
4215
  @dataclass
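A short discovery-workflow sketch combining the new `print_database_tables()` function with `connect_to_table()`; the database path and the example table names are hypothetical.

```python
import pointblank as pb

# List the user tables in a database (temporary Ibis memtables are
# filtered out by print_database_tables()).
tables = pb.print_database_tables("duckdb:///analytics.ddb")
print(tables)  # e.g. ['customers', 'orders']  (illustrative output)

# Connect to the first discovered table using the '::table_name' suffix.
if tables:
    tbl = pb.connect_to_table(f"duckdb:///analytics.ddb::{tables[0]}")
```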
@@ -4430,6 +4491,16 @@ class Validate:
4430
4491
  - Vietnamese (`"vi"`)
4431
4492
  - Indonesian (`"id"`)
4432
4493
  - Ukrainian (`"uk"`)
4494
+ - Bulgarian (`"bg"`)
4495
+ - Croatian (`"hr"`)
4496
+ - Estonian (`"et"`)
4497
+ - Hungarian (`"hu"`)
4498
+ - Irish (`"ga"`)
4499
+ - Latvian (`"lv"`)
4500
+ - Lithuanian (`"lt"`)
4501
+ - Maltese (`"mt"`)
4502
+ - Slovak (`"sk"`)
4503
+ - Slovenian (`"sl"`)
4433
4504
  - Hebrew (`"he"`)
4434
4505
  - Thai (`"th"`)
4435
4506
  - Persian (`"fa"`)
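Assuming this list documents the `lang=` argument of `Validate` (as in earlier releases), one of the newly added codes such as Slovak (`"sk"`) can be passed as sketched below; the table and validation step are placeholders.

```python
import pointblank as pb
import polars as pl

tbl = pl.DataFrame({"a": [1, 2, 3]})

# Report text is rendered in Slovak, one of the newly supported languages.
validation = (
    pb.Validate(data=tbl, lang="sk")
    .col_exists(columns="a")
    .interrogate()
)
```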
@@ -9700,40 +9771,41 @@ class Validate:
9700
9771
 
9701
9772
  return self
9702
9773
 
9703
- def rows_distinct(
9774
+ def col_pct_null(
9704
9775
  self,
9705
- columns_subset: str | list[str] | None = None,
9706
- pre: Callable | None = None,
9707
- segments: SegmentSpec | None = None,
9708
- thresholds: int | float | bool | tuple | dict | Thresholds = None,
9776
+ columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
9777
+ p: float,
9778
+ tol: Tolerance = 0,
9779
+ thresholds: int | float | None | bool | tuple | dict | Thresholds = None,
9709
9780
  actions: Actions | None = None,
9710
9781
  brief: str | bool | None = None,
9711
9782
  active: bool = True,
9712
9783
  ) -> Validate:
9713
9784
  """
9714
- Validate whether rows in the table are distinct.
9785
+ Validate whether a column has a specific percentage of Null values.
9715
9786
 
9716
- The `rows_distinct()` method checks whether rows in the table are distinct. This validation
9717
- will operate over the number of test units that is equal to the number of rows in the table
9718
- (determined after any `pre=` mutation has been applied).
9787
+ The `col_pct_null()` validation method checks whether the percentage of Null values in a
9788
+ column matches a specified percentage `p=` (within an optional tolerance `tol=`). This
9789
+ validation operates at the column level, generating a single validation step per column that
9790
+ passes or fails based on whether the actual percentage of Null values falls within the
9791
+ acceptable range defined by `p ± tol`.
9719
9792
 
9720
9793
  Parameters
9721
9794
  ----------
9722
- columns_subset
9723
- A single column or a list of columns to use as a subset for the distinct comparison.
9724
- If `None`, then all columns in the table will be used for the comparison. If multiple
9725
- columns are supplied, the distinct comparison will be made over the combination of
9726
- values in those columns.
9727
- pre
9728
- An optional preprocessing function or lambda to apply to the data table during
9729
- interrogation. This function should take a table as input and return a modified table.
9730
- Have a look at the *Preprocessing* section for more information on how to use this
9731
- argument.
9732
- segments
9733
- An optional directive on segmentation, which serves to split a validation step into
9734
- multiple (one step per segment). Can be a single column name, a tuple that specifies a
9735
- column name and its corresponding values to segment on, or a combination of both
9736
- (provided as a list). Read the *Segmentation* section for usage information.
9795
+ columns
9796
+ A single column or a list of columns to validate. Can also use
9797
+ [`col()`](`pointblank.col`) with column selectors to specify one or more columns. If
9798
+ multiple columns are supplied or resolved, there will be a separate validation step
9799
+ generated for each column.
9800
+ p
9801
+ The expected percentage of Null values in the column, expressed as a decimal between
9802
+ `0.0` and `1.0`. For example, `p=0.5` means 50% of values should be Null.
9803
+ tol
9804
+ The tolerance allowed when comparing the actual percentage of Null values to the
9805
+ expected percentage `p=`. The validation passes if the actual percentage falls within
9806
+ the range `[p - tol, p + tol]`. Default is `0`, meaning an exact match is required. See
9807
+ the *Tolerance* section for details on all supported formats (absolute, relative,
9808
+ symmetric, and asymmetric bounds).
9737
9809
  thresholds
9738
9810
  Set threshold failure levels for reporting and reacting to exceedences of the levels.
9739
9811
  The thresholds are set at the step level and will override any global thresholds set in
@@ -9741,7 +9813,7 @@ class Validate:
9741
9813
  be set locally and global thresholds (if any) will take effect. Look at the *Thresholds*
9742
9814
  section for information on how to set threshold levels.
9743
9815
  actions
9744
- Optional actions to take when the validation step meets or exceeds any set threshold
9816
+ Optional actions to take when the validation step(s) meets or exceeds any set threshold
9745
9817
  levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
9746
9818
  define the actions.
9747
9819
  brief
@@ -9760,60 +9832,30 @@ class Validate:
9760
9832
  Validate
9761
9833
  The `Validate` object with the added validation step.
9762
9834
 
9763
- Preprocessing
9764
- -------------
9765
- The `pre=` argument allows for a preprocessing function or lambda to be applied to the data
9766
- table during interrogation. This function should take a table as input and return a modified
9767
- table. This is useful for performing any necessary transformations or filtering on the data
9768
- before the validation step is applied.
9769
-
9770
- The preprocessing function can be any callable that takes a table as input and returns a
9771
- modified table. For example, you could use a lambda function to filter the table based on
9772
- certain criteria or to apply a transformation to the data. Note that you can refer to
9773
- columns via `columns_subset=` that are expected to be present in the transformed table, but
9774
- may not exist in the table before preprocessing. Regarding the lifetime of the transformed
9775
- table, it only exists during the validation step and is not stored in the `Validate` object
9776
- or used in subsequent validation steps.
9777
-
9778
- Segmentation
9779
- ------------
9780
- The `segments=` argument allows for the segmentation of a validation step into multiple
9781
- segments. This is useful for applying the same validation step to different subsets of the
9782
- data. The segmentation can be done based on a single column or specific fields within a
9783
- column.
9784
-
9785
- Providing a single column name will result in a separate validation step for each unique
9786
- value in that column. For example, if you have a column called `"region"` with values
9787
- `"North"`, `"South"`, and `"East"`, the validation step will be applied separately to each
9788
- region.
9789
-
9790
- Alternatively, you can provide a tuple that specifies a column name and its corresponding
9791
- values to segment on. For example, if you have a column called `"date"` and you want to
9792
- segment on only specific dates, you can provide a tuple like
9793
- `("date", ["2023-01-01", "2023-01-02"])`. Any other values in the column will be disregarded
9794
- (i.e., no validation steps will be created for them).
9835
+ Tolerance
9836
+ ---------
9837
+ The `tol=` parameter accepts several different formats to specify the acceptable deviation
9838
+ from the expected percentage `p=`. The tolerance can be expressed as:
9795
9839
 
9796
- A list with a combination of column names and tuples can be provided as well. This allows
9797
- for more complex segmentation scenarios. The following inputs are both valid:
9840
+ 1. *single integer* (absolute tolerance): the exact number of test units that can deviate.
9841
+ For example, `tol=2` means the actual count can differ from the expected count by up to 2
9842
+ units in either direction.
9798
9843
 
9799
- ```
9800
- # Segments from all unique values in the `region` column
9801
- # and specific dates in the `date` column
9802
- segments=["region", ("date", ["2023-01-01", "2023-01-02"])]
9844
+ 2. *single float between 0 and 1* (relative tolerance): a proportion of the expected
9845
+ count. For example, if the expected count is 50 and `tol=0.1`, the acceptable range is
9846
+ 45 to 55 (50 ± 10% of 50 = 50 ± 5).
9803
9847
 
9804
- # Segments from all unique values in the `region` and `date` columns
9805
- segments=["region", "date"]
9806
- ```
9848
+ 3. *tuple of two integers* (absolute bounds): explicitly specify the lower and upper
9849
+ bounds as absolute deviations. For example, `tol=(1, 3)` means the actual count can be
9850
+ 1 unit below or 3 units above the expected count.
9807
9851
 
9808
- The segmentation is performed during interrogation, and the resulting validation steps will
9809
- be numbered sequentially. Each segment will have its own validation step, and the results
9810
- will be reported separately. This allows for a more granular analysis of the data and helps
9811
- identify issues within specific segments.
9852
+ 4. *tuple of two floats between 0 and 1* (relative bounds): explicitly specify the lower
9853
+ and upper bounds as proportional deviations. For example, `tol=(0.05, 0.15)` means the
9854
+ lower bound is 5% below and the upper bound is 15% above the expected count.
9812
9855
 
9813
- Importantly, the segmentation process will be performed after any preprocessing of the data
9814
- table. Because of this, one can conceivably use the `pre=` argument to generate a column
9815
- that can be used for segmentation. For example, you could create a new column called
9816
- `"segment"` through use of `pre=` and then use that column for segmentation.
9856
+ When using a single value (integer or float), the tolerance is applied symmetrically in both
9857
+ directions. When using a tuple, you can specify asymmetric tolerances where the lower and
9858
+ upper bounds differ.
9817
9859
 
9818
9860
  Thresholds
9819
9861
  ----------
@@ -9851,8 +9893,8 @@ class Validate:
9851
9893
  import pointblank as pb
9852
9894
  pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
9853
9895
  ```
9854
- For the examples here, we'll use a simple Polars DataFrame with three string columns
9855
- (`col_1`, `col_2`, and `col_3`). The table is shown below:
9896
+ For the examples here, we'll use a simple Polars DataFrame with three columns (`a`, `b`,
9897
+ and `c`) that have different percentages of Null values. The table is shown below:
9856
9898
 
9857
9899
  ```{python}
9858
9900
  import pointblank as pb
@@ -9860,56 +9902,133 @@ class Validate:
9860
9902
 
9861
9903
  tbl = pl.DataFrame(
9862
9904
  {
9863
- "col_1": ["a", "b", "c", "d"],
9864
- "col_2": ["a", "a", "c", "d"],
9865
- "col_3": ["a", "a", "d", "e"],
9905
+ "a": [1, 2, 3, 4, 5, 6, 7, 8],
9906
+ "b": [1, None, 3, None, 5, None, 7, None],
9907
+ "c": [None, None, None, None, None, None, 1, 2],
9866
9908
  }
9867
9909
  )
9868
9910
 
9869
9911
  pb.preview(tbl)
9870
9912
  ```
9871
9913
 
9872
- Let's validate that the rows in the table are distinct with `rows_distinct()`. We'll
9873
- determine if this validation had any failing test units (there are four test units, one for
9874
- each row). A failing test units means that a given row is not distinct from every other row.
9914
+ Let's validate that column `a` has 0% Null values (i.e., no Null values at all).
9875
9915
 
9876
9916
  ```{python}
9877
9917
  validation = (
9878
9918
  pb.Validate(data=tbl)
9879
- .rows_distinct()
9919
+ .col_pct_null(columns="a", p=0.0)
9880
9920
  .interrogate()
9881
9921
  )
9882
9922
 
9883
9923
  validation
9884
9924
  ```
9885
9925
 
9886
- From this validation table we see that there are no failing test units. All rows in the
9887
- table are distinct from one another.
9926
+ Printing the `validation` object shows the validation table in an HTML viewing environment.
9927
+ The validation table shows the single entry that corresponds to the validation step created
9928
+ by using `col_pct_null()`. The validation passed since column `a` has no Null values.
9888
9929
 
9889
- We can also use a subset of columns to determine distinctness. Let's specify the subset
9890
- using columns `col_2` and `col_3` for the next validation.
9930
+ Now, let's check that column `b` has exactly 50% Null values.
9891
9931
 
9892
9932
  ```{python}
9893
9933
  validation = (
9894
9934
  pb.Validate(data=tbl)
9895
- .rows_distinct(columns_subset=["col_2", "col_3"])
9935
+ .col_pct_null(columns="b", p=0.5)
9896
9936
  .interrogate()
9897
9937
  )
9898
9938
 
9899
9939
  validation
9900
9940
  ```
9901
9941
 
9902
- The validation table reports two failing test units. The first and second rows are
9903
- duplicated when considering only the values in columns `col_2` and `col_3`. There's only
9904
- one set of duplicates but there are two failing test units since each row is compared to all
9905
- others.
9906
- """
9942
+ This validation also passes, as column `b` has exactly 4 out of 8 values as Null (50%).
9943
+
9944
+ Finally, let's validate column `c` with a tolerance. Column `c` has 75% Null values, so
9945
+ we'll check if it's approximately 70% Null with a tolerance of 10%.
9946
+
9947
+ ```{python}
9948
+ validation = (
9949
+ pb.Validate(data=tbl)
9950
+ .col_pct_null(columns="c", p=0.70, tol=0.10)
9951
+ .interrogate()
9952
+ )
9953
+
9954
+ validation
9955
+ ```
9956
+
9957
+ This validation passes because the actual percentage (75%) falls within the acceptable
9958
+ range of 60% to 80% (70% ± 10%).
9959
+
9960
+ The `tol=` parameter supports multiple formats to express tolerance. Let's explore all the
9961
+ different ways to specify tolerance using column `b`, which has exactly 50% Null values
9962
+ (4 out of 8 values).
9963
+
9964
+ *Using an absolute tolerance (integer)*: Specify the exact number of rows that can
9965
+ deviate. With `tol=1`, we allow the count to differ by 1 row in either direction.
9966
+
9967
+ ```{python}
9968
+ validation = (
9969
+ pb.Validate(data=tbl)
9970
+ .col_pct_null(columns="b", p=0.375, tol=1) # Expect 3 nulls, allow ±1 (range: 2-4)
9971
+ .interrogate()
9972
+ )
9973
+
9974
+ validation
9975
+ ```
9907
9976
 
9977
+ This passes because column `b` has 4 Null values, which falls within the acceptable range
9978
+ of 2 to 4 (3 ± 1).
9979
+
9980
+ *Using a relative tolerance (float)*: Specify the tolerance as a proportion of the
9981
+ expected count. With `tol=0.25`, we allow a 25% deviation from the expected count.
9982
+
9983
+ ```{python}
9984
+ validation = (
9985
+ pb.Validate(data=tbl)
9986
+ .col_pct_null(columns="b", p=0.375, tol=0.25) # Expect 3 nulls, allow ±25% (range: 2.25-3.75)
9987
+ .interrogate()
9988
+ )
9989
+
9990
+ validation
9991
+ ```
9992
+
9993
+ This passes because the count of 4 Null values falls within the acceptable range (3 ± 0.75 calculates
9994
+ to 2.25 to 3.75, which rounds down to 2 to 3 rows).
9995
+
9996
+ *Using asymmetric absolute bounds (tuple of integers)*: Specify different lower and
9997
+ upper bounds as absolute values. With `tol=(0, 2)`, we allow no deviation below but up
9998
+ to 2 rows above the expected count.
9999
+
10000
+ ```{python}
10001
+ validation = (
10002
+ pb.Validate(data=tbl)
10003
+ .col_pct_null(columns="b", p=0.25, tol=(0, 2)) # Expect 2 Nulls, allow +0/-2 (range: 2-4)
10004
+ .interrogate()
10005
+ )
10006
+
10007
+ validation
10008
+ ```
10009
+
10010
+ This passes because the count of 4 Null values falls within the acceptable range of 2 to 4.
10011
+
10012
+ *Using asymmetric relative bounds (tuple of floats)*: Specify different lower and upper
10013
+ bounds as proportions. With `tol=(0.1, 0.3)`, we allow 10% below and 30% above the
10014
+ expected count.
10015
+
10016
+ ```{python}
10017
+ validation = (
10018
+ pb.Validate(data=tbl)
10019
+ .col_pct_null(columns="b", p=0.375, tol=(0.1, 0.3)) # Expect 3 Nulls, allow -10%/+30%
10020
+ .interrogate()
10021
+ )
10022
+
10023
+ validation
10024
+ ```
10025
+
10026
+ This passes because the count of 4 Null values falls within the acceptable range (3 - 0.3 to 3 + 0.9
10027
+ calculates to 2.7 to 3.9, which rounds down to 2 to 3 rows).
10028
+ """
9908
10029
  assertion_type = _get_fn_name()
9909
10030
 
9910
- _check_pre(pre=pre)
9911
- # TODO: add check for segments
9912
- # _check_segments(segments=segments)
10031
+ _check_column(column=columns)
9913
10032
  _check_thresholds(thresholds=thresholds)
9914
10033
  _check_boolean_input(param=active, param_name="active")
9915
10034
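Since `columns=` accepts a list (with one validation step generated per column, as the implementation above iterates over the resolved columns), here is a brief sketch reusing the example table from the docstring; no pass/fail outcome is asserted.

```python
import pointblank as pb
import polars as pl

tbl = pl.DataFrame(
    {
        "a": [1, 2, 3, 4, 5, 6, 7, 8],
        "b": [1, None, 3, None, 5, None, 7, None],
        "c": [None, None, None, None, None, None, 1, 2],
    }
)

# Two validation steps are created, one for column "b" and one for "c",
# each checking for 50% Null values with a ±25% relative tolerance.
validation = (
    pb.Validate(data=tbl)
    .col_pct_null(columns=["b", "c"], p=0.5, tol=0.25)
    .interrogate()
)
```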
 
@@ -9918,31 +10037,38 @@ class Validate:
9918
10037
  self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
9919
10038
  )
9920
10039
 
9921
- if columns_subset is not None and isinstance(columns_subset, str):
9922
- columns_subset = [columns_subset]
10040
+ # If `columns` is a ColumnSelector or Narwhals selector, call `col()` on it to later
10041
+ # resolve the columns
10042
+ if isinstance(columns, (ColumnSelector, nw.selectors.Selector)):
10043
+ columns = col(columns)
9923
10044
 
9924
- # TODO: incorporate Column object
10045
+ # If `columns` is Column value or a string, place it in a list for iteration
10046
+ if isinstance(columns, (Column, str)):
10047
+ columns = [columns]
9925
10048
 
9926
10049
  # Determine brief to use (global or local) and transform any shorthands of `brief=`
9927
10050
  brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
9928
10051
 
9929
- val_info = _ValidationInfo(
9930
- assertion_type=assertion_type,
9931
- column=columns_subset,
9932
- pre=pre,
9933
- segments=segments,
9934
- thresholds=thresholds,
9935
- actions=actions,
9936
- brief=brief,
9937
- active=active,
9938
- )
9939
-
9940
- self._add_validation(validation_info=val_info)
10052
+ bound_finder: Callable[[int], AbsoluteBounds] = partial(_derive_bounds, tol=tol)
9941
10053
 
9942
- return self
9943
-
9944
- def rows_complete(
9945
- self,
10054
+ # Iterate over the columns and create a validation step for each
10055
+ for column in columns:
10056
+ val_info = _ValidationInfo(
10057
+ assertion_type=assertion_type,
10058
+ column=column,
10059
+ values={"p": p, "bound_finder": bound_finder},
10060
+ thresholds=thresholds,
10061
+ actions=actions,
10062
+ brief=brief,
10063
+ active=active,
10064
+ )
10065
+
10066
+ self._add_validation(validation_info=val_info)
10067
+
10068
+ return self
10069
+
10070
+ def rows_distinct(
10071
+ self,
9946
10072
  columns_subset: str | list[str] | None = None,
9947
10073
  pre: Callable | None = None,
9948
10074
  segments: SegmentSpec | None = None,
@@ -9952,19 +10078,19 @@ class Validate:
9952
10078
  active: bool = True,
9953
10079
  ) -> Validate:
9954
10080
  """
9955
- Validate whether row data are complete by having no missing values.
10081
+ Validate whether rows in the table are distinct.
9956
10082
 
9957
- The `rows_complete()` method checks whether rows in the table are complete. Completeness
9958
- of a row means that there are no missing values within the row. This validation will operate
9959
- over the number of test units that is equal to the number of rows in the table (determined
9960
- after any `pre=` mutation has been applied). A subset of columns can be specified for the
9961
- completeness check. If no subset is provided, all columns in the table will be used.
10083
+ The `rows_distinct()` method checks whether rows in the table are distinct. This validation
10084
+ will operate over the number of test units that is equal to the number of rows in the table
10085
+ (determined after any `pre=` mutation has been applied).
9962
10086
 
9963
10087
  Parameters
9964
10088
  ----------
9965
10089
  columns_subset
9966
- A single column or a list of columns to use as a subset for the completeness check. If
9967
- `None` (the default), then all columns in the table will be used.
10090
+ A single column or a list of columns to use as a subset for the distinct comparison.
10091
+ If `None`, then all columns in the table will be used for the comparison. If multiple
10092
+ columns are supplied, the distinct comparison will be made over the combination of
10093
+ values in those columns.
9968
10094
  pre
9969
10095
  An optional preprocessing function or lambda to apply to the data table during
9970
10096
  interrogation. This function should take a table as input and return a modified table.
@@ -10101,48 +10227,48 @@ class Validate:
10101
10227
 
10102
10228
  tbl = pl.DataFrame(
10103
10229
  {
10104
- "col_1": ["a", None, "c", "d"],
10105
- "col_2": ["a", "a", "c", None],
10106
- "col_3": ["a", "a", "d", None],
10230
+ "col_1": ["a", "b", "c", "d"],
10231
+ "col_2": ["a", "a", "c", "d"],
10232
+ "col_3": ["a", "a", "d", "e"],
10107
10233
  }
10108
10234
  )
10109
10235
 
10110
10236
  pb.preview(tbl)
10111
10237
  ```
10112
10238
 
10113
- Let's validate that the rows in the table are complete with `rows_complete()`. We'll
10239
+ Let's validate that the rows in the table are distinct with `rows_distinct()`. We'll
10114
10240
  determine if this validation had any failing test units (there are four test units, one for
10115
- each row). A failing test units means that a given row is not complete (i.e., has at least
10116
- one missing value).
10241
+ each row). A failing test units means that a given row is not distinct from every other row.
10117
10242
 
10118
10243
  ```{python}
10119
10244
  validation = (
10120
10245
  pb.Validate(data=tbl)
10121
- .rows_complete()
10246
+ .rows_distinct()
10122
10247
  .interrogate()
10123
10248
  )
10124
10249
 
10125
10250
  validation
10126
10251
  ```
10127
10252
 
10128
- From this validation table we see that there are two failing test units. This is because
10129
- two rows in the table have at least one missing value (the second row and the last row).
10253
+ From this validation table we see that there are no failing test units. All rows in the
10254
+ table are distinct from one another.
10130
10255
 
10131
- We can also use a subset of columns to determine completeness. Let's specify the subset
10256
+ We can also use a subset of columns to determine distinctness. Let's specify the subset
10132
10257
  using columns `col_2` and `col_3` for the next validation.
10133
10258
 
10134
10259
  ```{python}
10135
10260
  validation = (
10136
10261
  pb.Validate(data=tbl)
10137
- .rows_complete(columns_subset=["col_2", "col_3"])
10262
+ .rows_distinct(columns_subset=["col_2", "col_3"])
10138
10263
  .interrogate()
10139
10264
  )
10140
10265
 
10141
10266
  validation
10142
10267
  ```
10143
10268
 
10144
- The validation table reports a single failing test units. The last row contains missing
10145
- values in both the `col_2` and `col_3` columns.
10269
+ The validation table reports two failing test units. The first and second rows are
10270
+ duplicated when considering only the values in columns `col_2` and `col_3`. There's only
10271
+ one set of duplicates but there are two failing test units since each row is compared to all
10146
10272
  others.
10147
10273
  """
10148
10274
 
@@ -10159,8 +10285,8 @@ class Validate:
10159
10285
  self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
10160
10286
  )
10161
10287
 
10162
- if columns_subset is not None and isinstance(columns_subset, str): # pragma: no cover
10163
- columns_subset = [columns_subset] # pragma: no cover
10288
+ if columns_subset is not None and isinstance(columns_subset, str):
10289
+ columns_subset = [columns_subset]
10164
10290
 
10165
10291
  # TODO: incorporate Column object
10166
10292
 
@@ -10182,13 +10308,9 @@ class Validate:
10182
10308
 
10183
10309
  return self
10184
10310
 
10185
- def prompt(
10311
+ def rows_complete(
10186
10312
  self,
10187
- prompt: str,
10188
- model: str,
10189
10313
  columns_subset: str | list[str] | None = None,
10190
- batch_size: int = 1000,
10191
- max_concurrent: int = 3,
10192
10314
  pre: Callable | None = None,
10193
10315
  segments: SegmentSpec | None = None,
10194
10316
  thresholds: int | float | bool | tuple | dict | Thresholds = None,
@@ -10197,66 +10319,35 @@ class Validate:
10197
10319
  active: bool = True,
10198
10320
  ) -> Validate:
10199
10321
  """
10200
- Validate rows using AI/LLM-powered analysis.
10201
-
10202
- The `prompt()` validation method uses Large Language Models (LLMs) to validate rows of data
10203
- based on natural language criteria. Similar to other Pointblank validation methods, this
10204
- generates binary test results (pass/fail) that integrate seamlessly with the standard
10205
- reporting framework.
10206
-
10207
- Like `col_vals_*()` methods, `prompt()` evaluates data against specific criteria, but
10208
- instead of using programmatic rules, it uses natural language prompts interpreted by an LLM.
10209
- Like `rows_distinct()` and `rows_complete()`, it operates at the row level and allows you to
10210
- specify a subset of columns for evaluation using `columns_subset=`.
10211
-
10212
- The system automatically combines your validation criteria from the `prompt=` parameter with
10213
- the necessary technical context, data formatting instructions, and response structure
10214
- requirements. This is all so you only need to focus on describing your validation logic in
10215
- plain language.
10322
+ Validate whether row data are complete by having no missing values.
10216
10323
 
10217
- Each row becomes a test unit that either passes or fails the validation criteria, producing
10218
- the familiar True/False results that appear in Pointblank validation reports. This method
10219
- is particularly useful for complex validation rules that are difficult to express with
10220
- traditional validation methods, such as semantic checks, context-dependent validation, or
10221
- subjective quality assessments.
10324
+ The `rows_complete()` method checks whether rows in the table are complete. Completeness
10325
+ of a row means that there are no missing values within the row. This validation will operate
10326
+ over the number of test units that is equal to the number of rows in the table (determined
10327
+ after any `pre=` mutation has been applied). A subset of columns can be specified for the
10328
+ completeness check. If no subset is provided, all columns in the table will be used.
10222
10329
 
10223
10330
  Parameters
10224
10331
  ----------
10225
- prompt
10226
- A natural language description of the validation criteria. This prompt should clearly
10227
- describe what constitutes valid vs invalid rows. Some examples:
10228
- `"Each row should contain a valid email address and a realistic person name"`,
10229
- `"Values should indicate positive sentiment"`,
10230
- `"The description should mention a country name"`.
10231
10332
  columns_subset
10232
- A single column or list of columns to include in the validation. If `None`, all columns
10233
- will be included. Specifying fewer columns can improve performance and reduce API costs
10234
- so try to include only the columns necessary for the validation.
10235
- model
10236
- The model to be used. This should be in the form of `provider:model` (e.g.,
10237
- `"anthropic:claude-sonnet-4-5"`). Supported providers are `"anthropic"`, `"openai"`,
10238
- `"ollama"`, and `"bedrock"`. The model name should be the specific model to be used from
10239
- the provider. Model names are subject to change so consult the provider's documentation
10240
- for the most up-to-date model names.
10241
- batch_size
10242
- Number of rows to process in each batch. Larger batches are more efficient but may hit
10243
- API limits. Default is `1000`.
10244
- max_concurrent
10245
- Maximum number of concurrent API requests. Higher values speed up processing but may
10246
- hit rate limits. Default is `3`.
10333
+ A single column or a list of columns to use as a subset for the completeness check. If
10334
+ `None` (the default), then all columns in the table will be used.
10247
10335
  pre
10248
10336
  An optional preprocessing function or lambda to apply to the data table during
10249
10337
  interrogation. This function should take a table as input and return a modified table.
10338
+ Have a look at the *Preprocessing* section for more information on how to use this
10339
+ argument.
10250
10340
  segments
10251
10341
  An optional directive on segmentation, which serves to split a validation step into
10252
10342
  multiple (one step per segment). Can be a single column name, a tuple that specifies a
10253
10343
  column name and its corresponding values to segment on, or a combination of both
10254
- (provided as a list).
10344
+ (provided as a list). Read the *Segmentation* section for usage information.
10255
10345
  thresholds
10256
10346
  Set threshold failure levels for reporting and reacting to exceedences of the levels.
10257
10347
  The thresholds are set at the step level and will override any global thresholds set in
10258
10348
  `Validate(thresholds=...)`. The default is `None`, which means that no thresholds will
10259
- be set locally and global thresholds (if any) will take effect.
10349
+ be set locally and global thresholds (if any) will take effect. Look at the *Thresholds*
10350
+ section for information on how to set threshold levels.
10260
10351
  actions
10261
10352
  Optional actions to take when the validation step meets or exceeds any set threshold
10262
10353
  levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
@@ -10277,152 +10368,88 @@ class Validate:
10277
10368
  Validate
10278
10369
  The `Validate` object with the added validation step.
10279
10370
 
10280
- Constructing the `model` Argument
10281
- ---------------------------------
10282
- The `model=` argument should be constructed using the provider and model name separated by a
10283
- colon (`provider:model`). The provider text can any of:
10371
+ Preprocessing
10372
+ -------------
10373
+ The `pre=` argument allows for a preprocessing function or lambda to be applied to the data
10374
+ table during interrogation. This function should take a table as input and return a modified
10375
+ table. This is useful for performing any necessary transformations or filtering on the data
10376
+ before the validation step is applied.
10284
10377
 
10285
- - `"anthropic"` (Anthropic)
10286
- - `"openai"` (OpenAI)
10287
- - `"ollama"` (Ollama)
10288
- - `"bedrock"` (Amazon Bedrock)
10378
+ The preprocessing function can be any callable that takes a table as input and returns a
10379
+ modified table. For example, you could use a lambda function to filter the table based on
10380
+ certain criteria or to apply a transformation to the data. Note that you can refer to
10381
+ columns via `columns_subset=` that are expected to be present in the transformed table, but
10382
+ may not exist in the table before preprocessing. Regarding the lifetime of the transformed
10383
+ table, it only exists during the validation step and is not stored in the `Validate` object
10384
+ or used in subsequent validation steps.
10289
10385
 
10290
- The model name should be the specific model to be used from the provider. Model names are
10291
- subject to change so consult the provider's documentation for the most up-to-date model
10292
- names.
10386
+ Segmentation
10387
+ ------------
10388
+ The `segments=` argument allows for the segmentation of a validation step into multiple
10389
+ segments. This is useful for applying the same validation step to different subsets of the
10390
+ data. The segmentation can be done based on a single column or specific fields within a
10391
+ column.
10293
10392
 
10294
- Notes on Authentication
10295
- -----------------------
10296
- API keys are automatically loaded from environment variables or `.env` files and are **not**
10297
- stored in the validation object for security reasons. You should consider using a secure
10298
- method for handling API keys.
10393
+ Providing a single column name will result in a separate validation step for each unique
10394
+ value in that column. For example, if you have a column called `"region"` with values
10395
+ `"North"`, `"South"`, and `"East"`, the validation step will be applied separately to each
10396
+ region.
10299
10397
 
10300
- One way to do this is to load the API key from an environment variable and retrieve it using
10301
- the `os` module (specifically the `os.getenv()` function). Places to store the API key might
10302
- include `.bashrc`, `.bash_profile`, `.zshrc`, or `.zsh_profile`.
10398
+ Alternatively, you can provide a tuple that specifies a column name and its corresponding
10399
+ values to segment on. For example, if you have a column called `"date"` and you want to
10400
+ segment on only specific dates, you can provide a tuple like
10401
+ `("date", ["2023-01-01", "2023-01-02"])`. Any other values in the column will be disregarded
10402
+ (i.e., no validation steps will be created for them).
10303
10403
 
10304
- Another solution is to store one or more model provider API keys in an `.env` file (in the
10305
- root of your project). If the API keys have correct names (e.g., `ANTHROPIC_API_KEY` or
10306
- `OPENAI_API_KEY`) then the AI validation will automatically load the API key from the `.env`
10307
- file. An `.env` file might look like this:
10404
+ A list with a combination of column names and tuples can be provided as well. This allows
10405
+ for more complex segmentation scenarios. The following inputs are both valid:
10308
10406
 
10309
- ```plaintext
10310
- ANTHROPIC_API_KEY="your_anthropic_api_key_here"
10311
- OPENAI_API_KEY="your_openai_api_key_here"
10312
10407
  ```
10408
+ # Segments from all unique values in the `region` column
10409
+ # and specific dates in the `date` column
10410
+ segments=["region", ("date", ["2023-01-01", "2023-01-02"])]
10313
10411
 
10314
- There's no need to have the `python-dotenv` package installed when using `.env` files in
10315
- this way.
10412
+ # Segments from all unique values in the `region` and `date` columns
10413
+ segments=["region", "date"]
10414
+ ```
10316
10415
 
10317
- **Provider-specific setup**:
10416
+ The segmentation is performed during interrogation, and the resulting validation steps will
10417
+ be numbered sequentially. Each segment will have its own validation step, and the results
10418
+ will be reported separately. This allows for a more granular analysis of the data and helps
10419
+ identify issues within specific segments.
10318
10420
 
10319
- - **OpenAI**: set `OPENAI_API_KEY` environment variable or create `.env` file
10320
- - **Anthropic**: set `ANTHROPIC_API_KEY` environment variable or create `.env` file
10321
- - **Ollama**: no API key required, just ensure Ollama is running locally
10322
- - **Bedrock**: configure AWS credentials through standard AWS methods
10421
+ Importantly, the segmentation process will be performed after any preprocessing of the data
10422
+ table. Because of this, one can conceivably use the `pre=` argument to generate a column
10423
+ that can be used for segmentation. For example, you could create a new column called
10424
+ `"segment"` through use of `pre=` and then use that column for segmentation.
10323
10425
 
10324
- AI Validation Process
10325
- ---------------------
10326
- The AI validation process works as follows:
10426
+ Thresholds
10427
+ ----------
10428
+ The `thresholds=` parameter is used to set the failure-condition levels for the validation
10429
+ step. If they are set here at the step level, these thresholds will override any thresholds
10430
+ set at the global level in `Validate(thresholds=...)`.
10327
10431
 
10328
- 1. data batching: the data is split into batches of the specified size
10329
- 2. row deduplication: duplicate rows (based on selected columns) are identified and only
10330
- unique combinations are sent to the LLM for analysis
10331
- 3. json conversion: each batch of unique rows is converted to JSON format for the LLM
10332
- 4. prompt construction: the user prompt is embedded in a structured system prompt
10333
- 5. llm processing: each batch is sent to the LLM for analysis
10334
- 6. response parsing: LLM responses are parsed to extract validation results
10335
- 7. result projection: results are mapped back to all original rows using row signatures
10336
- 8. result aggregation: results from all batches are combined
10432
+ There are three threshold levels: 'warning', 'error', and 'critical'. The threshold values
10433
+ can either be set as a proportion of all test units that fail (a value between `0` and `1`),
10434
+ or the absolute number of failing test units (as an integer that's `1` or greater).
10337
10435
 
10338
- **Performance Optimization**: the process uses row signature memoization to avoid redundant
10339
- LLM calls. When multiple rows have identical values in the selected columns, only one
10340
- representative row is validated, and the result is applied to all matching rows. This can
10341
- dramatically reduce API costs and processing time for datasets with repetitive patterns.
10436
+ Thresholds can be defined using one of these input schemes:
10342
10437
 
10343
- The LLM receives data in this JSON format:
10438
+ 1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
10439
+ thresholds)
10440
+ 2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1` is
10441
+ the 'error' level, and position `2` is the 'critical' level
10442
+ 3. create a dictionary of 1-3 value entries; the valid keys are 'warning', 'error', and
10443
+ 'critical'
10444
+ 4. a single integer/float value denoting absolute number or fraction of failing test units
10445
+ for the 'warning' level only
10344
10446
 
10345
- ```json
10346
- {
10347
- "columns": ["col1", "col2", "col3"],
10348
- "rows": [
10349
- {"col1": "value1", "col2": "value2", "col3": "value3", "_pb_row_index": 0},
10350
- {"col1": "value4", "col2": "value5", "col3": "value6", "_pb_row_index": 1}
10351
- ]
10352
- }
10353
- ```
10447
+ If the number of failing test units exceeds set thresholds, the validation step will be
10448
+ marked as 'warning', 'error', or 'critical'. Not all of the threshold levels need to be
10449
+ set; you're free to set any combination of them.
10354
10450
 
10355
- The LLM returns validation results in this format:
10356
- ```json
10357
- [
10358
- {"index": 0, "result": true},
10359
- {"index": 1, "result": false}
10360
- ]
10361
- ```
10362
-
10363
- Prompt Design Tips
10364
- ------------------
10365
- For best results, design prompts that are:
10366
-
10367
- - boolean-oriented: frame validation criteria to elicit clear valid/invalid responses
10368
- - specific: clearly define what makes a row valid/invalid
10369
- - unambiguous: avoid subjective language that could be interpreted differently
10370
- - context-aware: include relevant business rules or domain knowledge
10371
- - example-driven: consider providing examples in the prompt when helpful
10372
-
10373
- **Critical**: Prompts must be designed so the LLM can determine whether each row passes or
10374
- fails the validation criteria. The system expects binary validation responses, so avoid
10375
- open-ended questions or prompts that might generate explanatory text instead of clear
10376
- pass/fail judgments.
10377
-
10378
- Good prompt examples:
10379
-
10380
- - "Each row should contain a valid email address in the 'email' column and a non-empty name
10381
- in the 'name' column"
10382
- - "The 'sentiment' column should contain positive sentiment words (happy, good, excellent,
10383
- etc.)"
10384
- - "Product descriptions should mention at least one technical specification"
10385
-
10386
- Poor prompt examples (avoid these):
10387
-
10388
- - "What do you think about this data?" (too open-ended)
10389
- - "Describe the quality of each row" (asks for description, not validation)
10390
- - "How would you improve this data?" (asks for suggestions, not pass/fail)
10391
-
10392
- Performance Considerations
10393
- --------------------------
10394
- AI validation is significantly slower than traditional validation methods due to API calls
10395
- to LLM providers. However, performance varies dramatically based on data characteristics:
10396
-
10397
- **High Memoization Scenarios** (seconds to minutes):
10398
-
10399
- - data with many duplicate rows in the selected columns
10400
- - low cardinality data (repeated patterns)
10401
- - small number of unique row combinations
10402
-
10403
- **Low Memoization Scenarios** (minutes to hours):
10404
-
10405
- - high cardinality data with mostly unique rows
10406
- - large datasets with few repeated patterns
10407
- - all or most rows requiring individual LLM evaluation
10408
-
10409
- The row signature memoization optimization can reduce processing time significantly when
10410
- data has repetitive patterns. For datasets where every row is unique, expect longer
10411
- processing times similar to validating each row individually.
10412
-
10413
- **Strategies to Reduce Processing Time**:
10414
-
10415
- - test on data slices: define a sampling function like `def sample_1000(df): return df.head(1000)`
10416
- and use `pre=sample_1000` to validate on smaller samples
10417
- - filter relevant data: define filter functions like `def active_only(df): return df.filter(df["status"] == "active")`
10418
- and use `pre=active_only` to focus on a specific subset
10419
- - optimize column selection: use `columns_subset=` to include only the columns necessary
10420
- for validation
10421
- - start with smaller batches: begin with `batch_size=100` for testing, then increase
10422
- gradually
10423
- - reduce concurrency: lower `max_concurrent=1` if hitting rate limits
10424
- - use faster/cheaper models: consider using smaller or more efficient models for initial
10425
- testing before switching to more capable models
10451
+ Aside from reporting failure conditions, thresholds can be used to determine the actions to
10452
+ take for each level of failure (using the `actions=` parameter).
10426
10453
 
10427
10454
  Examples
10428
10455
  --------
@@ -10432,139 +10459,84 @@ class Validate:
10432
10459
  import pointblank as pb
10433
10460
  pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
10434
10461
  ```
10435
- The following examples demonstrate how to use AI validation for different types of data
10436
- quality checks. These examples show both basic usage and more advanced configurations with
10437
- custom thresholds and actions.
10438
-
10439
- **Basic AI validation example:**
10440
-
10441
- This first example shows a simple validation scenario where we want to check that customer
10442
- records have both valid email addresses and non-empty names. Notice how we use
10443
- `columns_subset=` to focus only on the relevant columns, which improves both performance
10444
- and cost-effectiveness.
10462
+ For the examples here, we'll use a simple Polars DataFrame with three string columns
10463
+ (`col_1`, `col_2`, and `col_3`). The table is shown below:
10445
10464
 
10446
- ```python
10465
+ ```{python}
10447
10466
  import pointblank as pb
10448
10467
  import polars as pl
10449
10468
 
10450
- # Sample data with email and name columns
10451
- tbl = pl.DataFrame({
10452
- "email": ["john@example.com", "invalid-email", "jane@test.org"],
10453
- "name": ["John Doe", "", "Jane Smith"],
10454
- "age": [25, 30, 35]
10455
- })
10469
+ tbl = pl.DataFrame(
10470
+ {
10471
+ "col_1": ["a", None, "c", "d"],
10472
+ "col_2": ["a", "a", "c", None],
10473
+ "col_3": ["a", "a", "d", None],
10474
+ }
10475
+ )
10456
10476
 
10457
- # Validate using AI
10477
+ pb.preview(tbl)
10478
+ ```
10479
+
10480
+ Let's validate that the rows in the table are complete with `rows_complete()`. We'll
10481
+ determine if this validation had any failing test units (there are four test units, one for
10482
+ each row). A failing test unit means that a given row is not complete (i.e., has at least
10483
+ one missing value).
10484
+
10485
+ ```{python}
10458
10486
  validation = (
10459
10487
  pb.Validate(data=tbl)
10460
- .prompt(
10461
- prompt="Each row should have a valid email address and a non-empty name",
10462
- columns_subset=["email", "name"], # Only check these columns
10463
- model="openai:gpt-4o-mini",
10464
- )
10488
+ .rows_complete()
10465
10489
  .interrogate()
10466
10490
  )
10467
10491
 
10468
10492
  validation
10469
10493
  ```
10470
10494
 
10471
- In this example, the AI will identify that the second row fails validation because it has
10472
- an invalid email format (`"invalid-email"`) and the third row also fails because it has an
10473
- empty name field. The validation results will show 2 out of 3 rows failing the criteria.
10474
-
10475
- **Advanced example with custom thresholds:**
10476
-
10477
- This more sophisticated example demonstrates how to use AI validation with custom thresholds
10478
- and actions. Here we're validating phone number formats to ensure they include area codes,
10479
- which is a common data quality requirement for customer contact information.
10495
+ From this validation table we see that there are two failing test units. This is because
10496
+ two rows in the table have at least one missing value (the second row and the last row).
10480
10497
 
10481
- ```python
10482
- customer_data = pl.DataFrame({
10483
- "customer_id": [1, 2, 3, 4, 5],
10484
- "name": ["John Doe", "Jane Smith", "Bob Johnson", "Alice Brown", "Charlie Davis"],
10485
- "phone_number": [
10486
- "(555) 123-4567", # Valid with area code
10487
- "555-987-6543", # Valid with area code
10488
- "123-4567", # Missing area code
10489
- "(800) 555-1234", # Valid with area code
10490
- "987-6543" # Missing area code
10491
- ]
10492
- })
10498
+ We can also use a subset of columns to determine completeness. Let's specify the subset
10499
+ using columns `col_2` and `col_3` for the next validation.
10493
10500
 
10501
+ ```{python}
10494
10502
  validation = (
10495
- pb.Validate(data=customer_data)
10496
- .prompt(
10497
- prompt="Do all the phone numbers include an area code?",
10498
- columns_subset="phone_number", # Only check the `phone_number` column
10499
- model="openai:gpt-4o",
10500
- batch_size=500,
10501
- max_concurrent=5,
10502
- thresholds=pb.Thresholds(warning=0.1, error=0.2, critical=0.3),
10503
- actions=pb.Actions(error="Too many phone numbers missing area codes.")
10504
- )
10503
+ pb.Validate(data=tbl)
10504
+ .rows_complete(columns_subset=["col_2", "col_3"])
10505
10505
  .interrogate()
10506
10506
  )
10507
+
10508
+ validation
10507
10509
  ```
10508
10510
 
10509
- This validation will identify that 2 out of 5 phone numbers (40%) are missing area codes,
10510
- which exceeds all threshold levels. The validation will trigger the specified error action
10511
- since the failure rate (40%) is above the error threshold (20%). The AI can recognize
10512
- various phone number formats and determine whether they include area codes.
10511
+ The validation table reports a single failing test unit. The last row contains missing
10512
+ values in both the `col_2` and `col_3` columns.
10513
10514
  """
10514
10515
 
10515
10516
  assertion_type = _get_fn_name()
10516
10517
 
10517
- # Validation of inputs
10518
- if not isinstance(prompt, str) or not prompt.strip():
10519
- raise ValueError("prompt must be a non-empty string")
10520
-
10521
- # Parse the provider and model name from the `model=` argument
10522
- try:
10523
- provider, model_name = model.split(sep=":", maxsplit=1)
10524
- except ValueError:
10525
- raise ValueError(f"Model must be in format 'provider:model_name', got: {model}")
10526
-
10527
- # Error if an unsupported provider is used
10528
- if provider not in MODEL_PROVIDERS:
10529
- raise ValueError(
10530
- f"Unsupported provider: {provider}. Supported providers are {MODEL_PROVIDERS}."
10531
- )
10532
-
10533
- # Ensure that `batch_size` and `max_concurrent` are positive integers
10534
- if not isinstance(batch_size, int) or batch_size < 1:
10535
- raise ValueError("batch_size must be a positive integer")
10536
- if not isinstance(max_concurrent, int) or max_concurrent < 1:
10537
- raise ValueError("max_concurrent must be a positive integer")
10538
-
10539
10518
  _check_pre(pre=pre)
10519
+ # TODO: add check for segments
10520
+ # _check_segments(segments=segments)
10540
10521
  _check_thresholds(thresholds=thresholds)
10541
10522
  _check_boolean_input(param=active, param_name="active")
10542
10523
 
10543
- # Promote a single column given as a string to a list
10544
- if columns_subset is not None and isinstance(columns_subset, str):
10545
- columns_subset = [columns_subset]
10546
-
10547
10524
  # Determine threshold to use (global or local) and normalize a local `thresholds=` value
10548
10525
  thresholds = (
10549
10526
  self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
10550
10527
  )
10551
10528
 
10529
+ if columns_subset is not None and isinstance(columns_subset, str): # pragma: no cover
10530
+ columns_subset = [columns_subset] # pragma: no cover
10531
+
10532
+ # TODO: incorporate Column object
10533
+
10552
10534
  # Determine brief to use (global or local) and transform any shorthands of `brief=`
10553
10535
  brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
10554
10536
 
10555
- # Package up the AI-specific parameters as a dictionary for later use
10556
- ai_config = {
10557
- "prompt": prompt,
10558
- "llm_provider": provider,
10559
- "llm_model": model_name,
10560
- "batch_size": batch_size,
10561
- "max_concurrent": max_concurrent,
10562
- }
10563
-
10564
10537
  val_info = _ValidationInfo(
10565
10538
  assertion_type=assertion_type,
10566
10539
  column=columns_subset,
10567
- values=ai_config,
10568
10540
  pre=pre,
10569
10541
  segments=segments,
10570
10542
  thresholds=thresholds,
@@ -10577,66 +10549,81 @@ class Validate:
10577
10549
 
10578
10550
  return self
10579
10551
 
10580
- def col_schema_match(
10552
+ def prompt(
10581
10553
  self,
10582
- schema: Schema,
10583
- complete: bool = True,
10584
- in_order: bool = True,
10585
- case_sensitive_colnames: bool = True,
10586
- case_sensitive_dtypes: bool = True,
10587
- full_match_dtypes: bool = True,
10554
+ prompt: str,
10555
+ model: str,
10556
+ columns_subset: str | list[str] | None = None,
10557
+ batch_size: int = 1000,
10558
+ max_concurrent: int = 3,
10588
10559
  pre: Callable | None = None,
10560
+ segments: SegmentSpec | None = None,
10589
10561
  thresholds: int | float | bool | tuple | dict | Thresholds = None,
10590
10562
  actions: Actions | None = None,
10591
10563
  brief: str | bool | None = None,
10592
10564
  active: bool = True,
10593
10565
  ) -> Validate:
10594
10566
  """
10595
- Do columns in the table (and their types) match a predefined schema?
10567
+ Validate rows using AI/LLM-powered analysis.
10596
10568
 
10597
- The `col_schema_match()` method works in conjunction with an object generated by the
10598
- [`Schema`](`pointblank.Schema`) class. That class object is the expectation for the actual
10599
- schema of the target table. The validation step operates over a single test unit, which is
10600
- whether the schema matches that of the table (within the constraints enforced by the
10601
- `complete=`, and `in_order=` options).
10569
+ The `prompt()` validation method uses Large Language Models (LLMs) to validate rows of data
10570
+ based on natural language criteria. Similar to other Pointblank validation methods, this
10571
+ generates binary test results (pass/fail) that integrate seamlessly with the standard
10572
+ reporting framework.
10573
+
10574
+ Like `col_vals_*()` methods, `prompt()` evaluates data against specific criteria, but
10575
+ instead of using programmatic rules, it uses natural language prompts interpreted by an LLM.
10576
+ Like `rows_distinct()` and `rows_complete()`, it operates at the row level and allows you to
10577
+ specify a subset of columns for evaluation using `columns_subset=`.
10578
+
10579
+ The system automatically combines your validation criteria from the `prompt=` parameter with
10580
+ the necessary technical context, data formatting instructions, and response structure
10581
+ requirements. This is all so you only need to focus on describing your validation logic in
10582
+ plain language.
10583
+
10584
+ Each row becomes a test unit that either passes or fails the validation criteria, producing
10585
+ the familiar True/False results that appear in Pointblank validation reports. This method
10586
+ is particularly useful for complex validation rules that are difficult to express with
10587
+ traditional validation methods, such as semantic checks, context-dependent validation, or
10588
+ subjective quality assessments.
10602
10589
 
10603
10590
  Parameters
10604
10591
  ----------
10605
- schema
10606
- A `Schema` object that represents the expected schema of the table. This object is
10607
- generated by the [`Schema`](`pointblank.Schema`) class.
10608
- complete
10609
- Should the schema match be complete? If `True`, then the target table must have all
10610
- columns specified in the schema. If `False`, then the table can have additional columns
10611
- not in the schema (i.e., the schema is a subset of the target table's columns).
10612
- in_order
10613
- Should the schema match be in order? If `True`, then the columns in the schema must
10614
- appear in the same order as they do in the target table. If `False`, then the order of
10615
- columns in the schema and the target table can differ.
10616
- case_sensitive_colnames
10617
- Should the schema match be case-sensitive with regard to column names? If `True`, then
10618
- the column names in the schema and the target table must match exactly. If `False`, then
10619
- the column names are compared in a case-insensitive manner.
10620
- case_sensitive_dtypes
10621
- Should the schema match be case-sensitive with regard to column data types? If `True`,
10622
- then the column data types in the schema and the target table must match exactly. If
10623
- `False`, then the column data types are compared in a case-insensitive manner.
10624
- full_match_dtypes
10625
- Should the schema match require a full match of data types? If `True`, then the column
10626
- data types in the schema and the target table must match exactly. If `False` then
10627
- substring matches are allowed, so a schema data type of `Int` would match a target table
10628
- data type of `Int64`.
10592
+ prompt
10593
+ A natural language description of the validation criteria. This prompt should clearly
10594
+ describe what constitutes valid vs invalid rows. Some examples:
10595
+ `"Each row should contain a valid email address and a realistic person name"`,
10596
+ `"Values should indicate positive sentiment"`,
10597
+ `"The description should mention a country name"`.
10598
+ columns_subset
10599
+ A single column or list of columns to include in the validation. If `None`, all columns
10600
+ will be included. Specifying fewer columns can improve performance and reduce API costs,
10601
+ so try to include only the columns necessary for the validation.
10602
+ model
10603
+ The model to be used. This should be in the form of `provider:model` (e.g.,
10604
+ `"anthropic:claude-sonnet-4-5"`). Supported providers are `"anthropic"`, `"openai"`,
10605
+ `"ollama"`, and `"bedrock"`. The model name should be the specific model to be used from
10606
+ the provider. Model names are subject to change, so consult the provider's documentation
10607
+ for the most up-to-date model names.
10608
+ batch_size
10609
+ Number of rows to process in each batch. Larger batches are more efficient but may hit
10610
+ API limits. Default is `1000`.
10611
+ max_concurrent
10612
+ Maximum number of concurrent API requests. Higher values speed up processing but may
10613
+ hit rate limits. Default is `3`.
10629
10614
  pre
10630
10615
  An optional preprocessing function or lambda to apply to the data table during
10631
10616
  interrogation. This function should take a table as input and return a modified table.
10632
- Have a look at the *Preprocessing* section for more information on how to use this
10633
- argument.
10617
+ segments
10618
+ An optional directive on segmentation, which serves to split a validation step into
10619
+ multiple (one step per segment). Can be a single column name, a tuple that specifies a
10620
+ column name and its corresponding values to segment on, or a combination of both
10621
+ (provided as a list).
10634
10622
  thresholds
10635
10623
  Set threshold failure levels for reporting and reacting to exceedences of the levels.
10636
10624
  The thresholds are set at the step level and will override any global thresholds set in
10637
10625
  `Validate(thresholds=...)`. The default is `None`, which means that no thresholds will
10638
- be set locally and global thresholds (if any) will take effect. Look at the *Thresholds*
10639
- section for information on how to set threshold levels.
10626
+ be set locally and global thresholds (if any) will take effect.
10640
10627
  actions
10641
10628
  Optional actions to take when the validation step meets or exceeds any set threshold
10642
10629
  levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
@@ -10657,154 +10644,314 @@ class Validate:
10657
10644
  Validate
10658
10645
  The `Validate` object with the added validation step.
10659
10646
 
10660
- Preprocessing
10661
- -------------
10662
- The `pre=` argument allows for a preprocessing function or lambda to be applied to the data
10663
- table during interrogation. This function should take a table as input and return a modified
10664
- table. This is useful for performing any necessary transformations or filtering on the data
10665
- before the validation step is applied.
10647
+ Constructing the `model` Argument
10648
+ ---------------------------------
10649
+ The `model=` argument should be constructed using the provider and model name separated by a
10650
+ colon (`provider:model`). The provider text can be any of:
10666
10651
 
10667
- The preprocessing function can be any callable that takes a table as input and returns a
10668
- modified table. Regarding the lifetime of the transformed table, it only exists during the
10669
- validation step and is not stored in the `Validate` object or used in subsequent validation
10670
- steps.
10652
+ - `"anthropic"` (Anthropic)
10653
+ - `"openai"` (OpenAI)
10654
+ - `"ollama"` (Ollama)
10655
+ - `"bedrock"` (Amazon Bedrock)
10671
10656
 
10672
- Thresholds
10673
- ----------
10674
- The `thresholds=` parameter is used to set the failure-condition levels for the validation
10675
- step. If they are set here at the step level, these thresholds will override any thresholds
10676
- set at the global level in `Validate(thresholds=...)`.
10657
+ The model name should be the specific model to be used from the provider. Model names are
10658
+ subject to change, so consult the provider's documentation for the most up-to-date model
10659
+ names.
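+
+ Since the string is split on the first colon only, anything after that colon is kept as
+ the model name. A quick sketch of how such a string is taken apart (purely illustrative;
+ this mirrors the parsing described above rather than any public helper):
+
+ ```python
+ model = "anthropic:claude-sonnet-4-5"
+
+ # Split on the first ":" only; everything after it is the model name
+ provider, model_name = model.split(":", 1)
+ # provider == "anthropic", model_name == "claude-sonnet-4-5"
+ ```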
10677
10660
 
10678
- There are three threshold levels: 'warning', 'error', and 'critical'. The threshold values
10679
- can either be set as a proportion failing of all test units (a value between `0` to `1`),
10680
- or, the absolute number of failing test units (as integer that's `1` or greater).
10661
+ Notes on Authentication
10662
+ -----------------------
10663
+ API keys are automatically loaded from environment variables or `.env` files and are **not**
10664
+ stored in the validation object for security reasons. You should consider using a secure
10665
+ method for handling API keys.
10681
10666
 
10682
- Thresholds can be defined using one of these input schemes:
10667
+ One way to do this is to load the API key from an environment variable and retrieve it using
10668
+ the `os` module (specifically the `os.getenv()` function). Places to store the API key might
10669
+ include `.bashrc`, `.bash_profile`, `.zshrc`, or `.zsh_profile`.
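+
+ For example, with a line like `export OPENAI_API_KEY="..."` added to one of those shell
+ startup files, the key can be read back in Python using only the standard library (a
+ minimal sketch):
+
+ ```python
+ import os
+
+ # Returns the key string if the environment variable is set, otherwise None
+ api_key = os.getenv("OPENAI_API_KEY")
+ ```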
10683
10670
 
10684
- 1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
10685
- thresholds)
10686
- 2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1` is
10687
- the 'error' level, and position `2` is the 'critical' level
10688
- 3. create a dictionary of 1-3 value entries; the valid keys: are 'warning', 'error', and
10689
- 'critical'
10690
- 4. a single integer/float value denoting absolute number or fraction of failing test units
10691
- for the 'warning' level only
10671
+ Another solution is to store one or more model provider API keys in an `.env` file (in the
10672
+ root of your project). If the API keys have correct names (e.g., `ANTHROPIC_API_KEY` or
10673
+ `OPENAI_API_KEY`) then the AI validation will automatically load the API key from the `.env`
10674
+ file. An `.env` file might look like this:
10692
10675
 
10693
- If the number of failing test units exceeds set thresholds, the validation step will be
10694
- marked as 'warning', 'error', or 'critical'. All of the threshold levels don't need to be
10695
- set, you're free to set any combination of them.
10676
+ ```plaintext
10677
+ ANTHROPIC_API_KEY="your_anthropic_api_key_here"
10678
+ OPENAI_API_KEY="your_openai_api_key_here"
10679
+ ```
10696
10680
 
10697
- Aside from reporting failure conditions, thresholds can be used to determine the actions to
10698
- take for each level of failure (using the `actions=` parameter).
10681
+ There's no need to have the `python-dotenv` package installed when using `.env` files in
10682
+ this way.
10699
10683
 
10700
- Examples
10701
- --------
10702
- ```{python}
10703
- #| echo: false
10704
- #| output: false
10705
- import pointblank as pb
10706
- pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
10707
- ```
10684
+ **Provider-specific setup**:
10708
10685
 
10709
- For the examples here, we'll use a simple Polars DataFrame with three columns (string,
10710
- integer, and float). The table is shown below:
10686
+ - **OpenAI**: set `OPENAI_API_KEY` environment variable or create `.env` file
10687
+ - **Anthropic**: set `ANTHROPIC_API_KEY` environment variable or create `.env` file
10688
+ - **Ollama**: no API key required, just ensure Ollama is running locally
10689
+ - **Bedrock**: configure AWS credentials through standard AWS methods
10711
10690
 
10712
- ```{python}
10713
- import pointblank as pb
10714
- import polars as pl
10691
+ AI Validation Process
10692
+ ---------------------
10693
+ The AI validation process works as follows:
10715
10694
 
10716
- tbl = pl.DataFrame(
10717
- {
10718
- "a": ["apple", "banana", "cherry", "date"],
10719
- "b": [1, 6, 3, 5],
10720
- "c": [1.1, 2.2, 3.3, 4.4],
10721
- }
10722
- )
10695
+ 1. data batching: the data is split into batches of the specified size
10696
+ 2. row deduplication: duplicate rows (based on selected columns) are identified and only
10697
+ unique combinations are sent to the LLM for analysis
10698
+ 3. json conversion: each batch of unique rows is converted to JSON format for the LLM
10699
+ 4. prompt construction: the user prompt is embedded in a structured system prompt
10700
+ 5. llm processing: each batch is sent to the LLM for analysis
10701
+ 6. response parsing: LLM responses are parsed to extract validation results
10702
+ 7. result projection: results are mapped back to all original rows using row signatures
10703
+ 8. result aggregation: results from all batches are combined
10723
10704
 
10724
- pb.preview(tbl)
10725
- ```
10705
+ **Performance Optimization**: the process uses row signature memoization to avoid redundant
10706
+ LLM calls. When multiple rows have identical values in the selected columns, only one
10707
+ representative row is validated, and the result is applied to all matching rows. This can
10708
+ dramatically reduce API costs and processing time for datasets with repetitive patterns.
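+
+ A rough sketch of the memoization idea described above (illustrative only; the
+ `validate_row()` helper is a stand-in for the per-row LLM judgment and is not part of the
+ package API):
+
+ ```python
+ # Stand-in for the LLM judgment applied to one unique row signature
+ def validate_row(row: tuple) -> bool:
+     return "@" in row[0] and bool(row[1])
+
+ rows = [("a@x.com", "Ann"), ("a@x.com", "Ann"), ("b@y.com", "")]
+
+ # Cache one verdict per unique row signature; duplicate rows reuse it
+ verdicts: dict[tuple, bool] = {}
+ results = []
+ for row in rows:
+     if row not in verdicts:          # only unseen signatures get a judgment
+         verdicts[row] = validate_row(row)
+     results.append(verdicts[row])
+ # results == [True, True, False]; validate_row() ran only twice
+ ```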
10726
10709
 
10727
- Let's validate that the columns in the table match a predefined schema. A schema can be
10728
- defined using the [`Schema`](`pointblank.Schema`) class.
10710
+ The LLM receives data in this JSON format:
10729
10711
 
10730
- ```{python}
10731
- schema = pb.Schema(
10732
- columns=[("a", "String"), ("b", "Int64"), ("c", "Float64")]
10733
- )
10712
+ ```json
10713
+ {
10714
+ "columns": ["col1", "col2", "col3"],
10715
+ "rows": [
10716
+ {"col1": "value1", "col2": "value2", "col3": "value3", "_pb_row_index": 0},
10717
+ {"col1": "value4", "col2": "value5", "col3": "value6", "_pb_row_index": 1}
10718
+ ]
10719
+ }
10734
10720
  ```
10735
10721
 
10736
- You can print the schema object to verify that the expected schema is as intended.
10737
-
10738
- ```{python}
10739
- print(schema)
10722
+ The LLM returns validation results in this format:
10723
+ ```json
10724
+ [
10725
+ {"index": 0, "result": true},
10726
+ {"index": 1, "result": false}
10727
+ ]
10740
10728
  ```
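+
+ As a purely illustrative sketch, a response in that shape can be turned back into per-row
+ pass/fail flags with nothing more than the standard library:
+
+ ```python
+ import json
+
+ response = '[{"index": 0, "result": true}, {"index": 1, "result": false}]'
+
+ # Map each returned index to its boolean verdict
+ verdict_by_index = {item["index"]: item["result"] for item in json.loads(response)}
+ # verdict_by_index == {0: True, 1: False}
+ ```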
10741
10729
 
10742
- Now, we'll use the `col_schema_match()` method to validate the table against the expected
10743
- `schema` object. There is a single test unit for this validation step (whether the schema
10744
- matches the table or not).
10730
+ Prompt Design Tips
10731
+ ------------------
10732
+ For best results, design prompts that are:
10745
10733
 
10746
- ```{python}
10747
- validation = (
10748
- pb.Validate(data=tbl)
10749
- .col_schema_match(schema=schema)
10750
- .interrogate()
10751
- )
10734
+ - boolean-oriented: frame validation criteria to elicit clear valid/invalid responses
10735
+ - specific: clearly define what makes a row valid/invalid
10736
+ - unambiguous: avoid subjective language that could be interpreted differently
10737
+ - context-aware: include relevant business rules or domain knowledge
10738
+ - example-driven: consider providing examples in the prompt when helpful
10752
10739
 
10753
- validation
10754
- ```
10740
+ **Critical**: Prompts must be designed so the LLM can determine whether each row passes or
10741
+ fails the validation criteria. The system expects binary validation responses, so avoid
10742
+ open-ended questions or prompts that might generate explanatory text instead of clear
10743
+ pass/fail judgments.
10755
10744
 
10756
- The validation table shows that the schema matches the table. The single test unit passed
10757
- since the table columns and their types match the schema.
10758
- """
10745
+ Good prompt examples:
10759
10746
 
10760
- assertion_type = _get_fn_name()
10747
+ - "Each row should contain a valid email address in the 'email' column and a non-empty name
10748
+ in the 'name' column"
10749
+ - "The 'sentiment' column should contain positive sentiment words (happy, good, excellent,
10750
+ etc.)"
10751
+ - "Product descriptions should mention at least one technical specification"
10761
10752
 
10762
- _check_pre(pre=pre)
10763
- _check_thresholds(thresholds=thresholds)
10764
- _check_boolean_input(param=active, param_name="active")
10765
- _check_boolean_input(param=complete, param_name="complete")
10766
- _check_boolean_input(param=in_order, param_name="in_order")
10767
- _check_boolean_input(param=case_sensitive_colnames, param_name="case_sensitive_colnames")
10768
- _check_boolean_input(param=case_sensitive_dtypes, param_name="case_sensitive_dtypes")
10769
- _check_boolean_input(param=full_match_dtypes, param_name="full_match_dtypes")
10753
+ Poor prompt examples (avoid these):
10770
10754
 
10771
- # Determine threshold to use (global or local) and normalize a local `thresholds=` value
10772
- thresholds = (
10773
- self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
10774
- )
10755
+ - "What do you think about this data?" (too open-ended)
10756
+ - "Describe the quality of each row" (asks for description, not validation)
10757
+ - "How would you improve this data?" (asks for suggestions, not pass/fail)
10775
10758
 
10776
- # Package up the `schema=` and boolean params into a dictionary for later interrogation
10777
- values = {
10778
- "schema": schema,
10779
- "complete": complete,
10780
- "in_order": in_order,
10781
- "case_sensitive_colnames": case_sensitive_colnames,
10782
- "case_sensitive_dtypes": case_sensitive_dtypes,
10783
- "full_match_dtypes": full_match_dtypes,
10784
- }
10759
+ Performance Considerations
10760
+ --------------------------
10761
+ AI validation is significantly slower than traditional validation methods due to API calls
10762
+ to LLM providers. However, performance varies dramatically based on data characteristics:
10785
10763
 
10786
- # Determine brief to use (global or local) and transform any shorthands of `brief=`
10787
- brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
10764
+ **High Memoization Scenarios** (seconds to minutes):
10788
10765
 
10789
- val_info = _ValidationInfo(
10790
- assertion_type=assertion_type,
10791
- values=values,
10792
- pre=pre,
10793
- thresholds=thresholds,
10794
- actions=actions,
10795
- brief=brief,
10796
- active=active,
10797
- )
10766
+ - data with many duplicate rows in the selected columns
10767
+ - low cardinality data (repeated patterns)
10768
+ - small number of unique row combinations
10798
10769
 
10799
- self._add_validation(validation_info=val_info)
10770
+ **Low Memoization Scenarios** (minutes to hours):
10800
10771
 
10801
- return self
10772
+ - high cardinality data with mostly unique rows
10773
+ - large datasets with few repeated patterns
10774
+ - all or most rows requiring individual LLM evaluation
10802
10775
 
10803
- def row_count_match(
10804
- self,
10805
- count: int | FrameT | Any,
10806
- tol: Tolerance = 0,
10807
- inverse: bool = False,
10776
+ The row signature memoization optimization can reduce processing time significantly when
10777
+ data has repetitive patterns. For datasets where every row is unique, expect longer
10778
+ processing times similar to validating each row individually.
10779
+
10780
+ **Strategies to Reduce Processing Time**:
10781
+
10782
+ - test on data slices: define a sampling function like `def sample_1000(df): return df.head(1000)`
10783
+ and use `pre=sample_1000` to validate on smaller samples
10784
+ - filter relevant data: define filter functions like `def active_only(df): return df.filter(df["status"] == "active")`
10785
+ and use `pre=active_only` to focus on a specific subset
10786
+ - optimize column selection: use `columns_subset=` to include only the columns necessary
10787
+ for validation
10788
+ - start with smaller batches: begin with `batch_size=100` for testing, then increase
10789
+ gradually
10790
+ - reduce concurrency: lower `max_concurrent=1` if hitting rate limits
10791
+ - use faster/cheaper models: consider using smaller or more efficient models for initial
10792
+ testing before switching to more capable models
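+
+ Combining a few of these strategies into one minimal sketch (the data, prompt, and column
+ names below are placeholders):
+
+ ```python
+ import pointblank as pb
+ import polars as pl
+
+ tbl = pl.DataFrame(
+     {
+         "description": ["Blue widget, 5 V supply", "Legacy item, details unknown"],
+         "status": ["in_stock", "discontinued"],
+     }
+ )
+
+ def first_1000(df):
+     # Validate a small slice first to gauge cost and prompt quality
+     return df.head(1000)
+
+ validation = (
+     pb.Validate(data=tbl)
+     .prompt(
+         prompt="Each row should describe a product that is currently in stock",
+         columns_subset=["description", "status"],  # only the columns the prompt needs
+         model="openai:gpt-4o-mini",
+         batch_size=100,     # start small, then increase gradually
+         max_concurrent=1,   # lower concurrency if rate limits are hit
+         pre=first_1000,
+     )
+     .interrogate()
+ )
+ ```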
10793
+
10794
+ Examples
10795
+ --------
10796
+ ```{python}
10797
+ #| echo: false
10798
+ #| output: false
10799
+ import pointblank as pb
10800
+ pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
10801
+ ```
10802
+ The following examples demonstrate how to use AI validation for different types of data
10803
+ quality checks. These examples show both basic usage and more advanced configurations with
10804
+ custom thresholds and actions.
10805
+
10806
+ **Basic AI validation example:**
10807
+
10808
+ This first example shows a simple validation scenario where we want to check that customer
10809
+ records have both valid email addresses and non-empty names. Notice how we use
10810
+ `columns_subset=` to focus only on the relevant columns, which improves both performance
10811
+ and cost-effectiveness.
10812
+
10813
+ ```python
10814
+ import pointblank as pb
10815
+ import polars as pl
10816
+
10817
+ # Sample data with email and name columns
10818
+ tbl = pl.DataFrame({
10819
+ "email": ["john@example.com", "invalid-email", "jane@test.org"],
10820
+ "name": ["John Doe", "", "Jane Smith"],
10821
+ "age": [25, 30, 35]
10822
+ })
10823
+
10824
+ # Validate using AI
10825
+ validation = (
10826
+ pb.Validate(data=tbl)
10827
+ .prompt(
10828
+ prompt="Each row should have a valid email address and a non-empty name",
10829
+ columns_subset=["email", "name"], # Only check these columns
10830
+ model="openai:gpt-4o-mini",
10831
+ )
10832
+ .interrogate()
10833
+ )
10834
+
10835
+ validation
10836
+ ```
10837
+
10838
+ In this example, the AI will identify that the second row fails validation because it has
10839
+ an invalid email format (`"invalid-email"`) as well as an empty name field. The validation
10840
+ results will show 1 out of 3 rows failing the criteria.
10841
+
10842
+ **Advanced example with custom thresholds:**
10843
+
10844
+ This more sophisticated example demonstrates how to use AI validation with custom thresholds
10845
+ and actions. Here we're validating phone number formats to ensure they include area codes,
10846
+ which is a common data quality requirement for customer contact information.
10847
+
10848
+ ```python
10849
+ customer_data = pl.DataFrame({
10850
+ "customer_id": [1, 2, 3, 4, 5],
10851
+ "name": ["John Doe", "Jane Smith", "Bob Johnson", "Alice Brown", "Charlie Davis"],
10852
+ "phone_number": [
10853
+ "(555) 123-4567", # Valid with area code
10854
+ "555-987-6543", # Valid with area code
10855
+ "123-4567", # Missing area code
10856
+ "(800) 555-1234", # Valid with area code
10857
+ "987-6543" # Missing area code
10858
+ ]
10859
+ })
10860
+
10861
+ validation = (
10862
+ pb.Validate(data=customer_data)
10863
+ .prompt(
10864
+ prompt="Do all the phone numbers include an area code?",
10865
+ columns_subset="phone_number", # Only check the `phone_number` column
10866
+ model="openai:gpt-4o",
10867
+ batch_size=500,
10868
+ max_concurrent=5,
10869
+ thresholds=pb.Thresholds(warning=0.1, error=0.2, critical=0.3),
10870
+ actions=pb.Actions(error="Too many phone numbers missing area codes.")
10871
+ )
10872
+ .interrogate()
10873
+ )
10874
+ ```
10875
+
10876
+ This validation will identify that 2 out of 5 phone numbers (40%) are missing area codes,
10877
+ which exceeds all threshold levels. The validation will trigger the specified error action
10878
+ since the failure rate (40%) is above the error threshold (20%). The AI can recognize
10879
+ various phone number formats and determine whether they include area codes.
10880
+ """
10881
+
10882
+ assertion_type = _get_fn_name()
10883
+
10884
+ # Validation of inputs
10885
+ if not isinstance(prompt, str) or not prompt.strip():
10886
+ raise ValueError("prompt must be a non-empty string")
10887
+
10888
+ # Parse the provider and model name from the `model=` argument
10889
+ try:
10890
+ provider, model_name = model.split(sep=":", maxsplit=1)
10891
+ except ValueError:
10892
+ raise ValueError(f"Model must be in format 'provider:model_name', got: {model}")
10893
+
10894
+ # Error if an unsupported provider is used
10895
+ if provider not in MODEL_PROVIDERS:
10896
+ raise ValueError(
10897
+ f"Unsupported provider: {provider}. Supported providers are {MODEL_PROVIDERS}."
10898
+ )
10899
+
10900
+ # Ensure that `batch_size` and `max_concurrent` are positive integers
10901
+ if not isinstance(batch_size, int) or batch_size < 1:
10902
+ raise ValueError("batch_size must be a positive integer")
10903
+ if not isinstance(max_concurrent, int) or max_concurrent < 1:
10904
+ raise ValueError("max_concurrent must be a positive integer")
10905
+
10906
+ _check_pre(pre=pre)
10907
+ _check_thresholds(thresholds=thresholds)
10908
+ _check_boolean_input(param=active, param_name="active")
10909
+
10910
+ # Promote a single column given as a string to a list
10911
+ if columns_subset is not None and isinstance(columns_subset, str):
10912
+ columns_subset = [columns_subset]
10913
+
10914
+ # Determine threshold to use (global or local) and normalize a local `thresholds=` value
10915
+ thresholds = (
10916
+ self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
10917
+ )
10918
+
10919
+ # Determine brief to use (global or local) and transform any shorthands of `brief=`
10920
+ brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
10921
+
10922
+ # Package up the AI-specific parameters as a dictionary for later use
10923
+ ai_config = {
10924
+ "prompt": prompt,
10925
+ "llm_provider": provider,
10926
+ "llm_model": model_name,
10927
+ "batch_size": batch_size,
10928
+ "max_concurrent": max_concurrent,
10929
+ }
10930
+
10931
+ val_info = _ValidationInfo(
10932
+ assertion_type=assertion_type,
10933
+ column=columns_subset,
10934
+ values=ai_config,
10935
+ pre=pre,
10936
+ segments=segments,
10937
+ thresholds=thresholds,
10938
+ actions=actions,
10939
+ brief=brief,
10940
+ active=active,
10941
+ )
10942
+
10943
+ self._add_validation(validation_info=val_info)
10944
+
10945
+ return self
10946
+
10947
+ def col_schema_match(
10948
+ self,
10949
+ schema: Schema,
10950
+ complete: bool = True,
10951
+ in_order: bool = True,
10952
+ case_sensitive_colnames: bool = True,
10953
+ case_sensitive_dtypes: bool = True,
10954
+ full_match_dtypes: bool = True,
10808
10955
  pre: Callable | None = None,
10809
10956
  thresholds: int | float | bool | tuple | dict | Thresholds = None,
10810
10957
  actions: Actions | None = None,
@@ -10812,33 +10959,40 @@ class Validate:
10812
10959
  active: bool = True,
10813
10960
  ) -> Validate:
10814
10961
  """
10815
- Validate whether the row count of the table matches a specified count.
10816
-
10817
- The `row_count_match()` method checks whether the row count of the target table matches a
10818
- specified count. This validation will operate over a single test unit, which is whether the
10819
- row count matches the specified count.
10962
+ Do columns in the table (and their types) match a predefined schema?
10820
10963
 
10821
- We also have the option to invert the validation step by setting `inverse=True`. This will
10822
- make the expectation that the row count of the target table *does not* match the specified
10823
- count.
10964
+ The `col_schema_match()` method works in conjunction with an object generated by the
10965
+ [`Schema`](`pointblank.Schema`) class. That class object is the expectation for the actual
10966
+ schema of the target table. The validation step operates over a single test unit, which is
10967
+ whether the schema matches that of the table (within the constraints enforced by the
10968
+ `complete=` and `in_order=` options).
10824
10969
 
10825
10970
  Parameters
10826
10971
  ----------
10827
- count
10828
- The expected row count of the table. This can be an integer value, a Polars or Pandas
10829
- DataFrame object, or an Ibis backend table. If a DataFrame/table is provided, the row
10830
- count of that object will be used as the expected count.
10831
- tol
10832
- The tolerance allowable for the row count match. This can be specified as a single
10833
- numeric value (integer or float) or as a tuple of two integers representing the lower
10834
- and upper bounds of the tolerance range. If a single integer value (greater than 1) is
10835
- provided, it represents the absolute bounds of the tolerance, ie. plus or minus the value.
10836
- If a float value (between 0-1) is provided, it represents the relative tolerance, ie.
10837
- plus or minus the relative percentage of the target. If a tuple is provided, it represents
10838
- the lower and upper absolute bounds of the tolerance range. See the examples for more.
10839
- inverse
10840
- Should the validation step be inverted? If `True`, then the expectation is that the row
10841
- count of the target table should not match the specified `count=` value.
10972
+ schema
10973
+ A `Schema` object that represents the expected schema of the table. This object is
10974
+ generated by the [`Schema`](`pointblank.Schema`) class.
10975
+ complete
10976
+ Should the schema match be complete? If `True`, then the target table must have all
10977
+ columns specified in the schema. If `False`, then the table can have additional columns
10978
+ not in the schema (i.e., the schema is a subset of the target table's columns).
10979
+ in_order
10980
+ Should the schema match be in order? If `True`, then the columns in the schema must
10981
+ appear in the same order as they do in the target table. If `False`, then the order of
10982
+ columns in the schema and the target table can differ.
10983
+ case_sensitive_colnames
10984
+ Should the schema match be case-sensitive with regard to column names? If `True`, then
10985
+ the column names in the schema and the target table must match exactly. If `False`, then
10986
+ the column names are compared in a case-insensitive manner.
10987
+ case_sensitive_dtypes
10988
+ Should the schema match be case-sensitive with regard to column data types? If `True`,
10989
+ then the column data types in the schema and the target table must match exactly. If
10990
+ `False`, then the column data types are compared in a case-insensitive manner.
10991
+ full_match_dtypes
10992
+ Should the schema match require a full match of data types? If `True`, then the column
10993
+ data types in the schema and the target table must match exactly. If `False` then
10994
+ substring matches are allowed, so a schema data type of `Int` would match a target table
10995
+ data type of `Int64`.
10842
10996
  pre
10843
10997
  An optional preprocessing function or lambda to apply to the data table during
10844
10998
  interrogation. This function should take a table as input and return a modified table.
@@ -10878,10 +11032,9 @@ class Validate:
10878
11032
  before the validation step is applied.
10879
11033
 
10880
11034
  The preprocessing function can be any callable that takes a table as input and returns a
10881
- modified table. For example, you could use a lambda function to filter the table based on
10882
- certain criteria or to apply a transformation to the data. Regarding the lifetime of the
10883
- transformed table, it only exists during the validation step and is not stored in the
10884
- `Validate` object or used in subsequent validation steps.
11035
+ modified table. Regarding the lifetime of the transformed table, it only exists during the
11036
+ validation step and is not stored in the `Validate` object or used in subsequent validation
11037
+ steps.
10885
11038
 
10886
11039
  Thresholds
10887
11040
  ----------
@@ -10917,18 +11070,232 @@ class Validate:
10917
11070
  #| echo: false
10918
11071
  #| output: false
10919
11072
  import pointblank as pb
10920
- pb.config(report_incl_header=False, report_incl_footer=False)
11073
+ pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
10921
11074
  ```
10922
11075
 
10923
- For the examples here, we'll use the built in dataset `"small_table"`. The table can be
10924
- obtained by calling `load_dataset("small_table")`.
11076
+ For the examples here, we'll use a simple Polars DataFrame with three columns (string,
11077
+ integer, and float). The table is shown below:
10925
11078
 
10926
11079
  ```{python}
10927
11080
  import pointblank as pb
11081
+ import polars as pl
10928
11082
 
10929
- small_table = pb.load_dataset("small_table")
11083
+ tbl = pl.DataFrame(
11084
+ {
11085
+ "a": ["apple", "banana", "cherry", "date"],
11086
+ "b": [1, 6, 3, 5],
11087
+ "c": [1.1, 2.2, 3.3, 4.4],
11088
+ }
11089
+ )
10930
11090
 
10931
- pb.preview(small_table)
11091
+ pb.preview(tbl)
11092
+ ```
11093
+
11094
+ Let's validate that the columns in the table match a predefined schema. A schema can be
11095
+ defined using the [`Schema`](`pointblank.Schema`) class.
11096
+
11097
+ ```{python}
11098
+ schema = pb.Schema(
11099
+ columns=[("a", "String"), ("b", "Int64"), ("c", "Float64")]
11100
+ )
11101
+ ```
11102
+
11103
+ You can print the schema object to verify that the expected schema is as intended.
11104
+
11105
+ ```{python}
11106
+ print(schema)
11107
+ ```
11108
+
11109
+ Now, we'll use the `col_schema_match()` method to validate the table against the expected
11110
+ `schema` object. There is a single test unit for this validation step (whether the schema
11111
+ matches the table or not).
11112
+
11113
+ ```{python}
11114
+ validation = (
11115
+ pb.Validate(data=tbl)
11116
+ .col_schema_match(schema=schema)
11117
+ .interrogate()
11118
+ )
11119
+
11120
+ validation
11121
+ ```
11122
+
11123
+ The validation table shows that the schema matches the table. The single test unit passed
11124
+ since the table columns and their types match the schema.
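+
+ If extra columns or a different column order should be tolerated, the match can be relaxed
+ with the `complete=` and `in_order=` options; a brief sketch (not executed here):
+
+ ```python
+ validation = (
+     pb.Validate(data=tbl)
+     .col_schema_match(schema=schema, complete=False, in_order=False)
+     .interrogate()
+ )
+ ```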
11125
+ """
11126
+
11127
+ assertion_type = _get_fn_name()
11128
+
11129
+ _check_pre(pre=pre)
11130
+ _check_thresholds(thresholds=thresholds)
11131
+ _check_boolean_input(param=active, param_name="active")
11132
+ _check_boolean_input(param=complete, param_name="complete")
11133
+ _check_boolean_input(param=in_order, param_name="in_order")
11134
+ _check_boolean_input(param=case_sensitive_colnames, param_name="case_sensitive_colnames")
11135
+ _check_boolean_input(param=case_sensitive_dtypes, param_name="case_sensitive_dtypes")
11136
+ _check_boolean_input(param=full_match_dtypes, param_name="full_match_dtypes")
11137
+
11138
+ # Determine threshold to use (global or local) and normalize a local `thresholds=` value
11139
+ thresholds = (
11140
+ self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
11141
+ )
11142
+
11143
+ # Package up the `schema=` and boolean params into a dictionary for later interrogation
11144
+ values = {
11145
+ "schema": schema,
11146
+ "complete": complete,
11147
+ "in_order": in_order,
11148
+ "case_sensitive_colnames": case_sensitive_colnames,
11149
+ "case_sensitive_dtypes": case_sensitive_dtypes,
11150
+ "full_match_dtypes": full_match_dtypes,
11151
+ }
11152
+
11153
+ # Determine brief to use (global or local) and transform any shorthands of `brief=`
11154
+ brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
11155
+
11156
+ val_info = _ValidationInfo(
11157
+ assertion_type=assertion_type,
11158
+ values=values,
11159
+ pre=pre,
11160
+ thresholds=thresholds,
11161
+ actions=actions,
11162
+ brief=brief,
11163
+ active=active,
11164
+ )
11165
+
11166
+ self._add_validation(validation_info=val_info)
11167
+
11168
+ return self
11169
+
11170
+ def row_count_match(
11171
+ self,
11172
+ count: int | FrameT | Any,
11173
+ tol: Tolerance = 0,
11174
+ inverse: bool = False,
11175
+ pre: Callable | None = None,
11176
+ thresholds: int | float | bool | tuple | dict | Thresholds = None,
11177
+ actions: Actions | None = None,
11178
+ brief: str | bool | None = None,
11179
+ active: bool = True,
11180
+ ) -> Validate:
11181
+ """
11182
+ Validate whether the row count of the table matches a specified count.
11183
+
11184
+ The `row_count_match()` method checks whether the row count of the target table matches a
11185
+ specified count. This validation will operate over a single test unit, which is whether the
11186
+ row count matches the specified count.
11187
+
11188
+ We also have the option to invert the validation step by setting `inverse=True`. This will
11189
+ make the expectation that the row count of the target table *does not* match the specified
11190
+ count.
11191
+
11192
+ Parameters
11193
+ ----------
11194
+ count
11195
+ The expected row count of the table. This can be an integer value, a Polars or Pandas
11196
+ DataFrame object, or an Ibis backend table. If a DataFrame/table is provided, the row
11197
+ count of that object will be used as the expected count.
11198
+ tol
11199
+ The tolerance allowable for the row count match. This can be specified as a single
11200
+ numeric value (integer or float) or as a tuple of two integers representing the lower
11201
+ and upper bounds of the tolerance range. If a single integer value (greater than 1) is
11202
+ provided, it represents the absolute bounds of the tolerance, ie. plus or minus the value.
11203
+ If a float value (between 0-1) is provided, it represents the relative tolerance, ie.
11204
+ plus or minus the relative percentage of the target. If a tuple is provided, it represents
11205
+ the lower and upper absolute bounds of the tolerance range. See the examples for more.
11206
+ inverse
11207
+ Should the validation step be inverted? If `True`, then the expectation is that the row
11208
+ count of the target table should not match the specified `count=` value.
11209
+ pre
11210
+ An optional preprocessing function or lambda to apply to the data table during
11211
+ interrogation. This function should take a table as input and return a modified table.
11212
+ Have a look at the *Preprocessing* section for more information on how to use this
11213
+ argument.
11214
+ thresholds
11215
+ Set threshold failure levels for reporting and reacting to exceedences of the levels.
11216
+ The thresholds are set at the step level and will override any global thresholds set in
11217
+ `Validate(thresholds=...)`. The default is `None`, which means that no thresholds will
11218
+ be set locally and global thresholds (if any) will take effect. Look at the *Thresholds*
11219
+ section for information on how to set threshold levels.
11220
+ actions
11221
+ Optional actions to take when the validation step meets or exceeds any set threshold
11222
+ levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
11223
+ define the actions.
11224
+ brief
11225
+ An optional brief description of the validation step that will be displayed in the
11226
+ reporting table. You can use the templating elements like `"{step}"` to insert
11227
+ the step number, or `"{auto}"` to include an automatically generated brief. If `True`
11228
+ the entire brief will be automatically generated. If `None` (the default) then there
11229
+ won't be a brief.
11230
+ active
11231
+ A boolean value indicating whether the validation step should be active. Using `False`
11232
+ will make the validation step inactive (still reporting its presence and keeping indexes
11233
+ for the steps unchanged).
11234
+
11235
+ Returns
11236
+ -------
11237
+ Validate
11238
+ The `Validate` object with the added validation step.
11239
+
11240
+ Preprocessing
11241
+ -------------
11242
+ The `pre=` argument allows for a preprocessing function or lambda to be applied to the data
11243
+ table during interrogation. This function should take a table as input and return a modified
11244
+ table. This is useful for performing any necessary transformations or filtering on the data
11245
+ before the validation step is applied.
11246
+
11247
+ The preprocessing function can be any callable that takes a table as input and returns a
11248
+ modified table. For example, you could use a lambda function to filter the table based on
11249
+ certain criteria or to apply a transformation to the data. Regarding the lifetime of the
11250
+ transformed table, it only exists during the validation step and is not stored in the
11251
+ `Validate` object or used in subsequent validation steps.
11252
+
11253
+ Thresholds
11254
+ ----------
11255
+ The `thresholds=` parameter is used to set the failure-condition levels for the validation
11256
+ step. If they are set here at the step level, these thresholds will override any thresholds
11257
+ set at the global level in `Validate(thresholds=...)`.
11258
+
11259
+ There are three threshold levels: 'warning', 'error', and 'critical'. The threshold values
11260
+ can either be set as a proportion of all test units failing (a value between `0` and `1`),
11261
+ or as the absolute number of failing test units (an integer that's `1` or greater).
11262
+
11263
+ Thresholds can be defined using one of these input schemes:
11264
+
11265
+ 1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
11266
+ thresholds)
11267
+ 2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1` is
11268
+ the 'error' level, and position `2` is the 'critical' level
11269
+ 3. create a dictionary of 1-3 value entries; the valid keys are 'warning', 'error', and
11270
+ 'critical'
11271
+ 4. a single integer/float value denoting absolute number or fraction of failing test units
11272
+ for the 'warning' level only
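+
+ A quick sketch of these four input schemes side by side (the values shown are only
+ placeholders):
+
+ ```python
+ import pointblank as pb
+
+ # Any of these forms can be passed to `thresholds=`
+ pb.Thresholds(warning=0.05, error=0.10, critical=0.15)  # 1. Thresholds class
+ (0.05, 0.10, 0.15)                                       # 2. tuple: warning, error, critical
+ {"warning": 0.05, "critical": 0.15}                      # 3. dict with any of the three keys
+ 0.05                                                      # 4. single value: 'warning' only
+ ```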
11273
+
11274
+ If the number of failing test units exceeds set thresholds, the validation step will be
11275
+ marked as 'warning', 'error', or 'critical'. Not all of the threshold levels need to be
11276
+ set; you're free to set any combination of them.
11277
+
11278
+ Aside from reporting failure conditions, thresholds can be used to determine the actions to
11279
+ take for each level of failure (using the `actions=` parameter).
11280
+
11281
+ Examples
11282
+ --------
11283
+ ```{python}
11284
+ #| echo: false
11285
+ #| output: false
11286
+ import pointblank as pb
11287
+ pb.config(report_incl_header=False, report_incl_footer=False)
11288
+ ```
11289
+
11290
+ For the examples here, we'll use the built-in dataset `"small_table"`. The table can be
11291
+ obtained by calling `load_dataset("small_table")`.
11292
+
11293
+ ```{python}
11294
+ import pointblank as pb
11295
+
11296
+ small_table = pb.load_dataset("small_table")
11297
+
11298
+ pb.preview(small_table)
10932
11299
  ```
10933
11300
 
10934
11301
  Let's validate that the number of rows in the table matches a fixed value. In this case, we
@@ -12227,12 +12594,19 @@ class Validate:
12227
12594
  # Generate the autobrief description for the validation step; it's important to perform
12228
12595
  # that here since text components like the column and the value(s) have been resolved
12229
12596
  # at this point
12597
+ # Get row count for col_pct_null to properly calculate absolute tolerance percentages
12598
+ n_rows = None
12599
+ if assertion_type == "col_pct_null":
12600
+ n_rows = get_row_count(data_tbl)
12601
+
12230
12602
  autobrief = _create_autobrief_or_failure_text(
12231
12603
  assertion_type=assertion_type,
12232
12604
  lang=self.lang,
12233
12605
  column=column,
12234
12606
  values=value,
12235
12607
  for_failure=False,
12608
+ locale=self.locale,
12609
+ n_rows=n_rows,
12236
12610
  )
12237
12611
 
12238
12612
  validation.autobrief = autobrief
@@ -12260,6 +12634,12 @@ class Validate:
12260
12634
  # This prevents modifications from one validation step affecting others
12261
12635
  data_tbl_step = _copy_dataframe(data_tbl)
12262
12636
 
12637
+ # Capture original table dimensions and columns before preprocessing
12638
+ # (only if preprocessing is present - we'll set these inside the preprocessing block)
12639
+ original_rows = None
12640
+ original_cols = None
12641
+ original_column_names = None
12642
+
12263
12643
  # ------------------------------------------------
12264
12644
  # Preprocessing stage
12265
12645
  # ------------------------------------------------
@@ -12267,6 +12647,16 @@ class Validate:
12267
12647
  # Determine whether any preprocessing functions are to be applied to the table
12268
12648
  if validation.pre is not None:
12269
12649
  try:
12650
+ # Capture original table dimensions before preprocessing
12651
+ # Use get_row_count() instead of len() for compatibility with PySpark, etc.
12652
+ original_rows = get_row_count(data_tbl_step)
12653
+ original_cols = get_column_count(data_tbl_step)
12654
+ original_column_names = set(
12655
+ data_tbl_step.columns
12656
+ if hasattr(data_tbl_step, "columns")
12657
+ else list(data_tbl_step.columns)
12658
+ )
12659
+
12270
12660
  # Read the text of the preprocessing function
12271
12661
  pre_text = _pre_processing_funcs_to_str(validation.pre)
12272
12662
 
@@ -12299,6 +12689,62 @@ class Validate:
12299
12689
  elif isinstance(validation.pre, Callable):
12300
12690
  data_tbl_step = validation.pre(data_tbl_step)
12301
12691
 
12692
+ # After successful preprocessing, check dimensions and create notes
12693
+ # Use get_row_count() and get_column_count() for compatibility
12694
+ processed_rows = get_row_count(data_tbl_step)
12695
+ processed_cols = get_column_count(data_tbl_step)
12696
+
12697
+ # Always add a note when preprocessing is applied
12698
+ if original_rows != processed_rows or original_cols != processed_cols:
12699
+ # Dimensions changed - show the change
12700
+ note_html = _create_preprocessing_note_html(
12701
+ original_rows=original_rows,
12702
+ original_cols=original_cols,
12703
+ processed_rows=processed_rows,
12704
+ processed_cols=processed_cols,
12705
+ locale=self.locale,
12706
+ )
12707
+ note_text = _create_preprocessing_note_text(
12708
+ original_rows=original_rows,
12709
+ original_cols=original_cols,
12710
+ processed_rows=processed_rows,
12711
+ processed_cols=processed_cols,
12712
+ )
12713
+ else:
12714
+ # No dimension change - just indicate preprocessing was applied
12715
+ note_html = _create_preprocessing_no_change_note_html(locale=self.locale)
12716
+ note_text = _create_preprocessing_no_change_note_text()
12717
+
12718
+ validation._add_note(
12719
+ key="pre_applied",
12720
+ markdown=note_html,
12721
+ text=note_text,
12722
+ )
12723
+
12724
+ # Check if target column is synthetic (exists in processed but not original)
12725
+ # Only check for single column names (not lists used in rows_distinct, etc.)
12726
+ if column is not None and isinstance(column, str):
12727
+ processed_column_names = set(
12728
+ data_tbl_step.columns
12729
+ if hasattr(data_tbl_step, "columns")
12730
+ else list(data_tbl_step.columns)
12731
+ )
12732
+
12733
+ # Check if the target column is in the processed table but not in original
12734
+ if column in processed_column_names and column not in original_column_names:
12735
+ note_html = _create_synthetic_target_column_note_html(
12736
+ column_name=column,
12737
+ locale=self.locale,
12738
+ )
12739
+ note_text = _create_synthetic_target_column_note_text(
12740
+ column_name=column,
12741
+ )
12742
+ validation._add_note(
12743
+ key="syn_target_col",
12744
+ markdown=note_html,
12745
+ text=note_text,
12746
+ )
12747
+
12302
12748
  except Exception:
12303
12749
  # If preprocessing fails, mark the validation as having an eval_error
12304
12750
  validation.eval_error = True
@@ -12488,6 +12934,21 @@ class Validate:
12488
12934
  tbl=tbl, column=column, values=value, na_pass=na_pass
12489
12935
  )
12490
12936
 
12937
+ elif assertion_type == "col_pct_null":
12938
+ result_bool = col_pct_null(
12939
+ data_tbl=data_tbl_step,
12940
+ column=column,
12941
+ p=value["p"],
12942
+ bound_finder=value["bound_finder"],
12943
+ )
12944
+
12945
+ validation.all_passed = result_bool
12946
+ validation.n = 1
12947
+ validation.n_passed = int(result_bool)
12948
+ validation.n_failed = 1 - int(result_bool)
12949
+
12950
+ results_tbl = None
12951
+
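Editor's note: for `col_pct_null` the `value` is a dict carrying the target proportion `p` and a `bound_finder` callable; because that callable is built with `functools.partial`, the tolerance can later be read back from `.keywords` (the report formatter further down does exactly that). A minimal sketch of the mechanism, with a hypothetical stand-in for the real bound finder:

from functools import partial

def _bounds_for(p, tol=0):
    # Hypothetical stand-in; only the partial/keywords mechanics matter here.
    return (max(p - tol, 0.0), min(p + tol, 1.0))

value = {"p": 0.1, "bound_finder": partial(_bounds_for, tol=0.02)}

p = value["p"]                                      # 0.1
tol = value["bound_finder"].keywords.get("tol", 0)  # 0.02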
12491
12952
  elif assertion_type == "col_vals_expr":
12492
12953
  results_tbl = col_vals_expr(
12493
12954
  data_tbl=data_tbl_step, expr=value, tbl_type=tbl_type
@@ -12547,10 +13008,21 @@ class Validate:
12547
13008
  # Add the schema validation info to the validation object
12548
13009
  validation.val_info = schema_validation_info
12549
13010
 
13011
+ # Add a note with the schema expectation and results
13012
+ schema_note_html = _create_col_schema_match_note_html(
13013
+ schema_info=schema_validation_info, locale=self.locale
13014
+ )
13015
+ schema_note_text = _create_col_schema_match_note_text(
13016
+ schema_info=schema_validation_info
13017
+ )
13018
+ validation._add_note(
13019
+ key="schema_check", markdown=schema_note_html, text=schema_note_text
13020
+ )
13021
+
12550
13022
  validation.all_passed = result_bool
12551
13023
  validation.n = 1
12552
13024
  validation.n_passed = int(result_bool)
12553
- validation.n_failed = 1 - result_bool
13025
+ validation.n_failed = 1 - int(result_bool)
12554
13026
 
12555
13027
  results_tbl = None
12556
13028
 
@@ -12565,7 +13037,7 @@ class Validate:
12565
13037
  validation.all_passed = result_bool
12566
13038
  validation.n = 1
12567
13039
  validation.n_passed = int(result_bool)
12568
- validation.n_failed = 1 - result_bool
13040
+ validation.n_failed = 1 - int(result_bool)
12569
13041
 
12570
13042
  results_tbl = None
12571
13043
 
@@ -12577,7 +13049,7 @@ class Validate:
12577
13049
  validation.all_passed = result_bool
12578
13050
  validation.n = 1
12579
13051
  validation.n_passed = int(result_bool)
12580
- validation.n_failed = 1 - result_bool
13052
+ validation.n_failed = 1 - int(result_bool)
12581
13053
 
12582
13054
  results_tbl = None
12583
13055
 
@@ -12596,7 +13068,7 @@ class Validate:
12596
13068
  validation.all_passed = result_bool
12597
13069
  validation.n = 1
12598
13070
  validation.n_passed = int(result_bool)
12599
- validation.n_failed = 1 - result_bool
13071
+ validation.n_failed = 1 - int(result_bool)
12600
13072
 
12601
13073
  results_tbl = None
12602
13074
 
@@ -12614,8 +13086,9 @@ class Validate:
12614
13086
  ) # pragma: no cover
12615
13087
 
12616
13088
  except Exception as e:
12617
- # Only catch specific data quality comparison errors, not programming errors
13089
+ # Catch data quality errors and column not found errors
12618
13090
  error_msg = str(e).lower()
13091
+
12619
13092
  is_comparison_error = (
12620
13093
  "boolean value of na is ambiguous" in error_msg
12621
13094
  or "cannot compare" in error_msg
@@ -12626,20 +13099,101 @@ class Validate:
12626
13099
  or ("dtype" in error_msg and "compare" in error_msg)
12627
13100
  )
12628
13101
 
12629
- if is_comparison_error: # pragma: no cover
12630
- # If data quality comparison fails, mark the validation as having an eval_error
13102
+ is_column_not_found = "column" in error_msg and "not found" in error_msg
13103
+
13104
+ is_comparison_column_not_found = (
13105
+ "unable to find column" in error_msg and "valid columns" in error_msg
13106
+ )
13107
+
13108
+ if (
13109
+ is_comparison_error or is_column_not_found or is_comparison_column_not_found
13110
+ ): # pragma: no cover
13111
+ # If data quality comparison fails or column not found, mark as eval_error
12631
13112
  validation.eval_error = True # pragma: no cover
13113
+
13114
+ # Add a note for column not found errors (target column)
13115
+ if is_column_not_found:
13116
+ note_html = _create_column_not_found_note_html(
13117
+ column_name=column,
13118
+ available_columns=list(data_tbl_step.columns)
13119
+ if hasattr(data_tbl_step, "columns")
13120
+ else [],
13121
+ locale=self.locale,
13122
+ )
13123
+ note_text = _create_column_not_found_note_text(
13124
+ column_name=column,
13125
+ available_columns=list(data_tbl_step.columns)
13126
+ if hasattr(data_tbl_step, "columns")
13127
+ else [],
13128
+ )
13129
+ validation._add_note(
13130
+ key="column_not_found",
13131
+ markdown=note_html,
13132
+ text=note_text,
13133
+ )
13134
+
13135
+ # Add a note for comparison column not found errors
13136
+ elif is_comparison_column_not_found:
13137
+ # Extract column name from error message
13138
+ # Error format: 'unable to find column "col_name"; valid columns: ...'
13139
+ match = re.search(r'unable to find column "([^"]+)"', str(e))
13140
+
13141
+ if match:
13142
+ missing_col_name = match.group(1)
13143
+
13144
+ # Determine position for between/outside validations
13145
+ position = None
13146
+ if assertion_type in ["col_vals_between", "col_vals_outside"]:
13147
+ # Check if missing column is in left or right position
13148
+ from pointblank.column import Column
13149
+
13150
+ if (
13151
+ isinstance(value[0], Column)
13152
+ and value[0].exprs == missing_col_name
13153
+ ):
13154
+ position = "left"
13155
+ elif (
13156
+ isinstance(value[1], Column)
13157
+ and value[1].exprs == missing_col_name
13158
+ ):
13159
+ position = "right"
13160
+
13161
+ note_html = _create_comparison_column_not_found_note_html(
13162
+ column_name=missing_col_name,
13163
+ position=position,
13164
+ available_columns=list(data_tbl_step.columns)
13165
+ if hasattr(data_tbl_step, "columns")
13166
+ else [],
13167
+ locale=self.locale,
13168
+ )
13169
+ note_text = _create_comparison_column_not_found_note_text(
13170
+ column_name=missing_col_name,
13171
+ position=position,
13172
+ available_columns=list(data_tbl_step.columns)
13173
+ if hasattr(data_tbl_step, "columns")
13174
+ else [],
13175
+ )
13176
+ validation._add_note(
13177
+ key="comparison_column_not_found",
13178
+ markdown=note_html,
13179
+ text=note_text,
13180
+ )
13181
+
12632
13182
  end_time = datetime.datetime.now(datetime.timezone.utc) # pragma: no cover
13183
+
12633
13184
  validation.proc_duration_s = (
12634
13185
  end_time - start_time
12635
13186
  ).total_seconds() # pragma: no cover
13187
+
12636
13188
  validation.time_processed = end_time.isoformat(
12637
13189
  timespec="milliseconds"
12638
13190
  ) # pragma: no cover
13191
+
12639
13192
  validation.active = False # pragma: no cover
13193
+
12640
13194
  continue # pragma: no cover
12641
13195
  else:
12642
- # For other errors (like missing columns), let them propagate
13196
+ # For other unexpected errors, let them propagate
12643
13197
  raise
12644
13198
 
12645
13199
  else:
@@ -12722,6 +13276,34 @@ class Validate:
12722
13276
  ),
12723
13277
  )
12724
13278
 
13279
+ # Add note for local thresholds (if they differ from global thresholds)
13280
+ if threshold != self.thresholds:
13281
+ if threshold != Thresholds():
13282
+ # Local thresholds are set - generate threshold note
13283
+ threshold_note_html = _create_local_threshold_note_html(
13284
+ thresholds=threshold, locale=self.locale
13285
+ )
13286
+ threshold_note_text = _create_local_threshold_note_text(thresholds=threshold)
13287
+
13288
+ # Add the note to the validation step
13289
+ validation._add_note(
13290
+ key="local_thresholds",
13291
+ markdown=threshold_note_html,
13292
+ text=threshold_note_text,
13293
+ )
13294
+
13295
+ elif self.thresholds != Thresholds():
13296
+ # Thresholds explicitly reset to empty when global thresholds exist
13297
+ reset_note_html = _create_threshold_reset_note_html(locale=self.locale)
13298
+ reset_note_text = _create_threshold_reset_note_text()
13299
+
13300
+ # Add the note to the validation step
13301
+ validation._add_note(
13302
+ key="local_threshold_reset",
13303
+ markdown=reset_note_html,
13304
+ text=reset_note_text,
13305
+ )
13306
+
12725
13307
  # If there is any threshold level that has been exceeded, then produce and
12726
13308
  # set the general failure text for the validation step
12727
13309
  if validation.warning or validation.error or validation.critical:
@@ -12732,6 +13314,8 @@ class Validate:
12732
13314
  column=column,
12733
13315
  values=value,
12734
13316
  for_failure=True,
13317
+ locale=self.locale,
13318
+ n_rows=n_rows,
12735
13319
  )
12736
13320
 
12737
13321
  # Set the failure text in the validation step
@@ -14217,11 +14801,15 @@ class Validate:
14217
14801
  - [`col_vals_outside()`](`pointblank.Validate.col_vals_outside`)
14218
14802
  - [`col_vals_in_set()`](`pointblank.Validate.col_vals_in_set`)
14219
14803
  - [`col_vals_not_in_set()`](`pointblank.Validate.col_vals_not_in_set`)
14804
+ - [`col_vals_increasing()`](`pointblank.Validate.col_vals_increasing`)
14805
+ - [`col_vals_decreasing()`](`pointblank.Validate.col_vals_decreasing`)
14220
14806
  - [`col_vals_null()`](`pointblank.Validate.col_vals_null`)
14221
14807
  - [`col_vals_not_null()`](`pointblank.Validate.col_vals_not_null`)
14222
14808
  - [`col_vals_regex()`](`pointblank.Validate.col_vals_regex`)
14809
+ - [`col_vals_within_spec()`](`pointblank.Validate.col_vals_within_spec`)
14223
14810
  - [`col_vals_expr()`](`pointblank.Validate.col_vals_expr`)
14224
14811
  - [`conjointly()`](`pointblank.Validate.conjointly`)
14812
+ - [`prompt()`](`pointblank.Validate.prompt`)
14225
14813
 
14226
14814
  An extracted row for these validation methods means that a test unit failed for that row in
14227
14815
  the validation step.
@@ -14806,7 +15394,12 @@ class Validate:
14806
15394
  return None
14807
15395
 
14808
15396
  def get_tabular_report(
14809
- self, title: str | None = ":default:", incl_header: bool = None, incl_footer: bool = None
15397
+ self,
15398
+ title: str | None = ":default:",
15399
+ incl_header: bool = None,
15400
+ incl_footer: bool = None,
15401
+ incl_footer_timings: bool = None,
15402
+ incl_footer_notes: bool = None,
14810
15403
  ) -> GT:
14811
15404
  """
14812
15405
  Validation report as a GT table.
@@ -14829,6 +15422,20 @@ class Validate:
14829
15422
  name of the table as the title for the report. If no title is wanted, then `":none:"`
14830
15423
  can be used. Aside from keyword options, text can be provided for the title. This will
14831
15424
  be interpreted as Markdown text and transformed internally to HTML.
15425
+ incl_header
15426
+ Controls whether the header section should be displayed. If `None`, uses the global
15427
+ configuration setting. The header contains the table name, label, and threshold
15428
+ information.
15429
+ incl_footer
15430
+ Controls whether the footer section should be displayed. If `None`, uses the global
15431
+ configuration setting. The footer can contain validation timing information and notes.
15432
+ incl_footer_timings
15433
+ Controls whether validation timing information (start time, duration, end time) should
15434
+ be displayed in the footer. If `None`, uses the global configuration setting. Only
15435
+ applies when `incl_footer=True`.
15436
+ incl_footer_notes
15437
+ Controls whether notes from validation steps should be displayed in the footer. If
15438
+ `None`, uses the global configuration setting. Only applies when `incl_footer=True`.
14832
15439
 
14833
15440
  Returns
14834
15441
  -------
@@ -14888,6 +15495,10 @@ class Validate:
14888
15495
  incl_header = global_config.report_incl_header
14889
15496
  if incl_footer is None:
14890
15497
  incl_footer = global_config.report_incl_footer
15498
+ if incl_footer_timings is None:
15499
+ incl_footer_timings = global_config.report_incl_footer_timings
15500
+ if incl_footer_notes is None:
15501
+ incl_footer_notes = global_config.report_incl_footer_notes
14891
15502
 
14892
15503
  # Do we have a DataFrame library to work with?
14893
15504
  _check_any_df_lib(method_used="get_tabular_report")
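Editor's note: with the two new arguments, the footer's timing line and its notes section can be switched off independently; leaving them at `None` falls back to the global configuration values, as the hunk above shows. An illustrative call on an interrogated `Validate` object:

report = validation.get_tabular_report(
    incl_footer=True,
    incl_footer_timings=False,  # hide the start/duration/end line
    incl_footer_notes=True,     # keep the per-step notes
)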
@@ -15126,30 +15737,53 @@ class Validate:
15126
15737
  columns_upd = []
15127
15738
 
15128
15739
  columns = validation_info_dict["column"]
15740
+ notes = validation_info_dict["notes"]
15129
15741
 
15130
15742
  assertion_type = validation_info_dict["assertion_type"]
15131
15743
 
15132
15744
  # Iterate over the values in the `column` entry
15133
15745
  for i, column in enumerate(columns):
15746
+ # Check if this validation has a synthetic target column note
15747
+ has_synthetic_column = (
15748
+ notes[i] is not None and isinstance(notes[i], dict) and "syn_target_col" in notes[i]
15749
+ )
15750
+
15751
+ column_text = None
15752
+
15134
15753
  if assertion_type[i] in [
15135
15754
  "col_schema_match",
15136
15755
  "row_count_match",
15137
15756
  "col_count_match",
15138
15757
  "col_vals_expr",
15139
15758
  ]:
15140
- columns_upd.append("&mdash;")
15759
+ column_text = "&mdash;"
15141
15760
  elif assertion_type[i] in ["rows_distinct", "rows_complete", "prompt"]:
15142
15761
  if not column:
15143
15762
  # If there is no column subset, then all columns are used
15144
- columns_upd.append("ALL COLUMNS")
15763
+ column_text = "ALL COLUMNS"
15145
15764
  else:
15146
15765
  # With a column subset list, format with commas between the column names
15147
- columns_upd.append(", ".join(column))
15148
-
15766
+ column_text = ", ".join(column)
15149
15767
  elif assertion_type[i] in ["conjointly", "specially"]:
15150
- columns_upd.append("")
15768
+ column_text = ""
15151
15769
  else:
15152
- columns_upd.append(str(column))
15770
+ column_text = str(column)
15771
+
15772
+ # Apply underline styling for synthetic columns (using the purple color from the icon)
15773
+ # Only apply styling if column_text is not empty and not a special marker
15774
+ if (
15775
+ has_synthetic_column
15776
+ and column_text
15777
+ and column_text not in ["&mdash;", "ALL COLUMNS", ""]
15778
+ ):
15779
+ column_text = (
15780
+ f'<span style="text-decoration: underline; '
15781
+ f"text-decoration-color: #9A7CB4; text-decoration-thickness: 1px; "
15782
+ f'text-underline-offset: 3px;">'
15783
+ f"{column_text}</span>"
15784
+ )
15785
+
15786
+ columns_upd.append(column_text)
15153
15787
 
15154
15788
  # Add the `columns_upd` entry to the dictionary
15155
15789
  validation_info_dict["columns_upd"] = columns_upd
@@ -15205,6 +15839,15 @@ class Validate:
15205
15839
  ]:
15206
15840
  values_upd.append("&mdash;")
15207
15841
 
15842
+ elif assertion_type[i] in ["col_pct_null"]:
15843
+ # Extract p and tol from the values dict for nice formatting
15844
+ p_value = value["p"]
15845
+
15846
+ # Extract tol from the bound_finder partial function
15847
+ bound_finder = value.get("bound_finder")
15848
+ tol_value = bound_finder.keywords.get("tol", 0) if bound_finder else 0
15849
+ values_upd.append(f"p = {p_value}<br/>tol = {tol_value}")
15850
+
15208
15851
  elif assertion_type[i] in ["col_schema_match"]:
15209
15852
  values_upd.append("SCHEMA")
15210
15853
 
@@ -15680,13 +16323,15 @@ class Validate:
15680
16323
  gt_tbl = gt_tbl.tab_header(title=html(title_text), subtitle=html(combined_subtitle))
15681
16324
 
15682
16325
  if incl_footer:
15683
- # Add table time as HTML source note
15684
- gt_tbl = gt_tbl.tab_source_note(source_note=html(table_time))
16326
+ # Add table time as HTML source note if enabled
16327
+ if incl_footer_timings:
16328
+ gt_tbl = gt_tbl.tab_source_note(source_note=html(table_time))
15685
16329
 
15686
- # Create notes markdown from validation steps and add as separate source note
15687
- notes_markdown = _create_notes_html(self.validation_info)
15688
- if notes_markdown:
15689
- gt_tbl = gt_tbl.tab_source_note(source_note=md(notes_markdown))
16330
+ # Create notes markdown from validation steps and add as separate source note if enabled
16331
+ if incl_footer_notes:
16332
+ notes_markdown = _create_notes_html(self.validation_info)
16333
+ if notes_markdown:
16334
+ gt_tbl = gt_tbl.tab_source_note(source_note=md(notes_markdown))
15690
16335
 
15691
16336
  # If the interrogation has not been performed, then style the table columns dealing with
15692
16337
  # interrogation data as grayed out
@@ -15795,11 +16440,15 @@ class Validate:
15795
16440
  - [`col_vals_outside()`](`pointblank.Validate.col_vals_outside`)
15796
16441
  - [`col_vals_in_set()`](`pointblank.Validate.col_vals_in_set`)
15797
16442
  - [`col_vals_not_in_set()`](`pointblank.Validate.col_vals_not_in_set`)
16443
+ - [`col_vals_increasing()`](`pointblank.Validate.col_vals_increasing`)
16444
+ - [`col_vals_decreasing()`](`pointblank.Validate.col_vals_decreasing`)
15798
16445
  - [`col_vals_null()`](`pointblank.Validate.col_vals_null`)
15799
16446
  - [`col_vals_not_null()`](`pointblank.Validate.col_vals_not_null`)
15800
16447
  - [`col_vals_regex()`](`pointblank.Validate.col_vals_regex`)
16448
+ - [`col_vals_within_spec()`](`pointblank.Validate.col_vals_within_spec`)
15801
16449
  - [`col_vals_expr()`](`pointblank.Validate.col_vals_expr`)
15802
16450
  - [`conjointly()`](`pointblank.Validate.conjointly`)
16451
+ - [`prompt()`](`pointblank.Validate.prompt`)
15803
16452
  - [`rows_complete()`](`pointblank.Validate.rows_complete`)
15804
16453
 
15805
16454
  The [`rows_distinct()`](`pointblank.Validate.rows_distinct`) validation step will produce a
@@ -16099,6 +16748,12 @@ class Validate:
16099
16748
 
16100
16749
  except Exception: # pragma: no cover
16101
16750
  validation.eval_error = True
16751
+ columns_resolved = []
16752
+ # Store columns list for note generation
16753
+ try:
16754
+ columns = list(table.columns) if "table" in locals() else []
16755
+ except Exception:
16756
+ columns = []
16102
16757
 
16103
16758
  # If no columns were resolved, then create a patched validation step with the
16104
16759
  # `eval_error` and `column` attributes set
@@ -16106,6 +16761,22 @@ class Validate:
16106
16761
  validation.eval_error = True
16107
16762
  validation.column = str(column_expr)
16108
16763
 
16764
+ # Add a helpful note explaining that no columns were resolved
16765
+ note_html = _create_no_columns_resolved_note_html(
16766
+ column_expr=str(column_expr),
16767
+ available_columns=columns,
16768
+ locale=self.locale,
16769
+ )
16770
+ note_text = _create_no_columns_resolved_note_text(
16771
+ column_expr=str(column_expr),
16772
+ available_columns=columns,
16773
+ )
16774
+ validation._add_note(
16775
+ key="no_columns_resolved",
16776
+ markdown=note_html,
16777
+ text=note_text,
16778
+ )
16779
+
16109
16780
  expanded_validation_info.append(validation)
16110
16781
  continue
16111
16782
 
@@ -16664,7 +17335,13 @@ def _process_action_str(
16664
17335
 
16665
17336
 
16666
17337
  def _create_autobrief_or_failure_text(
16667
- assertion_type: str, lang: str, column: str | None, values: str | None, for_failure: bool
17338
+ assertion_type: str,
17339
+ lang: str,
17340
+ column: str | None,
17341
+ values: str | None,
17342
+ for_failure: bool,
17343
+ locale: str | None = None,
17344
+ n_rows: int | None = None,
16668
17345
  ) -> str:
16669
17346
  if assertion_type in [
16670
17347
  "col_vals_gt",
@@ -16788,6 +17465,16 @@ def _create_autobrief_or_failure_text(
16788
17465
  for_failure=for_failure,
16789
17466
  )
16790
17467
 
17468
+ if assertion_type == "col_pct_null":
17469
+ return _create_text_col_pct_null(
17470
+ lang=lang,
17471
+ column=column,
17472
+ value=values,
17473
+ for_failure=for_failure,
17474
+ locale=locale if locale else lang,
17475
+ n_rows=n_rows,
17476
+ )
17477
+
16791
17478
  if assertion_type == "conjointly":
16792
17479
  return _create_text_conjointly(lang=lang, for_failure=for_failure)
16793
17480
 
@@ -17010,6 +17697,115 @@ def _create_text_col_count_match(lang: str, value: int, for_failure: bool = Fals
17010
17697
  return EXPECT_FAIL_TEXT[f"col_count_match_n_{type_}_text"][lang].format(values_text=values_text)
17011
17698
 
17012
17699
 
17700
+ def _create_text_col_pct_null(
17701
+ lang: str,
17702
+ column: str | None,
17703
+ value: dict,
17704
+ for_failure: bool = False,
17705
+ locale: str | None = None,
17706
+ n_rows: int | None = None,
17707
+ ) -> str:
17708
+ """Create text for col_pct_null validation with tolerance handling."""
17709
+ type_ = _expect_failure_type(for_failure=for_failure)
17710
+
17711
+ column_text = _prep_column_text(column=column)
17712
+
17713
+ # Use locale for number formatting, defaulting to lang if not provided
17714
+ fmt_locale = locale if locale else lang
17715
+
17716
+ # Extract p and tol from the values dict
17717
+ p_value = value.get("p", 0) * 100 # Convert to percentage
17718
+ p_value_original = value.get("p", 0) # Keep original value for deviation format
17719
+
17720
+ # Extract tol from the bound_finder partial function
17721
+ bound_finder = value.get("bound_finder")
17722
+ tol_value = bound_finder.keywords.get("tol", 0) if bound_finder else 0
17723
+
17724
+ # Handle different tolerance types
17725
+ has_tolerance = False
17726
+ is_asymmetric = False
17727
+
17728
+ if isinstance(tol_value, tuple):
17729
+ # Tuple tolerance: can be (lower, upper) in absolute or relative terms
17730
+ tol_lower, tol_upper = tol_value
17731
+
17732
+ # Check if we have any non-zero tolerance
17733
+ has_tolerance = tol_lower != 0 or tol_upper != 0
17734
+ is_asymmetric = tol_lower != tol_upper
17735
+
17736
+ # For relative tolerances (floats < 1), we can compute exact percentage bounds
17737
+ # For absolute tolerances (ints >= 1), calculate based on actual row count if available
17738
+ if tol_lower < 1:
17739
+ # Relative tolerance (float)
17740
+ lower_pct_delta = tol_lower * 100
17741
+ else:
17742
+ # Absolute tolerance (int); uses actual row count if available
17743
+ if n_rows is not None and n_rows > 0:
17744
+ lower_pct_delta = (tol_lower / n_rows) * 100
17745
+ else:
17746
+ lower_pct_delta = tol_lower # Fallback approximation
17747
+
17748
+ if tol_upper < 1:
17749
+ # Relative tolerance (float)
17750
+ upper_pct_delta = tol_upper * 100
17751
+ else:
17752
+ # Absolute tolerance (int); uses actual row count if available
17753
+ if n_rows is not None and n_rows > 0:
17754
+ upper_pct_delta = (tol_upper / n_rows) * 100
17755
+ else:
17756
+ upper_pct_delta = tol_upper # Fallback approximation
17757
+ else:
17758
+ # Single value tolerance: symmetric
17759
+ has_tolerance = tol_value != 0
17760
+
17761
+ if tol_value < 1:
17762
+ # Relative tolerance (float)
17763
+ tol_pct = tol_value * 100
17764
+ else:
17765
+ # Absolute tolerance (int); uses actual row count if available
17766
+ if n_rows is not None and n_rows > 0:
17767
+ tol_pct = (tol_value / n_rows) * 100
17768
+ else:
17769
+ tol_pct = tol_value # Fallback approximation
17770
+
17771
+ lower_pct_delta = tol_pct
17772
+ upper_pct_delta = tol_pct
17773
+
17774
+ # Format numbers with locale-aware formatting
17775
+ p_formatted = _format_number_safe(p_value, decimals=1, locale=fmt_locale)
17776
+ p_original_formatted = _format_number_safe(p_value_original, decimals=2, locale=fmt_locale)
17777
+
17778
+ # Choose the appropriate translation key based on tolerance
17779
+ if not has_tolerance:
17780
+ # No tolerance - use simple text
17781
+ text = EXPECT_FAIL_TEXT[f"col_pct_null_{type_}_text"][lang].format(
17782
+ column_text=column_text,
17783
+ p=p_formatted,
17784
+ )
17785
+ elif is_asymmetric or isinstance(tol_value, tuple):
17786
+ # Use deviation format for tuple tolerances (including symmetric ones)
17787
+ # Format the deviation values with signs (using proper minus sign U+2212)
17788
+ lower_dev = f"−{_format_number_safe(lower_pct_delta, decimals=1, locale=fmt_locale)}%"
17789
+ upper_dev = f"+{_format_number_safe(upper_pct_delta, decimals=1, locale=fmt_locale)}%"
17790
+
17791
+ text = EXPECT_FAIL_TEXT[f"col_pct_null_{type_}_text_tol_deviation"][lang].format(
17792
+ column_text=column_text,
17793
+ lower_dev=lower_dev,
17794
+ upper_dev=upper_dev,
17795
+ p=p_original_formatted,
17796
+ )
17797
+ else:
17798
+ # Single value tolerance - use the symmetric ± format
17799
+ tol_formatted = _format_number_safe(lower_pct_delta, decimals=1, locale=fmt_locale)
17800
+ text = EXPECT_FAIL_TEXT[f"col_pct_null_{type_}_text_tol"][lang].format(
17801
+ column_text=column_text,
17802
+ p=p_formatted,
17803
+ tol=tol_formatted,
17804
+ )
17805
+
17806
+ return text
17807
+
17808
+
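Editor's note: the helper above treats tolerances below 1 as relative fractions and tolerances of 1 or more as absolute row counts, converting both to percentage-point deltas around `p`. A quick sketch of that same conversion with illustrative numbers:

n_rows = 200
for tol in (0.05, 10):
    pct_delta = tol * 100 if tol < 1 else (tol / n_rows) * 100
    print(pct_delta)  # 5.0 both times: 0.05 is relative, 10 means 10 rows out of 200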
17013
17809
  def _create_text_conjointly(lang: str, for_failure: bool = False) -> str:
17014
17810
  type_ = _expect_failure_type(for_failure=for_failure)
17015
17811
 
@@ -17408,6 +18204,7 @@ def _validation_info_as_dict(validation_info: _ValidationInfo) -> dict:
17408
18204
 
17409
18205
  def _get_assertion_icon(icon: list[str], length_val: int = 30) -> list[str]:
17410
18206
  # For each icon, get the assertion icon SVG text from the SVG_ICONS_FOR_ASSERTION_TYPES dictionary
18207
+ # TODO: No point in using `get` if we can't handle missing keys anyway
17411
18208
  icon_svg = [SVG_ICONS_FOR_ASSERTION_TYPES.get(icon) for icon in icon]
17412
18209
 
17413
18210
  # Replace the width and height in the SVG string
@@ -17866,267 +18663,1078 @@ def _create_table_time_html(
17866
18663
  )
17867
18664
 
17868
18665
 
17869
- def _create_notes_html(validation_info: list) -> str:
18666
+ def _create_notes_html(validation_info: list) -> str:
18667
+ """
18668
+ Create markdown text for validation notes/footnotes.
18669
+
18670
+ This function collects notes from all validation steps and formats them as footnotes
18671
+ for display in the report footer. Each note is prefixed with the step number in
18672
+ uppercase small caps bold formatting, and the note content is rendered as markdown.
18673
+
18674
+ Parameters
18675
+ ----------
18676
+ validation_info
18677
+ List of _ValidationInfo objects from which to extract notes.
18678
+
18679
+ Returns
18680
+ -------
18681
+ str
18682
+ Markdown string containing formatted footnotes, or empty string if no notes exist.
18683
+ """
18684
+ # Collect all notes from validation steps
18685
+ all_notes = []
18686
+ for step in validation_info:
18687
+ if step.notes:
18688
+ for key, content in step.notes.items():
18689
+ # Store note with step number for context
18690
+ all_notes.append(
18691
+ {
18692
+ "step": step.i,
18693
+ "key": key,
18694
+ "markdown": content["markdown"],
18695
+ "text": content["text"],
18696
+ }
18697
+ )
18698
+
18699
+ # If no notes, return empty string
18700
+ if not all_notes:
18701
+ return ""
18702
+
18703
+ # Build markdown for notes section
18704
+ # Start with a styled horizontal rule and bold "Notes" header
18705
+ notes_parts = [
18706
+ (
18707
+ "<hr style='border: none; border-top-width: 1px; border-top-style: dotted; "
18708
+ "border-top-color: #B5B5B5; margin-top: -3px; margin-bottom: 3px;'>"
18709
+ ),
18710
+ "<strong>Notes</strong>",
18711
+ "",
18712
+ ]
18713
+
18714
+ previous_step = None
18715
+ for note in all_notes:
18716
+ # Determine if this is the first note for this step
18717
+ is_first_for_step = note["step"] != previous_step
18718
+ previous_step = note["step"]
18719
+
18720
+ # Format step label with HTML for uppercase small caps bold
18721
+ # Use lighter color for subsequent notes of the same step
18722
+ step_color = "#333333" if is_first_for_step else "#999999"
18723
+ step_label = (
18724
+ f"<span style='font-variant: small-caps; font-weight: bold; font-size: smaller; "
18725
+ f"text-transform: uppercase; color: {step_color};'>Step {note['step']}</span>"
18726
+ )
18727
+
18728
+ # Format note key in monospaced font with smaller size
18729
+ note_key = f"<span style='font-family: \"IBM Plex Mono\", monospace; font-size: smaller;'>({note['key']})</span>"
18730
+
18731
+ # Combine step label, note key, and markdown content
18732
+ note_text = f"{step_label} {note_key} {note['markdown']}"
18733
+ notes_parts.append(note_text)
18734
+ notes_parts.append("") # Add blank line between notes
18735
+
18736
+ # Remove trailing blank line
18737
+ if notes_parts[-1] == "":
18738
+ notes_parts.pop()
18739
+
18740
+ # Join with newlines to create markdown text
18741
+ notes_markdown = "\n".join(notes_parts)
18742
+
18743
+ return notes_markdown
18744
+
18745
+
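Editor's note: each validation step stores its notes as a dict keyed by note name, with "markdown" and "text" renderings (see the `_add_note` calls earlier in this diff); `_create_notes_html()` reads only `step.i` and `step.notes`. A small sketch with a hypothetical stand-in for `_ValidationInfo`:

class _Step:
    # Hypothetical stand-in exposing just the attributes read above.
    def __init__(self, i, notes):
        self.i, self.notes = i, notes

steps = [
    _Step(1, {"pre_applied": {"markdown": "Precondition applied: ...", "text": "..."}}),
    _Step(2, None),  # steps without notes are skipped
]
print(_create_notes_html(steps))  # dotted <hr>, a "Notes" header, then one "Step 1" footnote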
18746
+ def _create_label_html(label: str | None, start_time: str) -> str:
18747
+ if label is None:
18748
+ # Remove the decimal and everything beyond that
18749
+ start_time = str(start_time).split(".")[0]
18750
+
18751
+ # Replace the space character with a pipe character
18752
+ start_time = start_time.replace(" ", "|")
18753
+
18754
+ label = start_time
18755
+
18756
+ return (
18757
+ f"<span style='text-decoration-style: solid; text-decoration-color: #ADD8E6; "
18758
+ f"text-decoration-line: underline; text-underline-position: under; color: #333333; "
18759
+ f"font-variant-numeric: tabular-nums; padding-left: 4px; margin-right: 5px; "
18760
+ f"padding-right: 2px;'>{label}</span>"
18761
+ )
18762
+
18763
+
18764
+ def _format_single_integer_with_gt(value: int, locale: str = "en", df_lib=None) -> str:
18765
+ """Format a single integer using Great Tables GT object to avoid pandas dependency."""
18766
+ if df_lib is None:
18767
+ # Use library detection to select appropriate DataFrame library
18768
+ if _is_lib_present("polars"):
18769
+ import polars as pl
18770
+
18771
+ df_lib = pl
18772
+ elif _is_lib_present("pandas"): # pragma: no cover
18773
+ import pandas as pd # pragma: no cover
18774
+
18775
+ df_lib = pd # pragma: no cover
18776
+ else: # pragma: no cover
18777
+ raise ImportError(
18778
+ "Neither Polars nor Pandas is available for formatting"
18779
+ ) # pragma: no cover
18780
+
18781
+ # Create a single-row, single-column DataFrame using the specified library
18782
+ df = df_lib.DataFrame({"value": [value]})
18783
+
18784
+ # Create GT object and format the column
18785
+ gt_obj = GT(df).fmt_integer(columns="value", locale=locale)
18786
+
18787
+ # Extract the formatted value using _get_column_of_values
18788
+ formatted_values = _get_column_of_values(gt_obj, column_name="value", context="html")
18789
+
18790
+ return formatted_values[0] # Return the single formatted value
18791
+
18792
+
18793
+ def _format_single_float_with_gt_custom(
18794
+ value: float,
18795
+ decimals: int = 2,
18796
+ drop_trailing_zeros: bool = False,
18797
+ locale: str = "en",
18798
+ df_lib=None,
18799
+ ) -> str:
18800
+ """Format a single float with custom options using Great Tables GT object to avoid pandas dependency."""
18801
+ if df_lib is None:
18802
+ # Use library detection to select appropriate DataFrame library
18803
+ if _is_lib_present("polars"):
18804
+ import polars as pl
18805
+
18806
+ df_lib = pl
18807
+ elif _is_lib_present("pandas"): # pragma: no cover
18808
+ import pandas as pd # pragma: no cover
18809
+
18810
+ df_lib = pd # pragma: no cover
18811
+ else: # pragma: no cover
18812
+ raise ImportError(
18813
+ "Neither Polars nor Pandas is available for formatting"
18814
+ ) # pragma: no cover
18815
+
18816
+ # Create a single-row, single-column DataFrame using the specified library
18817
+ df = df_lib.DataFrame({"value": [value]})
18818
+
18819
+ # Create GT object and format the column
18820
+ gt_obj = GT(df).fmt_number(
18821
+ columns="value", decimals=decimals, drop_trailing_zeros=drop_trailing_zeros, locale=locale
18822
+ )
18823
+
18824
+ # Extract the formatted value using _get_column_of_values
18825
+ formatted_values = _get_column_of_values(gt_obj, column_name="value", context="html")
18826
+
18827
+ return formatted_values[0] # Return the single formatted value
18828
+
18829
+
18830
+ def _format_number_safe(
18831
+ value: float, decimals: int, drop_trailing_zeros: bool = False, locale: str = "en", df_lib=None
18832
+ ) -> str:
18833
+ """
18834
+ Safely format a float value with locale support.
18835
+
18836
+ Uses GT-based formatting when a DataFrame library is available, otherwise falls back to
18837
+ vals.fmt_number. This helper is used by threshold formatting functions.
18838
+ """
18839
+ if df_lib is not None and value is not None:
18840
+ # Use GT-based formatting to avoid Pandas dependency completely
18841
+ return _format_single_float_with_gt_custom(
18842
+ value,
18843
+ decimals=decimals,
18844
+ drop_trailing_zeros=drop_trailing_zeros,
18845
+ locale=locale,
18846
+ df_lib=df_lib,
18847
+ )
18848
+ else:
18849
+ # Fallback to the original behavior
18850
+ return fmt_number(
18851
+ value, decimals=decimals, drop_trailing_zeros=drop_trailing_zeros, locale=locale
18852
+ )[0] # pragma: no cover
18853
+
18854
+
18855
+ def _format_integer_safe(value: int, locale: str = "en", df_lib=None) -> str:
18856
+ """
18857
+ Safely format an integer value with locale support.
18858
+
18859
+ Uses GT-based formatting when a DataFrame library is available, otherwise falls back to
18860
+ vals.fmt_integer. This helper is used by threshold formatting functions.
18861
+ """
18862
+ if df_lib is not None and value is not None:
18863
+ # Use GT-based formatting to avoid Pandas dependency completely
18864
+ return _format_single_integer_with_gt(value, locale=locale, df_lib=df_lib)
18865
+ else:
18866
+ # Fallback to the original behavior
18867
+ return fmt_integer(value, locale=locale)[0]
18868
+
18869
+
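Editor's note: both `_format_*_safe()` helpers prefer the GT-based path when a DataFrame library handle is passed in and otherwise fall back to the `fmt_*` value formatters; either way a locale-aware string comes back. For instance (expected outputs assume the usual Great Tables en-locale formatting):

_format_integer_safe(1234567, locale="en")           # -> "1,234,567" via the fmt_integer fallback
_format_number_safe(0.125, decimals=3, locale="en")  # -> "0.125"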
18870
+ def _create_thresholds_html(thresholds: Thresholds, locale: str, df_lib=None) -> str:
18871
+ if thresholds == Thresholds():
18872
+ return ""
18873
+
18874
+ warning = (
18875
+ _format_number_safe(
18876
+ thresholds.warning_fraction,
18877
+ decimals=3,
18878
+ drop_trailing_zeros=True,
18879
+ locale=locale,
18880
+ df_lib=df_lib,
18881
+ )
18882
+ if thresholds.warning_fraction is not None
18883
+ else (
18884
+ _format_integer_safe(thresholds.warning_count, locale=locale, df_lib=df_lib)
18885
+ if thresholds.warning_count is not None
18886
+ else "&mdash;"
18887
+ )
18888
+ )
18889
+
18890
+ error = (
18891
+ _format_number_safe(
18892
+ thresholds.error_fraction,
18893
+ decimals=3,
18894
+ drop_trailing_zeros=True,
18895
+ locale=locale,
18896
+ df_lib=df_lib,
18897
+ )
18898
+ if thresholds.error_fraction is not None
18899
+ else (
18900
+ _format_integer_safe(thresholds.error_count, locale=locale, df_lib=df_lib)
18901
+ if thresholds.error_count is not None
18902
+ else "&mdash;"
18903
+ )
18904
+ )
18905
+
18906
+ critical = (
18907
+ _format_number_safe(
18908
+ thresholds.critical_fraction,
18909
+ decimals=3,
18910
+ drop_trailing_zeros=True,
18911
+ locale=locale,
18912
+ df_lib=df_lib,
18913
+ )
18914
+ if thresholds.critical_fraction is not None
18915
+ else (
18916
+ _format_integer_safe(thresholds.critical_count, locale=locale, df_lib=df_lib)
18917
+ if thresholds.critical_count is not None
18918
+ else "&mdash;"
18919
+ )
18920
+ )
18921
+
18922
+ warning_color = SEVERITY_LEVEL_COLORS["warning"]
18923
+ error_color = SEVERITY_LEVEL_COLORS["error"]
18924
+ critical_color = SEVERITY_LEVEL_COLORS["critical"]
18925
+
18926
+ return (
18927
+ "<span>"
18928
+ f'<span style="background-color: {warning_color}; color: white; '
18929
+ "padding: 0.5em 0.5em; position: inherit; text-transform: uppercase; "
18930
+ f"margin: 5px 0px 5px 5px; border: solid 1px {warning_color}; "
18931
+ 'font-weight: bold; padding: 2px 15px 2px 15px; font-size: smaller;">WARNING</span>'
18932
+ '<span style="background-color: none; color: #333333; padding: 0.5em 0.5em; '
18933
+ "position: inherit; margin: 5px 0px 5px -4px; font-weight: bold; "
18934
+ f"border: solid 1px {warning_color}; padding: 2px 15px 2px 15px; "
18935
+ 'font-size: smaller; margin-right: 5px;">'
18936
+ f"{warning}"
18937
+ "</span>"
18938
+ f'<span style="background-color: {error_color}; color: white; '
18939
+ "padding: 0.5em 0.5em; position: inherit; text-transform: uppercase; "
18940
+ f"margin: 5px 0px 5px 1px; border: solid 1px {error_color}; "
18941
+ 'font-weight: bold; padding: 2px 15px 2px 15px; font-size: smaller;">ERROR</span>'
18942
+ '<span style="background-color: none; color: #333333; padding: 0.5em 0.5em; '
18943
+ "position: inherit; margin: 5px 0px 5px -4px; font-weight: bold; "
18944
+ f"border: solid 1px {error_color}; padding: 2px 15px 2px 15px; "
18945
+ 'font-size: smaller; margin-right: 5px;">'
18946
+ f"{error}"
18947
+ "</span>"
18948
+ f'<span style="background-color: {critical_color}; color: white; '
18949
+ "padding: 0.5em 0.5em; position: inherit; text-transform: uppercase; "
18950
+ f"margin: 5px 0px 5px 1px; border: solid 1px {critical_color}; "
18951
+ 'font-weight: bold; padding: 2px 15px 2px 15px; font-size: smaller;">CRITICAL</span>'
18952
+ '<span style="background-color: none; color: #333333; padding: 0.5em 0.5em; '
18953
+ "position: inherit; margin: 5px 0px 5px -4px; font-weight: bold; "
18954
+ f"border: solid 1px {critical_color}; padding: 2px 15px 2px 15px; "
18955
+ 'font-size: smaller;">'
18956
+ f"{critical}"
18957
+ "</span>"
18958
+ "</span>"
18959
+ )
18960
+
18961
+
18962
+ def _create_local_threshold_note_html(thresholds: Thresholds, locale: str = "en") -> str:
18963
+ """
18964
+ Create a miniature HTML representation of local thresholds for display in notes.
18965
+
18966
+ This function generates a compact HTML representation of threshold values that is suitable for
18967
+ display in validation step notes/footnotes. It follows a similar visual style to the global
18968
+ thresholds shown in the header, but in a more condensed form.
18969
+
18970
+ Parameters
18971
+ ----------
18972
+ thresholds
18973
+ The Thresholds object containing the local threshold values.
18974
+ locale
18975
+ The locale to use for formatting numbers (default: "en").
18976
+
18977
+ Returns
18978
+ -------
18979
+ str
18980
+ HTML string containing the formatted threshold information.
18981
+ """
18982
+ if thresholds == Thresholds():
18983
+ return ""
18984
+
18985
+ # Get df_lib for formatting
18986
+ df_lib = None
18987
+ if _is_lib_present("polars"):
18988
+ import polars as pl
18989
+
18990
+ df_lib = pl
18991
+ elif _is_lib_present("pandas"):
18992
+ import pandas as pd
18993
+
18994
+ df_lib = pd
18995
+
18996
+ # Helper function to format threshold values using the shared formatting functions
18997
+ def _format_threshold_value(fraction: float | None, count: int | None) -> str:
18998
+ if fraction is not None:
18999
+ # Format as fraction/percentage with locale formatting
19000
+ if fraction == 0:
19001
+ return "0"
19002
+ elif fraction < 0.01:
19003
+ # For very small fractions, show "<0.01" with locale formatting
19004
+ formatted = _format_number_safe(0.01, decimals=2, locale=locale, df_lib=df_lib)
19005
+ return f"&lt;{formatted}"
19006
+ else:
19007
+ # Use shared formatting function with drop_trailing_zeros
19008
+ formatted = _format_number_safe(
19009
+ fraction, decimals=2, drop_trailing_zeros=True, locale=locale, df_lib=df_lib
19010
+ )
19011
+ return formatted
19012
+ elif count is not None:
19013
+ # Format integer count using shared formatting function
19014
+ return _format_integer_safe(count, locale=locale, df_lib=df_lib)
19015
+ else:
19016
+ return "&mdash;"
19017
+
19018
+ warning = _format_threshold_value(thresholds.warning_fraction, thresholds.warning_count)
19019
+ error = _format_threshold_value(thresholds.error_fraction, thresholds.error_count)
19020
+ critical = _format_threshold_value(thresholds.critical_fraction, thresholds.critical_count)
19021
+
19022
+ warning_color = SEVERITY_LEVEL_COLORS["warning"]
19023
+ error_color = SEVERITY_LEVEL_COLORS["error"]
19024
+ critical_color = SEVERITY_LEVEL_COLORS["critical"]
19025
+
19026
+ # Build threshold parts with colored letters in monospace font
19027
+ threshold_parts = []
19028
+
19029
+ # Add warning threshold if set
19030
+ if thresholds.warning is not None:
19031
+ threshold_parts.append(
19032
+ f'<span style="color: {warning_color}; font-weight: bold;">W</span>:{warning}'
19033
+ )
19034
+
19035
+ # Add error threshold if set
19036
+ if thresholds.error is not None:
19037
+ threshold_parts.append(
19038
+ f'<span style="color: {error_color}; font-weight: bold;">E</span>:{error}'
19039
+ )
19040
+
19041
+ # Add critical threshold if set
19042
+ if thresholds.critical is not None:
19043
+ threshold_parts.append(
19044
+ f'<span style="color: {critical_color}; font-weight: bold;">C</span>:{critical}'
19045
+ )
19046
+
19047
+ # Join with "|" separator (only between multiple thresholds)
19048
+ thresholds_html = f'<span style="font-family: monospace;">{"|".join(threshold_parts)}</span>'
19049
+
19050
+ # Get localized text and format with threshold HTML
19051
+ localized_text = NOTES_TEXT["local_threshold"].get(locale, NOTES_TEXT["local_threshold"]["en"])
19052
+ note_html = localized_text.replace("{thresholds}", thresholds_html)
19053
+
19054
+ return note_html
19055
+
19056
+
19057
+ def _create_local_threshold_note_text(thresholds: Thresholds) -> str:
19058
+ """
19059
+ Create a plain text representation of local thresholds for display in logs.
19060
+
19061
+ This function generates a plain text representation of threshold values that is
19062
+ suitable for display in text-based output such as logs or console output.
19063
+
19064
+ Parameters
19065
+ ----------
19066
+ thresholds
19067
+ The Thresholds object containing the local threshold values.
19068
+
19069
+ Returns
19070
+ -------
19071
+ str
19072
+ Plain text string containing the formatted threshold information.
19073
+ """
19074
+ if thresholds == Thresholds():
19075
+ return ""
19076
+
19077
+ # Helper function to format threshold values
19078
+ def _format_threshold_value(fraction: float | None, count: int | None) -> str:
19079
+ if fraction is not None:
19080
+ if fraction == 0:
19081
+ return "0"
19082
+ elif fraction < 0.01:
19083
+ return "<0.01"
19084
+ else:
19085
+ return f"{fraction:.2f}".rstrip("0").rstrip(".")
19086
+ elif count is not None:
19087
+ return str(count)
19088
+ else:
19089
+ return "—"
19090
+
19091
+ parts = []
19092
+
19093
+ if thresholds.warning is not None:
19094
+ warning = _format_threshold_value(thresholds.warning_fraction, thresholds.warning_count)
19095
+ parts.append(f"W: {warning}")
19096
+
19097
+ if thresholds.error is not None:
19098
+ error = _format_threshold_value(thresholds.error_fraction, thresholds.error_count)
19099
+ parts.append(f"E: {error}")
19100
+
19101
+ if thresholds.critical is not None:
19102
+ critical = _format_threshold_value(thresholds.critical_fraction, thresholds.critical_count)
19103
+ parts.append(f"C: {critical}")
19104
+
19105
+ if parts:
19106
+ return "Step-specific thresholds set: " + ", ".join(parts)
19107
+ else:
19108
+ return ""
19109
+
19110
+
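Editor's note: for a step with local thresholds, the plain-text note simply lists whichever of W/E/C are defined. Assuming pointblank's usual `Thresholds(warning=..., error=..., critical=...)` semantics (values below 1 stored as fractions, values of 1 or more as counts), an illustrative result:

_create_local_threshold_note_text(Thresholds(warning=0.1, error=25))
# -> "Step-specific thresholds set: W: 0.1, E: 25"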
19111
+ def _create_threshold_reset_note_html(locale: str = "en") -> str:
19112
+ """
19113
+ Create an HTML note for when thresholds are explicitly reset to empty.
19114
+
19115
+ Parameters
19116
+ ----------
19117
+ locale
19118
+ The locale string (e.g., 'en', 'fr').
19119
+
19120
+ Returns
19121
+ -------
19122
+ str
19123
+ HTML-formatted note text.
19124
+ """
19125
+ text = NOTES_TEXT.get("local_threshold_reset", {}).get(
19126
+ locale, NOTES_TEXT.get("local_threshold_reset", {}).get("en", "")
19127
+ )
19128
+ return text
19129
+
19130
+
19131
+ def _create_threshold_reset_note_text() -> str:
19132
+ """
19133
+ Create a plain text note for when thresholds are explicitly reset to empty.
19134
+
19135
+ Returns
19136
+ -------
19137
+ str
19138
+ Plain text note.
19139
+ """
19140
+ return "Global thresholds explicitly not used for this step."
19141
+
19142
+
19143
+ def _create_no_columns_resolved_note_html(
19144
+ column_expr: str, available_columns: list[str], locale: str = "en"
19145
+ ) -> str:
19146
+ """
19147
+ Create an HTML note explaining that a column expression resolved to no columns.
19148
+
19149
+ Parameters
19150
+ ----------
19151
+ column_expr
19152
+ The column expression that failed to resolve columns (as a string).
19153
+ available_columns
19154
+ List of available column names in the table.
19155
+ locale
19156
+ The locale string (e.g., 'en', 'fr').
19157
+
19158
+ Returns
19159
+ -------
19160
+ str
19161
+ HTML-formatted note text.
19162
+ """
19163
+ # Get translated strings
19164
+ intro = NOTES_TEXT.get("column_not_found_intro", {}).get(
19165
+ locale, NOTES_TEXT.get("column_not_found_intro", {}).get("en", "The column expression")
19166
+ )
19167
+ no_resolve = NOTES_TEXT.get("column_not_found_no_resolve", {}).get(
19168
+ locale,
19169
+ NOTES_TEXT.get("column_not_found_no_resolve", {}).get(
19170
+ "en", "does not resolve to any columns"
19171
+ ),
19172
+ )
19173
+
19174
+ # Format the column expression with monospace font
19175
+ col_expr_html = f"<code style='font-family: \"IBM Plex Mono\", monospace;'>{column_expr}</code>"
19176
+
19177
+ # Build the HTML note
19178
+ html = f"{intro} {col_expr_html} {no_resolve}."
19179
+
19180
+ return html
19181
+
19182
+
19183
+ def _create_no_columns_resolved_note_text(column_expr: str, available_columns: list[str]) -> str:
19184
+ """
19185
+ Create a plain text note explaining that a column expression resolved to no columns.
19186
+
19187
+ Parameters
19188
+ ----------
19189
+ column_expr
19190
+ The column expression that failed to resolve columns (as a string).
19191
+ available_columns
19192
+ List of available column names in the table.
19193
+
19194
+ Returns
19195
+ -------
19196
+ str
19197
+ Plain text note.
19198
+ """
19199
+ return f"The column expression `{column_expr}` does not resolve to any columns."
19200
+
19201
+
19202
+ def _create_column_not_found_note_html(
19203
+ column_name: str, available_columns: list[str], locale: str = "en"
19204
+ ) -> str:
19205
+ """
19206
+ Create an HTML note explaining that a specific column was not found.
19207
+
19208
+ Parameters
19209
+ ----------
19210
+ column_name
19211
+ The column name that was not found.
19212
+ available_columns
19213
+ List of available column names in the table.
19214
+ locale
19215
+ The locale string (e.g., 'en', 'fr').
19216
+
19217
+ Returns
19218
+ -------
19219
+ str
19220
+ HTML-formatted note text.
19221
+ """
19222
+ # Get translated strings
19223
+ intro = NOTES_TEXT.get("target_column_provided", {}).get(
19224
+ locale, NOTES_TEXT.get("target_column_provided", {}).get("en", "The target column provided")
19225
+ )
19226
+ not_found = NOTES_TEXT.get("does_not_match_any_columns", {}).get(
19227
+ locale,
19228
+ NOTES_TEXT.get("does_not_match_any_columns", {}).get(
19229
+ "en", "does not match any columns in the table"
19230
+ ),
19231
+ )
19232
+
19233
+ # Format the column name with monospace font
19234
+ col_name_html = f"<code style='font-family: \"IBM Plex Mono\", monospace;'>{column_name}</code>"
19235
+
19236
+ # Build the HTML note
19237
+ html = f"{intro} ({col_name_html}) {not_found}."
19238
+
19239
+ return html
19240
+
19241
+
19242
+ def _create_column_not_found_note_text(column_name: str, available_columns: list[str]) -> str:
19243
+ """
19244
+ Create a plain text note explaining that a specific column was not found.
19245
+
19246
+ Parameters
19247
+ ----------
19248
+ column_name
19249
+ The column name that was not found.
19250
+ available_columns
19251
+ List of available column names in the table.
19252
+
19253
+ Returns
19254
+ -------
19255
+ str
19256
+ Plain text note.
19257
+ """
19258
+ return f"The target column provided ({column_name}) does not match any columns in the table."
19259
+
19260
+
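Editor's note: the text variant mirrors the HTML note minus the styling, e.g.:

_create_column_not_found_note_text("total", available_columns=["a", "b", "c"])
# -> "The target column provided (total) does not match any columns in the table."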
19261
+ def _create_comparison_column_not_found_note_html(
19262
+ column_name: str, position: str | None, available_columns: list[str], locale: str = "en"
19263
+ ) -> str:
19264
+ """
19265
+ Create an HTML note explaining that a comparison column was not found.
19266
+
19267
+ Parameters
19268
+ ----------
19269
+ column_name
19270
+ The comparison column name that was not found.
19271
+ position
19272
+ Optional position indicator ("left", "right") for between/outside validations.
19273
+ available_columns
19274
+ List of available column names in the table.
19275
+ locale
19276
+ The locale string (e.g., 'en', 'fr').
19277
+
19278
+ Returns
19279
+ -------
19280
+ str
19281
+ HTML-formatted note text.
19282
+ """
19283
+ # Get translated strings
19284
+ intro = NOTES_TEXT.get("comparison_column_provided", {}).get(
19285
+ locale,
19286
+ NOTES_TEXT.get("comparison_column_provided", {}).get(
19287
+ "en", "The comparison column provided"
19288
+ ),
19289
+ )
19290
+ intro_with_for = NOTES_TEXT.get("comparison_column_for", {}).get(
19291
+ locale,
19292
+ NOTES_TEXT.get("comparison_column_for", {}).get("en", "The comparison column provided for"),
19293
+ )
19294
+ not_found = NOTES_TEXT.get("does_not_match_any_columns", {}).get(
19295
+ locale,
19296
+ NOTES_TEXT.get("does_not_match_any_columns", {}).get(
19297
+ "en", "does not match any columns in the table"
19298
+ ),
19299
+ )
19300
+
19301
+ # Format the column name with monospace font
19302
+ col_name_html = f"<code style='font-family: \"IBM Plex Mono\", monospace;'>{column_name}</code>"
19303
+
19304
+ # Add position if provided (for between/outside validations)
19305
+ if position:
19306
+ # Format position parameter with monospace font (e.g., "left=", "right=")
19307
+ position_param = (
19308
+ f"<code style='font-family: \"IBM Plex Mono\", monospace;'>{position}=</code>"
19309
+ )
19310
+ # Use the "for" version of the intro text
19311
+ html = f"{intro_with_for} {position_param} ({col_name_html}) {not_found}."
19312
+ else:
19313
+ # Use the standard intro text without "for"
19314
+ html = f"{intro} ({col_name_html}) {not_found}."
19315
+
19316
+ return html
19317
+
19318
+
19319
+ def _create_comparison_column_not_found_note_text(
19320
+ column_name: str, position: str | None, available_columns: list[str]
19321
+ ) -> str:
19322
+ """
19323
+ Create a plain text note explaining that a comparison column was not found.
19324
+
19325
+ Parameters
19326
+ ----------
19327
+ column_name
19328
+ The comparison column name that was not found.
19329
+ position
19330
+ Optional position indicator ("left", "right") for between/outside validations.
19331
+ available_columns
19332
+ List of available column names in the table.
19333
+
19334
+ Returns
19335
+ -------
19336
+ str
19337
+ Plain text note.
19338
+ """
19339
+ if position:
19340
+ position_text = f" for {position}="
19341
+ else:
19342
+ position_text = ""
19343
+
19344
+ return (
19345
+ f"The comparison column provided{position_text} ({column_name}) "
19346
+ f"does not match any columns in the table."
19347
+ )
19348
+
19349
+
19350
+ def _create_preprocessing_note_html(
19351
+ original_rows: int,
19352
+ original_cols: int,
19353
+ processed_rows: int,
19354
+ processed_cols: int,
19355
+ locale: str = "en",
19356
+ ) -> str:
19357
+ """
19358
+ Create an HTML note showing table dimension changes from preprocessing.
19359
+
19360
+ Parameters
19361
+ ----------
19362
+ original_rows
19363
+ Number of rows in the original table.
19364
+ original_cols
19365
+ Number of columns in the original table.
19366
+ processed_rows
19367
+ Number of rows after preprocessing.
19368
+ processed_cols
19369
+ Number of columns after preprocessing.
19370
+ locale
19371
+ The locale string (e.g., 'en', 'fr').
19372
+
19373
+ Returns
19374
+ -------
19375
+ str
19376
+ HTML-formatted note text.
19377
+ """
19378
+ # Get translated strings
19379
+ precondition_text = NOTES_TEXT.get("precondition_applied", {}).get(
19380
+ locale, NOTES_TEXT.get("precondition_applied", {}).get("en", "Precondition applied")
19381
+ )
19382
+ table_dims_text = NOTES_TEXT.get("table_dimensions", {}).get(
19383
+ locale, NOTES_TEXT.get("table_dimensions", {}).get("en", "table dimensions")
19384
+ )
19385
+
19386
+ # Helper function to get singular or plural form
19387
+ def get_row_text(count: int) -> str:
19388
+ if count == 1:
19389
+ return NOTES_TEXT.get("row", {}).get(locale, NOTES_TEXT.get("row", {}).get("en", "row"))
19390
+ return NOTES_TEXT.get("rows", {}).get(locale, NOTES_TEXT.get("rows", {}).get("en", "rows"))
19391
+
19392
+ def get_col_text(count: int) -> str:
19393
+ if count == 1:
19394
+ return NOTES_TEXT.get("column", {}).get(
19395
+ locale, NOTES_TEXT.get("column", {}).get("en", "column")
19396
+ )
19397
+ return NOTES_TEXT.get("columns", {}).get(
19398
+ locale, NOTES_TEXT.get("columns", {}).get("en", "columns")
19399
+ )
19400
+
19401
+ # Determine which dimensions changed
19402
+ rows_changed = original_rows != processed_rows
19403
+ cols_changed = original_cols != processed_cols
19404
+
19405
+ # Format original dimensions
19406
+ original_rows_text = get_row_text(original_rows)
19407
+ original_cols_text = get_col_text(original_cols)
19408
+ original_dim = (
19409
+ f'<span style="font-family: monospace;">'
19410
+ f"[{original_rows:,} {original_rows_text}, {original_cols} {original_cols_text}]"
19411
+ f"</span>"
19412
+ )
19413
+
19414
+ # Format processed dimensions with bold for changed values
19415
+ processed_rows_text = get_row_text(processed_rows)
19416
+ processed_cols_text = get_col_text(processed_cols)
19417
+
19418
+ if rows_changed:
19419
+ rows_display = f"<strong>{processed_rows:,}</strong> {processed_rows_text}"
19420
+ else:
19421
+ rows_display = f"{processed_rows:,} {processed_rows_text}"
19422
+
19423
+ if cols_changed:
19424
+ cols_display = f"<strong>{processed_cols}</strong> {processed_cols_text}"
19425
+ else:
19426
+ cols_display = f"{processed_cols} {processed_cols_text}"
19427
+
19428
+ processed_dim = f'<span style="font-family: monospace;">[{rows_display}, {cols_display}]</span>'
19429
+
19430
+ # Build the HTML note
19431
+ html = f"{precondition_text}: {table_dims_text} {original_dim} → {processed_dim}."
19432
+
19433
+ return html
19434
+
19435
+
19436
+ def _create_preprocessing_note_text(
19437
+ original_rows: int,
19438
+ original_cols: int,
19439
+ processed_rows: int,
19440
+ processed_cols: int,
19441
+ ) -> str:
19442
+ """
19443
+ Create a plain text note showing table dimension changes from preprocessing.
19444
+
19445
+ Parameters
19446
+ ----------
19447
+ original_rows
19448
+ Number of rows in the original table.
19449
+ original_cols
19450
+ Number of columns in the original table.
19451
+ processed_rows
19452
+ Number of rows after preprocessing.
19453
+ processed_cols
19454
+ Number of columns after preprocessing.
19455
+
19456
+ Returns
19457
+ -------
19458
+ str
19459
+ Plain text note.
19460
+ """
19461
+ # Get singular or plural forms
19462
+ original_rows_text = "row" if original_rows == 1 else "rows"
19463
+ original_cols_text = "column" if original_cols == 1 else "columns"
19464
+ processed_rows_text = "row" if processed_rows == 1 else "rows"
19465
+ processed_cols_text = "column" if processed_cols == 1 else "columns"
19466
+
19467
+ return (
19468
+ f"Precondition applied: table dimensions "
19469
+ f"[{original_rows:,} {original_rows_text}, {original_cols} {original_cols_text}] → "
19470
+ f"[{processed_rows:,} {processed_rows_text}, {processed_cols} {processed_cols_text}]."
19471
+ )
19472
+
19473
+
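Editor's note: applied to illustrative dimensions, the helper above yields:

_create_preprocessing_note_text(1000, 8, 950, 8)
# -> "Precondition applied: table dimensions [1,000 rows, 8 columns] → [950 rows, 8 columns]."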
19474
+ def _create_preprocessing_no_change_note_html(locale: str = "en") -> str:
19475
+ """
19476
+ Create an HTML note indicating preprocessing was applied with no dimension change.
19477
+
19478
+ Parameters
19479
+ ----------
19480
+ locale
19481
+ The locale string (e.g., 'en', 'fr').
19482
+
19483
+ Returns
19484
+ -------
19485
+ str
19486
+ HTML-formatted note text.
19487
+ """
19488
+ # Get translated string
19489
+ note_text = NOTES_TEXT.get("precondition_applied_no_change", {}).get(
19490
+ locale,
19491
+ NOTES_TEXT.get("precondition_applied_no_change", {}).get(
19492
+ "en", "Precondition applied: no table dimension change"
19493
+ ),
19494
+ )
19495
+
19496
+ return f"{note_text}."
19497
+
19498
+
19499
+ def _create_preprocessing_no_change_note_text() -> str:
17870
19500
  """
17871
- Create markdown text for validation notes/footnotes.
19501
+ Create a plain text note indicating preprocessing was applied with no dimension change.
17872
19502
 
17873
- This function collects notes from all validation steps and formats them as footnotes
17874
- for display in the report footer. Each note is prefixed with the step number in
17875
- uppercase small caps bold formatting, and the note content is rendered as markdown.
19503
+ Returns
19504
+ -------
19505
+ str
19506
+ Plain text note.
19507
+ """
19508
+ return "Precondition applied: no table dimension change."
19509
+
19510
+
19511
+ def _create_synthetic_target_column_note_html(column_name: str, locale: str = "en") -> str:
19512
+ """
19513
+ Create an HTML note indicating that the target column was created via preprocessing.
17876
19514
 
17877
19515
  Parameters
17878
19516
  ----------
17879
- validation_info
17880
- List of _ValidationInfo objects from which to extract notes.
19517
+ column_name
19518
+ The name of the synthetic target column.
19519
+ locale
19520
+ The locale string (e.g., 'en', 'fr').
17881
19521
 
17882
19522
  Returns
17883
19523
  -------
17884
19524
  str
17885
- Markdown string containing formatted footnotes, or empty string if no notes exist.
19525
+ HTML-formatted note text.
17886
19526
  """
17887
- # Collect all notes from validation steps
17888
- all_notes = []
17889
- for step in validation_info:
17890
- if step.notes:
17891
- for key, content in step.notes.items():
17892
- # Store note with step number for context
17893
- all_notes.append(
17894
- {
17895
- "step": step.i,
17896
- "key": key,
17897
- "markdown": content["markdown"],
17898
- "text": content["text"],
17899
- }
17900
- )
17901
-
17902
- # If no notes, return empty string
17903
- if not all_notes:
17904
- return ""
19527
+ # Get translated strings
19528
+ synthetic_text = NOTES_TEXT.get("synthetic_target_column", {}).get(
19529
+ locale, NOTES_TEXT.get("synthetic_target_column", {}).get("en", "Synthetic target column")
19530
+ )
19531
+ created_via_text = NOTES_TEXT.get("created_via_preprocessing", {}).get(
19532
+ locale,
19533
+ NOTES_TEXT.get("created_via_preprocessing", {}).get("en", "created via preprocessing"),
19534
+ )
17905
19535
 
17906
- # Build markdown for notes section
17907
- # Start with a styled horizontal rule and bold "Notes" header
17908
- notes_parts = [
17909
- (
17910
- "<hr style='border: none; border-top-width: 1px; border-top-style: dotted; "
17911
- "border-top-color: #B5B5B5; margin-top: -3px; margin-bottom: 3px;'>"
17912
- ),
17913
- "<strong>Notes</strong>",
17914
- "",
17915
- ]
19536
+ # Format the column name with monospace font
19537
+ col_name_html = f"<code style='font-family: \"IBM Plex Mono\", monospace;'>{column_name}</code>"
17916
19538
 
17917
- previous_step = None
17918
- for note in all_notes:
17919
- # Determine if this is the first note for this step
17920
- is_first_for_step = note["step"] != previous_step
17921
- previous_step = note["step"]
19539
+ # Build the HTML note
19540
+ html = f"{synthetic_text} {col_name_html} {created_via_text}."
17922
19541
 
17923
- # Format step label with HTML for uppercase small caps bold
17924
- # Use lighter color for subsequent notes of the same step
17925
- step_color = "#333333" if is_first_for_step else "#999999"
17926
- step_label = (
17927
- f"<span style='font-variant: small-caps; font-weight: bold; font-size: smaller; "
17928
- f"text-transform: uppercase; color: {step_color};'>Step {note['step']}</span>"
17929
- )
19542
+ return html
17930
19543
 
17931
- # Format note key in monospaced font with smaller size
17932
- note_key = f"<span style='font-family: \"IBM Plex Mono\", monospace; font-size: smaller;'>({note['key']})</span>"
17933
19544
 
17934
- # Combine step label, note key, and markdown content
17935
- note_text = f"{step_label} {note_key} {note['markdown']}"
17936
- notes_parts.append(note_text)
17937
- notes_parts.append("") # Add blank line between notes
19545
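Assuming the English default strings above, the assembled HTML note comes out roughly as follows; "row_sum" is only an example column name.

expected_html = (
    "Synthetic target column "
    "<code style='font-family: \"IBM Plex Mono\", monospace;'>row_sum</code> "
    "created via preprocessing."
)
# _create_synthetic_target_column_note_html("row_sum", locale="en") is expected to
# return the string above when no translation overrides the English defaults.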
+ def _create_synthetic_target_column_note_text(column_name: str) -> str:
19546
+ """
19547
+ Create a plain text note indicating that the target column was created via preprocessing.
17938
19548
 
17939
- # Remove trailing blank line
17940
- if notes_parts[-1] == "":
17941
- notes_parts.pop()
19549
+ Parameters
19550
+ ----------
19551
+ column_name
19552
+ The name of the synthetic target column.
17942
19553
 
17943
- # Join with newlines to create markdown text
17944
- notes_markdown = "\n".join(notes_parts)
19554
+ Returns
19555
+ -------
19556
+ str
19557
+ Plain text note.
19558
+ """
19559
+ return f"Synthetic target column ({column_name}) created via preprocessing."
17945
19560
 
17946
- return notes_markdown
17947
19561
 
19562
+ def _create_col_schema_match_note_html(schema_info: dict, locale: str = "en") -> str:
19563
+ """
19564
+ Create an HTML note with a collapsible view of the schema expectation and comparison results.
17948
19565
 
17949
- def _create_label_html(label: str | None, start_time: str) -> str:
17950
- if label is None:
17951
- # Remove the decimal and everything beyond that
17952
- start_time = str(start_time).split(".")[0]
19566
+ This generates a disclosure-style note showing:
19567
+ 1. A summary of what failed (if anything)
19568
+ 2. The full step report table (collapsible)
17953
19569
 
17954
- # Replace the space character with a pipe character
17955
- start_time = start_time.replace(" ", "|")
19570
+ Parameters
19571
+ ----------
19572
+ schema_info
19573
+ The schema validation information dictionary from interrogation.
19574
+ locale
19575
+ The locale string (e.g., 'en', 'fr').
17956
19576
 
17957
- label = start_time
19577
+ Returns
19578
+ -------
19579
+ str
19580
+ HTML-formatted note with collapsible schema details.
19581
+ """
19582
+ passed = schema_info["passed"]
19583
+ expect_schema = schema_info["expect_schema"]
19584
+ target_schema = schema_info["target_schema"]
19585
+ params = schema_info["params"]
19586
+ columns_dict = schema_info["columns"]
19587
+ in_order = params["in_order"]
17958
19588
 
17959
- return (
17960
- f"<span style='text-decoration-style: solid; text-decoration-color: #ADD8E6; "
17961
- f"text-decoration-line: underline; text-underline-position: under; color: #333333; "
17962
- f"font-variant-numeric: tabular-nums; padding-left: 4px; margin-right: 5px; "
17963
- f"padding-right: 2px;'>{label}</span>"
19589
+ # Get translations for the locale
19590
+ passed_text = VALIDATION_REPORT_TEXT["note_schema_comparison_passed"].get(
19591
+ locale, VALIDATION_REPORT_TEXT["note_schema_comparison_passed"]["en"]
19592
+ )
19593
+ failed_text = VALIDATION_REPORT_TEXT["note_schema_comparison_failed"].get(
19594
+ locale, VALIDATION_REPORT_TEXT["note_schema_comparison_failed"]["en"]
19595
+ )
19596
+ disclosure_text = VALIDATION_REPORT_TEXT["note_schema_comparison_disclosure"].get(
19597
+ locale, VALIDATION_REPORT_TEXT["note_schema_comparison_disclosure"]["en"]
19598
+ )
19599
+ settings_title_text = VALIDATION_REPORT_TEXT["note_schema_comparison_match_settings_title"].get(
19600
+ locale, VALIDATION_REPORT_TEXT["note_schema_comparison_match_settings_title"]["en"]
17964
19601
  )
17965
19602
 
19603
+ # Build summary message
19604
+ if passed:
19605
+ summary = f'<span style="color:#4CA64C;">✓</span> {passed_text}.'
19606
+ else:
19607
+ # Analyze what failed
19608
+ failures = []
17966
19609
 
17967
- def _format_single_integer_with_gt(value: int, locale: str = "en", df_lib=None) -> str:
17968
- """Format a single integer using Great Tables GT object to avoid pandas dependency."""
17969
- if df_lib is None:
17970
- # Use library detection to select appropriate DataFrame library
17971
- if _is_lib_present("polars"):
17972
- import polars as pl
17973
-
17974
- df_lib = pl
17975
- elif _is_lib_present("pandas"): # pragma: no cover
17976
- import pandas as pd # pragma: no cover
17977
-
17978
- df_lib = pd # pragma: no cover
17979
- else: # pragma: no cover
17980
- raise ImportError(
17981
- "Neither Polars nor Pandas is available for formatting"
17982
- ) # pragma: no cover
17983
-
17984
- # Create a single-row, single-column DataFrame using the specified library
17985
- df = df_lib.DataFrame({"value": [value]})
17986
-
17987
- # Create GT object and format the column
17988
- gt_obj = GT(df).fmt_integer(columns="value", locale=locale)
19610
+ # Check column count mismatch
19611
+ n_expect = len(expect_schema)
19612
+ n_target = len(target_schema)
19613
+ if n_expect != n_target:
19614
+ count_mismatch_text = VALIDATION_REPORT_TEXT["note_schema_column_count_mismatch"].get(
19615
+ locale, VALIDATION_REPORT_TEXT["note_schema_column_count_mismatch"]["en"]
19616
+ )
19617
+ failures.append(count_mismatch_text.format(n_expect=n_expect, n_target=n_target))
17989
19618
 
17990
- # Extract the formatted value using _get_column_of_values
17991
- formatted_values = _get_column_of_values(gt_obj, column_name="value", context="html")
19619
+ # Check for unmatched columns
19620
+ unmatched_cols = [col for col, info in columns_dict.items() if not info["colname_matched"]]
19621
+ if unmatched_cols:
19622
+ unmatched_text = VALIDATION_REPORT_TEXT["note_schema_unmatched_columns"].get(
19623
+ locale, VALIDATION_REPORT_TEXT["note_schema_unmatched_columns"]["en"]
19624
+ )
19625
+ failures.append(unmatched_text.format(n=len(unmatched_cols)))
19626
+
19627
+ # Check for wrong order (if in_order=True)
19628
+ if params["in_order"]:
19629
+ wrong_order = [
19630
+ col
19631
+ for col, info in columns_dict.items()
19632
+ if info["colname_matched"] and not info["index_matched"]
19633
+ ]
19634
+ if wrong_order:
19635
+ wrong_order_text = VALIDATION_REPORT_TEXT["note_schema_wrong_order"].get(
19636
+ locale, VALIDATION_REPORT_TEXT["note_schema_wrong_order"]["en"]
19637
+ )
19638
+ failures.append(wrong_order_text.format(n=len(wrong_order)))
17992
19639
 
17993
- return formatted_values[0] # Return the single formatted value
19640
+ # Check for dtype mismatches
19641
+ dtype_mismatches = [
19642
+ col
19643
+ for col, info in columns_dict.items()
19644
+ if info["colname_matched"] and info["dtype_present"] and not info["dtype_matched"]
19645
+ ]
19646
+ if dtype_mismatches:
19647
+ dtype_mismatch_text = VALIDATION_REPORT_TEXT["note_schema_dtype_mismatch"].get(
19648
+ locale, VALIDATION_REPORT_TEXT["note_schema_dtype_mismatch"]["en"]
19649
+ )
19650
+ failures.append(dtype_mismatch_text.format(n=len(dtype_mismatches)))
17994
19651
 
19652
+ if failures:
19653
+ summary = (
19654
+ f'<span style="color:#FF3300;">✗</span> {failed_text}: ' + ", ".join(failures) + "."
19655
+ )
19656
+ else:
19657
+ summary = f'<span style="color:#FF3300;">✗</span> {failed_text}.'
17995
19658
 
17996
- def _format_single_float_with_gt_custom(
17997
- value: float,
17998
- decimals: int = 2,
17999
- drop_trailing_zeros: bool = False,
18000
- locale: str = "en",
18001
- df_lib=None,
18002
- ) -> str:
18003
- """Format a single float with custom options using Great Tables GT object to avoid pandas dependency."""
18004
- if df_lib is None:
18005
- # Use library detection to select appropriate DataFrame library
18006
- if _is_lib_present("polars"):
18007
- import polars as pl
19659
+ # Generate the step report table using the existing function
19660
+ # We'll call either _step_report_schema_in_order or _step_report_schema_any_order
19661
+ # depending on the in_order parameter
19662
+ if in_order:
19663
+ step_report_gt = _step_report_schema_in_order(
19664
+ step=1, schema_info=schema_info, header=None, lang=locale, debug_return_df=False
19665
+ )
19666
+ else:
19667
+ step_report_gt = _step_report_schema_any_order(
19668
+ step=1, schema_info=schema_info, header=None, lang=locale, debug_return_df=False
19669
+ )
19670
+
19671
+ # Generate the settings HTML using the existing function
19672
+ settings_html = _create_col_schema_match_params_html(
19673
+ lang=locale,
19674
+ complete=params["complete"],
19675
+ in_order=params["in_order"],
19676
+ case_sensitive_colnames=params["case_sensitive_colnames"],
19677
+ case_sensitive_dtypes=params["case_sensitive_dtypes"],
19678
+ full_match_dtypes=params["full_match_dtypes"],
19679
+ )
18008
19680
 
18009
- df_lib = pl
18010
- elif _is_lib_present("pandas"): # pragma: no cover
18011
- import pandas as pd # pragma: no cover
19681
+ # Remove the inner div containing column_schema_match_str
19682
+ settings_html = re.sub(r'<div style="margin-right: 5px;">.*?</div>', "", settings_html, count=1)
18012
19683
 
18013
- df_lib = pd # pragma: no cover
18014
- else: # pragma: no cover
18015
- raise ImportError(
18016
- "Neither Polars nor Pandas is available for formatting"
18017
- ) # pragma: no cover
19684
+ # Change padding-top from 7px to 2px
19685
+ settings_html = settings_html.replace("padding-top: 7px;", "padding-top: 2px;")
18018
19686
 
18019
- # Create a single-row, single-column DataFrame using the specified library
18020
- df = df_lib.DataFrame({"value": [value]})
19687
+ # Create new source note HTML that includes both settings and schema
19688
+ source_note_html = f"""
19689
+ <div style='padding-bottom: 2px;'>{settings_title_text}</div>
19690
+ <div style='padding-bottom: 4px;'>{settings_html}</div>
19691
+ """
18021
19692
 
18022
- # Create GT object and format the column
18023
- gt_obj = GT(df).fmt_number(
18024
- columns="value", decimals=decimals, drop_trailing_zeros=drop_trailing_zeros, locale=locale
18025
- )
19693
+ # Add the settings as an additional source note to the step report
19694
+ step_report_gt = step_report_gt.tab_source_note(source_note=html(source_note_html))
18026
19695
 
18027
- # Extract the formatted value using _get_column_of_values
18028
- formatted_values = _get_column_of_values(gt_obj, column_name="value", context="html")
19696
+ # Extract the HTML from the GT object
19697
+ step_report_html = step_report_gt._repr_html_()
18029
19698
 
18030
- return formatted_values[0] # Return the single formatted value
19699
+ # Create collapsible section with the step report
19700
+ note_html = f"""
19701
+ {summary}
18031
19702
 
19703
+ <details style="margin-top: 2px; margin-bottom: 8px; font-size: 12px; text-indent: 12px;">
19704
+ <summary style="cursor: pointer; font-weight: bold; color: #555; margin-bottom: -5px;">{disclosure_text}</summary>
19705
+ <div style="margin-top: 6px; padding-left: 15px; padding-right: 15px;">
18032
19706
 
18033
- def _create_thresholds_html(thresholds: Thresholds, locale: str, df_lib=None) -> str:
18034
- if thresholds == Thresholds():
18035
- return ""
19707
+ {step_report_html}
18036
19708
 
18037
- # Helper functions to format numbers safely
18038
- def _format_number_safe(value: float, decimals: int, drop_trailing_zeros: bool = False) -> str:
18039
- if df_lib is not None and value is not None:
18040
- # Use GT-based formatting to avoid Pandas dependency completely
18041
- return _format_single_float_with_gt_custom(
18042
- value,
18043
- decimals=decimals,
18044
- drop_trailing_zeros=drop_trailing_zeros,
18045
- locale=locale,
18046
- df_lib=df_lib,
18047
- )
18048
- else:
18049
- # Fallback to the original behavior
18050
- return fmt_number(
18051
- value, decimals=decimals, drop_trailing_zeros=drop_trailing_zeros, locale=locale
18052
- )[0] # pragma: no cover
19709
+ </div>
19710
+ </details>
19711
+ """
18053
19712
 
18054
- def _format_integer_safe(value: int) -> str:
18055
- if df_lib is not None and value is not None:
18056
- # Use GT-based formatting to avoid Pandas dependency completely
18057
- return _format_single_integer_with_gt(value, locale=locale, df_lib=df_lib)
18058
- else:
18059
- # Fallback to the original behavior
18060
- return fmt_integer(value, locale=locale)[0]
19713
+ return note_html.strip()
18061
19714
 
18062
- warning = (
18063
- _format_number_safe(thresholds.warning_fraction, decimals=3, drop_trailing_zeros=True)
18064
- if thresholds.warning_fraction is not None
18065
- else (
18066
- _format_integer_safe(thresholds.warning_count)
18067
- if thresholds.warning_count is not None
18068
- else "&mdash;"
18069
- )
18070
- )
18071
19715
 
18072
- error = (
18073
- _format_number_safe(thresholds.error_fraction, decimals=3, drop_trailing_zeros=True)
18074
- if thresholds.error_fraction is not None
18075
- else (
18076
- _format_integer_safe(thresholds.error_count)
18077
- if thresholds.error_count is not None
18078
- else "&mdash;"
18079
- )
18080
- )
19716
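A hedged sketch of the schema_info shape this note builder relies on, inferred only from the keys accessed above; the real interrogation output may carry additional fields, and every value here is illustrative.

schema_info_sketch = {
    "passed": False,
    "expect_schema": [("a", "Int64"), ("b", "String")],  # expected (column, dtype) pairs
    "target_schema": [("a", "Int64"), ("c", "String")],  # schema of the target table
    "params": {
        "complete": True,
        "in_order": True,
        "case_sensitive_colnames": True,
        "case_sensitive_dtypes": True,
        "full_match_dtypes": True,
    },
    "columns": {
        # one entry per expected column: name match, positional match, whether a
        # dtype was supplied, and whether that dtype matched the target column
        "a": {"colname_matched": True, "index_matched": True,
              "dtype_present": True, "dtype_matched": True},
        "b": {"colname_matched": False, "index_matched": False,
              "dtype_present": True, "dtype_matched": False},
    },
}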
+ def _create_col_schema_match_note_text(schema_info: dict) -> str:
19717
+ """
19718
+ Create a plain text pass/fail summary note for schema validation.
18081
19719
 
18082
- critical = (
18083
- _format_number_safe(thresholds.critical_fraction, decimals=3, drop_trailing_zeros=True)
18084
- if thresholds.critical_fraction is not None
18085
- else (
18086
- _format_integer_safe(thresholds.critical_count)
18087
- if thresholds.critical_count is not None
18088
- else "&mdash;"
18089
- )
18090
- )
19720
+ Parameters
19721
+ ----------
19722
+ schema_info
19723
+ The schema validation information dictionary from interrogation.
18091
19724
 
18092
- warning_color = SEVERITY_LEVEL_COLORS["warning"]
18093
- error_color = SEVERITY_LEVEL_COLORS["error"]
18094
- critical_color = SEVERITY_LEVEL_COLORS["critical"]
19725
+ Returns
19726
+ -------
19727
+ str
19728
+ Plain text note.
19729
+ """
19730
+ passed = schema_info["passed"]
19731
+ expect_schema = schema_info["expect_schema"]
19732
+ target_schema = schema_info["target_schema"]
18095
19733
 
18096
- return (
18097
- "<span>"
18098
- f'<span style="background-color: {warning_color}; color: white; '
18099
- "padding: 0.5em 0.5em; position: inherit; text-transform: uppercase; "
18100
- f"margin: 5px 0px 5px 5px; border: solid 1px {warning_color}; "
18101
- 'font-weight: bold; padding: 2px 15px 2px 15px; font-size: smaller;">WARNING</span>'
18102
- '<span style="background-color: none; color: #333333; padding: 0.5em 0.5em; '
18103
- "position: inherit; margin: 5px 0px 5px -4px; font-weight: bold; "
18104
- f"border: solid 1px {warning_color}; padding: 2px 15px 2px 15px; "
18105
- 'font-size: smaller; margin-right: 5px;">'
18106
- f"{warning}"
18107
- "</span>"
18108
- f'<span style="background-color: {error_color}; color: white; '
18109
- "padding: 0.5em 0.5em; position: inherit; text-transform: uppercase; "
18110
- f"margin: 5px 0px 5px 1px; border: solid 1px {error_color}; "
18111
- 'font-weight: bold; padding: 2px 15px 2px 15px; font-size: smaller;">ERROR</span>'
18112
- '<span style="background-color: none; color: #333333; padding: 0.5em 0.5em; '
18113
- "position: inherit; margin: 5px 0px 5px -4px; font-weight: bold; "
18114
- f"border: solid 1px {error_color}; padding: 2px 15px 2px 15px; "
18115
- 'font-size: smaller; margin-right: 5px;">'
18116
- f"{error}"
18117
- "</span>"
18118
- f'<span style="background-color: {critical_color}; color: white; '
18119
- "padding: 0.5em 0.5em; position: inherit; text-transform: uppercase; "
18120
- f"margin: 5px 0px 5px 1px; border: solid 1px {critical_color}; "
18121
- 'font-weight: bold; padding: 2px 15px 2px 15px; font-size: smaller;">CRITICAL</span>'
18122
- '<span style="background-color: none; color: #333333; padding: 0.5em 0.5em; '
18123
- "position: inherit; margin: 5px 0px 5px -4px; font-weight: bold; "
18124
- f"border: solid 1px {critical_color}; padding: 2px 15px 2px 15px; "
18125
- 'font-size: smaller;">'
18126
- f"{critical}"
18127
- "</span>"
18128
- "</span>"
18129
- )
19734
+ if passed:
19735
+ return f"Schema validation passed. Expected {len(expect_schema)} column(s), found {len(target_schema)}."
19736
+ else:
19737
+ return f"Schema validation failed. Expected {len(expect_schema)} column(s), found {len(target_schema)}."
18130
19738
 
18131
19739
 
18132
19740
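Continuing with the illustrative schema_info_sketch above, the plain text variant reduces to a one-line summary.

note = _create_col_schema_match_note_text(schema_info_sketch)
# With passed=False, two expected columns and two target columns, this yields:
# 'Schema validation failed. Expected 2 column(s), found 2.'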
  def _step_report_row_based(
@@ -18576,16 +20184,33 @@ def _step_report_schema_in_order(
18576
20184
  dtype_exp = []
18577
20185
  dtype_exp_correct = []
18578
20186
 
18579
- for i in range(len(exp_columns_dict)):
20187
+ for i in range(len(expect_schema)):
18580
20188
  #
18581
20189
  # `col_name_exp` values
18582
20190
  #
18583
20191
 
18584
- # The column name is the key in the dictionary, get the column name and
18585
- # append it to the `col_name_exp` list
18586
- col_name_exp.append(list(exp_columns_dict.keys())[i])
20192
+ # Get the column name from expect_schema (which can have duplicates)
20193
+ column_name_exp_i = expect_schema[i][0]
20194
+ col_name_exp.append(column_name_exp_i)
20195
+
20196
+ # Check if this column exists in exp_columns_dict (it might not if it's a duplicate)
20197
+ # For duplicates, we need to handle them specially
20198
+ if column_name_exp_i not in exp_columns_dict:
20199
+ # This is a duplicate or invalid column, mark it as incorrect
20200
+ col_exp_correct.append(CROSS_MARK_SPAN)
20201
+
20202
+ # For dtype, check if there's a dtype specified in the schema
20203
+ if len(expect_schema[i]) > 1:
20204
+ dtype_value = expect_schema[i][1]
20205
+ if isinstance(dtype_value, list):
20206
+ dtype_exp.append(" | ".join(dtype_value))
20207
+ else:
20208
+ dtype_exp.append(str(dtype_value))
20209
+ else:
20210
+ dtype_exp.append("&mdash;")
18587
20211
 
18588
- column_name_exp_i = col_name_exp[i]
20212
+ dtype_exp_correct.append("&mdash;")
20213
+ continue
18589
20214
 
18590
20215
  #
18591
20216
  # `col_exp_correct` values