pointblank 0.17.0__py3-none-any.whl → 0.18.0__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
- pointblank/__init__.py +2 -0
- pointblank/_agg.py +120 -0
- pointblank/_constants.py +192 -5
- pointblank/_datascan_utils.py +28 -10
- pointblank/_interrogation.py +202 -149
- pointblank/_typing.py +12 -0
- pointblank/_utils.py +81 -44
- pointblank/_utils_ai.py +4 -5
- pointblank/_utils_check_args.py +3 -3
- pointblank/_utils_llms_txt.py +40 -2
- pointblank/actions.py +1 -1
- pointblank/assistant.py +2 -3
- pointblank/cli.py +1 -1
- pointblank/column.py +162 -46
- pointblank/data/api-docs.txt +2695 -49
- pointblank/datascan.py +17 -17
- pointblank/draft.py +2 -3
- pointblank/scan_profile.py +2 -1
- pointblank/schema.py +61 -20
- pointblank/thresholds.py +15 -13
- pointblank/validate.py +780 -231
- pointblank/validate.pyi +1104 -0
- pointblank/yaml.py +10 -6
- {pointblank-0.17.0.dist-info → pointblank-0.18.0.dist-info}/METADATA +2 -2
- {pointblank-0.17.0.dist-info → pointblank-0.18.0.dist-info}/RECORD +29 -27
- {pointblank-0.17.0.dist-info → pointblank-0.18.0.dist-info}/licenses/LICENSE +1 -1
- {pointblank-0.17.0.dist-info → pointblank-0.18.0.dist-info}/WHEEL +0 -0
- {pointblank-0.17.0.dist-info → pointblank-0.18.0.dist-info}/entry_points.txt +0 -0
- {pointblank-0.17.0.dist-info → pointblank-0.18.0.dist-info}/top_level.txt +0 -0
pointblank/validate.py
CHANGED
@@ -15,7 +15,7 @@ from enum import Enum
 from functools import partial
 from importlib.metadata import version
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, Literal
+from typing import TYPE_CHECKING, Any, Callable, Literal, NoReturn, ParamSpec, TypeVar
 from zipfile import ZipFile
 
 import commonmark
@@ -24,8 +24,8 @@ from great_tables import GT, from_column, google_font, html, loc, md, style, val
 from great_tables.gt import _get_column_of_values
 from great_tables.vals import fmt_integer, fmt_number
 from importlib_resources import files
-from narwhals.typing import FrameT
 
+from pointblank._agg import is_valid_agg, load_validation_method_grid, resolve_agg_registries
 from pointblank._constants import (
     ASSERTION_TYPE_METHOD_MAP,
     CHECK_MARK_SPAN,
@@ -92,6 +92,8 @@ from pointblank._utils import (
     _is_lib_present,
     _is_narwhals_table,
     _is_value_a_df,
+    _PBUnresolvedColumn,
+    _resolve_columns,
     _select_df_lib,
 )
 from pointblank._utils_check_args import (
@@ -102,7 +104,14 @@ from pointblank._utils_check_args import (
     _check_thresholds,
 )
 from pointblank._utils_html import _create_table_dims_html, _create_table_type_html
-from pointblank.column import
+from pointblank.column import (
+    Column,
+    ColumnLiteral,
+    ColumnSelector,
+    ColumnSelectorNarwhals,
+    ReferenceColumn,
+    col,
+)
 from pointblank.schema import Schema, _get_schema_validation_info
 from pointblank.segments import Segment
 from pointblank.thresholds import (
@@ -113,10 +122,18 @@ from pointblank.thresholds import (
     _normalize_thresholds_creation,
 )
 
+P = ParamSpec("P")
+R = TypeVar("R")
+
 if TYPE_CHECKING:
     from collections.abc import Collection
+    from typing import Any
+
+    import polars as pl
+    from narwhals.typing import IntoDataFrame, IntoFrame
+
+    from pointblank._typing import AbsoluteBounds, Tolerance, _CompliantValue, _CompliantValues
 
-    from pointblank._typing import AbsoluteBounds, Tolerance
 
 __all__ = [
     "Validate",
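The new module-level `P = ParamSpec("P")` and `R = TypeVar("R")` aliases are the standard ingredients for signature-preserving decorators. The hunks shown here do not reveal where pointblank applies them, so the following is only a generic sketch of the pattern such aliases enable; the `logged` decorator is hypothetical and not part of pointblank:

```python
from functools import wraps
from typing import Callable, ParamSpec, TypeVar

P = ParamSpec("P")
R = TypeVar("R")

def logged(fn: Callable[P, R]) -> Callable[P, R]:
    # P.args/P.kwargs let a type checker carry the wrapped function's exact
    # parameter list through the decorator instead of collapsing it to Any
    @wraps(fn)
    def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
        print(f"calling {fn.__name__}")
        return fn(*args, **kwargs)

    return wrapper
```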
@@ -135,6 +152,7 @@ __all__ = [
     "get_validation_summary",
 ]
 
+
 # Create a thread-local storage for the metadata
 _action_context = threading.local()
 
@@ -424,12 +442,13 @@ def config(
     global_config.report_incl_footer_timings = report_incl_footer_timings  # pragma: no cover
     global_config.report_incl_footer_notes = report_incl_footer_notes  # pragma: no cover
     global_config.preview_incl_header = preview_incl_header  # pragma: no cover
+    return global_config  # pragma: no cover
 
 
 def load_dataset(
     dataset: Literal["small_table", "game_revenue", "nycflights", "global_sales"] = "small_table",
     tbl_type: Literal["polars", "pandas", "duckdb"] = "polars",
-) ->
+) -> Any:
     """
     Load a dataset hosted in the library as specified table type.
 
@@ -450,7 +469,7 @@ def load_dataset(
 
     Returns
     -------
-
+    Any
         The dataset for the `Validate` object. This could be a Polars DataFrame, a Pandas DataFrame,
         or a DuckDB table as an Ibis table.
 
@@ -1523,7 +1542,7 @@ def get_data_path(
     return tmp_file.name
 
 
-def _process_data(data: FrameT | Any) -> FrameT | Any:
+def _process_data(data: Any) -> Any:
    """
    Centralized data processing pipeline that handles all supported input types.
 
@@ -1540,7 +1559,7 @@ def _process_data(data: FrameT | Any) -> FrameT | Any:
 
    Parameters
    ----------
-    data
+    data
        The input data which could be:
        - a DataFrame object (Polars, Pandas, Ibis, etc.)
        - a GitHub URL pointing to a CSV or Parquet file
@@ -1551,7 +1570,7 @@ def _process_data(data: FrameT | Any) -> FrameT | Any:
 
    Returns
    -------
-
+    Any
        Processed data as a DataFrame if input was a supported data source type,
        otherwise the original data unchanged.
    """
@@ -1570,7 +1589,7 @@ def _process_data(data: FrameT | Any) -> FrameT | Any:
    return data
 
 
-def _process_github_url(data: FrameT | Any) -> FrameT | Any:
+def _process_github_url(data: Any) -> Any:
    """
    Process data parameter to handle GitHub URLs pointing to CSV or Parquet files.
 
@@ -1585,12 +1604,12 @@ def _process_github_url(data: FrameT | Any) -> FrameT | Any:
 
    Parameters
    ----------
-    data
+    data
        The data parameter which may be a GitHub URL string or any other data type.
 
    Returns
    -------
-
+    Any
        If the input is a supported GitHub URL, returns a DataFrame loaded from the downloaded file.
        Otherwise, returns the original data unchanged.
 
@@ -1675,7 +1694,7 @@ def _process_github_url(data: FrameT | Any) -> FrameT | Any:
    return data
 
 
-def _process_connection_string(data: FrameT | Any) -> FrameT | Any:
+def _process_connection_string(data: Any) -> Any:
    """
    Process data parameter to handle database connection strings.
 
@@ -1702,7 +1721,7 @@ def _process_connection_string(data: FrameT | Any) -> FrameT | Any:
        return connect_to_table(data)
 
 
-def _process_csv_input(data: FrameT | Any) -> FrameT | Any:
+def _process_csv_input(data: Any) -> Any:
    """
    Process data parameter to handle CSV file inputs.
 
@@ -1760,7 +1779,7 @@ def _process_csv_input(data: FrameT | Any) -> FrameT | Any:
    )
 
 
-def _process_parquet_input(data: FrameT | Any) -> FrameT | Any:
+def _process_parquet_input(data: Any) -> Any:
    """
    Process data parameter to handle Parquet file inputs.
 
@@ -1903,7 +1922,7 @@ def _process_parquet_input(data: FrameT | Any) -> FrameT | Any:
 
 
 def preview(
-    data:
+    data: Any,
     columns_subset: str | list[str] | Column | None = None,
     n_head: int = 5,
     n_tail: int = 5,
@@ -1911,7 +1930,7 @@ def preview(
     show_row_numbers: bool = True,
     max_col_width: int = 250,
     min_tbl_width: int = 500,
-    incl_header: bool = None,
+    incl_header: bool | None = None,
 ) -> GT:
    """
    Display a table preview that shows some rows from the top, some from the bottom.
@@ -2169,7 +2188,7 @@ def preview(
 
 
 def _generate_display_table(
-    data:
+    data: Any,
     columns_subset: str | list[str] | Column | None = None,
     n_head: int = 5,
     n_tail: int = 5,
@@ -2177,7 +2196,7 @@ def _generate_display_table(
     show_row_numbers: bool = True,
     max_col_width: int = 250,
     min_tbl_width: int = 500,
-    incl_header: bool = None,
+    incl_header: bool | None = None,
     mark_missing_values: bool = True,
     row_number_list: list[int] | None = None,
 ) -> GT:
@@ -2274,7 +2293,8 @@ def _generate_display_table(
        tbl_schema = Schema(tbl=data)
 
        # Get the row count for the table
-        ibis_rows = data.count()
+        # Note: ibis tables have count(), to_polars(), to_pandas() methods
+        ibis_rows = data.count()  # type: ignore[union-attr]
        n_rows = ibis_rows.to_polars() if df_lib_name_gt == "polars" else int(ibis_rows.to_pandas())
 
        # If n_head + n_tail is greater than the row count, display the entire table
@@ -2283,11 +2303,11 @@ def _generate_display_table(
            data_subset = data
 
            if row_number_list is None:
-                row_number_list = range(1, n_rows + 1)
+                row_number_list = list(range(1, n_rows + 1))
        else:
            # Get the first n and last n rows of the table
-            data_head = data.head(n_head)
-            data_tail = data.filter(
+            data_head = data.head(n_head)  # type: ignore[union-attr]
+            data_tail = data.filter(  # type: ignore[union-attr]
                [ibis.row_number() >= (n_rows - n_tail), ibis.row_number() <= n_rows]
            )
            data_subset = data_head.union(data_tail)
@@ -2299,9 +2319,9 @@ def _generate_display_table(
 
        # Convert either to Polars or Pandas depending on the available library
        if df_lib_name_gt == "polars":
-            data = data_subset.to_polars()
+            data = data_subset.to_polars()  # type: ignore[union-attr]
        else:
-            data = data_subset.to_pandas()
+            data = data_subset.to_pandas()  # type: ignore[union-attr]
 
        # From a DataFrame:
        # - get the row count
@@ -2312,17 +2332,18 @@ def _generate_display_table(
    tbl_schema = Schema(tbl=data)
 
    if tbl_type == "polars":
-        n_rows = int(data.height)
+        # Note: polars DataFrames have height, head(), tail() attributes
+        n_rows = int(data.height)  # type: ignore[union-attr]
 
        # If n_head + n_tail is greater than the row count, display the entire table
        if n_head + n_tail >= n_rows:
            full_dataset = True
 
            if row_number_list is None:
-                row_number_list = range(1, n_rows + 1)
+                row_number_list = list(range(1, n_rows + 1))
 
        else:
-            data = pl.concat([data.head(n=n_head), data.tail(n=n_tail)])
+            data = pl.concat([data.head(n=n_head), data.tail(n=n_tail)])  # type: ignore[union-attr]
 
            if row_number_list is None:
                row_number_list = list(range(1, n_head + 1)) + list(
@@ -2330,40 +2351,42 @@ def _generate_display_table(
                )
 
    if tbl_type == "pandas":
-        n_rows = data.shape[0]
+        # Note: pandas DataFrames have shape, head(), tail() attributes
+        n_rows = data.shape[0]  # type: ignore[union-attr]
 
        # If n_head + n_tail is greater than the row count, display the entire table
        if n_head + n_tail >= n_rows:
            full_dataset = True
            data_subset = data
 
-            row_number_list = range(1, n_rows + 1)
+            row_number_list = list(range(1, n_rows + 1))
        else:
-            data = pd.concat([data.head(n=n_head), data.tail(n=n_tail)])
+            data = pd.concat([data.head(n=n_head), data.tail(n=n_tail)])  # type: ignore[union-attr]
 
            row_number_list = list(range(1, n_head + 1)) + list(
                range(n_rows - n_tail + 1, n_rows + 1)
            )
 
    if tbl_type == "pyspark":
-        n_rows = data.count()
+        # Note: pyspark DataFrames have count(), toPandas(), limit(), tail(), sparkSession
+        n_rows = data.count()  # type: ignore[union-attr]
 
        # If n_head + n_tail is greater than the row count, display the entire table
        if n_head + n_tail >= n_rows:
            full_dataset = True
            # Convert to pandas for Great Tables compatibility
-            data = data.toPandas()
+            data = data.toPandas()  # type: ignore[union-attr]
 
-            row_number_list = range(1, n_rows + 1)
+            row_number_list = list(range(1, n_rows + 1))
        else:
            # Get head and tail samples, then convert to pandas
-            head_data = data.limit(n_head).toPandas()
+            head_data = data.limit(n_head).toPandas()  # type: ignore[union-attr]
 
            # PySpark tail() returns a list of Row objects, need to convert to DataFrame
-            tail_rows = data.tail(n_tail)
+            tail_rows = data.tail(n_tail)  # type: ignore[union-attr]
            if tail_rows:
                # Convert list of Row objects back to DataFrame, then to pandas
-                tail_df = data.sparkSession.createDataFrame(tail_rows, data.schema)
+                tail_df = data.sparkSession.createDataFrame(tail_rows, data.schema)  # type: ignore[union-attr]
                tail_data = tail_df.toPandas()
            else:
                # If no tail data, create empty DataFrame with same schema
@@ -2391,14 +2414,14 @@ def _generate_display_table(
    tbl_schema = Schema(tbl=data)
 
    # From the table schema, get a list of tuples containing column names and data types
-
+    col_dtype_list = tbl_schema.columns or []
 
    # Extract the column names from the list of tuples (first element of each tuple)
-    col_names = [col[0] for col in
+    col_names = [col[0] for col in col_dtype_list]
 
    # Iterate over the list of tuples and create a new dictionary with the
    # column names and data types
-    col_dtype_dict = {k: v for k, v in
+    col_dtype_dict = {k: v for k, v in col_dtype_list}
 
    # Create short versions of the data types by omitting any text in parentheses
    col_dtype_dict_short = {
@@ -2497,21 +2520,21 @@ def _generate_display_table(
    # Prepend a column that contains the row numbers if `show_row_numbers=True`
    if show_row_numbers or has_leading_row_num_col:
        if has_leading_row_num_col:
-            row_number_list = data["_row_num_"].to_list()
+            row_number_list = data["_row_num_"].to_list()  # type: ignore[union-attr]
 
        else:
            if df_lib_name_gt == "polars":
                import polars as pl
 
                row_number_series = pl.Series("_row_num_", row_number_list)
-                data = data.insert_column(0, row_number_series)
+                data = data.insert_column(0, row_number_series)  # type: ignore[union-attr]
 
            if df_lib_name_gt == "pandas":
-                data.insert(0, "_row_num_", row_number_list)
+                data.insert(0, "_row_num_", row_number_list)  # type: ignore[union-attr]
 
            if df_lib_name_gt == "pyspark":
                # For PySpark converted to pandas, use pandas method
-                data.insert(0, "_row_num_", row_number_list)
+                data.insert(0, "_row_num_", row_number_list)  # type: ignore[union-attr]
 
    # Get the highest number in the `row_number_list` and calculate a width that will
    # safely fit a number of that magnitude
@@ -2620,7 +2643,7 @@ def _generate_display_table(
    return gt_tbl
 
 
-def missing_vals_tbl(data:
+def missing_vals_tbl(data: Any) -> GT:
    """
    Display a table that shows the missing values in the input table.
 
@@ -3221,7 +3244,7 @@ def _get_column_names_safe(data: Any) -> list[str]:
        return list(data.columns)  # pragma: no cover
 
 
-def _get_column_names(data:
+def _get_column_names(data: Any, ibis_tbl: bool, df_lib_name_gt: str) -> list[str]:
    if ibis_tbl:
        return data.columns if df_lib_name_gt == "polars" else list(data.columns)
 
@@ -3245,12 +3268,10 @@ def _validate_columns_subset(
        )
        return columns_subset
 
-    return columns_subset.resolve(columns=col_names)
+    return columns_subset.resolve(columns=col_names)  # type: ignore[union-attr]
 
 
-def _select_columns(
-    data: FrameT | Any, resolved_columns: list[str], ibis_tbl: bool, tbl_type: str
-) -> FrameT | Any:
+def _select_columns(data: Any, resolved_columns: list[str], ibis_tbl: bool, tbl_type: str) -> Any:
    if ibis_tbl:
        return data[resolved_columns]
    if tbl_type == "polars":
@@ -3258,7 +3279,7 @@ def _select_columns(
        return data[resolved_columns]
 
 
-def get_column_count(data:
+def get_column_count(data: Any) -> int:
    """
    Get the number of columns in a table.
 
@@ -3470,7 +3491,7 @@ def _extract_enum_values(set_values: Any) -> list[Any]:
    return [set_values]
 
 
-def get_row_count(data:
+def get_row_count(data: Any) -> int:
    """
    Get the number of rows in a table.
 
@@ -3723,18 +3744,46 @@ class _ValidationInfo:
    insertion order, ensuring notes appear in a consistent sequence in reports and logs.
    """
 
+    @classmethod
+    def from_agg_validator(
+        cls,
+        assertion_type: str,
+        columns: _PBUnresolvedColumn,
+        value: float | Column | ReferenceColumn,
+        tol: Tolerance = 0,
+        thresholds: float | bool | tuple | dict | Thresholds | None = None,
+        brief: str | bool = False,
+        actions: Actions | None = None,
+        active: bool = True,
+    ) -> _ValidationInfo:
+        # This factory method creates a `_ValidationInfo` instance for aggregate
+        # methods. The reason this is created, is because all agg methods share the same
+        # signature so instead of instantiating the class directly each time, this method
+        # can be used to reduce redundancy, boilerplate and mistakes :)
+        _check_thresholds(thresholds=thresholds)
+
+        return cls(
+            assertion_type=assertion_type,
+            column=_resolve_columns(columns),
+            values={"value": value, "tol": tol},
+            thresholds=_normalize_thresholds_creation(thresholds),
+            brief=_transform_auto_brief(brief=brief),
+            actions=actions,
+            active=active,
+        )
+
    # Validation plan
    i: int | None = None
    i_o: int | None = None
    step_id: str | None = None
    sha1: str | None = None
    assertion_type: str | None = None
-    column:
-    values:
+    column: Any | None = None
+    values: Any | list[Any] | tuple | None = None
    inclusive: tuple[bool, bool] | None = None
    na_pass: bool | None = None
    pre: Callable | None = None
-    segments:
+    segments: Any | None = None
    thresholds: Thresholds | None = None
    actions: Actions | None = None
    label: str | None = None
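Since every aggregate validation shares this signature, a single factory call is all each public method needs. A purely illustrative construction follows; the column name and numbers are made up, and `_ValidationInfo` is a private class, so this is not a supported API:

```python
from pointblank.validate import _ValidationInfo  # private; illustrative only

# Per the factory body above, `value` and `tol` are packed into the `values` dict
vi = _ValidationInfo.from_agg_validator(
    assertion_type="col_sum_eq",   # one of the new aggregate assertion types
    columns="revenue",             # hypothetical column name
    value=1_000_000,
    tol=(500, 500),                # a (lower, upper) tuple gives asymmetric bounds
)
assert vi.values == {"value": 1_000_000, "tol": (500, 500)}
```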
@@ -3753,14 +3802,14 @@ class _ValidationInfo:
    error: bool | None = None
    critical: bool | None = None
    failure_text: str | None = None
-    tbl_checked:
-    extract:
-    val_info: dict[str,
+    tbl_checked: Any = None
+    extract: Any = None
+    val_info: dict[str, Any] | None = None
    time_processed: str | None = None
    proc_duration_s: float | None = None
    notes: dict[str, dict[str, str]] | None = None
 
-    def get_val_info(self) -> dict[str,
+    def get_val_info(self) -> dict[str, Any] | None:
        return self.val_info
 
    def _add_note(self, key: str, markdown: str, text: str | None = None) -> None:
@@ -3936,7 +3985,7 @@ class _ValidationInfo:
        return self.notes is not None and len(self.notes) > 0
 
 
-def _handle_connection_errors(e: Exception, connection_string: str) ->
+def _handle_connection_errors(e: Exception, connection_string: str) -> NoReturn:
    """
    Shared error handling for database connection failures.
 
@@ -4777,7 +4826,8 @@ class Validate:
    when table specifications are missing or backend dependencies are not installed.
    """
 
-    data:
+    data: IntoDataFrame
+    reference: IntoFrame | None = None
    tbl_name: str | None = None
    label: str | None = None
    thresholds: int | float | bool | tuple | dict | Thresholds | None = None
@@ -4791,6 +4841,10 @@ class Validate:
        # Process data through the centralized data processing pipeline
        self.data = _process_data(self.data)
 
+        # Process reference data if provided
+        if self.reference is not None:
+            self.reference = _process_data(self.reference)
+
        # Check input of the `thresholds=` argument
        _check_thresholds(thresholds=self.thresholds)
 
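Because `reference` is routed through the same `_process_data()` pipeline as `data`, a reference table should accept whatever that pipeline accepts (a DataFrame, and presumably also the CSV/Parquet/URL/connection-string inputs the pipeline handles). A minimal sketch with two small Polars frames; `reference=` is new in 0.18.0 per this diff:

```python
import polars as pl
import pointblank as pb

current = pl.DataFrame({"a": [1, 2, 3]})
baseline = pl.DataFrame({"a": [1, 2, 3]})

# Reference data is stored on the Validate object and is available to
# aggregate checks added later in the chain
validation = pb.Validate(data=current, reference=baseline)
```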
@@ -4835,9 +4889,107 @@ class Validate:
 
        self.validation_info = []
 
+    def _add_agg_validation(
+        self,
+        *,
+        assertion_type: str,
+        columns: str | Collection[str],
+        value,
+        tol=0,
+        thresholds=None,
+        brief=False,
+        actions=None,
+        active=True,
+    ):
+        """
+        Add an aggregation-based validation step to the validation plan.
+
+        This internal method is used by all aggregation-based column validation methods
+        (e.g., `col_sum_eq`, `col_avg_gt`, `col_sd_le`) to create and register validation
+        steps. It relies heavily on the `_ValidationInfo.from_agg_validator()` class method.
+
+        Automatic Reference Inference
+        -----------------------------
+        When `value` is None and reference data has been set on the Validate object,
+        this method automatically creates a `ReferenceColumn` pointing to the same
+        column name in the reference data. This enables a convenient shorthand:
+
+        .. code-block:: python
+
+            # Instead of writing:
+            Validate(data=df, reference=ref_df).col_sum_eq("a", ref("a"))
+
+            # You can simply write:
+            Validate(data=df, reference=ref_df).col_sum_eq("a")
+
+        If `value` is None and no reference data is set, a `ValueError` is raised
+        immediately to provide clear feedback to the user.
+
+        Parameters
+        ----------
+        assertion_type
+            The type of assertion (e.g., "col_sum_eq", "col_avg_gt").
+        columns
+            Column name or collection of column names to validate.
+        value
+            The target value to compare against. Can be:
+            - A numeric literal (int or float)
+            - A `Column` object for cross-column comparison
+            - A `ReferenceColumn` object for reference data comparison
+            - None to automatically use `ref(column)` when reference data is set
+        tol
+            Tolerance for the comparison. Defaults to 0.
+        thresholds
+            Custom thresholds for the validation step.
+        brief
+            Brief description or auto-generate flag.
+        actions
+            Actions to take based on validation results.
+        active
+            Whether this validation step is active.
+
+        Returns
+        -------
+        Validate
+            The Validate instance for method chaining.
+
+        Raises
+        ------
+        ValueError
+            If `value` is None and no reference data is set on the Validate object.
+        """
+        if isinstance(columns, str):
+            columns = [columns]
+        for column in columns:
+            # If value is None, default to referencing the same column from reference data
+            resolved_value = value
+            if value is None:
+                if self.reference is None:
+                    raise ValueError(
+                        f"The 'value' parameter is required for {assertion_type}() "
+                        "when no reference data is set. Either provide a value, or "
+                        "set reference data on the Validate object using "
+                        "Validate(data=..., reference=...)."
+                    )
+                resolved_value = ReferenceColumn(column_name=column)
+
+            val_info = _ValidationInfo.from_agg_validator(
+                assertion_type=assertion_type,
+                columns=column,
+                value=resolved_value,
+                tol=tol,
+                thresholds=self.thresholds if thresholds is None else thresholds,
+                actions=self.actions if actions is None else actions,
+                brief=self.brief if brief is None else brief,
+                active=active,
+            )
+            self._add_validation(validation_info=val_info)
+
+        return self
+
    def set_tbl(
        self,
-        tbl:
+        tbl: Any,
        tbl_name: str | None = None,
        label: str | None = None,
    ) -> Validate:
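Two behaviors of `_add_agg_validation()` are worth calling out: a list of columns fans out into one validation step per column, and omitting `value` falls back to comparing against the same-named column in the reference data, or raising immediately when none is set. A sketch, assuming the public aggregate methods named in the docstring (e.g. `col_sum_eq()`) wrap this helper in 0.18.0:

```python
import polars as pl
import pointblank as pb

current = pl.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})
baseline = pl.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})

# Fans out to two steps: sum("a") vs. the reference sum("a"),
# and sum("b") vs. the reference sum("b")
pb.Validate(data=current, reference=baseline).col_sum_eq(["a", "b"]).interrogate()

# With no reference data set, omitting `value` fails fast
try:
    pb.Validate(data=current).col_sum_eq("a")
except ValueError as e:
    print(e)
```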
@@ -4980,7 +5132,7 @@ class Validate:
        na_pass: bool = False,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -5214,7 +5366,6 @@ class Validate:
        - Row 1: `c` is `1` and `b` is `2`.
        - Row 3: `c` is `2` and `b` is `2`.
        """
-
        assertion_type = _get_fn_name()
 
        _check_column(column=columns)
@@ -5234,14 +5385,7 @@ class Validate:
            self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
        )
 
-
-        # resolve the columns
-        if isinstance(columns, (ColumnSelector, nw.selectors.Selector)):
-            columns = col(columns)
-
-        # If `columns` is Column value or a string, place it in a list for iteration
-        if isinstance(columns, (Column, str)):
-            columns = [columns]
+        columns = _resolve_columns(columns)
 
        # Determine brief to use (global or local) and transform any shorthands of `brief=`
        brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
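The inline resolution logic removed above gives a good picture of what the centralized helper must do. Here is an approximation inferred from that removed code; the real `_resolve_columns()` lives in `pointblank._utils` and may handle more cases:

```python
import narwhals as nw
from pointblank.column import Column, ColumnSelector, col

def _resolve_columns_sketch(columns):
    # Selector objects get wrapped in col() so they resolve lazily against the table
    if isinstance(columns, (ColumnSelector, nw.selectors.Selector)):
        columns = col(columns)
    # A lone Column or bare string becomes a one-element list for iteration
    if isinstance(columns, (Column, str)):
        columns = [columns]
    return columns
```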
@@ -5272,7 +5416,7 @@ class Validate:
        na_pass: bool = False,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -5563,7 +5707,7 @@ class Validate:
        na_pass: bool = False,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -5854,7 +5998,7 @@ class Validate:
        na_pass: bool = False,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -6143,7 +6287,7 @@ class Validate:
        na_pass: bool = False,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -6435,7 +6579,7 @@ class Validate:
        na_pass: bool = False,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -6729,7 +6873,7 @@ class Validate:
        na_pass: bool = False,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -7049,7 +7193,7 @@ class Validate:
        na_pass: bool = False,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -7366,7 +7510,7 @@ class Validate:
        set: Collection[Any],
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -7683,7 +7827,7 @@ class Validate:
        set: Collection[Any],
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -7974,7 +8118,7 @@ class Validate:
        na_pass: bool = False,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -8162,7 +8306,7 @@ class Validate:
        na_pass: bool = False,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -8347,7 +8491,7 @@ class Validate:
        columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -8590,7 +8734,7 @@ class Validate:
        columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -8836,7 +8980,7 @@ class Validate:
        inverse: bool = False,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -9099,7 +9243,7 @@ class Validate:
        na_pass: bool = False,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -9379,10 +9523,10 @@ class Validate:
 
    def col_vals_expr(
        self,
-        expr:
+        expr: Any,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -9600,7 +9744,7 @@ class Validate:
    def col_exists(
        self,
        columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -10072,7 +10216,7 @@ class Validate:
        columns_subset: str | list[str] | None = None,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -10313,7 +10457,7 @@ class Validate:
        columns_subset: str | list[str] | None = None,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -10558,7 +10702,7 @@ class Validate:
        max_concurrent: int = 3,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -10953,7 +11097,7 @@ class Validate:
        case_sensitive_dtypes: bool = True,
        full_match_dtypes: bool = True,
        pre: Callable | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -11169,11 +11313,11 @@ class Validate:
 
    def row_count_match(
        self,
-        count: int |
+        count: int | Any,
        tol: Tolerance = 0,
        inverse: bool = False,
        pre: Callable | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -11388,10 +11532,10 @@ class Validate:
 
    def col_count_match(
        self,
-        count: int |
+        count: int | Any,
        inverse: bool = False,
        pre: Callable | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -11564,9 +11708,9 @@ class Validate:
 
    def tbl_match(
        self,
-        tbl_compare:
+        tbl_compare: Any,
        pre: Callable | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -11835,7 +11979,7 @@ class Validate:
        self,
        *exprs: Callable,
        pre: Callable | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -12083,7 +12227,7 @@ class Validate:
        self,
        expr: Callable,
        pre: Callable | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -12577,7 +12721,7 @@ class Validate:
            segment = validation.segments
 
            # Get compatible data types for this assertion type
-            assertion_method = ASSERTION_TYPE_METHOD_MAP
+            assertion_method = ASSERTION_TYPE_METHOD_MAP.get(assertion_type, assertion_type)
            compatible_dtypes = COMPATIBLE_DTYPES.get(assertion_method, [])
 
            # Process the `brief` text for the validation step by including template variables to
@@ -12632,7 +12776,11 @@ class Validate:
 
            # Make a deep copy of the table for this step to ensure proper isolation
            # This prevents modifications from one validation step affecting others
-
+            try:
+                # TODO: This copying should be scrutinized further
+                data_tbl_step: IntoDataFrame = _copy_dataframe(data_tbl)
+            except Exception as e:  # pragma: no cover
+                data_tbl_step: IntoDataFrame = data_tbl  # pragma: no cover
 
            # Capture original table dimensions and columns before preprocessing
            # (only if preprocessing is present - we'll set these inside the preprocessing block)
@@ -13080,6 +13228,44 @@ class Validate:
                    tbl_type=tbl_type,
                )
 
+            elif is_valid_agg(assertion_type):
+                agg, comp = resolve_agg_registries(assertion_type)
+
+                # Produce a 1-column Narwhals DataFrame
+                # TODO: Should be able to take lazy too
+                vec: nw.DataFrame = nw.from_native(data_tbl_step).select(column)
+                real = agg(vec)
+
+                raw_value = value["value"]
+                tol = value["tol"]
+
+                # Handle ReferenceColumn: compute target from reference data
+                if isinstance(raw_value, ReferenceColumn):
+                    if self.reference is None:
+                        raise ValueError(
+                            f"Cannot use ref('{raw_value.column_name}') without "
+                            "setting reference data on the Validate object. "
+                            "Use Validate(data=..., reference=...) to set reference data."
+                        )
+                    ref_vec: nw.DataFrame = nw.from_native(self.reference).select(
+                        raw_value.column_name
+                    )
+                    target: float | int = agg(ref_vec)
+                else:
+                    target = raw_value
+
+                lower_diff, upper_diff = _derive_bounds(target, tol)
+
+                lower_bound = target - lower_diff
+                upper_bound = target + upper_diff
+                result_bool: bool = comp(real, lower_bound, upper_bound)
+
+                validation.all_passed = result_bool
+                validation.n = 1
+                validation.n_passed = int(result_bool)
+                validation.n_failed = 1 - result_bool
+
+                results_tbl = None
            else:
                raise ValueError(
                    f"Unknown assertion type: {assertion_type}"
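The pass/fail decision above reduces to windowing the target by the tolerance. A hedged sketch of that arithmetic, assuming `_derive_bounds()` treats a scalar `tol` as symmetric and a `(lower, upper)` tuple as asymmetric (the same reading the report code uses later when it prints `tol=(lower, upper)` for tuples):

```python
# Sketch only: the real _derive_bounds() lives in pointblank and also receives
# the target, which it may use for e.g. proportional tolerances.
def derive_bounds_sketch(target: float, tol) -> tuple[float, float]:
    return tol if isinstance(tol, tuple) else (tol, tol)

target = 100.0
lower_diff, upper_diff = derive_bounds_sketch(target, tol=(2, 5))
lower_bound = target - lower_diff   # 98.0
upper_bound = target + upper_diff   # 105.0

# An aggregate `real` computed from the data then passes when the step's
# comparator holds, e.g. lower_bound <= real <= upper_bound for *_eq checks
assert (lower_bound, upper_bound) == (98.0, 105.0)
```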
@@ -13822,12 +14008,14 @@ class Validate:
        )
 
        # Get the threshold status using the appropriate method
+        # Note: scalar=False (default) always returns a dict
+        status: dict[int, bool]
        if level == "warning":
-            status = self.warning(i=i)
+            status = self.warning(i=i)  # type: ignore[assignment]
        elif level == "error":
-            status = self.error(i=i)
-
-            status = self.critical(i=i)
+            status = self.error(i=i)  # type: ignore[assignment]
+        else:  # level == "critical"
+            status = self.critical(i=i)  # type: ignore[assignment]
 
        # Find any steps that exceeded the threshold
        failures = []
@@ -13981,12 +14169,14 @@ class Validate:
        )
 
        # Get the threshold status using the appropriate method
+        # Note: scalar=False (default) always returns a dict
+        status: dict[int, bool]
        if level == "warning":
-            status = self.warning(i=i)
+            status = self.warning(i=i)  # type: ignore[assignment]
        elif level == "error":
-            status = self.error(i=i)
-
-            status = self.critical(i=i)
+            status = self.error(i=i)  # type: ignore[assignment]
+        else:  # level == "critical"
+            status = self.critical(i=i)  # type: ignore[assignment]
 
        # Return True if any steps exceeded the threshold
        return any(status.values())
@@ -14759,7 +14949,7 @@ class Validate:
 
    def get_data_extracts(
        self, i: int | list[int] | None = None, frame: bool = False
-    ) -> dict[int,
+    ) -> dict[int, Any] | Any:
        """
        Get the rows that failed for each validation step.
 
@@ -14782,7 +14972,7 @@ class Validate:
 
        Returns
        -------
-        dict[int,
+        dict[int, Any] | Any
            A dictionary of tables containing the rows that failed in every compatible validation
            step. Alternatively, it can be a DataFrame if `frame=True` and `i=` is a scalar.
 
@@ -15072,7 +15262,7 @@ class Validate:
 
        return json.dumps(report, indent=4, default=str)
 
-    def get_sundered_data(self, type="pass") ->
+    def get_sundered_data(self, type="pass") -> Any:
        """
        Get the data that passed or failed the validation steps.
 
@@ -15108,7 +15298,7 @@ class Validate:
 
        Returns
        -------
-
+        Any
            A table containing the data that passed or failed the validation steps.
 
        Examples
@@ -15200,6 +15390,7 @@ class Validate:
        # Get all validation step result tables and join together the `pb_is_good_` columns
        # ensuring that the columns are named uniquely (e.g., `pb_is_good_1`, `pb_is_good_2`, ...)
        # and that the index is reset
+        labeled_tbl_nw: nw.DataFrame | nw.LazyFrame | None = None
        for i, validation in enumerate(validation_info):
            results_tbl = nw.from_native(validation.tbl_checked)
 
@@ -15220,7 +15411,7 @@ class Validate:
            )
 
            # Add the results table to the list of tables
-            if
+            if labeled_tbl_nw is None:
                labeled_tbl_nw = results_tbl
            else:
                labeled_tbl_nw = labeled_tbl_nw.join(results_tbl, on=index_name, how="left")
@@ -15396,10 +15587,10 @@ class Validate:
    def get_tabular_report(
        self,
        title: str | None = ":default:",
-        incl_header: bool = None,
-        incl_footer: bool = None,
-        incl_footer_timings: bool = None,
-        incl_footer_notes: bool = None,
+        incl_header: bool | None = None,
+        incl_footer: bool | None = None,
+        incl_footer_timings: bool | None = None,
+        incl_footer_notes: bool | None = None,
    ) -> GT:
        """
        Validation report as a GT table.
@@ -15767,10 +15958,16 @@ class Validate:
            elif assertion_type[i] in ["conjointly", "specially"]:
                column_text = ""
            else:
-
+                # Handle both string columns and list columns
+                # For single-element lists like ['a'], display as 'a'
+                # For multi-element lists, display as comma-separated values
+                if isinstance(column, list):
+                    column_text = ", ".join(str(c) for c in column)
+                else:
+                    column_text = str(column)
 
-            # Apply underline styling for synthetic columns
-            #
+            # Apply underline styling for synthetic columns; only apply styling if column_text is
+            # not empty and not a special marker
            if (
                has_synthetic_column
                and column_text
@@ -15889,6 +16086,32 @@ class Validate:
                else:  # pragma: no cover
                    values_upd.append(str(value))  # pragma: no cover
 
+            # Handle aggregation methods (col_sum_gt, col_avg_eq, etc.)
+            elif is_valid_agg(assertion_type[i]):
+                # Extract the value and tolerance from the values dict
+                agg_value = value.get("value")
+                tol_value = value.get("tol", 0)
+
+                # Format the value (could be a number, Column, or ReferenceColumn)
+                if hasattr(agg_value, "__repr__"):
+                    # For Column or ReferenceColumn objects, use their repr
+                    value_str = repr(agg_value)
+                else:
+                    value_str = str(agg_value)
+
+                # Format tolerance - only show on second line if non-zero
+                if tol_value != 0:
+                    # Format tolerance based on its type
+                    if isinstance(tol_value, tuple):
+                        # Asymmetric bounds: (lower, upper)
+                        tol_str = f"tol=({tol_value[0]}, {tol_value[1]})"
+                    else:
+                        # Symmetric tolerance
+                        tol_str = f"tol={tol_value}"
+                    values_upd.append(f"{value_str}<br/>{tol_str}")
+                else:
+                    values_upd.append(value_str)
+
            # If the assertion type is not recognized, add the value as a string
            else:  # pragma: no cover
                values_upd.append(str(value))  # pragma: no cover
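For aggregate steps, the VALUES column of the tabular report stacks the target and any non-zero tolerance on two lines. A simplified standalone rendering of that formatting (the shipped code branches on `hasattr(agg_value, "__repr__")`, which is always true for Python objects; plain numbers are used here for clarity):

```python
def format_agg_value(agg_value, tol_value=0) -> str:
    # Numbers render via str(); Column/ReferenceColumn objects would use repr()
    value_str = str(agg_value)
    if tol_value == 0:
        return value_str
    if isinstance(tol_value, tuple):  # asymmetric (lower, upper) bounds
        tol_str = f"tol=({tol_value[0]}, {tol_value[1]})"
    else:                             # symmetric tolerance
        tol_str = f"tol={tol_value}"
    return f"{value_str}<br/>{tol_str}"

assert format_agg_value(100) == "100"
assert format_agg_value(100, (2, 5)) == "100<br/>tol=(2, 5)"
```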
@@ -16738,7 +16961,7 @@ class Validate:
|
|
|
16738
16961
|
table = validation.pre(self.data)
|
|
16739
16962
|
|
|
16740
16963
|
# Get the columns from the table as a list
|
|
16741
|
-
columns = list(table.columns)
|
|
16964
|
+
columns = list(table.columns) # type: ignore[union-attr]
|
|
16742
16965
|
|
|
16743
16966
|
# Evaluate the column expression
|
|
16744
16967
|
if isinstance(column_expr, ColumnSelectorNarwhals):
|
|
@@ -17116,7 +17339,7 @@ def _convert_string_to_datetime(value: str) -> datetime.datetime:
|
|
|
17116
17339
|
return datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
|
|
17117
17340
|
|
|
17118
17341
|
|
|
17119
|
-
def _string_date_dttm_conversion(value:
|
|
17342
|
+
def _string_date_dttm_conversion(value: Any) -> Any:
|
|
17120
17343
|
"""
|
|
17121
17344
|
Convert a string to a date or datetime object if it is in the correct format.
|
|
17122
17345
|
If the value is not a string, it is returned as is.
|
|
@@ -17151,8 +17374,8 @@ def _string_date_dttm_conversion(value: any) -> any:
|
|
|
17151
17374
|
|
|
17152
17375
|
|
|
17153
17376
|
def _conditional_string_date_dttm_conversion(
|
|
17154
|
-
value:
|
|
17155
|
-
) ->
|
|
17377
|
+
value: Any, allow_regular_strings: bool = False
|
|
17378
|
+
) -> Any:
|
|
17156
17379
|
"""
|
|
17157
17380
|
Conditionally convert a string to a date or datetime object if it is in the correct format. If
|
|
17158
17381
|
`allow_regular_strings=` is `True`, regular strings are allowed to pass through unchanged. If
|
|
@@ -17196,9 +17419,9 @@ def _process_brief(
|
|
|
17196
17419
|
brief: str | None,
|
|
17197
17420
|
step: int,
|
|
17198
17421
|
col: str | list[str] | None,
|
|
17199
|
-
values:
|
|
17200
|
-
thresholds:
|
|
17201
|
-
segment:
|
|
17422
|
+
values: Any | None,
|
|
17423
|
+
thresholds: Any | None,
|
|
17424
|
+
segment: Any | None,
|
|
17202
17425
|
) -> str:
|
|
17203
17426
|
# If there is no brief, return `None`
|
|
17204
17427
|
if brief is None:
|
|
@@ -17285,7 +17508,7 @@ def _process_action_str(
|
|
|
17285
17508
|
action_str: str,
|
|
17286
17509
|
step: int,
|
|
17287
17510
|
col: str | None,
|
|
17288
|
-
value:
|
|
17511
|
+
value: Any,
|
|
17289
17512
|
type: str,
|
|
17290
17513
|
level: str,
|
|
17291
17514
|
time: str,
|
|
@@ -17337,8 +17560,8 @@ def _process_action_str(
|
|
|
17337
17560
|
def _create_autobrief_or_failure_text(
|
|
17338
17561
|
assertion_type: str,
|
|
17339
17562
|
lang: str,
|
|
17340
|
-
column: str
|
|
17341
|
-
values:
|
|
17563
|
+
column: str,
|
|
17564
|
+
values: Any,
|
|
17342
17565
|
for_failure: bool,
|
|
17343
17566
|
locale: str | None = None,
|
|
17344
17567
|
n_rows: int | None = None,
|
|
@@ -17490,7 +17713,7 @@ def _create_autobrief_or_failure_text(
|
|
|
17490
17713
|
for_failure=for_failure,
|
|
17491
17714
|
)
|
|
17492
17715
|
|
|
17493
|
-
return None
|
|
17716
|
+
return None
|
|
17494
17717
|
|
|
17495
17718
|
|
|
17496
17719
|
def _expect_failure_type(for_failure: bool) -> str:
|
|
@@ -17500,7 +17723,7 @@ def _expect_failure_type(for_failure: bool) -> str:
 def _create_text_comparison(
     assertion_type: str,
     lang: str,
-    column: str | list[str] | None,
+    column: str | list[str],
     values: str | None,
     for_failure: bool = False,
 ) -> str:
@@ -17526,7 +17749,7 @@ def _create_text_comparison(


 def _create_text_between(
     lang: str,
-    column: str | None,
+    column: str,
     value_1: str,
     value_2: str,
     not_: bool = False,
@@ -17556,7 +17779,7 @@ def _create_text_between(


 def _create_text_set(
-    lang: str, column: str | None, values: list[any], not_: bool = False, for_failure: bool = False
+    lang: str, column: str, values: list[Any], not_: bool = False, for_failure: bool = False
 ) -> str:
     type_ = _expect_failure_type(for_failure=for_failure)

@@ -17578,9 +17801,7 @@ def _create_text_set(
     return text


-def _create_text_null(
-    lang: str, column: str | None, not_: bool = False, for_failure: bool = False
-) -> str:
+def _create_text_null(lang: str, column: str, not_: bool = False, for_failure: bool = False) -> str:
     type_ = _expect_failure_type(for_failure=for_failure)

     column_text = _prep_column_text(column=column)
@@ -17597,9 +17818,7 @@ def _create_text_null(
     return text


-def _create_text_regex(
-    lang: str, column: str | None, pattern: str | dict, for_failure: bool = False
-) -> str:
+def _create_text_regex(lang: str, column: str, pattern: str, for_failure: bool = False) -> str:
     type_ = _expect_failure_type(for_failure=for_failure)

     column_text = _prep_column_text(column=column)
@@ -17631,7 +17850,7 @@ def _create_text_expr(lang: str, for_failure: bool) -> str:
     return EXPECT_FAIL_TEXT[f"col_vals_expr_{type_}_text"][lang]


-def _create_text_col_exists(lang: str, column: str | None, for_failure: bool = False) -> str:
+def _create_text_col_exists(lang: str, column: str, for_failure: bool = False) -> str:
     type_ = _expect_failure_type(for_failure=for_failure)

     column_text = _prep_column_text(column=column)
@@ -17681,7 +17900,7 @@ def _create_text_rows_complete(
     return text


-def _create_text_row_count_match(lang: str, value: int, for_failure: bool = False) -> str:
+def _create_text_row_count_match(lang: str, value: dict, for_failure: bool = False) -> str:
     type_ = _expect_failure_type(for_failure=for_failure)

     values_text = _prep_values_text(value["count"], lang=lang)
@@ -17689,7 +17908,7 @@ def _create_text_row_count_match(lang: str, value: int, for_failure: bool = False) -> str:
     return EXPECT_FAIL_TEXT[f"row_count_match_n_{type_}_text"][lang].format(values_text=values_text)


-def _create_text_col_count_match(lang: str, value: int, for_failure: bool = False) -> str:
+def _create_text_col_count_match(lang: str, value: dict, for_failure: bool = False) -> str:
     type_ = _expect_failure_type(for_failure=for_failure)

     values_text = _prep_values_text(value["count"], lang=lang)
@@ -17826,19 +18045,13 @@ def _create_text_prompt(lang: str, prompt: str, for_failure: bool = False) -> str:
 def _prep_column_text(column: str | list[str]) -> str:
     if isinstance(column, list):
         return "`" + str(column[0]) + "`"
-
+    if isinstance(column, str):
         return "`" + column + "`"
-
-    return ""
+    raise AssertionError


 def _prep_values_text(
-    values: str
-    | int
-    | float
-    | datetime.datetime
-    | datetime.date
-    | list[str | int | float | datetime.datetime | datetime.date],
+    values: _CompliantValue | _CompliantValues,
     lang: str,
     limit: int = 3,
 ) -> str:
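The `_prep_column_text()` rewrite replaces an unreachable `return ""` fallback with an explicit `raise AssertionError`, so an unexpected input type now fails loudly at the call site instead of silently producing an empty label. The pattern in isolation (a sketch, not pointblank code):

```python
def prep_column_text(column: str | list[str]) -> str:
    if isinstance(column, list):
        return "`" + str(column[0]) + "`"
    if isinstance(column, str):
        return "`" + column + "`"
    # A type outside the annotation is a programming error: surface it
    # immediately rather than hiding it behind a default value.
    raise AssertionError(f"unexpected column type: {type(column).__name__}")

print(prep_column_text("a"))         # `a`
print(prep_column_text(["a", "b"]))  # `a`
```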
@@ -17886,7 +18099,7 @@ def _prep_values_text(
     return values_str


-def _seg_expr_from_string(data_tbl: any, segments_expr: str) -> tuple[str, str]:
+def _seg_expr_from_string(data_tbl: Any, segments_expr: str) -> tuple[str, str]:
     """
     Obtain the segmentation categories from a table column.

@@ -17989,7 +18202,7 @@ def _seg_expr_from_tuple(segments_expr: tuple) -> list[tuple[str, Any]]:
     return seg_tuples


-def _apply_segments(data_tbl: any, segments_expr: tuple[str, Any]) -> any:
+def _apply_segments(data_tbl: Any, segments_expr: tuple[str, str]) -> Any:
     """
     Apply the segments expression to the data table.

@@ -18053,8 +18266,26 @@ def _apply_segments(data_tbl: any, segments_expr: tuple[str, Any]) -> any:
         except ValueError:  # pragma: no cover
             pass  # pragma: no cover

-        # Format 2: Datetime strings with UTC timezone like
-        # "2016-01-04 00:00:01 UTC.strict_cast(...)"
+        # Format 2: Direct datetime strings like "2016-01-04 00:00:01" (Polars 1.36+)
+        # These don't have UTC suffix anymore
+        elif (
+            " " in segment_str
+            and "UTC" not in segment_str
+            and "[" not in segment_str
+            and ".alias" not in segment_str
+        ):
+            try:
+                parsed_dt = datetime.fromisoformat(segment_str)
+                # Convert midnight datetimes to dates for consistency
+                if parsed_dt.time() == datetime.min.time():
+                    parsed_value = parsed_dt.date()  # pragma: no cover
+                else:
+                    parsed_value = parsed_dt
+            except ValueError:  # pragma: no cover
+                pass  # pragma: no cover
+
+        # Format 3: Datetime strings with UTC timezone like
+        # "2016-01-04 00:00:01 UTC.strict_cast(...)" (Polars < 1.36)
         elif " UTC" in segment_str:
             try:
                 # Extract just the datetime part before "UTC"
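The new Format 2 branch handles Polars >= 1.36, where stringified datetime segments no longer carry a ` UTC` suffix. The midnight-to-date normalization can be checked in isolation (assuming `from datetime import datetime`, which the code above implies):

```python
from datetime import datetime

for segment_str in ("2016-01-04 00:00:01", "2016-01-04 00:00:00"):
    parsed_dt = datetime.fromisoformat(segment_str)
    # Midnight datetimes collapse to plain dates, mirroring the branch above.
    if parsed_dt.time() == datetime.min.time():
        parsed_value = parsed_dt.date()
    else:
        parsed_value = parsed_dt
    print(repr(parsed_value))
# datetime.datetime(2016, 1, 4, 0, 0, 1)
# datetime.date(2016, 1, 4)
```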
@@ -18069,7 +18300,7 @@ def _apply_segments(data_tbl: any, segments_expr: tuple[str, Any]) -> any:
             except (ValueError, IndexError):  # pragma: no cover
                 pass  # pragma: no cover

-        # Format 3: Bracketed expressions like ['2016-01-04']
+        # Format 4: Bracketed expressions like ['2016-01-04']
         elif segment_str.startswith("[") and segment_str.endswith("]"):
             try:  # pragma: no cover
                 # Remove [' and ']
@@ -18204,8 +18435,7 @@ def _validation_info_as_dict(validation_info: _ValidationInfo) -> dict:

 def _get_assertion_icon(icon: list[str], length_val: int = 30) -> list[str]:
     # For each icon, get the assertion icon SVG test from SVG_ICONS_FOR_ASSERTION_TYPES dictionary
-
-    icon_svg = [SVG_ICONS_FOR_ASSERTION_TYPES.get(icon) for icon in icon]
+    icon_svg: list[str] = [SVG_ICONS_FOR_ASSERTION_TYPES[icon] for icon in icon]

     # Replace the width and height in the SVG string
     for i in range(len(icon_svg)):
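Switching from `dict.get()` to plain indexing tightens both the typing and the failure behavior, which is presumably the point of the added annotation. A small demonstration (toy dictionary, not the real icon table):

```python
icons: dict[str, str] = {"col_vals_gt": "<svg>gt</svg>"}

maybe_icon = icons.get("nonexistent")  # typed `str | None`; error surfaces later, if at all
print(maybe_icon)                      # None

try:
    icons["nonexistent"]               # typed `str`; fails fast on a bad key
except KeyError as e:
    print("missing assertion type:", e)
```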
@@ -18214,11 +18444,9 @@ def _get_assertion_icon(icon: list[str], length_val: int = 30) -> list[str]:
     return icon_svg


-def _replace_svg_dimensions(svg: any, height_width: any) -> any:
+def _replace_svg_dimensions(svg: str, height_width: int | float) -> str:
     svg = re.sub(r'width="[0-9]*?px', f'width="{height_width}px', svg)
-
-    svg = re.sub(r'height="[0-9]*?px', f'height="{height_width}px', svg)
-    return svg
+    return re.sub(r'height="[0-9]*?px', f'height="{height_width}px', svg)
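The rewritten helper chains the second substitution into the return statement. The two regexes can be exercised directly (the sample SVG string here is made up):

```python
import re

def replace_svg_dimensions(svg: str, height_width: int | float) -> str:
    # Rewrite both the width and height attributes to the same pixel size.
    svg = re.sub(r'width="[0-9]*?px', f'width="{height_width}px', svg)
    return re.sub(r'height="[0-9]*?px', f'height="{height_width}px', svg)

svg = '<svg width="30px" height="30px" viewBox="0 0 24 24"></svg>'
print(replace_svg_dimensions(svg, 20))
# <svg width="20px" height="20px" viewBox="0 0 24 24"></svg>
```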
@@ -18282,7 +18510,7 @@ def _process_title_text(title: str | None, tbl_name: str | None, lang: str) -> str:
     return title_text


-def _transform_tbl_preprocessed(pre: any, seg: any, interrogation_performed: bool) -> list[str]:
+def _transform_tbl_preprocessed(pre: Any, seg: Any, interrogation_performed: bool) -> list[str]:
     # If no interrogation was performed, return a list of empty strings
     if not interrogation_performed:
         return ["" for _ in range(len(pre))]
@@ -18304,9 +18532,7 @@ def _transform_tbl_preprocessed(pre: any, seg: any, interrogation_performed: bool) -> list[str]:

 def _get_preprocessed_table_icon(icon: list[str]) -> list[str]:
     # For each icon, get the SVG icon from the SVG_ICONS_FOR_TBL_STATUS dictionary
-
-    icon_svg = [SVG_ICONS_FOR_TBL_STATUS.get(icon) for icon in icon]
-    return icon_svg
+    return [SVG_ICONS_FOR_TBL_STATUS[icon] for icon in icon]


 def _transform_eval(
@@ -18384,9 +18610,9 @@ def _transform_test_units(
             return _format_single_number_with_gt(
                 value, n_sigfig=3, compact=True, locale=locale, df_lib=df_lib
             )
-
-
-
+        formatted = vals.fmt_number(value, n_sigfig=3, compact=True, locale=locale)
+        assert isinstance(formatted, list)
+        return formatted[0]

     return [
         (
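The new fallback uses `great_tables.vals.fmt_number()`; the `assert isinstance(..., list)` and `[0]` indexing in the diff imply it returns a list of formatted strings even for a single input value. A quick check of that shape (the printed value is illustrative):

```python
from great_tables import vals

formatted = vals.fmt_number(12345.678, n_sigfig=3, compact=True)
# Always a list, even when formatting a single value.
assert isinstance(formatted, list)
print(formatted[0])  # e.g. "12.3K"
```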
@@ -18590,22 +18816,21 @@ def _transform_assertion_str(
     return type_upd


-def _pre_processing_funcs_to_str(pre: Callable) -> str | list[str]:
+def _pre_processing_funcs_to_str(pre: Callable) -> str | list[str] | None:
     if isinstance(pre, Callable):
         return _get_callable_source(fn=pre)
+    return None


 def _get_callable_source(fn: Callable) -> str:
-
-
-
-
-
-
-
-
-        return fn.__name__
-    return fn  # pragma: no cover
+    try:
+        source_lines, _ = inspect.getsourcelines(fn)
+        source = "".join(source_lines).strip()
+        # Extract the `pre` argument from the source code
+        pre_arg = _extract_pre_argument(source)
+        return pre_arg
+    except (OSError, TypeError):  # pragma: no cover
+        return fn.__name__  # ty: ignore


 def _extract_pre_argument(source: str) -> str:
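The rewritten `_get_callable_source()` retrieves the source of the `pre=` callable via `inspect.getsourcelines()`, which returns a `(lines, start_lineno)` pair. It raises `OSError` when the source is unavailable (for example, functions defined in a REPL) and `TypeError` for built-ins — exactly the exceptions the fallback catches. For instance:

```python
import inspect

def preprocess(tbl):
    return tbl.head(5)

source_lines, start = inspect.getsourcelines(preprocess)
print(start)                           # line number where the def starts
print("".join(source_lines).strip())  # the function's source text

try:
    inspect.getsourcelines(len)        # built-in: no Python source available
except TypeError as e:
    print("fallback path:", e)
```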
@@ -18631,6 +18856,7 @@ def _create_table_time_html(
     if time_start is None:
         return ""

+    assert time_end is not None  # typing
     # Get the time duration (difference between `time_end` and `time_start`) in seconds
     time_duration = (time_end - time_start).total_seconds()

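The added `assert` exists for the type checker: after the early return, `time_end` is logically non-`None`, and the assertion narrows it from `datetime | None` to `datetime`. The pattern in isolation (a sketch, not the package's function):

```python
import datetime

def duration_seconds(
    time_start: datetime.datetime | None,
    time_end: datetime.datetime | None,
) -> float:
    if time_start is None:
        return 0.0
    # Narrows `time_end` for the type checker and documents the invariant
    # that the start/end timestamps are always set together.
    assert time_end is not None
    return (time_end - time_start).total_seconds()
```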
@@ -18845,11 +19071,11 @@ def _format_number_safe(
             locale=locale,
             df_lib=df_lib,
         )
-
-
-
-
-
+    ints = fmt_number(
+        value, decimals=decimals, drop_trailing_zeros=drop_trailing_zeros, locale=locale
+    )
+    assert isinstance(ints, list)
+    return ints[0]


 def _format_integer_safe(value: int, locale: str = "en", df_lib=None) -> str:
@@ -18862,9 +19088,10 @@ def _format_integer_safe(value: int, locale: str = "en", df_lib=None) -> str:
     if df_lib is not None and value is not None:
         # Use GT-based formatting to avoid Pandas dependency completely
         return _format_single_integer_with_gt(value, locale=locale, df_lib=df_lib)
-
-
-
+
+    ints = fmt_integer(value, locale=locale)
+    assert isinstance(ints, list)
+    return ints[0]


 def _create_thresholds_html(thresholds: Thresholds, locale: str, df_lib=None) -> str:
@@ -18980,7 +19207,7 @@ def _create_local_threshold_note_html(thresholds: Thresholds, locale: str = "en") -> str:
         HTML string containing the formatted threshold information.
     """
     if thresholds == Thresholds():
-        return ""
+        return ""  # pragma: no cover

     # Get df_lib for formatting
     df_lib = None
@@ -18988,10 +19215,10 @@ def _create_local_threshold_note_html(thresholds: Thresholds, locale: str = "en") -> str:
         import polars as pl

         df_lib = pl
-    elif _is_lib_present("pandas"):
-        import pandas as pd
+    elif _is_lib_present("pandas"):  # pragma: no cover
+        import pandas as pd  # pragma: no cover

-        df_lib = pd
+        df_lib = pd  # pragma: no cover

     # Helper function to format threshold values using the shared formatting functions
     def _format_threshold_value(fraction: float | None, count: int | None) -> str:
@@ -18999,10 +19226,12 @@ def _create_local_threshold_note_html(thresholds: Thresholds, locale: str = "en") -> str:
         # Format as fraction/percentage with locale formatting
         if fraction == 0:
             return "0"
-        elif fraction < 0.01:
+        elif fraction < 0.01:  # pragma: no cover
             # For very small fractions, show "<0.01" with locale formatting
-            formatted = _format_number_safe(0.01, decimals=2, locale=locale, df_lib=df_lib)
-            return f"<{formatted}"
+            formatted = _format_number_safe(
+                0.01, decimals=2, locale=locale, df_lib=df_lib
+            )  # pragma: no cover
+            return f"<{formatted}"  # pragma: no cover
         else:
             # Use shared formatting function with drop_trailing_zeros
             formatted = _format_number_safe(
@@ -19079,14 +19308,14 @@ def _create_local_threshold_note_text(thresholds: Thresholds) -> str:
         if fraction is not None:
             if fraction == 0:
                 return "0"
-            elif fraction < 0.01:
-                return "<0.01"
+            elif fraction < 0.01:  # pragma: no cover
+                return "<0.01"  # pragma: no cover
             else:
                 return f"{fraction:.2f}".rstrip("0").rstrip(".")
         elif count is not None:
             return str(count)
         else:
-            return "—"
+            return "—"  # pragma: no cover

     parts = []

@@ -19105,7 +19334,7 @@ def _create_local_threshold_note_text(thresholds: Thresholds) -> str:
     if parts:
         return "Step-specific thresholds set: " + ", ".join(parts)
     else:
-        return ""
+        return ""  # pragma: no cover


 def _create_threshold_reset_note_html(locale: str = "en") -> str:
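For reference, the fraction formatting kept as context in the hunk above — `f"{fraction:.2f}".rstrip("0").rstrip(".")` — fixes two decimals and then trims trailing zeros and any dangling decimal point:

```python
for fraction in (0.5, 0.25, 1.0):
    # "0.50" -> "0.5", "0.25" -> "0.25", "1.00" -> "1"
    print(f"{fraction:.2f}".rstrip("0").rstrip("."))
```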
@@ -19654,13 +19883,13 @@ def _create_col_schema_match_note_html(schema_info: dict, locale: str = "en") -> str:
             f'<span style="color:#FF3300;">✗</span> {failed_text}: ' + ", ".join(failures) + "."
         )
     else:
-        summary = f'<span style="color:#FF3300;">✗</span> {failed_text}.'
+        summary = f'<span style="color:#FF3300;">✗</span> {failed_text}.'  # pragma: no cover

     # Generate the step report table using the existing function
     # We'll call either _step_report_schema_in_order or _step_report_schema_any_order
     # depending on the in_order parameter
-    if in_order:
-        step_report_gt = _step_report_schema_in_order(
+    if in_order:  # pragma: no cover
+        step_report_gt = _step_report_schema_in_order(  # pragma: no cover
             step=1, schema_info=schema_info, header=None, lang=locale, debug_return_df=False
         )
     else:
@@ -19691,7 +19920,7 @@ def _create_col_schema_match_note_html(schema_info: dict, locale: str = "en") -> str:
     """

     # Add the settings as an additional source note to the step report
-    step_report_gt = step_report_gt.tab_source_note(source_note=html(source_note_html))
+    step_report_gt = step_report_gt.tab_source_note(source_note=html(source_note_html))  # type: ignore[union-attr]

     # Extract the HTML from the GT object
     step_report_html = step_report_gt._repr_html_()
@@ -19743,12 +19972,12 @@ def _step_report_row_based(
     column: str,
     column_position: int,
     columns_subset: list[str] | None,
-    values: any,
+    values: Any,
     inclusive: tuple[bool, bool] | None,
     n: int,
     n_failed: int,
     all_passed: bool,
-    extract: any,
+    extract: Any,
     tbl_preview: GT,
     header: str,
     limit: int | None,
@@ -19775,10 +20004,12 @@ def _step_report_row_based(
     elif assertion_type == "col_vals_le":
         text = f"{column} ≤ {values}"
     elif assertion_type == "col_vals_between":
+        assert inclusive is not None
         symbol_left = "≤" if inclusive[0] else "<"
         symbol_right = "≤" if inclusive[1] else "<"
         text = f"{values[0]} {symbol_left} {column} {symbol_right} {values[1]}"
     elif assertion_type == "col_vals_outside":
+        assert inclusive is not None
         symbol_left = "<" if inclusive[0] else "≤"
         symbol_right = ">" if inclusive[1] else "≥"
         text = f"{column} {symbol_left} {values[0]}, {column} {symbol_right} {values[1]}"
@@ -19999,7 +20230,7 @@ def _step_report_rows_distinct(
     n: int,
     n_failed: int,
     all_passed: bool,
-    extract: any,
+    extract: Any,
     tbl_preview: GT,
     header: str,
     limit: int | None,
@@ -20126,8 +20357,8 @@ def _step_report_rows_distinct(


 def _step_report_schema_in_order(
-    step: int, schema_info: dict, header: str, lang: str, debug_return_df: bool = False
-) -> GT | any:
+    step: int, schema_info: dict, header: str | None, lang: str, debug_return_df: bool = False
+) -> GT | Any:
     """
     This is the case for schema validation where the schema is supposed to have the same column
     order as the target table.
@@ -20195,22 +20426,22 @@ def _step_report_schema_in_order(

             # Check if this column exists in exp_columns_dict (it might not if it's a duplicate)
             # For duplicates, we need to handle them specially
-            if column_name_exp_i not in exp_columns_dict:
+            if column_name_exp_i not in exp_columns_dict:  # pragma: no cover
                 # This is a duplicate or invalid column, mark it as incorrect
-                col_exp_correct.append(CROSS_MARK_SPAN)
+                col_exp_correct.append(CROSS_MARK_SPAN)  # pragma: no cover

                 # For dtype, check if there's a dtype specified in the schema
-                if len(expect_schema[i]) > 1:
-                    dtype_value = expect_schema[i][1]
-                    if isinstance(dtype_value, list):
-                        dtype_exp.append(" | ".join(dtype_value))
-                    else:
-                        dtype_exp.append(str(dtype_value))
-                else:
-                    dtype_exp.append("—")
+                if len(expect_schema[i]) > 1:  # pragma: no cover
+                    dtype_value = expect_schema[i][1]  # pragma: no cover
+                    if isinstance(dtype_value, list):  # pragma: no cover
+                        dtype_exp.append(" | ".join(dtype_value))  # pragma: no cover
+                    else:  # pragma: no cover
+                        dtype_exp.append(str(dtype_value))  # pragma: no cover
+                else:  # pragma: no cover
+                    dtype_exp.append("—")  # pragma: no cover

-                dtype_exp_correct.append("—")
-                continue
+                dtype_exp_correct.append("—")  # pragma: no cover
+                continue  # pragma: no cover

             #
             # `col_exp_correct` values
@@ -20433,7 +20664,9 @@ def _step_report_schema_in_order(
     # Add a border below the row that terminates the target table schema
     step_report = step_report.tab_style(
         style=style.borders(sides="bottom", color="#6699CC80", style="solid", weight="1px"),
-        locations=loc.body(rows=len(colnames_tgt) - 1),
+        locations=loc.body(
+            rows=len(colnames_tgt) - 1  # ty: ignore (bug in GT, should allow an int)
+        ),
     )

     # If the version of `great_tables` is `>=0.17.0` then disable Quarto table processing
@@ -20482,8 +20715,8 @@ def _step_report_schema_in_order(


 def _step_report_schema_any_order(
-    step: int, schema_info: dict, header: str, lang: str, debug_return_df: bool = False
-) -> GT | any:
+    step: int, schema_info: dict, header: str | None, lang: str, debug_return_df: bool = False
+) -> GT | pl.DataFrame:
     """
     This is the case for schema validation where the schema is permitted to not have to be in the
     same column order as the target table.
@@ -20902,9 +21135,7 @@ def _step_report_schema_any_order(
         header = header.format(title=title, details=details)

     # Create the header with `header` string
-    step_report = step_report.tab_header(title=md(header))
-
-    return step_report
+    return step_report.tab_header(title=md(header))


 def _create_label_text_html(
@@ -20993,3 +21224,321 @@ def _create_col_schema_match_params_html(
         f"{full_match_dtypes_text}"
         "</div>"
     )
+
+
+def _generate_agg_docstring(name: str) -> str:
+    """Generate a comprehensive docstring for an aggregation validation method.
+
+    This function creates detailed documentation for dynamically generated methods like
+    `col_sum_eq()`, `col_avg_gt()`, `col_sd_le()`, etc. The docstrings follow the same
+    structure and quality as manually written validation methods like `col_vals_gt()`.
+
+    Parameters
+    ----------
+    name
+        The method name (e.g., "col_sum_eq", "col_avg_gt", "col_sd_le").
+
+    Returns
+    -------
+    str
+        A complete docstring for the method.
+    """
+    # Parse the method name to extract aggregation type and comparison operator
+    # Format: col_{agg}_{comp} (e.g., col_sum_eq, col_avg_gt, col_sd_le)
+    parts = name.split("_")
+    agg_type = parts[1]  # sum, avg, sd
+    comp_type = parts[2]  # eq, gt, ge, lt, le
+
+    # Human-readable names for aggregation types
+    agg_names = {
+        "sum": ("sum", "summed"),
+        "avg": ("average", "averaged"),
+        "sd": ("standard deviation", "computed for standard deviation"),
+    }
+
+    # Human-readable descriptions for comparison operators (with article for title)
+    comp_descriptions = {
+        "eq": ("equal to", "equals", "an"),
+        "gt": ("greater than", "is greater than", "a"),
+        "ge": ("greater than or equal to", "is at least", "a"),
+        "lt": ("less than", "is less than", "a"),
+        "le": ("less than or equal to", "is at most", "a"),
+    }
+
+    # Mathematical symbols for comparison operators
+    comp_symbols = {
+        "eq": "==",
+        "gt": ">",
+        "ge": ">=",
+        "lt": "<",
+        "le": "<=",
+    }
+
+    agg_name, agg_verb = agg_names[agg_type]
+    comp_desc, comp_phrase, comp_article = comp_descriptions[comp_type]
+    comp_symbol = comp_symbols[comp_type]
+
+    # Determine the appropriate example values based on the aggregation and comparison
+    if agg_type == "sum":
+        example_value = "15"
+        example_data = '{"a": [1, 2, 3, 4, 5], "b": [2, 2, 2, 2, 2]}'
+        example_sum = "15"  # sum of a
+        example_ref_sum = "10"  # sum of b
+    elif agg_type == "avg":
+        example_value = "3"
+        example_data = '{"a": [1, 2, 3, 4, 5], "b": [2, 2, 2, 2, 2]}'
+        example_sum = "3.0"  # avg of a
+        example_ref_sum = "2.0"  # avg of b
+    else:  # sd
+        example_value = "2"
+        example_data = '{"a": [1, 2, 3, 4, 5], "b": [2, 2, 2, 2, 2]}'
+        example_sum = "~1.58"  # sd of a
+        example_ref_sum = "0.0"  # sd of b
+
+    # Build appropriate tolerance explanation based on comparison type
+    if comp_type == "eq":
+        tol_explanation = f"""The `tol=` parameter is particularly useful with `{name}()` since exact equality
+    comparisons on floating-point aggregations can be problematic due to numerical precision.
+    Setting a small tolerance (e.g., `tol=0.001`) allows for minor differences that arise from
+    floating-point arithmetic."""
+    else:
+        tol_explanation = f"""The `tol=` parameter expands the acceptable range for the comparison. For
+    `{name}()`, a tolerance of `tol=0.5` would mean the {agg_name} can be within `0.5` of the
+    target value and still pass validation."""
+
+    docstring = f"""
+    Does the column {agg_name} satisfy {comp_article} {comp_desc} comparison?
+
+    The `{name}()` validation method checks whether the {agg_name} of values in a column
+    {comp_phrase} a specified `value=`. This is an aggregation-based validation where the entire
+    column is reduced to a single {agg_name} value that is then compared against the target. The
+    comparison used in this function is `{agg_name}(column) {comp_symbol} value`.
+
+    Unlike row-level validations (e.g., `col_vals_gt()`), this method treats the entire column as
+    a single test unit. The validation either passes completely (if the aggregated value satisfies
+    the comparison) or fails completely.
+
+    Parameters
+    ----------
+    columns
+        A single column or a list of columns to validate. If multiple columns are supplied,
+        there will be a separate validation step generated for each column. The columns must
+        contain numeric data for the {agg_name} to be computed.
+    value
+        The value to compare the column {agg_name} against. This can be: (1) a numeric literal
+        (`int` or `float`), (2) a [`col()`](`pointblank.col`) object referencing another column
+        whose {agg_name} will be used for comparison, (3) a [`ref()`](`pointblank.ref`) object
+        referencing a column in reference data (when `Validate(reference=)` has been set), or (4)
+        `None` to automatically compare against the same column in reference data (shorthand for
+        `ref(column_name)` when reference data is set).
+    tol
+        A tolerance value for the comparison. The default is `0`, meaning exact comparison. When
+        set to a positive value, the comparison becomes more lenient. For example, with `tol=0.5`,
+        a {agg_name} that differs from the target by up to `0.5` will still pass. {tol_explanation}
+    thresholds
+        Failure threshold levels so that the validation step can react accordingly when
+        failing test units are level. Since this is an aggregation-based validation with only
+        one test unit, threshold values typically should be set as absolute counts (e.g., `1`) to
+        indicate pass/fail, or as proportions where any value less than `1.0` means failure is
+        acceptable.
+    brief
+        An optional brief description of the validation step that will be displayed in the
+        reporting table. You can use the templating elements like `"{{step}}"` to insert
+        the step number, or `"{{auto}}"` to include an automatically generated brief. If `True`
+        the entire brief will be automatically generated. If `None` (the default) then there
+        won't be a brief.
+    actions
+        Optional actions to take when the validation step meets or exceeds any set threshold
+        levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
+        define the actions.
+    active
+        A boolean value indicating whether the validation step should be active. Using `False`
+        will make the validation step inactive (still reporting its presence and keeping indexes
+        for the steps unchanged).
+
+    Returns
+    -------
+    Validate
+        The `Validate` object with the added validation step.
+
+    Using Reference Data
+    --------------------
+    The `{name}()` method supports comparing column aggregations against reference data. This
+    is useful for validating that statistical properties remain consistent across different
+    versions of a dataset, or for comparing current data against historical baselines.
+
+    To use reference data, set the `reference=` parameter when creating the `Validate` object:
+
+    ```python
+    validation = (
+        pb.Validate(data=current_data, reference=baseline_data)
+        .{name}(columns="revenue")  # Compares sum(current.revenue) vs sum(baseline.revenue)
+        .interrogate()
+    )
+    ```
+
+    When `value=None` and reference data is set, the method automatically compares against the
+    same column in the reference data. You can also explicitly specify reference columns using
+    the `ref()` helper:
+
+    ```python
+    .{name}(columns="revenue", value=pb.ref("baseline_revenue"))
+    ```
+
+    Understanding Tolerance
+    -----------------------
+    The `tol=` parameter allows for fuzzy comparisons, which is especially important for
+    floating-point aggregations where exact equality is often unreliable.
+
+    {tol_explanation}
+
+    For equality comparisons (`col_*_eq`), the tolerance creates a range `[value - tol, value + tol]`
+    within which the aggregation is considered valid. For inequality comparisons, the tolerance
+    shifts the comparison boundary.
+
+    Thresholds
+    ----------
+    The `thresholds=` parameter is used to set the failure-condition levels for the validation
+    step. If they are set here at the step level, these thresholds will override any thresholds
+    set at the global level in `Validate(thresholds=...)`.
+
+    There are three threshold levels: 'warning', 'error', and 'critical'. Since aggregation
+    validations operate on a single test unit (the aggregated value), threshold values are
+    typically set as absolute counts:
+
+    - `thresholds=1` means any failure triggers a 'warning'
+    - `thresholds=(1, 1, 1)` means any failure triggers all three levels
+
+    Thresholds can be defined using one of these input schemes:
+
+    1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
+    thresholds)
+    2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1` is
+    the 'error' level, and position `2` is the 'critical' level
+    3. create a dictionary of 1-3 value entries; the valid keys: are 'warning', 'error', and
+    'critical'
+    4. a single integer/float value denoting absolute number or fraction of failing test units
+    for the 'warning' level only
+
+    Examples
+    --------
+    ```{{python}}
+    #| echo: false
+    #| output: false
+    import pointblank as pb
+    pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
+    ```
+    For the examples, we'll use a simple Polars DataFrame with numeric columns. The table is
+    shown below:
+
+    ```{{python}}
+    import pointblank as pb
+    import polars as pl
+
+    tbl = pl.DataFrame(
+        {{
+            "a": [1, 2, 3, 4, 5],
+            "b": [2, 2, 2, 2, 2],
+        }}
+    )
+
+    pb.preview(tbl)
+    ```
+
+    Let's validate that the {agg_name} of column `a` {comp_phrase} `{example_value}`:
+
+    ```{{python}}
+    validation = (
+        pb.Validate(data=tbl)
+        .{name}(columns="a", value={example_value})
+        .interrogate()
+    )
+
+    validation
+    ```
+
+    The validation result shows whether the {agg_name} comparison passed or failed. Since this
+    is an aggregation-based validation, there is exactly one test unit per column.
+
+    When validating multiple columns, each column gets its own validation step:
+
+    ```{{python}}
+    validation = (
+        pb.Validate(data=tbl)
+        .{name}(columns=["a", "b"], value={example_value})
+        .interrogate()
+    )
+
+    validation
+    ```
+
+    Using tolerance for flexible comparisons:
+
+    ```{{python}}
+    validation = (
+        pb.Validate(data=tbl)
+        .{name}(columns="a", value={example_value}, tol=1.0)
+        .interrogate()
+    )
+
+    validation
+    ```
+    """
+
+    return docstring.strip()
+
+
+def make_agg_validator(name: str):
+    """Factory for dynamically generated aggregate validation methods.
+
+    Why this exists:
+        Aggregate validators all share identical behavior. The only thing that differs
+        between them is the semantic assertion type (their name). The implementation
+        of each aggregate validator is fetched from `from_agg_validator`.
+
+        Instead of copy/pasting dozens of identical methods, we generate
+        them dynamically and attach them to the Validate class. The types are generated
+        at build time with `make pyi` to allow the methods to be visible to the type checker,
+        documentation builders and the IDEs/LSPs.
+
+        The returned function is a thin adapter that forwards all arguments to
+        `_add_agg_validation`, supplying the assertion type explicitly.
+    """
+
+    def agg_validator(
+        self: Validate,
+        columns: str | Collection[str],
+        value: float | int | Column | ReferenceColumn | None = None,
+        tol: float = 0,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
+        brief: str | bool | None = None,
+        actions: Actions | None = None,
+        active: bool = True,
+    ) -> Validate:
+        # Dynamically generated aggregate validator.
+        # This method is generated per assertion type and forwards all arguments
+        # to the shared aggregate validation implementation.
+        return self._add_agg_validation(
+            assertion_type=name,
+            columns=columns,
+            value=value,
+            tol=tol,
+            thresholds=thresholds,
+            brief=brief,
+            actions=actions,
+            active=active,
+        )
+
+    # Manually set function identity so this behaves like a real method.
+    # These must be set before attaching the function to the class.
+    agg_validator.__name__ = name
+    agg_validator.__qualname__ = f"Validate.{name}"
+    agg_validator.__doc__ = _generate_agg_docstring(name)
+
+    return agg_validator
+
+
+# Finally, we grab all the valid aggregation method names and attach them to
+# the Validate class, registering each one appropriately.
+for method in load_validation_method_grid():  # -> `col_sum_*`, `col_mean_*`, etc.
+    setattr(Validate, method, make_agg_validator(method))