pointblank 0.17.0__py3-none-any.whl → 0.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pointblank/__init__.py +2 -0
- pointblank/_agg.py +120 -0
- pointblank/_constants.py +334 -55
- pointblank/_constants_translations.py +378 -0
- pointblank/_datascan_utils.py +28 -10
- pointblank/_interrogation.py +406 -149
- pointblank/_typing.py +12 -0
- pointblank/_utils.py +81 -44
- pointblank/_utils_ai.py +4 -5
- pointblank/_utils_check_args.py +3 -3
- pointblank/_utils_llms_txt.py +40 -2
- pointblank/actions.py +1 -1
- pointblank/assistant.py +2 -3
- pointblank/cli.py +1 -1
- pointblank/column.py +162 -46
- pointblank/data/api-docs.txt +2695 -49
- pointblank/datascan.py +17 -17
- pointblank/draft.py +2 -3
- pointblank/scan_profile.py +2 -1
- pointblank/schema.py +61 -20
- pointblank/thresholds.py +15 -13
- pointblank/validate.py +2034 -233
- pointblank/validate.pyi +1104 -0
- pointblank/yaml.py +10 -6
- {pointblank-0.17.0.dist-info → pointblank-0.19.0.dist-info}/METADATA +2 -2
- {pointblank-0.17.0.dist-info → pointblank-0.19.0.dist-info}/RECORD +30 -28
- {pointblank-0.17.0.dist-info → pointblank-0.19.0.dist-info}/WHEEL +1 -1
- {pointblank-0.17.0.dist-info → pointblank-0.19.0.dist-info}/licenses/LICENSE +1 -1
- {pointblank-0.17.0.dist-info → pointblank-0.19.0.dist-info}/entry_points.txt +0 -0
- {pointblank-0.17.0.dist-info → pointblank-0.19.0.dist-info}/top_level.txt +0 -0
pointblank/validate.py
CHANGED
@@ -15,8 +15,9 @@ from enum import Enum
 from functools import partial
 from importlib.metadata import version
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, Literal
+from typing import TYPE_CHECKING, Any, Callable, Literal, NoReturn, ParamSpec, TypeVar
 from zipfile import ZipFile
+from zoneinfo import ZoneInfo

 import commonmark
 import narwhals as nw
@@ -24,8 +25,8 @@ from great_tables import GT, from_column, google_font, html, loc, md, style, val
 from great_tables.gt import _get_column_of_values
 from great_tables.vals import fmt_integer, fmt_number
 from importlib_resources import files
-from narwhals.typing import FrameT

+from pointblank._agg import is_valid_agg, load_validation_method_grid, resolve_agg_registries
 from pointblank._constants import (
     ASSERTION_TYPE_METHOD_MAP,
     CHECK_MARK_SPAN,
@@ -92,6 +93,8 @@ from pointblank._utils import (
     _is_lib_present,
     _is_narwhals_table,
     _is_value_a_df,
+    _PBUnresolvedColumn,
+    _resolve_columns,
     _select_df_lib,
 )
 from pointblank._utils_check_args import (
@@ -102,7 +105,14 @@ from pointblank._utils_check_args import (
     _check_thresholds,
 )
 from pointblank._utils_html import _create_table_dims_html, _create_table_type_html
-from pointblank.column import
+from pointblank.column import (
+    Column,
+    ColumnLiteral,
+    ColumnSelector,
+    ColumnSelectorNarwhals,
+    ReferenceColumn,
+    col,
+)
 from pointblank.schema import Schema, _get_schema_validation_info
 from pointblank.segments import Segment
 from pointblank.thresholds import (
@@ -113,10 +123,18 @@ from pointblank.thresholds import (
     _normalize_thresholds_creation,
 )

+P = ParamSpec("P")
+R = TypeVar("R")
+
 if TYPE_CHECKING:
     from collections.abc import Collection
+    from typing import Any
+
+    import polars as pl
+    from narwhals.typing import IntoDataFrame, IntoFrame
+
+    from pointblank._typing import AbsoluteBounds, Tolerance, _CompliantValue, _CompliantValues

-    from pointblank._typing import AbsoluteBounds, Tolerance

 __all__ = [
     "Validate",
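The `P = ParamSpec("P")` / `R = TypeVar("R")` pair added here is the standard idiom for typing decorators that preserve a wrapped callable's exact signature. The diff does not show where pointblank applies them, so the following is only an illustrative sketch of the idiom itself:

```python
from functools import wraps
from typing import Callable, ParamSpec, TypeVar

P = ParamSpec("P")
R = TypeVar("R")


def timed(fn: Callable[P, R]) -> Callable[P, R]:
    # ParamSpec captures the wrapped function's full parameter list, and the
    # TypeVar carries its return type, so callers keep precise type checking.
    @wraps(fn)
    def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
        return fn(*args, **kwargs)

    return wrapper
```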
@@ -135,6 +153,7 @@ __all__ = [
     "get_validation_summary",
 ]

+
 # Create a thread-local storage for the metadata
 _action_context = threading.local()

@@ -424,12 +443,13 @@ def config(
    global_config.report_incl_footer_timings = report_incl_footer_timings  # pragma: no cover
    global_config.report_incl_footer_notes = report_incl_footer_notes  # pragma: no cover
    global_config.preview_incl_header = preview_incl_header  # pragma: no cover
+    return global_config  # pragma: no cover


 def load_dataset(
     dataset: Literal["small_table", "game_revenue", "nycflights", "global_sales"] = "small_table",
     tbl_type: Literal["polars", "pandas", "duckdb"] = "polars",
-) ->
+) -> Any:
     """
     Load a dataset hosted in the library as specified table type.

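With the added `return global_config`, a `config()` call now hands back the configuration object it mutates, so settings can be inspected after the fact. A sketch, assuming the returned object exposes the same attribute names as the assignments shown above:

```python
import pointblank as pb

# config() still mutates the module-level configuration; the return value
# simply makes the resulting state available to the caller.
cfg = pb.config(report_incl_header=False, report_incl_footer=False)
print(cfg.report_incl_header)  # False (assumed attribute name)
```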
@@ -450,7 +470,7 @@ def load_dataset(

     Returns
     -------
-
+    Any
         The dataset for the `Validate` object. This could be a Polars DataFrame, a Pandas DataFrame,
         or a DuckDB table as an Ibis table.

@@ -1523,7 +1543,7 @@ def get_data_path(
        return tmp_file.name


-def _process_data(data: FrameT | Any) -> FrameT | Any:
+def _process_data(data: Any) -> Any:
    """
    Centralized data processing pipeline that handles all supported input types.

@@ -1540,7 +1560,7 @@ def _process_data(data: FrameT | Any) -> FrameT | Any:

    Parameters
    ----------
-    data
+    data
        The input data which could be:
        - a DataFrame object (Polars, Pandas, Ibis, etc.)
        - a GitHub URL pointing to a CSV or Parquet file
@@ -1551,7 +1571,7 @@ def _process_data(data: FrameT | Any) -> FrameT | Any:

    Returns
    -------
-
+    Any
        Processed data as a DataFrame if input was a supported data source type,
        otherwise the original data unchanged.
    """
@@ -1570,7 +1590,7 @@ def _process_data(data: FrameT | Any) -> FrameT | Any:
        return data


-def _process_github_url(data: FrameT | Any) -> FrameT | Any:
+def _process_github_url(data: Any) -> Any:
    """
    Process data parameter to handle GitHub URLs pointing to CSV or Parquet files.

@@ -1585,12 +1605,12 @@ def _process_github_url(data: FrameT | Any) -> FrameT | Any:

    Parameters
    ----------
-    data
+    data
        The data parameter which may be a GitHub URL string or any other data type.

    Returns
    -------
-
+    Any
        If the input is a supported GitHub URL, returns a DataFrame loaded from the downloaded file.
        Otherwise, returns the original data unchanged.

@@ -1675,7 +1695,7 @@ def _process_github_url(data: FrameT | Any) -> FrameT | Any:
    return data


-def _process_connection_string(data: FrameT | Any) -> FrameT | Any:
+def _process_connection_string(data: Any) -> Any:
    """
    Process data parameter to handle database connection strings.

@@ -1702,7 +1722,7 @@ def _process_connection_string(data: FrameT | Any) -> FrameT | Any:
        return connect_to_table(data)


-def _process_csv_input(data: FrameT | Any) -> FrameT | Any:
+def _process_csv_input(data: Any) -> Any:
    """
    Process data parameter to handle CSV file inputs.

@@ -1760,7 +1780,7 @@ def _process_csv_input(data: FrameT | Any) -> FrameT | Any:
        )


-def _process_parquet_input(data: FrameT | Any) -> FrameT | Any:
+def _process_parquet_input(data: Any) -> Any:
    """
    Process data parameter to handle Parquet file inputs.

@@ -1903,7 +1923,7 @@ def _process_parquet_input(data: FrameT | Any) -> FrameT | Any:


 def preview(
-    data:
+    data: Any,
     columns_subset: str | list[str] | Column | None = None,
     n_head: int = 5,
     n_tail: int = 5,
@@ -1911,7 +1931,7 @@ def preview(
     show_row_numbers: bool = True,
     max_col_width: int = 250,
     min_tbl_width: int = 500,
-    incl_header: bool = None,
+    incl_header: bool | None = None,
 ) -> GT:
     """
     Display a table preview that shows some rows from the top, some from the bottom.
@@ -2169,7 +2189,7 @@ def preview(


 def _generate_display_table(
-    data:
+    data: Any,
     columns_subset: str | list[str] | Column | None = None,
     n_head: int = 5,
     n_tail: int = 5,
@@ -2177,7 +2197,7 @@ def _generate_display_table(
     show_row_numbers: bool = True,
     max_col_width: int = 250,
     min_tbl_width: int = 500,
-    incl_header: bool = None,
+    incl_header: bool | None = None,
     mark_missing_values: bool = True,
     row_number_list: list[int] | None = None,
 ) -> GT:
@@ -2274,7 +2294,8 @@ def _generate_display_table(
        tbl_schema = Schema(tbl=data)

        # Get the row count for the table
-        ibis_rows = data.count()
+        # Note: ibis tables have count(), to_polars(), to_pandas() methods
+        ibis_rows = data.count()  # type: ignore[union-attr]
        n_rows = ibis_rows.to_polars() if df_lib_name_gt == "polars" else int(ibis_rows.to_pandas())

        # If n_head + n_tail is greater than the row count, display the entire table
@@ -2283,11 +2304,11 @@ def _generate_display_table(
            data_subset = data

            if row_number_list is None:
-                row_number_list = range(1, n_rows + 1)
+                row_number_list = list(range(1, n_rows + 1))
        else:
            # Get the first n and last n rows of the table
-            data_head = data.head(n_head)
-            data_tail = data.filter(
+            data_head = data.head(n_head)  # type: ignore[union-attr]
+            data_tail = data.filter(  # type: ignore[union-attr]
                [ibis.row_number() >= (n_rows - n_tail), ibis.row_number() <= n_rows]
            )
            data_subset = data_head.union(data_tail)
@@ -2299,9 +2320,9 @@ def _generate_display_table(

        # Convert either to Polars or Pandas depending on the available library
        if df_lib_name_gt == "polars":
-            data = data_subset.to_polars()
+            data = data_subset.to_polars()  # type: ignore[union-attr]
        else:
-            data = data_subset.to_pandas()
+            data = data_subset.to_pandas()  # type: ignore[union-attr]

    # From a DataFrame:
    # - get the row count
@@ -2312,17 +2333,18 @@ def _generate_display_table(
    tbl_schema = Schema(tbl=data)

    if tbl_type == "polars":
-        n_rows = int(data.height)
+        # Note: polars DataFrames have height, head(), tail() attributes
+        n_rows = int(data.height)  # type: ignore[union-attr]

        # If n_head + n_tail is greater than the row count, display the entire table
        if n_head + n_tail >= n_rows:
            full_dataset = True

            if row_number_list is None:
-                row_number_list = range(1, n_rows + 1)
+                row_number_list = list(range(1, n_rows + 1))

        else:
-            data = pl.concat([data.head(n=n_head), data.tail(n=n_tail)])
+            data = pl.concat([data.head(n=n_head), data.tail(n=n_tail)])  # type: ignore[union-attr]

            if row_number_list is None:
                row_number_list = list(range(1, n_head + 1)) + list(
@@ -2330,40 +2352,42 @@ def _generate_display_table(
                )

    if tbl_type == "pandas":
-        n_rows = data.shape[0]
+        # Note: pandas DataFrames have shape, head(), tail() attributes
+        n_rows = data.shape[0]  # type: ignore[union-attr]

        # If n_head + n_tail is greater than the row count, display the entire table
        if n_head + n_tail >= n_rows:
            full_dataset = True
            data_subset = data

-            row_number_list = range(1, n_rows + 1)
+            row_number_list = list(range(1, n_rows + 1))
        else:
-            data = pd.concat([data.head(n=n_head), data.tail(n=n_tail)])
+            data = pd.concat([data.head(n=n_head), data.tail(n=n_tail)])  # type: ignore[union-attr]

            row_number_list = list(range(1, n_head + 1)) + list(
                range(n_rows - n_tail + 1, n_rows + 1)
            )

    if tbl_type == "pyspark":
-        n_rows = data.count()
+        # Note: pyspark DataFrames have count(), toPandas(), limit(), tail(), sparkSession
+        n_rows = data.count()  # type: ignore[union-attr]

        # If n_head + n_tail is greater than the row count, display the entire table
        if n_head + n_tail >= n_rows:
            full_dataset = True
            # Convert to pandas for Great Tables compatibility
-            data = data.toPandas()
+            data = data.toPandas()  # type: ignore[union-attr]

-            row_number_list = range(1, n_rows + 1)
+            row_number_list = list(range(1, n_rows + 1))
        else:
            # Get head and tail samples, then convert to pandas
-            head_data = data.limit(n_head).toPandas()
+            head_data = data.limit(n_head).toPandas()  # type: ignore[union-attr]

            # PySpark tail() returns a list of Row objects, need to convert to DataFrame
-            tail_rows = data.tail(n_tail)
+            tail_rows = data.tail(n_tail)  # type: ignore[union-attr]
            if tail_rows:
                # Convert list of Row objects back to DataFrame, then to pandas
-                tail_df = data.sparkSession.createDataFrame(tail_rows, data.schema)
+                tail_df = data.sparkSession.createDataFrame(tail_rows, data.schema)  # type: ignore[union-attr]
                tail_data = tail_df.toPandas()
            else:
                # If no tail data, create empty DataFrame with same schema
@@ -2391,14 +2415,14 @@ def _generate_display_table(
    tbl_schema = Schema(tbl=data)

    # From the table schema, get a list of tuples containing column names and data types
-
+    col_dtype_list = tbl_schema.columns or []

    # Extract the column names from the list of tuples (first element of each tuple)
-    col_names = [col[0] for col in
+    col_names = [col[0] for col in col_dtype_list]

    # Iterate over the list of tuples and create a new dictionary with the
    # column names and data types
-    col_dtype_dict = {k: v for k, v in
+    col_dtype_dict = {k: v for k, v in col_dtype_list}

    # Create short versions of the data types by omitting any text in parentheses
    col_dtype_dict_short = {
@@ -2497,21 +2521,21 @@ def _generate_display_table(
    # Prepend a column that contains the row numbers if `show_row_numbers=True`
    if show_row_numbers or has_leading_row_num_col:
        if has_leading_row_num_col:
-            row_number_list = data["_row_num_"].to_list()
+            row_number_list = data["_row_num_"].to_list()  # type: ignore[union-attr]

        else:
            if df_lib_name_gt == "polars":
                import polars as pl

                row_number_series = pl.Series("_row_num_", row_number_list)
-                data = data.insert_column(0, row_number_series)
+                data = data.insert_column(0, row_number_series)  # type: ignore[union-attr]

            if df_lib_name_gt == "pandas":
-                data.insert(0, "_row_num_", row_number_list)
+                data.insert(0, "_row_num_", row_number_list)  # type: ignore[union-attr]

            if df_lib_name_gt == "pyspark":
                # For PySpark converted to pandas, use pandas method
-                data.insert(0, "_row_num_", row_number_list)
+                data.insert(0, "_row_num_", row_number_list)  # type: ignore[union-attr]

    # Get the highest number in the `row_number_list` and calculate a width that will
    # safely fit a number of that magnitude
@@ -2620,7 +2644,7 @@ def _generate_display_table(
    return gt_tbl


-def missing_vals_tbl(data:
+def missing_vals_tbl(data: Any) -> GT:
    """
    Display a table that shows the missing values in the input table.

@@ -3221,7 +3245,7 @@ def _get_column_names_safe(data: Any) -> list[str]:
        return list(data.columns)  # pragma: no cover


-def _get_column_names(data:
+def _get_column_names(data: Any, ibis_tbl: bool, df_lib_name_gt: str) -> list[str]:
    if ibis_tbl:
        return data.columns if df_lib_name_gt == "polars" else list(data.columns)

@@ -3245,12 +3269,10 @@ def _validate_columns_subset(
        )
        return columns_subset

-    return columns_subset.resolve(columns=col_names)
+    return columns_subset.resolve(columns=col_names)  # type: ignore[union-attr]


-def _select_columns(
-    data: FrameT | Any, resolved_columns: list[str], ibis_tbl: bool, tbl_type: str
-) -> FrameT | Any:
+def _select_columns(data: Any, resolved_columns: list[str], ibis_tbl: bool, tbl_type: str) -> Any:
    if ibis_tbl:
        return data[resolved_columns]
    if tbl_type == "polars":
@@ -3258,7 +3280,7 @@ def _select_columns(
        return data[resolved_columns]


-def get_column_count(data:
+def get_column_count(data: Any) -> int:
    """
    Get the number of columns in a table.

@@ -3470,7 +3492,7 @@ def _extract_enum_values(set_values: Any) -> list[Any]:
        return [set_values]


-def get_row_count(data:
+def get_row_count(data: Any) -> int:
    """
    Get the number of rows in a table.

@@ -3723,18 +3745,46 @@ class _ValidationInfo:
    insertion order, ensuring notes appear in a consistent sequence in reports and logs.
    """

+    @classmethod
+    def from_agg_validator(
+        cls,
+        assertion_type: str,
+        columns: _PBUnresolvedColumn,
+        value: float | Column | ReferenceColumn,
+        tol: Tolerance = 0,
+        thresholds: float | bool | tuple | dict | Thresholds | None = None,
+        brief: str | bool = False,
+        actions: Actions | None = None,
+        active: bool = True,
+    ) -> _ValidationInfo:
+        # This factory method creates a `_ValidationInfo` instance for aggregate
+        # methods. The reason this is created, is because all agg methods share the same
+        # signature so instead of instantiating the class directly each time, this method
+        # can be used to reduce redundancy, boilerplate and mistakes :)
+        _check_thresholds(thresholds=thresholds)
+
+        return cls(
+            assertion_type=assertion_type,
+            column=_resolve_columns(columns),
+            values={"value": value, "tol": tol},
+            thresholds=_normalize_thresholds_creation(thresholds),
+            brief=_transform_auto_brief(brief=brief),
+            actions=actions,
+            active=active,
+        )
+
    # Validation plan
    i: int | None = None
    i_o: int | None = None
    step_id: str | None = None
    sha1: str | None = None
    assertion_type: str | None = None
-    column:
-    values:
+    column: Any | None = None
+    values: Any | list[Any] | tuple | None = None
    inclusive: tuple[bool, bool] | None = None
    na_pass: bool | None = None
    pre: Callable | None = None
-    segments:
+    segments: Any | None = None
    thresholds: Thresholds | None = None
    actions: Actions | None = None
    label: str | None = None
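Because every aggregate validation method shares this signature, the factory collapses what would otherwise be repeated `_ValidationInfo(...)` construction into one call. A hypothetical direct invocation, using only names visible in this diff (in practice it is called via `Validate._add_agg_validation()`):

```python
from pointblank.validate import _ValidationInfo

# Build one aggregate validation step: "the sum of column 'revenue'
# should equal 1_000_000, within a tolerance of 0.01".
val_info = _ValidationInfo.from_agg_validator(
    assertion_type="col_sum_eq",  # aggregate assertion name
    columns="revenue",            # resolved internally via _resolve_columns()
    value=1_000_000,              # numeric target; Column/ReferenceColumn also allowed
    tol=0.01,
)
```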
@@ -3753,14 +3803,14 @@ class _ValidationInfo:
    error: bool | None = None
    critical: bool | None = None
    failure_text: str | None = None
-    tbl_checked:
-    extract:
-    val_info: dict[str,
+    tbl_checked: Any = None
+    extract: Any = None
+    val_info: dict[str, Any] | None = None
    time_processed: str | None = None
    proc_duration_s: float | None = None
    notes: dict[str, dict[str, str]] | None = None

-    def get_val_info(self) -> dict[str,
+    def get_val_info(self) -> dict[str, Any] | None:
        return self.val_info

    def _add_note(self, key: str, markdown: str, text: str | None = None) -> None:
@@ -3936,7 +3986,7 @@ class _ValidationInfo:
        return self.notes is not None and len(self.notes) > 0


-def _handle_connection_errors(e: Exception, connection_string: str) ->
+def _handle_connection_errors(e: Exception, connection_string: str) -> NoReturn:
    """
    Shared error handling for database connection failures.

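The `NoReturn` annotation (one of the new `typing` imports above) declares that this helper never returns normally: every code path must raise. A minimal illustration of the contract:

```python
from typing import NoReturn


def _always_raises(msg: str) -> NoReturn:
    # Type checkers treat any code after a call to this function as
    # unreachable, which is exactly what a shared error handler wants.
    raise ConnectionError(msg)
```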
@@ -4301,6 +4351,18 @@ class Validate:
        locale's rules. Examples include `"en-US"` for English (United States) and `"fr-FR"` for
        French (France). More simply, this can be a language identifier without a designation of
        territory, like `"es"` for Spanish.
+    owner
+        An optional string identifying the owner of the data being validated. This is useful for
+        governance purposes, indicating who is responsible for the quality and maintenance of the
+        data. For example, `"data-platform-team"` or `"analytics-engineering"`.
+    consumers
+        An optional string or list of strings identifying who depends on or consumes this data.
+        This helps document data dependencies and can be useful for impact analysis when data
+        quality issues are detected. For example, `"ml-team"` or `["ml-team", "analytics"]`.
+    version
+        An optional string representing the version of the validation plan or data contract. This
+        supports semantic versioning (e.g., `"1.0.0"`, `"2.1.0"`) and is useful for tracking changes
+        to validation rules over time and for organizational governance.

    Returns
    -------
@@ -4777,7 +4839,8 @@ class Validate:
        when table specifications are missing or backend dependencies are not installed.
    """

-    data:
+    data: IntoDataFrame
+    reference: IntoFrame | None = None
    tbl_name: str | None = None
    label: str | None = None
    thresholds: int | float | bool | tuple | dict | Thresholds | None = None
@@ -4786,11 +4849,18 @@ class Validate:
    brief: str | bool | None = None
    lang: str | None = None
    locale: str | None = None
+    owner: str | None = None
+    consumers: str | list[str] | None = None
+    version: str | None = None

    def __post_init__(self):
        # Process data through the centralized data processing pipeline
        self.data = _process_data(self.data)

+        # Process reference data if provided
+        if self.reference is not None:
+            self.reference = _process_data(self.reference)
+
        # Check input of the `thresholds=` argument
        _check_thresholds(thresholds=self.thresholds)

@@ -4826,6 +4896,36 @@ class Validate:
        # Transform any shorthands of `brief` to string representations
        self.brief = _transform_auto_brief(brief=self.brief)

+        # Validate and normalize the `owner` parameter
+        if self.owner is not None and not isinstance(self.owner, str):
+            raise TypeError(
+                "The `owner=` parameter must be a string representing the owner of the data. "
+                f"Received type: {type(self.owner).__name__}"
+            )
+
+        # Validate and normalize the `consumers` parameter
+        if self.consumers is not None:
+            if isinstance(self.consumers, str):
+                self.consumers = [self.consumers]
+            elif isinstance(self.consumers, list):
+                if not all(isinstance(c, str) for c in self.consumers):
+                    raise TypeError(
+                        "The `consumers=` parameter must be a string or a list of strings. "
+                        "All elements in the list must be strings."
+                    )
+            else:
+                raise TypeError(
+                    "The `consumers=` parameter must be a string or a list of strings. "
+                    f"Received type: {type(self.consumers).__name__}"
+                )
+
+        # Validate the `version` parameter
+        if self.version is not None and not isinstance(self.version, str):
+            raise TypeError(
+                "The `version=` parameter must be a string representing the version. "
+                f"Received type: {type(self.version).__name__}"
+            )
+
        # TODO: Add functionality to obtain the column names and types from the table
        self.col_names = None
        self.col_types = None
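Taken together, the three governance fields validate eagerly in `__post_init__`, so a bad value fails at construction time rather than at interrogation time. A usage sketch based on the types and error messages shown above:

```python
import pointblank as pb
import polars as pl

df = pl.DataFrame({"a": [1, 2, 3]})

validation = pb.Validate(
    data=df,
    owner="data-platform-team",          # must be a str
    consumers=["ml-team", "analytics"],  # a bare str is normalized to [str]
    version="1.0.0",                     # must be a str
)

# Non-string input fails fast with a TypeError, e.g.:
# pb.Validate(data=df, owner=42)
```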
@@ -4835,9 +4935,107 @@ class Validate:

        self.validation_info = []

+    def _add_agg_validation(
+        self,
+        *,
+        assertion_type: str,
+        columns: str | Collection[str],
+        value,
+        tol=0,
+        thresholds=None,
+        brief=False,
+        actions=None,
+        active=True,
+    ):
+        """
+        Add an aggregation-based validation step to the validation plan.
+
+        This internal method is used by all aggregation-based column validation methods
+        (e.g., `col_sum_eq`, `col_avg_gt`, `col_sd_le`) to create and register validation
+        steps. It relies heavily on the `_ValidationInfo.from_agg_validator()` class method.
+
+        Automatic Reference Inference
+        -----------------------------
+        When `value` is None and reference data has been set on the Validate object,
+        this method automatically creates a `ReferenceColumn` pointing to the same
+        column name in the reference data. This enables a convenient shorthand:
+
+        .. code-block:: python
+
+            # Instead of writing:
+            Validate(data=df, reference=ref_df).col_sum_eq("a", ref("a"))
+
+            # You can simply write:
+            Validate(data=df, reference=ref_df).col_sum_eq("a")
+
+        If `value` is None and no reference data is set, a `ValueError` is raised
+        immediately to provide clear feedback to the user.
+
+        Parameters
+        ----------
+        assertion_type
+            The type of assertion (e.g., "col_sum_eq", "col_avg_gt").
+        columns
+            Column name or collection of column names to validate.
+        value
+            The target value to compare against. Can be:
+            - A numeric literal (int or float)
+            - A `Column` object for cross-column comparison
+            - A `ReferenceColumn` object for reference data comparison
+            - None to automatically use `ref(column)` when reference data is set
+        tol
+            Tolerance for the comparison. Defaults to 0.
+        thresholds
+            Custom thresholds for the validation step.
+        brief
+            Brief description or auto-generate flag.
+        actions
+            Actions to take based on validation results.
+        active
+            Whether this validation step is active.
+
+        Returns
+        -------
+        Validate
+            The Validate instance for method chaining.
+
+        Raises
+        ------
+        ValueError
+            If `value` is None and no reference data is set on the Validate object.
+        """
+        if isinstance(columns, str):
+            columns = [columns]
+        for column in columns:
+            # If value is None, default to referencing the same column from reference data
+            resolved_value = value
+            if value is None:
+                if self.reference is None:
+                    raise ValueError(
+                        f"The 'value' parameter is required for {assertion_type}() "
+                        "when no reference data is set. Either provide a value, or "
+                        "set reference data on the Validate object using "
+                        "Validate(data=..., reference=...)."
+                    )
+                resolved_value = ReferenceColumn(column_name=column)
+
+            val_info = _ValidationInfo.from_agg_validator(
+                assertion_type=assertion_type,
+                columns=column,
+                value=resolved_value,
+                tol=tol,
+                thresholds=self.thresholds if thresholds is None else thresholds,
+                actions=self.actions if actions is None else actions,
+                brief=self.brief if brief is None else brief,
+                active=active,
+            )
+            self._add_validation(validation_info=val_info)
+
+        return self
+
    def set_tbl(
        self,
-        tbl:
+        tbl: Any,
        tbl_name: str | None = None,
        label: str | None = None,
    ) -> Validate:
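Because `_add_agg_validation()` iterates over the resolved column list, passing several columns fans out into one validation step per column, and omitting `value` falls back to the same-named column in the `reference=` table. A sketch assuming `col_sum_eq()` delegates here, as the docstring indicates:

```python
import pointblank as pb
import polars as pl

df = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
ref_df = pl.DataFrame({"a": [1, 2], "b": [3, 4]})

validation = (
    pb.Validate(data=df, reference=ref_df)
    # Two steps are registered, one per column; with value omitted, each
    # compares against the same-named column in ref_df.
    .col_sum_eq(["a", "b"])
    .interrogate()
)
```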
@@ -4980,7 +5178,7 @@ class Validate:
        na_pass: bool = False,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -5214,7 +5412,6 @@ class Validate:
        - Row 1: `c` is `1` and `b` is `2`.
        - Row 3: `c` is `2` and `b` is `2`.
        """
-
        assertion_type = _get_fn_name()

        _check_column(column=columns)
@@ -5234,14 +5431,7 @@ class Validate:
            self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
        )

-
-        # resolve the columns
-        if isinstance(columns, (ColumnSelector, nw.selectors.Selector)):
-            columns = col(columns)
-
-        # If `columns` is Column value or a string, place it in a list for iteration
-        if isinstance(columns, (Column, str)):
-            columns = [columns]
+        columns = _resolve_columns(columns)

        # Determine brief to use (global or local) and transform any shorthands of `brief=`
        brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
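The deleted inline logic shows what `_resolve_columns()` now centralizes for every validation method. A sketch of the equivalent behavior, reconstructed from the removed lines rather than from the helper's actual source:

```python
import narwhals as nw

from pointblank.column import Column, ColumnSelector, col


def _resolve_columns_sketch(columns):
    # Selector inputs are first wrapped into a column expression...
    if isinstance(columns, (ColumnSelector, nw.selectors.Selector)):
        columns = col(columns)
    # ...then a bare Column or string is boxed into a list for iteration.
    if isinstance(columns, (Column, str)):
        columns = [columns]
    return columns
```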
@@ -5272,7 +5462,7 @@ class Validate:
        na_pass: bool = False,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -5563,7 +5753,7 @@ class Validate:
        na_pass: bool = False,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -5854,7 +6044,7 @@ class Validate:
        na_pass: bool = False,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -6143,7 +6333,7 @@ class Validate:
        na_pass: bool = False,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -6435,7 +6625,7 @@ class Validate:
        na_pass: bool = False,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -6729,7 +6919,7 @@ class Validate:
        na_pass: bool = False,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -7049,7 +7239,7 @@ class Validate:
        na_pass: bool = False,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -7366,7 +7556,7 @@ class Validate:
        set: Collection[Any],
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -7683,7 +7873,7 @@ class Validate:
        set: Collection[Any],
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -7974,7 +8164,7 @@ class Validate:
        na_pass: bool = False,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -8162,7 +8352,7 @@ class Validate:
        na_pass: bool = False,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -8347,7 +8537,7 @@ class Validate:
        columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -8590,7 +8780,7 @@ class Validate:
        columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -8836,7 +9026,7 @@ class Validate:
        inverse: bool = False,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -9099,7 +9289,7 @@ class Validate:
        na_pass: bool = False,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -9379,10 +9569,10 @@ class Validate:

    def col_vals_expr(
        self,
-        expr:
+        expr: Any,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -9600,7 +9790,7 @@ class Validate:
    def col_exists(
        self,
        columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -10072,7 +10262,7 @@ class Validate:
        columns_subset: str | list[str] | None = None,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -10313,7 +10503,7 @@ class Validate:
        columns_subset: str | list[str] | None = None,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -10558,7 +10748,7 @@ class Validate:
        max_concurrent: int = 3,
        pre: Callable | None = None,
        segments: SegmentSpec | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -10953,7 +11143,7 @@ class Validate:
        case_sensitive_dtypes: bool = True,
        full_match_dtypes: bool = True,
        pre: Callable | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -11169,11 +11359,11 @@ class Validate:

    def row_count_match(
        self,
-        count: int |
+        count: int | Any,
        tol: Tolerance = 0,
        inverse: bool = False,
        pre: Callable | None = None,
-        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
        actions: Actions | None = None,
        brief: str | bool | None = None,
        active: bool = True,
@@ -11386,12 +11576,375 @@ class Validate:
|
|
|
11386
11576
|
|
|
11387
11577
|
return self
|
|
11388
11578
|
|
|
11579
|
+
def data_freshness(
|
|
11580
|
+
self,
|
|
11581
|
+
column: str,
|
|
11582
|
+
max_age: str | datetime.timedelta,
|
|
11583
|
+
reference_time: datetime.datetime | str | None = None,
|
|
11584
|
+
timezone: str | None = None,
|
|
11585
|
+
allow_tz_mismatch: bool = False,
|
|
11586
|
+
pre: Callable | None = None,
|
|
11587
|
+
thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
|
|
11588
|
+
actions: Actions | None = None,
|
|
11589
|
+
brief: str | bool | None = None,
|
|
11590
|
+
active: bool = True,
|
|
11591
|
+
) -> Validate:
|
|
11592
|
+
"""
|
|
11593
|
+
Validate that data in a datetime column is not older than a specified maximum age.
|
|
11594
|
+
|
|
11595
|
+
The `data_freshness()` validation method checks whether the most recent timestamp in the
|
|
11596
|
+
specified datetime column is within the allowed `max_age=` from the `reference_time=` (which
|
|
11597
|
+
defaults to the current time). This is useful for ensuring data pipelines are delivering
|
|
11598
|
+
fresh data and for enforcing data SLAs.
|
|
11599
|
+
|
|
11600
|
+
This method helps detect stale data by comparing the maximum (most recent) value in a
|
|
11601
|
+
datetime column against an expected freshness threshold.
|
|
11602
|
+
|
|
11603
|
+
Parameters
|
|
11604
|
+
----------
|
|
11605
|
+
column
|
|
11606
|
+
The name of the datetime column to check for freshness. This column should contain
|
|
11607
|
+
date or datetime values.
|
|
11608
|
+
max_age
|
|
11609
|
+
The maximum allowed age of the data. Can be specified as: (1) a string with a
|
|
11610
|
+
human-readable duration like `"24 hours"`, `"1 day"`, `"30 minutes"`, `"2 weeks"`, etc.
|
|
11611
|
+
(supported units: `seconds`, `minutes`, `hours`, `days`, `weeks`), or (2) a
|
|
11612
|
+
`datetime.timedelta` object for precise control.
|
|
11613
|
+
reference_time
|
|
11614
|
+
The reference point in time to compare against. Defaults to `None`, which uses the
|
|
11615
|
+
current time (UTC if `timezone=` is not specified). Can be: (1) a `datetime.datetime`
|
|
11616
|
+
object (timezone-aware recommended), (2) a string in ISO 8601 format (e.g.,
|
|
11617
|
+
`"2024-01-15T10:30:00"` or `"2024-01-15T10:30:00+05:30"`), or (3) `None` to use the
|
|
11618
|
+
current time.
|
|
11619
|
+
timezone
|
|
11620
|
+
The timezone to use for interpreting the data and reference time. Accepts IANA
|
|
11621
|
+
timezone names (e.g., `"America/New_York"`), hour offsets (e.g., `"-7"`), or ISO 8601
|
|
11622
|
+
offsets (e.g., `"-07:00"`). When `None` (default), naive datetimes are treated as UTC.
|
|
11623
|
+
See the *The `timezone=` Parameter* section for details.
|
|
11624
|
+
allow_tz_mismatch
|
|
11625
|
+
Whether to allow timezone mismatches between the column data and reference time.
|
|
11626
|
+
By default (`False`), a warning note is added when comparing timezone-naive with
|
|
11627
|
+
timezone-aware datetimes. Set to `True` to suppress these warnings.
|
|
11628
|
+
pre
|
|
11629
|
+
An optional preprocessing function or lambda to apply to the data table during
|
|
11630
|
+
interrogation. This function should take a table as input and return a modified table.
|
|
11631
|
+
thresholds
|
|
11632
|
+
Set threshold failure levels for reporting and reacting to exceedences of the levels.
|
|
11633
|
+
The thresholds are set at the step level and will override any global thresholds set in
|
|
11634
|
+
`Validate(thresholds=...)`. The default is `None`, which means that no thresholds will
|
|
11635
|
+
be set locally and global thresholds (if any) will take effect.
|
|
11636
|
+
actions
|
|
11637
|
+
Optional actions to take when the validation step meets or exceeds any set threshold
|
|
11638
|
+
levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
|
|
11639
|
+
define the actions.
|
|
11640
|
+
brief
|
|
11641
|
+
An optional brief description of the validation step that will be displayed in the
|
|
11642
|
+
reporting table. You can use the templating elements like `"{step}"` to insert
|
|
11643
|
+
the step number, or `"{auto}"` to include an automatically generated brief. If `True`
|
|
11644
|
+
the entire brief will be automatically generated. If `None` (the default) then there
|
|
11645
|
+
won't be a brief.
|
|
11646
|
+
active
|
|
11647
|
+
A boolean value indicating whether the validation step should be active. Using `False`
|
|
11648
|
+
will make the validation step inactive (still reporting its presence and keeping indexes
|
|
11649
|
+
for the steps unchanged).
|
|
11650
|
+
|
|
11651
|
+
Returns
|
|
11652
|
+
-------
|
|
11653
|
+
Validate
|
|
11654
|
+
The `Validate` object with the added validation step.
|
|
11655
|
+
|
|
11656
|
+
How Timezones Affect Freshness Checks
|
|
11657
|
+
-------------------------------------
|
|
11658
|
+
Freshness validation involves comparing two times: the **data time** (the most recent
|
|
11659
|
+
timestamp in your column) and the **execution time** (when and where the validation runs).
|
|
11660
|
+
Timezone confusion typically arises because these two times may originate from different
|
|
11661
|
+
contexts.
|
|
11662
|
+
|
|
11663
|
+
Consider these common scenarios:
|
|
11664
|
+
|
|
11665
|
+
- your data timestamps are stored in UTC (common for databases), but you're running
|
|
11666
|
+
validation on your laptop in New York (Eastern Time)
|
|
11667
|
+
- you develop and test validation locally, then deploy it to a cloud workflow that runs
|
|
11668
|
+
in UTC—suddenly your 'same' validation behaves differently
|
|
11669
|
+
- your data comes from servers in multiple regions, each recording timestamps in their
|
|
11670
|
+
local timezone
|
|
11671
|
+
|
|
11672
|
+
The `timezone=` parameter exists to solve this problem by establishing a single, explicit
|
|
11673
|
+
timezone context for the freshness comparison. When you specify a timezone, Pointblank
|
|
11674
|
+
interprets both the data timestamps (if naive) and the execution time in that timezone,
|
|
11675
|
+
ensuring consistent behavior whether you run validation on your laptop or in a cloud
|
|
11676
|
+
workflow.
|
|
11677
|
+
|
|
11678
|
+
**Scenario 1: Data has timezone-aware datetimes**
|
|
11679
|
+
|
|
11680
|
+
```python
|
|
11681
|
+
# Your data column has values like: 2024-01-15 10:30:00+00:00 (UTC)
|
|
11682
|
+
# Comparison is straightforward as both sides have explicit timezones
|
|
11683
|
+
.data_freshness(column="updated_at", max_age="24 hours")
|
|
11684
|
+
```
|
|
11685
|
+
|
|
11686
|
+
**Scenario 2: Data has naive datetimes (no timezone)**
|
|
11687
|
+
|
|
11688
|
+
```python
|
|
11689
|
+
# Your data column has values like: 2024-01-15 10:30:00 (no timezone)
|
|
11690
|
+
# Specify the timezone the data was recorded in:
|
|
11691
|
+
.data_freshness(column="updated_at", max_age="24 hours", timezone="America/New_York")
|
|
11692
|
+
```
|
|
11693
|
+
|
|
11694
|
+
**Scenario 3: Ensuring consistent behavior across environments**
|
|
11695
|
+
|
|
11696
|
+
```python
|
|
11697
|
+
# Pin the timezone to ensure identical results whether running locally or in the cloud
|
|
11698
|
+
.data_freshness(
|
|
11699
|
+
column="updated_at",
|
|
11700
|
+
max_age="24 hours",
|
|
11701
|
+
timezone="UTC", # Explicit timezone removes environment dependence
|
|
11702
|
+
)
|
|
11703
|
+
```
|
|
11704
|
+
|
|
11705
|
+
The `timezone=` Parameter
|
|
11706
|
+
---------------------------
|
|
11707
|
+
The `timezone=` parameter accepts several convenient formats, making it easy to specify
|
|
11708
|
+
timezones in whatever way is most natural for your use case. The following examples
|
|
11709
|
+
illustrate the three supported input styles.
|
|
11710
|
+
|
|
11711
|
+
**IANA Timezone Names** (recommended for regions with daylight saving time):
|
|
11712
|
+
|
|
11713
|
+
```python
|
|
11714
|
+
timezone="America/New_York" # Eastern Time (handles DST automatically)
|
|
11715
|
+
timezone="Europe/London" # UK time
|
|
11716
|
+
timezone="Asia/Tokyo" # Japan Standard Time
|
|
11717
|
+
timezone="Australia/Sydney" # Australian Eastern Time
|
|
11718
|
+
timezone="UTC" # Coordinated Universal Time
|
|
11719
|
+
```
|
|
11720
|
+
|
|
11721
|
+
**Simple Hour Offsets** (quick and easy):
|
|
11722
|
+
|
|
11723
|
+
```python
|
|
11724
|
+
timezone="-7" # UTC-7 (e.g., Mountain Standard Time)
|
|
11725
|
+
timezone="+5" # UTC+5 (e.g., Pakistan Standard Time)
|
|
11726
|
+
timezone="0" # UTC
|
|
11727
|
+
timezone="-12" # UTC-12
|
|
11728
|
+
```
|
|
11729
|
+
|
|
11730
|
+
**ISO 8601 Offset Format** (precise, including fractional hours):
|
|
11731
|
+
|
|
11732
|
+
```python
|
|
11733
|
+
timezone="-07:00" # UTC-7
|
|
11734
|
+
timezone="+05:30" # UTC+5:30 (e.g., India Standard Time)
|
|
11735
|
+
timezone="+00:00" # UTC
|
|
11736
|
+
timezone="-09:30" # UTC-9:30
|
|
11737
|
+
```
|
|
11738
|
+
|
|
11739
|
+
When a timezone is specified:
|
|
11740
|
+
|
|
11741
|
+
- naive datetime values in the column are assumed to be in this timezone.
|
|
11742
|
+
- the reference time (if naive) is assumed to be in this timezone.
|
|
11743
|
+
- the validation report will show times in this timezone.
|
|
11744
|
+
|
|
11745
|
+
When `None` (default):
|
|
11746
|
+
|
|
11747
|
+
- if your column has timezone-aware datetimes, those timezones are used
|
|
11748
|
+
- if your column has naive datetimes, they're treated as UTC
|
|
11749
|
+
- the current time reference uses UTC
|
|
11750
|
+
|
|
11751
|
+
Note that IANA timezone names are preferred when daylight saving time transitions matter, as
|
|
11752
|
+
they automatically handle the offset changes. Fixed offsets like `"-7"` or `"-07:00"` do not
|
|
11753
|
+
account for DST.
|
|
11754
|
+
|
|
11755
|
+
Recommendations for Working with Timestamps
|
|
11756
|
+
-------------------------------------------
|
|
11757
|
+
When working with datetime data, storing timestamps in UTC in your databases is strongly
|
|
11758
|
+
recommended since it provides a consistent reference point regardless of where your data
|
|
11759
|
+
originates or where it's consumed. Using timezone-aware datetimes whenever possible helps
|
|
11760
|
+
avoid ambiguity—when a datetime has an explicit timezone, there's no guessing about what
|
|
11761
|
+
time it actually represents.
|
|
11762
|
+
|
|
11763
|
+
If you're working with naive datetimes (which lack timezone information), always specify the
|
|
11764
|
+
`timezone=` parameter so Pointblank knows how to interpret those values. When providing
|
|
11765
|
+
`reference_time=` as a string, use ISO 8601 format with the timezone offset included (e.g.,
|
|
11766
|
+
`"2024-01-15T10:30:00+00:00"`) to ensure unambiguous parsing. Finally, prefer IANA timezone
|
|
11767
|
+
names (like `"America/New_York"`) over fixed offsets (like `"-05:00"`) when daylight saving
|
|
11768
|
+
time transitions matter, since IANA names automatically handle the twice-yearly offset
|
|
11769
|
+
changes. To see all available IANA timezone names in Python, use
|
|
11770
|
+
`zoneinfo.available_timezones()` from the standard library's `zoneinfo` module.
|
|
11771
|
+
|
|
11772
|
+
Examples
|
|
11773
|
+
--------
|
|
11774
|
+
```{python}
|
|
11775
|
+
#| echo: false
|
|
11776
|
+
#| output: false
|
|
11777
|
+
import pointblank as pb
|
|
11778
|
+
pb.config(report_incl_header=False, report_incl_footer=False)
|
|
11779
|
+
```
|
|
11780
|
+
|
|
11781
|
+
The simplest use of `data_freshness()` requires just two arguments: the `column=` containing
|
|
11782
|
+
your timestamps and `max_age=` specifying how old the data can be. In this first example,
|
|
11783
|
+
we create sample data with an `"updated_at"` column containing timestamps from 1, 12, and
|
|
11784
|
+
20 hours ago. By setting `max_age="24 hours"`, we're asserting that the most recent
|
|
11785
|
+
timestamp should be within 24 hours of the current time. Since the newest record is only
|
|
11786
|
+
1 hour old, this validation passes.
|
|
11787
|
+
|
|
11788
|
+
```{python}
|
|
11789
|
+
import pointblank as pb
|
|
11790
|
+
import polars as pl
|
|
11791
|
+
from datetime import datetime, timedelta
|
|
11792
|
+
|
|
11793
|
+
# Create sample data with recent timestamps
|
|
11794
|
+
recent_data = pl.DataFrame({
|
|
11795
|
+
"id": [1, 2, 3],
|
|
11796
|
+
"updated_at": [
|
|
11797
|
+
datetime.now() - timedelta(hours=1),
|
|
11798
|
+
datetime.now() - timedelta(hours=12),
|
|
11799
|
+
datetime.now() - timedelta(hours=20),
|
|
11800
|
+
]
|
|
11801
|
+
})
|
|
11802
|
+
|
|
11803
|
+
validation = (
|
|
11804
|
+
pb.Validate(data=recent_data)
|
|
11805
|
+
.data_freshness(column="updated_at", max_age="24 hours")
|
|
11806
|
+
.interrogate()
|
|
11807
|
+
)
|
|
11808
|
+
|
|
11809
|
+
validation
|
|
11810
|
+
```
|
|
11811
|
+
|
|
11812
|
+
The `max_age=` parameter accepts human-readable strings with various time units. You can
|
|
11813
|
+
chain multiple `data_freshness()` calls to check different freshness thresholds
|
|
11814
|
+
simultaneously—useful for tiered SLAs where you might want warnings at 30 minutes but
|
|
11815
|
+
errors at 2 days. With the sample data above, the 30-minute check fails (the newest record is an hour old) while the 2-day and 1-week checks pass.
|
|
11816
|
+
|
|
11817
|
+
```{python}
|
|
11818
|
+
# Check data is fresh within different time windows
|
|
11819
|
+
validation = (
|
|
11820
|
+
pb.Validate(data=recent_data)
|
|
11821
|
+
.data_freshness(column="updated_at", max_age="30 minutes") # Very fresh
|
|
11822
|
+
.data_freshness(column="updated_at", max_age="2 days") # Reasonably fresh
|
|
11823
|
+
.data_freshness(column="updated_at", max_age="1 week") # Within a week
|
|
11824
|
+
.interrogate()
|
|
11825
|
+
)
|
|
11826
|
+
|
|
11827
|
+
validation
|
|
11828
|
+
```
|
|
11829
|
+
|
|
11830
|
+
When your data contains naive datetimes (timestamps without timezone information), use the
|
|
11831
|
+
`timezone=` parameter to specify what timezone those values represent. Here we have event
|
|
11832
|
+
data recorded in Eastern Time, so we set `timezone="America/New_York"` to ensure the
|
|
11833
|
+
freshness comparison is done correctly.
|
|
11834
|
+
|
|
11835
|
+
```{python}
|
|
11836
|
+
# Data with naive datetimes (assume they're in Eastern Time)
|
|
11837
|
+
eastern_data = pl.DataFrame({
|
|
11838
|
+
"event_time": [
|
|
11839
|
+
datetime.now() - timedelta(hours=2),
|
|
11840
|
+
datetime.now() - timedelta(hours=5),
|
|
11841
|
+
]
|
|
11842
|
+
})
|
|
11843
|
+
|
|
11844
|
+
validation = (
|
|
11845
|
+
pb.Validate(data=eastern_data)
|
|
11846
|
+
.data_freshness(
|
|
11847
|
+
column="event_time",
|
|
11848
|
+
max_age="12 hours",
|
|
11849
|
+
timezone="America/New_York" # Interpret times as Eastern
|
|
11850
|
+
)
|
|
11851
|
+
.interrogate()
|
|
11852
|
+
)
|
|
11853
|
+
|
|
11854
|
+
validation
|
|
11855
|
+
```
|
|
11856
|
+
|
|
11857
|
+
For reproducible validations or historical checks, you can use `reference_time=` to compare
|
|
11858
|
+
against a specific point in time instead of the current time. This is particularly useful
|
|
11859
|
+
for testing or when validating data snapshots. The reference time should include a timezone
|
|
11860
|
+
offset (like `+00:00` for UTC) to avoid ambiguity.
|
|
11861
|
+
|
|
11862
|
+
```{python}
|
|
11863
|
+
validation = (
|
|
11864
|
+
pb.Validate(data=recent_data)
|
|
11865
|
+
.data_freshness(
|
|
11866
|
+
column="updated_at",
|
|
11867
|
+
max_age="24 hours",
|
|
11868
|
+
reference_time="2024-01-15T12:00:00+00:00"
|
|
11869
|
+
)
|
|
11870
|
+
.interrogate()
|
|
11871
|
+
)
|
|
11872
|
+
|
|
11873
|
+
validation
|
|
11874
|
+
```
|
|
11875
|
+
"""
|
|
11876
|
+
|
|
11877
|
+
assertion_type = _get_fn_name()
|
|
11878
|
+
|
|
11879
|
+
_check_pre(pre=pre)
|
|
11880
|
+
_check_thresholds(thresholds=thresholds)
|
|
11881
|
+
_check_boolean_input(param=active, param_name="active")
|
|
11882
|
+
_check_boolean_input(param=allow_tz_mismatch, param_name="allow_tz_mismatch")
|
|
11883
|
+
|
|
11884
|
+
# Validate and parse the max_age parameter
|
|
11885
|
+
max_age_td = _parse_max_age(max_age)
|
|
11886
|
+
|
|
11887
|
+
# Validate the column parameter
|
|
11888
|
+
if not isinstance(column, str):
|
|
11889
|
+
raise TypeError(
|
|
11890
|
+
f"The `column` parameter must be a string, got {type(column).__name__}."
|
|
11891
|
+
)
|
|
11892
|
+
|
|
11893
|
+
# Validate the timezone parameter if provided
|
|
11894
|
+
if timezone is not None:
|
|
11895
|
+
_validate_timezone(timezone)
|
|
11896
|
+
|
|
11897
|
+
# Parse reference_time if it's a string
|
|
11898
|
+
parsed_reference_time = None
|
|
11899
|
+
if reference_time is not None:
|
|
11900
|
+
if isinstance(reference_time, str):
|
|
11901
|
+
parsed_reference_time = _parse_reference_time(reference_time)
|
|
11902
|
+
elif isinstance(reference_time, datetime.datetime):
|
|
11903
|
+
parsed_reference_time = reference_time
|
|
11904
|
+
else:
|
|
11905
|
+
raise TypeError(
|
|
11906
|
+
f"The `reference_time` parameter must be a string or datetime object, "
|
|
11907
|
+
f"got {type(reference_time).__name__}."
|
|
11908
|
+
)
|
|
11909
|
+
|
|
11910
|
+
# Determine threshold to use (global or local) and normalize a local `thresholds=` value
|
|
11911
|
+
thresholds = (
|
|
11912
|
+
self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
|
|
11913
|
+
)
|
|
11914
|
+
|
|
11915
|
+
# Package up the parameters for later interrogation
|
|
11916
|
+
values = {
|
|
11917
|
+
"max_age": max_age_td,
|
|
11918
|
+
"max_age_str": max_age if isinstance(max_age, str) else str(max_age),
|
|
11919
|
+
"reference_time": parsed_reference_time,
|
|
11920
|
+
"timezone": timezone,
|
|
11921
|
+
"allow_tz_mismatch": allow_tz_mismatch,
|
|
11922
|
+
}
|
|
11923
|
+
|
|
11924
|
+
# Determine brief to use (global or local) and transform any shorthands of `brief=`
|
|
11925
|
+
brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
|
|
11926
|
+
|
|
11927
|
+
val_info = _ValidationInfo(
|
|
11928
|
+
assertion_type=assertion_type,
|
|
11929
|
+
column=column,
|
|
11930
|
+
values=values,
|
|
11931
|
+
pre=pre,
|
|
11932
|
+
thresholds=thresholds,
|
|
11933
|
+
actions=actions,
|
|
11934
|
+
brief=brief,
|
|
11935
|
+
active=active,
|
|
11936
|
+
)
|
|
11937
|
+
|
|
11938
|
+
self._add_validation(validation_info=val_info)
|
|
11939
|
+
|
|
11940
|
+
return self
|
|
11941
|
+
|
|
11389
11942
|
def col_count_match(
|
|
11390
11943
|
self,
|
|
11391
|
-
count: int | any,
|
|
11944
|
+
count: int | Any,
|
|
11392
11945
|
inverse: bool = False,
|
|
11393
11946
|
pre: Callable | None = None,
|
|
11394
|
-
thresholds: int | float | bool | tuple | dict | Thresholds = None,
|
|
11947
|
+
thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
|
|
11395
11948
|
actions: Actions | None = None,
|
|
11396
11949
|
brief: str | bool | None = None,
|
|
11397
11950
|
active: bool = True,
|
|
@@ -11564,9 +12117,9 @@ class Validate:
|
|
|
11564
12117
|
|
|
11565
12118
|
def tbl_match(
|
|
11566
12119
|
self,
|
|
11567
|
-
tbl_compare: any,
|
|
12120
|
+
tbl_compare: Any,
|
|
11568
12121
|
pre: Callable | None = None,
|
|
11569
|
-
thresholds: int | float | bool | tuple | dict | Thresholds = None,
|
|
12122
|
+
thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
|
|
11570
12123
|
actions: Actions | None = None,
|
|
11571
12124
|
brief: str | bool | None = None,
|
|
11572
12125
|
active: bool = True,
|
|
@@ -11835,7 +12388,7 @@ class Validate:
|
|
|
11835
12388
|
self,
|
|
11836
12389
|
*exprs: Callable,
|
|
11837
12390
|
pre: Callable | None = None,
|
|
11838
|
-
thresholds: int | float | bool | tuple | dict | Thresholds = None,
|
|
12391
|
+
thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
|
|
11839
12392
|
actions: Actions | None = None,
|
|
11840
12393
|
brief: str | bool | None = None,
|
|
11841
12394
|
active: bool = True,
|
|
@@ -12083,7 +12636,7 @@ class Validate:
|
|
|
12083
12636
|
self,
|
|
12084
12637
|
expr: Callable,
|
|
12085
12638
|
pre: Callable | None = None,
|
|
12086
|
-
thresholds: int | float | bool | tuple | dict | Thresholds = None,
|
|
12639
|
+
thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
|
|
12087
12640
|
actions: Actions | None = None,
|
|
12088
12641
|
brief: str | bool | None = None,
|
|
12089
12642
|
active: bool = True,
|
|
@@ -12577,7 +13130,7 @@ class Validate:
|
|
|
12577
13130
|
segment = validation.segments
|
|
12578
13131
|
|
|
12579
13132
|
# Get compatible data types for this assertion type
|
|
12580
|
-
assertion_method = ASSERTION_TYPE_METHOD_MAP[assertion_type]
|
|
13133
|
+
assertion_method = ASSERTION_TYPE_METHOD_MAP.get(assertion_type, assertion_type)
|
|
12581
13134
|
compatible_dtypes = COMPATIBLE_DTYPES.get(assertion_method, [])
|
|
12582
13135
|
|
|
12583
13136
|
# Process the `brief` text for the validation step by including template variables to
|
|
@@ -12632,7 +13185,11 @@ class Validate:
|
|
|
12632
13185
|
|
|
12633
13186
|
# Make a deep copy of the table for this step to ensure proper isolation
|
|
12634
13187
|
# This prevents modifications from one validation step affecting others
|
|
12635
|
-
|
|
13188
|
+
try:
|
|
13189
|
+
# TODO: This copying should be scrutinized further
|
|
13190
|
+
data_tbl_step: IntoDataFrame = _copy_dataframe(data_tbl)
|
|
13191
|
+
except Exception as e: # pragma: no cover
|
|
13192
|
+
data_tbl_step: IntoDataFrame = data_tbl # pragma: no cover
|
|
12636
13193
|
|
|
12637
13194
|
# Capture original table dimensions and columns before preprocessing
|
|
12638
13195
|
# (only if preprocessing is present - we'll set these inside the preprocessing block)
|
|
@@ -12793,6 +13350,8 @@ class Validate:
|
|
|
12793
13350
|
"col_schema_match",
|
|
12794
13351
|
"row_count_match",
|
|
12795
13352
|
"col_count_match",
|
|
13353
|
+
"data_freshness",
|
|
13354
|
+
"tbl_match",
|
|
12796
13355
|
]
|
|
12797
13356
|
|
|
12798
13357
|
if validation.n == 0 and assertion_type not in table_level_assertions:
|
|
@@ -13053,8 +13612,107 @@ class Validate:
|
|
|
13053
13612
|
|
|
13054
13613
|
results_tbl = None
|
|
13055
13614
|
|
|
13056
|
-
elif assertion_type == "
|
|
13057
|
-
from pointblank._interrogation import tbl_match
|
|
13615
|
+
elif assertion_type == "data_freshness":
|
|
13616
|
+
from pointblank._interrogation import data_freshness as data_freshness_check
|
|
13617
|
+
|
|
13618
|
+
freshness_result = data_freshness_check(
|
|
13619
|
+
data_tbl=data_tbl_step,
|
|
13620
|
+
column=column,
|
|
13621
|
+
max_age=value["max_age"],
|
|
13622
|
+
reference_time=value["reference_time"],
|
|
13623
|
+
timezone=value["timezone"],
|
|
13624
|
+
allow_tz_mismatch=value["allow_tz_mismatch"],
|
|
13625
|
+
)
|
|
13626
|
+
|
|
13627
|
+
result_bool = freshness_result["passed"]
|
|
13628
|
+
validation.all_passed = result_bool
|
|
13629
|
+
validation.n = 1
|
|
13630
|
+
validation.n_passed = int(result_bool)
|
|
13631
|
+
validation.n_failed = 1 - int(result_bool)
|
|
13632
|
+
|
|
13633
|
+
# Store the freshness check details for reporting
|
|
13634
|
+
validation.val_info = freshness_result
|
|
13635
|
+
|
|
13636
|
+
# Update the values dict with actual computed values for failure text
|
|
13637
|
+
if freshness_result.get("age") is not None:
|
|
13638
|
+
value["age"] = freshness_result["age"]
|
|
13639
|
+
|
|
13640
|
+
# Add timezone warning note if applicable
|
|
13641
|
+
if freshness_result.get("tz_warning_key"):
|
|
13642
|
+
tz_key = freshness_result["tz_warning_key"]
|
|
13643
|
+
tz_warning_text = NOTES_TEXT.get(tz_key, {}).get(
|
|
13644
|
+
self.locale, NOTES_TEXT.get(tz_key, {}).get("en", "")
|
|
13645
|
+
)
|
|
13646
|
+
validation._add_note(
|
|
13647
|
+
key="tz_warning",
|
|
13648
|
+
markdown=f"⚠️ {tz_warning_text}",
|
|
13649
|
+
text=tz_warning_text,
|
|
13650
|
+
)
|
|
13651
|
+
|
|
13652
|
+
# Add note about column being empty if applicable
|
|
13653
|
+
if freshness_result.get("column_empty"):
|
|
13654
|
+
column_empty_text = NOTES_TEXT.get(
|
|
13655
|
+
"data_freshness_column_empty", {}
|
|
13656
|
+
).get(
|
|
13657
|
+
self.locale,
|
|
13658
|
+
NOTES_TEXT.get("data_freshness_column_empty", {}).get(
|
|
13659
|
+
"en", "The datetime column is empty (no values to check)."
|
|
13660
|
+
),
|
|
13661
|
+
)
|
|
13662
|
+
validation._add_note(
|
|
13663
|
+
key="column_empty",
|
|
13664
|
+
markdown=f"⚠️ {column_empty_text}",
|
|
13665
|
+
text=column_empty_text,
|
|
13666
|
+
)
|
|
13667
|
+
|
|
13668
|
+
# Add informational note about the freshness check
|
|
13669
|
+
if freshness_result.get("max_datetime") and freshness_result.get("age"):
|
|
13670
|
+
max_dt = freshness_result["max_datetime"]
|
|
13671
|
+
# Format datetime without microseconds for cleaner display
|
|
13672
|
+
if hasattr(max_dt, "replace"):
|
|
13673
|
+
max_dt_display = max_dt.replace(microsecond=0)
|
|
13674
|
+
else:
|
|
13675
|
+
max_dt_display = max_dt
|
|
13676
|
+
age = freshness_result["age"]
|
|
13677
|
+
age_str = _format_timedelta(age)
|
|
13678
|
+
max_age_str = _format_timedelta(value["max_age"])
|
|
13679
|
+
|
|
13680
|
+
# Get translated template for pass/fail
|
|
13681
|
+
if result_bool:
|
|
13682
|
+
details_key = "data_freshness_details_pass"
|
|
13683
|
+
prefix = "✓"
|
|
13684
|
+
else:
|
|
13685
|
+
details_key = "data_freshness_details_fail"
|
|
13686
|
+
prefix = "✗"
|
|
13687
|
+
|
|
13688
|
+
details_template = NOTES_TEXT.get(details_key, {}).get(
|
|
13689
|
+
self.locale,
|
|
13690
|
+
NOTES_TEXT.get(details_key, {}).get(
|
|
13691
|
+
"en",
|
|
13692
|
+
"Most recent data: `{max_dt}` (age: {age}, max allowed: {max_age})",
|
|
13693
|
+
),
|
|
13694
|
+
)
|
|
13695
|
+
|
|
13696
|
+
# Format the template with values
|
|
13697
|
+
note_text = details_template.format(
|
|
13698
|
+
max_dt=max_dt_display, age=age_str, max_age=max_age_str
|
|
13699
|
+
)
|
|
13700
|
+
# For markdown, make the age bold
|
|
13701
|
+
note_md_template = details_template.replace(
|
|
13702
|
+
"(age: {age}", "(age: **{age}**"
|
|
13703
|
+
)
|
|
13704
|
+
note_md = f"{prefix} {note_md_template.format(max_dt=max_dt_display, age=age_str, max_age=max_age_str)}"
|
|
13705
|
+
|
|
13706
|
+
validation._add_note(
|
|
13707
|
+
key="freshness_details",
|
|
13708
|
+
markdown=note_md,
|
|
13709
|
+
text=note_text,
|
|
13710
|
+
)
|
|
13711
|
+
|
|
13712
|
+
results_tbl = None
|
|
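In miniature, the decision this branch records is a single age comparison. A hedged sketch of the assumed semantics (the real logic lives in `pointblank._interrogation.data_freshness` and also handles naive values, timezone mismatches, and empty columns):

```python
import datetime

# Sketch only: newest timestamp measured against the reference time
def is_fresh(newest: datetime.datetime, max_age: datetime.timedelta,
             reference_time: datetime.datetime | None = None) -> bool:
    # `newest` and the reference must agree on timezone-awareness
    ref = reference_time or datetime.datetime.now(datetime.timezone.utc)
    return (ref - newest) <= max_age
```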
13713
|
+
|
|
13714
|
+
elif assertion_type == "tbl_match":
|
|
13715
|
+
from pointblank._interrogation import tbl_match
|
|
13058
13716
|
|
|
13059
13717
|
# Get the comparison table (could be callable or actual table)
|
|
13060
13718
|
tbl_compare = value["tbl_compare"]
|
|
@@ -13080,6 +13738,53 @@ class Validate:
|
|
|
13080
13738
|
tbl_type=tbl_type,
|
|
13081
13739
|
)
|
|
13082
13740
|
|
|
13741
|
+
elif is_valid_agg(assertion_type):
|
|
13742
|
+
agg, comp = resolve_agg_registries(assertion_type)
|
|
13743
|
+
|
|
13744
|
+
# Produce a 1-column Narwhals DataFrame
|
|
13745
|
+
# TODO: Should be able to take lazy too
|
|
13746
|
+
vec: nw.DataFrame = nw.from_native(data_tbl_step).select(column)
|
|
13747
|
+
real = agg(vec)
|
|
13748
|
+
|
|
13749
|
+
raw_value = value["value"]
|
|
13750
|
+
tol = value["tol"]
|
|
13751
|
+
|
|
13752
|
+
# Handle ReferenceColumn: compute target from reference data
|
|
13753
|
+
if isinstance(raw_value, ReferenceColumn):
|
|
13754
|
+
if self.reference is None:
|
|
13755
|
+
raise ValueError(
|
|
13756
|
+
f"Cannot use ref('{raw_value.column_name}') without "
|
|
13757
|
+
"setting reference data on the Validate object. "
|
|
13758
|
+
"Use Validate(data=..., reference=...) to set reference data."
|
|
13759
|
+
)
|
|
13760
|
+
ref_vec: nw.DataFrame = nw.from_native(self.reference).select(
|
|
13761
|
+
raw_value.column_name
|
|
13762
|
+
)
|
|
13763
|
+
target: float | int = agg(ref_vec)
|
|
13764
|
+
else:
|
|
13765
|
+
target = raw_value
|
|
13766
|
+
|
|
13767
|
+
lower_diff, upper_diff = _derive_bounds(target, tol)
|
|
13768
|
+
|
|
13769
|
+
lower_bound = target - lower_diff
|
|
13770
|
+
upper_bound = target + upper_diff
|
|
13771
|
+
result_bool: bool = comp(real, lower_bound, upper_bound)
|
|
13772
|
+
|
|
13773
|
+
validation.all_passed = result_bool
|
|
13774
|
+
validation.n = 1
|
|
13775
|
+
validation.n_passed = int(result_bool)
|
|
13776
|
+
validation.n_failed = 1 - result_bool
|
|
13777
|
+
|
|
13778
|
+
# Store computed values for step reports
|
|
13779
|
+
validation.val_info = {
|
|
13780
|
+
"actual": real,
|
|
13781
|
+
"target": target,
|
|
13782
|
+
"tol": tol,
|
|
13783
|
+
"lower_bound": lower_bound,
|
|
13784
|
+
"upper_bound": upper_bound,
|
|
13785
|
+
}
|
|
13786
|
+
|
|
13787
|
+
results_tbl = None
|
|
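The pass/fail decision above reduces to a bounds check around the target. A minimal sketch assuming a symmetric absolute tolerance (the actual `_derive_bounds` and `comp` come from the aggregation registries and may treat `tol` differently, e.g., as asymmetric bounds):

```python
def agg_passes(actual: float, target: float, tol: float) -> bool:
    # Symmetric absolute tolerance assumed for this sketch
    lower_bound, upper_bound = target - tol, target + tol
    return lower_bound <= actual <= upper_bound

print(agg_passes(101.2, 100.0, tol=2.0))  # True
print(agg_passes(104.0, 100.0, tol=2.0))  # False
```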
13083
13788
|
else:
|
|
13084
13789
|
raise ValueError(
|
|
13085
13790
|
f"Unknown assertion type: {assertion_type}"
|
|
@@ -13822,12 +14527,14 @@ class Validate:
|
|
|
13822
14527
|
)
|
|
13823
14528
|
|
|
13824
14529
|
# Get the threshold status using the appropriate method
|
|
14530
|
+
# Note: scalar=False (default) always returns a dict
|
|
14531
|
+
status: dict[int, bool]
|
|
13825
14532
|
if level == "warning":
|
|
13826
|
-
status = self.warning(i=i)
|
|
14533
|
+
status = self.warning(i=i) # type: ignore[assignment]
|
|
13827
14534
|
elif level == "error":
|
|
13828
|
-
status = self.error(i=i)
|
|
13829
|
-
|
|
13830
|
-
status = self.critical(i=i)
|
|
14535
|
+
status = self.error(i=i) # type: ignore[assignment]
|
|
14536
|
+
else: # level == "critical"
|
|
14537
|
+
status = self.critical(i=i) # type: ignore[assignment]
|
|
13831
14538
|
|
|
13832
14539
|
# Find any steps that exceeded the threshold
|
|
13833
14540
|
failures = []
|
|
@@ -13981,12 +14688,14 @@ class Validate:
|
|
|
13981
14688
|
)
|
|
13982
14689
|
|
|
13983
14690
|
# Get the threshold status using the appropriate method
|
|
14691
|
+
# Note: scalar=False (default) always returns a dict
|
|
14692
|
+
status: dict[int, bool]
|
|
13984
14693
|
if level == "warning":
|
|
13985
|
-
status = self.warning(i=i)
|
|
14694
|
+
status = self.warning(i=i) # type: ignore[assignment]
|
|
13986
14695
|
elif level == "error":
|
|
13987
|
-
status = self.error(i=i)
|
|
13988
|
-
|
|
13989
|
-
status = self.critical(i=i)
|
|
14696
|
+
status = self.error(i=i) # type: ignore[assignment]
|
|
14697
|
+
else: # level == "critical"
|
|
14698
|
+
status = self.critical(i=i) # type: ignore[assignment]
|
|
13990
14699
|
|
|
13991
14700
|
# Return True if any steps exceeded the threshold
|
|
13992
14701
|
return any(status.values())
|
|
@@ -14759,7 +15468,7 @@ class Validate:
|
|
|
14759
15468
|
|
|
14760
15469
|
def get_data_extracts(
|
|
14761
15470
|
self, i: int | list[int] | None = None, frame: bool = False
|
|
14762
|
-
) -> dict[int, FrameT] | FrameT:
|
|
15471
|
+
) -> dict[int, Any] | Any:
|
|
14763
15472
|
"""
|
|
14764
15473
|
Get the rows that failed for each validation step.
|
|
14765
15474
|
|
|
@@ -14782,7 +15491,7 @@ class Validate:
|
|
|
14782
15491
|
|
|
14783
15492
|
Returns
|
|
14784
15493
|
-------
|
|
14785
|
-
dict[int, FrameT] | FrameT
|
|
15494
|
+
dict[int, Any] | Any
|
|
14786
15495
|
A dictionary of tables containing the rows that failed in every compatible validation
|
|
14787
15496
|
step. Alternatively, it can be a DataFrame if `frame=True` and `i=` is a scalar.
|
|
14788
15497
|
|
|
@@ -15072,7 +15781,7 @@ class Validate:
|
|
|
15072
15781
|
|
|
15073
15782
|
return json.dumps(report, indent=4, default=str)
|
|
15074
15783
|
|
|
15075
|
-
def get_sundered_data(self, type="pass") ->
|
|
15784
|
+
def get_sundered_data(self, type="pass") -> Any:
|
|
15076
15785
|
"""
|
|
15077
15786
|
Get the data that passed or failed the validation steps.
|
|
15078
15787
|
|
|
@@ -15108,7 +15817,7 @@ class Validate:
|
|
|
15108
15817
|
|
|
15109
15818
|
Returns
|
|
15110
15819
|
-------
|
|
15111
|
-
|
|
15820
|
+
Any
|
|
15112
15821
|
A table containing the data that passed or failed the validation steps.
|
|
15113
15822
|
|
|
15114
15823
|
Examples
|
|
@@ -15200,6 +15909,7 @@ class Validate:
|
|
|
15200
15909
|
# Get all validation step result tables and join together the `pb_is_good_` columns
|
|
15201
15910
|
# ensuring that the columns are named uniquely (e.g., `pb_is_good_1`, `pb_is_good_2`, ...)
|
|
15202
15911
|
# and that the index is reset
|
|
15912
|
+
labeled_tbl_nw: nw.DataFrame | nw.LazyFrame | None = None
|
|
15203
15913
|
for i, validation in enumerate(validation_info):
|
|
15204
15914
|
results_tbl = nw.from_native(validation.tbl_checked)
|
|
15205
15915
|
|
|
@@ -15220,7 +15930,7 @@ class Validate:
|
|
|
15220
15930
|
)
|
|
15221
15931
|
|
|
15222
15932
|
# Add the results table to the list of tables
|
|
15223
|
-
if i == 0:
|
|
15933
|
+
if labeled_tbl_nw is None:
|
|
15224
15934
|
labeled_tbl_nw = results_tbl
|
|
15225
15935
|
else:
|
|
15226
15936
|
labeled_tbl_nw = labeled_tbl_nw.join(results_tbl, on=index_name, how="left")
|
|
@@ -15396,10 +16106,10 @@ class Validate:
|
|
|
15396
16106
|
def get_tabular_report(
|
|
15397
16107
|
self,
|
|
15398
16108
|
title: str | None = ":default:",
|
|
15399
|
-
incl_header: bool = None,
|
|
15400
|
-
incl_footer: bool = None,
|
|
15401
|
-
incl_footer_timings: bool = None,
|
|
15402
|
-
incl_footer_notes: bool = None,
|
|
16109
|
+
incl_header: bool | None = None,
|
|
16110
|
+
incl_footer: bool | None = None,
|
|
16111
|
+
incl_footer_timings: bool | None = None,
|
|
16112
|
+
incl_footer_notes: bool | None = None,
|
|
15403
16113
|
) -> GT:
|
|
15404
16114
|
"""
|
|
15405
16115
|
Validation report as a GT table.
|
|
@@ -15767,10 +16477,16 @@ class Validate:
|
|
|
15767
16477
|
elif assertion_type[i] in ["conjointly", "specially"]:
|
|
15768
16478
|
column_text = ""
|
|
15769
16479
|
else:
|
|
15770
|
-
|
|
16480
|
+
# Handle both string columns and list columns
|
|
16481
|
+
# For single-element lists like ['a'], display as 'a'
|
|
16482
|
+
# For multi-element lists, display as comma-separated values
|
|
16483
|
+
if isinstance(column, list):
|
|
16484
|
+
column_text = ", ".join(str(c) for c in column)
|
|
16485
|
+
else:
|
|
16486
|
+
column_text = str(column)
|
|
15771
16487
|
|
|
15772
|
-
# Apply underline styling for synthetic columns
|
|
15773
|
-
#
|
|
16488
|
+
# Apply underline styling for synthetic columns; only apply styling if column_text is
|
|
16489
|
+
# not empty and not a special marker
|
|
15774
16490
|
if (
|
|
15775
16491
|
has_synthetic_column
|
|
15776
16492
|
and column_text
|
|
@@ -15848,6 +16564,69 @@ class Validate:
|
|
|
15848
16564
|
tol_value = bound_finder.keywords.get("tol", 0) if bound_finder else 0
|
|
15849
16565
|
values_upd.append(f"p = {p_value}<br/>tol = {tol_value}")
|
|
15850
16566
|
|
|
16567
|
+
elif assertion_type[i] in ["data_freshness"]:
|
|
16568
|
+
# Format max_age nicely for display
|
|
16569
|
+
max_age = value.get("max_age")
|
|
16570
|
+
max_age_str = _format_timedelta(max_age) if max_age else "—"
|
|
16571
|
+
|
|
16572
|
+
# Build additional lines with non-default parameters
|
|
16573
|
+
extra_lines = []
|
|
16574
|
+
|
|
16575
|
+
if value.get("reference_time") is not None:
|
|
16576
|
+
ref_time = value["reference_time"]
|
|
16577
|
+
|
|
16578
|
+
# Format datetime across two lines: date and time+tz
|
|
16579
|
+
if hasattr(ref_time, "strftime"):
|
|
16580
|
+
date_str = ref_time.strftime("@%Y-%m-%d")
|
|
16581
|
+
time_str = " " + ref_time.strftime("%H:%M:%S")
|
|
16582
|
+
|
|
16583
|
+
# Add timezone offset if present
|
|
16584
|
+
if hasattr(ref_time, "tzinfo") and ref_time.tzinfo is not None:
|
|
16585
|
+
tz_offset = ref_time.strftime("%z")
|
|
16586
|
+
if tz_offset:
|
|
16587
|
+
time_str += tz_offset
|
|
16588
|
+
extra_lines.append(date_str)
|
|
16589
|
+
extra_lines.append(time_str)
|
|
16590
|
+
else:
|
|
16591
|
+
extra_lines.append(f"@{ref_time}")
|
|
16592
|
+
|
|
16593
|
+
# Timezone and allow_tz_mismatch on same line
|
|
16594
|
+
tz_line_parts = []
|
|
16595
|
+
if value.get("timezone") is not None:
|
|
16596
|
+
# Convert timezone name to ISO 8601 offset format
|
|
16597
|
+
tz_name = value["timezone"]
|
|
16598
|
+
|
|
16599
|
+
try:
|
|
16600
|
+
tz_obj = ZoneInfo(tz_name)
|
|
16601
|
+
|
|
16602
|
+
# Get the current offset for this timezone
|
|
16603
|
+
now = datetime.datetime.now(tz_obj)
|
|
16604
|
+
offset = now.strftime("%z")
|
|
16605
|
+
|
|
16606
|
+
# Format as ISO 8601 extended: -07:00 (insert colon)
|
|
16607
|
+
if len(offset) == 5:
|
|
16608
|
+
tz_display = f"{offset[:3]}:{offset[3:]}"
|
|
16609
|
+
else:
|
|
16610
|
+
tz_display = offset
|
|
16611
|
+
|
|
16612
|
+
except Exception:
|
|
16613
|
+
tz_display = tz_name
|
|
16614
|
+
tz_line_parts.append(tz_display)
|
|
16615
|
+
|
|
16616
|
+
if value.get("allow_tz_mismatch"):
|
|
16617
|
+
tz_line_parts.append("~tz")
|
|
16618
|
+
|
|
16619
|
+
if tz_line_parts:
|
|
16620
|
+
extra_lines.append(" ".join(tz_line_parts))
|
|
16621
|
+
|
|
16622
|
+
if extra_lines:
|
|
16623
|
+
extra_html = "<br/>".join(extra_lines)
|
|
16624
|
+
values_upd.append(
|
|
16625
|
+
f'{max_age_str}<br/><span style="font-size: 9px;">{extra_html}</span>'
|
|
16626
|
+
)
|
|
16627
|
+
else:
|
|
16628
|
+
values_upd.append(max_age_str)
|
|
16629
|
+
|
|
15851
16630
|
elif assertion_type[i] in ["col_schema_match"]:
|
|
15852
16631
|
values_upd.append("SCHEMA")
|
|
15853
16632
|
|
|
@@ -15889,6 +16668,32 @@ class Validate:
|
|
|
15889
16668
|
else: # pragma: no cover
|
|
15890
16669
|
values_upd.append(str(value)) # pragma: no cover
|
|
15891
16670
|
|
|
16671
|
+
# Handle aggregation methods (col_sum_gt, col_avg_eq, etc.)
|
|
16672
|
+
elif is_valid_agg(assertion_type[i]):
|
|
16673
|
+
# Extract the value and tolerance from the values dict
|
|
16674
|
+
agg_value = value.get("value")
|
|
16675
|
+
tol_value = value.get("tol", 0)
|
|
16676
|
+
|
|
16677
|
+
# Format the value (could be a number, Column, or ReferenceColumn)
|
|
16678
|
+
if hasattr(agg_value, "__repr__"):
|
|
16679
|
+
# For Column or ReferenceColumn objects, use their repr
|
|
16680
|
+
value_str = repr(agg_value)
|
|
16681
|
+
else:
|
|
16682
|
+
value_str = str(agg_value)
|
|
16683
|
+
|
|
16684
|
+
# Format tolerance - only show on second line if non-zero
|
|
16685
|
+
if tol_value != 0:
|
|
16686
|
+
# Format tolerance based on its type
|
|
16687
|
+
if isinstance(tol_value, tuple):
|
|
16688
|
+
# Asymmetric bounds: (lower, upper)
|
|
16689
|
+
tol_str = f"tol=({tol_value[0]}, {tol_value[1]})"
|
|
16690
|
+
else:
|
|
16691
|
+
# Symmetric tolerance
|
|
16692
|
+
tol_str = f"tol={tol_value}"
|
|
16693
|
+
values_upd.append(f"{value_str}<br/>{tol_str}")
|
|
16694
|
+
else:
|
|
16695
|
+
values_upd.append(value_str)
|
|
16696
|
+
|
|
15892
16697
|
# If the assertion type is not recognized, add the value as a string
|
|
15893
16698
|
else: # pragma: no cover
|
|
15894
16699
|
values_upd.append(str(value)) # pragma: no cover
|
|
@@ -16327,6 +17132,15 @@ class Validate:
|
|
|
16327
17132
|
if incl_footer_timings:
|
|
16328
17133
|
gt_tbl = gt_tbl.tab_source_note(source_note=html(table_time))
|
|
16329
17134
|
|
|
17135
|
+
# Add governance metadata as source note if any metadata is present
|
|
17136
|
+
governance_html = _create_governance_metadata_html(
|
|
17137
|
+
owner=self.owner,
|
|
17138
|
+
consumers=self.consumers,
|
|
17139
|
+
version=self.version,
|
|
17140
|
+
)
|
|
17141
|
+
if governance_html:
|
|
17142
|
+
gt_tbl = gt_tbl.tab_source_note(source_note=html(governance_html))
|
|
17143
|
+
|
|
16330
17144
|
# Create notes markdown from validation steps and add as separate source note if enabled
|
|
16331
17145
|
if incl_footer_notes:
|
|
16332
17146
|
notes_markdown = _create_notes_html(self.validation_info)
|
|
@@ -16675,6 +17489,18 @@ class Validate:
|
|
|
16675
17489
|
debug_return_df=debug_return_df,
|
|
16676
17490
|
)
|
|
16677
17491
|
|
|
17492
|
+
elif is_valid_agg(assertion_type):
|
|
17493
|
+
step_report = _step_report_aggregate(
|
|
17494
|
+
assertion_type=assertion_type,
|
|
17495
|
+
i=i,
|
|
17496
|
+
column=column,
|
|
17497
|
+
values=values,
|
|
17498
|
+
all_passed=all_passed,
|
|
17499
|
+
val_info=val_info,
|
|
17500
|
+
header=header,
|
|
17501
|
+
lang=lang,
|
|
17502
|
+
)
|
|
17503
|
+
|
|
16678
17504
|
else:
|
|
16679
17505
|
step_report = None # pragma: no cover
|
|
16680
17506
|
|
|
@@ -16738,7 +17564,7 @@ class Validate:
|
|
|
16738
17564
|
table = validation.pre(self.data)
|
|
16739
17565
|
|
|
16740
17566
|
# Get the columns from the table as a list
|
|
16741
|
-
columns = list(table.columns)
|
|
17567
|
+
columns = list(table.columns) # type: ignore[union-attr]
|
|
16742
17568
|
|
|
16743
17569
|
# Evaluate the column expression
|
|
16744
17570
|
if isinstance(column_expr, ColumnSelectorNarwhals):
|
|
@@ -17116,7 +17942,7 @@ def _convert_string_to_datetime(value: str) -> datetime.datetime:
|
|
|
17116
17942
|
return datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
|
|
17117
17943
|
|
|
17118
17944
|
|
|
17119
|
-
def _string_date_dttm_conversion(value: any) -> any:
|
|
17945
|
+
def _string_date_dttm_conversion(value: Any) -> Any:
|
|
17120
17946
|
"""
|
|
17121
17947
|
Convert a string to a date or datetime object if it is in the correct format.
|
|
17122
17948
|
If the value is not a string, it is returned as is.
|
|
@@ -17151,8 +17977,8 @@ def _string_date_dttm_conversion(value: any) -> any:
|
|
|
17151
17977
|
|
|
17152
17978
|
|
|
17153
17979
|
def _conditional_string_date_dttm_conversion(
|
|
17154
|
-
value: any, allow_regular_strings: bool = False
|
|
17155
|
-
) -> any:
|
|
17980
|
+
value: Any, allow_regular_strings: bool = False
|
|
17981
|
+
) -> Any:
|
|
17156
17982
|
"""
|
|
17157
17983
|
Conditionally convert a string to a date or datetime object if it is in the correct format. If
|
|
17158
17984
|
`allow_regular_strings=` is `True`, regular strings are allowed to pass through unchanged. If
|
|
@@ -17196,9 +18022,9 @@ def _process_brief(
|
|
|
17196
18022
|
brief: str | None,
|
|
17197
18023
|
step: int,
|
|
17198
18024
|
col: str | list[str] | None,
|
|
17199
|
-
values:
|
|
17200
|
-
thresholds:
|
|
17201
|
-
segment:
|
|
18025
|
+
values: Any | None,
|
|
18026
|
+
thresholds: Any | None,
|
|
18027
|
+
segment: Any | None,
|
|
17202
18028
|
) -> str:
|
|
17203
18029
|
# If there is no brief, return `None`
|
|
17204
18030
|
if brief is None:
|
|
@@ -17271,6 +18097,265 @@ def _process_brief(
|
|
|
17271
18097
|
return brief
|
|
17272
18098
|
|
|
17273
18099
|
|
|
18100
|
+
def _parse_max_age(max_age: str | datetime.timedelta) -> datetime.timedelta:
|
|
18101
|
+
"""
|
|
18102
|
+
Parse a max_age specification into a timedelta.
|
|
18103
|
+
|
|
18104
|
+
Parameters
|
|
18105
|
+
----------
|
|
18106
|
+
max_age
|
|
18107
|
+
Either a timedelta object or a string like "24 hours", "1 day", "30 minutes",
|
|
18108
|
+
or compound expressions like "2 hours 15 minutes", "1 day 6 hours", etc.
|
|
18109
|
+
|
|
18110
|
+
Returns
|
|
18111
|
+
-------
|
|
18112
|
+
datetime.timedelta
|
|
18113
|
+
The parsed timedelta.
|
|
18114
|
+
|
|
18115
|
+
Raises
|
|
18116
|
+
------
|
|
18117
|
+
ValueError
|
|
18118
|
+
If the string format is invalid or the unit is not recognized.
|
|
18119
|
+
"""
|
|
18120
|
+
if isinstance(max_age, datetime.timedelta):
|
|
18121
|
+
return max_age
|
|
18122
|
+
|
|
18123
|
+
if not isinstance(max_age, str):
|
|
18124
|
+
raise TypeError(
|
|
18125
|
+
f"The `max_age` parameter must be a string or timedelta, got {type(max_age).__name__}."
|
|
18126
|
+
)
|
|
18127
|
+
|
|
18128
|
+
# Parse string format like "24 hours", "1 day", "30 minutes", etc.
|
|
18129
|
+
max_age_str = max_age.strip().lower()
|
|
18130
|
+
|
|
18131
|
+
# Define unit mappings (singular and plural forms)
|
|
18132
|
+
unit_mappings = {
|
|
18133
|
+
"second": "seconds",
|
|
18134
|
+
"seconds": "seconds",
|
|
18135
|
+
"sec": "seconds",
|
|
18136
|
+
"secs": "seconds",
|
|
18137
|
+
"s": "seconds",
|
|
18138
|
+
"minute": "minutes",
|
|
18139
|
+
"minutes": "minutes",
|
|
18140
|
+
"min": "minutes",
|
|
18141
|
+
"mins": "minutes",
|
|
18142
|
+
"m": "minutes",
|
|
18143
|
+
"hour": "hours",
|
|
18144
|
+
"hours": "hours",
|
|
18145
|
+
"hr": "hours",
|
|
18146
|
+
"hrs": "hours",
|
|
18147
|
+
"h": "hours",
|
|
18148
|
+
"day": "days",
|
|
18149
|
+
"days": "days",
|
|
18150
|
+
"d": "days",
|
|
18151
|
+
"week": "weeks",
|
|
18152
|
+
"weeks": "weeks",
|
|
18153
|
+
"wk": "weeks",
|
|
18154
|
+
"wks": "weeks",
|
|
18155
|
+
"w": "weeks",
|
|
18156
|
+
}
|
|
18157
|
+
|
|
18158
|
+
import re
|
|
18159
|
+
|
|
18160
|
+
# Pattern to find all number+unit pairs (supports compound expressions)
|
|
18161
|
+
# Matches: "2 hours 15 minutes", "1day6h", "30 min", etc.
|
|
18162
|
+
compound_pattern = r"(\d+(?:\.\d+)?)\s*([a-zA-Z]+)"
|
|
18163
|
+
matches = re.findall(compound_pattern, max_age_str)
|
|
18164
|
+
|
|
18165
|
+
if not matches:
|
|
18166
|
+
raise ValueError(
|
|
18167
|
+
f"Invalid max_age format: '{max_age}'. Expected format like '24 hours', "
|
|
18168
|
+
f"'1 day', '30 minutes', '2 hours 15 minutes', etc."
|
|
18169
|
+
)
|
|
18170
|
+
|
|
18171
|
+
# Accumulate timedelta from all matched components
|
|
18172
|
+
total_td = datetime.timedelta()
|
|
18173
|
+
valid_units = ["seconds", "minutes", "hours", "days", "weeks"]
|
|
18174
|
+
|
|
18175
|
+
for value_str, unit in matches:
|
|
18176
|
+
value = float(value_str)
|
|
18177
|
+
|
|
18178
|
+
# Normalize the unit
|
|
18179
|
+
unit_lower = unit.lower()
|
|
18180
|
+
if unit_lower not in unit_mappings:
|
|
18181
|
+
raise ValueError(
|
|
18182
|
+
f"Unknown time unit '{unit}' in max_age '{max_age}'. "
|
|
18183
|
+
f"Valid units are: {', '.join(valid_units)} (or their abbreviations)."
|
|
18184
|
+
)
|
|
18185
|
+
|
|
18186
|
+
normalized_unit = unit_mappings[unit_lower]
|
|
18187
|
+
|
|
18188
|
+
# Add to total timedelta
|
|
18189
|
+
if normalized_unit == "seconds":
|
|
18190
|
+
total_td += datetime.timedelta(seconds=value)
|
|
18191
|
+
elif normalized_unit == "minutes":
|
|
18192
|
+
total_td += datetime.timedelta(minutes=value)
|
|
18193
|
+
elif normalized_unit == "hours":
|
|
18194
|
+
total_td += datetime.timedelta(hours=value)
|
|
18195
|
+
elif normalized_unit == "days":
|
|
18196
|
+
total_td += datetime.timedelta(days=value)
|
|
18197
|
+
elif normalized_unit == "weeks":
|
|
18198
|
+
total_td += datetime.timedelta(weeks=value)
|
|
18199
|
+
|
|
18200
|
+
return total_td
|
|
18201
|
+
|
|
18202
|
+
|
|
18203
|
+
def _parse_timezone(timezone: str) -> datetime.tzinfo:
|
|
18204
|
+
"""
|
|
18205
|
+
Parse a timezone string into a tzinfo object.
|
|
18206
|
+
|
|
18207
|
+
Supports:
|
|
18208
|
+
- IANA timezone names: "America/New_York", "Europe/London", "UTC"
|
|
18209
|
+
- Offset strings: "-7", "+5", "-07:00", "+05:30"
|
|
18210
|
+
|
|
18211
|
+
Parameters
|
|
18212
|
+
----------
|
|
18213
|
+
timezone
|
|
18214
|
+
The timezone string to parse.
|
|
18215
|
+
|
|
18216
|
+
Returns
|
|
18217
|
+
-------
|
|
18218
|
+
datetime.tzinfo
|
|
18219
|
+
The parsed timezone object.
|
|
18220
|
+
|
|
18221
|
+
Raises
|
|
18222
|
+
------
|
|
18223
|
+
ValueError
|
|
18224
|
+
If the timezone is not valid.
|
|
18225
|
+
"""
|
|
18226
|
+
import re
|
|
18227
|
+
|
|
18228
|
+
# Check for offset formats: "-7", "+5", "-07:00", "+05:30", etc.
|
|
18229
|
+
# Match: optional sign, 1-2 digits, optional colon and 2 more digits
|
|
18230
|
+
offset_pattern = r"^([+-]?)(\d{1,2})(?::(\d{2}))?$"
|
|
18231
|
+
match = re.match(offset_pattern, timezone.strip())
|
|
18232
|
+
|
|
18233
|
+
if match:
|
|
18234
|
+
sign_str, hours_str, minutes_str = match.groups()
|
|
18235
|
+
hours = int(hours_str)
|
|
18236
|
+
minutes = int(minutes_str) if minutes_str else 0
|
|
18237
|
+
|
|
18238
|
+
# Apply sign (default positive if not specified)
|
|
18239
|
+
total_minutes = hours * 60 + minutes
|
|
18240
|
+
if sign_str == "-":
|
|
18241
|
+
total_minutes = -total_minutes
|
|
18242
|
+
|
|
18243
|
+
return datetime.timezone(datetime.timedelta(minutes=total_minutes))
|
|
18244
|
+
|
|
18245
|
+
# Try IANA timezone names (zoneinfo is standard in Python 3.9+)
|
|
18246
|
+
try:
|
|
18247
|
+
return ZoneInfo(timezone)
|
|
18248
|
+
except KeyError:
|
|
18249
|
+
pass
|
|
18250
|
+
|
|
18251
|
+
raise ValueError(
|
|
18252
|
+
f"Invalid timezone: '{timezone}'. Use an IANA timezone name "
|
|
18253
|
+
f"(e.g., 'America/New_York', 'UTC') or an offset (e.g., '-7', '+05:30')."
|
|
18254
|
+
)
|
|
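Illustrative calls covering both branches of the grammar above (the fixed-offset regex and the `ZoneInfo` fallback):

```python
print(_parse_timezone("-7"))                # UTC-07:00 (fixed offset)
print(_parse_timezone("+05:30"))            # UTC+05:30
print(_parse_timezone("America/New_York"))  # America/New_York (IANA zone)
```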
18255
|
+
|
|
18256
|
+
|
|
18257
|
+
def _validate_timezone(timezone: str) -> None:
|
|
18258
|
+
"""
|
|
18259
|
+
Validate that a timezone string is valid.
|
|
18260
|
+
|
|
18261
|
+
Parameters
|
|
18262
|
+
----------
|
|
18263
|
+
timezone
|
|
18264
|
+
The timezone string to validate.
|
|
18265
|
+
|
|
18266
|
+
Raises
|
|
18267
|
+
------
|
|
18268
|
+
ValueError
|
|
18269
|
+
If the timezone is not valid.
|
|
18270
|
+
"""
|
|
18271
|
+
# Use _parse_timezone to validate - it will raise ValueError if invalid
|
|
18272
|
+
_parse_timezone(timezone)
|
|
18273
|
+
|
|
18274
|
+
|
|
18275
|
+
def _parse_reference_time(reference_time: str) -> datetime.datetime:
|
|
18276
|
+
"""
|
|
18277
|
+
Parse a reference time string into a datetime object.
|
|
18278
|
+
|
|
18279
|
+
Parameters
|
|
18280
|
+
----------
|
|
18281
|
+
reference_time
|
|
18282
|
+
An ISO 8601 formatted datetime string.
|
|
18283
|
+
|
|
18284
|
+
Returns
|
|
18285
|
+
-------
|
|
18286
|
+
datetime.datetime
|
|
18287
|
+
The parsed datetime object.
|
|
18288
|
+
|
|
18289
|
+
Raises
|
|
18290
|
+
------
|
|
18291
|
+
ValueError
|
|
18292
|
+
If the string cannot be parsed.
|
|
18293
|
+
"""
|
|
18294
|
+
# Try parsing with fromisoformat (handles most ISO 8601 formats)
|
|
18295
|
+
try:
|
|
18296
|
+
return datetime.datetime.fromisoformat(reference_time)
|
|
18297
|
+
except ValueError:
|
|
18298
|
+
pass
|
|
18299
|
+
|
|
18300
|
+
# Try parsing common formats
|
|
18301
|
+
formats = [
|
|
18302
|
+
"%Y-%m-%d %H:%M:%S",
|
|
18303
|
+
"%Y-%m-%d %H:%M:%S%z",
|
|
18304
|
+
"%Y-%m-%dT%H:%M:%S",
|
|
18305
|
+
"%Y-%m-%dT%H:%M:%S%z",
|
|
18306
|
+
"%Y-%m-%d",
|
|
18307
|
+
]
|
|
18308
|
+
|
|
18309
|
+
for fmt in formats:
|
|
18310
|
+
try:
|
|
18311
|
+
return datetime.datetime.strptime(reference_time, fmt)
|
|
18312
|
+
except ValueError:
|
|
18313
|
+
continue
|
|
18314
|
+
|
|
18315
|
+
raise ValueError(
|
|
18316
|
+
f"Could not parse reference_time '{reference_time}'. "
|
|
18317
|
+
f"Please use ISO 8601 format like '2024-01-15T10:30:00' or '2024-01-15T10:30:00+00:00'."
|
|
18318
|
+
)
|
|
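Two quick examples of accepted inputs; both are handled by `fromisoformat` directly, while the `strptime` fallbacks catch older space-separated variants:

```python
print(_parse_reference_time("2024-01-15T10:30:00+00:00"))  # offset-aware
print(_parse_reference_time("2024-01-15"))                 # naive, midnight
```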
18319
|
+
|
|
18320
|
+
|
|
18321
|
+
def _format_timedelta(td: datetime.timedelta) -> str:
|
|
18322
|
+
"""
|
|
18323
|
+
Format a timedelta into a human-readable string.
|
|
18324
|
+
|
|
18325
|
+
Parameters
|
|
18326
|
+
----------
|
|
18327
|
+
td
|
|
18328
|
+
The timedelta to format.
|
|
18329
|
+
|
|
18330
|
+
Returns
|
|
18331
|
+
-------
|
|
18332
|
+
str
|
|
18333
|
+
A compact human-readable string like "45.0s", "1.5h", "2d 5.0h", or "2w".
|
|
18334
|
+
"""
|
|
18335
|
+
total_seconds = td.total_seconds()
|
|
18336
|
+
|
|
18337
|
+
if total_seconds < 60:
|
|
18338
|
+
val = round(total_seconds, 1)
|
|
18339
|
+
return f"{val}s"
|
|
18340
|
+
elif total_seconds < 3600:
|
|
18341
|
+
val = round(total_seconds / 60, 1)
|
|
18342
|
+
return f"{val}m"
|
|
18343
|
+
elif total_seconds < 86400:
|
|
18344
|
+
val = round(total_seconds / 3600, 1)
|
|
18345
|
+
return f"{val}h"
|
|
18346
|
+
elif total_seconds < 604800:
|
|
18347
|
+
# For days, show "xd yh" format for better readability
|
|
18348
|
+
days = int(total_seconds // 86400)
|
|
18349
|
+
remaining_hours = round((total_seconds % 86400) / 3600, 1)
|
|
18350
|
+
if remaining_hours == 0:
|
|
18351
|
+
return f"{days}d"
|
|
18352
|
+
else:
|
|
18353
|
+
return f"{days}d {remaining_hours}h"
|
|
18354
|
+
else:
|
|
18355
|
+
val = round(total_seconds / 604800)
|
|
18356
|
+
return f"{val}w"
|
|
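Sample renderings from the thresholds above, showing the compact unit suffixes used in reports:

```python
import datetime

print(_format_timedelta(datetime.timedelta(seconds=45)))  # 45.0s
print(_format_timedelta(datetime.timedelta(minutes=90)))  # 1.5h
print(_format_timedelta(datetime.timedelta(hours=53)))    # 2d 5.0h
print(_format_timedelta(datetime.timedelta(weeks=2)))     # 2w
```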
18357
|
+
|
|
18358
|
+
|
|
17274
18359
|
def _transform_auto_brief(brief: str | bool | None) -> str | None:
|
|
17275
18360
|
if isinstance(brief, bool):
|
|
17276
18361
|
if brief:
|
|
@@ -17285,7 +18370,7 @@ def _process_action_str(
|
|
|
17285
18370
|
action_str: str,
|
|
17286
18371
|
step: int,
|
|
17287
18372
|
col: str | None,
|
|
17288
|
-
value: any,
|
|
18373
|
+
value: Any,
|
|
17289
18374
|
type: str,
|
|
17290
18375
|
level: str,
|
|
17291
18376
|
time: str,
|
|
@@ -17337,8 +18422,8 @@ def _process_action_str(
|
|
|
17337
18422
|
def _create_autobrief_or_failure_text(
|
|
17338
18423
|
assertion_type: str,
|
|
17339
18424
|
lang: str,
|
|
17340
|
-
column: str
|
|
17341
|
-
values:
|
|
18425
|
+
column: str,
|
|
18426
|
+
values: Any,
|
|
17342
18427
|
for_failure: bool,
|
|
17343
18428
|
locale: str | None = None,
|
|
17344
18429
|
n_rows: int | None = None,
|
|
@@ -17465,6 +18550,14 @@ def _create_autobrief_or_failure_text(
|
|
|
17465
18550
|
for_failure=for_failure,
|
|
17466
18551
|
)
|
|
17467
18552
|
|
|
18553
|
+
if assertion_type == "data_freshness":
|
|
18554
|
+
return _create_text_data_freshness(
|
|
18555
|
+
lang=lang,
|
|
18556
|
+
column=column,
|
|
18557
|
+
value=values,
|
|
18558
|
+
for_failure=for_failure,
|
|
18559
|
+
)
|
|
18560
|
+
|
|
17468
18561
|
if assertion_type == "col_pct_null":
|
|
17469
18562
|
return _create_text_col_pct_null(
|
|
17470
18563
|
lang=lang,
|
|
@@ -17490,7 +18583,7 @@ def _create_autobrief_or_failure_text(
|
|
|
17490
18583
|
for_failure=for_failure,
|
|
17491
18584
|
)
|
|
17492
18585
|
|
|
17493
|
-
return None
|
|
18586
|
+
return None
|
|
17494
18587
|
|
|
17495
18588
|
|
|
17496
18589
|
def _expect_failure_type(for_failure: bool) -> str:
|
|
@@ -17500,7 +18593,7 @@ def _expect_failure_type(for_failure: bool) -> str:
|
|
|
17500
18593
|
def _create_text_comparison(
|
|
17501
18594
|
assertion_type: str,
|
|
17502
18595
|
lang: str,
|
|
17503
|
-
column: str | list[str]
|
|
18596
|
+
column: str | list[str],
|
|
17504
18597
|
values: str | None,
|
|
17505
18598
|
for_failure: bool = False,
|
|
17506
18599
|
) -> str:
|
|
@@ -17526,7 +18619,7 @@ def _create_text_comparison(
|
|
|
17526
18619
|
|
|
17527
18620
|
def _create_text_between(
|
|
17528
18621
|
lang: str,
|
|
17529
|
-
column: str
|
|
18622
|
+
column: str,
|
|
17530
18623
|
value_1: str,
|
|
17531
18624
|
value_2: str,
|
|
17532
18625
|
not_: bool = False,
|
|
@@ -17556,7 +18649,7 @@ def _create_text_between(
|
|
|
17556
18649
|
|
|
17557
18650
|
|
|
17558
18651
|
def _create_text_set(
|
|
17559
|
-
lang: str, column: str
|
|
18652
|
+
lang: str, column: str, values: list[Any], not_: bool = False, for_failure: bool = False
|
|
17560
18653
|
) -> str:
|
|
17561
18654
|
type_ = _expect_failure_type(for_failure=for_failure)
|
|
17562
18655
|
|
|
@@ -17578,9 +18671,7 @@ def _create_text_set(
|
|
|
17578
18671
|
return text
|
|
17579
18672
|
|
|
17580
18673
|
|
|
17581
|
-
def _create_text_null(
|
|
17582
|
-
lang: str, column: str | None, not_: bool = False, for_failure: bool = False
|
|
17583
|
-
) -> str:
|
|
18674
|
+
def _create_text_null(lang: str, column: str, not_: bool = False, for_failure: bool = False) -> str:
|
|
17584
18675
|
type_ = _expect_failure_type(for_failure=for_failure)
|
|
17585
18676
|
|
|
17586
18677
|
column_text = _prep_column_text(column=column)
|
|
@@ -17597,9 +18688,7 @@ def _create_text_null(
|
|
|
17597
18688
|
return text
|
|
17598
18689
|
|
|
17599
18690
|
|
|
17600
|
-
def _create_text_regex(
|
|
17601
|
-
lang: str, column: str | None, pattern: str | dict, for_failure: bool = False
|
|
17602
|
-
) -> str:
|
|
18691
|
+
def _create_text_regex(lang: str, column: str, pattern: str, for_failure: bool = False) -> str:
|
|
17603
18692
|
type_ = _expect_failure_type(for_failure=for_failure)
|
|
17604
18693
|
|
|
17605
18694
|
column_text = _prep_column_text(column=column)
|
|
@@ -17631,7 +18720,7 @@ def _create_text_expr(lang: str, for_failure: bool) -> str:
|
|
|
17631
18720
|
return EXPECT_FAIL_TEXT[f"col_vals_expr_{type_}_text"][lang]
|
|
17632
18721
|
|
|
17633
18722
|
|
|
17634
|
-
def _create_text_col_exists(lang: str, column: str
|
|
18723
|
+
def _create_text_col_exists(lang: str, column: str, for_failure: bool = False) -> str:
|
|
17635
18724
|
type_ = _expect_failure_type(for_failure=for_failure)
|
|
17636
18725
|
|
|
17637
18726
|
column_text = _prep_column_text(column=column)
|
|
@@ -17681,7 +18770,7 @@ def _create_text_rows_complete(
|
|
|
17681
18770
|
return text
|
|
17682
18771
|
|
|
17683
18772
|
|
|
17684
|
-
def _create_text_row_count_match(lang: str, value: int, for_failure: bool = False) -> str:
|
|
18773
|
+
def _create_text_row_count_match(lang: str, value: dict, for_failure: bool = False) -> str:
|
|
17685
18774
|
type_ = _expect_failure_type(for_failure=for_failure)
|
|
17686
18775
|
|
|
17687
18776
|
values_text = _prep_values_text(value["count"], lang=lang)
|
|
@@ -17689,7 +18778,7 @@ def _create_text_row_count_match(lang: str, value: int, for_failure: bool = Fals
|
|
|
17689
18778
|
return EXPECT_FAIL_TEXT[f"row_count_match_n_{type_}_text"][lang].format(values_text=values_text)
|
|
17690
18779
|
|
|
17691
18780
|
|
|
17692
|
-
def _create_text_col_count_match(lang: str, value: int, for_failure: bool = False) -> str:
|
|
18781
|
+
def _create_text_col_count_match(lang: str, value: dict, for_failure: bool = False) -> str:
|
|
17693
18782
|
type_ = _expect_failure_type(for_failure=for_failure)
|
|
17694
18783
|
|
|
17695
18784
|
values_text = _prep_values_text(value["count"], lang=lang)
|
|
@@ -17697,6 +18786,33 @@ def _create_text_col_count_match(lang: str, value: int, for_failure: bool = Fals
|
|
|
17697
18786
|
return EXPECT_FAIL_TEXT[f"col_count_match_n_{type_}_text"][lang].format(values_text=values_text)
|
|
17698
18787
|
|
|
17699
18788
|
|
|
18789
|
+
def _create_text_data_freshness(
|
|
18790
|
+
lang: str,
|
|
18791
|
+
column: str | None,
|
|
18792
|
+
value: dict,
|
|
18793
|
+
for_failure: bool = False,
|
|
18794
|
+
) -> str:
|
|
18795
|
+
"""Create text for data_freshness validation."""
|
|
18796
|
+
type_ = _expect_failure_type(for_failure=for_failure)
|
|
18797
|
+
|
|
18798
|
+
column_text = _prep_column_text(column=column)
|
|
18799
|
+
max_age_text = _format_timedelta(value.get("max_age"))
|
|
18800
|
+
|
|
18801
|
+
if for_failure:
|
|
18802
|
+
age = value.get("age")
|
|
18803
|
+
age_text = _format_timedelta(age) if age else "unknown"
|
|
18804
|
+
return EXPECT_FAIL_TEXT[f"data_freshness_{type_}_text"][lang].format(
|
|
18805
|
+
column_text=column_text,
|
|
18806
|
+
max_age_text=max_age_text,
|
|
18807
|
+
age_text=age_text,
|
|
18808
|
+
)
|
|
18809
|
+
else:
|
|
18810
|
+
return EXPECT_FAIL_TEXT[f"data_freshness_{type_}_text"][lang].format(
|
|
18811
|
+
column_text=column_text,
|
|
18812
|
+
max_age_text=max_age_text,
|
|
18813
|
+
)
|
|
18814
|
+
|
|
18815
|
+
|
|
17700
18816
|
def _create_text_col_pct_null(
|
|
17701
18817
|
lang: str,
|
|
17702
18818
|
column: str | None,
|
|
@@ -17826,19 +18942,13 @@ def _create_text_prompt(lang: str, prompt: str, for_failure: bool = False) -> st
|
|
|
17826
18942
|
def _prep_column_text(column: str | list[str]) -> str:
|
|
17827
18943
|
if isinstance(column, list):
|
|
17828
18944
|
return "`" + str(column[0]) + "`"
|
|
17829
|
-
|
|
18945
|
+
if isinstance(column, str):
|
|
17830
18946
|
return "`" + column + "`"
|
|
17831
|
-
|
|
17832
|
-
return ""
|
|
18947
|
+
raise AssertionError
|
|
17833
18948
|
|
|
17834
18949
|
|
|
17835
18950
|
def _prep_values_text(
|
|
17836
|
-
values: str
|
|
17837
|
-
| int
|
|
17838
|
-
| float
|
|
17839
|
-
| datetime.datetime
|
|
17840
|
-
| datetime.date
|
|
17841
|
-
| list[str | int | float | datetime.datetime | datetime.date],
|
|
18951
|
+
values: _CompliantValue | _CompliantValues,
|
|
17842
18952
|
lang: str,
|
|
17843
18953
|
limit: int = 3,
|
|
17844
18954
|
) -> str:
|
|
@@ -17886,7 +18996,7 @@ def _prep_values_text(
|
|
|
17886
18996
|
return values_str
|
|
17887
18997
|
|
|
17888
18998
|
|
|
17889
|
-
def _seg_expr_from_string(data_tbl: any, segments_expr: str) -> tuple[str, str]:
|
|
18999
|
+
def _seg_expr_from_string(data_tbl: Any, segments_expr: str) -> tuple[str, str]:
|
|
17890
19000
|
"""
|
|
17891
19001
|
Obtain the segmentation categories from a table column.
|
|
17892
19002
|
|
|
@@ -17989,7 +19099,7 @@ def _seg_expr_from_tuple(segments_expr: tuple) -> list[tuple[str, Any]]:
|
|
|
17989
19099
|
return seg_tuples
|
|
17990
19100
|
|
|
17991
19101
|
|
|
17992
|
-
def _apply_segments(data_tbl: any, segments_expr: tuple[str, Any]) -> any:
|
|
19102
|
+
def _apply_segments(data_tbl: Any, segments_expr: tuple[str, str]) -> Any:
|
|
17993
19103
|
"""
|
|
17994
19104
|
Apply the segments expression to the data table.
|
|
17995
19105
|
|
|
@@ -18053,8 +19163,26 @@ def _apply_segments(data_tbl: any, segments_expr: tuple[str, Any]) -> any:
|
|
|
18053
19163
|
except ValueError: # pragma: no cover
|
|
18054
19164
|
pass # pragma: no cover
|
|
18055
19165
|
|
|
18056
|
-
# Format 2: Datetime strings with UTC timezone like
|
|
18057
|
-
# "2016-01-04 00:00:01 UTC.strict_cast(...)"
|
|
19166
|
+
# Format 2: Direct datetime strings like "2016-01-04 00:00:01" (Polars 1.36+)
|
|
19167
|
+
# These don't have UTC suffix anymore
|
|
19168
|
+
elif (
|
|
19169
|
+
" " in segment_str
|
|
19170
|
+
and "UTC" not in segment_str
|
|
19171
|
+
and "[" not in segment_str
|
|
19172
|
+
and ".alias" not in segment_str
|
|
19173
|
+
):
|
|
19174
|
+
try:
|
|
19175
|
+
parsed_dt = datetime.fromisoformat(segment_str)
|
|
19176
|
+
# Convert midnight datetimes to dates for consistency
|
|
19177
|
+
if parsed_dt.time() == datetime.min.time():
|
|
19178
|
+
parsed_value = parsed_dt.date() # pragma: no cover
|
|
19179
|
+
else:
|
|
19180
|
+
parsed_value = parsed_dt
|
|
19181
|
+
except ValueError: # pragma: no cover
|
|
19182
|
+
pass # pragma: no cover
|
|
19183
|
+
|
|
19184
|
+
# Format 3: Datetime strings with UTC timezone like
|
|
19185
|
+
# "2016-01-04 00:00:01 UTC.strict_cast(...)" (Polars < 1.36)
|
|
18058
19186
|
elif " UTC" in segment_str:
|
|
18059
19187
|
try:
|
|
18060
19188
|
# Extract just the datetime part before "UTC"
|
|
@@ -18069,7 +19197,7 @@ def _apply_segments(data_tbl: any, segments_expr: tuple[str, Any]) -> any:
|
|
|
18069
19197
|
except (ValueError, IndexError): # pragma: no cover
|
|
18070
19198
|
pass # pragma: no cover
|
|
18071
19199
|
|
|
18072
|
-
# Format 3: Bracketed expressions like ['2016-01-04']
|
|
19200
|
+
# Format 4: Bracketed expressions like ['2016-01-04']
|
|
18073
19201
|
elif segment_str.startswith("[") and segment_str.endswith("]"):
|
|
18074
19202
|
try: # pragma: no cover
|
|
18075
19203
|
# Remove [' and ']
|
|
@@ -18204,8 +19332,7 @@ def _validation_info_as_dict(validation_info: _ValidationInfo) -> dict:
|
|
|
18204
19332
|
|
|
18205
19333
|
def _get_assertion_icon(icon: list[str], length_val: int = 30) -> list[str]:
|
|
18206
19334
|
# For each icon, get the assertion icon SVG test from SVG_ICONS_FOR_ASSERTION_TYPES dictionary
|
|
18207
|
-
|
|
18208
|
-
icon_svg = [SVG_ICONS_FOR_ASSERTION_TYPES.get(icon) for icon in icon]
|
|
19335
|
+
icon_svg: list[str] = [SVG_ICONS_FOR_ASSERTION_TYPES[icon] for icon in icon]
|
|
18209
19336
|
|
|
18210
19337
|
# Replace the width and height in the SVG string
|
|
18211
19338
|
for i in range(len(icon_svg)):
|
|
@@ -18214,11 +19341,9 @@ def _get_assertion_icon(icon: list[str], length_val: int = 30) -> list[str]:
|
|
|
18214
19341
|
return icon_svg
|
|
18215
19342
|
|
|
18216
19343
|
|
|
18217
|
-
def _replace_svg_dimensions(svg:
|
|
19344
|
+
def _replace_svg_dimensions(svg: str, height_width: int | float) -> str:
|
|
18218
19345
|
svg = re.sub(r'width="[0-9]*?px', f'width="{height_width}px', svg)
|
|
18219
|
-
|
|
18220
|
-
|
|
18221
|
-
return svg
|
|
19346
|
+
return re.sub(r'height="[0-9]*?px', f'height="{height_width}px', svg)
|
|
18222
19347
|
|
|
18223
19348
|
|
|
18224
19349
|
def _get_title_text(
|
|
@@ -18282,7 +19407,7 @@ def _process_title_text(title: str | None, tbl_name: str | None, lang: str) -> s
|
|
|
18282
19407
|
return title_text
|
|
18283
19408
|
|
|
18284
19409
|
|
|
18285
|
-
def _transform_tbl_preprocessed(pre: any, seg: any, interrogation_performed: bool) -> list[str]:
|
|
19410
|
+
def _transform_tbl_preprocessed(pre: Any, seg: Any, interrogation_performed: bool) -> list[str]:
|
|
18286
19411
|
# If no interrogation was performed, return a list of empty strings
|
|
18287
19412
|
if not interrogation_performed:
|
|
18288
19413
|
return ["" for _ in range(len(pre))]
|
|
@@ -18304,9 +19429,7 @@ def _transform_tbl_preprocessed(pre: any, seg: any, interrogation_performed: boo
|
|
|
18304
19429
|
|
|
18305
19430
|
def _get_preprocessed_table_icon(icon: list[str]) -> list[str]:
|
|
18306
19431
|
# For each icon, get the SVG icon from the SVG_ICONS_FOR_TBL_STATUS dictionary
|
|
18307
|
-
|
|
18308
|
-
|
|
18309
|
-
return icon_svg
|
|
19432
|
+
return [SVG_ICONS_FOR_TBL_STATUS[icon] for icon in icon]
|
|
18310
19433
|
|
|
18311
19434
|
|
|
18312
19435
|
def _transform_eval(
|
|
@@ -18384,9 +19507,9 @@ def _transform_test_units(
|
|
|
18384
19507
|
return _format_single_number_with_gt(
|
|
18385
19508
|
value, n_sigfig=3, compact=True, locale=locale, df_lib=df_lib
|
|
18386
19509
|
)
|
|
18387
|
-
|
|
18388
|
-
|
|
18389
|
-
|
|
19510
|
+
formatted = vals.fmt_number(value, n_sigfig=3, compact=True, locale=locale)
|
|
19511
|
+
assert isinstance(formatted, list)
|
|
19512
|
+
return formatted[0]
|
|
18390
19513
|
|
|
18391
19514
|
return [
|
|
18392
19515
|
(
|
|
@@ -18590,22 +19713,21 @@ def _transform_assertion_str(
|
|
|
18590
19713
|
return type_upd
|
|
18591
19714
|
|
|
18592
19715
|
|
|
18593
|
-
def _pre_processing_funcs_to_str(pre: Callable) -> str | list[str]:
|
|
19716
|
+
def _pre_processing_funcs_to_str(pre: Callable) -> str | list[str] | None:
|
|
18594
19717
|
if isinstance(pre, Callable):
|
|
18595
19718
|
return _get_callable_source(fn=pre)
|
|
19719
|
+
return None
|
|
18596
19720
|
|
|
18597
19721
|
|
|
18598
19722
|
def _get_callable_source(fn: Callable) -> str:
|
|
18599
|
-
|
|
18600
|
-
|
|
18601
|
-
|
|
18602
|
-
|
|
18603
|
-
|
|
18604
|
-
|
|
18605
|
-
|
|
18606
|
-
|
|
18607
|
-
return fn.__name__
|
|
18608
|
-
return fn # pragma: no cover
|
|
19723
|
+
try:
|
|
19724
|
+
source_lines, _ = inspect.getsourcelines(fn)
|
|
19725
|
+
source = "".join(source_lines).strip()
|
|
19726
|
+
# Extract the `pre` argument from the source code
|
|
19727
|
+
pre_arg = _extract_pre_argument(source)
|
|
19728
|
+
return pre_arg
|
|
19729
|
+
except (OSError, TypeError): # pragma: no cover
|
|
19730
|
+
return fn.__name__ # ty: ignore
|
|
18609
19731
|
|
|
18610
19732
|
|
|
18611
19733
|
def _extract_pre_argument(source: str) -> str:
|
|
@@ -18625,12 +19747,78 @@ def _extract_pre_argument(source: str) -> str:
|
|
|
18625
19747
|
return pre_arg
|
|
18626
19748
|
|
|
18627
19749
|
|
|
19750
|
+
def _create_governance_metadata_html(
|
|
19751
|
+
owner: str | None,
|
|
19752
|
+
consumers: list[str] | None,
|
|
19753
|
+
version: str | None,
|
|
19754
|
+
) -> str:
|
|
19755
|
+
"""
|
|
19756
|
+
Create HTML for governance metadata display in the report footer.
|
|
19757
|
+
|
|
19758
|
+
Parameters
|
|
19759
|
+
----------
|
|
19760
|
+
owner
|
|
19761
|
+
The owner of the data being validated.
|
|
19762
|
+
consumers
|
|
19763
|
+
List of consumers who depend on the data.
|
|
19764
|
+
version
|
|
19765
|
+
The version of the validation plan.
|
|
19766
|
+
|
|
19767
|
+
Returns
|
|
19768
|
+
-------
|
|
19769
|
+
str
|
|
19770
|
+
HTML string containing formatted governance metadata, or empty string if no metadata.
|
|
19771
|
+
"""
|
|
19772
|
+
if owner is None and consumers is None and version is None:
|
|
19773
|
+
return ""
|
|
19774
|
+
|
|
19775
|
+
metadata_parts = []
|
|
19776
|
+
|
|
19777
|
+
# Common style for the metadata badges (similar to timing style but slightly smaller font)
|
|
19778
|
+
badge_style = (
|
|
19779
|
+
"background-color: #FFF; color: #444; padding: 0.5em 0.5em; position: inherit; "
|
|
19780
|
+
"margin-right: 5px; border: solid 1px #999999; font-variant-numeric: tabular-nums; "
|
|
19781
|
+
"border-radius: 0; padding: 2px 10px 2px 10px; font-size: 11px;"
|
|
19782
|
+
)
|
|
19783
|
+
label_style = (
|
|
19784
|
+
"color: #777; font-weight: bold; font-size: 9px; text-transform: uppercase; "
|
|
19785
|
+
"margin-right: 3px;"
|
|
19786
|
+
)
|
|
19787
|
+
|
|
19788
|
+
if owner is not None:
|
|
19789
|
+
metadata_parts.append(
|
|
19790
|
+
f"<span style='{badge_style}'><span style='{label_style}'>Owner:</span> {owner}</span>"
|
|
19791
|
+
)
|
|
19792
|
+
|
|
19793
|
+
if consumers is not None and len(consumers) > 0:
|
|
19794
|
+
consumers_str = ", ".join(consumers)
|
|
19795
|
+
metadata_parts.append(
|
|
19796
|
+
f"<span style='{badge_style}'>"
|
|
19797
|
+
f"<span style='{label_style}'>Consumers:</span> {consumers_str}"
|
|
19798
|
+
f"</span>"
|
|
19799
|
+
)
|
|
19800
|
+
|
|
19801
|
+
if version is not None:
|
|
19802
|
+
metadata_parts.append(
|
|
19803
|
+
f"<span style='{badge_style}'>"
|
|
19804
|
+
f"<span style='{label_style}'>Version:</span> {version}"
|
|
19805
|
+
f"</span>"
|
|
19806
|
+
)
|
|
19807
|
+
|
|
19808
|
+
return (
|
|
19809
|
+
f"<div style='margin-top: 5px; margin-bottom: 5px; margin-left: 10px;'>"
|
|
19810
|
+
f"{''.join(metadata_parts)}"
|
|
19811
|
+
f"</div>"
|
|
19812
|
+
)
|
|
19813
|
+
|
|
19814
|
+
|
|
18628
19815
|
def _create_table_time_html(
|
|
18629
19816
|
time_start: datetime.datetime | None, time_end: datetime.datetime | None
|
|
18630
19817
|
) -> str:
|
|
18631
19818
|
if time_start is None:
|
|
18632
19819
|
return ""
|
|
18633
19820
|
|
|
19821
|
+
assert time_end is not None # typing
|
|
18634
19822
|
# Get the time duration (difference between `time_end` and `time_start`) in seconds
|
|
18635
19823
|
time_duration = (time_end - time_start).total_seconds()
|
|
18636
19824
|
|
|
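The new `_create_governance_metadata_html()` helper is pure string templating, so it can be exercised directly. A minimal sketch, assuming the private name stays importable and that the owner/consumers/version values are the ones threaded through from the validation plan:

```python
# Hypothetical direct call to the private helper added above; in normal use
# these values would come from the Validate object's governance metadata.
from pointblank.validate import _create_governance_metadata_html

html_snippet = _create_governance_metadata_html(
    owner="data-eng",
    consumers=["reporting", "ml-platform"],
    version="1.4.0",
)

# One <div> wrapper containing three styled <span> badges:
# Owner, Consumers, and Version (empty string if all inputs are None).
print(html_snippet)
```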
@@ -18845,11 +20033,11 @@ def _format_number_safe(
         locale=locale,
         df_lib=df_lib,
     )
-
-
-
-
-
+    ints = fmt_number(
+        value, decimals=decimals, drop_trailing_zeros=drop_trailing_zeros, locale=locale
+    )
+    assert isinstance(ints, list)
+    return ints[0]


 def _format_integer_safe(value: int, locale: str = "en", df_lib=None) -> str:
@@ -18862,9 +20050,10 @@ def _format_integer_safe(value: int, locale: str = "en", df_lib=None) -> str:
     if df_lib is not None and value is not None:
         # Use GT-based formatting to avoid Pandas dependency completely
        return _format_single_integer_with_gt(value, locale=locale, df_lib=df_lib)
-
-
-
+
+    ints = fmt_integer(value, locale=locale)
+    assert isinstance(ints, list)
+    return ints[0]


 def _create_thresholds_html(thresholds: Thresholds, locale: str, df_lib=None) -> str:
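Both formatting helpers now fall back to the `great_tables.vals` formatters, which return a list of formatted strings even for a single input value; that is what the new `assert isinstance(ints, list)` plus `ints[0]` pattern accounts for. A small sketch (the expected outputs in the comments are assumptions for the default `en` locale):

```python
from great_tables.vals import fmt_integer, fmt_number

# vals-level formatters accept a scalar but hand back a list of strings,
# so the fallback paths above index element [0] after the assertion.
print(fmt_integer(1234567, locale="en"))         # likely ['1,234,567']
print(fmt_number(0.5, decimals=2, locale="en"))  # likely ['0.50']
```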
@@ -18980,7 +20169,7 @@ def _create_local_threshold_note_html(thresholds: Thresholds, locale: str = "en"
         HTML string containing the formatted threshold information.
     """
     if thresholds == Thresholds():
-        return ""
+        return ""  # pragma: no cover

     # Get df_lib for formatting
     df_lib = None
@@ -18988,10 +20177,10 @@ def _create_local_threshold_note_html(thresholds: Thresholds, locale: str = "en"
         import polars as pl

         df_lib = pl
-    elif _is_lib_present("pandas"):
-        import pandas as pd
+    elif _is_lib_present("pandas"):  # pragma: no cover
+        import pandas as pd  # pragma: no cover

-        df_lib = pd
+        df_lib = pd  # pragma: no cover

     # Helper function to format threshold values using the shared formatting functions
     def _format_threshold_value(fraction: float | None, count: int | None) -> str:
@@ -18999,10 +20188,12 @@ def _create_local_threshold_note_html(thresholds: Thresholds, locale: str = "en"
         # Format as fraction/percentage with locale formatting
         if fraction == 0:
             return "0"
-        elif fraction < 0.01:
+        elif fraction < 0.01:  # pragma: no cover
             # For very small fractions, show "<0.01" with locale formatting
-            formatted = _format_number_safe(
-
+            formatted = _format_number_safe(
+                0.01, decimals=2, locale=locale, df_lib=df_lib
+            )  # pragma: no cover
+            return f"<{formatted}"  # pragma: no cover
         else:
             # Use shared formatting function with drop_trailing_zeros
             formatted = _format_number_safe(
@@ -19079,14 +20270,14 @@ def _create_local_threshold_note_text(thresholds: Thresholds) -> str:
         if fraction is not None:
             if fraction == 0:
                 return "0"
-            elif fraction < 0.01:
-                return "<0.01"
+            elif fraction < 0.01:  # pragma: no cover
+                return "<0.01"  # pragma: no cover
             else:
                 return f"{fraction:.2f}".rstrip("0").rstrip(".")
         elif count is not None:
             return str(count)
         else:
-            return "—"
+            return "—"  # pragma: no cover

     parts = []

@@ -19105,7 +20296,7 @@ def _create_local_threshold_note_text(thresholds: Thresholds) -> str:
     if parts:
         return "Step-specific thresholds set: " + ", ".join(parts)
     else:
-        return ""
+        return ""  # pragma: no cover


 def _create_threshold_reset_note_html(locale: str = "en") -> str:
@@ -19654,13 +20845,13 @@ def _create_col_schema_match_note_html(schema_info: dict, locale: str = "en") ->
             f'<span style="color:#FF3300;">✗</span> {failed_text}: ' + ", ".join(failures) + "."
         )
     else:
-        summary = f'<span style="color:#FF3300;">✗</span> {failed_text}.'
+        summary = f'<span style="color:#FF3300;">✗</span> {failed_text}.'  # pragma: no cover

     # Generate the step report table using the existing function
     # We'll call either _step_report_schema_in_order or _step_report_schema_any_order
     # depending on the in_order parameter
-    if in_order:
-        step_report_gt = _step_report_schema_in_order(
+    if in_order:  # pragma: no cover
+        step_report_gt = _step_report_schema_in_order(  # pragma: no cover
             step=1, schema_info=schema_info, header=None, lang=locale, debug_return_df=False
         )
     else:
@@ -19691,7 +20882,7 @@ def _create_col_schema_match_note_html(schema_info: dict, locale: str = "en") ->
     """

     # Add the settings as an additional source note to the step report
-    step_report_gt = step_report_gt.tab_source_note(source_note=html(source_note_html))
+    step_report_gt = step_report_gt.tab_source_note(source_note=html(source_note_html))  # type: ignore[union-attr]

     # Extract the HTML from the GT object
     step_report_html = step_report_gt._repr_html_()
@@ -19743,12 +20934,12 @@ def _step_report_row_based(
     column: str,
     column_position: int,
     columns_subset: list[str] | None,
-    values:
+    values: Any,
     inclusive: tuple[bool, bool] | None,
     n: int,
     n_failed: int,
     all_passed: bool,
-    extract:
+    extract: Any,
     tbl_preview: GT,
     header: str,
     limit: int | None,
@@ -19775,10 +20966,12 @@ def _step_report_row_based(
     elif assertion_type == "col_vals_le":
         text = f"{column} ≤ {values}"
     elif assertion_type == "col_vals_between":
+        assert inclusive is not None
         symbol_left = "≤" if inclusive[0] else "<"
         symbol_right = "≤" if inclusive[1] else "<"
         text = f"{values[0]} {symbol_left} {column} {symbol_right} {values[1]}"
     elif assertion_type == "col_vals_outside":
+        assert inclusive is not None
         symbol_left = "<" if inclusive[0] else "≤"
         symbol_right = ">" if inclusive[1] else "≥"
         text = f"{column} {symbol_left} {values[0]}, {column} {symbol_right} {values[1]}"
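The between/outside branches above derive the comparison symbols from the `inclusive` tuple, which is why the new `assert inclusive is not None` guards were added before indexing it. A quick sketch of the same truth table for the between case (local function name is for illustration only):

```python
# Sketch of the symbol selection used in the between-style assertion text.
def between_text(column: str, values: tuple, inclusive: tuple[bool, bool]) -> str:
    symbol_left = "≤" if inclusive[0] else "<"
    symbol_right = "≤" if inclusive[1] else "<"
    return f"{values[0]} {symbol_left} {column} {symbol_right} {values[1]}"


print(between_text("a", (1, 5), (True, False)))   # 1 ≤ a < 5
print(between_text("a", (1, 5), (False, False)))  # 1 < a < 5
```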
@@ -19999,7 +21192,7 @@ def _step_report_rows_distinct(
     n: int,
     n_failed: int,
     all_passed: bool,
-    extract:
+    extract: Any,
     tbl_preview: GT,
     header: str,
     limit: int | None,
@@ -20125,9 +21318,299 @@ def _step_report_rows_distinct(
     return step_report


+def _step_report_aggregate(
+    assertion_type: str,
+    i: int,
+    column: str,
+    values: dict,
+    all_passed: bool,
+    val_info: dict | None,
+    header: str,
+    lang: str,
+) -> GT:
+    """
+    Generate a step report for aggregate validation methods (col_sum_*, col_avg_*, col_sd_*).
+
+    This creates a 1-row table showing the computed aggregate value vs. the target value,
+    along with tolerance and pass/fail status.
+    """
+
+    # Determine whether the `lang` value represents a right-to-left language
+    is_rtl_lang = lang in RTL_LANGUAGES
+    direction_rtl = " direction: rtl;" if is_rtl_lang else ""
+
+    # Parse assertion type to get aggregate function and comparison operator
+    # Format: col_{agg}_{comp} (e.g., col_sum_eq, col_avg_gt, col_sd_le)
+    parts = assertion_type.split("_")
+    agg_type = parts[1]  # sum, avg, sd
+    comp_type = parts[2]  # eq, gt, ge, lt, le
+
+    # Map aggregate type to display name
+    agg_display = {"sum": "SUM", "avg": "AVG", "sd": "SD"}.get(agg_type, agg_type.upper())
+
+    # Map comparison type to symbol
+    comp_symbols = {
+        "eq": "=",
+        "gt": ">",
+        "ge": "≥",
+        "lt": "<",
+        "le": "≤",
+    }
+    comp_symbol = comp_symbols.get(comp_type, comp_type)
+
+    # Get computed values from val_info (stored during interrogation)
+    if val_info is not None:
+        actual = val_info.get("actual", None)
+        target = val_info.get("target", None)
+        tol = val_info.get("tol", 0)
+        lower_bound = val_info.get("lower_bound", target)
+        upper_bound = val_info.get("upper_bound", target)
+    else:
+        # Fallback if val_info is not available
+        actual = None
+        target = values.get("value", None)
+        tol = values.get("tol", 0)
+        lower_bound = target
+        upper_bound = target
+
+    # Format column name for display (handle list vs string)
+    if isinstance(column, list):
+        column_display = column[0] if len(column) == 1 else ", ".join(column)
+    else:
+        column_display = str(column)
+
+    # Generate assertion text for header
+    if target is not None:
+        target_display = f"{target:,.6g}" if isinstance(target, float) else f"{target:,}"
+        assertion_text = f"{agg_display}({column_display}) {comp_symbol} {target_display}"
+    else:
+        assertion_text = f"{agg_display}({column_display}) {comp_symbol} ?"
+
+    # Calculate difference from boundary
+    if actual is not None and target is not None:
+        if comp_type == "eq":
+            # For equality, show distance from target (considering tolerance)
+            if lower_bound == upper_bound:
+                difference = actual - target
+            else:
+                # With tolerance, show distance from nearest bound
+                if actual < lower_bound:
+                    difference = actual - lower_bound
+                elif actual > upper_bound:
+                    difference = actual - upper_bound
+                else:
+                    difference = 0  # Within bounds
+        elif comp_type in ["gt", "ge"]:
+            # Distance from lower bound (positive if passing)
+            difference = actual - lower_bound
+        elif comp_type in ["lt", "le"]:
+            # Distance from upper bound (negative if passing)
+            difference = actual - upper_bound
+        else:
+            difference = actual - target
+    else:
+        difference = None
+
+    # Format values for display
+    def format_value(v):
+        if v is None:
+            return "—"
+        if isinstance(v, float):
+            return f"{v:,.6g}"
+        return f"{v:,}"
+
+    # Format tolerance for display
+    if tol == 0:
+        tol_display = "—"
+    elif isinstance(tol, tuple):
+        tol_display = f"(-{tol[0]}, +{tol[1]})"
+    else:
+        tol_display = f"±{tol}"
+
+    # Format difference with sign
+    if difference is not None:
+        if difference == 0:
+            diff_display = "0"
+        elif difference > 0:
+            diff_display = (
+                f"+{difference:,.6g}" if isinstance(difference, float) else f"+{difference:,}"
+            )
+        else:
+            diff_display = (
+                f"{difference:,.6g}" if isinstance(difference, float) else f"{difference:,}"
+            )
+    else:
+        diff_display = "—"
+
+    # Create pass/fail indicator
+    if all_passed:
+        status_html = CHECK_MARK_SPAN
+        status_color = "#4CA64C"
+    else:
+        status_html = CROSS_MARK_SPAN
+        status_color = "#CF142B"
+
+    # Select DataFrame library (prefer Polars, fall back to Pandas)
+    if _is_lib_present("polars"):
+        import polars as pl
+
+        df_lib = pl
+    elif _is_lib_present("pandas"):  # pragma: no cover
+        import pandas as pd  # pragma: no cover
+
+        df_lib = pd  # pragma: no cover
+    else:  # pragma: no cover
+        raise ImportError(
+            "Neither Polars nor Pandas is available for step report generation"
+        )  # pragma: no cover
+
+    # Create the data for the 1-row table
+    report_data = df_lib.DataFrame(
+        {
+            "actual": [format_value(actual)],
+            "target": [format_value(target)],
+            "tolerance": [tol_display],
+            "difference": [diff_display],
+            "status": [status_html],
+        }
+    )
+
+    # Create GT table with styling matching preview() and other step reports
+    step_report = (
+        GT(report_data, id="pb_step_tbl")
+        .opt_table_font(font=google_font(name="IBM Plex Sans"))
+        .opt_align_table_header(align="left")
+        .cols_label(
+            actual="ACTUAL",
+            target="EXPECTED",
+            tolerance="TOL",
+            difference="DIFFERENCE",
+            status="",
+        )
+        .cols_align(align="center")
+        .fmt_markdown(columns=["actual", "target", "tolerance", "difference", "status"])
+        .tab_style(
+            style=style.text(color="black", font=google_font(name="IBM Plex Mono"), size="13px"),
+            locations=loc.body(columns=["actual", "target", "tolerance", "difference"]),
+        )
+        .tab_style(
+            style=style.text(size="13px"),
+            locations=loc.body(columns="status"),
+        )
+        .tab_style(
+            style=style.text(color="gray20", font=google_font(name="IBM Plex Mono"), size="12px"),
+            locations=loc.column_labels(),
+        )
+        .tab_style(
+            style=style.borders(
+                sides=["top", "bottom"], color="#E9E9E9", style="solid", weight="1px"
+            ),
+            locations=loc.body(),
+        )
+        .tab_options(
+            table_body_vlines_style="solid",
+            table_body_vlines_width="1px",
+            table_body_vlines_color="#E9E9E9",
+            column_labels_vlines_style="solid",
+            column_labels_vlines_width="1px",
+            column_labels_vlines_color="#F2F2F2",
+        )
+        .cols_width(
+            cases={
+                "actual": "200px",
+                "target": "200px",
+                "tolerance": "150px",
+                "difference": "200px",
+                "status": "50px",
+            }
+        )
+    )
+
+    # Apply styling based on pass/fail
+    if all_passed:
+        step_report = step_report.tab_style(
+            style=[
+                style.text(color="#006400"),
+                style.fill(color="#4CA64C33"),
+            ],
+            locations=loc.body(columns="status"),
+        )
+    else:
+        step_report = step_report.tab_style(
+            style=[
+                style.text(color="#B22222"),
+                style.fill(color="#FFC1C159"),
+            ],
+            locations=loc.body(columns="status"),
+        )
+
+    # If the version of `great_tables` is `>=0.17.0` then disable Quarto table processing
+    if version("great_tables") >= "0.17.0":
+        step_report = step_report.tab_options(quarto_disable_processing=True)
+
+    # If no header requested, return the table as-is
+    if header is None:
+        return step_report
+
+    # Create header content
+    assertion_header_text = STEP_REPORT_TEXT["assertion_header_text"][lang]
+
+    # Wrap assertion text in styled code tag
+    assertion_code = (
+        f"<code style='color: #303030; font-family: monospace; font-size: smaller;'>"
+        f"{assertion_text}</code>"
+    )
+
+    if all_passed:
+        title = STEP_REPORT_TEXT["report_for_step_i"][lang].format(i=i) + " " + CHECK_MARK_SPAN
+        result_stmt = STEP_REPORT_TEXT.get("agg_success_statement", {}).get(
+            lang,
+            f"The aggregate value for column <code>{column_display}</code> satisfies the condition.",
+        )
+        if isinstance(result_stmt, str) and "{column}" in result_stmt:
+            result_stmt = result_stmt.format(column=column_display)
+    else:
+        title = STEP_REPORT_TEXT["report_for_step_i"][lang].format(i=i) + " " + CROSS_MARK_SPAN
+        result_stmt = STEP_REPORT_TEXT.get("agg_failure_statement", {}).get(
+            lang,
+            f"The aggregate value for column <code>{column_display}</code> does not satisfy the condition.",
+        )
+        if isinstance(result_stmt, str) and "{column}" in result_stmt:
+            result_stmt = result_stmt.format(column=column_display)
+
+    details = (
+        f"<div style='font-size: 13.6px; {direction_rtl}'>"
+        "<div style='padding-top: 7px;'>"
+        f"{assertion_header_text} <span style='border-style: solid; border-width: thin; "
+        "border-color: lightblue; padding-left: 2px; padding-right: 2px;'>"
+        "<code style='color: #303030; background-color: transparent; "
+        f"position: relative; bottom: 1px;'>{assertion_code}</code></span>"
+        "</div>"
+        "<div style='padding-top: 7px;'>"
+        f"{result_stmt}"
+        "</div>"
+        "</div>"
+    )
+
+    # Generate the default template text for the header when `":default:"` is used
+    if header == ":default:":
+        header = "{title}{details}"
+
+    # Use commonmark to convert the header text to HTML
+    header = commonmark.commonmark(header)
+
+    # Place any templated text in the header
+    header = header.format(title=title, details=details)
+
+    # Create the header with `header` string
+    step_report = step_report.tab_header(title=md(header))
+
+    return step_report
+
+
 def _step_report_schema_in_order(
-    step: int, schema_info: dict, header: str, lang: str, debug_return_df: bool = False
-) -> GT |
+    step: int, schema_info: dict, header: str | None, lang: str, debug_return_df: bool = False
+) -> GT | Any:
     """
     This is the case for schema validation where the schema is supposed to have the same column
     order as the target table.
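The DIFFERENCE column in the new aggregate step report is measured against the tolerance-adjusted boundary rather than the raw target. A standalone sketch of that branch logic, using local names that are not part of the package API:

```python
# Sketch of the difference-from-boundary logic in _step_report_aggregate().
def boundary_difference(comp_type, actual, target, lower_bound, upper_bound):
    if comp_type == "eq":
        if lower_bound == upper_bound:
            return actual - target          # no tolerance band
        if actual < lower_bound:
            return actual - lower_bound     # undershoot
        if actual > upper_bound:
            return actual - upper_bound     # overshoot
        return 0                            # within the tolerance band
    if comp_type in ("gt", "ge"):
        return actual - lower_bound         # positive when passing
    if comp_type in ("lt", "le"):
        return actual - upper_bound         # negative when passing
    return actual - target


# SUM(a) == 15 with tol=1 -> bounds [14, 16]; an actual of 16.5 overshoots by 0.5
print(boundary_difference("eq", 16.5, 15, 14, 16))  # 0.5
```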
@@ -20195,22 +21678,22 @@ def _step_report_schema_in_order(

         # Check if this column exists in exp_columns_dict (it might not if it's a duplicate)
         # For duplicates, we need to handle them specially
-        if column_name_exp_i not in exp_columns_dict:
+        if column_name_exp_i not in exp_columns_dict:  # pragma: no cover
             # This is a duplicate or invalid column, mark it as incorrect
-            col_exp_correct.append(CROSS_MARK_SPAN)
+            col_exp_correct.append(CROSS_MARK_SPAN)  # pragma: no cover

             # For dtype, check if there's a dtype specified in the schema
-            if len(expect_schema[i]) > 1:
-                dtype_value = expect_schema[i][1]
-                if isinstance(dtype_value, list):
-                    dtype_exp.append(" | ".join(dtype_value))
-                else:
-                    dtype_exp.append(str(dtype_value))
-            else:
-                dtype_exp.append("—")
+            if len(expect_schema[i]) > 1:  # pragma: no cover
+                dtype_value = expect_schema[i][1]  # pragma: no cover
+                if isinstance(dtype_value, list):  # pragma: no cover
+                    dtype_exp.append(" | ".join(dtype_value))  # pragma: no cover
+                else:  # pragma: no cover
+                    dtype_exp.append(str(dtype_value))  # pragma: no cover
+            else:  # pragma: no cover
+                dtype_exp.append("—")  # pragma: no cover

-            dtype_exp_correct.append("—")
-            continue
+            dtype_exp_correct.append("—")  # pragma: no cover
+            continue  # pragma: no cover

     #
     # `col_exp_correct` values
@@ -20433,7 +21916,9 @@ def _step_report_schema_in_order(
     # Add a border below the row that terminates the target table schema
     step_report = step_report.tab_style(
         style=style.borders(sides="bottom", color="#6699CC80", style="solid", weight="1px"),
-        locations=loc.body(
+        locations=loc.body(
+            rows=len(colnames_tgt) - 1  # ty: ignore (bug in GT, should allow an int)
+        ),
     )

     # If the version of `great_tables` is `>=0.17.0` then disable Quarto table processing
@@ -20482,8 +21967,8 @@ def _step_report_schema_in_order(


 def _step_report_schema_any_order(
-    step: int, schema_info: dict, header: str, lang: str, debug_return_df: bool = False
-) -> GT |
+    step: int, schema_info: dict, header: str | None, lang: str, debug_return_df: bool = False
+) -> GT | pl.DataFrame:
     """
     This is the case for schema validation where the schema is permitted to not have to be in the
     same column order as the target table.
@@ -20902,9 +22387,7 @@ def _step_report_schema_any_order(
     header = header.format(title=title, details=details)

     # Create the header with `header` string
-
-
-    return step_report
+    return step_report.tab_header(title=md(header))


 def _create_label_text_html(
@@ -20993,3 +22476,321 @@ def _create_col_schema_match_params_html(
         f"{full_match_dtypes_text}"
         "</div>"
     )
+
+
+def _generate_agg_docstring(name: str) -> str:
+    """Generate a comprehensive docstring for an aggregation validation method.
+
+    This function creates detailed documentation for dynamically generated methods like
+    `col_sum_eq()`, `col_avg_gt()`, `col_sd_le()`, etc. The docstrings follow the same
+    structure and quality as manually written validation methods like `col_vals_gt()`.
+
+    Parameters
+    ----------
+    name
+        The method name (e.g., "col_sum_eq", "col_avg_gt", "col_sd_le").
+
+    Returns
+    -------
+    str
+        A complete docstring for the method.
+    """
+    # Parse the method name to extract aggregation type and comparison operator
+    # Format: col_{agg}_{comp} (e.g., col_sum_eq, col_avg_gt, col_sd_le)
+    parts = name.split("_")
+    agg_type = parts[1]  # sum, avg, sd
+    comp_type = parts[2]  # eq, gt, ge, lt, le
+
+    # Human-readable names for aggregation types
+    agg_names = {
+        "sum": ("sum", "summed"),
+        "avg": ("average", "averaged"),
+        "sd": ("standard deviation", "computed for standard deviation"),
+    }
+
+    # Human-readable descriptions for comparison operators (with article for title)
+    comp_descriptions = {
+        "eq": ("equal to", "equals", "an"),
+        "gt": ("greater than", "is greater than", "a"),
+        "ge": ("greater than or equal to", "is at least", "a"),
+        "lt": ("less than", "is less than", "a"),
+        "le": ("less than or equal to", "is at most", "a"),
+    }
+
+    # Mathematical symbols for comparison operators
+    comp_symbols = {
+        "eq": "==",
+        "gt": ">",
+        "ge": ">=",
+        "lt": "<",
+        "le": "<=",
+    }
+
+    agg_name, agg_verb = agg_names[agg_type]
+    comp_desc, comp_phrase, comp_article = comp_descriptions[comp_type]
+    comp_symbol = comp_symbols[comp_type]
+
+    # Determine the appropriate example values based on the aggregation and comparison
+    if agg_type == "sum":
+        example_value = "15"
+        example_data = '{"a": [1, 2, 3, 4, 5], "b": [2, 2, 2, 2, 2]}'
+        example_sum = "15"  # sum of a
+        example_ref_sum = "10"  # sum of b
+    elif agg_type == "avg":
+        example_value = "3"
+        example_data = '{"a": [1, 2, 3, 4, 5], "b": [2, 2, 2, 2, 2]}'
+        example_sum = "3.0"  # avg of a
+        example_ref_sum = "2.0"  # avg of b
+    else:  # sd
+        example_value = "2"
+        example_data = '{"a": [1, 2, 3, 4, 5], "b": [2, 2, 2, 2, 2]}'
+        example_sum = "~1.58"  # sd of a
+        example_ref_sum = "0.0"  # sd of b
+
+    # Build appropriate tolerance explanation based on comparison type
+    if comp_type == "eq":
+        tol_explanation = f"""The `tol=` parameter is particularly useful with `{name}()` since exact equality
+    comparisons on floating-point aggregations can be problematic due to numerical precision.
+    Setting a small tolerance (e.g., `tol=0.001`) allows for minor differences that arise from
+    floating-point arithmetic."""
+    else:
+        tol_explanation = f"""The `tol=` parameter expands the acceptable range for the comparison. For
+    `{name}()`, a tolerance of `tol=0.5` would mean the {agg_name} can be within `0.5` of the
+    target value and still pass validation."""
+
+    docstring = f"""
+    Does the column {agg_name} satisfy {comp_article} {comp_desc} comparison?
+
+    The `{name}()` validation method checks whether the {agg_name} of values in a column
+    {comp_phrase} a specified `value=`. This is an aggregation-based validation where the entire
+    column is reduced to a single {agg_name} value that is then compared against the target. The
+    comparison used in this function is `{agg_name}(column) {comp_symbol} value`.
+
+    Unlike row-level validations (e.g., `col_vals_gt()`), this method treats the entire column as
+    a single test unit. The validation either passes completely (if the aggregated value satisfies
+    the comparison) or fails completely.
+
+    Parameters
+    ----------
+    columns
+        A single column or a list of columns to validate. If multiple columns are supplied,
+        there will be a separate validation step generated for each column. The columns must
+        contain numeric data for the {agg_name} to be computed.
+    value
+        The value to compare the column {agg_name} against. This can be: (1) a numeric literal
+        (`int` or `float`), (2) a [`col()`](`pointblank.col`) object referencing another column
+        whose {agg_name} will be used for comparison, (3) a [`ref()`](`pointblank.ref`) object
+        referencing a column in reference data (when `Validate(reference=)` has been set), or (4)
+        `None` to automatically compare against the same column in reference data (shorthand for
+        `ref(column_name)` when reference data is set).
+    tol
+        A tolerance value for the comparison. The default is `0`, meaning exact comparison. When
+        set to a positive value, the comparison becomes more lenient. For example, with `tol=0.5`,
+        a {agg_name} that differs from the target by up to `0.5` will still pass. {tol_explanation}
+    thresholds
+        Failure threshold levels so that the validation step can react accordingly when
+        failing test units reach the set levels. Since this is an aggregation-based validation with
+        only one test unit, threshold values typically should be set as absolute counts (e.g., `1`)
+        to indicate pass/fail, or as proportions where any value less than `1.0` means failure is
+        acceptable.
+    brief
+        An optional brief description of the validation step that will be displayed in the
+        reporting table. You can use the templating elements like `"{{step}}"` to insert
+        the step number, or `"{{auto}}"` to include an automatically generated brief. If `True`
+        the entire brief will be automatically generated. If `None` (the default) then there
+        won't be a brief.
+    actions
+        Optional actions to take when the validation step meets or exceeds any set threshold
+        levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
+        define the actions.
+    active
+        A boolean value indicating whether the validation step should be active. Using `False`
+        will make the validation step inactive (still reporting its presence and keeping indexes
+        for the steps unchanged).
+
+    Returns
+    -------
+    Validate
+        The `Validate` object with the added validation step.
+
+    Using Reference Data
+    --------------------
+    The `{name}()` method supports comparing column aggregations against reference data. This
+    is useful for validating that statistical properties remain consistent across different
+    versions of a dataset, or for comparing current data against historical baselines.
+
+    To use reference data, set the `reference=` parameter when creating the `Validate` object:
+
+    ```python
+    validation = (
+        pb.Validate(data=current_data, reference=baseline_data)
+        .{name}(columns="revenue")  # Compares sum(current.revenue) vs sum(baseline.revenue)
+        .interrogate()
+    )
+    ```
+
+    When `value=None` and reference data is set, the method automatically compares against the
+    same column in the reference data. You can also explicitly specify reference columns using
+    the `ref()` helper:
+
+    ```python
+    .{name}(columns="revenue", value=pb.ref("baseline_revenue"))
+    ```
+
+    Understanding Tolerance
+    -----------------------
+    The `tol=` parameter allows for fuzzy comparisons, which is especially important for
+    floating-point aggregations where exact equality is often unreliable.
+
+    {tol_explanation}
+
+    For equality comparisons (`col_*_eq`), the tolerance creates a range `[value - tol, value + tol]`
+    within which the aggregation is considered valid. For inequality comparisons, the tolerance
+    shifts the comparison boundary.
+
+    Thresholds
+    ----------
+    The `thresholds=` parameter is used to set the failure-condition levels for the validation
+    step. If they are set here at the step level, these thresholds will override any thresholds
+    set at the global level in `Validate(thresholds=...)`.
+
+    There are three threshold levels: 'warning', 'error', and 'critical'. Since aggregation
+    validations operate on a single test unit (the aggregated value), threshold values are
+    typically set as absolute counts:
+
+    - `thresholds=1` means any failure triggers a 'warning'
+    - `thresholds=(1, 1, 1)` means any failure triggers all three levels
+
+    Thresholds can be defined using one of these input schemes:
+
+    1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
+    thresholds)
+    2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1` is
+    the 'error' level, and position `2` is the 'critical' level
+    3. create a dictionary of 1-3 value entries; the valid keys are: 'warning', 'error', and
+    'critical'
+    4. a single integer/float value denoting absolute number or fraction of failing test units
+    for the 'warning' level only
+
+    Examples
+    --------
+    ```{{python}}
+    #| echo: false
+    #| output: false
+    import pointblank as pb
+    pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
+    ```
+    For the examples, we'll use a simple Polars DataFrame with numeric columns. The table is
+    shown below:
+
+    ```{{python}}
+    import pointblank as pb
+    import polars as pl
+
+    tbl = pl.DataFrame(
+        {{
+            "a": [1, 2, 3, 4, 5],
+            "b": [2, 2, 2, 2, 2],
+        }}
+    )
+
+    pb.preview(tbl)
+    ```
+
+    Let's validate that the {agg_name} of column `a` {comp_phrase} `{example_value}`:
+
+    ```{{python}}
+    validation = (
+        pb.Validate(data=tbl)
+        .{name}(columns="a", value={example_value})
+        .interrogate()
+    )
+
+    validation
+    ```
+
+    The validation result shows whether the {agg_name} comparison passed or failed. Since this
+    is an aggregation-based validation, there is exactly one test unit per column.
+
+    When validating multiple columns, each column gets its own validation step:
+
+    ```{{python}}
+    validation = (
+        pb.Validate(data=tbl)
+        .{name}(columns=["a", "b"], value={example_value})
+        .interrogate()
+    )
+
+    validation
+    ```
+
+    Using tolerance for flexible comparisons:
+
+    ```{{python}}
+    validation = (
+        pb.Validate(data=tbl)
+        .{name}(columns="a", value={example_value}, tol=1.0)
+        .interrogate()
+    )
+
+    validation
+    ```
+    """
+
+    return docstring.strip()
+
+
+def make_agg_validator(name: str):
+    """Factory for dynamically generated aggregate validation methods.
+
+    Why this exists:
+    Aggregate validators all share identical behavior. The only thing that differs
+    between them is the semantic assertion type (their name). The implementation
+    of each aggregate validator is fetched from `from_agg_validator`.
+
+    Instead of copy/pasting dozens of identical methods, we generate
+    them dynamically and attach them to the Validate class. The types are generated
+    at build time with `make pyi` to allow the methods to be visible to the type checker,
+    documentation builders and the IDEs/LSPs.
+
+    The returned function is a thin adapter that forwards all arguments to
+    `_add_agg_validation`, supplying the assertion type explicitly.
+    """
+
+    def agg_validator(
+        self: Validate,
+        columns: str | Collection[str],
+        value: float | int | Column | ReferenceColumn | None = None,
+        tol: float = 0,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
+        brief: str | bool | None = None,
+        actions: Actions | None = None,
+        active: bool = True,
+    ) -> Validate:
+        # Dynamically generated aggregate validator.
+        # This method is generated per assertion type and forwards all arguments
+        # to the shared aggregate validation implementation.
+        return self._add_agg_validation(
+            assertion_type=name,
+            columns=columns,
+            value=value,
+            tol=tol,
+            thresholds=thresholds,
+            brief=brief,
+            actions=actions,
+            active=active,
+        )
+
+    # Manually set function identity so this behaves like a real method.
+    # These must be set before attaching the function to the class.
+    agg_validator.__name__ = name
+    agg_validator.__qualname__ = f"Validate.{name}"
+    agg_validator.__doc__ = _generate_agg_docstring(name)
+
+    return agg_validator
+
+
+# Finally, we grab all the valid aggregation method names and attach them to
+# the Validate class, registering each one appropriately.
+for method in load_validation_method_grid():  # -> `col_sum_*`, `col_mean_*`, etc.
+    setattr(Validate, method, make_agg_validator(method))