pointblank-0.13.1-py3-none-any.whl → pointblank-0.13.3-py3-none-any.whl
- pointblank/__init__.py +0 -2
- pointblank/_constants.py +2 -28
- pointblank/_constants_translations.py +54 -0
- pointblank/_interrogation.py +1483 -1735
- pointblank/column.py +6 -2
- pointblank/datascan.py +3 -2
- pointblank/schema.py +155 -1
- pointblank/validate.py +626 -334
- pointblank/yaml.py +154 -44
- {pointblank-0.13.1.dist-info → pointblank-0.13.3.dist-info}/METADATA +3 -2
- {pointblank-0.13.1.dist-info → pointblank-0.13.3.dist-info}/RECORD +15 -16
- pointblank/tf.py +0 -287
- {pointblank-0.13.1.dist-info → pointblank-0.13.3.dist-info}/WHEEL +0 -0
- {pointblank-0.13.1.dist-info → pointblank-0.13.3.dist-info}/entry_points.txt +0 -0
- {pointblank-0.13.1.dist-info → pointblank-0.13.3.dist-info}/licenses/LICENSE +0 -0
- {pointblank-0.13.1.dist-info → pointblank-0.13.3.dist-info}/top_level.txt +0 -0
pointblank/validate.py
CHANGED
```diff
@@ -31,7 +31,6 @@ from pointblank._constants import (
     CROSS_MARK_SPAN,
     IBIS_BACKENDS,
     LOG_LEVELS_MAP,
-    METHOD_CATEGORY_MAP,
     REPORTING_LANGUAGES,
     ROW_BASED_VALIDATION_TYPES,
     RTL_LANGUAGES,
@@ -46,25 +45,35 @@ from pointblank._constants_translations import (
     VALIDATION_REPORT_TEXT,
 )
 from pointblank._interrogation import (
-    ColCountMatch,
-    ColExistsHasType,
-    ColSchemaMatch,
-    ColValsCompareOne,
-    ColValsCompareSet,
-    ColValsCompareTwo,
-    ColValsExpr,
-    ColValsRegex,
-    ConjointlyValidation,
     NumberOfTestUnits,
-    RowCountMatch,
-    RowsComplete,
-    RowsDistinct,
     SpeciallyValidation,
+    col_count_match,
+    col_exists,
+    col_schema_match,
+    col_vals_expr,
+    conjointly_validation,
+    interrogate_between,
+    interrogate_eq,
+    interrogate_ge,
+    interrogate_gt,
+    interrogate_isin,
+    interrogate_le,
+    interrogate_lt,
+    interrogate_ne,
+    interrogate_not_null,
+    interrogate_notin,
+    interrogate_null,
+    interrogate_outside,
+    interrogate_regex,
+    interrogate_rows_distinct,
+    row_count_match,
+    rows_complete,
 )
 from pointblank._typing import SegmentSpec
 from pointblank._utils import (
     _check_any_df_lib,
     _check_invalid_fields,
+    _column_test_prep,
     _count_null_values_in_column,
     _count_true_values_in_column,
     _derive_bounds,
@@ -1584,13 +1593,22 @@ def _generate_display_table(
 
     tail_data = pd.DataFrame(columns=head_data.columns)
 
-    data = pd.concat([head_data, tail_data])
+    # Suppress the FutureWarning about DataFrame concatenation with empty entries
+    import warnings
+
+    with warnings.catch_warnings():
+        warnings.filterwarnings(
+            "ignore",
+            category=FutureWarning,
+            message="The behavior of DataFrame concatenation with empty or all-NA entries is deprecated",
+        )
+        data = pd.concat([head_data, tail_data])
 
     row_number_list = list(range(1, n_head + 1)) + list(
         range(n_rows - n_tail + 1, n_rows + 1)
     )
 
-    # For PySpark, update schema after conversion to
+    # For PySpark, update schema after conversion to Pandas
     if tbl_type == "pyspark":
         tbl_schema = Schema(tbl=data)
 
```
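The concatenation change above wraps the head/tail join in a narrowly scoped warning filter rather than silencing `FutureWarning` globally. A minimal standalone sketch of the same pattern (the frames here are illustrative, not pointblank's):

```python
import warnings

import pandas as pd

head_data = pd.DataFrame({"a": [1, 2, 3]})
tail_data = pd.DataFrame(columns=head_data.columns)  # empty frame with matching columns

# pandas 2.1+ emits a FutureWarning when concatenating with empty or all-NA
# entries; matching on the message keeps every other FutureWarning visible,
# and the filter only lives for the duration of the `with` block.
with warnings.catch_warnings():
    warnings.filterwarnings(
        "ignore",
        category=FutureWarning,
        message="The behavior of DataFrame concatenation with empty or all-NA entries is deprecated",
    )
    data = pd.concat([head_data, tail_data])

print(data)  # the three head rows; the empty tail contributes nothing
```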
```diff
@@ -1988,9 +2006,9 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
 
         # Apply the appropriate conversion method
         if use_polars_conversion:
-            null_sum_converted = null_sum.to_polars()
+            null_sum_converted = null_sum.to_polars()  # pragma: no cover
         else:
-            null_sum_converted = null_sum.to_pandas()
+            null_sum_converted = null_sum.to_pandas()  # pragma: no cover
 
         missing_prop = (null_sum_converted / sector_size) * 100
         col_missing_props.append(missing_prop)
@@ -2007,9 +2025,9 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
 
         # Apply the appropriate conversion method
         if use_polars_conversion:
-            null_sum_converted = null_sum.to_polars()
+            null_sum_converted = null_sum.to_polars()  # pragma: no cover
         else:
-            null_sum_converted = null_sum.to_pandas()
+            null_sum_converted = null_sum.to_pandas()  # pragma: no cover
 
         missing_prop = (null_sum_converted / sector_size) * 100
         col_missing_props.append(missing_prop)
@@ -2022,9 +2040,13 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
 
     # Use the helper function based on the DataFrame library
     if df_lib_name_gt == "polars":
-        missing_vals = _calculate_missing_proportions(use_polars_conversion=True)
+        missing_vals = _calculate_missing_proportions(
+            use_polars_conversion=True
+        )  # pragma: no cover
     else:
-        missing_vals = _calculate_missing_proportions(use_polars_conversion=False)
+        missing_vals = _calculate_missing_proportions(
+            use_polars_conversion=False
+        )  # pragma: no cover
 
     # Pivot the `missing_vals` dictionary to create a table with the missing value proportions
     missing_vals = {
@@ -2037,9 +2059,13 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
 
     # Get a dictionary of counts of missing values in each column
     if df_lib_name_gt == "polars":
-        missing_val_counts = {col: data[col].isnull().sum().to_polars() for col in data.columns}
+        missing_val_counts = {
+            col: data[col].isnull().sum().to_polars() for col in data.columns
+        }  # pragma: no cover
     else:
-        missing_val_counts = {col: data[col].isnull().sum().to_pandas() for col in data.columns}
+        missing_val_counts = {
+            col: data[col].isnull().sum().to_pandas() for col in data.columns
+        }  # pragma: no cover
 
     if pl_pb_tbl:
         # Get the column names from the table
@@ -2398,10 +2424,31 @@ def _get_row_ranges(cut_points: list[int], n_rows: int) -> list[list[int]]:
     return [lhs_values, rhs_values]
 
 
+def _get_column_names_safe(data: Any) -> list[str]:
+    """
+    Safely get column names from a DataFrame, optimized for LazyFrames.
+    This function avoids the Narwhals PerformanceWarning for LazyFrames.
+    """
+    try:
+        import narwhals as nw
+
+        df_nw = nw.from_native(data)
+        # Use `collect_schema()` for LazyFrames to avoid performance warnings
+        if hasattr(df_nw, "collect_schema"):
+            return list(df_nw.collect_schema().keys())
+        else:
+            return list(df_nw.columns)  # pragma: no cover
+    except Exception:  # pragma: no cover
+        # Fallback to direct column access
+        return list(data.columns)  # pragma: no cover
+
+
 def _get_column_names(data: FrameT | Any, ibis_tbl: bool, df_lib_name_gt: str) -> list[str]:
     if ibis_tbl:
         return data.columns if df_lib_name_gt == "polars" else list(data.columns)
-
+
+    # Use the optimized helper function
+    return _get_column_names_safe(data)
 
 
 def _validate_columns_subset(
```
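`_get_column_names_safe()` and the `get_column_count()` change below both prefer `collect_schema()` because reading `.columns` on a Narwhals LazyFrame triggers a `PerformanceWarning` (the schema has to be resolved either way). A small sketch of the preferred call, assuming Polars and Narwhals are installed:

```python
import narwhals as nw
import polars as pl

lf = pl.LazyFrame({"x": [1, 2], "y": ["a", "b"]})
df_nw = nw.from_native(lf)

# Resolves the schema once, without touching the data and without the warning
schema = df_nw.collect_schema()
print(list(schema.keys()))  # ['x', 'y']
print(len(schema))          # 2, the column count
```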
```diff
@@ -2590,7 +2637,11 @@ def get_column_count(data: FrameT | Any) -> int:
         import narwhals as nw
 
         df_nw = nw.from_native(data)
-        return len(df_nw.columns)
+        # Use `collect_schema()` for LazyFrames to avoid performance warnings
+        if hasattr(df_nw, "collect_schema"):
+            return len(df_nw.collect_schema())
+        else:
+            return len(df_nw.columns)  # pragma: no cover
     except Exception:
         # Fallback for unsupported types
         if "pandas" in str(type(data)):
@@ -2763,11 +2814,11 @@ def get_row_count(data: FrameT | Any) -> int:
         # Try different ways to get row count
         if hasattr(df_nw, "shape"):
             return df_nw.shape[0]
-        elif hasattr(df_nw, "height"):
+        elif hasattr(df_nw, "height"):  # pragma: no cover
             return df_nw.height  # pragma: no cover
         else:  # pragma: no cover
             raise ValueError("Unable to determine row count from Narwhals DataFrame")
-    except Exception:
+    except Exception:  # pragma: no cover
         # Fallback for types that don't work with Narwhals
         if "pandas" in str(type(data)):  # pragma: no cover
             return data.shape[0]
@@ -4702,7 +4753,8 @@ class Validate:
         _check_boolean_input(param=active, param_name="active")
 
         # If value is a string-based date or datetime, convert it to the appropriate type
-        value = _string_date_dttm_conversion(value=value)
+        # Allow regular strings to pass through for string comparisons
+        value = _conditional_string_date_dttm_conversion(value=value, allow_regular_strings=True)
 
         # Determine threshold to use (global or local) and normalize a local `thresholds=` value
         thresholds = (
@@ -4990,7 +5042,8 @@ class Validate:
         _check_boolean_input(param=active, param_name="active")
 
         # If value is a string-based date or datetime, convert it to the appropriate type
-        value = _string_date_dttm_conversion(value=value)
+        # Allow regular strings to pass through for string comparisons
+        value = _conditional_string_date_dttm_conversion(value=value, allow_regular_strings=True)
 
         # Determine threshold to use (global or local) and normalize a local `thresholds=` value
         thresholds = (
@@ -8356,8 +8409,8 @@ class Validate:
             self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
         )
 
-        if columns_subset is not None and isinstance(columns_subset, str):
-            columns_subset = [columns_subset]
+        if columns_subset is not None and isinstance(columns_subset, str):  # pragma: no cover
+            columns_subset = [columns_subset]  # pragma: no cover
 
         # TODO: incorporate Column object
 
@@ -9738,8 +9791,8 @@ class Validate:
             threshold = validation.thresholds
             segment = validation.segments
 
+            # Get compatible data types for this assertion type
             assertion_method = ASSERTION_TYPE_METHOD_MAP[assertion_type]
-            assertion_category = METHOD_CATEGORY_MAP[assertion_method]
             compatible_dtypes = COMPATIBLE_DTYPES.get(assertion_method, [])
 
             # Process the `brief` text for the validation step by including template variables to
@@ -9870,197 +9923,249 @@ class Validate:
             # Validation stage
             # ------------------------------------------------
 
-            …
-                    tbl_type=tbl_type,
-                ).get_test_results()
-
-            if assertion_category == "COMPARE_SET":
-                inside = True if assertion_method == "in_set" else False
-
-                results_tbl = ColValsCompareSet(
-                    data_tbl=data_tbl_step,
-                    column=column,
-                    values=value,
-                    threshold=threshold,
-                    inside=inside,
-                    allowed_types=compatible_dtypes,
-                    tbl_type=tbl_type,
-                ).get_test_results()
-
-            if assertion_category == "COMPARE_REGEX":
-                results_tbl = ColValsRegex(
-                    data_tbl=data_tbl_step,
-                    column=column,
-                    pattern=value,
-                    na_pass=na_pass,
-                    threshold=threshold,
-                    allowed_types=compatible_dtypes,
-                    tbl_type=tbl_type,
-                ).get_test_results()
-
-            if assertion_category == "COMPARE_EXPR":
-                results_tbl = ColValsExpr(
-                    data_tbl=data_tbl_step,
-                    expr=value,
-                    threshold=threshold,
-                    tbl_type=tbl_type,
-                ).get_test_results()
-
-            if assertion_category == "ROWS_DISTINCT":
-                results_tbl = RowsDistinct(
-                    data_tbl=data_tbl_step,
-                    columns_subset=column,
-                    threshold=threshold,
-                    tbl_type=tbl_type,
-                ).get_test_results()
-
-            if assertion_category == "ROWS_COMPLETE":
-                results_tbl = RowsComplete(
-                    data_tbl=data_tbl_step,
-                    columns_subset=column,
-                    threshold=threshold,
-                    tbl_type=tbl_type,
-                ).get_test_results()
-
-            if assertion_category == "COL_EXISTS_HAS_TYPE":
-                result_bool = ColExistsHasType(
-                    data_tbl=data_tbl_step,
-                    column=column,
-                    threshold=threshold,
-                    assertion_method="exists",
-                    tbl_type=tbl_type,
-                ).get_test_results()
-
-                validation.all_passed = result_bool
-                validation.n = 1
-                validation.n_passed = result_bool
-                validation.n_failed = 1 - result_bool
-
-                results_tbl = None
-
-            if assertion_category == "COL_SCHEMA_MATCH":
-                result_bool = ColSchemaMatch(
-                    data_tbl=data_tbl_step,
-                    schema=value["schema"],
-                    complete=value["complete"],
-                    in_order=value["in_order"],
-                    case_sensitive_colnames=value["case_sensitive_colnames"],
-                    case_sensitive_dtypes=value["case_sensitive_dtypes"],
-                    full_match_dtypes=value["full_match_dtypes"],
-                    threshold=threshold,
-                ).get_test_results()
-
-                schema_validation_info = _get_schema_validation_info(
-                    data_tbl=data_tbl,
-                    schema=value["schema"],
-                    passed=result_bool,
-                    complete=value["complete"],
-                    in_order=value["in_order"],
-                    case_sensitive_colnames=value["case_sensitive_colnames"],
-                    case_sensitive_dtypes=value["case_sensitive_dtypes"],
-                    full_match_dtypes=value["full_match_dtypes"],
-                )
+            # Apply error handling only to data quality validations, not programming error validations
+            if assertion_type != "specially":
+                try:
+                    # validations requiring `_column_test_prep()`
+                    if assertion_type in [
+                        "col_vals_gt",
+                        "col_vals_lt",
+                        "col_vals_eq",
+                        "col_vals_ne",
+                        "col_vals_ge",
+                        "col_vals_le",
+                        "col_vals_null",
+                        "col_vals_not_null",
+                        "col_vals_between",
+                        "col_vals_outside",
+                        "col_vals_in_set",
+                        "col_vals_not_in_set",
+                        "col_vals_regex",
+                    ]:
+                        # Process table for column validation
+                        tbl = _column_test_prep(
+                            df=data_tbl_step, column=column, allowed_types=compatible_dtypes
+                        )
 
-            …
-                validation.n_passed = int(result_bool)
-                validation.n_failed = 1 - result_bool
-
-                results_tbl = None
-
-            if assertion_category == "CONJOINTLY":
-                results_tbl = ConjointlyValidation(
-                    data_tbl=data_tbl_step,
-                    expressions=value["expressions"],
-                    threshold=threshold,
-                    tbl_type=tbl_type,
-                ).get_test_results()
-
-            if assertion_category == "SPECIALLY":
-                results_tbl_list = SpeciallyValidation(
-                    data_tbl=data_tbl_step,
-                    expression=value,
-                    threshold=threshold,
-                    tbl_type=tbl_type,
-                ).get_test_results()
-
-                #
-                # The result from this could either be a table in the conventional form, or,
-                # a list of boolean values; handle both cases
-                #
-
-                if isinstance(results_tbl_list, list):
-                    # If the result is a list of boolean values, then we need to convert it to a
-                    # set the validation results from the list
-                    validation.all_passed = all(results_tbl_list)
-                    validation.n = len(results_tbl_list)
-                    validation.n_passed = results_tbl_list.count(True)
-                    validation.n_failed = results_tbl_list.count(False)
-
-                    results_tbl = None
+                    if assertion_method == "gt":
+                        results_tbl = interrogate_gt(
+                            tbl=tbl, column=column, compare=value, na_pass=na_pass
+                        )
+                    elif assertion_method == "lt":
+                        results_tbl = interrogate_lt(
+                            tbl=tbl, column=column, compare=value, na_pass=na_pass
+                        )
+                    elif assertion_method == "eq":
+                        results_tbl = interrogate_eq(
+                            tbl=tbl, column=column, compare=value, na_pass=na_pass
+                        )
+                    elif assertion_method == "ne":
+                        results_tbl = interrogate_ne(
+                            tbl=tbl, column=column, compare=value, na_pass=na_pass
+                        )
+                    elif assertion_method == "ge":
+                        results_tbl = interrogate_ge(
+                            tbl=tbl, column=column, compare=value, na_pass=na_pass
+                        )
+                    elif assertion_method == "le":
+                        results_tbl = interrogate_le(
+                            tbl=tbl, column=column, compare=value, na_pass=na_pass
+                        )
+                    elif assertion_method == "null":
+                        results_tbl = interrogate_null(tbl=tbl, column=column)
+                    elif assertion_method == "not_null":
+                        results_tbl = interrogate_not_null(tbl=tbl, column=column)
+
+                    elif assertion_type == "col_vals_between":
+                        results_tbl = interrogate_between(
+                            tbl=tbl,
+                            column=column,
+                            low=value[0],
+                            high=value[1],
+                            inclusive=inclusive,
+                            na_pass=na_pass,
+                        )
 
-                else:
-                    # If the result is not a list, then we assume it's a table in the conventional
-                    # form (where the column is `pb_is_good_` exists, with boolean values
-                    results_tbl = results_tbl_list
+                    elif assertion_type == "col_vals_outside":
+                        results_tbl = interrogate_outside(
+                            tbl=tbl,
+                            column=column,
+                            low=value[0],
+                            high=value[1],
+                            inclusive=inclusive,
+                            na_pass=na_pass,
+                        )
+
+                    elif assertion_type == "col_vals_in_set":
+                        results_tbl = interrogate_isin(tbl=tbl, column=column, set_values=value)
+
+                    elif assertion_type == "col_vals_not_in_set":
+                        results_tbl = interrogate_notin(
+                            tbl=tbl, column=column, set_values=value
+                        )
+
+                    elif assertion_type == "col_vals_regex":
+                        results_tbl = interrogate_regex(
+                            tbl=tbl, column=column, pattern=value, na_pass=na_pass
+                        )
+
+                    elif assertion_type == "col_vals_expr":
+                        results_tbl = col_vals_expr(
+                            data_tbl=data_tbl_step, expr=value, tbl_type=tbl_type
+                        )
+
+                    elif assertion_type == "rows_distinct":
+                        results_tbl = interrogate_rows_distinct(
+                            data_tbl=data_tbl_step, columns_subset=column
+                        )
+
+                    elif assertion_type == "rows_complete":
+                        results_tbl = rows_complete(data_tbl=data_tbl_step, columns_subset=column)
+
+                    elif assertion_type == "col_exists":
+                        result_bool = col_exists(
+                            data_tbl=data_tbl_step,
+                            column=column,
+                        )
+
+                        validation.all_passed = result_bool
+                        validation.n = 1
+                        validation.n_passed = int(result_bool)
+                        validation.n_failed = 1 - int(result_bool)
+
+                        results_tbl = None
+
+                    elif assertion_type == "col_schema_match":
+                        result_bool = col_schema_match(
+                            data_tbl=data_tbl_step,
+                            schema=value["schema"],
+                            complete=value["complete"],
+                            in_order=value["in_order"],
+                            case_sensitive_colnames=value["case_sensitive_colnames"],
+                            case_sensitive_dtypes=value["case_sensitive_dtypes"],
+                            full_match_dtypes=value["full_match_dtypes"],
+                            threshold=threshold,
+                        )
+
+                        schema_validation_info = _get_schema_validation_info(
+                            data_tbl=data_tbl,
+                            schema=value["schema"],
+                            passed=result_bool,
+                            complete=value["complete"],
+                            in_order=value["in_order"],
+                            case_sensitive_colnames=value["case_sensitive_colnames"],
+                            case_sensitive_dtypes=value["case_sensitive_dtypes"],
+                            full_match_dtypes=value["full_match_dtypes"],
+                        )
+
+                        # Add the schema validation info to the validation object
+                        validation.val_info = schema_validation_info
+
+                        validation.all_passed = result_bool
+                        validation.n = 1
+                        validation.n_passed = int(result_bool)
+                        validation.n_failed = 1 - result_bool
+
+                        results_tbl = None
+
+                    elif assertion_type == "row_count_match":
+                        result_bool = row_count_match(
+                            data_tbl=data_tbl_step,
+                            count=value["count"],
+                            inverse=value["inverse"],
+                            abs_tol_bounds=value["abs_tol_bounds"],
+                        )
+
+                        validation.all_passed = result_bool
+                        validation.n = 1
+                        validation.n_passed = int(result_bool)
+                        validation.n_failed = 1 - result_bool
+
+                        results_tbl = None
+
+                    elif assertion_type == "col_count_match":
+                        result_bool = col_count_match(
+                            data_tbl=data_tbl_step, count=value["count"], inverse=value["inverse"]
+                        )
+
+                        validation.all_passed = result_bool
+                        validation.n = 1
+                        validation.n_passed = int(result_bool)
+                        validation.n_failed = 1 - result_bool
+
+                        results_tbl = None
+
+                    elif assertion_type == "conjointly":
+                        results_tbl = conjointly_validation(
+                            data_tbl=data_tbl_step,
+                            expressions=value["expressions"],
+                            threshold=threshold,
+                            tbl_type=tbl_type,
+                        )
+
+                    else:
+                        raise ValueError(
+                            f"Unknown assertion type: {assertion_type}"
+                        )  # pragma: no cover
+
+                except Exception as e:
+                    # Only catch specific data quality comparison errors, not programming errors
+                    error_msg = str(e).lower()
+                    is_comparison_error = (
+                        "boolean value of na is ambiguous" in error_msg
+                        or "cannot compare" in error_msg
+                        or (
+                            "type" in error_msg
+                            and ("mismatch" in error_msg or "incompatible" in error_msg)
+                        )
+                        or ("dtype" in error_msg and "compare" in error_msg)
+                    )
+
+                    if is_comparison_error:  # pragma: no cover
+                        # If data quality comparison fails, mark the validation as having an eval_error
+                        validation.eval_error = True  # pragma: no cover
+                        end_time = datetime.datetime.now(datetime.timezone.utc)  # pragma: no cover
+                        validation.proc_duration_s = (
+                            end_time - start_time
+                        ).total_seconds()  # pragma: no cover
+                        validation.time_processed = end_time.isoformat(
+                            timespec="milliseconds"
+                        )  # pragma: no cover
+                        validation.active = False  # pragma: no cover
+                        continue  # pragma: no cover
+                    else:
+                        # For other errors (like missing columns), let them propagate
+                        raise
+
+            else:
+                # For "specially" validations, let programming errors propagate as exceptions
+                if assertion_type == "specially":
+                    results_tbl_list = SpeciallyValidation(
+                        data_tbl=data_tbl_step,
+                        expression=value,
+                        threshold=threshold,
+                        tbl_type=tbl_type,
+                    ).get_test_results()
+
+                    #
+                    # The result from this could either be a table in the conventional form, or,
+                    # a list of boolean values; handle both cases
+                    #
+
+                    if isinstance(results_tbl_list, list):
+                        # If the result is a list of boolean values, then we need to convert it to a
+                        # set the validation results from the list
+                        validation.all_passed = all(results_tbl_list)
+                        validation.n = len(results_tbl_list)
+                        validation.n_passed = results_tbl_list.count(True)
+                        validation.n_failed = results_tbl_list.count(False)
+
+                        results_tbl = None
+
+                    else:
+                        # If the result is not a list, then we assume it's a table in the conventional
+                        # form (where the column is `pb_is_good_` exists, with boolean values
+                        results_tbl = results_tbl_list
 
             # If the results table is not `None`, then we assume there is a table with a column
             # called `pb_is_good_` that contains boolean values; we can then use this table to
```
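The block above is the heart of the release: the per-category interrogation classes (`ColValsCompareOne`, `ColValsCompareSet`, and friends) give way to plain `interrogate_*` functions selected by assertion method, and only comparison-type failures are downgraded to a step-level `eval_error` while programming errors keep propagating. A reduced sketch of that dispatch shape; the `interrogate_*` signatures follow the diff, while everything else (the dict-based table, the error heuristics) is simplified stand-in code:

```python
def interrogate_gt(tbl, column, compare, na_pass=False):
    # One boolean per test unit, as the real functions record in a `pb_is_good_` column
    return [na_pass if v is None else v > compare for v in tbl[column]]

def interrogate_le(tbl, column, compare, na_pass=False):
    return [na_pass if v is None else v <= compare for v in tbl[column]]

DISPATCH = {"gt": interrogate_gt, "le": interrogate_le}

def run_step(tbl, column, method, value):
    try:
        return DISPATCH[method](tbl=tbl, column=column, compare=value)
    except TypeError as e:
        # Data-quality comparison failures become eval errors; anything else
        # (say, a missing column raising KeyError) still propagates to the caller.
        if "not supported" in str(e).lower() or "cannot compare" in str(e).lower():
            return None  # the real code sets validation.eval_error = True instead
        raise

tbl = {"a": [1, 5, None, 7]}
print(run_step(tbl, "a", "gt", 2))  # [False, True, False, True]
```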
```diff
@@ -10272,32 +10377,46 @@ class Validate:
                 except AttributeError:
                     # For LazyFrames without sample method, collect first then sample
                     validation_extract_native = validation_extract_nw.collect().to_native()
-                    if hasattr(validation_extract_native, "sample"):
+                    if hasattr(validation_extract_native, "sample"):  # pragma: no cover
                         # PySpark DataFrame has sample method
-                        validation_extract_native = validation_extract_native.sample(
-                            fraction=min(1.0, sample_n / validation_extract_native.count())
-                        ).limit(sample_n)
-                        validation_extract_nw = nw.from_native(validation_extract_native)
+                        validation_extract_native = (
+                            validation_extract_native.sample(  # pragma: no cover
+                                fraction=min(
+                                    1.0, sample_n / validation_extract_native.count()
+                                )  # pragma: no cover
+                            ).limit(sample_n)
+                        )  # pragma: no cover
+                        validation_extract_nw = nw.from_native(
+                            validation_extract_native
+                        )  # pragma: no cover
                     else:
                         # Fallback: just take first n rows after collecting
-                        validation_extract_nw = validation_extract_nw.collect().head(sample_n)
+                        validation_extract_nw = validation_extract_nw.collect().head(
+                            sample_n
+                        )  # pragma: no cover
             elif sample_frac is not None:
                 try:
                     validation_extract_nw = validation_extract_nw.sample(fraction=sample_frac)
-                except AttributeError:
+                except AttributeError:  # pragma: no cover
                     # For LazyFrames without sample method, collect first then sample
-                    validation_extract_native = validation_extract_nw.collect().to_native()
-                    if hasattr(validation_extract_native, "sample"):
+                    validation_extract_native = (
+                        validation_extract_nw.collect().to_native()
+                    )  # pragma: no cover
+                    if hasattr(validation_extract_native, "sample"):  # pragma: no cover
                         # PySpark DataFrame has sample method
                         validation_extract_native = validation_extract_native.sample(
                             fraction=sample_frac
-                        )
-                        validation_extract_nw = nw.from_native(validation_extract_native)
+                        )  # pragma: no cover
+                        validation_extract_nw = nw.from_native(
+                            validation_extract_native
+                        )  # pragma: no cover
                     else:
                         # Fallback: use fraction to calculate head size
-                        collected = validation_extract_nw.collect()
-                        sample_size = max(1, int(len(collected) * sample_frac))
-                        validation_extract_nw = collected.head(sample_size)
+                        collected = validation_extract_nw.collect()  # pragma: no cover
+                        sample_size = max(
+                            1, int(len(collected) * sample_frac)
+                        )  # pragma: no cover
+                        validation_extract_nw = collected.head(sample_size)  # pragma: no cover
 
             # Ensure a limit is set on the number of rows to extract
             try:
@@ -10307,9 +10426,9 @@ class Validate:
                 # For LazyFrames, collect to get length (or use a reasonable default)
                 try:
                     extract_length = len(validation_extract_nw.collect())
-                except Exception:
+                except Exception:  # pragma: no cover
                     # If collection fails, apply limit anyway as a safety measure
-                    extract_length = extract_limit + 1  # …
+                    extract_length = extract_limit + 1  # pragma: no cover
 
             if extract_length > extract_limit:
                 validation_extract_nw = validation_extract_nw.head(extract_limit)
@@ -11974,10 +12093,12 @@ class Validate:
         try:
             # Try without order_by first (for DataFrames)
             data_nw = data_nw.with_row_index(name=index_name)
-        except TypeError:
+        except TypeError:  # pragma: no cover
            # LazyFrames require order_by parameter - use first column for ordering
-            first_col = data_nw.columns[0]
-            data_nw = data_nw.with_row_index(name=index_name, order_by=first_col)
+            first_col = data_nw.columns[0]  # pragma: no cover
+            data_nw = data_nw.with_row_index(
+                name=index_name, order_by=first_col
+            )  # pragma: no cover
 
         # Get all validation step result tables and join together the `pb_is_good_` columns
         # ensuring that the columns are named uniquely (e.g., `pb_is_good_1`, `pb_is_good_2`, ...)
@@ -11989,10 +12110,12 @@ class Validate:
             try:
                 # Try without order_by first (for DataFrames)
                 results_tbl = results_tbl.with_row_index(name=index_name)
-            except TypeError:
+            except TypeError:  # pragma: no cover
                 # LazyFrames require order_by parameter - use first column for ordering
-                first_col = results_tbl.columns[0]
-                results_tbl = results_tbl.with_row_index(name=index_name, order_by=first_col)
+                first_col = results_tbl.columns[0]  # pragma: no cover
+                results_tbl = results_tbl.with_row_index(
+                    name=index_name, order_by=first_col
+                )  # pragma: no cover
 
             # Add numerical suffix to the `pb_is_good_` column to make it unique
             results_tbl = results_tbl.select([index_name, "pb_is_good_"]).rename(
@@ -12124,15 +12247,15 @@ class Validate:
         # If the table is a Polars one, determine if it's a LazyFrame
         if tbl_info == "polars":
             if _is_lazy_frame(self.data):
-                tbl_info = "polars-lazy"
+                tbl_info = "polars-lazy"  # pragma: no cover
 
         # Determine if the input table is a Narwhals DF
         if _is_narwhals_table(self.data):
             # Determine if the Narwhals table is a LazyFrame
-            if _is_lazy_frame(self.data):
-                tbl_info = "narwhals-lazy"
+            if _is_lazy_frame(self.data):  # pragma: no cover
+                tbl_info = "narwhals-lazy"  # pragma: no cover
             else:
-                tbl_info = "narwhals"
+                tbl_info = "narwhals"  # pragma: no cover
 
         # Get the thresholds object
         thresholds = self.thresholds
@@ -12297,7 +12420,7 @@ class Validate:
         if lang in RTL_LANGUAGES:
             gt_tbl = gt_tbl.tab_style(
                 style=style.css("direction: rtl;"), locations=loc.source_notes()
-            )
+            )  # pragma: no cover
 
         if incl_header:
             gt_tbl = gt_tbl.tab_header(title=html(title_text), subtitle=html(combined_subtitle))
@@ -12614,9 +12737,11 @@ class Validate:
             # Get the number of rows in the extract (safe for LazyFrames)
             try:
                 n_rows = len(extract_nw)
-            except TypeError:
+            except TypeError:  # pragma: no cover
                 # For LazyFrames, collect() first to get length
-                n_rows = len(extract_nw.collect()) if hasattr(extract_nw, "collect") else 0
+                n_rows = (
+                    len(extract_nw.collect()) if hasattr(extract_nw, "collect") else 0
+                )  # pragma: no cover
 
             # If the number of rows is zero, then produce an em dash then go to the next iteration
             if n_rows == 0:
@@ -12624,7 +12749,7 @@ class Validate:
                 continue
 
             # Write the CSV text (ensure LazyFrames are collected first)
-            if hasattr(extract_nw, "collect"):
+            if hasattr(extract_nw, "collect"):  # pragma: no cover
                 extract_nw = extract_nw.collect()
             csv_text = extract_nw.write_csv()
 
@@ -13126,7 +13251,7 @@ class Validate:
             elif isinstance(column, list):
                 column_position = [list(self.data.columns).index(col) + 1 for col in column]
             else:
-                column_position = None
+                column_position = None  # pragma: no cover
         else:
             column_position = None
 
@@ -13218,7 +13343,7 @@ class Validate:
             )
 
         else:
-            step_report = None
+            step_report = None  # pragma: no cover
 
         return step_report
 
@@ -13670,6 +13795,48 @@ def _string_date_dttm_conversion(value: any) -> any:
     return value
 
 
+def _conditional_string_date_dttm_conversion(
+    value: any, allow_regular_strings: bool = False
+) -> any:
+    """
+    Conditionally convert a string to a date or datetime object if it is in the correct format. If
+    `allow_regular_strings=` is `True`, regular strings are allowed to pass through unchanged. If
+    the value is not a string, it is returned as is.
+
+    Parameters
+    ----------
+    value
+        The value to convert. It can be a string, date, or datetime object.
+    allow_regular_strings
+        If `True`, regular strings (non-date/datetime) are allowed to pass through unchanged. If
+        `False`, behaves like `_string_date_dttm_conversion()` and raises `ValueError` for regular
+        strings.
+
+    Returns
+    -------
+    any
+        The converted date or datetime object, or the original value.
+
+    Raises
+    ------
+    ValueError
+        If allow_regular_strings is False and the string cannot be converted to a date or datetime.
+    """
+
+    if isinstance(value, str):
+        if _is_string_date(value):
+            value = _convert_string_to_date(value)
+        elif _is_string_datetime(value):
+            value = _convert_string_to_datetime(value)
+        elif not allow_regular_strings:
+            raise ValueError(
+                "If `value=` is provided as a string it must be a date or datetime string."
+            )  # pragma: no cover
+        # If allow_regular_strings is True, regular strings pass through unchanged
+
+    return value
+
+
 def _process_brief(
     brief: str | None,
     step: int,
```
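The new helper replaces `_string_date_dttm_conversion()` at the `col_vals_eq()`/`col_vals_ne()` call sites with `allow_regular_strings=True`, so a plain-string comparand no longer raises. A stand-in for that contract (the real function uses pointblank's internal `_is_string_date()`/`_is_string_datetime()` checks; this sketch approximates them with `fromisoformat`):

```python
import datetime

def demo_conversion(value):
    """Mimics _conditional_string_date_dttm_conversion(value, allow_regular_strings=True)."""
    if isinstance(value, str):
        for parse in (datetime.date.fromisoformat, datetime.datetime.fromisoformat):
            try:
                return parse(value)
            except ValueError:
                continue
    return value  # non-strings and regular strings pass through unchanged

print(demo_conversion("2021-03-15"))           # 2021-03-15 (a datetime.date)
print(demo_conversion("2021-03-15 10:30:00"))  # 2021-03-15 10:30:00 (a datetime.datetime)
print(demo_conversion("toronto"))              # toronto (regular string, unchanged)
print(demo_conversion(42))                     # 42
```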
```diff
@@ -13718,12 +13885,33 @@ def _process_brief(
 
     if segment is not None:
         # The segment is always a tuple of the form ("{column}", "{value}")
+        # Handle both regular lists and Segment objects (from seg_group())
+
+        segment_column = segment[0]
+        segment_value = segment[1]
+
+        # If segment_value is a Segment object (from seg_group()), format it appropriately
+        if isinstance(segment_value, Segment):
+            # For Segment objects, format the segments as a readable string
+            segments = segment_value.segments
+            if len(segments) == 1:
+                # Single segment: join the values with commas
+                segment_value_str = ", ".join(str(v) for v in segments[0])
+            else:
+                # Multiple segments: join each segment with commas, separate segments with " | "
+                segment_value_str = " | ".join([", ".join(str(v) for v in seg) for seg in segments])
+        else:
+            # For regular lists or other types, convert to string
+            if isinstance(segment_value, list):
+                segment_value_str = ", ".join(str(v) for v in segment_value)
+            else:
+                segment_value_str = str(segment_value)
 
-        segment_fmt = f"{segment[0]} / {segment[1]}"
+        segment_fmt = f"{segment_column} / {segment_value_str}"
 
         brief = brief.replace("{segment}", segment_fmt)
-        brief = brief.replace("{segment_column}", segment[0])
-        brief = brief.replace("{segment_value}", segment[1])
+        brief = brief.replace("{segment_column}", segment_column)
+        brief = brief.replace("{segment_value}", segment_value_str)
 
     return brief
 
@@ -13757,7 +13945,7 @@ def _process_action_str(
     if col is not None:
         # If a list of columns is provided, then join the columns into a comma-separated string
         if isinstance(col, list):
-            col = ", ".join(col)
+            col = ", ".join(col)  # pragma: no cover
 
         action_str = action_str.replace("{col}", col)
         action_str = action_str.replace("{column}", col)
@@ -14154,7 +14342,7 @@ def _prep_values_text(
     length_values = len(values)
 
     if length_values == 0:
-        return ""
+        return ""  # pragma: no cover
 
     if length_values > limit:
         num_omitted = length_values - limit
@@ -14163,7 +14351,7 @@ def _prep_values_text(
     formatted_values = []
     for value in values[:limit]:
         if isinstance(value, (datetime.datetime, datetime.date)):
-            formatted_values.append(f"`{value.isoformat()}`")
+            formatted_values.append(f"`{value.isoformat()}`")  # pragma: no cover
         else:
             formatted_values.append(f"`{value}`")
 
@@ -14319,17 +14507,109 @@ def _apply_segments(data_tbl: any, segments_expr: tuple[str, Any]) -> any:
     column, segment = segments_expr
 
     if tbl_type in ["pandas", "polars", "pyspark"]:
-        # If the table is a Pandas, Polars, or PySpark DataFrame,
+        # If the table is a Pandas, Polars, or PySpark DataFrame, transform to a Narwhals table
         # and perform the filtering operation
 
         # Transform to Narwhals table if a DataFrame
        data_tbl_nw = nw.from_native(data_tbl)
 
+        # Handle Polars expressions by attempting to extract literal values
+        # This is a compatibility measure for cases where `pl.datetime()`, `pl.lit()`, etc.,
+        # are accidentally used instead of native Python types
+        if (
+            hasattr(segment, "__class__")
+            and "polars" in segment.__class__.__module__
+            and segment.__class__.__name__ == "Expr"
+        ):
+            # This is a Polars expression so we should warn about this and suggest native types
+            import warnings
+            from datetime import date, datetime
+
+            warnings.warn(
+                "Polars expressions in segments are deprecated. Please use native Python types instead. "
+                "For example, use datetime.date(2016, 1, 4) instead of pl.datetime(2016, 1, 4).",
+                DeprecationWarning,
+                stacklevel=3,
+            )
+
+            # Try to extract the literal value from various Polars expression patterns
+            segment_str = str(segment)
+            parsed_value = None
+
+            # Handle different Polars expression string formats
+            # Format 1: Direct date strings like "2016-01-04"
+            if len(segment_str) == 10 and segment_str.count("-") == 2:
+                try:
+                    parsed_value = date.fromisoformat(segment_str)
+                except ValueError:  # pragma: no cover
+                    pass  # pragma: no cover
+
+            # Format 2: Datetime strings with UTC timezone like
+            # "2016-01-04 00:00:01 UTC.strict_cast(...)"
+            elif " UTC" in segment_str:
+                try:
+                    # Extract just the datetime part before "UTC"
+                    datetime_part = segment_str.split(" UTC")[0]
+                    if len(datetime_part) >= 10:
+                        parsed_dt = datetime.fromisoformat(datetime_part)
+                        # Convert midnight datetimes to dates for consistency
+                        if parsed_dt.time() == datetime.min.time():
+                            parsed_value = parsed_dt.date()  # pragma: no cover
+                        else:
+                            parsed_value = parsed_dt
+                except (ValueError, IndexError):  # pragma: no cover
+                    pass  # pragma: no cover
+
+            # Format 3: Bracketed expressions like ['2016-01-04']
+            elif segment_str.startswith("[") and segment_str.endswith("]"):
+                try:  # pragma: no cover
+                    # Remove [' and ']
+                    content = segment_str[2:-2]  # pragma: no cover
+
+                    # Try parsing as date first
+                    if len(content) == 10 and content.count("-") == 2:  # pragma: no cover
+                        try:  # pragma: no cover
+                            parsed_value = date.fromisoformat(content)  # pragma: no cover
+                        except ValueError:  # pragma: no cover
+                            pass  # pragma: no cover
+
+                    # Try parsing as datetime
+                    if parsed_value is None:  # pragma: no cover
+                        try:  # pragma: no cover
+                            parsed_dt = datetime.fromisoformat(content.replace(" UTC", ""))
+                            if parsed_dt.time() == datetime.min.time():
+                                parsed_value = parsed_dt.date()
+                            else:
+                                parsed_value = parsed_dt
+                        except ValueError:
+                            pass
+
+                except (ValueError, IndexError):  # pragma: no cover
+                    pass  # pragma: no cover
+
+            # Handle `pl.datetime()` expressions with .alias("datetime")
+            elif "datetime" in segment_str and '.alias("datetime")' in segment_str:
+                try:
+                    datetime_part = segment_str.split('.alias("datetime")')[0]
+                    parsed_dt = datetime.fromisoformat(datetime_part)
+
+                    if parsed_dt.time() == datetime.min.time():
+                        parsed_value = parsed_dt.date()
+                    else:
+                        parsed_value = parsed_dt  # pragma: no cover
+
+                except (ValueError, AttributeError):  # pragma: no cover
+                    pass  # pragma: no cover
+
+            # If we successfully parsed a value, use it; otherwise leave segment as is
+            if parsed_value is not None:
+                segment = parsed_value
+
         # Filter the data table based on the column name and segment
         if segment is None:
             data_tbl_nw = data_tbl_nw.filter(nw.col(column).is_null())
-        # Check if the segment is a segment group
         elif isinstance(segment, list):
+            # Check if the segment is a segment group
             data_tbl_nw = data_tbl_nw.filter(nw.col(column).is_in(segment))
         else:
             data_tbl_nw = data_tbl_nw.filter(nw.col(column) == segment)
@@ -14341,12 +14621,13 @@ def _apply_segments(data_tbl: any, segments_expr: tuple[str, Any]) -> any:
         # If the table is an Ibis backend table, perform the filtering operation directly
 
         # Filter the data table based on the column name and segment
+        # Use the new Ibis API methods to avoid deprecation warnings
         if segment is None:
-            data_tbl = data_tbl[data_tbl[column].isnull()]
+            data_tbl = data_tbl.filter(data_tbl[column].isnull())  # pragma: no cover
         elif isinstance(segment, list):
-            data_tbl = data_tbl[data_tbl[column].isin(segment)]
+            data_tbl = data_tbl.filter(data_tbl[column].isin(segment))  # pragma: no cover
         else:
-            data_tbl = data_tbl[data_tbl[column] == segment]
+            data_tbl = data_tbl.filter(data_tbl[column] == segment)
 
     return data_tbl
 
```
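The segment handling above also spells out the migration it nudges users toward: native Python values instead of Polars expressions in `segments=`. A sketch of the preferred style (assuming Polars and pointblank are installed; column names and values are illustrative):

```python
import datetime

import polars as pl
import pointblank as pb

tbl = pl.DataFrame(
    {
        "date": [datetime.date(2016, 1, 4), datetime.date(2016, 1, 5)],
        "a": [1, 2],
    }
)

# Preferred: a native Python value as the segment value. A Polars expression
# such as pl.date(2016, 1, 4) would now raise a DeprecationWarning and be
# parsed back to a literal on a best-effort basis.
validation = (
    pb.Validate(tbl)
    .col_vals_gt(columns="a", value=0, segments=("date", datetime.date(2016, 1, 4)))
    .interrogate()
)
```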
```diff
@@ -14465,7 +14746,7 @@ def _get_title_text(
         "</span>"
         f'<span style="float: right;">{title}</span>'
         "</div>"
-    )
+    )  # pragma: no cover
 
     return html_str
 
@@ -14543,24 +14824,6 @@ def _transform_eval(
     return symbol_list
 
 
-def _format_numbers_with_gt(
-    values: list[int], n_sigfig: int = 3, compact: bool = True, locale: str = "en"
-) -> list[str]:
-    """Format numbers using Great Tables GT object to avoid pandas dependency."""
-    import polars as pl
-
-    # Create a single-column DataFrame with all values
-    df = pl.DataFrame({"values": values})
-
-    # Create GT object and format the column
-    gt_obj = GT(df).fmt_number(columns="values", n_sigfig=n_sigfig, compact=compact, locale=locale)
-
-    # Extract the formatted values using _get_column_of_values
-    formatted_values = _get_column_of_values(gt_obj, column_name="values", context="html")
-
-    return formatted_values
-
-
 def _format_single_number_with_gt(
     value: int, n_sigfig: int = 3, compact: bool = True, locale: str = "en", df_lib=None
 ) -> str:
@@ -14571,12 +14834,14 @@ def _format_single_number_with_gt(
         import polars as pl
 
         df_lib = pl
-    elif _is_lib_present("pandas"):
-        import pandas as pd
+    elif _is_lib_present("pandas"):  # pragma: no cover
+        import pandas as pd  # pragma: no cover
 
-        df_lib = pd
-    else:
-        raise ImportError("Neither Polars nor Pandas is available for formatting")
+        df_lib = pd  # pragma: no cover
+    else:  # pragma: no cover
+        raise ImportError(
+            "Neither Polars nor Pandas is available for formatting"
+        )  # pragma: no cover
 
     # Create a single-row, single-column DataFrame using the specified library
     df = df_lib.DataFrame({"value": [value]})
@@ -14642,12 +14907,14 @@ def _format_single_float_with_gt(
         import polars as pl
 
         df_lib = pl
-    elif _is_lib_present("pandas"):
-        import pandas as pd
+    elif _is_lib_present("pandas"):  # pragma: no cover
+        import pandas as pd  # pragma: no cover
 
-        df_lib = pd
-    else:
-        raise ImportError("Neither Polars nor Pandas is available for formatting")
+        df_lib = pd  # pragma: no cover
+    else:  # pragma: no cover
+        raise ImportError(
+            "Neither Polars nor Pandas is available for formatting"
+        )  # pragma: no cover
 
     # Create a single-row, single-column DataFrame using the specified library
     df = df_lib.DataFrame({"value": [value]})
@@ -14679,7 +14946,7 @@ def _transform_passed_failed(
             return _format_single_float_with_gt(value, decimals=2, locale=locale, df_lib=df_lib)
         else:
             # Fallback to the original behavior
-            return vals.fmt_number(value, decimals=2, locale=locale)[0]
+            return vals.fmt_number(value, decimals=2, locale=locale)[0]  # pragma: no cover
 
     passed_failed = [
         (
@@ -14819,7 +15086,7 @@ def _get_callable_source(fn: Callable) -> str:
             return pre_arg
         except (OSError, TypeError):  # pragma: no cover
             return fn.__name__
-    return fn
+    return fn  # pragma: no cover
 
 
 def _extract_pre_argument(source: str) -> str:
@@ -14903,12 +15170,14 @@ def _format_single_integer_with_gt(value: int, locale: str = "en", df_lib=None)
         import polars as pl
 
         df_lib = pl
-    elif _is_lib_present("pandas"):
-        import pandas as pd
+    elif _is_lib_present("pandas"):  # pragma: no cover
+        import pandas as pd  # pragma: no cover
 
-        df_lib = pd
-    else:
-        raise ImportError("Neither Polars nor Pandas is available for formatting")
+        df_lib = pd  # pragma: no cover
+    else:  # pragma: no cover
+        raise ImportError(
+            "Neither Polars nor Pandas is available for formatting"
+        )  # pragma: no cover
 
     # Create a single-row, single-column DataFrame using the specified library
     df = df_lib.DataFrame({"value": [value]})
@@ -14936,12 +15205,14 @@ def _format_single_float_with_gt_custom(
         import polars as pl
 
         df_lib = pl
-    elif _is_lib_present("pandas"):
-        import pandas as pd
+    elif _is_lib_present("pandas"):  # pragma: no cover
+        import pandas as pd  # pragma: no cover
 
-        df_lib = pd
-    else:
-        raise ImportError("Neither Polars nor Pandas is available for formatting")
+        df_lib = pd  # pragma: no cover
+    else:  # pragma: no cover
+        raise ImportError(
+            "Neither Polars nor Pandas is available for formatting"
+        )  # pragma: no cover
 
     # Create a single-row, single-column DataFrame using the specified library
     df = df_lib.DataFrame({"value": [value]})
@@ -14976,7 +15247,7 @@ def _create_thresholds_html(thresholds: Thresholds, locale: str, df_lib=None) -> str:
             # Fallback to the original behavior
             return fmt_number(
                 value, decimals=decimals, drop_trailing_zeros=drop_trailing_zeros, locale=locale
-            )[0]
+            )[0]  # pragma: no cover
 
     def _format_integer_safe(value: int) -> str:
         if df_lib is not None and value is not None:
@@ -15113,6 +15384,8 @@ def _step_report_row_based(
         text = STEP_REPORT_TEXT["column_is_null"][lang].format(column=column)
     elif assertion_type == "col_vals_not_null":
         text = STEP_REPORT_TEXT["column_is_not_null"][lang].format(column=column)
+    elif assertion_type == "col_vals_expr":
+        text = STEP_REPORT_TEXT["column_expr"][lang].format(values=values)
     elif assertion_type == "rows_complete":
         if column is None:
             text = STEP_REPORT_TEXT["rows_complete_all"][lang]
@@ -15159,10 +15432,17 @@ def _step_report_row_based(
     title = STEP_REPORT_TEXT["report_for_step_i"][lang].format(i=i) + " " + CHECK_MARK_SPAN
     assertion_header_text = STEP_REPORT_TEXT["assertion_header_text"][lang]
 
-    success_stmt = STEP_REPORT_TEXT["success_statement"][lang].format(
-        n=n,
-        column_position=column_position,
-    )
+    # Use 'success_statement_no_column' for col_vals_expr() since it doesn't target
+    # a specific column
+    if assertion_type == "col_vals_expr":
+        success_stmt = STEP_REPORT_TEXT["success_statement_no_column"][lang].format(
+            n=n
+        )  # pragma: no cover
+    else:
+        success_stmt = STEP_REPORT_TEXT["success_statement"][lang].format(
+            n=n,
+            column_position=column_position,
+        )
     preview_stmt = STEP_REPORT_TEXT["preview_statement"][lang]
 
     details = (
@@ -15242,10 +15522,16 @@ def _step_report_row_based(
     assertion_header_text = STEP_REPORT_TEXT["assertion_header_text"][lang]
     failure_rate_metrics = f"<strong>{n_failed}</strong> / <strong>{n}</strong>"
 
-    failure_rate_stmt = STEP_REPORT_TEXT["failure_rate_summary"][lang].format(
-        failure_rate=failure_rate_metrics,
-        column_position=column_position,
-    )
+    # Use failure_rate_summary_no_column for col_vals_expr since it doesn't target a specific column
+    if assertion_type == "col_vals_expr":
+        failure_rate_stmt = STEP_REPORT_TEXT["failure_rate_summary_no_column"][lang].format(
+            failure_rate=failure_rate_metrics
+        )
+    else:
+        failure_rate_stmt = STEP_REPORT_TEXT["failure_rate_summary"][lang].format(
+            failure_rate=failure_rate_metrics,
+            column_position=column_position,
+        )
 
     if limit < extract_length:
         extract_length_resolved = limit
@@ -15864,14 +16150,14 @@ def _step_report_schema_any_order(
         if exp_columns_dict[column_name_exp_i]["colname_matched"]:
             col_exp_correct.append(CHECK_MARK_SPAN)
         else:
-            col_exp_correct.append(CROSS_MARK_SPAN)
+            col_exp_correct.append(CROSS_MARK_SPAN)  # pragma: no cover
 
         #
         # `dtype_exp` values
        #
 
         if not exp_columns_dict[column_name_exp_i]["dtype_present"]:
-            dtype_exp.append("")
+            dtype_exp.append("")  # pragma: no cover
 
         elif len(exp_columns_dict[column_name_exp_i]["dtype_input"]) > 1:
             dtype = exp_columns_dict[column_name_exp_i]["dtype_input"]
@@ -15906,9 +16192,9 @@ def _step_report_schema_any_order(
        #
 
         if not exp_columns_dict[column_name_exp_i]["colname_matched"]:
-            dtype_exp_correct.append("—")
+            dtype_exp_correct.append("—")  # pragma: no cover
         elif not exp_columns_dict[column_name_exp_i]["dtype_present"]:
-            dtype_exp_correct.append("")
+            dtype_exp_correct.append("")  # pragma: no cover
         elif exp_columns_dict[column_name_exp_i]["dtype_matched"]:
             dtype_exp_correct.append(CHECK_MARK_SPAN)
         else:
@@ -15954,13 +16240,17 @@ def _step_report_schema_any_order(
        #
 
         if not exp_columns_dict[column_name_exp_i]["dtype_present"]:
-            dtype_exp.append("")
+            dtype_exp.append("")  # pragma: no cover
 
         elif len(exp_columns_dict[column_name_exp_i]["dtype_input"]) > 1:
-            dtype = exp_columns_dict[column_name_exp_i]["dtype_input"]
+            dtype = exp_columns_dict[column_name_exp_i]["dtype_input"]  # pragma: no cover
 
-            if exp_columns_dict[column_name_exp_i]["dtype_matched_pos"] is not None:
-                pos = exp_columns_dict[column_name_exp_i]["dtype_matched_pos"]
+            if (
+                exp_columns_dict[column_name_exp_i]["dtype_matched_pos"] is not None
+            ):  # pragma: no cover
+                pos = exp_columns_dict[column_name_exp_i][
+                    "dtype_matched_pos"
+                ]  # pragma: no cover
 
             # Combine the dtypes together with pipes but underline the matched dtype in
             # green with an HTML span tag and style attribute
@@ -15972,13 +16262,13 @@ def _step_report_schema_any_order(
                     else dtype[i]
                 )
                 for i in range(len(dtype))
-                ]
-                dtype = " | ".join(dtype)
-                dtype_exp.append(dtype)
+                ]  # pragma: no cover
+                dtype = " | ".join(dtype)  # pragma: no cover
+                dtype_exp.append(dtype)  # pragma: no cover
 
             else:
-                dtype = " | ".join(dtype)
-                dtype_exp.append(dtype)
+                dtype = " | ".join(dtype)  # pragma: no cover
+                dtype_exp.append(dtype)  # pragma: no cover
 
         else:
             dtype = exp_columns_dict[column_name_exp_i]["dtype_input"][0]
@@ -15990,12 +16280,12 @@ def _step_report_schema_any_order(
 
         if not exp_columns_dict[column_name_exp_i]["colname_matched"]:
             dtype_exp_correct.append("—")
-        elif not exp_columns_dict[column_name_exp_i]["dtype_present"]:
-            dtype_exp_correct.append("")
-        elif exp_columns_dict[column_name_exp_i]["dtype_matched"]:
-            dtype_exp_correct.append(CHECK_MARK_SPAN)
-        else:
-            dtype_exp_correct.append(CROSS_MARK_SPAN)
+        elif not exp_columns_dict[column_name_exp_i]["dtype_present"]:  # pragma: no cover
+            dtype_exp_correct.append("")  # pragma: no cover
+        elif exp_columns_dict[column_name_exp_i]["dtype_matched"]:  # pragma: no cover
+            dtype_exp_correct.append(CHECK_MARK_SPAN)  # pragma: no cover
+        else:  # pragma: no cover
+            dtype_exp_correct.append(CROSS_MARK_SPAN)  # pragma: no cover
 
         if len(columns_found) > 0:
             # Get the last index of the columns found
@@ -16011,7 +16301,9 @@ def _step_report_schema_any_order(
             ]
 
         else:
-            index_exp = [str(i) for i in range(1, len(colnames_exp_unmatched) + 1)]
+            index_exp = [
+                str(i) for i in range(1, len(colnames_exp_unmatched) + 1)
+            ]  # pragma: no cover
 
         schema_exp_unmatched = pl.DataFrame(
             {
```
|