pointblank 0.17.0__py3-none-any.whl → 0.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,11 +3,15 @@ from __future__ import annotations
3
3
  import functools
4
4
  from collections.abc import Callable
5
5
  from dataclasses import dataclass
6
- from typing import Any
6
+ from typing import TYPE_CHECKING, Any
7
7
 
8
8
  import narwhals as nw
9
- from narwhals.dependencies import is_pandas_dataframe, is_polars_dataframe
10
- from narwhals.typing import FrameT
9
+ from narwhals.dependencies import (
10
+ is_narwhals_dataframe,
11
+ is_narwhals_lazyframe,
12
+ is_pandas_dataframe,
13
+ is_polars_dataframe,
14
+ )
11
15
 
12
16
  from pointblank._constants import IBIS_BACKENDS
13
17
  from pointblank._spec_utils import (
@@ -25,6 +29,9 @@ from pointblank._utils import (
25
29
  )
26
30
  from pointblank.column import Column
27
31
 
32
+ if TYPE_CHECKING:
33
+ from narwhals.typing import IntoFrame
34
+
28
35
 
29
36
  def _safe_modify_datetime_compare_val(data_frame: Any, column: str, compare_val: Any) -> Any:
30
37
  """
@@ -94,7 +101,9 @@ def _safe_modify_datetime_compare_val(data_frame: Any, column: str, compare_val:
94
101
  return compare_val
95
102
 
96
103
 
97
- def _safe_is_nan_or_null_expr(data_frame: Any, column_expr: Any, column_name: str = None) -> Any:
104
+ def _safe_is_nan_or_null_expr(
105
+ data_frame: Any, column_expr: Any, column_name: str | None = None
106
+ ) -> Any:
98
107
  """
99
108
  Create an expression that safely checks for both Null and NaN values.
100
109
 
@@ -425,7 +434,7 @@ class SpeciallyValidation:
425
434
  else:
426
435
  self.tbl_type = tbl_type
427
436
 
428
- def get_test_results(self) -> any | list[bool]:
437
+ def get_test_results(self) -> Any | list[bool]:
429
438
  """Evaluate the expression get either a list of booleans or a results table."""
430
439
 
431
440
  # Get the expression and inspect whether there is a `data` argument
@@ -519,7 +528,7 @@ class NumberOfTestUnits:
519
528
  Count the number of test units in a column.
520
529
  """
521
530
 
522
- df: FrameT
531
+ df: Any # Can be IntoFrame or Ibis table
523
532
  column: str
524
533
 
525
534
  def get_test_units(self, tbl_type: str) -> int:
@@ -536,15 +545,18 @@ class NumberOfTestUnits:
536
545
  )
537
546
 
538
547
  # Handle LazyFrames which don't have len()
539
- if hasattr(dfn, "collect"):
548
+ if is_narwhals_lazyframe(dfn):
540
549
  dfn = dfn.collect()
541
550
 
551
+ assert is_narwhals_dataframe(dfn)
542
552
  return len(dfn)
543
553
 
544
554
  if tbl_type in IBIS_BACKENDS:
545
555
  # Get the count of test units and convert to a native format
546
556
  # TODO: check whether pandas or polars is available
547
- return self.df.count().to_polars()
557
+ return self.df.count().to_polars() # type: ignore[union-attr]
558
+
559
+ raise ValueError(f"Unsupported table type: {tbl_type}")
548
560
 
549
561
 
550
562
  def _get_compare_expr_nw(compare: Any) -> Any:
@@ -555,28 +567,25 @@ def _get_compare_expr_nw(compare: Any) -> Any:
555
567
  return compare
556
568
 
557
569
 
558
- def _column_has_null_values(table: FrameT, column: str) -> bool:
570
+ def _column_has_null_values(table: nw.DataFrame[Any] | nw.LazyFrame[Any], column: str) -> bool:
559
571
  try:
560
- # Try the standard null_count() method
561
- null_count = (table.select(column).null_count())[column][0]
572
+ # Try the standard null_count() method (DataFrame)
573
+ null_count = (table.select(column).null_count())[column][0] # type: ignore[union-attr]
562
574
  except AttributeError:
563
575
  # For LazyFrames, collect first then get null count
564
576
  try:
565
- collected = table.select(column).collect()
577
+ collected = table.select(column).collect() # type: ignore[union-attr]
566
578
  null_count = (collected.null_count())[column][0]
567
579
  except Exception:
568
580
  # Fallback: check if any values are null
569
581
  try:
570
- result = table.select(nw.col(column).is_null().sum().alias("null_count")).collect()
582
+ result = table.select(nw.col(column).is_null().sum().alias("null_count")).collect() # type: ignore[union-attr]
571
583
  null_count = result["null_count"][0]
572
584
  except Exception:
573
585
  # Last resort: return False (assume no nulls)
574
586
  return False
575
587
 
576
- if null_count is None or null_count == 0:
577
- return False
578
-
579
- return True
588
+ return null_count is not None and null_count > 0
580
589
 
581
590
 
582
591
  def _check_nulls_across_columns_nw(table, columns_subset):
@@ -596,7 +605,7 @@ def _check_nulls_across_columns_nw(table, columns_subset):
596
605
  return result
597
606
 
598
607
 
599
- def _modify_datetime_compare_val(tgt_column: any, compare_val: any) -> any:
608
+ def _modify_datetime_compare_val(tgt_column: Any, compare_val: Any) -> Any:
600
609
  tgt_col_dtype_str = str(tgt_column.dtype).lower()
601
610
 
602
611
  if compare_val is isinstance(compare_val, Column): # pragma: no cover
@@ -640,7 +649,7 @@ def _modify_datetime_compare_val(tgt_column: any, compare_val: any) -> any:
640
649
  return compare_expr
641
650
 
642
651
 
643
- def col_vals_expr(data_tbl: FrameT, expr, tbl_type: str = "local"):
652
+ def col_vals_expr(data_tbl: Any, expr: Any, tbl_type: str = "local") -> Any:
644
653
  """Check if values in a column evaluate to True for a given predicate expression."""
645
654
  if tbl_type == "local":
646
655
  # Check the type of expression provided
@@ -670,21 +679,19 @@ def col_vals_expr(data_tbl: FrameT, expr, tbl_type: str = "local"):
670
679
  return data_tbl # pragma: no cover
671
680
 
672
681
 
673
- def rows_complete(data_tbl: FrameT, columns_subset: list[str] | None):
682
+ def rows_complete(data_tbl: IntoFrame, columns_subset: list[str] | None) -> Any:
674
683
  """
675
684
  Check if rows in a DataFrame are complete (no null values).
676
685
 
677
686
  This function replaces the RowsComplete dataclass for direct usage.
678
687
  """
679
- tbl = _convert_to_narwhals(df=data_tbl)
680
-
681
688
  return interrogate_rows_complete(
682
- tbl=tbl,
689
+ tbl=data_tbl,
683
690
  columns_subset=columns_subset,
684
691
  )
685
692
 
686
693
 
687
- def col_exists(data_tbl: FrameT, column: str) -> bool:
694
+ def col_exists(data_tbl: IntoFrame, column: str) -> bool:
688
695
  """
689
696
  Check if a column exists in a DataFrame.
690
697
 
@@ -705,8 +712,8 @@ def col_exists(data_tbl: FrameT, column: str) -> bool:
705
712
 
706
713
 
707
714
  def col_schema_match(
708
- data_tbl: FrameT,
709
- schema,
715
+ data_tbl: IntoFrame,
716
+ schema: Any,
710
717
  complete: bool,
711
718
  in_order: bool,
712
719
  case_sensitive_colnames: bool,
@@ -730,7 +737,9 @@ def col_schema_match(
730
737
  )
731
738
 
732
739
 
733
- def row_count_match(data_tbl: FrameT, count, inverse: bool, abs_tol_bounds) -> bool:
740
+ def row_count_match(
741
+ data_tbl: IntoFrame, count: Any, inverse: bool, abs_tol_bounds: AbsoluteBounds
742
+ ) -> bool:
734
743
  """
735
744
  Check if DataFrame row count matches expected count.
736
745
  """
@@ -748,28 +757,33 @@ def row_count_match(data_tbl: FrameT, count, inverse: bool, abs_tol_bounds) -> b
748
757
 
749
758
 
750
759
  def col_pct_null(
751
- data_tbl: FrameT, column: str, p: float, bound_finder: Callable[[int], AbsoluteBounds]
760
+ data_tbl: IntoFrame, column: str, p: float, bound_finder: Callable[[int], AbsoluteBounds]
752
761
  ) -> bool:
753
762
  """Check if the percentage of null vales are within p given the absolute bounds."""
754
- # Convert to narwhals for consistent API across backends
755
- nw_tbl = nw.from_native(data_tbl)
756
-
763
+ nw_frame = nw.from_native(data_tbl)
757
764
  # Handle LazyFrames by collecting them first
758
- if hasattr(nw_tbl, "collect"):
759
- nw_tbl = nw_tbl.collect()
765
+ if is_narwhals_lazyframe(nw_frame):
766
+ nw_frame = nw_frame.collect()
767
+
768
+ assert is_narwhals_dataframe(nw_frame)
769
+
770
+ # We cast as int because it could come back as an arbitrary type. For example if the backend
771
+ # is numpy-like, we might get a scalar from `item()`. `int()` expects a certain signature though
772
+ # and `object` does not satisfy so we have to go with the type ignore.
773
+ total_rows: object = nw_frame.select(nw.len()).item()
774
+ total_rows: int = int(total_rows) # type: ignore
760
775
 
761
- # Get total rows using narwhals
762
- total_rows: int = nw_tbl.select(nw.len()).item()
763
776
  abs_target: float = round(total_rows * p)
764
777
  lower_bound, upper_bound = bound_finder(abs_target)
765
778
 
766
- # Count null values
767
- n_null: int = nw_tbl.select(nw.col(column).is_null().sum()).item()
779
+ # Count null values (see above comment on typing shenanigans)
780
+ n_null: object = nw_frame.select(nw.col(column).is_null().sum()).item()
781
+ n_null: int = int(n_null) # type: ignore
768
782
 
769
783
  return n_null >= (abs_target - lower_bound) and n_null <= (abs_target + upper_bound)
770
784
 
771
785
 
772
- def col_count_match(data_tbl: FrameT, count, inverse: bool) -> bool:
786
+ def col_count_match(data_tbl: IntoFrame, count: Any, inverse: bool) -> bool:
773
787
  """
774
788
  Check if DataFrame column count matches expected count.
775
789
  """
@@ -781,7 +795,7 @@ def col_count_match(data_tbl: FrameT, count, inverse: bool) -> bool:
781
795
  return get_column_count(data=data_tbl) != count
782
796
 
783
797
 
784
- def _coerce_to_common_backend(data_tbl: FrameT, tbl_compare: FrameT) -> tuple[FrameT, FrameT]:
798
+ def _coerce_to_common_backend(data_tbl: Any, tbl_compare: Any) -> tuple[Any, Any]:
785
799
  """
786
800
  Coerce two tables to the same backend if they differ.
787
801
 
@@ -798,7 +812,7 @@ def _coerce_to_common_backend(data_tbl: FrameT, tbl_compare: FrameT) -> tuple[Fr
798
812
 
799
813
  Returns
800
814
  -------
801
- tuple[FrameT, FrameT]
815
+ tuple[Any, Any]
802
816
  Both tables, with tbl_compare potentially converted to data_tbl's backend.
803
817
  """
804
818
  # Get backend types for both tables
@@ -884,7 +898,7 @@ def _coerce_to_common_backend(data_tbl: FrameT, tbl_compare: FrameT) -> tuple[Fr
884
898
  return data_tbl, tbl_compare
885
899
 
886
900
 
887
- def tbl_match(data_tbl: FrameT, tbl_compare: FrameT) -> bool:
901
+ def tbl_match(data_tbl: IntoFrame, tbl_compare: IntoFrame) -> bool:
888
902
  """
889
903
  Check if two tables match exactly in schema, row count, and data.
890
904
 
@@ -998,33 +1012,37 @@ def tbl_match(data_tbl: FrameT, tbl_compare: FrameT) -> bool:
998
1012
 
999
1013
  # Convert to native format for comparison
1000
1014
  # We need to collect if lazy frames
1001
- if hasattr(col_data_1, "collect"):
1015
+ if is_narwhals_lazyframe(col_data_1):
1002
1016
  col_data_1 = col_data_1.collect()
1003
1017
 
1004
- if hasattr(col_data_2, "collect"):
1018
+ if is_narwhals_lazyframe(col_data_2):
1005
1019
  col_data_2 = col_data_2.collect()
1006
1020
 
1007
1021
  # Convert to native and then to lists for comparison
1008
- col_1_native = col_data_1.to_native()
1009
- col_2_native = col_data_2.to_native()
1022
+ # Native frames could be Polars, Pandas, or Ibis - use Any for dynamic access
1023
+ col_1_native: Any = col_data_1.to_native()
1024
+ col_2_native: Any = col_data_2.to_native()
1010
1025
 
1011
1026
  # Extract values as lists for comparison
1012
- if hasattr(col_1_native, "to_list"): # Polars Series
1013
- values_1 = col_1_native[col_name].to_list()
1014
- values_2 = col_2_native[col_name].to_list()
1027
+ # Note: We use hasattr for runtime detection but maintain Any typing
1028
+ values_1: list[Any]
1029
+ values_2: list[Any]
1030
+ if hasattr(col_1_native, "to_list"): # Polars DataFrame
1031
+ values_1 = col_1_native[col_name].to_list() # type: ignore[index]
1032
+ values_2 = col_2_native[col_name].to_list() # type: ignore[index]
1015
1033
 
1016
- elif hasattr(col_1_native, "tolist"): # Pandas Series/DataFrame
1017
- values_1 = col_1_native[col_name].tolist()
1018
- values_2 = col_2_native[col_name].tolist()
1034
+ elif hasattr(col_1_native, "tolist"): # Pandas DataFrame
1035
+ values_1 = col_1_native[col_name].tolist() # type: ignore[index]
1036
+ values_2 = col_2_native[col_name].tolist() # type: ignore[index]
1019
1037
 
1020
1038
  elif hasattr(col_1_native, "collect"): # Ibis
1021
- values_1 = col_1_native[col_name].to_pandas().tolist()
1022
- values_2 = col_2_native[col_name].to_pandas().tolist()
1039
+ values_1 = col_1_native[col_name].to_pandas().tolist() # type: ignore[index]
1040
+ values_2 = col_2_native[col_name].to_pandas().tolist() # type: ignore[index]
1023
1041
 
1024
1042
  else:
1025
1043
  # Fallback: try direct comparison
1026
- values_1 = list(col_1_native[col_name])
1027
- values_2 = list(col_2_native[col_name])
1044
+ values_1 = list(col_1_native[col_name]) # type: ignore[index]
1045
+ values_2 = list(col_2_native[col_name]) # type: ignore[index]
1028
1046
 
1029
1047
  # Compare the two lists element by element, handling NaN/None
1030
1048
  if len(values_1) != len(values_2):
@@ -1086,7 +1104,9 @@ def tbl_match(data_tbl: FrameT, tbl_compare: FrameT) -> bool:
1086
1104
  return True
1087
1105
 
1088
1106
 
1089
- def conjointly_validation(data_tbl: FrameT, expressions, threshold: int, tbl_type: str = "local"):
1107
+ def conjointly_validation(
1108
+ data_tbl: IntoFrame, expressions: Any, threshold: int, tbl_type: str = "local"
1109
+ ) -> Any:
1090
1110
  """
1091
1111
  Perform conjoint validation using multiple expressions.
1092
1112
  """
@@ -1101,30 +1121,32 @@ def conjointly_validation(data_tbl: FrameT, expressions, threshold: int, tbl_typ
1101
1121
  return conjointly_instance.get_test_results()
1102
1122
 
1103
1123
 
1104
- def interrogate_gt(tbl: FrameT, column: str, compare: any, na_pass: bool) -> FrameT:
1124
+ # TODO: we can certainly simplify this
1125
+ def interrogate_gt(tbl: IntoFrame, column: str, compare: Any, na_pass: bool) -> Any:
1105
1126
  """Greater than interrogation."""
1106
1127
  return _interrogate_comparison_base(tbl, column, compare, na_pass, "gt")
1107
1128
 
1108
1129
 
1109
- def interrogate_lt(tbl: FrameT, column: str, compare: any, na_pass: bool) -> FrameT:
1130
+ def interrogate_lt(tbl: IntoFrame, column: str, compare: Any, na_pass: bool) -> Any:
1110
1131
  """Less than interrogation."""
1111
1132
  return _interrogate_comparison_base(tbl, column, compare, na_pass, "lt")
1112
1133
 
1113
1134
 
1114
- def interrogate_ge(tbl: FrameT, column: str, compare: any, na_pass: bool) -> FrameT:
1135
+ def interrogate_ge(tbl: IntoFrame, column: str, compare: Any, na_pass: bool) -> Any:
1115
1136
  """Greater than or equal interrogation."""
1116
1137
  return _interrogate_comparison_base(tbl, column, compare, na_pass, "ge")
1117
1138
 
1118
1139
 
1119
- def interrogate_le(tbl: FrameT, column: str, compare: any, na_pass: bool) -> FrameT:
1140
+ def interrogate_le(tbl: IntoFrame, column: str, compare: Any, na_pass: bool) -> Any:
1120
1141
  """Less than or equal interrogation."""
1121
1142
  return _interrogate_comparison_base(tbl, column, compare, na_pass, "le")
1122
1143
 
1123
1144
 
1124
- def interrogate_eq(tbl: FrameT, column: str, compare: any, na_pass: bool) -> FrameT:
1145
+ def interrogate_eq(tbl: IntoFrame, column: str, compare: Any, na_pass: bool) -> Any:
1125
1146
  """Equal interrogation."""
1126
1147
 
1127
1148
  nw_tbl = nw.from_native(tbl)
1149
+ assert is_narwhals_dataframe(nw_tbl) or is_narwhals_lazyframe(nw_tbl)
1128
1150
 
1129
1151
  if isinstance(compare, Column):
1130
1152
  compare_expr = _get_compare_expr_nw(compare=compare)
@@ -1170,10 +1192,10 @@ def interrogate_eq(tbl: FrameT, column: str, compare: any, na_pass: bool) -> Fra
1170
1192
  )
1171
1193
  result_tbl = result_tbl.rename({"pb_is_good_4_tmp": "pb_is_good_4"})
1172
1194
  elif "cannot compare" in str(e).lower():
1173
- # Handle genuine type incompatibility
1195
+ # Handle genuine type incompatibility - native_df type varies by backend
1174
1196
  native_df = result_tbl.to_native()
1175
- col_dtype = str(native_df[column].dtype)
1176
- compare_dtype = str(native_df[compare.name].dtype)
1197
+ col_dtype = str(native_df[column].dtype) # type: ignore[index]
1198
+ compare_dtype = str(native_df[compare.name].dtype) # type: ignore[index]
1177
1199
 
1178
1200
  raise TypeError(
1179
1201
  f"Cannot compare columns '{column}' (dtype: {col_dtype}) and "
@@ -1208,21 +1230,19 @@ def interrogate_eq(tbl: FrameT, column: str, compare: any, na_pass: bool) -> Fra
1208
1230
  or "conversion" in error_msg
1209
1231
  and "failed" in error_msg
1210
1232
  ):
1211
- # Get column types for a descriptive error message
1233
+ # Get column types for a descriptive error message - native type varies by backend
1234
+ col_dtype = "unknown"
1235
+ compare_dtype = "unknown"
1212
1236
  try:
1213
1237
  native_df = result_tbl.to_native()
1214
1238
  if hasattr(native_df, "dtypes"):
1215
- col_dtype = str(native_df.dtypes.get(column, "unknown"))
1216
- compare_dtype = str(native_df.dtypes.get(compare.name, "unknown"))
1239
+ col_dtype = str(native_df.dtypes.get(column, "unknown")) # type: ignore[union-attr]
1240
+ compare_dtype = str(native_df.dtypes.get(compare.name, "unknown")) # type: ignore[union-attr]
1217
1241
  elif hasattr(native_df, "schema"):
1218
- col_dtype = str(native_df.schema.get(column, "unknown"))
1219
- compare_dtype = str(native_df.schema.get(compare.name, "unknown"))
1220
- else:
1221
- col_dtype = "unknown"
1222
- compare_dtype = "unknown"
1242
+ col_dtype = str(native_df.schema.get(column, "unknown")) # type: ignore[union-attr]
1243
+ compare_dtype = str(native_df.schema.get(compare.name, "unknown")) # type: ignore[union-attr]
1223
1244
  except Exception:
1224
- col_dtype = "unknown"
1225
- compare_dtype = "unknown"
1245
+ pass
1226
1246
 
1227
1247
  raise TypeError(
1228
1248
  f"Cannot compare columns '{column}' (dtype: {col_dtype}) and "
@@ -1271,17 +1291,16 @@ def interrogate_eq(tbl: FrameT, column: str, compare: any, na_pass: bool) -> Fra
1271
1291
  or "conversion" in error_msg
1272
1292
  and "failed" in error_msg
1273
1293
  ):
1274
- # Get column type for a descriptive error message
1294
+ # Get column type for a descriptive error message - native type varies by backend
1295
+ col_dtype = "unknown"
1275
1296
  try:
1276
1297
  native_df = result_tbl.to_native()
1277
1298
  if hasattr(native_df, "dtypes"):
1278
- col_dtype = str(native_df.dtypes.get(column, "unknown"))
1299
+ col_dtype = str(native_df.dtypes.get(column, "unknown")) # type: ignore[union-attr]
1279
1300
  elif hasattr(native_df, "schema"):
1280
- col_dtype = str(native_df.schema.get(column, "unknown"))
1281
- else:
1282
- col_dtype = "unknown"
1301
+ col_dtype = str(native_df.schema.get(column, "unknown")) # type: ignore[union-attr]
1283
1302
  except Exception:
1284
- col_dtype = "unknown"
1303
+ pass
1285
1304
 
1286
1305
  compare_type = type(compare).__name__
1287
1306
  compare_value = str(compare)
@@ -1311,10 +1330,11 @@ def interrogate_eq(tbl: FrameT, column: str, compare: any, na_pass: bool) -> Fra
1311
1330
  return result_tbl.drop("pb_is_good_1", "pb_is_good_2", "pb_is_good_3").to_native()
1312
1331
 
1313
1332
 
1314
- def interrogate_ne(tbl: FrameT, column: str, compare: any, na_pass: bool) -> FrameT:
1333
+ def interrogate_ne(tbl: IntoFrame, column: str, compare: Any, na_pass: bool) -> Any:
1315
1334
  """Not equal interrogation."""
1316
1335
 
1317
1336
  nw_tbl = nw.from_native(tbl)
1337
+ assert isinstance(nw_tbl, (nw.DataFrame, nw.LazyFrame))
1318
1338
 
1319
1339
  # Determine if the reference and comparison columns have any null values
1320
1340
  ref_col_has_null_vals = _column_has_null_values(table=nw_tbl, column=column)
@@ -1867,14 +1887,15 @@ def interrogate_ne(tbl: FrameT, column: str, compare: any, na_pass: bool) -> Fra
1867
1887
 
1868
1888
 
1869
1889
  def interrogate_between(
1870
- tbl: FrameT, column: str, low: any, high: any, inclusive: tuple, na_pass: bool
1871
- ) -> FrameT:
1890
+ tbl: IntoFrame, column: str, low: Any, high: Any, inclusive: tuple[bool, bool], na_pass: bool
1891
+ ) -> Any:
1872
1892
  """Between interrogation."""
1873
1893
 
1874
1894
  low_val = _get_compare_expr_nw(compare=low)
1875
1895
  high_val = _get_compare_expr_nw(compare=high)
1876
1896
 
1877
1897
  nw_tbl = nw.from_native(tbl)
1898
+ assert isinstance(nw_tbl, (nw.DataFrame, nw.LazyFrame))
1878
1899
  low_val = _safe_modify_datetime_compare_val(nw_tbl, column, low_val)
1879
1900
  high_val = _safe_modify_datetime_compare_val(nw_tbl, column, high_val)
1880
1901
 
@@ -1936,14 +1957,15 @@ def interrogate_between(
1936
1957
 
1937
1958
 
1938
1959
  def interrogate_outside(
1939
- tbl: FrameT, column: str, low: any, high: any, inclusive: tuple, na_pass: bool
1940
- ) -> FrameT:
1960
+ tbl: IntoFrame, column: str, low: Any, high: Any, inclusive: tuple[bool, bool], na_pass: bool
1961
+ ) -> Any:
1941
1962
  """Outside range interrogation."""
1942
1963
 
1943
1964
  low_val = _get_compare_expr_nw(compare=low)
1944
1965
  high_val = _get_compare_expr_nw(compare=high)
1945
1966
 
1946
1967
  nw_tbl = nw.from_native(tbl)
1968
+ assert isinstance(nw_tbl, (nw.DataFrame, nw.LazyFrame))
1947
1969
  low_val = _safe_modify_datetime_compare_val(nw_tbl, column, low_val)
1948
1970
  high_val = _safe_modify_datetime_compare_val(nw_tbl, column, high_val)
1949
1971
 
@@ -2002,10 +2024,11 @@ def interrogate_outside(
2002
2024
  return result_tbl.to_native()
2003
2025
 
2004
2026
 
2005
- def interrogate_isin(tbl: FrameT, column: str, set_values: any) -> FrameT:
2027
+ def interrogate_isin(tbl: IntoFrame, column: str, set_values: Any) -> Any:
2006
2028
  """In set interrogation."""
2007
2029
 
2008
2030
  nw_tbl = nw.from_native(tbl)
2031
+ assert isinstance(nw_tbl, (nw.DataFrame, nw.LazyFrame))
2009
2032
 
2010
2033
  can_be_null: bool = None in set_values
2011
2034
  base_expr: nw.Expr = nw.col(column).is_in(set_values)
@@ -2016,17 +2039,20 @@ def interrogate_isin(tbl: FrameT, column: str, set_values: any) -> FrameT:
2016
2039
  return result_tbl.to_native()
2017
2040
 
2018
2041
 
2019
- def interrogate_notin(tbl: FrameT, column: str, set_values: any) -> FrameT:
2042
+ def interrogate_notin(tbl: IntoFrame, column: str, set_values: Any) -> Any:
2020
2043
  """Not in set interrogation."""
2021
2044
 
2022
2045
  nw_tbl = nw.from_native(tbl)
2046
+ assert isinstance(nw_tbl, (nw.DataFrame, nw.LazyFrame))
2023
2047
  result_tbl = nw_tbl.with_columns(
2024
2048
  pb_is_good_=nw.col(column).is_in(set_values),
2025
2049
  ).with_columns(pb_is_good_=~nw.col("pb_is_good_"))
2026
2050
  return result_tbl.to_native()
2027
2051
 
2028
2052
 
2029
- def interrogate_regex(tbl: FrameT, column: str, values: dict | str, na_pass: bool) -> FrameT:
2053
+ def interrogate_regex(
2054
+ tbl: IntoFrame, column: str, values: dict[str, Any] | str, na_pass: bool
2055
+ ) -> Any:
2030
2056
  """Regex interrogation."""
2031
2057
 
2032
2058
  # Handle both old and new formats for backward compatibility
@@ -2038,6 +2064,7 @@ def interrogate_regex(tbl: FrameT, column: str, values: dict | str, na_pass: boo
2038
2064
  inverse = values["inverse"]
2039
2065
 
2040
2066
  nw_tbl = nw.from_native(tbl)
2067
+ assert isinstance(nw_tbl, (nw.DataFrame, nw.LazyFrame))
2041
2068
  result_tbl = nw_tbl.with_columns(
2042
2069
  pb_is_good_1=nw.col(column).is_null() & na_pass,
2043
2070
  pb_is_good_2=nw.col(column).str.contains(pattern, literal=False).fill_null(False),
@@ -2057,7 +2084,9 @@ def interrogate_regex(tbl: FrameT, column: str, values: dict | str, na_pass: boo
2057
2084
  return result_tbl.to_native()
2058
2085
 
2059
2086
 
2060
- def interrogate_within_spec(tbl: FrameT, column: str, values: dict, na_pass: bool) -> FrameT:
2087
+ def interrogate_within_spec(
2088
+ tbl: IntoFrame, column: str, values: dict[str, Any], na_pass: bool
2089
+ ) -> Any:
2061
2090
  """Within specification interrogation."""
2062
2091
  from pointblank._spec_utils import (
2063
2092
  regex_email,
@@ -2082,6 +2111,7 @@ def interrogate_within_spec(tbl: FrameT, column: str, values: dict, na_pass: boo
2082
2111
 
2083
2112
  # Convert to Narwhals for cross-backend compatibility
2084
2113
  nw_tbl = nw.from_native(tbl)
2114
+ assert isinstance(nw_tbl, (nw.DataFrame, nw.LazyFrame))
2085
2115
 
2086
2116
  # Regex-based specifications can use Narwhals directly (no materialization needed)
2087
2117
  regex_specs = {
@@ -2135,18 +2165,18 @@ def interrogate_within_spec(tbl: FrameT, column: str, values: dict, na_pass: boo
2135
2165
 
2136
2166
  # For non-Ibis tables or other specs, materialize data and use Python validation
2137
2167
  # Get the column data as a list
2138
- col_data = nw_tbl.select(column).to_native()
2168
+ col_data: Any = nw_tbl.select(column).to_native()
2139
2169
 
2140
- # Convert to list based on backend
2170
+ # Convert to list based on backend - type varies so use duck typing
2141
2171
  if hasattr(col_data, "to_list"): # Polars
2142
- col_list = col_data[column].to_list()
2172
+ col_list = col_data[column].to_list() # type: ignore[index]
2143
2173
  elif hasattr(col_data, "tolist"): # Pandas
2144
- col_list = col_data[column].tolist()
2174
+ col_list = col_data[column].tolist() # type: ignore[index]
2145
2175
  else: # For Ibis tables, we need to execute the query first
2146
2176
  try:
2147
2177
  # Try to execute if it's an Ibis table
2148
2178
  if hasattr(col_data, "execute"):
2149
- col_data_exec = col_data.execute()
2179
+ col_data_exec = col_data.execute() # type: ignore[operator]
2150
2180
  if hasattr(col_data_exec, "to_list"): # Polars result
2151
2181
  col_list = col_data_exec[column].to_list()
2152
2182
  elif hasattr(col_data_exec, "tolist"): # Pandas result
@@ -2159,6 +2189,8 @@ def interrogate_within_spec(tbl: FrameT, column: str, values: dict, na_pass: boo
2159
2189
  # Fallback to direct list conversion
2160
2190
  col_list = list(col_data[column])
2161
2191
 
2192
+ assert isinstance(col_list, list)
2193
+
2162
2194
  # Validate based on spec type (checksum-based validations)
2163
2195
  if spec_lower in ("isbn", "isbn-10", "isbn-13"):
2164
2196
  is_valid_list = check_isbn(col_list)
@@ -2205,7 +2237,9 @@ def interrogate_within_spec(tbl: FrameT, column: str, values: dict, na_pass: boo
2205
2237
  return result_tbl.to_native()
2206
2238
 
2207
2239
 
2208
- def interrogate_within_spec_db(tbl: FrameT, column: str, values: dict, na_pass: bool) -> FrameT:
2240
+ def interrogate_within_spec_db(
2241
+ tbl: IntoFrame, column: str, values: dict[str, Any], na_pass: bool
2242
+ ) -> Any:
2209
2243
  """
2210
2244
  Database-native specification validation (proof of concept).
2211
2245
 
@@ -2226,7 +2260,7 @@ def interrogate_within_spec_db(tbl: FrameT, column: str, values: dict, na_pass:
2226
2260
 
2227
2261
  Returns
2228
2262
  -------
2229
- FrameT
2263
+ Any
2230
2264
  Result table with pb_is_good_ column indicating validation results.
2231
2265
 
2232
2266
  Notes
@@ -2239,9 +2273,9 @@ def interrogate_within_spec_db(tbl: FrameT, column: str, values: dict, na_pass:
2239
2273
  spec_lower = spec.lower()
2240
2274
 
2241
2275
  # Check if this is an Ibis table
2242
- native_tbl = tbl
2243
- if hasattr(tbl, "to_native"):
2244
- native_tbl = tbl.to_native() if callable(tbl.to_native) else tbl
2276
+ native_tbl: Any = tbl
2277
+ if is_narwhals_dataframe(tbl) or is_narwhals_lazyframe(tbl):
2278
+ native_tbl = tbl.to_native()
2245
2279
 
2246
2280
  is_ibis = hasattr(native_tbl, "execute")
2247
2281
 
@@ -2308,7 +2342,7 @@ def interrogate_within_spec_db(tbl: FrameT, column: str, values: dict, na_pass:
2308
2342
  weights = [8, 7, 6, 5, 4, 3, 2, 10, 0, 9, 8, 7, 6, 5, 4, 3, 2]
2309
2343
 
2310
2344
  # Get the column as an Ibis expression
2311
- col_expr = native_tbl[column]
2345
+ col_expr = native_tbl[column] # type: ignore[index]
2312
2346
 
2313
2347
  # Basic checks: length must be 17, no invalid characters (I, O, Q)
2314
2348
  valid_length = col_expr.length() == 17
@@ -2335,11 +2369,11 @@ def interrogate_within_spec_db(tbl: FrameT, column: str, values: dict, na_pass:
2335
2369
  value = ibis.cases(*conditions, else_=0) # Default: invalid char = 0 (will fail validation)
2336
2370
 
2337
2371
  # Multiply by weight and add to checksum
2338
- checksum = checksum + (value * weights[pos])
2372
+ checksum = checksum + (value * weights[pos]) # type: ignore[operator]
2339
2373
 
2340
2374
  # Check digit calculation: checksum % 11
2341
2375
  # If result is 10, check digit should be 'X', otherwise it's the digit itself
2342
- expected_check = checksum % 11
2376
+ expected_check = checksum % 11 # type: ignore[operator]
2343
2377
  actual_check_char = col_expr.upper().substr(8, 1) # Position 9 (0-indexed 8)
2344
2378
 
2345
2379
  # Validate check digit using ibis.cases()
@@ -2362,14 +2396,14 @@ def interrogate_within_spec_db(tbl: FrameT, column: str, values: dict, na_pass:
2362
2396
  is_valid = is_valid.fill_null(False)
2363
2397
 
2364
2398
  # Add validation column to table
2365
- result_tbl = native_tbl.mutate(pb_is_good_=is_valid)
2399
+ result_tbl = native_tbl.mutate(pb_is_good_=is_valid) # type: ignore[union-attr]
2366
2400
 
2367
2401
  return result_tbl
2368
2402
 
2369
2403
 
2370
2404
  def interrogate_credit_card_db(
2371
- tbl: FrameT, column: str, values: dict[str, str], na_pass: bool
2372
- ) -> FrameT:
2405
+ tbl: IntoFrame, column: str, values: dict[str, str], na_pass: bool
2406
+ ) -> Any:
2373
2407
  """
2374
2408
  Database-native credit card validation using Luhn algorithm in SQL.
2375
2409
 
@@ -2391,7 +2425,7 @@ def interrogate_credit_card_db(
2391
2425
 
2392
2426
  Returns
2393
2427
  -------
2394
- FrameT
2428
+ Any
2395
2429
  Result table with pb_is_good_ column indicating validation results.
2396
2430
 
2397
2431
  Notes
@@ -2408,7 +2442,7 @@ def interrogate_credit_card_db(
2408
2442
  # Check if this is an Ibis table
2409
2443
  native_tbl = tbl
2410
2444
  if hasattr(tbl, "to_native"):
2411
- native_tbl = tbl.to_native() if callable(tbl.to_native) else tbl
2445
+ native_tbl = tbl.to_native() if callable(tbl.to_native) else tbl # type: ignore[operator]
2412
2446
 
2413
2447
  is_ibis = hasattr(native_tbl, "execute")
2414
2448
 
@@ -2422,7 +2456,7 @@ def interrogate_credit_card_db(
2422
2456
  raise ImportError("Ibis is required for database-native validation")
2423
2457
 
2424
2458
  # Get the column as an Ibis expression
2425
- col_expr = native_tbl[column]
2459
+ col_expr = native_tbl[column] # type: ignore[index]
2426
2460
 
2427
2461
  # Step 1: Clean the input and remove spaces and hyphens
2428
2462
  # First check format: only digits, spaces, and hyphens allowed
@@ -2475,7 +2509,7 @@ def interrogate_credit_card_db(
2475
2509
 
2476
2510
  # Calculate contribution to checksum
2477
2511
  # If should_double: double the digit, then if > 9 subtract 9
2478
- doubled = digit_val * 2
2512
+ doubled = digit_val * 2 # type: ignore[operator]
2479
2513
  adjusted = ibis.cases(
2480
2514
  (should_double & (doubled > 9), doubled - 9),
2481
2515
  (should_double, doubled),
@@ -2488,10 +2522,10 @@ def interrogate_credit_card_db(
2488
2522
  else_=0,
2489
2523
  )
2490
2524
 
2491
- checksum = checksum + contribution
2525
+ checksum = checksum + contribution # type: ignore[operator]
2492
2526
 
2493
2527
  # Step 4: Valid if checksum % 10 == 0
2494
- luhn_valid = (checksum % 10) == 0
2528
+ luhn_valid = (checksum % 10) == 0 # type: ignore[operator]
2495
2529
 
2496
2530
  # Combine all validation checks
2497
2531
  is_valid = valid_chars & valid_length & luhn_valid
@@ -2505,30 +2539,32 @@ def interrogate_credit_card_db(
2505
2539
  is_valid = is_valid.fill_null(False)
2506
2540
 
2507
2541
  # Add validation column to table
2508
- result_tbl = native_tbl.mutate(pb_is_good_=is_valid)
2542
+ result_tbl = native_tbl.mutate(pb_is_good_=is_valid) # type: ignore[union-attr]
2509
2543
 
2510
2544
  return result_tbl
2511
2545
 
2512
2546
 
2513
- def interrogate_null(tbl: FrameT, column: str) -> FrameT:
2547
+ def interrogate_null(tbl: IntoFrame, column: str) -> Any:
2514
2548
  """Null interrogation."""
2515
2549
 
2516
2550
  nw_tbl = nw.from_native(tbl)
2551
+ assert isinstance(nw_tbl, (nw.DataFrame, nw.LazyFrame))
2517
2552
  result_tbl = nw_tbl.with_columns(pb_is_good_=nw.col(column).is_null())
2518
2553
  return result_tbl.to_native()
2519
2554
 
2520
2555
 
2521
- def interrogate_not_null(tbl: FrameT, column: str) -> FrameT:
2556
+ def interrogate_not_null(tbl: IntoFrame, column: str) -> Any:
2522
2557
  """Not null interrogation."""
2523
2558
 
2524
2559
  nw_tbl = nw.from_native(tbl)
2560
+ assert isinstance(nw_tbl, (nw.DataFrame, nw.LazyFrame))
2525
2561
  result_tbl = nw_tbl.with_columns(pb_is_good_=~nw.col(column).is_null())
2526
2562
  return result_tbl.to_native()
2527
2563
 
2528
2564
 
2529
2565
  def interrogate_increasing(
2530
- tbl: FrameT, column: str, allow_stationary: bool, decreasing_tol: float, na_pass: bool
2531
- ) -> FrameT:
2566
+ tbl: IntoFrame, column: str, allow_stationary: bool, decreasing_tol: float, na_pass: bool
2567
+ ) -> Any:
2532
2568
  """
2533
2569
  Increasing interrogation.
2534
2570
 
@@ -2549,10 +2585,11 @@ def interrogate_increasing(
2549
2585
 
2550
2586
  Returns
2551
2587
  -------
2552
- FrameT
2588
+ Any
2553
2589
  The table with a `pb_is_good_` column indicating pass/fail for each row.
2554
2590
  """
2555
2591
  nw_tbl = nw.from_native(tbl)
2592
+ assert isinstance(nw_tbl, (nw.DataFrame, nw.LazyFrame))
2556
2593
 
2557
2594
  # Create a lagged difference column
2558
2595
  result_tbl = nw_tbl.with_columns(pb_lagged_difference_=nw.col(column) - nw.col(column).shift(1))
@@ -2585,8 +2622,8 @@ def interrogate_increasing(
2585
2622
 
2586
2623
 
2587
2624
  def interrogate_decreasing(
2588
- tbl: FrameT, column: str, allow_stationary: bool, increasing_tol: float, na_pass: bool
2589
- ) -> FrameT:
2625
+ tbl: IntoFrame, column: str, allow_stationary: bool, increasing_tol: float, na_pass: bool
2626
+ ) -> Any:
2590
2627
  """
2591
2628
  Decreasing interrogation.
2592
2629
 
@@ -2607,10 +2644,11 @@ def interrogate_decreasing(
2607
2644
 
2608
2645
  Returns
2609
2646
  -------
2610
- FrameT
2647
+ Any
2611
2648
  The table with a `pb_is_good_` column indicating pass/fail for each row.
2612
2649
  """
2613
2650
  nw_tbl = nw.from_native(tbl)
2651
+ assert isinstance(nw_tbl, (nw.DataFrame, nw.LazyFrame))
2614
2652
 
2615
2653
  # Create a lagged difference column
2616
2654
  result_tbl = nw_tbl.with_columns(pb_lagged_difference_=nw.col(column) - nw.col(column).shift(1))
@@ -2643,8 +2681,8 @@ def interrogate_decreasing(
2643
2681
 
2644
2682
 
2645
2683
  def _interrogate_comparison_base(
2646
- tbl: FrameT, column: str, compare: any, na_pass: bool, operator: str
2647
- ) -> FrameT:
2684
+ tbl: IntoFrame, column: str, compare: Any, na_pass: bool, operator: str
2685
+ ) -> Any:
2648
2686
  """
2649
2687
  Unified base function for comparison operations (gt, ge, lt, le, eq, ne).
2650
2688
 
@@ -2663,13 +2701,14 @@ def _interrogate_comparison_base(
2663
2701
 
2664
2702
  Returns
2665
2703
  -------
2666
- FrameT
2704
+ Any
2667
2705
  The result table with `pb_is_good_` column indicating the passing test units.
2668
2706
  """
2669
2707
 
2670
2708
  compare_expr = _get_compare_expr_nw(compare=compare)
2671
2709
 
2672
2710
  nw_tbl = nw.from_native(tbl)
2711
+ assert isinstance(nw_tbl, (nw.DataFrame, nw.LazyFrame))
2673
2712
  compare_expr = _safe_modify_datetime_compare_val(nw_tbl, column, compare_expr)
2674
2713
 
2675
2714
  # Create the comparison expression based on the operator
@@ -2716,7 +2755,7 @@ def _interrogate_comparison_base(
2716
2755
  return result_tbl.to_native()
2717
2756
 
2718
2757
 
2719
- def interrogate_rows_distinct(data_tbl: FrameT, columns_subset: list[str] | None) -> FrameT:
2758
+ def interrogate_rows_distinct(data_tbl: IntoFrame, columns_subset: list[str] | None) -> Any:
2720
2759
  """
2721
2760
  Check if rows in a DataFrame are distinct.
2722
2761
 
@@ -2733,10 +2772,11 @@ def interrogate_rows_distinct(data_tbl: FrameT, columns_subset: list[str] | None
2733
2772
 
2734
2773
  Returns
2735
2774
  -------
2736
- FrameT
2775
+ Any
2737
2776
  A DataFrame with a `pb_is_good_` column indicating which rows pass the test.
2738
2777
  """
2739
2778
  tbl = nw.from_native(data_tbl)
2779
+ assert is_narwhals_dataframe(tbl) or is_narwhals_lazyframe(tbl)
2740
2780
 
2741
2781
  # Get the column subset to use for the test
2742
2782
  if columns_subset is None:
@@ -2744,18 +2784,23 @@ def interrogate_rows_distinct(data_tbl: FrameT, columns_subset: list[str] | None
2744
2784
 
2745
2785
  # Create a count of duplicates using group_by approach
2746
2786
  # Group by the columns of interest and count occurrences
2747
- count_tbl = tbl.group_by(columns_subset).agg(nw.len().alias("pb_count_"))
2748
-
2749
- # Join back to original table to get count for each row
2750
- tbl = tbl.join(count_tbl, on=columns_subset, how="left")
2751
-
2752
- # Passing rows will have the value `1` (no duplicates, so True), otherwise False applies
2753
- tbl = tbl.with_columns(pb_is_good_=nw.col("pb_count_") == 1).drop("pb_count_")
2754
-
2755
- return tbl.to_native()
2787
+ # Handle DataFrame and LazyFrame separately for proper type narrowing
2788
+ if is_narwhals_dataframe(tbl):
2789
+ count_tbl = tbl.group_by(columns_subset).agg(nw.len().alias("pb_count_"))
2790
+ result = tbl.join(count_tbl, on=columns_subset, how="left")
2791
+ result = result.with_columns(pb_is_good_=nw.col("pb_count_") == 1).drop("pb_count_")
2792
+ return result.to_native()
2793
+ elif is_narwhals_lazyframe(tbl):
2794
+ count_tbl = tbl.group_by(columns_subset).agg(nw.len().alias("pb_count_"))
2795
+ result = tbl.join(count_tbl, on=columns_subset, how="left")
2796
+ result = result.with_columns(pb_is_good_=nw.col("pb_count_") == 1).drop("pb_count_")
2797
+ return result.to_native()
2798
+ else:
2799
+ msg = f"Expected DataFrame or LazyFrame, got {type(tbl)}"
2800
+ raise TypeError(msg)
2756
2801
 
2757
2802
 
2758
- def interrogate_rows_complete(tbl: FrameT, columns_subset: list[str] | None) -> FrameT:
2803
+ def interrogate_rows_complete(tbl: IntoFrame, columns_subset: list[str] | None) -> Any:
2759
2804
  """Rows complete interrogation."""
2760
2805
  nw_tbl = nw.from_native(tbl)
2761
2806
 
@@ -2771,12 +2816,25 @@ def interrogate_rows_complete(tbl: FrameT, columns_subset: list[str] | None) ->
2771
2816
  return result_tbl.to_native()
2772
2817
 
2773
2818
 
2774
- def interrogate_prompt(tbl: FrameT, columns_subset: list[str] | None, ai_config: dict) -> FrameT:
2819
+ def interrogate_prompt(
2820
+ tbl: IntoFrame, columns_subset: list[str] | None, ai_config: dict[str, Any]
2821
+ ) -> Any:
2775
2822
  """AI-powered interrogation of rows."""
2776
2823
  import logging
2777
2824
 
2778
2825
  logger = logging.getLogger(__name__)
2779
2826
 
2827
+ # Convert to narwhals early for consistent row counting
2828
+ nw_tbl = nw.from_native(tbl)
2829
+ # Get row count - for LazyFrame we need to use select/collect
2830
+ if is_narwhals_lazyframe(nw_tbl):
2831
+ row_count = nw_tbl.select(nw.len()).collect().item()
2832
+ assert isinstance(row_count, int)
2833
+ total_rows = row_count
2834
+ else:
2835
+ assert is_narwhals_dataframe(nw_tbl)
2836
+ total_rows = len(nw_tbl)
2837
+
2780
2838
  try:
2781
2839
  # Import AI validation modules
2782
2840
  from pointblank._utils_ai import (
@@ -2833,28 +2891,25 @@ def interrogate_prompt(tbl: FrameT, columns_subset: list[str] | None, ai_config:
2833
2891
  )
2834
2892
 
2835
2893
  # Parse and combine results with signature mapping optimization
2836
- parser = _ValidationResponseParser(total_rows=len(tbl))
2894
+ parser = _ValidationResponseParser(total_rows=total_rows)
2837
2895
  combined_results = parser.combine_batch_results(batch_results, signature_mapping)
2838
2896
 
2839
2897
  # Debug: Log table info and combined results
2840
2898
  logger.debug("🏁 Final result conversion:")
2841
- logger.debug(f" - Table length: {len(tbl)}")
2899
+ logger.debug(f" - Table length: {total_rows}")
2842
2900
  logger.debug(
2843
2901
  f" - Combined results keys: {sorted(combined_results.keys()) if combined_results else 'None'}"
2844
2902
  )
2845
2903
 
2846
- # Convert results to narwhals format
2847
- nw_tbl = nw.from_native(tbl)
2848
-
2849
2904
  # Create a boolean column for validation results
2850
2905
  validation_results = []
2851
- for i in range(len(tbl)):
2906
+ for i in range(total_rows):
2852
2907
  # Default to False if row wasn't processed
2853
2908
  result = combined_results.get(i, False)
2854
2909
  validation_results.append(result)
2855
2910
 
2856
2911
  # Debug: Log first few conversions
2857
- if i < 5 or len(tbl) - i <= 2:
2912
+ if i < 5 or total_rows - i <= 2:
2858
2913
  logger.debug(f" Row {i}: {result} (from combined_results.get({i}, False))")
2859
2914
 
2860
2915
  logger.debug(f" - Final validation_results length: {len(validation_results)}")
@@ -2893,10 +2948,9 @@ def interrogate_prompt(tbl: FrameT, columns_subset: list[str] | None, ai_config:
2893
2948
  logger.error(f"Missing dependencies for AI validation: {e}")
2894
2949
  logger.error("Install required packages: pip install openai anthropic aiohttp")
2895
2950
 
2896
- # Return all False results as fallback
2897
- nw_tbl = nw.from_native(tbl)
2951
+ # Return all False results as fallback (nw_tbl and total_rows defined at function start)
2898
2952
  native_tbl = nw_tbl.to_native()
2899
- validation_results = [False] * len(tbl)
2953
+ validation_results = [False] * total_rows
2900
2954
 
2901
2955
  if hasattr(native_tbl, "with_columns"): # Polars
2902
2956
  import polars as pl
@@ -2918,10 +2972,9 @@ def interrogate_prompt(tbl: FrameT, columns_subset: list[str] | None, ai_config:
2918
2972
  except Exception as e:
2919
2973
  logger.error(f"AI validation failed: {e}")
2920
2974
 
2921
- # Return all False results as fallback
2922
- nw_tbl = nw.from_native(tbl)
2975
+ # Return all False results as fallback (nw_tbl and total_rows defined at function start)
2923
2976
  native_tbl = nw_tbl.to_native()
2924
- validation_results = [False] * len(tbl)
2977
+ validation_results = [False] * total_rows
2925
2978
 
2926
2979
  if hasattr(native_tbl, "with_columns"): # Polars
2927
2980
  import polars as pl