pointblank 0.16.0__py3-none-any.whl → 0.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,17 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import functools
4
+ from collections.abc import Callable
4
5
  from dataclasses import dataclass
5
- from typing import Any
6
+ from typing import TYPE_CHECKING, Any
6
7
 
7
8
  import narwhals as nw
8
- from narwhals.dependencies import is_pandas_dataframe, is_polars_dataframe
9
- from narwhals.typing import FrameT
9
+ from narwhals.dependencies import (
10
+ is_narwhals_dataframe,
11
+ is_narwhals_lazyframe,
12
+ is_pandas_dataframe,
13
+ is_polars_dataframe,
14
+ )
10
15
 
11
16
  from pointblank._constants import IBIS_BACKENDS
12
17
  from pointblank._spec_utils import (
@@ -16,6 +21,7 @@ from pointblank._spec_utils import (
16
21
  check_postal_code,
17
22
  check_vin,
18
23
  )
24
+ from pointblank._typing import AbsoluteBounds
19
25
  from pointblank._utils import (
20
26
  _column_test_prep,
21
27
  _convert_to_narwhals,
@@ -23,6 +29,9 @@ from pointblank._utils import (
23
29
  )
24
30
  from pointblank.column import Column
25
31
 
32
+ if TYPE_CHECKING:
33
+ from narwhals.typing import IntoFrame
34
+
26
35
 
27
36
  def _safe_modify_datetime_compare_val(data_frame: Any, column: str, compare_val: Any) -> Any:
28
37
  """
@@ -92,7 +101,9 @@ def _safe_modify_datetime_compare_val(data_frame: Any, column: str, compare_val:
92
101
  return compare_val
93
102
 
94
103
 
95
- def _safe_is_nan_or_null_expr(data_frame: Any, column_expr: Any, column_name: str = None) -> Any:
104
+ def _safe_is_nan_or_null_expr(
105
+ data_frame: Any, column_expr: Any, column_name: str | None = None
106
+ ) -> Any:
96
107
  """
97
108
  Create an expression that safely checks for both Null and NaN values.
98
109
 
@@ -423,7 +434,7 @@ class SpeciallyValidation:
423
434
  else:
424
435
  self.tbl_type = tbl_type
425
436
 
426
- def get_test_results(self) -> any | list[bool]:
437
+ def get_test_results(self) -> Any | list[bool]:
427
438
  """Evaluate the expression get either a list of booleans or a results table."""
428
439
 
429
440
  # Get the expression and inspect whether there is a `data` argument
@@ -517,7 +528,7 @@ class NumberOfTestUnits:
517
528
  Count the number of test units in a column.
518
529
  """
519
530
 
520
- df: FrameT
531
+ df: Any # Can be IntoFrame or Ibis table
521
532
  column: str
522
533
 
523
534
  def get_test_units(self, tbl_type: str) -> int:
@@ -534,15 +545,18 @@ class NumberOfTestUnits:
534
545
  )
535
546
 
536
547
  # Handle LazyFrames which don't have len()
537
- if hasattr(dfn, "collect"):
548
+ if is_narwhals_lazyframe(dfn):
538
549
  dfn = dfn.collect()
539
550
 
551
+ assert is_narwhals_dataframe(dfn)
540
552
  return len(dfn)
541
553
 
542
554
  if tbl_type in IBIS_BACKENDS:
543
555
  # Get the count of test units and convert to a native format
544
556
  # TODO: check whether pandas or polars is available
545
- return self.df.count().to_polars()
557
+ return self.df.count().to_polars() # type: ignore[union-attr]
558
+
559
+ raise ValueError(f"Unsupported table type: {tbl_type}")
546
560
 
547
561
 
548
562
  def _get_compare_expr_nw(compare: Any) -> Any:
@@ -553,28 +567,25 @@ def _get_compare_expr_nw(compare: Any) -> Any:
553
567
  return compare
554
568
 
555
569
 
556
- def _column_has_null_values(table: FrameT, column: str) -> bool:
570
+ def _column_has_null_values(table: nw.DataFrame[Any] | nw.LazyFrame[Any], column: str) -> bool:
557
571
  try:
558
- # Try the standard null_count() method
559
- null_count = (table.select(column).null_count())[column][0]
572
+ # Try the standard null_count() method (DataFrame)
573
+ null_count = (table.select(column).null_count())[column][0] # type: ignore[union-attr]
560
574
  except AttributeError:
561
575
  # For LazyFrames, collect first then get null count
562
576
  try:
563
- collected = table.select(column).collect()
577
+ collected = table.select(column).collect() # type: ignore[union-attr]
564
578
  null_count = (collected.null_count())[column][0]
565
579
  except Exception:
566
580
  # Fallback: check if any values are null
567
581
  try:
568
- result = table.select(nw.col(column).is_null().sum().alias("null_count")).collect()
582
+ result = table.select(nw.col(column).is_null().sum().alias("null_count")).collect() # type: ignore[union-attr]
569
583
  null_count = result["null_count"][0]
570
584
  except Exception:
571
585
  # Last resort: return False (assume no nulls)
572
586
  return False
573
587
 
574
- if null_count is None or null_count == 0:
575
- return False
576
-
577
- return True
588
+ return null_count is not None and null_count > 0
578
589
 
579
590
 
580
591
  def _check_nulls_across_columns_nw(table, columns_subset):
@@ -594,7 +605,7 @@ def _check_nulls_across_columns_nw(table, columns_subset):
594
605
  return result
595
606
 
596
607
 
597
- def _modify_datetime_compare_val(tgt_column: any, compare_val: any) -> any:
608
+ def _modify_datetime_compare_val(tgt_column: Any, compare_val: Any) -> Any:
598
609
  tgt_col_dtype_str = str(tgt_column.dtype).lower()
599
610
 
600
611
  if compare_val is isinstance(compare_val, Column): # pragma: no cover
@@ -638,7 +649,7 @@ def _modify_datetime_compare_val(tgt_column: any, compare_val: any) -> any:
638
649
  return compare_expr
639
650
 
640
651
 
641
- def col_vals_expr(data_tbl: FrameT, expr, tbl_type: str = "local"):
652
+ def col_vals_expr(data_tbl: Any, expr: Any, tbl_type: str = "local") -> Any:
642
653
  """Check if values in a column evaluate to True for a given predicate expression."""
643
654
  if tbl_type == "local":
644
655
  # Check the type of expression provided
@@ -668,21 +679,19 @@ def col_vals_expr(data_tbl: FrameT, expr, tbl_type: str = "local"):
668
679
  return data_tbl # pragma: no cover
669
680
 
670
681
 
671
- def rows_complete(data_tbl: FrameT, columns_subset: list[str] | None):
682
+ def rows_complete(data_tbl: IntoFrame, columns_subset: list[str] | None) -> Any:
672
683
  """
673
684
  Check if rows in a DataFrame are complete (no null values).
674
685
 
675
686
  This function replaces the RowsComplete dataclass for direct usage.
676
687
  """
677
- tbl = _convert_to_narwhals(df=data_tbl)
678
-
679
688
  return interrogate_rows_complete(
680
- tbl=tbl,
689
+ tbl=data_tbl,
681
690
  columns_subset=columns_subset,
682
691
  )
683
692
 
684
693
 
685
- def col_exists(data_tbl: FrameT, column: str) -> bool:
694
+ def col_exists(data_tbl: IntoFrame, column: str) -> bool:
686
695
  """
687
696
  Check if a column exists in a DataFrame.
688
697
 
@@ -703,8 +712,8 @@ def col_exists(data_tbl: FrameT, column: str) -> bool:
703
712
 
704
713
 
705
714
  def col_schema_match(
706
- data_tbl: FrameT,
707
- schema,
715
+ data_tbl: IntoFrame,
716
+ schema: Any,
708
717
  complete: bool,
709
718
  in_order: bool,
710
719
  case_sensitive_colnames: bool,
@@ -728,7 +737,9 @@ def col_schema_match(
728
737
  )
729
738
 
730
739
 
731
- def row_count_match(data_tbl: FrameT, count, inverse: bool, abs_tol_bounds) -> bool:
740
+ def row_count_match(
741
+ data_tbl: IntoFrame, count: Any, inverse: bool, abs_tol_bounds: AbsoluteBounds
742
+ ) -> bool:
732
743
  """
733
744
  Check if DataFrame row count matches expected count.
734
745
  """
@@ -745,7 +756,34 @@ def row_count_match(data_tbl: FrameT, count, inverse: bool, abs_tol_bounds) -> b
745
756
  return row_count >= min_val and row_count <= max_val
746
757
 
747
758
 
748
- def col_count_match(data_tbl: FrameT, count, inverse: bool) -> bool:
759
+ def col_pct_null(
760
+ data_tbl: IntoFrame, column: str, p: float, bound_finder: Callable[[int], AbsoluteBounds]
761
+ ) -> bool:
762
+ """Check if the percentage of null values is within p given the absolute bounds."""
763
+ nw_frame = nw.from_native(data_tbl)
764
+ # Handle LazyFrames by collecting them first
765
+ if is_narwhals_lazyframe(nw_frame):
766
+ nw_frame = nw_frame.collect()
767
+
768
+ assert is_narwhals_dataframe(nw_frame)
769
+
770
+ # We cast as int because it could come back as an arbitrary type. For example, if the backend
771
+ # is numpy-like, we might get a scalar from `item()`. `int()` expects a certain signature though
772
+ # and `object` does not satisfy so we have to go with the type ignore.
773
+ total_rows: object = nw_frame.select(nw.len()).item()
774
+ total_rows: int = int(total_rows) # type: ignore
775
+
776
+ abs_target: float = round(total_rows * p)
777
+ lower_bound, upper_bound = bound_finder(abs_target)
778
+
779
+ # Count null values (see above comment on typing shenanigans)
780
+ n_null: object = nw_frame.select(nw.col(column).is_null().sum()).item()
781
+ n_null: int = int(n_null) # type: ignore
782
+
783
+ return n_null >= (abs_target - lower_bound) and n_null <= (abs_target + upper_bound)
784
+
785
+
786
+ def col_count_match(data_tbl: IntoFrame, count: Any, inverse: bool) -> bool:
749
787
  """
750
788
  Check if DataFrame column count matches expected count.
751
789
  """
@@ -757,7 +795,7 @@ def col_count_match(data_tbl: FrameT, count, inverse: bool) -> bool:
757
795
  return get_column_count(data=data_tbl) != count
758
796
 
759
797
 
760
- def _coerce_to_common_backend(data_tbl: FrameT, tbl_compare: FrameT) -> tuple[FrameT, FrameT]:
798
+ def _coerce_to_common_backend(data_tbl: Any, tbl_compare: Any) -> tuple[Any, Any]:
761
799
  """
762
800
  Coerce two tables to the same backend if they differ.
763
801
 
@@ -774,7 +812,7 @@ def _coerce_to_common_backend(data_tbl: FrameT, tbl_compare: FrameT) -> tuple[Fr
774
812
 
775
813
  Returns
776
814
  -------
777
- tuple[FrameT, FrameT]
815
+ tuple[Any, Any]
778
816
  Both tables, with tbl_compare potentially converted to data_tbl's backend.
779
817
  """
780
818
  # Get backend types for both tables
@@ -860,7 +898,7 @@ def _coerce_to_common_backend(data_tbl: FrameT, tbl_compare: FrameT) -> tuple[Fr
860
898
  return data_tbl, tbl_compare
861
899
 
862
900
 
863
- def tbl_match(data_tbl: FrameT, tbl_compare: FrameT) -> bool:
901
+ def tbl_match(data_tbl: IntoFrame, tbl_compare: IntoFrame) -> bool:
864
902
  """
865
903
  Check if two tables match exactly in schema, row count, and data.
866
904
 
@@ -974,33 +1012,37 @@ def tbl_match(data_tbl: FrameT, tbl_compare: FrameT) -> bool:
974
1012
 
975
1013
  # Convert to native format for comparison
976
1014
  # We need to collect if lazy frames
977
- if hasattr(col_data_1, "collect"):
1015
+ if is_narwhals_lazyframe(col_data_1):
978
1016
  col_data_1 = col_data_1.collect()
979
1017
 
980
- if hasattr(col_data_2, "collect"):
1018
+ if is_narwhals_lazyframe(col_data_2):
981
1019
  col_data_2 = col_data_2.collect()
982
1020
 
983
1021
  # Convert to native and then to lists for comparison
984
- col_1_native = col_data_1.to_native()
985
- col_2_native = col_data_2.to_native()
1022
+ # Native frames could be Polars, Pandas, or Ibis - use Any for dynamic access
1023
+ col_1_native: Any = col_data_1.to_native()
1024
+ col_2_native: Any = col_data_2.to_native()
986
1025
 
987
1026
  # Extract values as lists for comparison
988
- if hasattr(col_1_native, "to_list"): # Polars Series
989
- values_1 = col_1_native[col_name].to_list()
990
- values_2 = col_2_native[col_name].to_list()
1027
+ # Note: We use hasattr for runtime detection but maintain Any typing
1028
+ values_1: list[Any]
1029
+ values_2: list[Any]
1030
+ if hasattr(col_1_native, "to_list"): # Polars DataFrame
1031
+ values_1 = col_1_native[col_name].to_list() # type: ignore[index]
1032
+ values_2 = col_2_native[col_name].to_list() # type: ignore[index]
991
1033
 
992
- elif hasattr(col_1_native, "tolist"): # Pandas Series/DataFrame
993
- values_1 = col_1_native[col_name].tolist()
994
- values_2 = col_2_native[col_name].tolist()
1034
+ elif hasattr(col_1_native, "tolist"): # Pandas DataFrame
1035
+ values_1 = col_1_native[col_name].tolist() # type: ignore[index]
1036
+ values_2 = col_2_native[col_name].tolist() # type: ignore[index]
995
1037
 
996
1038
  elif hasattr(col_1_native, "collect"): # Ibis
997
- values_1 = col_1_native[col_name].to_pandas().tolist()
998
- values_2 = col_2_native[col_name].to_pandas().tolist()
1039
+ values_1 = col_1_native[col_name].to_pandas().tolist() # type: ignore[index]
1040
+ values_2 = col_2_native[col_name].to_pandas().tolist() # type: ignore[index]
999
1041
 
1000
1042
  else:
1001
1043
  # Fallback: try direct comparison
1002
- values_1 = list(col_1_native[col_name])
1003
- values_2 = list(col_2_native[col_name])
1044
+ values_1 = list(col_1_native[col_name]) # type: ignore[index]
1045
+ values_2 = list(col_2_native[col_name]) # type: ignore[index]
1004
1046
 
1005
1047
  # Compare the two lists element by element, handling NaN/None
1006
1048
  if len(values_1) != len(values_2):
@@ -1062,7 +1104,9 @@ def tbl_match(data_tbl: FrameT, tbl_compare: FrameT) -> bool:
1062
1104
  return True
1063
1105
 
1064
1106
 
1065
- def conjointly_validation(data_tbl: FrameT, expressions, threshold: int, tbl_type: str = "local"):
1107
+ def conjointly_validation(
1108
+ data_tbl: IntoFrame, expressions: Any, threshold: int, tbl_type: str = "local"
1109
+ ) -> Any:
1066
1110
  """
1067
1111
  Perform conjoint validation using multiple expressions.
1068
1112
  """
@@ -1077,30 +1121,32 @@ def conjointly_validation(data_tbl: FrameT, expressions, threshold: int, tbl_typ
1077
1121
  return conjointly_instance.get_test_results()
1078
1122
 
1079
1123
 
1080
- def interrogate_gt(tbl: FrameT, column: str, compare: any, na_pass: bool) -> FrameT:
1124
+ # TODO: we can certainly simplify this
1125
+ def interrogate_gt(tbl: IntoFrame, column: str, compare: Any, na_pass: bool) -> Any:
1081
1126
  """Greater than interrogation."""
1082
1127
  return _interrogate_comparison_base(tbl, column, compare, na_pass, "gt")
1083
1128
 
1084
1129
 
1085
- def interrogate_lt(tbl: FrameT, column: str, compare: any, na_pass: bool) -> FrameT:
1130
+ def interrogate_lt(tbl: IntoFrame, column: str, compare: Any, na_pass: bool) -> Any:
1086
1131
  """Less than interrogation."""
1087
1132
  return _interrogate_comparison_base(tbl, column, compare, na_pass, "lt")
1088
1133
 
1089
1134
 
1090
- def interrogate_ge(tbl: FrameT, column: str, compare: any, na_pass: bool) -> FrameT:
1135
+ def interrogate_ge(tbl: IntoFrame, column: str, compare: Any, na_pass: bool) -> Any:
1091
1136
  """Greater than or equal interrogation."""
1092
1137
  return _interrogate_comparison_base(tbl, column, compare, na_pass, "ge")
1093
1138
 
1094
1139
 
1095
- def interrogate_le(tbl: FrameT, column: str, compare: any, na_pass: bool) -> FrameT:
1140
+ def interrogate_le(tbl: IntoFrame, column: str, compare: Any, na_pass: bool) -> Any:
1096
1141
  """Less than or equal interrogation."""
1097
1142
  return _interrogate_comparison_base(tbl, column, compare, na_pass, "le")
1098
1143
 
1099
1144
 
1100
- def interrogate_eq(tbl: FrameT, column: str, compare: any, na_pass: bool) -> FrameT:
1145
+ def interrogate_eq(tbl: IntoFrame, column: str, compare: Any, na_pass: bool) -> Any:
1101
1146
  """Equal interrogation."""
1102
1147
 
1103
1148
  nw_tbl = nw.from_native(tbl)
1149
+ assert is_narwhals_dataframe(nw_tbl) or is_narwhals_lazyframe(nw_tbl)
1104
1150
 
1105
1151
  if isinstance(compare, Column):
1106
1152
  compare_expr = _get_compare_expr_nw(compare=compare)
@@ -1146,10 +1192,10 @@ def interrogate_eq(tbl: FrameT, column: str, compare: any, na_pass: bool) -> Fra
1146
1192
  )
1147
1193
  result_tbl = result_tbl.rename({"pb_is_good_4_tmp": "pb_is_good_4"})
1148
1194
  elif "cannot compare" in str(e).lower():
1149
- # Handle genuine type incompatibility
1195
+ # Handle genuine type incompatibility - native_df type varies by backend
1150
1196
  native_df = result_tbl.to_native()
1151
- col_dtype = str(native_df[column].dtype)
1152
- compare_dtype = str(native_df[compare.name].dtype)
1197
+ col_dtype = str(native_df[column].dtype) # type: ignore[index]
1198
+ compare_dtype = str(native_df[compare.name].dtype) # type: ignore[index]
1153
1199
 
1154
1200
  raise TypeError(
1155
1201
  f"Cannot compare columns '{column}' (dtype: {col_dtype}) and "
@@ -1184,21 +1230,19 @@ def interrogate_eq(tbl: FrameT, column: str, compare: any, na_pass: bool) -> Fra
1184
1230
  or "conversion" in error_msg
1185
1231
  and "failed" in error_msg
1186
1232
  ):
1187
- # Get column types for a descriptive error message
1233
+ # Get column types for a descriptive error message - native type varies by backend
1234
+ col_dtype = "unknown"
1235
+ compare_dtype = "unknown"
1188
1236
  try:
1189
1237
  native_df = result_tbl.to_native()
1190
1238
  if hasattr(native_df, "dtypes"):
1191
- col_dtype = str(native_df.dtypes.get(column, "unknown"))
1192
- compare_dtype = str(native_df.dtypes.get(compare.name, "unknown"))
1239
+ col_dtype = str(native_df.dtypes.get(column, "unknown")) # type: ignore[union-attr]
1240
+ compare_dtype = str(native_df.dtypes.get(compare.name, "unknown")) # type: ignore[union-attr]
1193
1241
  elif hasattr(native_df, "schema"):
1194
- col_dtype = str(native_df.schema.get(column, "unknown"))
1195
- compare_dtype = str(native_df.schema.get(compare.name, "unknown"))
1196
- else:
1197
- col_dtype = "unknown"
1198
- compare_dtype = "unknown"
1242
+ col_dtype = str(native_df.schema.get(column, "unknown")) # type: ignore[union-attr]
1243
+ compare_dtype = str(native_df.schema.get(compare.name, "unknown")) # type: ignore[union-attr]
1199
1244
  except Exception:
1200
- col_dtype = "unknown"
1201
- compare_dtype = "unknown"
1245
+ pass
1202
1246
 
1203
1247
  raise TypeError(
1204
1248
  f"Cannot compare columns '{column}' (dtype: {col_dtype}) and "
@@ -1247,17 +1291,16 @@ def interrogate_eq(tbl: FrameT, column: str, compare: any, na_pass: bool) -> Fra
1247
1291
  or "conversion" in error_msg
1248
1292
  and "failed" in error_msg
1249
1293
  ):
1250
- # Get column type for a descriptive error message
1294
+ # Get column type for a descriptive error message - native type varies by backend
1295
+ col_dtype = "unknown"
1251
1296
  try:
1252
1297
  native_df = result_tbl.to_native()
1253
1298
  if hasattr(native_df, "dtypes"):
1254
- col_dtype = str(native_df.dtypes.get(column, "unknown"))
1299
+ col_dtype = str(native_df.dtypes.get(column, "unknown")) # type: ignore[union-attr]
1255
1300
  elif hasattr(native_df, "schema"):
1256
- col_dtype = str(native_df.schema.get(column, "unknown"))
1257
- else:
1258
- col_dtype = "unknown"
1301
+ col_dtype = str(native_df.schema.get(column, "unknown")) # type: ignore[union-attr]
1259
1302
  except Exception:
1260
- col_dtype = "unknown"
1303
+ pass
1261
1304
 
1262
1305
  compare_type = type(compare).__name__
1263
1306
  compare_value = str(compare)
@@ -1287,10 +1330,11 @@ def interrogate_eq(tbl: FrameT, column: str, compare: any, na_pass: bool) -> Fra
1287
1330
  return result_tbl.drop("pb_is_good_1", "pb_is_good_2", "pb_is_good_3").to_native()
1288
1331
 
1289
1332
 
1290
- def interrogate_ne(tbl: FrameT, column: str, compare: any, na_pass: bool) -> FrameT:
1333
+ def interrogate_ne(tbl: IntoFrame, column: str, compare: Any, na_pass: bool) -> Any:
1291
1334
  """Not equal interrogation."""
1292
1335
 
1293
1336
  nw_tbl = nw.from_native(tbl)
1337
+ assert isinstance(nw_tbl, (nw.DataFrame, nw.LazyFrame))
1294
1338
 
1295
1339
  # Determine if the reference and comparison columns have any null values
1296
1340
  ref_col_has_null_vals = _column_has_null_values(table=nw_tbl, column=column)
@@ -1843,14 +1887,15 @@ def interrogate_ne(tbl: FrameT, column: str, compare: any, na_pass: bool) -> Fra
1843
1887
 
1844
1888
 
1845
1889
  def interrogate_between(
1846
- tbl: FrameT, column: str, low: any, high: any, inclusive: tuple, na_pass: bool
1847
- ) -> FrameT:
1890
+ tbl: IntoFrame, column: str, low: Any, high: Any, inclusive: tuple[bool, bool], na_pass: bool
1891
+ ) -> Any:
1848
1892
  """Between interrogation."""
1849
1893
 
1850
1894
  low_val = _get_compare_expr_nw(compare=low)
1851
1895
  high_val = _get_compare_expr_nw(compare=high)
1852
1896
 
1853
1897
  nw_tbl = nw.from_native(tbl)
1898
+ assert isinstance(nw_tbl, (nw.DataFrame, nw.LazyFrame))
1854
1899
  low_val = _safe_modify_datetime_compare_val(nw_tbl, column, low_val)
1855
1900
  high_val = _safe_modify_datetime_compare_val(nw_tbl, column, high_val)
1856
1901
 
@@ -1912,14 +1957,15 @@ def interrogate_between(
1912
1957
 
1913
1958
 
1914
1959
  def interrogate_outside(
1915
- tbl: FrameT, column: str, low: any, high: any, inclusive: tuple, na_pass: bool
1916
- ) -> FrameT:
1960
+ tbl: IntoFrame, column: str, low: Any, high: Any, inclusive: tuple[bool, bool], na_pass: bool
1961
+ ) -> Any:
1917
1962
  """Outside range interrogation."""
1918
1963
 
1919
1964
  low_val = _get_compare_expr_nw(compare=low)
1920
1965
  high_val = _get_compare_expr_nw(compare=high)
1921
1966
 
1922
1967
  nw_tbl = nw.from_native(tbl)
1968
+ assert isinstance(nw_tbl, (nw.DataFrame, nw.LazyFrame))
1923
1969
  low_val = _safe_modify_datetime_compare_val(nw_tbl, column, low_val)
1924
1970
  high_val = _safe_modify_datetime_compare_val(nw_tbl, column, high_val)
1925
1971
 
@@ -1978,10 +2024,11 @@ def interrogate_outside(
1978
2024
  return result_tbl.to_native()
1979
2025
 
1980
2026
 
1981
- def interrogate_isin(tbl: FrameT, column: str, set_values: any) -> FrameT:
2027
+ def interrogate_isin(tbl: IntoFrame, column: str, set_values: Any) -> Any:
1982
2028
  """In set interrogation."""
1983
2029
 
1984
2030
  nw_tbl = nw.from_native(tbl)
2031
+ assert isinstance(nw_tbl, (nw.DataFrame, nw.LazyFrame))
1985
2032
 
1986
2033
  can_be_null: bool = None in set_values
1987
2034
  base_expr: nw.Expr = nw.col(column).is_in(set_values)
@@ -1992,17 +2039,20 @@ def interrogate_isin(tbl: FrameT, column: str, set_values: any) -> FrameT:
1992
2039
  return result_tbl.to_native()
1993
2040
 
1994
2041
 
1995
- def interrogate_notin(tbl: FrameT, column: str, set_values: any) -> FrameT:
2042
+ def interrogate_notin(tbl: IntoFrame, column: str, set_values: Any) -> Any:
1996
2043
  """Not in set interrogation."""
1997
2044
 
1998
2045
  nw_tbl = nw.from_native(tbl)
2046
+ assert isinstance(nw_tbl, (nw.DataFrame, nw.LazyFrame))
1999
2047
  result_tbl = nw_tbl.with_columns(
2000
2048
  pb_is_good_=nw.col(column).is_in(set_values),
2001
2049
  ).with_columns(pb_is_good_=~nw.col("pb_is_good_"))
2002
2050
  return result_tbl.to_native()
2003
2051
 
2004
2052
 
2005
- def interrogate_regex(tbl: FrameT, column: str, values: dict | str, na_pass: bool) -> FrameT:
2053
+ def interrogate_regex(
2054
+ tbl: IntoFrame, column: str, values: dict[str, Any] | str, na_pass: bool
2055
+ ) -> Any:
2006
2056
  """Regex interrogation."""
2007
2057
 
2008
2058
  # Handle both old and new formats for backward compatibility
@@ -2014,6 +2064,7 @@ def interrogate_regex(tbl: FrameT, column: str, values: dict | str, na_pass: boo
2014
2064
  inverse = values["inverse"]
2015
2065
 
2016
2066
  nw_tbl = nw.from_native(tbl)
2067
+ assert isinstance(nw_tbl, (nw.DataFrame, nw.LazyFrame))
2017
2068
  result_tbl = nw_tbl.with_columns(
2018
2069
  pb_is_good_1=nw.col(column).is_null() & na_pass,
2019
2070
  pb_is_good_2=nw.col(column).str.contains(pattern, literal=False).fill_null(False),
@@ -2033,7 +2084,9 @@ def interrogate_regex(tbl: FrameT, column: str, values: dict | str, na_pass: boo
2033
2084
  return result_tbl.to_native()
2034
2085
 
2035
2086
 
2036
- def interrogate_within_spec(tbl: FrameT, column: str, values: dict, na_pass: bool) -> FrameT:
2087
+ def interrogate_within_spec(
2088
+ tbl: IntoFrame, column: str, values: dict[str, Any], na_pass: bool
2089
+ ) -> Any:
2037
2090
  """Within specification interrogation."""
2038
2091
  from pointblank._spec_utils import (
2039
2092
  regex_email,
@@ -2058,6 +2111,7 @@ def interrogate_within_spec(tbl: FrameT, column: str, values: dict, na_pass: boo
2058
2111
 
2059
2112
  # Convert to Narwhals for cross-backend compatibility
2060
2113
  nw_tbl = nw.from_native(tbl)
2114
+ assert isinstance(nw_tbl, (nw.DataFrame, nw.LazyFrame))
2061
2115
 
2062
2116
  # Regex-based specifications can use Narwhals directly (no materialization needed)
2063
2117
  regex_specs = {
@@ -2111,18 +2165,18 @@ def interrogate_within_spec(tbl: FrameT, column: str, values: dict, na_pass: boo
2111
2165
 
2112
2166
  # For non-Ibis tables or other specs, materialize data and use Python validation
2113
2167
  # Get the column data as a list
2114
- col_data = nw_tbl.select(column).to_native()
2168
+ col_data: Any = nw_tbl.select(column).to_native()
2115
2169
 
2116
- # Convert to list based on backend
2170
+ # Convert to list based on backend - type varies so use duck typing
2117
2171
  if hasattr(col_data, "to_list"): # Polars
2118
- col_list = col_data[column].to_list()
2172
+ col_list = col_data[column].to_list() # type: ignore[index]
2119
2173
  elif hasattr(col_data, "tolist"): # Pandas
2120
- col_list = col_data[column].tolist()
2174
+ col_list = col_data[column].tolist() # type: ignore[index]
2121
2175
  else: # For Ibis tables, we need to execute the query first
2122
2176
  try:
2123
2177
  # Try to execute if it's an Ibis table
2124
2178
  if hasattr(col_data, "execute"):
2125
- col_data_exec = col_data.execute()
2179
+ col_data_exec = col_data.execute() # type: ignore[operator]
2126
2180
  if hasattr(col_data_exec, "to_list"): # Polars result
2127
2181
  col_list = col_data_exec[column].to_list()
2128
2182
  elif hasattr(col_data_exec, "tolist"): # Pandas result
@@ -2135,6 +2189,8 @@ def interrogate_within_spec(tbl: FrameT, column: str, values: dict, na_pass: boo
2135
2189
  # Fallback to direct list conversion
2136
2190
  col_list = list(col_data[column])
2137
2191
 
2192
+ assert isinstance(col_list, list)
2193
+
2138
2194
  # Validate based on spec type (checksum-based validations)
2139
2195
  if spec_lower in ("isbn", "isbn-10", "isbn-13"):
2140
2196
  is_valid_list = check_isbn(col_list)
@@ -2181,7 +2237,9 @@ def interrogate_within_spec(tbl: FrameT, column: str, values: dict, na_pass: boo
2181
2237
  return result_tbl.to_native()
2182
2238
 
2183
2239
 
2184
- def interrogate_within_spec_db(tbl: FrameT, column: str, values: dict, na_pass: bool) -> FrameT:
2240
+ def interrogate_within_spec_db(
2241
+ tbl: IntoFrame, column: str, values: dict[str, Any], na_pass: bool
2242
+ ) -> Any:
2185
2243
  """
2186
2244
  Database-native specification validation (proof of concept).
2187
2245
 
@@ -2202,7 +2260,7 @@ def interrogate_within_spec_db(tbl: FrameT, column: str, values: dict, na_pass:
2202
2260
 
2203
2261
  Returns
2204
2262
  -------
2205
- FrameT
2263
+ Any
2206
2264
  Result table with pb_is_good_ column indicating validation results.
2207
2265
 
2208
2266
  Notes
@@ -2215,9 +2273,9 @@ def interrogate_within_spec_db(tbl: FrameT, column: str, values: dict, na_pass:
2215
2273
  spec_lower = spec.lower()
2216
2274
 
2217
2275
  # Check if this is an Ibis table
2218
- native_tbl = tbl
2219
- if hasattr(tbl, "to_native"):
2220
- native_tbl = tbl.to_native() if callable(tbl.to_native) else tbl
2276
+ native_tbl: Any = tbl
2277
+ if is_narwhals_dataframe(tbl) or is_narwhals_lazyframe(tbl):
2278
+ native_tbl = tbl.to_native()
2221
2279
 
2222
2280
  is_ibis = hasattr(native_tbl, "execute")
2223
2281
 
@@ -2284,7 +2342,7 @@ def interrogate_within_spec_db(tbl: FrameT, column: str, values: dict, na_pass:
2284
2342
  weights = [8, 7, 6, 5, 4, 3, 2, 10, 0, 9, 8, 7, 6, 5, 4, 3, 2]
2285
2343
 
2286
2344
  # Get the column as an Ibis expression
2287
- col_expr = native_tbl[column]
2345
+ col_expr = native_tbl[column] # type: ignore[index]
2288
2346
 
2289
2347
  # Basic checks: length must be 17, no invalid characters (I, O, Q)
2290
2348
  valid_length = col_expr.length() == 17
@@ -2311,11 +2369,11 @@ def interrogate_within_spec_db(tbl: FrameT, column: str, values: dict, na_pass:
2311
2369
  value = ibis.cases(*conditions, else_=0) # Default: invalid char = 0 (will fail validation)
2312
2370
 
2313
2371
  # Multiply by weight and add to checksum
2314
- checksum = checksum + (value * weights[pos])
2372
+ checksum = checksum + (value * weights[pos]) # type: ignore[operator]
2315
2373
 
2316
2374
  # Check digit calculation: checksum % 11
2317
2375
  # If result is 10, check digit should be 'X', otherwise it's the digit itself
2318
- expected_check = checksum % 11
2376
+ expected_check = checksum % 11 # type: ignore[operator]
2319
2377
  actual_check_char = col_expr.upper().substr(8, 1) # Position 9 (0-indexed 8)
2320
2378
 
2321
2379
  # Validate check digit using ibis.cases()
@@ -2338,14 +2396,14 @@ def interrogate_within_spec_db(tbl: FrameT, column: str, values: dict, na_pass:
2338
2396
  is_valid = is_valid.fill_null(False)
2339
2397
 
2340
2398
  # Add validation column to table
2341
- result_tbl = native_tbl.mutate(pb_is_good_=is_valid)
2399
+ result_tbl = native_tbl.mutate(pb_is_good_=is_valid) # type: ignore[union-attr]
2342
2400
 
2343
2401
  return result_tbl
2344
2402
 
2345
2403
 
2346
2404
  def interrogate_credit_card_db(
2347
- tbl: FrameT, column: str, values: dict[str, str], na_pass: bool
2348
- ) -> FrameT:
2405
+ tbl: IntoFrame, column: str, values: dict[str, str], na_pass: bool
2406
+ ) -> Any:
2349
2407
  """
2350
2408
  Database-native credit card validation using Luhn algorithm in SQL.
2351
2409
 
@@ -2367,7 +2425,7 @@ def interrogate_credit_card_db(
2367
2425
 
2368
2426
  Returns
2369
2427
  -------
2370
- FrameT
2428
+ Any
2371
2429
  Result table with pb_is_good_ column indicating validation results.
2372
2430
 
2373
2431
  Notes
@@ -2384,7 +2442,7 @@ def interrogate_credit_card_db(
2384
2442
  # Check if this is an Ibis table
2385
2443
  native_tbl = tbl
2386
2444
  if hasattr(tbl, "to_native"):
2387
- native_tbl = tbl.to_native() if callable(tbl.to_native) else tbl
2445
+ native_tbl = tbl.to_native() if callable(tbl.to_native) else tbl # type: ignore[operator]
2388
2446
 
2389
2447
  is_ibis = hasattr(native_tbl, "execute")
2390
2448
 
@@ -2398,7 +2456,7 @@ def interrogate_credit_card_db(
2398
2456
  raise ImportError("Ibis is required for database-native validation")
2399
2457
 
2400
2458
  # Get the column as an Ibis expression
2401
- col_expr = native_tbl[column]
2459
+ col_expr = native_tbl[column] # type: ignore[index]
2402
2460
 
2403
2461
  # Step 1: Clean the input and remove spaces and hyphens
2404
2462
  # First check format: only digits, spaces, and hyphens allowed
@@ -2451,7 +2509,7 @@ def interrogate_credit_card_db(
2451
2509
 
2452
2510
  # Calculate contribution to checksum
2453
2511
  # If should_double: double the digit, then if > 9 subtract 9
2454
- doubled = digit_val * 2
2512
+ doubled = digit_val * 2 # type: ignore[operator]
2455
2513
  adjusted = ibis.cases(
2456
2514
  (should_double & (doubled > 9), doubled - 9),
2457
2515
  (should_double, doubled),
@@ -2464,10 +2522,10 @@ def interrogate_credit_card_db(
2464
2522
  else_=0,
2465
2523
  )
2466
2524
 
2467
- checksum = checksum + contribution
2525
+ checksum = checksum + contribution # type: ignore[operator]
2468
2526
 
2469
2527
  # Step 4: Valid if checksum % 10 == 0
2470
- luhn_valid = (checksum % 10) == 0
2528
+ luhn_valid = (checksum % 10) == 0 # type: ignore[operator]
2471
2529
 
2472
2530
  # Combine all validation checks
2473
2531
  is_valid = valid_chars & valid_length & luhn_valid
@@ -2481,30 +2539,32 @@ def interrogate_credit_card_db(
2481
2539
  is_valid = is_valid.fill_null(False)
2482
2540
 
2483
2541
  # Add validation column to table
2484
- result_tbl = native_tbl.mutate(pb_is_good_=is_valid)
2542
+ result_tbl = native_tbl.mutate(pb_is_good_=is_valid) # type: ignore[union-attr]
2485
2543
 
2486
2544
  return result_tbl
2487
2545
 
2488
2546
 
2489
- def interrogate_null(tbl: FrameT, column: str) -> FrameT:
2547
+ def interrogate_null(tbl: IntoFrame, column: str) -> Any:
2490
2548
  """Null interrogation."""
2491
2549
 
2492
2550
  nw_tbl = nw.from_native(tbl)
2551
+ assert isinstance(nw_tbl, (nw.DataFrame, nw.LazyFrame))
2493
2552
  result_tbl = nw_tbl.with_columns(pb_is_good_=nw.col(column).is_null())
2494
2553
  return result_tbl.to_native()
2495
2554
 
2496
2555
 
2497
- def interrogate_not_null(tbl: FrameT, column: str) -> FrameT:
2556
+ def interrogate_not_null(tbl: IntoFrame, column: str) -> Any:
2498
2557
  """Not null interrogation."""
2499
2558
 
2500
2559
  nw_tbl = nw.from_native(tbl)
2560
+ assert isinstance(nw_tbl, (nw.DataFrame, nw.LazyFrame))
2501
2561
  result_tbl = nw_tbl.with_columns(pb_is_good_=~nw.col(column).is_null())
2502
2562
  return result_tbl.to_native()
2503
2563
 
2504
2564
 
2505
2565
  def interrogate_increasing(
2506
- tbl: FrameT, column: str, allow_stationary: bool, decreasing_tol: float, na_pass: bool
2507
- ) -> FrameT:
2566
+ tbl: IntoFrame, column: str, allow_stationary: bool, decreasing_tol: float, na_pass: bool
2567
+ ) -> Any:
2508
2568
  """
2509
2569
  Increasing interrogation.
2510
2570
 
@@ -2525,10 +2585,11 @@ def interrogate_increasing(
2525
2585
 
2526
2586
  Returns
2527
2587
  -------
2528
- FrameT
2588
+ Any
2529
2589
  The table with a `pb_is_good_` column indicating pass/fail for each row.
2530
2590
  """
2531
2591
  nw_tbl = nw.from_native(tbl)
2592
+ assert isinstance(nw_tbl, (nw.DataFrame, nw.LazyFrame))
2532
2593
 
2533
2594
  # Create a lagged difference column
2534
2595
  result_tbl = nw_tbl.with_columns(pb_lagged_difference_=nw.col(column) - nw.col(column).shift(1))
@@ -2561,8 +2622,8 @@ def interrogate_increasing(
2561
2622
 
2562
2623
 
2563
2624
  def interrogate_decreasing(
2564
- tbl: FrameT, column: str, allow_stationary: bool, increasing_tol: float, na_pass: bool
2565
- ) -> FrameT:
2625
+ tbl: IntoFrame, column: str, allow_stationary: bool, increasing_tol: float, na_pass: bool
2626
+ ) -> Any:
2566
2627
  """
2567
2628
  Decreasing interrogation.
2568
2629
 
@@ -2583,10 +2644,11 @@ def interrogate_decreasing(
2583
2644
 
2584
2645
  Returns
2585
2646
  -------
2586
- FrameT
2647
+ Any
2587
2648
  The table with a `pb_is_good_` column indicating pass/fail for each row.
2588
2649
  """
2589
2650
  nw_tbl = nw.from_native(tbl)
2651
+ assert isinstance(nw_tbl, (nw.DataFrame, nw.LazyFrame))
2590
2652
 
2591
2653
  # Create a lagged difference column
2592
2654
  result_tbl = nw_tbl.with_columns(pb_lagged_difference_=nw.col(column) - nw.col(column).shift(1))
@@ -2619,8 +2681,8 @@ def interrogate_decreasing(
2619
2681
 
2620
2682
 
2621
2683
  def _interrogate_comparison_base(
2622
- tbl: FrameT, column: str, compare: any, na_pass: bool, operator: str
2623
- ) -> FrameT:
2684
+ tbl: IntoFrame, column: str, compare: Any, na_pass: bool, operator: str
2685
+ ) -> Any:
2624
2686
  """
2625
2687
  Unified base function for comparison operations (gt, ge, lt, le, eq, ne).
2626
2688
 
@@ -2639,13 +2701,14 @@ def _interrogate_comparison_base(
2639
2701
 
2640
2702
  Returns
2641
2703
  -------
2642
- FrameT
2704
+ Any
2643
2705
  The result table with `pb_is_good_` column indicating the passing test units.
2644
2706
  """
2645
2707
 
2646
2708
  compare_expr = _get_compare_expr_nw(compare=compare)
2647
2709
 
2648
2710
  nw_tbl = nw.from_native(tbl)
2711
+ assert isinstance(nw_tbl, (nw.DataFrame, nw.LazyFrame))
2649
2712
  compare_expr = _safe_modify_datetime_compare_val(nw_tbl, column, compare_expr)
2650
2713
 
2651
2714
  # Create the comparison expression based on the operator
@@ -2692,7 +2755,7 @@ def _interrogate_comparison_base(
2692
2755
  return result_tbl.to_native()
2693
2756
 
2694
2757
 
2695
- def interrogate_rows_distinct(data_tbl: FrameT, columns_subset: list[str] | None) -> FrameT:
2758
+ def interrogate_rows_distinct(data_tbl: IntoFrame, columns_subset: list[str] | None) -> Any:
2696
2759
  """
2697
2760
  Check if rows in a DataFrame are distinct.
2698
2761
 
@@ -2709,10 +2772,11 @@ def interrogate_rows_distinct(data_tbl: FrameT, columns_subset: list[str] | None
2709
2772
 
2710
2773
  Returns
2711
2774
  -------
2712
- FrameT
2775
+ Any
2713
2776
  A DataFrame with a `pb_is_good_` column indicating which rows pass the test.
2714
2777
  """
2715
2778
  tbl = nw.from_native(data_tbl)
2779
+ assert is_narwhals_dataframe(tbl) or is_narwhals_lazyframe(tbl)
2716
2780
 
2717
2781
  # Get the column subset to use for the test
2718
2782
  if columns_subset is None:
@@ -2720,18 +2784,23 @@ def interrogate_rows_distinct(data_tbl: FrameT, columns_subset: list[str] | None
2720
2784
 
2721
2785
  # Create a count of duplicates using group_by approach
2722
2786
  # Group by the columns of interest and count occurrences
2723
- count_tbl = tbl.group_by(columns_subset).agg(nw.len().alias("pb_count_"))
2724
-
2725
- # Join back to original table to get count for each row
2726
- tbl = tbl.join(count_tbl, on=columns_subset, how="left")
2727
-
2728
- # Passing rows will have the value `1` (no duplicates, so True), otherwise False applies
2729
- tbl = tbl.with_columns(pb_is_good_=nw.col("pb_count_") == 1).drop("pb_count_")
2730
-
2731
- return tbl.to_native()
2787
+ # Handle DataFrame and LazyFrame separately for proper type narrowing
2788
+ if is_narwhals_dataframe(tbl):
2789
+ count_tbl = tbl.group_by(columns_subset).agg(nw.len().alias("pb_count_"))
2790
+ result = tbl.join(count_tbl, on=columns_subset, how="left")
2791
+ result = result.with_columns(pb_is_good_=nw.col("pb_count_") == 1).drop("pb_count_")
2792
+ return result.to_native()
2793
+ elif is_narwhals_lazyframe(tbl):
2794
+ count_tbl = tbl.group_by(columns_subset).agg(nw.len().alias("pb_count_"))
2795
+ result = tbl.join(count_tbl, on=columns_subset, how="left")
2796
+ result = result.with_columns(pb_is_good_=nw.col("pb_count_") == 1).drop("pb_count_")
2797
+ return result.to_native()
2798
+ else:
2799
+ msg = f"Expected DataFrame or LazyFrame, got {type(tbl)}"
2800
+ raise TypeError(msg)
2732
2801
 
2733
2802
 
2734
- def interrogate_rows_complete(tbl: FrameT, columns_subset: list[str] | None) -> FrameT:
2803
+ def interrogate_rows_complete(tbl: IntoFrame, columns_subset: list[str] | None) -> Any:
2735
2804
  """Rows complete interrogation."""
2736
2805
  nw_tbl = nw.from_native(tbl)
2737
2806
 
@@ -2747,12 +2816,25 @@ def interrogate_rows_complete(tbl: FrameT, columns_subset: list[str] | None) ->
2747
2816
  return result_tbl.to_native()
2748
2817
 
2749
2818
 
2750
- def interrogate_prompt(tbl: FrameT, columns_subset: list[str] | None, ai_config: dict) -> FrameT:
2819
+ def interrogate_prompt(
2820
+ tbl: IntoFrame, columns_subset: list[str] | None, ai_config: dict[str, Any]
2821
+ ) -> Any:
2751
2822
  """AI-powered interrogation of rows."""
2752
2823
  import logging
2753
2824
 
2754
2825
  logger = logging.getLogger(__name__)
2755
2826
 
2827
+ # Convert to narwhals early for consistent row counting
2828
+ nw_tbl = nw.from_native(tbl)
2829
+ # Get row count - for LazyFrame we need to use select/collect
2830
+ if is_narwhals_lazyframe(nw_tbl):
2831
+ row_count = nw_tbl.select(nw.len()).collect().item()
2832
+ assert isinstance(row_count, int)
2833
+ total_rows = row_count
2834
+ else:
2835
+ assert is_narwhals_dataframe(nw_tbl)
2836
+ total_rows = len(nw_tbl)
2837
+
2756
2838
  try:
2757
2839
  # Import AI validation modules
2758
2840
  from pointblank._utils_ai import (
@@ -2809,28 +2891,25 @@ def interrogate_prompt(tbl: FrameT, columns_subset: list[str] | None, ai_config:
2809
2891
  )
2810
2892
 
2811
2893
  # Parse and combine results with signature mapping optimization
2812
- parser = _ValidationResponseParser(total_rows=len(tbl))
2894
+ parser = _ValidationResponseParser(total_rows=total_rows)
2813
2895
  combined_results = parser.combine_batch_results(batch_results, signature_mapping)
2814
2896
 
2815
2897
  # Debug: Log table info and combined results
2816
2898
  logger.debug("🏁 Final result conversion:")
2817
- logger.debug(f" - Table length: {len(tbl)}")
2899
+ logger.debug(f" - Table length: {total_rows}")
2818
2900
  logger.debug(
2819
2901
  f" - Combined results keys: {sorted(combined_results.keys()) if combined_results else 'None'}"
2820
2902
  )
2821
2903
 
2822
- # Convert results to narwhals format
2823
- nw_tbl = nw.from_native(tbl)
2824
-
2825
2904
  # Create a boolean column for validation results
2826
2905
  validation_results = []
2827
- for i in range(len(tbl)):
2906
+ for i in range(total_rows):
2828
2907
  # Default to False if row wasn't processed
2829
2908
  result = combined_results.get(i, False)
2830
2909
  validation_results.append(result)
2831
2910
 
2832
2911
  # Debug: Log first few conversions
2833
- if i < 5 or len(tbl) - i <= 2:
2912
+ if i < 5 or total_rows - i <= 2:
2834
2913
  logger.debug(f" Row {i}: {result} (from combined_results.get({i}, False))")
2835
2914
 
2836
2915
  logger.debug(f" - Final validation_results length: {len(validation_results)}")
@@ -2869,10 +2948,9 @@ def interrogate_prompt(tbl: FrameT, columns_subset: list[str] | None, ai_config:
2869
2948
  logger.error(f"Missing dependencies for AI validation: {e}")
2870
2949
  logger.error("Install required packages: pip install openai anthropic aiohttp")
2871
2950
 
2872
- # Return all False results as fallback
2873
- nw_tbl = nw.from_native(tbl)
2951
+ # Return all False results as fallback (nw_tbl and total_rows defined at function start)
2874
2952
  native_tbl = nw_tbl.to_native()
2875
- validation_results = [False] * len(tbl)
2953
+ validation_results = [False] * total_rows
2876
2954
 
2877
2955
  if hasattr(native_tbl, "with_columns"): # Polars
2878
2956
  import polars as pl
@@ -2894,10 +2972,9 @@ def interrogate_prompt(tbl: FrameT, columns_subset: list[str] | None, ai_config:
2894
2972
  except Exception as e:
2895
2973
  logger.error(f"AI validation failed: {e}")
2896
2974
 
2897
- # Return all False results as fallback
2898
- nw_tbl = nw.from_native(tbl)
2975
+ # Return all False results as fallback (nw_tbl and total_rows defined at function start)
2899
2976
  native_tbl = nw_tbl.to_native()
2900
- validation_results = [False] * len(tbl)
2977
+ validation_results = [False] * total_rows
2901
2978
 
2902
2979
  if hasattr(native_tbl, "with_columns"): # Polars
2903
2980
  import polars as pl