pointblank 0.11.6__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,7 +15,7 @@ from pointblank._utils import (
15
15
  _convert_to_narwhals,
16
16
  _get_tbl_type,
17
17
  )
18
- from pointblank.column import Column, ColumnLiteral
18
+ from pointblank.column import Column
19
19
  from pointblank.schema import Schema
20
20
  from pointblank.thresholds import _threshold_check
21
21
 
@@ -23,6 +23,74 @@ if TYPE_CHECKING:
23
23
  from pointblank._typing import AbsoluteTolBounds
24
24
 
25
25
 
26
+ def _safe_modify_datetime_compare_val(data_frame: Any, column: str, compare_val: Any) -> Any:
27
+ """
28
+ Safely modify datetime comparison values for LazyFrame compatibility.
29
+
30
+ This function handles the case where we can't directly slice LazyFrames
31
+ to get column dtypes for datetime conversion.
32
+ """
33
+ try:
34
+ # First try to get column dtype from schema for LazyFrames
35
+ column_dtype = None
36
+
37
+ if hasattr(data_frame, "collect_schema"):
38
+ schema = data_frame.collect_schema()
39
+ column_dtype = schema.get(column)
40
+ elif hasattr(data_frame, "schema"):
41
+ schema = data_frame.schema
42
+ column_dtype = schema.get(column)
43
+
44
+ # If we got a dtype from schema, use it
45
+ if column_dtype is not None:
46
+ # Create a mock column object for _modify_datetime_compare_val
47
+ class MockColumn:
48
+ def __init__(self, dtype):
49
+ self.dtype = dtype
50
+
51
+ mock_column = MockColumn(column_dtype)
52
+ return _modify_datetime_compare_val(tgt_column=mock_column, compare_val=compare_val)
53
+
54
+ # Fallback: try collecting a small sample if possible
55
+ try:
56
+ sample = data_frame.head(1).collect()
57
+ if hasattr(sample, "dtypes") and column in sample.columns:
58
+ # For pandas-like dtypes
59
+ column_dtype = sample.dtypes[column] if hasattr(sample, "dtypes") else None
60
+ if column_dtype:
61
+
62
+ class MockColumn:
63
+ def __init__(self, dtype):
64
+ self.dtype = dtype
65
+
66
+ mock_column = MockColumn(column_dtype)
67
+ return _modify_datetime_compare_val(
68
+ tgt_column=mock_column, compare_val=compare_val
69
+ )
70
+ except Exception:
71
+ pass
72
+
73
+ # Final fallback: try direct access (for eager DataFrames)
74
+ try:
75
+ if hasattr(data_frame, "dtypes") and column in data_frame.columns:
76
+ column_dtype = data_frame.dtypes[column]
77
+
78
+ class MockColumn:
79
+ def __init__(self, dtype):
80
+ self.dtype = dtype
81
+
82
+ mock_column = MockColumn(column_dtype)
83
+ return _modify_datetime_compare_val(tgt_column=mock_column, compare_val=compare_val)
84
+ except Exception:
85
+ pass
86
+
87
+ except Exception:
88
+ pass
89
+
90
+ # If all else fails, return the original compare_val
91
+ return compare_val
92
+
93
+
26
94
  @dataclass
27
95
  class Interrogator:
28
96
  """
@@ -89,56 +157,25 @@ class Interrogator:
89
157
  na_pass: bool = False
90
158
  tbl_type: str = "local"
91
159
 
92
- def gt(self) -> FrameT | Any:
93
- # Ibis backends ---------------------------------------------
94
-
95
- if self.tbl_type in IBIS_BACKENDS:
96
- import ibis
97
-
98
- if isinstance(self.compare, ColumnLiteral):
99
- #
100
- # Ibis column-to-column comparison
101
- #
102
-
103
- tbl = self.x.mutate(
104
- pb_is_good_1=(self.x[self.column].isnull() | self.x[self.compare.name].isnull())
105
- & ibis.literal(self.na_pass),
106
- pb_is_good_2=self.x[self.column] > self.x[self.compare.name],
107
- )
108
-
109
- tbl = tbl.mutate(
110
- pb_is_good_2=ibis.ifelse(tbl.pb_is_good_2.notnull(), tbl.pb_is_good_2, False)
111
- )
112
-
113
- return tbl.mutate(pb_is_good_=tbl.pb_is_good_1 | tbl.pb_is_good_2).drop(
114
- "pb_is_good_1", "pb_is_good_2"
115
- )
116
-
117
- else:
118
- #
119
- # Ibis column-to-literal comparison
120
- #
121
-
122
- tbl = self.x.mutate(
123
- pb_is_good_1=self.x[self.column].isnull() & ibis.literal(self.na_pass),
124
- pb_is_good_2=self.x[self.column] > ibis.literal(self.compare),
125
- )
160
+ def __post_init__(self):
161
+ """
162
+ Post-initialization to process Ibis tables through Narwhals.
126
163
 
127
- tbl = tbl.mutate(
128
- pb_is_good_2=ibis.ifelse(tbl.pb_is_good_2.notnull(), tbl.pb_is_good_2, False)
129
- )
164
+ This converts Ibis tables to Narwhals-wrapped tables to unify
165
+ the processing pathway and reduce code branching.
166
+ """
167
+ # Import the processing function
168
+ from pointblank._utils import _process_ibis_through_narwhals
130
169
 
131
- return tbl.mutate(pb_is_good_=tbl.pb_is_good_1 | tbl.pb_is_good_2).drop(
132
- "pb_is_good_1", "pb_is_good_2"
133
- )
170
+ # Process Ibis tables through Narwhals
171
+ self.x, self.tbl_type = _process_ibis_through_narwhals(self.x, self.tbl_type)
134
172
 
135
- # Local backends (Narwhals) ---------------------------------
173
+ def gt(self) -> FrameT | Any:
174
+ # All backends now use Narwhals (including former Ibis tables) ---------
136
175
 
137
176
  compare_expr = _get_compare_expr_nw(compare=self.compare)
138
177
 
139
- compare_expr = _modify_datetime_compare_val(
140
- tgt_column=self.x[self.column], compare_val=compare_expr
141
- )
178
+ compare_expr = _safe_modify_datetime_compare_val(self.x, self.column, compare_expr)
142
179
 
143
180
  return (
144
181
  self.x.with_columns(
@@ -165,55 +202,11 @@ class Interrogator:
165
202
  )
166
203
 
167
204
  def lt(self) -> FrameT | Any:
168
- # Ibis backends ---------------------------------------------
169
-
170
- if self.tbl_type in IBIS_BACKENDS:
171
- import ibis
172
-
173
- if isinstance(self.compare, Column):
174
- #
175
- # Ibis column-to-column comparison
176
- #
177
-
178
- tbl = self.x.mutate(
179
- pb_is_good_1=(self.x[self.column].isnull() | self.x[self.compare.name].isnull())
180
- & ibis.literal(self.na_pass),
181
- pb_is_good_2=self.x[self.column] < self.x[self.compare.name],
182
- )
183
-
184
- tbl = tbl.mutate(
185
- pb_is_good_2=ibis.ifelse(tbl.pb_is_good_2.notnull(), tbl.pb_is_good_2, False)
186
- )
187
-
188
- return tbl.mutate(pb_is_good_=tbl.pb_is_good_1 | tbl.pb_is_good_2).drop(
189
- "pb_is_good_1", "pb_is_good_2"
190
- )
191
-
192
- else:
193
- #
194
- # Ibis column-to-literal comparison
195
- #
196
-
197
- tbl = self.x.mutate(
198
- pb_is_good_1=self.x[self.column].isnull() & ibis.literal(self.na_pass),
199
- pb_is_good_2=self.x[self.column] < ibis.literal(self.compare),
200
- )
201
-
202
- tbl = tbl.mutate(
203
- pb_is_good_2=ibis.ifelse(tbl.pb_is_good_2.notnull(), tbl.pb_is_good_2, False)
204
- )
205
-
206
- return tbl.mutate(pb_is_good_=tbl.pb_is_good_1 | tbl.pb_is_good_2).drop(
207
- "pb_is_good_1", "pb_is_good_2"
208
- )
209
-
210
- # Local backends (Narwhals) ---------------------------------
205
+ # All backends now use Narwhals (including former Ibis tables) ---------
211
206
 
212
207
  compare_expr = _get_compare_expr_nw(compare=self.compare)
213
208
 
214
- compare_expr = _modify_datetime_compare_val(
215
- tgt_column=self.x[self.column], compare_val=compare_expr
216
- )
209
+ compare_expr = _safe_modify_datetime_compare_val(self.x, self.column, compare_expr)
217
210
 
218
211
  return (
219
212
  self.x.with_columns(
@@ -240,49 +233,7 @@ class Interrogator:
240
233
  )
241
234
 
242
235
  def eq(self) -> FrameT | Any:
243
- # Ibis backends ---------------------------------------------
244
-
245
- if self.tbl_type in IBIS_BACKENDS:
246
- import ibis
247
-
248
- if isinstance(self.compare, Column):
249
- #
250
- # Ibis column-to-column comparison
251
- #
252
-
253
- tbl = self.x.mutate(
254
- pb_is_good_1=(self.x[self.column].isnull() | self.x[self.compare.name].isnull())
255
- & ibis.literal(self.na_pass),
256
- pb_is_good_2=self.x[self.column] == self.x[self.compare.name],
257
- )
258
-
259
- tbl = tbl.mutate(
260
- pb_is_good_2=ibis.ifelse(tbl.pb_is_good_2.notnull(), tbl.pb_is_good_2, False)
261
- )
262
-
263
- return tbl.mutate(pb_is_good_=tbl.pb_is_good_1 | tbl.pb_is_good_2).drop(
264
- "pb_is_good_1", "pb_is_good_2"
265
- )
266
-
267
- else:
268
- #
269
- # Ibis column-to-literal comparison
270
- #
271
-
272
- tbl = self.x.mutate(
273
- pb_is_good_1=self.x[self.column].isnull() & ibis.literal(self.na_pass),
274
- pb_is_good_2=self.x[self.column] == ibis.literal(self.compare),
275
- )
276
-
277
- tbl = tbl.mutate(
278
- pb_is_good_2=ibis.ifelse(tbl.pb_is_good_2.notnull(), tbl.pb_is_good_2, False)
279
- )
280
-
281
- return tbl.mutate(pb_is_good_=tbl.pb_is_good_1 | tbl.pb_is_good_2).drop(
282
- "pb_is_good_1", "pb_is_good_2"
283
- )
284
-
285
- # Local backends (Narwhals) ---------------------------------
236
+ # All backends now use Narwhals (including former Ibis tables) ---------
286
237
 
287
238
  if isinstance(self.compare, Column):
288
239
  compare_expr = _get_compare_expr_nw(compare=self.compare)
@@ -329,9 +280,7 @@ class Interrogator:
329
280
  else:
330
281
  compare_expr = _get_compare_expr_nw(compare=self.compare)
331
282
 
332
- compare_expr = _modify_datetime_compare_val(
333
- tgt_column=self.x[self.column], compare_val=compare_expr
334
- )
283
+ compare_expr = _safe_modify_datetime_compare_val(self.x, self.column, compare_expr)
335
284
 
336
285
  tbl = self.x.with_columns(
337
286
  pb_is_good_1=nw.col(self.column).is_null() & self.na_pass,
@@ -359,47 +308,7 @@ class Interrogator:
359
308
  return tbl.drop("pb_is_good_1", "pb_is_good_2", "pb_is_good_3").to_native()
360
309
 
361
310
  def ne(self) -> FrameT | Any:
362
- # Ibis backends ---------------------------------------------
363
-
364
- if self.tbl_type in IBIS_BACKENDS:
365
- import ibis
366
-
367
- if isinstance(self.compare, Column):
368
- #
369
- # Ibis column-to-column comparison
370
- #
371
-
372
- tbl = self.x.mutate(
373
- pb_is_good_1=(self.x[self.column].isnull() | self.x[self.compare.name].isnull())
374
- & ibis.literal(self.na_pass),
375
- pb_is_good_2=self.x[self.column] != self.x[self.compare.name],
376
- )
377
-
378
- tbl = tbl.mutate(
379
- pb_is_good_2=ibis.ifelse(tbl.pb_is_good_2.notnull(), tbl.pb_is_good_2, False)
380
- )
381
-
382
- return tbl.mutate(pb_is_good_=tbl.pb_is_good_1 | tbl.pb_is_good_2).drop(
383
- "pb_is_good_1", "pb_is_good_2"
384
- )
385
-
386
- #
387
- # Ibis column-to-literal comparison
388
- #
389
- tbl = self.x.mutate(
390
- pb_is_good_1=self.x[self.column].isnull() & ibis.literal(self.na_pass),
391
- pb_is_good_2=ibis.ifelse(
392
- self.x[self.column].notnull(),
393
- self.x[self.column] != ibis.literal(self.compare),
394
- ibis.literal(False),
395
- ),
396
- )
397
-
398
- return tbl.mutate(pb_is_good_=tbl.pb_is_good_1 | tbl.pb_is_good_2).drop(
399
- "pb_is_good_1", "pb_is_good_2"
400
- )
401
-
402
- # Local backends (Narwhals) ---------------------------------
311
+ # All backends now use Narwhals (including former Ibis tables) ---------
403
312
 
404
313
  # Determine if the reference and comparison columns have any null values
405
314
  ref_col_has_null_vals = _column_has_null_values(table=self.x, column=self.column)
@@ -421,9 +330,7 @@ class Interrogator:
421
330
  ).to_native()
422
331
 
423
332
  else:
424
- compare_expr = _modify_datetime_compare_val(
425
- tgt_column=self.x[self.column], compare_val=self.compare
426
- )
333
+ compare_expr = _safe_modify_datetime_compare_val(self.x, self.column, self.compare)
427
334
 
428
335
  return self.x.with_columns(
429
336
  pb_is_good_=nw.col(self.column) != nw.lit(compare_expr),
@@ -469,6 +376,12 @@ class Interrogator:
469
376
  tbl = tbl.with_columns(
470
377
  pb_is_good_2=(nw.col("pb_is_good_1") | nw.col("pb_is_good_2"))
471
378
  )
379
+ else:
380
+ # General case (non-Polars): handle na_pass=True properly
381
+ if self.na_pass:
382
+ tbl = tbl.with_columns(
383
+ pb_is_good_2=(nw.col("pb_is_good_1") | nw.col("pb_is_good_2"))
384
+ )
472
385
 
473
386
  return (
474
387
  tbl.with_columns(pb_is_good_=nw.col("pb_is_good_2"))
@@ -500,6 +413,12 @@ class Interrogator:
500
413
  tbl = tbl.with_columns(
501
414
  pb_is_good_1=(nw.col("pb_is_good_1") | nw.col("pb_is_good_2"))
502
415
  )
416
+ else:
417
+ # General case (non-Polars): handle na_pass=True properly
418
+ if self.na_pass:
419
+ tbl = tbl.with_columns(
420
+ pb_is_good_1=(nw.col("pb_is_good_1") | nw.col("pb_is_good_2"))
421
+ )
503
422
 
504
423
  return (
505
424
  tbl.with_columns(pb_is_good_=nw.col("pb_is_good_1"))
@@ -532,6 +451,16 @@ class Interrogator:
532
451
  .otherwise(False)
533
452
  )
534
453
  )
454
+ else:
455
+ # General case (non-Polars): handle na_pass=True properly
456
+ if self.na_pass:
457
+ tbl = tbl.with_columns(
458
+ pb_is_good_3=(
459
+ nw.when(nw.col("pb_is_good_1") | nw.col("pb_is_good_2"))
460
+ .then(True)
461
+ .otherwise(nw.col("pb_is_good_3"))
462
+ )
463
+ )
535
464
 
536
465
  return (
537
466
  tbl.with_columns(pb_is_good_=nw.col("pb_is_good_3"))
@@ -544,9 +473,7 @@ class Interrogator:
544
473
  if ref_col_has_null_vals:
545
474
  # Create individual cases for Pandas and Polars
546
475
 
547
- compare_expr = _modify_datetime_compare_val(
548
- tgt_column=self.x[self.column], compare_val=self.compare
549
- )
476
+ compare_expr = _safe_modify_datetime_compare_val(self.x, self.column, self.compare)
550
477
 
551
478
  if is_pandas_dataframe(self.x.to_native()):
552
479
  tbl = self.x.with_columns(
@@ -584,54 +511,31 @@ class Interrogator:
584
511
 
585
512
  return tbl
586
513
 
587
- def ge(self) -> FrameT | Any:
588
- # Ibis backends ---------------------------------------------
589
-
590
- if self.tbl_type in IBIS_BACKENDS:
591
- import ibis
592
-
593
- if isinstance(self.compare, Column):
594
- #
595
- # Ibis column-to-column comparison
596
- #
597
-
598
- tbl = self.x.mutate(
599
- pb_is_good_1=(self.x[self.column].isnull() | self.x[self.compare.name].isnull())
600
- & ibis.literal(self.na_pass),
601
- pb_is_good_2=self.x[self.column] >= self.x[self.compare.name],
602
- )
603
-
604
- tbl = tbl.mutate(
605
- pb_is_good_2=ibis.ifelse(tbl.pb_is_good_2.notnull(), tbl.pb_is_good_2, False)
606
- )
607
-
608
- return tbl.mutate(pb_is_good_=tbl.pb_is_good_1 | tbl.pb_is_good_2).drop(
609
- "pb_is_good_1", "pb_is_good_2"
610
- )
514
+ else:
515
+ # Generic case for other DataFrame types (PySpark, etc.)
516
+ # Use similar logic to Polars but handle potential differences
517
+ tbl = self.x.with_columns(
518
+ pb_is_good_1=nw.col(self.column).is_null(), # val is Null in Column
519
+ pb_is_good_2=nw.lit(self.na_pass), # Pass if any Null in val or compare
520
+ )
611
521
 
612
- #
613
- # Ibis column-to-literal comparison
614
- #
615
- tbl = self.x.mutate(
616
- pb_is_good_1=self.x[self.column].isnull() & ibis.literal(self.na_pass),
617
- pb_is_good_2=self.x[self.column] >= ibis.literal(self.compare),
618
- )
522
+ tbl = tbl.with_columns(pb_is_good_3=nw.col(self.column) != nw.lit(compare_expr))
619
523
 
620
- tbl = tbl.mutate(
621
- pb_is_good_2=ibis.ifelse(tbl.pb_is_good_2.notnull(), tbl.pb_is_good_2, False)
622
- )
524
+ tbl = tbl.with_columns(
525
+ pb_is_good_=(
526
+ (nw.col("pb_is_good_1") & nw.col("pb_is_good_2"))
527
+ | (nw.col("pb_is_good_3") & ~nw.col("pb_is_good_1"))
528
+ )
529
+ )
623
530
 
624
- return tbl.mutate(pb_is_good_=tbl.pb_is_good_1 | tbl.pb_is_good_2).drop(
625
- "pb_is_good_1", "pb_is_good_2"
626
- )
531
+ return tbl.drop("pb_is_good_1", "pb_is_good_2", "pb_is_good_3").to_native()
627
532
 
628
- # Local backends (Narwhals) ---------------------------------
533
+ def ge(self) -> FrameT | Any:
534
+ # All backends now use Narwhals (including former Ibis tables) ---------
629
535
 
630
536
  compare_expr = _get_compare_expr_nw(compare=self.compare)
631
537
 
632
- compare_expr = _modify_datetime_compare_val(
633
- tgt_column=self.x[self.column], compare_val=compare_expr
634
- )
538
+ compare_expr = _safe_modify_datetime_compare_val(self.x, self.column, compare_expr)
635
539
 
636
540
  tbl = (
637
541
  self.x.with_columns(
@@ -658,53 +562,11 @@ class Interrogator:
658
562
  return tbl.drop("pb_is_good_1", "pb_is_good_2", "pb_is_good_3").to_native()
659
563
 
660
564
  def le(self) -> FrameT | Any:
661
- # Ibis backends ---------------------------------------------
662
-
663
- if self.tbl_type in IBIS_BACKENDS:
664
- import ibis
665
-
666
- if isinstance(self.compare, Column):
667
- #
668
- # Ibis column-to-column comparison
669
- #
670
-
671
- tbl = self.x.mutate(
672
- pb_is_good_1=(self.x[self.column].isnull() | self.x[self.compare.name].isnull())
673
- & ibis.literal(self.na_pass),
674
- pb_is_good_2=self.x[self.column] <= self.x[self.compare.name],
675
- )
676
-
677
- tbl = tbl.mutate(
678
- pb_is_good_2=ibis.ifelse(tbl.pb_is_good_2.notnull(), tbl.pb_is_good_2, False)
679
- )
680
-
681
- return tbl.mutate(pb_is_good_=tbl.pb_is_good_1 | tbl.pb_is_good_2).drop(
682
- "pb_is_good_1", "pb_is_good_2"
683
- )
684
-
685
- #
686
- # Ibis column-to-literal comparison
687
- #
688
- tbl = self.x.mutate(
689
- pb_is_good_1=self.x[self.column].isnull() & ibis.literal(self.na_pass),
690
- pb_is_good_2=self.x[self.column] <= ibis.literal(self.compare),
691
- )
692
-
693
- tbl = tbl.mutate(
694
- pb_is_good_2=ibis.ifelse(tbl.pb_is_good_2.notnull(), tbl.pb_is_good_2, False)
695
- )
696
-
697
- return tbl.mutate(pb_is_good_=tbl.pb_is_good_1 | tbl.pb_is_good_2).drop(
698
- "pb_is_good_1", "pb_is_good_2"
699
- )
700
-
701
- # Local backends (Narwhals) ---------------------------------
565
+ # All backends now use Narwhals (including former Ibis tables) ---------
702
566
 
703
567
  compare_expr = _get_compare_expr_nw(compare=self.compare)
704
568
 
705
- compare_expr = _modify_datetime_compare_val(
706
- tgt_column=self.x[self.column], compare_val=compare_expr
707
- )
569
+ compare_expr = _safe_modify_datetime_compare_val(self.x, self.column, compare_expr)
708
570
 
709
571
  return (
710
572
  self.x.with_columns(
@@ -731,113 +593,13 @@ class Interrogator:
731
593
  )
732
594
 
733
595
  def between(self) -> FrameT | Any:
734
- # Ibis backends ---------------------------------------------
735
-
736
- if self.tbl_type in IBIS_BACKENDS:
737
- import ibis
738
-
739
- if isinstance(self.low, Column) or isinstance(self.high, Column):
740
- #
741
- # Ibis column-to-column/column or column-to-column/literal comparison
742
- #
743
-
744
- if isinstance(self.low, Column):
745
- low_val = self.x[self.low.name]
746
- else:
747
- low_val = ibis.literal(self.low)
748
-
749
- if isinstance(self.high, Column):
750
- high_val = self.x[self.high.name]
751
- else:
752
- high_val = ibis.literal(self.high)
753
-
754
- if isinstance(self.low, Column) and isinstance(self.high, Column):
755
- tbl = self.x.mutate(
756
- pb_is_good_1=(
757
- self.x[self.column].isnull()
758
- | self.x[self.low.name].isnull()
759
- | self.x[self.high.name].isnull()
760
- )
761
- & ibis.literal(self.na_pass)
762
- )
763
- elif isinstance(self.low, Column):
764
- tbl = self.x.mutate(
765
- pb_is_good_1=(self.x[self.column].isnull() | self.x[self.low.name].isnull())
766
- & ibis.literal(self.na_pass)
767
- )
768
- elif isinstance(self.high, Column):
769
- tbl = self.x.mutate(
770
- pb_is_good_1=(
771
- self.x[self.column].isnull() | self.x[self.high.name].isnull()
772
- )
773
- & ibis.literal(self.na_pass)
774
- )
775
-
776
- if self.inclusive[0]:
777
- tbl = tbl.mutate(pb_is_good_2=tbl[self.column] >= low_val)
778
- else:
779
- tbl = tbl.mutate(pb_is_good_2=tbl[self.column] > low_val)
780
-
781
- tbl = tbl.mutate(
782
- pb_is_good_2=ibis.ifelse(tbl.pb_is_good_2.notnull(), tbl.pb_is_good_2, False)
783
- )
784
-
785
- if self.inclusive[1]:
786
- tbl = tbl.mutate(pb_is_good_3=tbl[self.column] <= high_val)
787
- else:
788
- tbl = tbl.mutate(pb_is_good_3=tbl[self.column] < high_val)
789
-
790
- tbl = tbl.mutate(
791
- pb_is_good_3=ibis.ifelse(tbl.pb_is_good_3.notnull(), tbl.pb_is_good_3, False)
792
- )
793
-
794
- return tbl.mutate(
795
- pb_is_good_=tbl.pb_is_good_1 | (tbl.pb_is_good_2 & tbl.pb_is_good_3)
796
- ).drop("pb_is_good_1", "pb_is_good_2", "pb_is_good_3")
797
-
798
- else:
799
- #
800
- # Ibis column-to-literal/literal comparison
801
- #
802
-
803
- low_val = ibis.literal(self.low)
804
- high_val = ibis.literal(self.high)
805
-
806
- tbl = self.x.mutate(
807
- pb_is_good_1=self.x[self.column].isnull() & ibis.literal(self.na_pass)
808
- )
809
-
810
- if self.inclusive[0]:
811
- tbl = tbl.mutate(pb_is_good_2=tbl[self.column] >= low_val)
812
- else:
813
- tbl = tbl.mutate(pb_is_good_2=tbl[self.column] > low_val)
814
-
815
- tbl = tbl.mutate(
816
- pb_is_good_2=ibis.ifelse(tbl.pb_is_good_2.notnull(), tbl.pb_is_good_2, False)
817
- )
818
-
819
- if self.inclusive[1]:
820
- tbl = tbl.mutate(pb_is_good_3=tbl[self.column] <= high_val)
821
- else:
822
- tbl = tbl.mutate(pb_is_good_3=tbl[self.column] < high_val)
823
-
824
- tbl = tbl.mutate(
825
- pb_is_good_3=ibis.ifelse(tbl.pb_is_good_3.notnull(), tbl.pb_is_good_3, False)
826
- )
827
-
828
- return tbl.mutate(
829
- pb_is_good_=tbl.pb_is_good_1 | (tbl.pb_is_good_2 & tbl.pb_is_good_3)
830
- ).drop("pb_is_good_1", "pb_is_good_2", "pb_is_good_3")
831
-
832
- # Local backends (Narwhals) ---------------------------------
596
+ # All backends now use Narwhals (including former Ibis tables) ---------
833
597
 
834
598
  low_val = _get_compare_expr_nw(compare=self.low)
835
599
  high_val = _get_compare_expr_nw(compare=self.high)
836
600
 
837
- low_val = _modify_datetime_compare_val(tgt_column=self.x[self.column], compare_val=low_val)
838
- high_val = _modify_datetime_compare_val(
839
- tgt_column=self.x[self.column], compare_val=high_val
840
- )
601
+ low_val = _safe_modify_datetime_compare_val(self.x, self.column, low_val)
602
+ high_val = _safe_modify_datetime_compare_val(self.x, self.column, high_val)
841
603
 
842
604
  tbl = self.x.with_columns(
843
605
  pb_is_good_1=nw.col(self.column).is_null(), # val is Null in Column
@@ -900,136 +662,16 @@ class Interrogator:
900
662
  return tbl
901
663
 
902
664
  def outside(self) -> FrameT | Any:
903
- # Ibis backends ---------------------------------------------
904
-
905
- if self.tbl_type in IBIS_BACKENDS:
906
- import ibis
907
-
908
- if isinstance(self.low, Column) or isinstance(self.high, Column):
909
- #
910
- # Ibis column-to-column/column or column-to-column/literal comparison
911
- #
912
-
913
- if isinstance(self.low, Column):
914
- low_val = self.x[self.low.name]
915
- else:
916
- low_val = ibis.literal(self.low)
917
-
918
- if isinstance(self.high, Column):
919
- high_val = self.x[self.high.name]
920
- else:
921
- high_val = ibis.literal(self.high)
922
-
923
- if isinstance(self.low, Column) and isinstance(self.high, Column):
924
- tbl = self.x.mutate(
925
- pb_is_good_1=(
926
- self.x[self.column].isnull()
927
- | self.x[self.low.name].isnull()
928
- | self.x[self.high.name].isnull()
929
- )
930
- & ibis.literal(self.na_pass)
931
- )
665
+ # All backends now use Narwhals (including former Ibis tables) ---------
932
666
 
933
- elif isinstance(self.low, Column):
934
- tbl = self.x.mutate(
935
- pb_is_good_1=(self.x[self.column].isnull() | self.x[self.low.name].isnull())
936
- & ibis.literal(self.na_pass)
937
- )
938
- elif isinstance(self.high, Column):
939
- tbl = self.x.mutate(
940
- pb_is_good_1=(
941
- self.x[self.column].isnull() | self.x[self.high.name].isnull()
942
- )
943
- & ibis.literal(self.na_pass)
944
- )
945
-
946
- if self.inclusive[0]:
947
- tbl = tbl.mutate(pb_is_good_2=tbl[self.column] < low_val)
948
- else:
949
- tbl = tbl.mutate(pb_is_good_2=tbl[self.column] <= low_val)
950
-
951
- if self.inclusive[1]:
952
- tbl = tbl.mutate(pb_is_good_3=tbl[self.column] > high_val)
953
- else:
954
- tbl = tbl.mutate(pb_is_good_3=tbl[self.column] >= high_val)
955
-
956
- tbl = tbl.mutate(
957
- pb_is_good_2=ibis.ifelse(
958
- tbl.pb_is_good_3.isnull(),
959
- False,
960
- tbl.pb_is_good_2,
961
- )
962
- )
963
-
964
- tbl = tbl.mutate(
965
- pb_is_good_3=ibis.ifelse(
966
- tbl.pb_is_good_2.isnull(),
967
- False,
968
- tbl.pb_is_good_3,
969
- )
970
- )
971
-
972
- tbl = tbl.mutate(
973
- pb_is_good_2=ibis.ifelse(
974
- tbl.pb_is_good_2.isnull(),
975
- False,
976
- tbl.pb_is_good_2,
977
- )
978
- )
979
-
980
- tbl = tbl.mutate(
981
- pb_is_good_3=ibis.ifelse(
982
- tbl.pb_is_good_3.isnull(),
983
- False,
984
- tbl.pb_is_good_3,
985
- )
986
- )
987
-
988
- return tbl.mutate(
989
- pb_is_good_=tbl.pb_is_good_1 | (tbl.pb_is_good_2 | tbl.pb_is_good_3)
990
- ).drop("pb_is_good_1", "pb_is_good_2", "pb_is_good_3")
991
-
992
- #
993
- # Ibis column-to-literal/literal comparison
994
- #
995
- low_val = ibis.literal(self.low)
996
- high_val = ibis.literal(self.high)
997
-
998
- tbl = self.x.mutate(
999
- pb_is_good_1=self.x[self.column].isnull() & ibis.literal(self.na_pass)
1000
- )
1001
-
1002
- if self.inclusive[0]:
1003
- tbl = tbl.mutate(pb_is_good_2=tbl[self.column] < low_val)
1004
- else:
1005
- tbl = tbl.mutate(pb_is_good_2=tbl[self.column] <= low_val)
1006
-
1007
- tbl = tbl.mutate(
1008
- pb_is_good_2=ibis.ifelse(tbl.pb_is_good_2.notnull(), tbl.pb_is_good_2, False)
1009
- )
1010
-
1011
- if self.inclusive[1]:
1012
- tbl = tbl.mutate(pb_is_good_3=tbl[self.column] > high_val)
1013
- else:
1014
- tbl = tbl.mutate(pb_is_good_3=tbl[self.column] >= high_val)
1015
-
1016
- tbl = tbl.mutate(
1017
- pb_is_good_3=ibis.ifelse(tbl.pb_is_good_3.notnull(), tbl.pb_is_good_3, False)
1018
- )
1019
-
1020
- return tbl.mutate(
1021
- pb_is_good_=tbl.pb_is_good_1 | tbl.pb_is_good_2 | tbl.pb_is_good_3
1022
- ).drop("pb_is_good_1", "pb_is_good_2", "pb_is_good_3")
1023
-
1024
- # Local backends (Narwhals) ---------------------------------
667
+ low_val = _get_compare_expr_nw(compare=self.low)
668
+ high_val = _get_compare_expr_nw(compare=self.high)
1025
669
 
1026
670
  low_val = _get_compare_expr_nw(compare=self.low)
1027
671
  high_val = _get_compare_expr_nw(compare=self.high)
1028
672
 
1029
- low_val = _modify_datetime_compare_val(tgt_column=self.x[self.column], compare_val=low_val)
1030
- high_val = _modify_datetime_compare_val(
1031
- tgt_column=self.x[self.column], compare_val=high_val
1032
- )
673
+ low_val = _safe_modify_datetime_compare_val(self.x, self.column, low_val)
674
+ high_val = _safe_modify_datetime_compare_val(self.x, self.column, high_val)
1033
675
 
1034
676
  tbl = self.x.with_columns(
1035
677
  pb_is_good_1=nw.col(self.column).is_null(), # val is Null in Column
@@ -1088,17 +730,10 @@ class Interrogator:
1088
730
  return tbl
1089
731
 
1090
732
  def isin(self) -> FrameT | Any:
1091
- # Ibis backends ---------------------------------------------
733
+ # All backends now use Narwhals (including former Ibis tables) ---------
1092
734
 
1093
735
  can_be_null: bool = None in self.set
1094
736
 
1095
- if self.tbl_type in IBIS_BACKENDS:
1096
- base_expr = self.x[self.column].isin(self.set)
1097
- if can_be_null:
1098
- base_expr = base_expr | self.x[self.column].isnull()
1099
- return self.x.mutate(pb_is_good_=base_expr)
1100
-
1101
- # Local backends (Narwhals) ---------------------------------
1102
737
  base_expr: nw.Expr = nw.col(self.column).is_in(self.set)
1103
738
  if can_be_null:
1104
739
  base_expr = base_expr | nw.col(self.column).is_null()
@@ -1106,12 +741,7 @@ class Interrogator:
1106
741
  return self.x.with_columns(pb_is_good_=base_expr).to_native()
1107
742
 
1108
743
  def notin(self) -> FrameT | Any:
1109
- # Ibis backends ---------------------------------------------
1110
-
1111
- if self.tbl_type in IBIS_BACKENDS:
1112
- return self.x.mutate(pb_is_good_=self.x[self.column].notin(self.set))
1113
-
1114
- # Local backends (Narwhals) ---------------------------------
744
+ # All backends now use Narwhals (including former Ibis tables) ---------
1115
745
 
1116
746
  return (
1117
747
  self.x.with_columns(
@@ -1122,21 +752,7 @@ class Interrogator:
1122
752
  )
1123
753
 
1124
754
  def regex(self) -> FrameT | Any:
1125
- # Ibis backends ---------------------------------------------
1126
-
1127
- if self.tbl_type in IBIS_BACKENDS:
1128
- import ibis
1129
-
1130
- tbl = self.x.mutate(
1131
- pb_is_good_1=self.x[self.column].isnull() & ibis.literal(self.na_pass),
1132
- pb_is_good_2=self.x[self.column].re_search(self.pattern),
1133
- )
1134
-
1135
- return tbl.mutate(pb_is_good_=tbl.pb_is_good_1 | tbl.pb_is_good_2).drop(
1136
- "pb_is_good_1", "pb_is_good_2"
1137
- )
1138
-
1139
- # Local backends (Narwhals) ---------------------------------
755
+ # All backends now use Narwhals (including former Ibis tables) ---------
1140
756
 
1141
757
  return (
1142
758
  self.x.with_columns(
@@ -1151,55 +767,21 @@ class Interrogator:
1151
767
  )
1152
768
 
1153
769
  def null(self) -> FrameT | Any:
1154
- # Ibis backends ---------------------------------------------
1155
-
1156
- if self.tbl_type in IBIS_BACKENDS:
1157
- return self.x.mutate(
1158
- pb_is_good_=self.x[self.column].isnull(),
1159
- )
1160
-
1161
- # Local backends (Narwhals) ---------------------------------
770
+ # All backends now use Narwhals (including former Ibis tables) ---------
1162
771
 
1163
772
  return self.x.with_columns(
1164
773
  pb_is_good_=nw.col(self.column).is_null(),
1165
774
  ).to_native()
1166
775
 
1167
776
  def not_null(self) -> FrameT | Any:
1168
- # Ibis backends ---------------------------------------------
1169
-
1170
- if self.tbl_type in IBIS_BACKENDS:
1171
- return self.x.mutate(
1172
- pb_is_good_=~self.x[self.column].isnull(),
1173
- )
1174
-
1175
- # Local backends (Narwhals) ---------------------------------
777
+ # All backends now use Narwhals (including former Ibis tables) ---------
1176
778
 
1177
779
  return self.x.with_columns(
1178
780
  pb_is_good_=~nw.col(self.column).is_null(),
1179
781
  ).to_native()
1180
782
 
1181
783
  def rows_distinct(self) -> FrameT | Any:
1182
- # Ibis backends ---------------------------------------------
1183
-
1184
- if self.tbl_type in IBIS_BACKENDS:
1185
- import ibis
1186
-
1187
- tbl = self.x
1188
-
1189
- # Get the column subset to use for the test
1190
- if self.columns_subset is None:
1191
- columns_subset = tbl.columns
1192
- else:
1193
- columns_subset = self.columns_subset
1194
-
1195
- # Create a subset of the table with only the columns of interest and count the
1196
- # number of times each unique row (or portion thereof) appears
1197
- tbl = tbl.group_by(columns_subset).mutate(pb_count_=ibis._.count())
1198
-
1199
- # Passing rows will have the value `1` (no duplicates, so True), otherwise False applies
1200
- return tbl.mutate(pb_is_good_=tbl["pb_count_"] == 1).drop("pb_count_")
1201
-
1202
- # Local backends (Narwhals) ---------------------------------
784
+ # All backends now use Narwhals (including former Ibis tables) ---------
1203
785
 
1204
786
  tbl = self.x
1205
787
 
@@ -1209,32 +791,20 @@ class Interrogator:
1209
791
  else:
1210
792
  columns_subset = self.columns_subset
1211
793
 
1212
- # Create a subset of the table with only the columns of interest
1213
- subset_tbl = tbl.select(columns_subset)
794
+ # Create a count of duplicates using group_by approach like Ibis backend
795
+ # Group by the columns of interest and count occurrences
796
+ count_tbl = tbl.group_by(columns_subset).agg(nw.len().alias("pb_count_"))
1214
797
 
1215
- # Check for duplicates in the subset table, creating a series of booleans
1216
- pb_is_good_series = subset_tbl.is_duplicated()
798
+ # Join back to original table to get count for each row
799
+ tbl = tbl.join(count_tbl, on=columns_subset, how="left")
1217
800
 
1218
- # Add the series to the input table
1219
- tbl = tbl.with_columns(pb_is_good_=~pb_is_good_series)
801
+ # Passing rows will have the value `1` (no duplicates, so True), otherwise False applies
802
+ tbl = tbl.with_columns(pb_is_good_=nw.col("pb_count_") == 1).drop("pb_count_")
1220
803
 
1221
804
  return tbl.to_native()
1222
805
 
1223
806
  def rows_complete(self) -> FrameT | Any:
1224
- # Ibis backends ---------------------------------------------
1225
-
1226
- if self.tbl_type in IBIS_BACKENDS:
1227
- tbl = self.x
1228
-
1229
- # Determine the number of null values in each row (column subsets are handled in
1230
- # the `_check_nulls_across_columns_ibis()` function)
1231
- tbl = _check_nulls_across_columns_ibis(table=tbl, columns_subset=self.columns_subset)
1232
-
1233
- # Failing rows will have the value `True` in the generated column, so we need to negate
1234
- # the result to get the passing rows
1235
- return tbl.mutate(pb_is_good_=~tbl["_any_is_null_"]).drop("_any_is_null_")
1236
-
1237
- # Local backends (Narwhals) ---------------------------------
807
+ # All backends now use Narwhals (including former Ibis tables) ---------
1238
808
 
1239
809
  tbl = self.x
1240
810
 
@@ -1299,10 +869,8 @@ class ColValsCompareOne:
1299
869
  tbl = _column_test_prep(
1300
870
  df=self.data_tbl, column=self.column, allowed_types=self.allowed_types
1301
871
  )
1302
-
1303
- # TODO: For Ibis backends, check if the column exists and if the column type is compatible;
1304
- # for now, just pass the table as is
1305
- if self.tbl_type in IBIS_BACKENDS:
872
+ else:
873
+ # For remote backends (Ibis), pass the table as is since Interrogator now handles Ibis through Narwhals
1306
874
  tbl = self.data_tbl
1307
875
 
1308
876
  # Collect results for the test units; the results are a list of booleans where
@@ -1457,7 +1025,8 @@ class ColValsCompareTwo:
1457
1025
 
1458
1026
  # TODO: For Ibis backends, check if the column exists and if the column type is compatible;
1459
1027
  # for now, just pass the table as is
1460
- if self.tbl_type in IBIS_BACKENDS:
1028
+ else:
1029
+ # For remote backends (Ibis), pass the table as is since Interrogator now handles Ibis through Narwhals
1461
1030
  tbl = self.data_tbl
1462
1031
 
1463
1032
  # Collect results for the test units; the results are a list of booleans where
@@ -1550,10 +1119,8 @@ class ColValsCompareSet:
1550
1119
  tbl = _column_test_prep(
1551
1120
  df=self.data_tbl, column=self.column, allowed_types=self.allowed_types
1552
1121
  )
1553
-
1554
- # TODO: For Ibis backends, check if the column exists and if the column type is compatible;
1555
- # for now, just pass the table as is
1556
- if self.tbl_type in IBIS_BACKENDS:
1122
+ else:
1123
+ # For remote backends (Ibis), pass the table as is since Interrogator now handles Ibis through Narwhals
1557
1124
  tbl = self.data_tbl
1558
1125
 
1559
1126
  # Collect results for the test units; the results are a list of booleans where
@@ -1627,10 +1194,8 @@ class ColValsRegex:
1627
1194
  tbl = _column_test_prep(
1628
1195
  df=self.data_tbl, column=self.column, allowed_types=self.allowed_types
1629
1196
  )
1630
-
1631
- # TODO: For Ibis backends, check if the column exists and if the column type is compatible;
1632
- # for now, just pass the table as is
1633
- if self.tbl_type in IBIS_BACKENDS:
1197
+ else:
1198
+ # For remote backends (Ibis), pass the table as is since Interrogator now handles Ibis through Narwhals
1634
1199
  tbl = self.data_tbl
1635
1200
 
1636
1201
  # Collect results for the test units; the results are a list of booleans where
@@ -1758,11 +1323,9 @@ class ColExistsHasType:
1758
1323
  # - check if the `column=` exists
1759
1324
  # - check if the `column=` type is compatible with the test
1760
1325
  tbl = _convert_to_narwhals(df=self.data_tbl)
1761
-
1762
- # TODO: For Ibis backends, check if the column exists and if the column type is compatible;
1763
- # for now, just pass the table as is
1764
- if self.tbl_type in IBIS_BACKENDS:
1765
- tbl = self.data_tbl
1326
+ else:
1327
+ # For remote backends (Ibis), pass the table as is since Narwhals can handle it
1328
+ tbl = _convert_to_narwhals(df=self.data_tbl)
1766
1329
 
1767
1330
  if self.assertion_method == "exists":
1768
1331
  res = int(self.column in tbl.columns)
@@ -1810,7 +1373,8 @@ class RowsDistinct:
1810
1373
 
1811
1374
  # TODO: For Ibis backends, check if the column exists and if the column type is compatible;
1812
1375
  # for now, just pass the table as is
1813
- if self.tbl_type in IBIS_BACKENDS:
1376
+ else:
1377
+ # For remote backends (Ibis), pass the table as is since Interrogator now handles Ibis through Narwhals
1814
1378
  tbl = self.data_tbl
1815
1379
 
1816
1380
  # Collect results for the test units; the results are a list of booleans where
@@ -1862,7 +1426,8 @@ class RowsComplete:
1862
1426
 
1863
1427
  # TODO: For Ibis backends, check if the column exists and if the column type is compatible;
1864
1428
  # for now, just pass the table as is
1865
- if self.tbl_type in IBIS_BACKENDS:
1429
+ else:
1430
+ # For remote backends (Ibis), pass the table as is since Interrogator now handles Ibis through Narwhals
1866
1431
  tbl = self.data_tbl
1867
1432
 
1868
1433
  # Collect results for the test units; the results are a list of booleans where
@@ -2088,6 +1653,8 @@ class ConjointlyValidation:
2088
1653
  return self._get_pandas_results()
2089
1654
  elif "duckdb" in self.tbl_type or "ibis" in self.tbl_type:
2090
1655
  return self._get_ibis_results()
1656
+ elif "pyspark" in self.tbl_type:
1657
+ return self._get_pyspark_results()
2091
1658
  else: # pragma: no cover
2092
1659
  raise NotImplementedError(f"Support for {self.tbl_type} is not yet implemented")
2093
1660
 
@@ -2247,6 +1814,53 @@ class ConjointlyValidation:
2247
1814
  results_tbl = self.data_tbl.mutate(pb_is_good_=ibis.literal(True))
2248
1815
  return results_tbl
2249
1816
 
1817
+ def _get_pyspark_results(self):
1818
+ """Process expressions for PySpark DataFrames."""
1819
+ from pyspark.sql import functions as F
1820
+
1821
+ pyspark_columns = []
1822
+
1823
+ for expr_fn in self.expressions:
1824
+ try:
1825
+ # First try direct evaluation with PySpark DataFrame
1826
+ expr_result = expr_fn(self.data_tbl)
1827
+
1828
+ # Check if it's a PySpark Column
1829
+ if hasattr(expr_result, "_jc"): # PySpark Column has _jc attribute
1830
+ pyspark_columns.append(expr_result)
1831
+ else:
1832
+ raise TypeError(
1833
+ f"Expression returned {type(expr_result)}, expected PySpark Column"
1834
+ )
1835
+
1836
+ except Exception as e:
1837
+ try:
1838
+ # Try as a ColumnExpression (for pb.expr_col style)
1839
+ col_expr = expr_fn(None)
1840
+
1841
+ if hasattr(col_expr, "to_pyspark_expr"):
1842
+ # Convert to PySpark expression
1843
+ pyspark_expr = col_expr.to_pyspark_expr(self.data_tbl)
1844
+ pyspark_columns.append(pyspark_expr)
1845
+ else:
1846
+ raise TypeError(f"Cannot convert {type(col_expr)} to PySpark Column")
1847
+ except Exception as nested_e:
1848
+ print(f"Error evaluating PySpark expression: {e} -> {nested_e}")
1849
+
1850
+ # Combine results with AND logic
1851
+ if pyspark_columns:
1852
+ final_result = pyspark_columns[0]
1853
+ for col in pyspark_columns[1:]:
1854
+ final_result = final_result & col
1855
+
1856
+ # Create results table with boolean column
1857
+ results_tbl = self.data_tbl.withColumn("pb_is_good_", final_result)
1858
+ return results_tbl
1859
+
1860
+ # Default case
1861
+ results_tbl = self.data_tbl.withColumn("pb_is_good_", F.lit(True))
1862
+ return results_tbl
1863
+
2250
1864
 
2251
1865
  class SpeciallyValidation:
2252
1866
  def __init__(self, data_tbl, expression, threshold, tbl_type):
@@ -2359,13 +1973,22 @@ class NumberOfTestUnits:
2359
1973
  column: str
2360
1974
 
2361
1975
  def get_test_units(self, tbl_type: str) -> int:
2362
- if tbl_type == "pandas" or tbl_type == "polars":
1976
+ if (
1977
+ tbl_type == "pandas"
1978
+ or tbl_type == "polars"
1979
+ or tbl_type == "pyspark"
1980
+ or tbl_type == "local"
1981
+ ):
2363
1982
  # Convert the DataFrame to a format that narwhals can work with and:
2364
1983
  # - check if the column exists
2365
1984
  dfn = _column_test_prep(
2366
1985
  df=self.df, column=self.column, allowed_types=None, check_exists=False
2367
1986
  )
2368
1987
 
1988
+ # Handle LazyFrames which don't have len()
1989
+ if hasattr(dfn, "collect"):
1990
+ dfn = dfn.collect()
1991
+
2369
1992
  return len(dfn)
2370
1993
 
2371
1994
  if tbl_type in IBIS_BACKENDS:
@@ -2383,7 +2006,22 @@ def _get_compare_expr_nw(compare: Any) -> Any:
2383
2006
 
2384
2007
 
2385
2008
  def _column_has_null_values(table: FrameT, column: str) -> bool:
2386
- null_count = (table.select(column).null_count())[column][0]
2009
+ try:
2010
+ # Try the standard null_count() method
2011
+ null_count = (table.select(column).null_count())[column][0]
2012
+ except AttributeError:
2013
+ # For LazyFrames, collect first then get null count
2014
+ try:
2015
+ collected = table.select(column).collect()
2016
+ null_count = (collected.null_count())[column][0]
2017
+ except Exception:
2018
+ # Fallback: check if any values are null
2019
+ try:
2020
+ result = table.select(nw.col(column).is_null().sum().alias("null_count")).collect()
2021
+ null_count = result["null_count"][0]
2022
+ except Exception:
2023
+ # Last resort: return False (assume no nulls)
2024
+ return False
2387
2025
 
2388
2026
  if null_count is None or null_count == 0:
2389
2027
  return False
@@ -2414,7 +2052,7 @@ def _check_nulls_across_columns_nw(table, columns_subset):
2414
2052
 
2415
2053
  # Build the expression by combining each column's `is_null()` with OR operations
2416
2054
  null_expr = functools.reduce(
2417
- lambda acc, col: acc | table[col].is_null() if acc is not None else table[col].is_null(),
2055
+ lambda acc, col: acc | nw.col(col).is_null() if acc is not None else nw.col(col).is_null(),
2418
2056
  column_names,
2419
2057
  None,
2420
2058
  )