pointblank 0.13.2__py3-none-any.whl → 0.13.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pointblank/validate.py CHANGED
@@ -10,6 +10,7 @@ import re
10
10
  import tempfile
11
11
  import threading
12
12
  from dataclasses import dataclass
13
+ from enum import Enum
13
14
  from importlib.metadata import version
14
15
  from typing import TYPE_CHECKING, Any, Callable, Literal
15
16
  from zipfile import ZipFile
@@ -74,6 +75,7 @@ from pointblank._utils import (
74
75
  _check_any_df_lib,
75
76
  _check_invalid_fields,
76
77
  _column_test_prep,
78
+ _copy_dataframe,
77
79
  _count_null_values_in_column,
78
80
  _count_true_values_in_column,
79
81
  _derive_bounds,
@@ -2006,9 +2008,9 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
2006
2008
 
2007
2009
  # Apply the appropriate conversion method
2008
2010
  if use_polars_conversion:
2009
- null_sum_converted = null_sum.to_polars()
2011
+ null_sum_converted = null_sum.to_polars() # pragma: no cover
2010
2012
  else:
2011
- null_sum_converted = null_sum.to_pandas()
2013
+ null_sum_converted = null_sum.to_pandas() # pragma: no cover
2012
2014
 
2013
2015
  missing_prop = (null_sum_converted / sector_size) * 100
2014
2016
  col_missing_props.append(missing_prop)
@@ -2025,9 +2027,9 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
2025
2027
 
2026
2028
  # Apply the appropriate conversion method
2027
2029
  if use_polars_conversion:
2028
- null_sum_converted = null_sum.to_polars()
2030
+ null_sum_converted = null_sum.to_polars() # pragma: no cover
2029
2031
  else:
2030
- null_sum_converted = null_sum.to_pandas()
2032
+ null_sum_converted = null_sum.to_pandas() # pragma: no cover
2031
2033
 
2032
2034
  missing_prop = (null_sum_converted / sector_size) * 100
2033
2035
  col_missing_props.append(missing_prop)
@@ -2040,9 +2042,13 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
2040
2042
 
2041
2043
  # Use the helper function based on the DataFrame library
2042
2044
  if df_lib_name_gt == "polars":
2043
- missing_vals = _calculate_missing_proportions(use_polars_conversion=True)
2045
+ missing_vals = _calculate_missing_proportions(
2046
+ use_polars_conversion=True
2047
+ ) # pragma: no cover
2044
2048
  else:
2045
- missing_vals = _calculate_missing_proportions(use_polars_conversion=False)
2049
+ missing_vals = _calculate_missing_proportions(
2050
+ use_polars_conversion=False
2051
+ ) # pragma: no cover
2046
2052
 
2047
2053
  # Pivot the `missing_vals` dictionary to create a table with the missing value proportions
2048
2054
  missing_vals = {
@@ -2055,9 +2061,13 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
2055
2061
 
2056
2062
  # Get a dictionary of counts of missing values in each column
2057
2063
  if df_lib_name_gt == "polars":
2058
- missing_val_counts = {col: data[col].isnull().sum().to_polars() for col in data.columns}
2064
+ missing_val_counts = {
2065
+ col: data[col].isnull().sum().to_polars() for col in data.columns
2066
+ } # pragma: no cover
2059
2067
  else:
2060
- missing_val_counts = {col: data[col].isnull().sum().to_pandas() for col in data.columns}
2068
+ missing_val_counts = {
2069
+ col: data[col].isnull().sum().to_pandas() for col in data.columns
2070
+ } # pragma: no cover
2061
2071
 
2062
2072
  if pl_pb_tbl:
2063
2073
  # Get the column names from the table
@@ -2429,10 +2439,10 @@ def _get_column_names_safe(data: Any) -> list[str]:
2429
2439
  if hasattr(df_nw, "collect_schema"):
2430
2440
  return list(df_nw.collect_schema().keys())
2431
2441
  else:
2432
- return list(df_nw.columns)
2433
- except Exception:
2442
+ return list(df_nw.columns) # pragma: no cover
2443
+ except Exception: # pragma: no cover
2434
2444
  # Fallback to direct column access
2435
- return list(data.columns)
2445
+ return list(data.columns) # pragma: no cover
2436
2446
 
2437
2447
 
2438
2448
  def _get_column_names(data: FrameT | Any, ibis_tbl: bool, df_lib_name_gt: str) -> list[str]:
@@ -2633,7 +2643,7 @@ def get_column_count(data: FrameT | Any) -> int:
2633
2643
  if hasattr(df_nw, "collect_schema"):
2634
2644
  return len(df_nw.collect_schema())
2635
2645
  else:
2636
- return len(df_nw.columns)
2646
+ return len(df_nw.columns) # pragma: no cover
2637
2647
  except Exception:
2638
2648
  # Fallback for unsupported types
2639
2649
  if "pandas" in str(type(data)):
@@ -2642,6 +2652,48 @@ def get_column_count(data: FrameT | Any) -> int:
2642
2652
  raise ValueError("The input table type supplied in `data=` is not supported.")
2643
2653
 
2644
2654
 
2655
+ def _extract_enum_values(set_values: Any) -> list[Any]:
2656
+ """
2657
+ Extract values from Enum classes or collections containing Enum instances.
2658
+
2659
+ This helper function handles:
2660
+ 1. Enum classes: extracts all enum values
2661
+ 2. Collections containing Enum instances: extracts their values
2662
+ 3. Regular collections: returns as-is
2663
+
2664
+ Parameters
2665
+ ----------
2666
+ set_values
2667
+ The input collection that may contain Enum class or Enum instances.
2668
+
2669
+ Returns
2670
+ -------
2671
+ list[Any]
2672
+ A list of extracted values
2673
+ """
2674
+ from collections.abc import Collection
2675
+
2676
+ # Check if set_values is an Enum class (not an instance)
2677
+ if inspect.isclass(set_values) and issubclass(set_values, Enum):
2678
+ # Extract all values from the Enum class
2679
+ return [enum_member.value for enum_member in set_values]
2680
+
2681
+ # Check if set_values is a collection
2682
+ if isinstance(set_values, Collection) and not isinstance(set_values, (str, bytes)):
2683
+ extracted_values = []
2684
+ for item in set_values:
2685
+ if isinstance(item, Enum):
2686
+ # If item is an Enum instance, extract its value
2687
+ extracted_values.append(item.value)
2688
+ else:
2689
+ # If item is not an Enum instance, keep as-is
2690
+ extracted_values.append(item)
2691
+ return extracted_values
2692
+
2693
+ # If set_values is neither an Enum class nor a collection, return as list
2694
+ return [set_values]
2695
+
2696
+
2645
2697
  def get_row_count(data: FrameT | Any) -> int:
2646
2698
  """
2647
2699
  Get the number of rows in a table.
@@ -2806,11 +2858,11 @@ def get_row_count(data: FrameT | Any) -> int:
2806
2858
  # Try different ways to get row count
2807
2859
  if hasattr(df_nw, "shape"):
2808
2860
  return df_nw.shape[0]
2809
- elif hasattr(df_nw, "height"):
2861
+ elif hasattr(df_nw, "height"): # pragma: no cover
2810
2862
  return df_nw.height # pragma: no cover
2811
2863
  else: # pragma: no cover
2812
2864
  raise ValueError("Unable to determine row count from Narwhals DataFrame")
2813
- except Exception:
2865
+ except Exception: # pragma: no cover
2814
2866
  # Fallback for types that don't work with Narwhals
2815
2867
  if "pandas" in str(type(data)): # pragma: no cover
2816
2868
  return data.shape[0]
@@ -6324,7 +6376,10 @@ class Validate:
6324
6376
  multiple columns are supplied or resolved, there will be a separate validation step
6325
6377
  generated for each column.
6326
6378
  set
6327
- A list of values to compare against.
6379
+ A collection of values to compare against. Can be a list of values, a Python Enum class,
6380
+ or a collection containing Enum instances. When an Enum class is provided, all enum
6381
+ values will be used. When a collection contains Enum instances, their values will be
6382
+ extracted automatically.
6328
6383
  pre
6329
6384
  An optional preprocessing function or lambda to apply to the data table during
6330
6385
  interrogation. This function should take a table as input and return a modified table.
@@ -6501,12 +6556,69 @@ class Validate:
6501
6556
 
6502
6557
  The validation table reports two failing test units. The specific failing cases are for the
6503
6558
  column `b` values of `8` and `1`, which are not in the set of `[2, 3, 4, 5, 6]`.
6559
+
6560
+ **Using Python Enums**
6561
+
6562
+ The `col_vals_in_set()` method also supports Python Enum classes and instances, which can
6563
+ make validations more readable and maintainable:
6564
+
6565
+ ```{python}
6566
+ from enum import Enum
6567
+
6568
+ class Color(Enum):
6569
+ RED = "red"
6570
+ GREEN = "green"
6571
+ BLUE = "blue"
6572
+
6573
+ # Create a table with color data
6574
+ tbl_colors = pl.DataFrame({
6575
+ "product": ["shirt", "pants", "hat", "shoes"],
6576
+ "color": ["red", "blue", "green", "yellow"]
6577
+ })
6578
+
6579
+ # Validate using an Enum class (all enum values are allowed)
6580
+ validation = (
6581
+ pb.Validate(data=tbl_colors)
6582
+ .col_vals_in_set(columns="color", set=Color)
6583
+ .interrogate()
6584
+ )
6585
+
6586
+ validation
6587
+ ```
6588
+
6589
+ This validation will fail for the `"yellow"` value since it's not in the `Color` enum.
6590
+
6591
+ You can also use specific Enum instances or mix them with regular values:
6592
+
6593
+ ```{python}
6594
+ # Validate using specific Enum instances
6595
+ validation = (
6596
+ pb.Validate(data=tbl_colors)
6597
+ .col_vals_in_set(columns="color", set=[Color.RED, Color.BLUE])
6598
+ .interrogate()
6599
+ )
6600
+
6601
+ # Mix Enum instances with regular values
6602
+ validation = (
6603
+ pb.Validate(data=tbl_colors)
6604
+ .col_vals_in_set(columns="color", set=[Color.RED, Color.BLUE, "yellow"])
6605
+ .interrogate()
6606
+ )
6607
+
6608
+ validation
6609
+ ```
6610
+
6611
+ In this case, the `"green"` value will cause a failing test unit since it's not part of the
6612
+ specified set.
6504
6613
  """
6505
6614
 
6506
6615
  assertion_type = _get_fn_name()
6507
6616
 
6508
6617
  _check_column(column=columns)
6509
6618
 
6619
+ # Extract values from Enum classes or Enum instances if present
6620
+ set = _extract_enum_values(set)
6621
+
6510
6622
  for val in set:
6511
6623
  if val is None:
6512
6624
  continue
@@ -6557,7 +6669,7 @@ class Validate:
6557
6669
  def col_vals_not_in_set(
6558
6670
  self,
6559
6671
  columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
6560
- set: list[float | int],
6672
+ set: Collection[Any],
6561
6673
  pre: Callable | None = None,
6562
6674
  segments: SegmentSpec | None = None,
6563
6675
  thresholds: int | float | bool | tuple | dict | Thresholds = None,
@@ -6581,7 +6693,10 @@ class Validate:
6581
6693
  multiple columns are supplied or resolved, there will be a separate validation step
6582
6694
  generated for each column.
6583
6695
  set
6584
- A list of values to compare against.
6696
+ A collection of values to compare against. Can be a list of values, a Python Enum class,
6697
+ or a collection containing Enum instances. When an Enum class is provided, all enum
6698
+ values will be used. When a collection contains Enum instances, their values will be
6699
+ extracted automatically.
6585
6700
  pre
6586
6701
  An optional preprocessing function or lambda to apply to the data table during
6587
6702
  interrogation. This function should take a table as input and return a modified table.
@@ -6759,11 +6874,45 @@ class Validate:
6759
6874
 
6760
6875
  The validation table reports two failing test units. The specific failing cases are for the
6761
6876
  column `b` values of `2` and `6`, both of which are in the set of `[2, 3, 4, 5, 6]`.
6877
+
6878
+ **Using Python Enums**
6879
+
6880
+ Like `col_vals_in_set()`, this method also supports Python Enum classes and instances:
6881
+
6882
+ ```{python}
6883
+ from enum import Enum
6884
+
6885
+ class InvalidStatus(Enum):
6886
+ DELETED = "deleted"
6887
+ ARCHIVED = "archived"
6888
+
6889
+ # Create a table with status data
6890
+ status_table = pl.DataFrame({
6891
+ "product": ["widget", "gadget", "tool", "device"],
6892
+ "status": ["active", "pending", "deleted", "active"]
6893
+ })
6894
+
6895
+ # Validate that no values are in the invalid status set
6896
+ validation = (
6897
+ pb.Validate(data=status_table)
6898
+ .col_vals_not_in_set(columns="status", set=InvalidStatus)
6899
+ .interrogate()
6900
+ )
6901
+
6902
+ validation
6903
+ ```
6904
+
6905
+ This `"deleted"` value in the `status` column will fail since it matches one of the invalid
6906
+ statuses in the `InvalidStatus` enum.
6762
6907
  """
6763
6908
 
6764
6909
  assertion_type = _get_fn_name()
6765
6910
 
6766
6911
  _check_column(column=columns)
6912
+
6913
+ # Extract values from Enum classes or Enum instances if present
6914
+ set = _extract_enum_values(set)
6915
+
6767
6916
  _check_set_types(set=set)
6768
6917
  _check_pre(pre=pre)
6769
6918
  # TODO: add check for segments
@@ -7297,6 +7446,7 @@ class Validate:
7297
7446
  columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
7298
7447
  pattern: str,
7299
7448
  na_pass: bool = False,
7449
+ inverse: bool = False,
7300
7450
  pre: Callable | None = None,
7301
7451
  segments: SegmentSpec | None = None,
7302
7452
  thresholds: int | float | bool | tuple | dict | Thresholds = None,
@@ -7324,6 +7474,9 @@ class Validate:
7324
7474
  na_pass
7325
7475
  Should any encountered None, NA, or Null values be considered as passing test units? By
7326
7476
  default, this is `False`. Set to `True` to pass test units with missing values.
7477
+ inverse
7478
+ Should the validation step be inverted? If `True`, then the expectation is that column
7479
+ values should *not* match the specified `pattern=` regex.
7327
7480
  pre
7328
7481
  An optional preprocessing function or lambda to apply to the data table during
7329
7482
  interrogation. This function should take a table as input and return a modified table.
@@ -7510,6 +7663,7 @@ class Validate:
7510
7663
  # _check_segments(segments=segments)
7511
7664
  _check_thresholds(thresholds=thresholds)
7512
7665
  _check_boolean_input(param=na_pass, param_name="na_pass")
7666
+ _check_boolean_input(param=inverse, param_name="inverse")
7513
7667
  _check_boolean_input(param=active, param_name="active")
7514
7668
 
7515
7669
  # Determine threshold to use (global or local) and normalize a local `thresholds=` value
@@ -7529,12 +7683,15 @@ class Validate:
7529
7683
  # Determine brief to use (global or local) and transform any shorthands of `brief=`
7530
7684
  brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
7531
7685
 
7686
+ # Package up the `pattern=` and boolean params into a dictionary for later interrogation
7687
+ values = {"pattern": pattern, "inverse": inverse}
7688
+
7532
7689
  # Iterate over the columns and create a validation step for each
7533
7690
  for column in columns:
7534
7691
  val_info = _ValidationInfo(
7535
7692
  assertion_type=assertion_type,
7536
7693
  column=column,
7537
- values=pattern,
7694
+ values=values,
7538
7695
  na_pass=na_pass,
7539
7696
  pre=pre,
7540
7697
  segments=segments,
@@ -8401,8 +8558,8 @@ class Validate:
8401
8558
  self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
8402
8559
  )
8403
8560
 
8404
- if columns_subset is not None and isinstance(columns_subset, str):
8405
- columns_subset = [columns_subset]
8561
+ if columns_subset is not None and isinstance(columns_subset, str): # pragma: no cover
8562
+ columns_subset = [columns_subset] # pragma: no cover
8406
8563
 
8407
8564
  # TODO: incorporate Column object
8408
8565
 
@@ -9830,8 +9987,9 @@ class Validate:
9830
9987
  validation.active = False
9831
9988
  continue
9832
9989
 
9833
- # Make a copy of the table for this step
9834
- data_tbl_step = data_tbl
9990
+ # Make a deep copy of the table for this step to ensure proper isolation
9991
+ # This prevents modifications from one validation step affecting others
9992
+ data_tbl_step = _copy_dataframe(data_tbl)
9835
9993
 
9836
9994
  # ------------------------------------------------
9837
9995
  # Preprocessing stage
@@ -9998,7 +10156,7 @@ class Validate:
9998
10156
 
9999
10157
  elif assertion_type == "col_vals_regex":
10000
10158
  results_tbl = interrogate_regex(
10001
- tbl=tbl, column=column, pattern=value, na_pass=na_pass
10159
+ tbl=tbl, column=column, values=value, na_pass=na_pass
10002
10160
  )
10003
10161
 
10004
10162
  elif assertion_type == "col_vals_expr":
@@ -10096,7 +10254,9 @@ class Validate:
10096
10254
  )
10097
10255
 
10098
10256
  else:
10099
- raise ValueError(f"Unknown assertion type: {assertion_type}")
10257
+ raise ValueError(
10258
+ f"Unknown assertion type: {assertion_type}"
10259
+ ) # pragma: no cover
10100
10260
 
10101
10261
  except Exception as e:
10102
10262
  # Only catch specific data quality comparison errors, not programming errors
@@ -10111,14 +10271,18 @@ class Validate:
10111
10271
  or ("dtype" in error_msg and "compare" in error_msg)
10112
10272
  )
10113
10273
 
10114
- if is_comparison_error:
10274
+ if is_comparison_error: # pragma: no cover
10115
10275
  # If data quality comparison fails, mark the validation as having an eval_error
10116
- validation.eval_error = True
10117
- end_time = datetime.datetime.now(datetime.timezone.utc)
10118
- validation.proc_duration_s = (end_time - start_time).total_seconds()
10119
- validation.time_processed = end_time.isoformat(timespec="milliseconds")
10120
- validation.active = False
10121
- continue
10276
+ validation.eval_error = True # pragma: no cover
10277
+ end_time = datetime.datetime.now(datetime.timezone.utc) # pragma: no cover
10278
+ validation.proc_duration_s = (
10279
+ end_time - start_time
10280
+ ).total_seconds() # pragma: no cover
10281
+ validation.time_processed = end_time.isoformat(
10282
+ timespec="milliseconds"
10283
+ ) # pragma: no cover
10284
+ validation.active = False # pragma: no cover
10285
+ continue # pragma: no cover
10122
10286
  else:
10123
10287
  # For other errors (like missing columns), let them propagate
10124
10288
  raise
@@ -10363,32 +10527,46 @@ class Validate:
10363
10527
  except AttributeError:
10364
10528
  # For LazyFrames without sample method, collect first then sample
10365
10529
  validation_extract_native = validation_extract_nw.collect().to_native()
10366
- if hasattr(validation_extract_native, "sample"):
10530
+ if hasattr(validation_extract_native, "sample"): # pragma: no cover
10367
10531
  # PySpark DataFrame has sample method
10368
- validation_extract_native = validation_extract_native.sample(
10369
- fraction=min(1.0, sample_n / validation_extract_native.count())
10370
- ).limit(sample_n)
10371
- validation_extract_nw = nw.from_native(validation_extract_native)
10532
+ validation_extract_native = (
10533
+ validation_extract_native.sample( # pragma: no cover
10534
+ fraction=min(
10535
+ 1.0, sample_n / validation_extract_native.count()
10536
+ ) # pragma: no cover
10537
+ ).limit(sample_n)
10538
+ ) # pragma: no cover
10539
+ validation_extract_nw = nw.from_native(
10540
+ validation_extract_native
10541
+ ) # pragma: no cover
10372
10542
  else:
10373
10543
  # Fallback: just take first n rows after collecting
10374
- validation_extract_nw = validation_extract_nw.collect().head(sample_n)
10544
+ validation_extract_nw = validation_extract_nw.collect().head(
10545
+ sample_n
10546
+ ) # pragma: no cover
10375
10547
  elif sample_frac is not None:
10376
10548
  try:
10377
10549
  validation_extract_nw = validation_extract_nw.sample(fraction=sample_frac)
10378
- except AttributeError:
10550
+ except AttributeError: # pragma: no cover
10379
10551
  # For LazyFrames without sample method, collect first then sample
10380
- validation_extract_native = validation_extract_nw.collect().to_native()
10381
- if hasattr(validation_extract_native, "sample"):
10552
+ validation_extract_native = (
10553
+ validation_extract_nw.collect().to_native()
10554
+ ) # pragma: no cover
10555
+ if hasattr(validation_extract_native, "sample"): # pragma: no cover
10382
10556
  # PySpark DataFrame has sample method
10383
10557
  validation_extract_native = validation_extract_native.sample(
10384
10558
  fraction=sample_frac
10385
- )
10386
- validation_extract_nw = nw.from_native(validation_extract_native)
10559
+ ) # pragma: no cover
10560
+ validation_extract_nw = nw.from_native(
10561
+ validation_extract_native
10562
+ ) # pragma: no cover
10387
10563
  else:
10388
10564
  # Fallback: use fraction to calculate head size
10389
- collected = validation_extract_nw.collect()
10390
- sample_size = max(1, int(len(collected) * sample_frac))
10391
- validation_extract_nw = collected.head(sample_size)
10565
+ collected = validation_extract_nw.collect() # pragma: no cover
10566
+ sample_size = max(
10567
+ 1, int(len(collected) * sample_frac)
10568
+ ) # pragma: no cover
10569
+ validation_extract_nw = collected.head(sample_size) # pragma: no cover
10392
10570
 
10393
10571
  # Ensure a limit is set on the number of rows to extract
10394
10572
  try:
@@ -10398,9 +10576,9 @@ class Validate:
10398
10576
  # For LazyFrames, collect to get length (or use a reasonable default)
10399
10577
  try:
10400
10578
  extract_length = len(validation_extract_nw.collect())
10401
- except Exception:
10579
+ except Exception: # pragma: no cover
10402
10580
  # If collection fails, apply limit anyway as a safety measure
10403
- extract_length = extract_limit + 1 # Force limiting
10581
+ extract_length = extract_limit + 1 # pragma: no cover
10404
10582
 
10405
10583
  if extract_length > extract_limit:
10406
10584
  validation_extract_nw = validation_extract_nw.head(extract_limit)
@@ -12065,10 +12243,12 @@ class Validate:
12065
12243
  try:
12066
12244
  # Try without order_by first (for DataFrames)
12067
12245
  data_nw = data_nw.with_row_index(name=index_name)
12068
- except TypeError:
12246
+ except TypeError: # pragma: no cover
12069
12247
  # LazyFrames require order_by parameter - use first column for ordering
12070
- first_col = data_nw.columns[0]
12071
- data_nw = data_nw.with_row_index(name=index_name, order_by=first_col)
12248
+ first_col = data_nw.columns[0] # pragma: no cover
12249
+ data_nw = data_nw.with_row_index(
12250
+ name=index_name, order_by=first_col
12251
+ ) # pragma: no cover
12072
12252
 
12073
12253
  # Get all validation step result tables and join together the `pb_is_good_` columns
12074
12254
  # ensuring that the columns are named uniquely (e.g., `pb_is_good_1`, `pb_is_good_2`, ...)
@@ -12080,10 +12260,12 @@ class Validate:
12080
12260
  try:
12081
12261
  # Try without order_by first (for DataFrames)
12082
12262
  results_tbl = results_tbl.with_row_index(name=index_name)
12083
- except TypeError:
12263
+ except TypeError: # pragma: no cover
12084
12264
  # LazyFrames require order_by parameter - use first column for ordering
12085
- first_col = results_tbl.columns[0]
12086
- results_tbl = results_tbl.with_row_index(name=index_name, order_by=first_col)
12265
+ first_col = results_tbl.columns[0] # pragma: no cover
12266
+ results_tbl = results_tbl.with_row_index(
12267
+ name=index_name, order_by=first_col
12268
+ ) # pragma: no cover
12087
12269
 
12088
12270
  # Add numerical suffix to the `pb_is_good_` column to make it unique
12089
12271
  results_tbl = results_tbl.select([index_name, "pb_is_good_"]).rename(
@@ -12215,15 +12397,15 @@ class Validate:
12215
12397
  # If the table is a Polars one, determine if it's a LazyFrame
12216
12398
  if tbl_info == "polars":
12217
12399
  if _is_lazy_frame(self.data):
12218
- tbl_info = "polars-lazy"
12400
+ tbl_info = "polars-lazy" # pragma: no cover
12219
12401
 
12220
12402
  # Determine if the input table is a Narwhals DF
12221
12403
  if _is_narwhals_table(self.data):
12222
12404
  # Determine if the Narwhals table is a LazyFrame
12223
- if _is_lazy_frame(self.data):
12224
- tbl_info = "narwhals-lazy"
12405
+ if _is_lazy_frame(self.data): # pragma: no cover
12406
+ tbl_info = "narwhals-lazy" # pragma: no cover
12225
12407
  else:
12226
- tbl_info = "narwhals"
12408
+ tbl_info = "narwhals" # pragma: no cover
12227
12409
 
12228
12410
  # Get the thresholds object
12229
12411
  thresholds = self.thresholds
@@ -12388,7 +12570,7 @@ class Validate:
12388
12570
  if lang in RTL_LANGUAGES:
12389
12571
  gt_tbl = gt_tbl.tab_style(
12390
12572
  style=style.css("direction: rtl;"), locations=loc.source_notes()
12391
- )
12573
+ ) # pragma: no cover
12392
12574
 
12393
12575
  if incl_header:
12394
12576
  gt_tbl = gt_tbl.tab_header(title=html(title_text), subtitle=html(combined_subtitle))
@@ -12537,6 +12719,11 @@ class Validate:
12537
12719
  elif assertion_type[i] in ["specially"]:
12538
12720
  values_upd.append("EXPR")
12539
12721
 
12722
+ elif assertion_type[i] in ["col_vals_regex"]:
12723
+ pattern = value["pattern"]
12724
+
12725
+ values_upd.append(str(pattern))
12726
+
12540
12727
  # If the assertion type is not recognized, add the value as a string
12541
12728
  else:
12542
12729
  values_upd.append(str(value))
@@ -12705,9 +12892,11 @@ class Validate:
12705
12892
  # Get the number of rows in the extract (safe for LazyFrames)
12706
12893
  try:
12707
12894
  n_rows = len(extract_nw)
12708
- except TypeError:
12895
+ except TypeError: # pragma: no cover
12709
12896
  # For LazyFrames, collect() first to get length
12710
- n_rows = len(extract_nw.collect()) if hasattr(extract_nw, "collect") else 0
12897
+ n_rows = (
12898
+ len(extract_nw.collect()) if hasattr(extract_nw, "collect") else 0
12899
+ ) # pragma: no cover
12711
12900
 
12712
12901
  # If the number of rows is zero, then produce an em dash then go to the next iteration
12713
12902
  if n_rows == 0:
@@ -12715,7 +12904,7 @@ class Validate:
12715
12904
  continue
12716
12905
 
12717
12906
  # Write the CSV text (ensure LazyFrames are collected first)
12718
- if hasattr(extract_nw, "collect"):
12907
+ if hasattr(extract_nw, "collect"): # pragma: no cover
12719
12908
  extract_nw = extract_nw.collect()
12720
12909
  csv_text = extract_nw.write_csv()
12721
12910
 
@@ -13217,7 +13406,7 @@ class Validate:
13217
13406
  elif isinstance(column, list):
13218
13407
  column_position = [list(self.data.columns).index(col) + 1 for col in column]
13219
13408
  else:
13220
- column_position = None
13409
+ column_position = None # pragma: no cover
13221
13410
  else:
13222
13411
  column_position = None
13223
13412
 
@@ -13309,7 +13498,7 @@ class Validate:
13309
13498
  )
13310
13499
 
13311
13500
  else:
13312
- step_report = None
13501
+ step_report = None # pragma: no cover
13313
13502
 
13314
13503
  return step_report
13315
13504
 
@@ -13797,7 +13986,7 @@ def _conditional_string_date_dttm_conversion(
13797
13986
  elif not allow_regular_strings:
13798
13987
  raise ValueError(
13799
13988
  "If `value=` is provided as a string it must be a date or datetime string."
13800
- )
13989
+ ) # pragma: no cover
13801
13990
  # If allow_regular_strings is True, regular strings pass through unchanged
13802
13991
 
13803
13992
  return value
@@ -13851,12 +14040,33 @@ def _process_brief(
13851
14040
 
13852
14041
  if segment is not None:
13853
14042
  # The segment is always a tuple of the form ("{column}", "{value}")
14043
+ # Handle both regular lists and Segment objects (from seg_group())
14044
+
14045
+ segment_column = segment[0]
14046
+ segment_value = segment[1]
14047
+
14048
+ # If segment_value is a Segment object (from seg_group()), format it appropriately
14049
+ if isinstance(segment_value, Segment):
14050
+ # For Segment objects, format the segments as a readable string
14051
+ segments = segment_value.segments
14052
+ if len(segments) == 1:
14053
+ # Single segment: join the values with commas
14054
+ segment_value_str = ", ".join(str(v) for v in segments[0])
14055
+ else:
14056
+ # Multiple segments: join each segment with commas, separate segments with " | "
14057
+ segment_value_str = " | ".join([", ".join(str(v) for v in seg) for seg in segments])
14058
+ else:
14059
+ # For regular lists or other types, convert to string
14060
+ if isinstance(segment_value, list):
14061
+ segment_value_str = ", ".join(str(v) for v in segment_value)
14062
+ else:
14063
+ segment_value_str = str(segment_value)
13854
14064
 
13855
- segment_fmt = f"{segment[0]} / {segment[1]}"
14065
+ segment_fmt = f"{segment_column} / {segment_value_str}"
13856
14066
 
13857
14067
  brief = brief.replace("{segment}", segment_fmt)
13858
- brief = brief.replace("{segment_column}", segment[0])
13859
- brief = brief.replace("{segment_value}", segment[1])
14068
+ brief = brief.replace("{segment_column}", segment_column)
14069
+ brief = brief.replace("{segment_value}", segment_value_str)
13860
14070
 
13861
14071
  return brief
13862
14072
 
@@ -13890,7 +14100,7 @@ def _process_action_str(
13890
14100
  if col is not None:
13891
14101
  # If a list of columns is provided, then join the columns into a comma-separated string
13892
14102
  if isinstance(col, list):
13893
- col = ", ".join(col)
14103
+ col = ", ".join(col) # pragma: no cover
13894
14104
 
13895
14105
  action_str = action_str.replace("{col}", col)
13896
14106
  action_str = action_str.replace("{column}", col)
@@ -14163,15 +14373,30 @@ def _create_text_null(
14163
14373
 
14164
14374
 
14165
14375
  def _create_text_regex(
14166
- lang: str, column: str | None, pattern: str, for_failure: bool = False
14376
+ lang: str, column: str | None, pattern: str | dict, for_failure: bool = False
14167
14377
  ) -> str:
14168
14378
  type_ = _expect_failure_type(for_failure=for_failure)
14169
14379
 
14170
14380
  column_text = _prep_column_text(column=column)
14171
14381
 
14172
- return EXPECT_FAIL_TEXT[f"regex_{type_}_text"][lang].format(
14382
+ # Handle case where pattern is a dictionary containing `pattern` and `inverse`
14383
+ if isinstance(pattern, dict):
14384
+ pattern_str = pattern["pattern"]
14385
+ inverse = pattern.get("inverse", False)
14386
+ else:
14387
+ # For backward compatibility, assume it's just the pattern string
14388
+ pattern_str = pattern
14389
+ inverse = False
14390
+
14391
+ # Use inverse-specific translations if inverse=True
14392
+ if inverse:
14393
+ text_key = f"regex_inverse_{type_}_text"
14394
+ else:
14395
+ text_key = f"regex_{type_}_text"
14396
+
14397
+ return EXPECT_FAIL_TEXT[text_key][lang].format(
14173
14398
  column_text=column_text,
14174
- values_text=pattern,
14399
+ values_text=pattern_str,
14175
14400
  )
14176
14401
 
14177
14402
 
@@ -14287,7 +14512,7 @@ def _prep_values_text(
14287
14512
  length_values = len(values)
14288
14513
 
14289
14514
  if length_values == 0:
14290
- return ""
14515
+ return "" # pragma: no cover
14291
14516
 
14292
14517
  if length_values > limit:
14293
14518
  num_omitted = length_values - limit
@@ -14296,7 +14521,7 @@ def _prep_values_text(
14296
14521
  formatted_values = []
14297
14522
  for value in values[:limit]:
14298
14523
  if isinstance(value, (datetime.datetime, datetime.date)):
14299
- formatted_values.append(f"`{value.isoformat()}`")
14524
+ formatted_values.append(f"`{value.isoformat()}`") # pragma: no cover
14300
14525
  else:
14301
14526
  formatted_values.append(f"`{value}`")
14302
14527
 
@@ -14486,8 +14711,8 @@ def _apply_segments(data_tbl: any, segments_expr: tuple[str, Any]) -> any:
14486
14711
  if len(segment_str) == 10 and segment_str.count("-") == 2:
14487
14712
  try:
14488
14713
  parsed_value = date.fromisoformat(segment_str)
14489
- except ValueError:
14490
- pass
14714
+ except ValueError: # pragma: no cover
14715
+ pass # pragma: no cover
14491
14716
 
14492
14717
  # Format 2: Datetime strings with UTC timezone like
14493
14718
  # "2016-01-04 00:00:01 UTC.strict_cast(...)"
@@ -14499,27 +14724,28 @@ def _apply_segments(data_tbl: any, segments_expr: tuple[str, Any]) -> any:
14499
14724
  parsed_dt = datetime.fromisoformat(datetime_part)
14500
14725
  # Convert midnight datetimes to dates for consistency
14501
14726
  if parsed_dt.time() == datetime.min.time():
14502
- parsed_value = parsed_dt.date()
14727
+ parsed_value = parsed_dt.date() # pragma: no cover
14503
14728
  else:
14504
14729
  parsed_value = parsed_dt
14505
- except (ValueError, IndexError):
14506
- pass
14730
+ except (ValueError, IndexError): # pragma: no cover
14731
+ pass # pragma: no cover
14507
14732
 
14508
14733
  # Format 3: Bracketed expressions like ['2016-01-04']
14509
14734
  elif segment_str.startswith("[") and segment_str.endswith("]"):
14510
- try:
14511
- content = segment_str[2:-2] # Remove [' and ']
14735
+ try: # pragma: no cover
14736
+ # Remove [' and ']
14737
+ content = segment_str[2:-2] # pragma: no cover
14512
14738
 
14513
14739
  # Try parsing as date first
14514
- if len(content) == 10 and content.count("-") == 2:
14515
- try:
14516
- parsed_value = date.fromisoformat(content)
14517
- except ValueError:
14518
- pass
14740
+ if len(content) == 10 and content.count("-") == 2: # pragma: no cover
14741
+ try: # pragma: no cover
14742
+ parsed_value = date.fromisoformat(content) # pragma: no cover
14743
+ except ValueError: # pragma: no cover
14744
+ pass # pragma: no cover
14519
14745
 
14520
14746
  # Try parsing as datetime
14521
- if parsed_value is None:
14522
- try:
14747
+ if parsed_value is None: # pragma: no cover
14748
+ try: # pragma: no cover
14523
14749
  parsed_dt = datetime.fromisoformat(content.replace(" UTC", ""))
14524
14750
  if parsed_dt.time() == datetime.min.time():
14525
14751
  parsed_value = parsed_dt.date()
@@ -14528,8 +14754,8 @@ def _apply_segments(data_tbl: any, segments_expr: tuple[str, Any]) -> any:
14528
14754
  except ValueError:
14529
14755
  pass
14530
14756
 
14531
- except (ValueError, IndexError):
14532
- pass
14757
+ except (ValueError, IndexError): # pragma: no cover
14758
+ pass # pragma: no cover
14533
14759
 
14534
14760
  # Handle `pl.datetime()` expressions with .alias("datetime")
14535
14761
  elif "datetime" in segment_str and '.alias("datetime")' in segment_str:
@@ -14540,10 +14766,10 @@ def _apply_segments(data_tbl: any, segments_expr: tuple[str, Any]) -> any:
14540
14766
  if parsed_dt.time() == datetime.min.time():
14541
14767
  parsed_value = parsed_dt.date()
14542
14768
  else:
14543
- parsed_value = parsed_dt
14769
+ parsed_value = parsed_dt # pragma: no cover
14544
14770
 
14545
- except (ValueError, AttributeError):
14546
- pass
14771
+ except (ValueError, AttributeError): # pragma: no cover
14772
+ pass # pragma: no cover
14547
14773
 
14548
14774
  # If we successfully parsed a value, use it; otherwise leave segment as is
14549
14775
  if parsed_value is not None:
@@ -14567,9 +14793,9 @@ def _apply_segments(data_tbl: any, segments_expr: tuple[str, Any]) -> any:
14567
14793
  # Filter the data table based on the column name and segment
14568
14794
  # Use the new Ibis API methods to avoid deprecation warnings
14569
14795
  if segment is None:
14570
- data_tbl = data_tbl.filter(data_tbl[column].isnull())
14796
+ data_tbl = data_tbl.filter(data_tbl[column].isnull()) # pragma: no cover
14571
14797
  elif isinstance(segment, list):
14572
- data_tbl = data_tbl.filter(data_tbl[column].isin(segment))
14798
+ data_tbl = data_tbl.filter(data_tbl[column].isin(segment)) # pragma: no cover
14573
14799
  else:
14574
14800
  data_tbl = data_tbl.filter(data_tbl[column] == segment)
14575
14801
 
@@ -14690,7 +14916,7 @@ def _get_title_text(
14690
14916
  "</span>"
14691
14917
  f'<span style="float: right;">{title}</span>'
14692
14918
  "</div>"
14693
- )
14919
+ ) # pragma: no cover
14694
14920
 
14695
14921
  return html_str
14696
14922
 
@@ -14768,24 +14994,6 @@ def _transform_eval(
14768
14994
  return symbol_list
14769
14995
 
14770
14996
 
14771
- def _format_numbers_with_gt(
14772
- values: list[int], n_sigfig: int = 3, compact: bool = True, locale: str = "en"
14773
- ) -> list[str]:
14774
- """Format numbers using Great Tables GT object to avoid pandas dependency."""
14775
- import polars as pl
14776
-
14777
- # Create a single-column DataFrame with all values
14778
- df = pl.DataFrame({"values": values})
14779
-
14780
- # Create GT object and format the column
14781
- gt_obj = GT(df).fmt_number(columns="values", n_sigfig=n_sigfig, compact=compact, locale=locale)
14782
-
14783
- # Extract the formatted values using _get_column_of_values
14784
- formatted_values = _get_column_of_values(gt_obj, column_name="values", context="html")
14785
-
14786
- return formatted_values
14787
-
14788
-
14789
14997
  def _format_single_number_with_gt(
14790
14998
  value: int, n_sigfig: int = 3, compact: bool = True, locale: str = "en", df_lib=None
14791
14999
  ) -> str:
@@ -14796,12 +15004,14 @@ def _format_single_number_with_gt(
14796
15004
  import polars as pl
14797
15005
 
14798
15006
  df_lib = pl
14799
- elif _is_lib_present("pandas"):
14800
- import pandas as pd
15007
+ elif _is_lib_present("pandas"): # pragma: no cover
15008
+ import pandas as pd # pragma: no cover
14801
15009
 
14802
- df_lib = pd
14803
- else:
14804
- raise ImportError("Neither Polars nor Pandas is available for formatting")
15010
+ df_lib = pd # pragma: no cover
15011
+ else: # pragma: no cover
15012
+ raise ImportError(
15013
+ "Neither Polars nor Pandas is available for formatting"
15014
+ ) # pragma: no cover
14805
15015
 
14806
15016
  # Create a single-row, single-column DataFrame using the specified library
14807
15017
  df = df_lib.DataFrame({"value": [value]})
@@ -14867,12 +15077,14 @@ def _format_single_float_with_gt(
14867
15077
  import polars as pl
14868
15078
 
14869
15079
  df_lib = pl
14870
- elif _is_lib_present("pandas"):
14871
- import pandas as pd
15080
+ elif _is_lib_present("pandas"): # pragma: no cover
15081
+ import pandas as pd # pragma: no cover
14872
15082
 
14873
- df_lib = pd
14874
- else:
14875
- raise ImportError("Neither Polars nor Pandas is available for formatting")
15083
+ df_lib = pd # pragma: no cover
15084
+ else: # pragma: no cover
15085
+ raise ImportError(
15086
+ "Neither Polars nor Pandas is available for formatting"
15087
+ ) # pragma: no cover
14876
15088
 
14877
15089
  # Create a single-row, single-column DataFrame using the specified library
14878
15090
  df = df_lib.DataFrame({"value": [value]})
@@ -14904,7 +15116,7 @@ def _transform_passed_failed(
14904
15116
  return _format_single_float_with_gt(value, decimals=2, locale=locale, df_lib=df_lib)
14905
15117
  else:
14906
15118
  # Fallback to the original behavior
14907
- return vals.fmt_number(value, decimals=2, locale=locale)[0]
15119
+ return vals.fmt_number(value, decimals=2, locale=locale)[0] # pragma: no cover
14908
15120
 
14909
15121
  passed_failed = [
14910
15122
  (
@@ -15044,7 +15256,7 @@ def _get_callable_source(fn: Callable) -> str:
15044
15256
  return pre_arg
15045
15257
  except (OSError, TypeError): # pragma: no cover
15046
15258
  return fn.__name__
15047
- return fn
15259
+ return fn # pragma: no cover
15048
15260
 
15049
15261
 
15050
15262
  def _extract_pre_argument(source: str) -> str:
@@ -15128,12 +15340,14 @@ def _format_single_integer_with_gt(value: int, locale: str = "en", df_lib=None)
15128
15340
  import polars as pl
15129
15341
 
15130
15342
  df_lib = pl
15131
- elif _is_lib_present("pandas"):
15132
- import pandas as pd
15343
+ elif _is_lib_present("pandas"): # pragma: no cover
15344
+ import pandas as pd # pragma: no cover
15133
15345
 
15134
- df_lib = pd
15135
- else:
15136
- raise ImportError("Neither Polars nor Pandas is available for formatting")
15346
+ df_lib = pd # pragma: no cover
15347
+ else: # pragma: no cover
15348
+ raise ImportError(
15349
+ "Neither Polars nor Pandas is available for formatting"
15350
+ ) # pragma: no cover
15137
15351
 
15138
15352
  # Create a single-row, single-column DataFrame using the specified library
15139
15353
  df = df_lib.DataFrame({"value": [value]})
@@ -15161,12 +15375,14 @@ def _format_single_float_with_gt_custom(
15161
15375
  import polars as pl
15162
15376
 
15163
15377
  df_lib = pl
15164
- elif _is_lib_present("pandas"):
15165
- import pandas as pd
15378
+ elif _is_lib_present("pandas"): # pragma: no cover
15379
+ import pandas as pd # pragma: no cover
15166
15380
 
15167
- df_lib = pd
15168
- else:
15169
- raise ImportError("Neither Polars nor Pandas is available for formatting")
15381
+ df_lib = pd # pragma: no cover
15382
+ else: # pragma: no cover
15383
+ raise ImportError(
15384
+ "Neither Polars nor Pandas is available for formatting"
15385
+ ) # pragma: no cover
15170
15386
 
15171
15387
  # Create a single-row, single-column DataFrame using the specified library
15172
15388
  df = df_lib.DataFrame({"value": [value]})
@@ -15201,7 +15417,7 @@ def _create_thresholds_html(thresholds: Thresholds, locale: str, df_lib=None) ->
15201
15417
  # Fallback to the original behavior
15202
15418
  return fmt_number(
15203
15419
  value, decimals=decimals, drop_trailing_zeros=drop_trailing_zeros, locale=locale
15204
- )[0]
15420
+ )[0] # pragma: no cover
15205
15421
 
15206
15422
  def _format_integer_safe(value: int) -> str:
15207
15423
  if df_lib is not None and value is not None:
@@ -15333,7 +15549,8 @@ def _step_report_row_based(
15333
15549
  elements = ", ".join(values)
15334
15550
  text = f"{column} &NotElement; {{{elements}}}"
15335
15551
  elif assertion_type == "col_vals_regex":
15336
- text = STEP_REPORT_TEXT["column_matches_regex"][lang].format(column=column, values=values)
15552
+ pattern = values["pattern"]
15553
+ text = STEP_REPORT_TEXT["column_matches_regex"][lang].format(column=column, values=pattern)
15337
15554
  elif assertion_type == "col_vals_null":
15338
15555
  text = STEP_REPORT_TEXT["column_is_null"][lang].format(column=column)
15339
15556
  elif assertion_type == "col_vals_not_null":
@@ -15386,9 +15603,12 @@ def _step_report_row_based(
15386
15603
  title = STEP_REPORT_TEXT["report_for_step_i"][lang].format(i=i) + " " + CHECK_MARK_SPAN
15387
15604
  assertion_header_text = STEP_REPORT_TEXT["assertion_header_text"][lang]
15388
15605
 
15389
- # Use success_statement_no_column for col_vals_expr since it doesn't target a specific column
15606
+ # Use 'success_statement_no_column' for col_vals_expr() since it doesn't target
15607
+ # a specific column
15390
15608
  if assertion_type == "col_vals_expr":
15391
- success_stmt = STEP_REPORT_TEXT["success_statement_no_column"][lang].format(n=n)
15609
+ success_stmt = STEP_REPORT_TEXT["success_statement_no_column"][lang].format(
15610
+ n=n
15611
+ ) # pragma: no cover
15392
15612
  else:
15393
15613
  success_stmt = STEP_REPORT_TEXT["success_statement"][lang].format(
15394
15614
  n=n,
@@ -16101,14 +16321,14 @@ def _step_report_schema_any_order(
16101
16321
  if exp_columns_dict[column_name_exp_i]["colname_matched"]:
16102
16322
  col_exp_correct.append(CHECK_MARK_SPAN)
16103
16323
  else:
16104
- col_exp_correct.append(CROSS_MARK_SPAN)
16324
+ col_exp_correct.append(CROSS_MARK_SPAN) # pragma: no cover
16105
16325
 
16106
16326
  #
16107
16327
  # `dtype_exp` values
16108
16328
  #
16109
16329
 
16110
16330
  if not exp_columns_dict[column_name_exp_i]["dtype_present"]:
16111
- dtype_exp.append("")
16331
+ dtype_exp.append("") # pragma: no cover
16112
16332
 
16113
16333
  elif len(exp_columns_dict[column_name_exp_i]["dtype_input"]) > 1:
16114
16334
  dtype = exp_columns_dict[column_name_exp_i]["dtype_input"]
@@ -16143,9 +16363,9 @@ def _step_report_schema_any_order(
16143
16363
  #
16144
16364
 
16145
16365
  if not exp_columns_dict[column_name_exp_i]["colname_matched"]:
16146
- dtype_exp_correct.append("&mdash;")
16366
+ dtype_exp_correct.append("&mdash;") # pragma: no cover
16147
16367
  elif not exp_columns_dict[column_name_exp_i]["dtype_present"]:
16148
- dtype_exp_correct.append("")
16368
+ dtype_exp_correct.append("") # pragma: no cover
16149
16369
  elif exp_columns_dict[column_name_exp_i]["dtype_matched"]:
16150
16370
  dtype_exp_correct.append(CHECK_MARK_SPAN)
16151
16371
  else:
@@ -16191,13 +16411,17 @@ def _step_report_schema_any_order(
16191
16411
  #
16192
16412
 
16193
16413
  if not exp_columns_dict[column_name_exp_i]["dtype_present"]:
16194
- dtype_exp.append("")
16414
+ dtype_exp.append("") # pragma: no cover
16195
16415
 
16196
16416
  elif len(exp_columns_dict[column_name_exp_i]["dtype_input"]) > 1:
16197
- dtype = exp_columns_dict[column_name_exp_i]["dtype_input"]
16417
+ dtype = exp_columns_dict[column_name_exp_i]["dtype_input"] # pragma: no cover
16198
16418
 
16199
- if exp_columns_dict[column_name_exp_i]["dtype_matched_pos"] is not None:
16200
- pos = exp_columns_dict[column_name_exp_i]["dtype_matched_pos"]
16419
+ if (
16420
+ exp_columns_dict[column_name_exp_i]["dtype_matched_pos"] is not None
16421
+ ): # pragma: no cover
16422
+ pos = exp_columns_dict[column_name_exp_i][
16423
+ "dtype_matched_pos"
16424
+ ] # pragma: no cover
16201
16425
 
16202
16426
  # Combine the dtypes together with pipes but underline the matched dtype in
16203
16427
  # green with an HTML span tag and style attribute
@@ -16209,13 +16433,13 @@ def _step_report_schema_any_order(
16209
16433
  else dtype[i]
16210
16434
  )
16211
16435
  for i in range(len(dtype))
16212
- ]
16213
- dtype = " | ".join(dtype)
16214
- dtype_exp.append(dtype)
16436
+ ] # pragma: no cover
16437
+ dtype = " | ".join(dtype) # pragma: no cover
16438
+ dtype_exp.append(dtype) # pragma: no cover
16215
16439
 
16216
16440
  else:
16217
- dtype = " | ".join(dtype)
16218
- dtype_exp.append(dtype)
16441
+ dtype = " | ".join(dtype) # pragma: no cover
16442
+ dtype_exp.append(dtype) # pragma: no cover
16219
16443
 
16220
16444
  else:
16221
16445
  dtype = exp_columns_dict[column_name_exp_i]["dtype_input"][0]
@@ -16227,12 +16451,12 @@ def _step_report_schema_any_order(
16227
16451
 
16228
16452
  if not exp_columns_dict[column_name_exp_i]["colname_matched"]:
16229
16453
  dtype_exp_correct.append("&mdash;")
16230
- elif not exp_columns_dict[column_name_exp_i]["dtype_present"]:
16231
- dtype_exp_correct.append("")
16232
- elif exp_columns_dict[column_name_exp_i]["dtype_matched"]:
16233
- dtype_exp_correct.append(CHECK_MARK_SPAN)
16234
- else:
16235
- dtype_exp_correct.append(CROSS_MARK_SPAN)
16454
+ elif not exp_columns_dict[column_name_exp_i]["dtype_present"]: # pragma: no cover
16455
+ dtype_exp_correct.append("") # pragma: no cover
16456
+ elif exp_columns_dict[column_name_exp_i]["dtype_matched"]: # pragma: no cover
16457
+ dtype_exp_correct.append(CHECK_MARK_SPAN) # pragma: no cover
16458
+ else: # pragma: no cover
16459
+ dtype_exp_correct.append(CROSS_MARK_SPAN) # pragma: no cover
16236
16460
 
16237
16461
  if len(columns_found) > 0:
16238
16462
  # Get the last index of the columns found
@@ -16248,7 +16472,9 @@ def _step_report_schema_any_order(
16248
16472
  ]
16249
16473
 
16250
16474
  else:
16251
- index_exp = [str(i) for i in range(1, len(colnames_exp_unmatched) + 1)]
16475
+ index_exp = [
16476
+ str(i) for i in range(1, len(colnames_exp_unmatched) + 1)
16477
+ ] # pragma: no cover
16252
16478
 
16253
16479
  schema_exp_unmatched = pl.DataFrame(
16254
16480
  {