pointblank 0.11.1__py3-none-any.whl → 0.11.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pointblank/cli.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import copy
3
4
  import sys
4
5
  from pathlib import Path
5
6
  from typing import Any
@@ -274,7 +275,7 @@ def _format_dtype_compact(dtype_str: str) -> str:
274
275
  elif "str" in dtype_str:
275
276
  return "str"
276
277
 
277
- # Unknown or complex types - truncate if too long
278
+ # Unknown or complex types: truncate if too long
278
279
  elif len(dtype_str) > 8:
279
280
  return dtype_str[:8] + "…"
280
281
  else:
@@ -395,7 +396,7 @@ def _rich_print_scan_table(
395
396
  # Clean up HTML formatting from the raw data
396
397
  str_val = str(value)
397
398
 
398
- # Handle multi-line values with <br> tags FIRST - take the first line (absolute number)
399
+ # Handle multi-line values with <br> tags FIRST: take the first line (absolute number)
399
400
  if "<br>" in str_val:
400
401
  str_val = str_val.split("<br>")[0].strip()
401
402
  # For unique values, we want just the integer part
@@ -414,14 +415,14 @@ def _rich_print_scan_table(
414
415
  # Clean up extra whitespace
415
416
  str_val = re.sub(r"\s+", " ", str_val).strip()
416
417
 
417
- # Handle values like "2<.01" - extract the first number
418
+ # Handle values like "2<.01": extract the first number
418
419
  if "<" in str_val and not (str_val.startswith("<") and str_val.endswith(">")):
419
420
  # Extract number before the < symbol
420
421
  before_lt = str_val.split("<")[0].strip()
421
422
  if before_lt and before_lt.replace(".", "").replace("-", "").isdigit():
422
423
  str_val = before_lt
423
424
 
424
- # Handle boolean unique values like "T0.62F0.38" - extract the more readable format
425
+ # Handle boolean unique values like "T0.62F0.38": extract the more readable format
425
426
  if re.match(r"^[TF]\d+\.\d+[TF]\d+\.\d+$", str_val):
426
427
  # Extract T and F values
427
428
  t_match = re.search(r"T(\d+\.\d+)", str_val)
@@ -451,7 +452,7 @@ def _rich_print_scan_table(
451
452
  # Simple integers under 10000
452
453
  return str(int(num_val))
453
454
  elif abs(num_val) >= 10000000 and abs(num_val) < 100000000:
454
- # Likely dates in YYYYMMDD format - format as date-like
455
+ # Likely dates in YYYYMMDD format: format as date-like
455
456
  int_val = int(num_val)
456
457
  if 19000101 <= int_val <= 29991231: # Reasonable date range
457
458
  str_date = str(int_val)
@@ -463,29 +464,29 @@ def _rich_print_scan_table(
463
464
  # Otherwise treat as large number
464
465
  return f"{num_val / 1000000:.1f}M"
465
466
  elif abs(num_val) >= 1000000:
466
- # Large numbers - use scientific notation or M/k notation
467
+ # Large numbers: use scientific notation or M/k notation
467
468
 
468
469
  if abs(num_val) >= 1000000000:
469
470
  return f"{num_val:.1e}"
470
471
  else:
471
472
  return f"{num_val / 1000000:.1f}M"
472
473
  elif abs(num_val) >= 10000:
473
- # Numbers >= 10k - use compact notation
474
+ # Numbers >= 10k: use compact notation
474
475
  return f"{num_val / 1000:.1f}k"
475
476
  elif abs(num_val) >= 100:
476
- # Numbers 100-9999 - show with minimal decimals
477
+ # Numbers 100-9999: show with minimal decimals
477
478
  return f"{num_val:.1f}"
478
479
  elif abs(num_val) >= 10:
479
- # Numbers 10-99 - show with one decimal
480
+ # Numbers 10-99: show with one decimal
480
481
  return f"{num_val:.1f}"
481
482
  elif abs(num_val) >= 1:
482
- # Numbers 1-9 - show with two decimals
483
+ # Numbers 1-9: show with two decimals
483
484
  return f"{num_val:.2f}"
484
485
  elif abs(num_val) >= 0.01:
485
- # Small numbers - show with appropriate precision
486
+ # Small numbers: show with appropriate precision
486
487
  return f"{num_val:.2f}"
487
488
  else:
488
- # Very small numbers - use scientific notation
489
+ # Very small numbers: use scientific notation
489
490
 
490
491
  return f"{num_val:.1e}"
491
492
 
@@ -493,7 +494,7 @@ def _rich_print_scan_table(
493
494
  # Not a number, handle as string
494
495
  pass
495
496
 
496
- # Handle date/datetime strings - show abbreviated format
497
+ # Handle date/datetime strings: show abbreviated format
497
498
  if len(str_val) > 10 and any(char in str_val for char in ["-", "/", ":"]):
498
499
  # Likely a date/datetime, show abbreviated
499
500
  if len(str_val) > max_width:
@@ -933,14 +934,19 @@ def _rich_print_gt_table(
933
934
 
934
935
 
935
936
  def _display_validation_summary(validation: Any) -> None:
936
- """Display a validation summary in a Rich table format."""
937
+ """Display a validation summary in a compact Rich table format."""
937
938
  try:
938
939
  # Try to get the summary from the validation report
939
940
  if hasattr(validation, "validation_info") and validation.validation_info is not None:
940
941
  # Use the validation_info to create a summary
941
942
  info = validation.validation_info
942
943
  n_steps = len(info)
943
- n_passed = sum(1 for step in info if step.all_passed)
944
+
945
+ # Count steps based on their threshold status
946
+ n_passed = sum(
947
+ 1 for step in info if not step.warning and not step.error and not step.critical
948
+ )
949
+ n_all_passed = sum(1 for step in info if step.all_passed)
944
950
  n_failed = n_steps - n_passed
945
951
 
946
952
  # Calculate severity counts
@@ -950,64 +956,213 @@ def _display_validation_summary(validation: Any) -> None:
950
956
 
951
957
  all_passed = n_failed == 0
952
958
 
953
- # Determine highest severity
959
+ # Determine highest severity and its color
954
960
  if n_critical > 0:
955
961
  highest_severity = "critical"
962
+ severity_color = "red"
956
963
  elif n_error > 0:
957
964
  highest_severity = "error"
965
+ severity_color = "yellow"
958
966
  elif n_warning > 0:
959
967
  highest_severity = "warning"
960
- elif n_failed > 0:
961
- highest_severity = "some failing"
962
- else:
968
+ severity_color = "bright_black" # gray
969
+ elif n_all_passed == n_steps:
970
+ # All steps passed AND all steps had 100% pass rate
963
971
  highest_severity = "all passed"
972
+ severity_color = "bold green"
973
+ else:
974
+ # Steps passed (no threshold exceedances) but some had failing test units
975
+ highest_severity = "passed"
976
+ severity_color = "green"
977
+
978
+ # Create compact summary header
979
+ # Format: Steps: 6 / P: 3 (3 AP) / W: 3 / E: 0 / C: 0 / warning
980
+ summary_header = (
981
+ f"Steps: {n_steps} / P: {n_passed} ({n_all_passed} AP) / "
982
+ f"W: {n_warning} / E: {n_error} / C: {n_critical} / "
983
+ f"[{severity_color}]{highest_severity}[/{severity_color}]"
984
+ )
964
985
 
965
- # Create a summary table
966
- table = Table(title="Validation Summary", show_header=True, header_style="bold magenta")
967
- table.add_column("Metric", style="cyan", no_wrap=True)
968
- table.add_column("Value", style="green")
969
-
970
- # Add summary statistics
971
- table.add_row("Total Steps", str(n_steps))
972
- table.add_row("Passing Steps", str(n_passed))
973
- table.add_row("Failing Steps", str(n_failed))
974
- table.add_row("Warning Steps", str(n_warning))
975
- table.add_row("Error Steps", str(n_error))
976
- table.add_row("Critical Steps", str(n_critical))
977
- table.add_row("All Passed", str(all_passed))
978
- table.add_row("Highest Severity", highest_severity)
979
-
980
- console.print(table)
986
+ # Print the report title and summary
987
+ console.print()
988
+ console.print("[blue]Validation Report[/blue]")
989
+ console.print(f"[white]{summary_header}[/white]")
981
990
 
982
991
  # Display step details
983
992
  if n_steps > 0:
993
+ from rich.box import SIMPLE_HEAD
994
+
984
995
  steps_table = Table(
985
- title="Validation Steps", show_header=True, header_style="bold cyan"
996
+ show_header=True,
997
+ header_style="bold cyan",
998
+ box=SIMPLE_HEAD,
986
999
  )
987
- steps_table.add_column("Step", style="dim")
988
- steps_table.add_column("Type", style="white")
1000
+ steps_table.add_column("", style="dim")
1001
+ steps_table.add_column("Step", style="white")
989
1002
  steps_table.add_column("Column", style="cyan")
990
- steps_table.add_column("Status", style="white")
991
- steps_table.add_column("Passed/Total", style="green")
1003
+ steps_table.add_column("Values", style="yellow")
1004
+ steps_table.add_column("Units", style="blue")
1005
+ steps_table.add_column("Pass", style="green")
1006
+ steps_table.add_column("Fail", style="red")
1007
+ steps_table.add_column("W", style="bright_black")
1008
+ steps_table.add_column("E", style="yellow")
1009
+ steps_table.add_column("C", style="red")
1010
+ steps_table.add_column("Ext", style="blue", justify="center")
1011
+
1012
+ def format_units(n: int) -> str:
1013
+ """Format large numbers with K, M, B abbreviations for values above 10,000."""
1014
+ if n is None:
1015
+ return "—"
1016
+ if n >= 1000000000: # Billions
1017
+ return f"{n / 1000000000:.1f}B"
1018
+ elif n >= 1000000: # Millions
1019
+ return f"{n / 1000000:.1f}M"
1020
+ elif n >= 10000: # Use K for 10,000 and above
1021
+ return f"{n / 1000:.0f}K"
1022
+ else:
1023
+ return str(n)
1024
+
1025
+ def format_pass_fail(passed: int, total: int) -> str:
1026
+ """Format pass/fail counts with abbreviated numbers and fractions."""
1027
+ if passed is None or total is None or total == 0:
1028
+ return "—/—"
1029
+
1030
+ # Calculate fraction
1031
+ fraction = passed / total
1032
+
1033
+ # Format fraction with special handling for very small and very large values
1034
+ if fraction == 0.0:
1035
+ fraction_str = "0.00"
1036
+ elif fraction == 1.0:
1037
+ fraction_str = "1.00"
1038
+ elif fraction < 0.005: # Less than 0.005 rounds to 0.00
1039
+ fraction_str = "<0.01"
1040
+ elif fraction > 0.995: # Greater than 0.995 rounds to 1.00
1041
+ fraction_str = ">0.99"
1042
+ else:
1043
+ fraction_str = f"{fraction:.2f}"
1044
+
1045
+ # Format absolute number with abbreviations
1046
+ absolute_str = format_units(passed)
1047
+
1048
+ return f"{absolute_str}/{fraction_str}"
992
1049
 
993
1050
  for step in info:
994
- status_icon = "✓" if step.all_passed else "✗"
995
- status_color = "green" if step.all_passed else "red"
1051
+ # Extract values information for the Values column
1052
+ values_str = "" # Default to an empty string; the branches below set a value or an em dash
1053
+
1054
+ # Handle different validation types
1055
+ if step.assertion_type == "col_schema_match":
1056
+ values_str = "—" # Schema is too complex to display inline
1057
+ elif step.assertion_type == "col_vals_between":
1058
+ # For between validations, try to get left and right bounds
1059
+ if (
1060
+ hasattr(step, "left")
1061
+ and hasattr(step, "right")
1062
+ and step.left is not None
1063
+ and step.right is not None
1064
+ ):
1065
+ values_str = f"[{step.left}, {step.right}]"
1066
+ elif hasattr(step, "values") and step.values is not None:
1067
+ if isinstance(step.values, (list, tuple)) and len(step.values) >= 2:
1068
+ values_str = f"[{step.values[0]}, {step.values[1]}]"
1069
+ else:
1070
+ values_str = str(step.values)
1071
+ elif step.assertion_type in ["row_count_match", "col_count_match"]:
1072
+ # For count match validations, extract the 'count' value from the dictionary
1073
+ if hasattr(step, "values") and step.values is not None:
1074
+ if isinstance(step.values, dict) and "count" in step.values:
1075
+ values_str = str(step.values["count"])
1076
+ else:
1077
+ values_str = str(step.values)
1078
+ else:
1079
+ values_str = "—"
1080
+ elif step.assertion_type in ["col_vals_expr", "conjointly"]:
1081
+ values_str = "COLUMN EXPR"
1082
+ elif step.assertion_type == "specially":
1083
+ values_str = "EXPR"
1084
+ elif hasattr(step, "values") and step.values is not None:
1085
+ if isinstance(step.values, (list, tuple)):
1086
+ if len(step.values) <= 3:
1087
+ values_str = ", ".join(str(v) for v in step.values)
1088
+ else:
1089
+ values_str = f"{', '.join(str(v) for v in step.values[:3])}..."
1090
+ else:
1091
+ values_str = str(step.values)
1092
+ elif hasattr(step, "value") and step.value is not None:
1093
+ values_str = str(step.value)
1094
+ elif hasattr(step, "set") and step.set is not None:
1095
+ if isinstance(step.set, (list, tuple)):
1096
+ if len(step.set) <= 3:
1097
+ values_str = ", ".join(str(v) for v in step.set)
1098
+ else:
1099
+ values_str = f"{', '.join(str(v) for v in step.set[:3])}..."
1100
+ else:
1101
+ values_str = str(step.set)
1102
+
1103
+ # Determine threshold status for W, E, C columns
1104
+ # Check if thresholds are set and whether they were exceeded
1105
+
1106
+ # Warning threshold
1107
+ if (
1108
+ hasattr(step, "thresholds")
1109
+ and step.thresholds
1110
+ and hasattr(step.thresholds, "warning")
1111
+ and step.thresholds.warning is not None
1112
+ ):
1113
+ w_status = (
1114
+ "[bright_black]●[/bright_black]"
1115
+ if step.warning
1116
+ else "[bright_black]○[/bright_black]"
1117
+ )
1118
+ else:
1119
+ w_status = "—"
1120
+
1121
+ # Error threshold
1122
+ if (
1123
+ hasattr(step, "thresholds")
1124
+ and step.thresholds
1125
+ and hasattr(step.thresholds, "error")
1126
+ and step.thresholds.error is not None
1127
+ ):
1128
+ e_status = "[yellow]●[/yellow]" if step.error else "[yellow]○[/yellow]"
1129
+ else:
1130
+ e_status = "—"
996
1131
 
997
- severity = ""
998
- if step.critical:
999
- severity = " [red](CRITICAL)[/red]"
1000
- elif step.error:
1001
- severity = " [red](ERROR)[/red]"
1002
- elif step.warning:
1003
- severity = " [yellow](WARNING)[/yellow]"
1132
+ # Critical threshold
1133
+ if (
1134
+ hasattr(step, "thresholds")
1135
+ and step.thresholds
1136
+ and hasattr(step.thresholds, "critical")
1137
+ and step.thresholds.critical is not None
1138
+ ):
1139
+ c_status = "[red]●[/red]" if step.critical else "[red]○[/red]"
1140
+ else:
1141
+ c_status = "—"
1142
+
1143
+ # Extract status: check whether the step has any non-empty extract data
1144
+ if (
1145
+ hasattr(step, "extract")
1146
+ and step.extract is not None
1147
+ and hasattr(step.extract, "__len__")
1148
+ and len(step.extract) > 0
1149
+ ):
1150
+ ext_status = "[blue]✓[/blue]"
1151
+ else:
1152
+ ext_status = "[bright_black]—[/bright_black]"
1004
1153
 
1005
1154
  steps_table.add_row(
1006
1155
  str(step.i),
1007
1156
  step.assertion_type,
1008
1157
  str(step.column) if step.column else "—",
1009
- f"[{status_color}]{status_icon}[/{status_color}]{severity}",
1010
- f"{step.n_passed}/{step.n}",
1158
+ values_str,
1159
+ format_units(step.n),
1160
+ format_pass_fail(step.n_passed, step.n),
1161
+ format_pass_fail(step.n - step.n_passed, step.n),
1162
+ w_status,
1163
+ e_status,
1164
+ c_status,
1165
+ ext_status,
1011
1166
  )
1012
1167
 
1013
1168
  console.print(steps_table)
@@ -1015,18 +1170,32 @@ def _display_validation_summary(validation: Any) -> None:
1015
1170
  # Display status with appropriate color
1016
1171
  if highest_severity == "all passed":
1017
1172
  console.print(
1018
- Panel("[green]✓ All validations passed![/green]", border_style="green")
1173
+ Panel(
1174
+ "[green]✓ All validations passed![/green]",
1175
+ border_style="green",
1176
+ expand=False,
1177
+ )
1019
1178
  )
1020
- elif highest_severity == "some failing":
1179
+ elif highest_severity == "passed":
1021
1180
  console.print(
1022
- Panel("[yellow]⚠ Some validations failed[/yellow]", border_style="yellow")
1181
+ Panel(
1182
+ "[dim green]⚠ Some steps had failing test units[/dim green]",
1183
+ border_style="dim green",
1184
+ expand=False,
1185
+ )
1023
1186
  )
1024
1187
  elif highest_severity in ["warning", "error", "critical"]:
1025
- color = "yellow" if highest_severity == "warning" else "red"
1188
+ if highest_severity == "warning":
1189
+ color = "bright_black" # gray
1190
+ elif highest_severity == "error":
1191
+ color = "yellow"
1192
+ else: # critical
1193
+ color = "red"
1026
1194
  console.print(
1027
1195
  Panel(
1028
1196
  f"[{color}]✗ Validation failed with {highest_severity} severity[/{color}]",
1029
1197
  border_style=color,
1198
+ expand=False,
1030
1199
  )
1031
1200
  )
1032
1201
  else:
@@ -1043,7 +1212,7 @@ def _display_validation_summary(validation: Any) -> None:
1043
1212
  @click.version_option(version=pb.__version__, prog_name="pb")
1044
1213
  def cli():
1045
1214
  """
1046
- Pointblank CLI - Data validation and quality tools for data engineers.
1215
+ Pointblank CLI: Data validation and quality tools for data engineers.
1047
1216
 
1048
1217
  Use this CLI to run validation scripts, preview tables, and generate reports
1049
1218
  directly from the command line.
@@ -1455,10 +1624,11 @@ def missing(data_source: str, output_html: str | None):
1455
1624
 
1456
1625
 
1457
1626
  @cli.command(name="validate")
1458
- @click.argument("data_source", type=str)
1627
+ @click.argument("data_source", type=str, required=False)
1628
+ @click.option("--list-checks", is_flag=True, help="List available validation checks and exit")
1459
1629
  @click.option(
1460
1630
  "--check",
1461
- "checks", # Changed to collect multiple values
1631
+ "checks",
1462
1632
  type=click.Choice(
1463
1633
  [
1464
1634
  "rows-distinct",
@@ -1472,25 +1642,25 @@ def missing(data_source: str, output_html: str | None):
1472
1642
  "col-vals-le",
1473
1643
  ]
1474
1644
  ),
1645
+ metavar="CHECK_TYPE",
1475
1646
  multiple=True, # Allow multiple --check options
1476
1647
  help="Type of validation check to perform. Can be used multiple times for multiple checks.",
1477
1648
  )
1478
- @click.option("--list-checks", is_flag=True, help="List available validation checks and exit")
1479
1649
  @click.option(
1480
1650
  "--column",
1481
- "columns", # Changed to collect multiple values
1651
+ "columns",
1482
1652
  multiple=True, # Allow multiple --column options
1483
1653
  help="Column name or integer position as #N (1-based index) for validation.",
1484
1654
  )
1485
1655
  @click.option(
1486
1656
  "--set",
1487
- "sets", # Changed to collect multiple values
1657
+ "sets",
1488
1658
  multiple=True, # Allow multiple --set options
1489
1659
  help="Comma-separated allowed values for col-vals-in-set checks.",
1490
1660
  )
1491
1661
  @click.option(
1492
1662
  "--value",
1493
- "values", # Changed to collect multiple values
1663
+ "values",
1494
1664
  type=float,
1495
1665
  multiple=True, # Allow multiple --value options
1496
1666
  help="Numeric value for comparison checks.",
@@ -1502,17 +1672,17 @@ def missing(data_source: str, output_html: str | None):
1502
1672
  "--write-extract", type=str, help="Save failing rows to folder. Provide base name for folder."
1503
1673
  )
1504
1674
  @click.option(
1505
- "--limit", "-l", default=10, help="Maximum number of failing rows to show/save (default: 10)"
1675
+ "--limit", default=500, help="Maximum number of failing rows to save to CSV (default: 500)"
1506
1676
  )
1507
1677
  @click.option("--exit-code", is_flag=True, help="Exit with non-zero code if validation fails")
1508
1678
  @click.pass_context
1509
1679
  def validate(
1510
1680
  ctx: click.Context,
1511
- data_source: str,
1512
- checks: tuple[str, ...], # Changed to tuple
1513
- columns: tuple[str, ...], # Changed to tuple
1514
- sets: tuple[str, ...], # Changed to tuple
1515
- values: tuple[float, ...], # Changed to tuple
1681
+ data_source: str | None,
1682
+ checks: tuple[str, ...],
1683
+ columns: tuple[str, ...],
1684
+ sets: tuple[str, ...],
1685
+ values: tuple[float, ...],
1516
1686
  show_extract: bool,
1517
1687
  write_extract: str | None,
1518
1688
  limit: int,
@@ -1534,21 +1704,21 @@ def validate(
1534
1704
  - Database connection string (e.g., duckdb:///path/to/db.ddb::table_name)
1535
1705
  - Dataset name from pointblank (small_table, game_revenue, nycflights, global_sales)
1536
1706
 
1537
- AVAILABLE CHECKS:
1707
+ AVAILABLE CHECK_TYPES:
1538
1708
 
1539
1709
  Use --list-checks to see all available validation methods with examples.
1540
1710
 
1541
- The default check is 'rows-distinct' which checks for duplicate rows.
1711
+ The default CHECK_TYPE is 'rows-distinct' which checks for duplicate rows.
1542
1712
 
1543
1713
  \b
1544
1714
  - rows-distinct: Check if all rows in the dataset are unique (no duplicates)
1545
1715
  - rows-complete: Check if all rows are complete (no missing values in any column)
1546
1716
  - col-exists: Check if a specific column exists in the dataset (requires --column)
1547
1717
  - col-vals-not-null: Check if all values in a column are not null/missing (requires --column)
1548
- - col-vals-gt: Check if all values in a column are greater than a threshold (requires --column and --value)
1549
- - col-vals-ge: Check if all values in a column are greater than or equal to a threshold (requires --column and --value)
1550
- - col-vals-lt: Check if all values in a column are less than a threshold (requires --column and --value)
1551
- - col-vals-le: Check if all values in a column are less than or equal to a threshold (requires --column and --value)
1718
+ - col-vals-gt: Check if all values in a column are greater than a comparison value (requires --column and --value)
1719
+ - col-vals-ge: Check if all values in a column are greater than or equal to a comparison value (requires --column and --value)
1720
+ - col-vals-lt: Check if all values in a column are less than a comparison value (requires --column and --value)
1721
+ - col-vals-le: Check if all values in a column are less than or equal to a comparison value (requires --column and --value)
1552
1722
  - col-vals-in-set: Check if all values in a column are in an allowed set (requires --column and --set)
1553
1723
 
1554
1724
  Examples:
@@ -1571,28 +1741,7 @@ def validate(
1571
1741
  pb validate data.csv --check col-vals-not-null --column email --check col-vals-gt --column age --value 18
1572
1742
  """
1573
1743
  try:
1574
- # Handle backward compatibility and parameter conversion
1575
- import sys
1576
-
1577
- # Convert parameter tuples to lists, handling default case
1578
- if not checks:
1579
- # No --check options provided, use default
1580
- checks_list = ["rows-distinct"]
1581
- is_using_default_check = True
1582
- else:
1583
- checks_list = list(checks)
1584
- is_using_default_check = False
1585
-
1586
- columns_list = list(columns) if columns else []
1587
- sets_list = list(sets) if sets else []
1588
- values_list = list(values) if values else []
1589
-
1590
- # Map parameters to checks intelligently
1591
- mapped_columns, mapped_sets, mapped_values = _map_parameters_to_checks(
1592
- checks_list, columns_list, sets_list, values_list
1593
- )
1594
-
1595
- # Handle --list-checks option
1744
+ # Handle --list-checks option early (doesn't need data source)
1596
1745
  if list_checks:
1597
1746
  console.print("[bold bright_cyan]Available Validation Checks:[/bold bright_cyan]")
1598
1747
  console.print()
@@ -1616,14 +1765,16 @@ def validate(
1616
1765
  "[bold magenta]Value comparison checks [bright_black](require --column and --value)[/bright_black]:[/bold magenta]"
1617
1766
  )
1618
1767
  console.print(
1619
- " • [bold cyan]col-vals-gt[/bold cyan] Values greater than threshold"
1768
+ " • [bold cyan]col-vals-gt[/bold cyan] Values greater than comparison value"
1620
1769
  )
1621
1770
  console.print(
1622
- " • [bold cyan]col-vals-ge[/bold cyan] Values greater than or equal to threshold"
1771
+ " • [bold cyan]col-vals-ge[/bold cyan] Values greater than or equal to comparison value"
1623
1772
  )
1624
- console.print(" • [bold cyan]col-vals-lt[/bold cyan] Values less than threshold")
1625
1773
  console.print(
1626
- " • [bold cyan]col-vals-le[/bold cyan] Values less than or equal to threshold"
1774
+ " • [bold cyan]col-vals-lt[/bold cyan] Values less than comparison value"
1775
+ )
1776
+ console.print(
1777
+ " • [bold cyan]col-vals-le[/bold cyan] Values less than or equal to comparison value"
1627
1778
  )
1628
1779
  console.print()
1629
1780
  console.print(
@@ -1634,19 +1785,47 @@ def validate(
1634
1785
  )
1635
1786
  console.print()
1636
1787
  console.print("[bold bright_yellow]Examples:[/bold bright_yellow]")
1788
+ console.print(" [bright_blue]pb validate data.csv --check rows-distinct[/bright_blue]")
1637
1789
  console.print(
1638
- f" [bright_blue]pb validate {data_source} --check rows-distinct[/bright_blue]"
1639
- )
1640
- console.print(
1641
- f" [bright_blue]pb validate {data_source} --check col-vals-not-null --column price[/bright_blue]"
1790
+ " [bright_blue]pb validate data.csv --check col-vals-not-null --column price[/bright_blue]"
1642
1791
  )
1643
1792
  console.print(
1644
- f" [bright_blue]pb validate {data_source} --check col-vals-gt --column age --value 18[/bright_blue]"
1793
+ " [bright_blue]pb validate data.csv --check col-vals-gt --column age --value 18[/bright_blue]"
1645
1794
  )
1646
1795
  import sys
1647
1796
 
1648
1797
  sys.exit(0)
1649
1798
 
1799
+ # Check if data_source is provided (required for all operations except --list-checks)
1800
+ if data_source is None:
1801
+ console.print("[red]Error:[/red] DATA_SOURCE is required")
1802
+ console.print("Use 'pb validate --help' for usage information")
1803
+ console.print("Or use 'pb validate --list-checks' to see available validation types")
1804
+ import sys
1805
+
1806
+ sys.exit(1)
1807
+
1808
+ # Handle backward compatibility and parameter conversion
1809
+ import sys
1810
+
1811
+ # Convert parameter tuples to lists, handling default case
1812
+ if not checks:
1813
+ # No --check options provided, use default
1814
+ checks_list = ["rows-distinct"]
1815
+ is_using_default_check = True
1816
+ else:
1817
+ checks_list = list(checks)
1818
+ is_using_default_check = False
1819
+
1820
+ columns_list = list(columns) if columns else []
1821
+ sets_list = list(sets) if sets else []
1822
+ values_list = list(values) if values else []
1823
+
1824
+ # Map parameters to checks intelligently
1825
+ mapped_columns, mapped_sets, mapped_values = _map_parameters_to_checks(
1826
+ checks_list, columns_list, sets_list, values_list
1827
+ )
1828
+
1650
1829
  # Validate required parameters for different check types
1651
1830
  # Check parameters for each check in the list using mapped parameters
1652
1831
  for i, check in enumerate(checks_list):
@@ -1791,7 +1970,7 @@ def validate(
1791
1970
 
1792
1971
  # Display results based on whether we have single or multiple checks
1793
1972
  if len(checks_list) == 1:
1794
- # Single check - use current display format
1973
+ # Single check: use current display format
1795
1974
  _display_validation_result(
1796
1975
  validation,
1797
1976
  checks_list,
@@ -1806,7 +1985,7 @@ def validate(
1806
1985
  limit,
1807
1986
  )
1808
1987
  else:
1809
- # Multiple checks - use stacked display format
1988
+ # Multiple checks: use stacked display format
1810
1989
  any_failed = False
1811
1990
  for i in range(len(checks_list)):
1812
1991
  console.print() # Add spacing between results
@@ -1845,7 +2024,7 @@ def validate(
1845
2024
  console.print()
1846
2025
  console.print("[bold magenta]Common validation options:[/bold magenta]")
1847
2026
  console.print(
1848
- " • [bold cyan]--check rows-complete[/bold cyan] Check for rows with missing values"
2027
+ " • [bold cyan]--check rows-complete[/bold cyan] Check for rows with missing values"
1849
2028
  )
1850
2029
  console.print(
1851
2030
  " • [bold cyan]--check col-vals-not-null[/bold cyan] Check for null values in a column [bright_black](requires --column)[/bright_black]"
@@ -2070,7 +2249,7 @@ def _rich_print_scan_table(
2070
2249
  # Clean up HTML formatting from the raw data
2071
2250
  str_val = str(value)
2072
2251
 
2073
- # Handle multi-line values with <br> tags FIRST - take the first line (absolute number)
2252
+ # Handle multi-line values with <br> tags FIRST: take the first line (absolute number)
2074
2253
  if "<br>" in str_val:
2075
2254
  str_val = str_val.split("<br>")[0].strip()
2076
2255
  # For unique values, we want just the integer part
@@ -2089,14 +2268,14 @@ def _rich_print_scan_table(
2089
2268
  # Clean up extra whitespace
2090
2269
  str_val = re.sub(r"\s+", " ", str_val).strip()
2091
2270
 
2092
- # Handle values like "2<.01" - extract the first number
2271
+ # Handle values like "2<.01": extract the first number
2093
2272
  if "<" in str_val and not (str_val.startswith("<") and str_val.endswith(">")):
2094
2273
  # Extract number before the < symbol
2095
2274
  before_lt = str_val.split("<")[0].strip()
2096
2275
  if before_lt and before_lt.replace(".", "").replace("-", "").isdigit():
2097
2276
  str_val = before_lt
2098
2277
 
2099
- # Handle boolean unique values like "T0.62F0.38" - extract the more readable format
2278
+ # Handle boolean unique values like "T0.62F0.38": extract the more readable format
2100
2279
  if re.match(r"^[TF]\d+\.\d+[TF]\d+\.\d+$", str_val):
2101
2280
  # Extract T and F values
2102
2281
  t_match = re.search(r"T(\d+\.\d+)", str_val)
@@ -2126,7 +2305,7 @@ def _rich_print_scan_table(
2126
2305
  # Simple integers under 10000
2127
2306
  return str(int(num_val))
2128
2307
  elif abs(num_val) >= 10000000 and abs(num_val) < 100000000:
2129
- # Likely dates in YYYYMMDD format - format as date-like
2308
+ # Likely dates in YYYYMMDD format: format as date-like
2130
2309
  int_val = int(num_val)
2131
2310
  if 19000101 <= int_val <= 29991231: # Reasonable date range
2132
2311
  str_date = str(int_val)
@@ -2138,29 +2317,29 @@ def _rich_print_scan_table(
2138
2317
  # Otherwise treat as large number
2139
2318
  return f"{num_val / 1000000:.1f}M"
2140
2319
  elif abs(num_val) >= 1000000:
2141
- # Large numbers - use scientific notation or M/k notation
2320
+ # Large numbers: use scientific notation or M/k notation
2142
2321
 
2143
2322
  if abs(num_val) >= 1000000000:
2144
2323
  return f"{num_val:.1e}"
2145
2324
  else:
2146
2325
  return f"{num_val / 1000000:.1f}M"
2147
2326
  elif abs(num_val) >= 10000:
2148
- # Numbers >= 10k - use compact notation
2327
+ # Numbers >= 10k: use compact notation
2149
2328
  return f"{num_val / 1000:.1f}k"
2150
2329
  elif abs(num_val) >= 100:
2151
- # Numbers 100-9999 - show with minimal decimals
2330
+ # Numbers 100-9999: show with minimal decimals
2152
2331
  return f"{num_val:.1f}"
2153
2332
  elif abs(num_val) >= 10:
2154
- # Numbers 10-99 - show with one decimal
2333
+ # Numbers 10-99: show with one decimal
2155
2334
  return f"{num_val:.1f}"
2156
2335
  elif abs(num_val) >= 1:
2157
- # Numbers 1-9 - show with two decimals
2336
+ # Numbers 1-9: show with two decimals
2158
2337
  return f"{num_val:.2f}"
2159
2338
  elif abs(num_val) >= 0.01:
2160
- # Small numbers - show with appropriate precision
2339
+ # Small numbers: show with appropriate precision
2161
2340
  return f"{num_val:.2f}"
2162
2341
  else:
2163
- # Very small numbers - use scientific notation
2342
+ # Very small numbers: use scientific notation
2164
2343
 
2165
2344
  return f"{num_val:.1e}"
2166
2345
 
@@ -2168,7 +2347,7 @@ def _rich_print_scan_table(
2168
2347
  # Not a number, handle as string
2169
2348
  pass
2170
2349
 
2171
- # Handle date/datetime strings - show abbreviated format
2350
+ # Handle date/datetime strings: show abbreviated format
2172
2351
  if len(str_val) > 10 and any(char in str_val for char in ["-", "/", ":"]):
2173
2352
  # Likely a date/datetime, show abbreviated
2174
2353
  if len(str_val) > max_width:
@@ -2528,7 +2707,7 @@ def _display_validation_result(
2528
2707
 
2529
2708
  # Create friendly title for table
2530
2709
  if total_checks == 1:
2531
- # Single check - use original title format
2710
+ # Single check: use original title format
2532
2711
  if check == "rows-distinct":
2533
2712
  table_title = "Validation Result: Rows Distinct"
2534
2713
  elif check == "col-vals-not-null":
@@ -2550,7 +2729,7 @@ def _display_validation_result(
2550
2729
  else:
2551
2730
  table_title = f"Validation Result: {check.replace('-', ' ').title()}"
2552
2731
  else:
2553
- # Multiple checks - add numbering
2732
+ # Multiple checks: add numbering
2554
2733
  if check == "rows-distinct":
2555
2734
  base_title = "Rows Distinct"
2556
2735
  elif check == "col-vals-not-null":
@@ -2617,7 +2796,7 @@ def _display_validation_result(
2617
2796
  operator = "<"
2618
2797
  elif check == "col-vals-le":
2619
2798
  operator = "<="
2620
- result_table.add_row("Threshold", f"{operator} {value}")
2799
+ result_table.add_row("Comparison Value", f"{operator} {value}")
2621
2800
 
2622
2801
  # Get validation details
2623
2802
  if step_info:
@@ -2728,6 +2907,7 @@ def _display_validation_result(
2728
2907
  Panel(
2729
2908
  success_message,
2730
2909
  border_style="green",
2910
+ expand=False,
2731
2911
  )
2732
2912
  )
2733
2913
  else:
@@ -2757,6 +2937,7 @@ def _display_validation_result(
2757
2937
  Panel(
2758
2938
  failure_message,
2759
2939
  border_style="red",
2940
+ expand=False,
2760
2941
  )
2761
2942
  )
2762
2943
 
@@ -2837,7 +3018,7 @@ def _show_extract_for_multi_check(
2837
3018
  console.print()
2838
3019
  console.print(extract_message)
2839
3020
 
2840
- # Special handling for col-exists check - no rows to show when column doesn't exist
3021
+ # Special handling for col-exists check: no rows to show when column doesn't exist
2841
3022
  if check == "col-exists":
2842
3023
  if show_extract:
2843
3024
  console.print(f"[dim]The column '{column}' was not found in the dataset.[/dim]")
@@ -2848,16 +3029,17 @@ def _show_extract_for_multi_check(
2848
3029
  console.print("[yellow]Cannot save failing rows when column doesn't exist[/yellow]")
2849
3030
  else:
2850
3031
  try:
2851
- # Get failing rows extract - use step_index + 1 since extracts are 1-indexed
3032
+ # Get failing rows extract: use step_index + 1 since extracts are 1-indexed
2852
3033
  failing_rows = validation.get_data_extracts(i=step_index + 1, frame=True)
2853
3034
 
2854
3035
  if failing_rows is not None and len(failing_rows) > 0:
2855
3036
  if show_extract:
2856
- # Limit the number of rows shown
2857
- if len(failing_rows) > limit:
2858
- display_rows = failing_rows.head(limit)
3037
+ # Always limit to 10 rows for display, regardless of limit option
3038
+ display_limit = 10
3039
+ if len(failing_rows) > display_limit:
3040
+ display_rows = failing_rows.head(display_limit)
2859
3041
  console.print(
2860
- f"[dim]Showing first {limit} of {len(failing_rows)} {row_type}[/dim]"
3042
+ f"[dim]Showing first {display_limit} of {len(failing_rows)} {row_type}[/dim]"
2861
3043
  )
2862
3044
  else:
2863
3045
  display_rows = failing_rows
@@ -2868,9 +3050,9 @@ def _show_extract_for_multi_check(
2868
3050
 
2869
3051
  preview_table = pb.preview(
2870
3052
  data=display_rows,
2871
- n_head=min(limit, len(display_rows)),
3053
+ n_head=min(display_limit, len(display_rows)),
2872
3054
  n_tail=0,
2873
- limit=limit,
3055
+ limit=display_limit,
2874
3056
  show_row_numbers=True,
2875
3057
  )
2876
3058
 
@@ -2892,7 +3074,7 @@ def _show_extract_for_multi_check(
2892
3074
  filename = f"step_{step_index + 1:02d}_{safe_check_type}.csv"
2893
3075
  filepath = output_folder / filename
2894
3076
 
2895
- # Limit the output if needed
3077
+ # Use limit option for write_extract
2896
3078
  write_rows = failing_rows
2897
3079
  if len(failing_rows) > limit:
2898
3080
  write_rows = failing_rows.head(limit)
@@ -2997,7 +3179,7 @@ def _show_extract_and_summary(
2997
3179
  if show_extract:
2998
3180
  console.print(extract_message)
2999
3181
 
3000
- # Special handling for col-exists check - no rows to show when column doesn't exist
3182
+ # Special handling for col-exists check: no rows to show when column doesn't exist
3001
3183
  if check == "col-exists" and not step_passed:
3002
3184
  if show_extract:
3003
3185
  console.print(f"[dim]The column '{column}' was not found in the dataset.[/dim]")
@@ -3008,16 +3190,17 @@ def _show_extract_and_summary(
3008
3190
  console.print("[yellow]Cannot save failing rows when column doesn't exist[/yellow]")
3009
3191
  else:
3010
3192
  try:
3011
- # Get failing rows extract - use step_index + 1 since extracts are 1-indexed
3193
+ # Get failing rows extract: use step_index + 1 since extracts are 1-indexed
3012
3194
  failing_rows = validation.get_data_extracts(i=step_index + 1, frame=True)
3013
3195
 
3014
3196
  if failing_rows is not None and len(failing_rows) > 0:
3015
3197
  if show_extract:
3016
- # Limit the number of rows shown
3017
- if len(failing_rows) > limit:
3018
- display_rows = failing_rows.head(limit)
3198
+ # Always limit to 10 rows for display, regardless of limit option
3199
+ display_limit = 10
3200
+ if len(failing_rows) > display_limit:
3201
+ display_rows = failing_rows.head(display_limit)
3019
3202
  console.print(
3020
- f"[dim]Showing first {limit} of {len(failing_rows)} {row_type}[/dim]"
3203
+ f"[dim]Showing first {display_limit} of {len(failing_rows)} {row_type}[/dim]"
3021
3204
  )
3022
3205
  else:
3023
3206
  display_rows = failing_rows
@@ -3028,9 +3211,9 @@ def _show_extract_and_summary(
3028
3211
 
3029
3212
  preview_table = pb.preview(
3030
3213
  data=display_rows,
3031
- n_head=min(limit, len(display_rows)),
3214
+ n_head=min(display_limit, len(display_rows)),
3032
3215
  n_tail=0,
3033
- limit=limit,
3216
+ limit=display_limit,
3034
3217
  show_row_numbers=True,
3035
3218
  )
3036
3219
 
@@ -3052,7 +3235,7 @@ def _show_extract_and_summary(
3052
3235
  filename = f"step_{step_index + 1:02d}_{safe_check_type}.csv"
3053
3236
  filepath = output_folder / filename
3054
3237
 
3055
- # Limit the output if needed
3238
+ # Use limit option for write_extract
3056
3239
  write_rows = failing_rows
3057
3240
  if len(failing_rows) > limit:
3058
3241
  write_rows = failing_rows.head(limit)
@@ -3123,7 +3306,7 @@ def _show_extract_and_summary(
3123
3306
  f"[green]✓ Validation PASSED: {check} check passed for {data_source}[/green]"
3124
3307
  )
3125
3308
 
3126
- console.print(Panel(success_message, border_style="green"))
3309
+ console.print(Panel(success_message, border_style="green", expand=False))
3127
3310
  else:
3128
3311
  if step_info:
3129
3312
  if check == "rows-distinct":
@@ -3151,7 +3334,7 @@ def _show_extract_and_summary(
3151
3334
  if not show_extract and check != "col-exists":
3152
3335
  failure_message += "\n[bright_blue]💡 Tip:[/bright_blue] [cyan]Use --show-extract to see the failing rows[/cyan]"
3153
3336
 
3154
- console.print(Panel(failure_message, border_style="red"))
3337
+ console.print(Panel(failure_message, border_style="red", expand=False))
3155
3338
  else:
3156
3339
  if check == "rows-distinct":
3157
3340
  failure_message = (
@@ -3170,7 +3353,7 @@ def _show_extract_and_summary(
3170
3353
  if not show_extract:
3171
3354
  failure_message += "\n[bright_blue]💡 Tip:[/bright_blue] [cyan]Use --show-extract to see the failing rows[/cyan]"
3172
3355
 
3173
- console.print(Panel(failure_message, border_style="red"))
3356
+ console.print(Panel(failure_message, border_style="red", expand=False))
3174
3357
 
3175
3358
 
3176
3359
  @cli.command()
@@ -3196,6 +3379,9 @@ Example Pointblank validation script.
3196
3379
 
3197
3380
  This script demonstrates how to create validation rules for your data.
3198
3381
  Modify the data loading and validation rules below to match your requirements.
3382
+
3383
+ When using 'pb run' with --data option, the CLI will automatically replace
3384
+ the data source in your validation object with the provided data.
3199
3385
  """
3200
3386
 
3201
3387
  import pointblank as pb
@@ -3239,11 +3425,6 @@ validation = (
3239
3425
  # Finalize the validation
3240
3426
  .interrogate()
3241
3427
  )
3242
-
3243
- # The validation object will be automatically used by the CLI
3244
- # You can also access results programmatically:
3245
- # print(f"All passed: {validation.all_passed()}")
3246
- # print(f"Failed steps: {validation.n_failed()}")
3247
3428
  '''
3248
3429
 
3249
3430
  Path(output_file).write_text(example_script)
@@ -3251,13 +3432,17 @@ validation = (
3251
3432
  console.print("\nEdit the template to add your data loading and validation rules, then run:")
3252
3433
  console.print(f"[cyan]pb run {output_file}[/cyan]")
3253
3434
  console.print(
3254
- f"[cyan]pb run {output_file} --data your_data.csv[/cyan] [dim]# Override data source[/dim]"
3435
+ f"[cyan]pb run {output_file} --data your_data.csv[/cyan] [dim]# Replace data source automatically[/dim]"
3255
3436
  )
3256
3437
 
3257
3438
 
3258
3439
  @cli.command()
3259
3440
  @click.argument("validation_script", type=click.Path(exists=True))
3260
- @click.option("--data", type=str, help="Optional data source to override script's data loading")
3441
+ @click.option(
3442
+ "--data",
3443
+ type=str,
3444
+ help="Data source to replace in validation objects (single validation scripts only)",
3445
+ )
3261
3446
  @click.option("--output-html", type=click.Path(), help="Save HTML validation report to file")
3262
3447
  @click.option("--output-json", type=click.Path(), help="Save JSON validation summary to file")
3263
3448
  @click.option(
@@ -3269,7 +3454,7 @@ validation = (
3269
3454
  help="Save failing rows to folders (one CSV per step). Provide base name for folder.",
3270
3455
  )
3271
3456
  @click.option(
3272
- "--limit", "-l", default=10, help="Maximum number of failing rows to show/save (default: 10)"
3457
+ "--limit", default=500, help="Maximum number of failing rows to save to CSV (default: 500)"
3273
3458
  )
3274
3459
  @click.option(
3275
3460
  "--fail-on",
@@ -3292,8 +3477,11 @@ def run(
3292
3477
  VALIDATION_SCRIPT should be a Python file that defines validation logic.
3293
3478
  The script should load its own data and create validation objects.
3294
3479
 
3295
- If --data is provided, it will be available as a 'cli_data' variable in the script,
3296
- allowing you to optionally override your script's data loading.
3480
+ If --data is provided, it will automatically replace the data source in your
3481
+ validation objects. This works with scripts containing a single validation.
3482
+ For scripts with multiple validations, use separate script files or remove --data.
3483
+
3484
+ To get started quickly, use 'pb make-template' to create a validation script template.
3297
3485
 
3298
3486
  DATA can be:
3299
3487
 
@@ -3307,6 +3495,7 @@ def run(
3307
3495
  Examples:
3308
3496
 
3309
3497
  \b
3498
+ pb make-template my_validation.py # Create a template first
3310
3499
  pb run validation_script.py
3311
3500
  pb run validation_script.py --data data.csv
3312
3501
  pb run validation_script.py --data small_table --output-html report.html
@@ -3369,6 +3558,72 @@ def run(
3369
3558
 
3370
3559
  console.print(f"[green]✓[/green] Found {len(validations)} validation object(s)")
3371
3560
 
3561
+ # Implement automatic data replacement for Validate objects if --data was provided
3562
+ if cli_data is not None:
3563
+ # Check if we have multiple validations (this is not supported)
3564
+ if len(validations) > 1:
3565
+ console.print(
3566
+ f"[red]Error: Found {len(validations)} validation objects in the script.[/red]"
3567
+ )
3568
+ console.print(
3569
+ "[yellow]The --data option replaces data in ALL validation objects,[/yellow]"
3570
+ )
3571
+ console.print(
3572
+ "[yellow]which may cause failures if validations expect different schemas.[/yellow]"
3573
+ )
3574
+ console.print("\n[cyan]Options:[/cyan]")
3575
+ console.print(" 1. Split your script into separate files with one validation each")
3576
+ console.print(
3577
+ " 2. Remove the --data option to use each validation's original data"
3578
+ )
3579
+ sys.exit(1)
3580
+
3581
+ console.print(
3582
+ f"[yellow]Replacing data in {len(validations)} validation object(s) with CLI data[/yellow]"
3583
+ )
3584
+
3585
+ for idx, validation in enumerate(validations, 1):
3586
+ # Check if it's a Validate object with data attribute
3587
+ if hasattr(validation, "data") and hasattr(validation, "interrogate"):
3588
+ console.print("[cyan]Updating validation with new data source...[/cyan]")
3589
+
3590
+ # Store the original validation_info as our "plan"
3591
+ original_validation_info = validation.validation_info.copy()
3592
+
3593
+ # Replace the data
3594
+ validation.data = cli_data
3595
+
3596
+ # Re-process the data (same as what happens in __post_init__)
3597
+ from pointblank.validate import _process_data
3598
+
3599
+ validation.data = _process_data(validation.data)
3600
+
3601
+ # Reset validation results but keep the plan
3602
+ validation.validation_info = []
3603
+
3604
+ # Re-add each validation step from the original plan
3605
+ for val_info in original_validation_info:
3606
+ # Create a copy and reset any interrogation results
3607
+ new_val_info = copy.deepcopy(val_info)
3608
+ # Reset interrogation-specific attributes if they exist
3609
+ if hasattr(new_val_info, "n_passed"):
3610
+ new_val_info.n_passed = None
3611
+ if hasattr(new_val_info, "n_failed"):
3612
+ new_val_info.n_failed = None
3613
+ if hasattr(new_val_info, "all_passed"):
3614
+ new_val_info.all_passed = None
3615
+ if hasattr(new_val_info, "warning"):
3616
+ new_val_info.warning = None
3617
+ if hasattr(new_val_info, "error"):
3618
+ new_val_info.error = None
3619
+ if hasattr(new_val_info, "critical"):
3620
+ new_val_info.critical = None
3621
+ validation.validation_info.append(new_val_info)
3622
+
3623
+ # Re-interrogate with the new data
3624
+ console.print("[cyan]Re-interrogating with new data...[/cyan]")
3625
+ validation.interrogate()
3626
+
3372
3627
  # Process each validation
3373
3628
  overall_failed = False
3374
3629
  overall_critical = False
@@ -3432,11 +3687,12 @@ def run(
3432
3687
  f"\n[cyan]Step {step_num}:[/cyan] {step_info.assertion_type}"
3433
3688
  )
3434
3689
 
3435
- # Limit the number of rows shown
3436
- if len(failing_rows) > limit:
3437
- display_rows = failing_rows.head(limit)
3690
+ # Always limit to 10 rows for display, regardless of limit option
3691
+ display_limit = 10
3692
+ if len(failing_rows) > display_limit:
3693
+ display_rows = failing_rows.head(display_limit)
3438
3694
  console.print(
3439
- f"[dim]Showing first {limit} of {len(failing_rows)} failing rows[/dim]"
3695
+ f"[dim]Showing first {display_limit} of {len(failing_rows)} failing rows[/dim]"
3440
3696
  )
3441
3697
  else:
3442
3698
  display_rows = failing_rows
@@ -3447,9 +3703,9 @@ def run(
3447
3703
  # Create a preview table using pointblank's preview function
3448
3704
  preview_table = pb.preview(
3449
3705
  data=display_rows,
3450
- n_head=min(limit, len(display_rows)),
3706
+ n_head=min(display_limit, len(display_rows)),
3451
3707
  n_tail=0,
3452
- limit=limit,
3708
+ limit=display_limit,
3453
3709
  show_row_numbers=True,
3454
3710
  )
3455
3711
 
@@ -3502,7 +3758,7 @@ def run(
3502
3758
  filename = f"step_{step_num:02d}_{safe_assertion_type}.csv"
3503
3759
  filepath = output_folder / filename
3504
3760
 
3505
- # Limit the output if needed
3761
+ # Use limit for CSV output
3506
3762
  save_rows = failing_rows
3507
3763
  if hasattr(failing_rows, "head") and len(failing_rows) > limit:
3508
3764
  save_rows = failing_rows.head(limit)
@@ -3521,7 +3777,11 @@ def run(
3521
3777
  pd_data = pd.DataFrame(save_rows)
3522
3778
  pd_data.to_csv(str(filepath), index=False)
3523
3779
 
3524
- saved_files.append((filename, len(failing_rows)))
3780
+ # Record the actual number of rows saved
3781
+ rows_saved = (
3782
+ len(save_rows) if hasattr(save_rows, "__len__") else limit
3783
+ )
3784
+ saved_files.append((filename, rows_saved))
3525
3785
 
3526
3786
  except Exception as e:
3527
3787
  console.print(
@@ -3548,11 +3808,11 @@ def run(
3548
3808
  if output_html:
3549
3809
  try:
3550
3810
  if len(validations) == 1:
3551
- # Single validation - save directly
3811
+ # Single validation: save directly
3552
3812
  html_content = validations[0]._repr_html_()
3553
3813
  Path(output_html).write_text(html_content, encoding="utf-8")
3554
3814
  else:
3555
- # Multiple validations - combine them
3815
+ # Multiple validations: combine them
3556
3816
  html_parts = []
3557
3817
  html_parts.append("<html><body>")
3558
3818
  html_parts.append("<h1>Pointblank Validation Report</h1>")
@@ -3572,11 +3832,11 @@ def run(
3572
3832
  if output_json:
3573
3833
  try:
3574
3834
  if len(validations) == 1:
3575
- # Single validation - save directly
3835
+ # Single validation: save directly
3576
3836
  json_report = validations[0].get_json_report()
3577
3837
  Path(output_json).write_text(json_report, encoding="utf-8")
3578
3838
  else:
3579
- # Multiple validations - combine them
3839
+ # Multiple validations: combine them
3580
3840
  import json
3581
3841
 
3582
3842
  combined_report = {"validations": []}
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pointblank
3
- Version: 0.11.1
3
+ Version: 0.11.2
4
4
  Summary: Find out if your data is what you think it is.
5
5
  Author-email: Richard Iannone <riannone@me.com>
6
6
  License: MIT License
@@ -156,11 +156,11 @@ validation
156
156
 
157
157
  ## Why Choose Pointblank?
158
158
 
159
- - **Works with your existing stack** - Seamlessly integrates with Polars, Pandas, DuckDB, MySQL, PostgreSQL, SQLite, Parquet, PySpark, Snowflake, and more!
160
- - **Beautiful, interactive reports** - Crystal-clear validation results that highlight issues and help communicate data quality
161
- - **Composable validation pipeline** - Chain validation steps into a complete data quality workflow
162
- - **Threshold-based alerts** - Set 'warning', 'error', and 'critical' thresholds with custom actions
163
- - **Practical outputs** - Use validation results to filter tables, extract problematic data, or trigger downstream processes
159
+ - **Works with your existing stack**: Seamlessly integrates with Polars, Pandas, DuckDB, MySQL, PostgreSQL, SQLite, Parquet, PySpark, Snowflake, and more!
160
+ - **Beautiful, interactive reports**: Crystal-clear validation results that highlight issues and help communicate data quality
161
+ - **Composable validation pipeline**: Chain validation steps into a complete data quality workflow
162
+ - **Threshold-based alerts**: Set 'warning', 'error', and 'critical' thresholds with custom actions
163
+ - **Practical outputs**: Use validation results to filter tables, extract problematic data, or trigger downstream processes
164
164
 
165
165
  ## Real-World Example
166
166
 
@@ -240,7 +240,7 @@ validation.get_step_report(i=3).show("browser") # Get failing records from step
240
240
  Pointblank includes a powerful CLI utility called `pb` that lets you run data validation workflows directly from the command line. Perfect for CI/CD pipelines, scheduled data quality checks, or quick validation tasks.
241
241
 
242
242
  <div align="center">
243
- <img src="https://posit-dev.github.io/pointblank/assets/vhs/cli-complete-workflow.gif" width="800px">
243
+ <img src="https://posit-dev.github.io/pointblank/assets/vhs/cli-complete-workflow.gif" width="100%">
244
244
  </div>
245
245
 
246
246
  **Explore Your Data**
@@ -279,19 +279,17 @@ pb validate small_table --check col-vals-gt --column a --value 5 --show-extract
279
279
 
280
280
  ```bash
281
281
  # Use exit codes for automation (0 = pass, 1 = fail)
282
- pb validate small_table --check rows-distinct && echo "✅ Quality checks passed"
282
+ pb validate small_table --check rows-distinct --exit-code
283
283
  ```
284
284
 
285
- Learn more in our [CLI documentation](https://posit-dev.github.io/pointblank/user-guide/cli.html).
286
-
287
285
  ## Features That Set Pointblank Apart
288
286
 
289
- - **Complete validation workflow** - From data access to validation to reporting in a single pipeline
290
- - **Built for collaboration** - Share results with colleagues through beautiful interactive reports
291
- - **Practical outputs** - Get exactly what you need: counts, extracts, summaries, or full reports
292
- - **Flexible deployment** - Use in notebooks, scripts, or data pipelines
293
- - **Customizable** - Tailor validation steps and reporting to your specific needs
294
- - **Internationalization** - Reports can be generated in over 20 languages, including English, Spanish, French, and German
287
+ - **Complete validation workflow**: From data access to validation to reporting in a single pipeline
288
+ - **Built for collaboration**: Share results with colleagues through beautiful interactive reports
289
+ - **Practical outputs**: Get exactly what you need: counts, extracts, summaries, or full reports
290
+ - **Flexible deployment**: Use in notebooks, scripts, or data pipelines
291
+ - **Customizable**: Tailor validation steps and reporting to your specific needs
292
+ - **Internationalization**: Reports can be generated in over 20 languages, including English, Spanish, French, and German
295
293
 
296
294
  ## Documentation and Examples
297
295
 
@@ -10,7 +10,7 @@ pointblank/_utils_check_args.py,sha256=rFEc1nbCN8ftsQQWVjCNWmQ2QmUDxkfgmoJclrZeT
10
10
  pointblank/_utils_html.py,sha256=uJWvS9JwQVEZgwsGmScA_u_EBRND75rzUvnJPalbRVs,3731
11
11
  pointblank/actions.py,sha256=D6o9B2_ES9PNQg9HZwREacrrt-3A5bhdrBkL1UXz__s,18281
12
12
  pointblank/assistant.py,sha256=YsQ9U1wacVIuYFRIJ4maBbBDTzEQPzirhUUPgySosM4,15428
13
- pointblank/cli.py,sha256=aS1auedTJFk7SMPy5hfItkV5_9olUEE1CCRkAel4Thk,156930
13
+ pointblank/cli.py,sha256=jkevhsMpSQMqG1rNqfjNpOffqVcqzJYb_6knoOR22-g,169757
14
14
  pointblank/column.py,sha256=_FJjpjv760D1p6YGgqbwmKYktouG7AJ2A9uIMYQBTYA,76560
15
15
  pointblank/compare.py,sha256=kFd18CehHz7g-2MF1kSmJSdOoAP80q_9PaF6QzHC1ds,866
16
16
  pointblank/datascan.py,sha256=nmTcRLW8nAZfvRS_Nf00Wgx4oUX-o6WFOZqLDbedbu8,24563
@@ -31,9 +31,9 @@ pointblank/data/nycflights.zip,sha256=yVjbUaKUz2LydSdF9cABuir0VReHBBgV7shiNWSd0m
31
31
  pointblank/data/polars-api-docs.txt,sha256=KGcS-BOtUs9zgpkWfXD-GFdFh4O_zjdkpX7msHjztLg,198045
32
32
  pointblank/data/small_table-duckdb.zip,sha256=BhTaZ2CRS4-9Z1uVhOU6HggvW3XCar7etMznfENIcOc,2028
33
33
  pointblank/data/small_table.zip,sha256=lmFb90Nb-v5X559Ikjg31YLAXuRyMkD9yLRElkXPMzQ,472
34
- pointblank-0.11.1.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
35
- pointblank-0.11.1.dist-info/METADATA,sha256=ii-Rawr1JWm6WLqTm5zGNpxtbQbwpRSFzaAbuUa6PFQ,16609
36
- pointblank-0.11.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
37
- pointblank-0.11.1.dist-info/entry_points.txt,sha256=GqqqOTOH8uZe22wLcvYjzpizqk_j4MNcUo2YM14ryCw,42
38
- pointblank-0.11.1.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
39
- pointblank-0.11.1.dist-info/RECORD,,
34
+ pointblank-0.11.2.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
35
+ pointblank-0.11.2.dist-info/METADATA,sha256=qe_reU_6Jidz8zPSUzp_ohcCcDyOSBs72CxINJDxoPU,16473
36
+ pointblank-0.11.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
37
+ pointblank-0.11.2.dist-info/entry_points.txt,sha256=GqqqOTOH8uZe22wLcvYjzpizqk_j4MNcUo2YM14ryCw,42
38
+ pointblank-0.11.2.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
39
+ pointblank-0.11.2.dist-info/RECORD,,