pointblank 0.11.0__py3-none-any.whl → 0.11.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pointblank/assistant.py +14 -3
- pointblank/cli.py +2418 -1511
- pointblank/compare.py +9 -0
- pointblank/datascan.py +25 -3
- pointblank/validate.py +346 -37
- {pointblank-0.11.0.dist-info → pointblank-0.11.1.dist-info}/METADATA +16 -10
- {pointblank-0.11.0.dist-info → pointblank-0.11.1.dist-info}/RECORD +11 -11
- {pointblank-0.11.0.dist-info → pointblank-0.11.1.dist-info}/WHEEL +0 -0
- {pointblank-0.11.0.dist-info → pointblank-0.11.1.dist-info}/entry_points.txt +0 -0
- {pointblank-0.11.0.dist-info → pointblank-0.11.1.dist-info}/licenses/LICENSE +0 -0
- {pointblank-0.11.0.dist-info → pointblank-0.11.1.dist-info}/top_level.txt +0 -0
pointblank/cli.py
CHANGED
@@ -15,6 +15,81 @@ from pointblank._utils import _get_tbl_type, _is_lib_present
 console = Console()


+class OrderedGroup(click.Group):
+    """A Click Group that displays commands in a custom order."""
+
+    def list_commands(self, ctx):
+        """Return commands in the desired logical order."""
+        # Define the desired order
+        desired_order = [
+            # Data Discovery/Exploration
+            "info",
+            "preview",
+            "scan",
+            "missing",
+            # Validation
+            "validate",
+            "run",
+            "make-template",
+            # Utilities
+            "datasets",
+            "requirements",
+        ]
+
+        # Get all available commands
+        available_commands = super().list_commands(ctx)
+
+        # Return commands in desired order, followed by any not in the list
+        ordered = []
+        for cmd in desired_order:
+            if cmd in available_commands:
+                ordered.append(cmd)
+
+        # Add any commands not in our desired order (safety fallback)
+        for cmd in available_commands:
+            if cmd not in ordered:
+                ordered.append(cmd)
+
+        return ordered
+
+
+def _load_data_source(data_source: str) -> Any:
+    """
+    Centralized data loading function for CLI that handles all supported data source types.
+
+    This function provides a consistent way to load data across all CLI commands by leveraging
+    the _process_data() utility function and adding support for pointblank dataset names.
+
+    Parameters
+    ----------
+    data_source : str
+        The data source which could be:
+        - A pointblank dataset name (small_table, game_revenue, nycflights, global_sales)
+        - A GitHub URL pointing to a CSV or Parquet file
+        - A database connection string (e.g., "duckdb:///path/to/file.ddb::table_name")
+        - A CSV file path (string or Path object with .csv extension)
+        - A Parquet file path, glob pattern, directory, or partitioned dataset
+
+    Returns
+    -------
+    Any
+        Loaded data as a DataFrame or other data object
+
+    Raises
+    ------
+    ValueError
+        If the pointblank dataset name is not recognized
+    """
+    # Check if it's a pointblank dataset name first
+    if data_source in ["small_table", "game_revenue", "nycflights", "global_sales"]:
+        return pb.load_dataset(data_source)
+
+    # Otherwise, use the centralized _process_data() function for all other data sources
+    from pointblank.validate import _process_data
+
+    return _process_data(data_source)
+
+
 def _format_cell_value(
     value: Any, is_row_number: bool = False, max_width: int = 50, num_columns: int = 10
 ) -> str:
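The OrderedGroup added above replaces click's default alphabetical ordering of subcommands in `pb --help`. A minimal standalone sketch of the same pattern (not part of the package; the demo group and stub commands below are hypothetical, only the list_commands override mirrors the hunk):

import click


class OrderedGroup(click.Group):
    """List commands in a curated order instead of click's alphabetical default."""

    def list_commands(self, ctx):
        desired = ["info", "preview", "scan", "missing", "validate"]  # order from the hunk above
        available = super().list_commands(ctx)
        # Known commands first, in the curated order; anything else appended as a fallback.
        return [c for c in desired if c in available] + [c for c in available if c not in desired]


@click.group(cls=OrderedGroup)
def cli():
    """Demo group."""


@cli.command()
def preview():
    """Stub command."""


@cli.command()
def info():
    """Stub command."""


if __name__ == "__main__":
    cli()  # `--help` now lists info before preview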
@@ -206,173 +281,443 @@ def _format_dtype_compact(dtype_str: str) -> str:
     return dtype_str


-def
-
+def _rich_print_scan_table(
+    scan_result: Any,
+    data_source: str,
+    source_type: str,
+    table_type: str,
+    total_rows: int | None = None,
+    total_columns: int | None = None,
+) -> None:
+    """
+    Display scan results as a Rich table in the terminal with statistical measures.

     Args:
-
-
-
-
-
-
+        scan_result: The GT object from col_summary_tbl()
+        data_source: Name of the data source being scanned
+        source_type: Type of data source (e.g., "Pointblank dataset: small_table")
+        table_type: Type of table (e.g., "polars.LazyFrame")
+        total_rows: Total number of rows in the dataset
+        total_columns: Total number of columns in the dataset
     """
     try:
-
-        df = None
+        import re

-
-
-        if hasattr(gt_table, "_tbl_data") and gt_table._tbl_data is not None:
-            df = gt_table._tbl_data
-        elif (
-            hasattr(gt_table, "_body")
-            and hasattr(gt_table._body, "body")
-            and gt_table._body.body is not None
-        ):
-            df = gt_table._body.body
-        elif hasattr(gt_table, "_data") and gt_table._data is not None:
-            df = gt_table._data
-        elif hasattr(gt_table, "data") and gt_table.data is not None:
-            df = gt_table.data
+        import narwhals as nw
+        from rich.box import SIMPLE_HEAD

-
-
-
+        # Extract the underlying DataFrame from the GT object
+        # The GT object has a _tbl_data attribute that contains the DataFrame
+        gt_data = scan_result._tbl_data

-
-
-        if preview_info and "source_type" in preview_info and "table_type" in preview_info:
-            source_type = preview_info["source_type"]
-            table_type = preview_info["table_type"]
-            table_title = f"Data Preview / {source_type} / {table_type}"
+        # Convert to Narwhals DataFrame for consistent handling
+        nw_data = nw.from_native(gt_data)

-
-
-            show_header=True,
-            header_style="bold magenta",
-            box=SIMPLE_HEAD,
-            title_style="bold cyan",
-            title_justify="left",
-        )
+        # Convert to dictionary for easier access
+        data_dict = nw_data.to_dict(as_series=False)

-
-
-
-            columns = list(df.columns)
-        elif hasattr(df, "schema"):  # pragma: no cover
-            columns = list(df.schema.names)
-        elif hasattr(df, "column_names"):  # pragma: no cover
-            columns = list(df.column_names)
+        # Create main scan table with missing data table styling
+        # Create a comprehensive title with data source, source type, and table type
+        title_text = f"Column Summary / {source_type} / {table_type}"

-
-
-
-        if hasattr(df, "to_dicts") and len(df) > 0:
-            first_dict = df.to_dicts()[0]
-            columns = list(first_dict.keys())
-        elif hasattr(df, "to_dict") and len(df) > 0:
-            first_dict = df.to_dict("records")[0]
-            columns = list(first_dict.keys())
-    except Exception:  # pragma: no cover
-        columns = [f"Column {i + 1}" for i in range(10)]  # Default fallback
+        # Add dimensions subtitle in gray if available
+        if total_rows is not None and total_columns is not None:
+            title_text += f"\n[dim]{total_rows:,} rows / {total_columns} columns[/dim]"

-
-
-
+        scan_table = Table(
+            title=title_text,
+            show_header=True,
+            header_style="bold magenta",
+            box=SIMPLE_HEAD,
+            title_style="bold cyan",
+            title_justify="left",
+        )

-
-
-
-
-
-
-
-
-
-        max_col_width = min(30, terminal_width // 8)
-    except Exception:  # pragma: no cover
-        # Fallback if we can't get terminal width
-        max_col_width = 40 if len(columns) <= 10 else 25
+        # Add columns with specific styling and appropriate widths
+        scan_table.add_column("Column", style="cyan", no_wrap=True, width=20)
+        scan_table.add_column("Type", style="yellow", no_wrap=True, width=10)
+        scan_table.add_column(
+            "NA", style="red", width=6, justify="right"
+        )  # Adjusted for better formatting
+        scan_table.add_column(
+            "UQ", style="green", width=8, justify="right"
+        )  # Adjusted for boolean values

-
-
-
-
+        # Add statistical columns if they exist with appropriate widths
+        stat_columns = []
+        column_mapping = {
+            "mean": ("Mean", "blue", 9),
+            "std": ("SD", "blue", 9),
+            "min": ("Min", "yellow", 9),
+            "median": ("Med", "yellow", 9),
+            "max": ("Max", "yellow", 9),
+            "q_1": ("Q₁", "magenta", 8),
+            "q_3": ("Q₃", "magenta", 9),
+            "iqr": ("IQR", "magenta", 8),
+        }

-
+        for col_key, (display_name, color, width) in column_mapping.items():
+            if col_key in data_dict:
+                scan_table.add_column(display_name, style=color, width=width, justify="right")
+                stat_columns.append(col_key)

-
-
-
-
-
-
-
+        # Helper function to extract column name and type from HTML
+        def extract_column_info(html_content: str) -> tuple[str, str]:
+            """Extract column name and type from HTML formatted content."""
+            # Extract column name from first div
+            name_match = re.search(r"<div[^>]*>([^<]+)</div>", html_content)
+            column_name = name_match.group(1) if name_match else "Unknown"
+
+            # Extract data type from second div (with gray color)
+            type_match = re.search(r"<div[^>]*color: gray[^>]*>([^<]+)</div>", html_content)
+            if type_match:
+                data_type = type_match.group(1)
+                # Convert to compact format using the existing function
+                compact_type = _format_dtype_compact(data_type)
+                data_type = compact_type
             else:
-
+                data_type = "unknown"

-
-        dtypes_dict = _get_column_dtypes(df, columns)
+            return column_name, data_type

-
-
-
-
-
-
-
-                    if data_dict:
-                        row_nums = [row.get("_row_num_", 0) for row in data_dict]
-                        max_row_num = max(row_nums) if row_nums else 0
-                        row_num_width = max(len(str(max_row_num)) + 1, 6)  # +1 for padding
-                elif hasattr(df, "to_dict"):
-                    data_dict = df.to_dict("records")
-                    if data_dict:
-                        row_nums = [row.get("_row_num_", 0) for row in data_dict]
-                        max_row_num = max(row_nums) if row_nums else 0
-                        row_num_width = max(len(str(max_row_num)) + 1, 6)  # +1 for padding
-            except Exception:  # pragma: no cover
-                # If we can't determine max row number, use default
-                row_num_width = 8  # Slightly larger default for safety
+        # Helper function to format values with improved number formatting
+        def format_value(
+            value: Any, is_missing: bool = False, is_unique: bool = False, max_width: int = 8
+        ) -> str:
+            """Format values for display with smart number formatting and HTML cleanup."""
+            if value is None or (isinstance(value, str) and value.strip() == ""):
+                return "[dim]—[/dim]"

-
-
-
-        rich_table.add_column("···", style="dim", width=3, no_wrap=True)
-    else:
-        # Handle row number column specially
-        if col == "_row_num_":
-            # Row numbers get no header, right alignment, and dim gray style
-            # Use dynamic width to prevent truncation
-            rich_table.add_column(
-                "", style="dim", justify="right", no_wrap=True, width=row_num_width
-            )
-        else:
-            display_col = str(col)
+            # Handle missing values indicator
+            if is_missing and str(value) == "0":
+                return "[green]●[/green]"  # No missing values

-
-
-            dtype_display = f"<{dtypes_dict[col]}>"
-            # Create header with column name and data type
-            header_text = f"{display_col}\n[dim yellow]{dtype_display}[/dim yellow]"
-        else:
-            header_text = display_col
+            # Clean up HTML formatting from the raw data
+            str_val = str(value)

-
-
-
-
-
-
-
+            # Handle multi-line values with <br> tags FIRST - take the first line (absolute number)
+            if "<br>" in str_val:
+                str_val = str_val.split("<br>")[0].strip()
+                # For unique values, we want just the integer part
+                if is_unique:
+                    try:
+                        # Try to extract just the integer part for unique counts
+                        num_val = float(str_val)
+                        return str(int(num_val))
+                    except (ValueError, TypeError):
+                        pass

-            #
-
-
-
-
-
+            # Now handle HTML content (especially from boolean unique values)
+            if "<" in str_val and ">" in str_val:
+                # Remove HTML tags completely for cleaner display
+                str_val = re.sub(r"<[^>]+>", "", str_val).strip()
+                # Clean up extra whitespace
+                str_val = re.sub(r"\s+", " ", str_val).strip()
+
+            # Handle values like "2<.01" - extract the first number
+            if "<" in str_val and not (str_val.startswith("<") and str_val.endswith(">")):
+                # Extract number before the < symbol
+                before_lt = str_val.split("<")[0].strip()
+                if before_lt and before_lt.replace(".", "").replace("-", "").isdigit():
+                    str_val = before_lt
+
+            # Handle boolean unique values like "T0.62F0.38" - extract the more readable format
+            if re.match(r"^[TF]\d+\.\d+[TF]\d+\.\d+$", str_val):
+                # Extract T and F values
+                t_match = re.search(r"T(\d+\.\d+)", str_val)
+                f_match = re.search(r"F(\d+\.\d+)", str_val)
+                if t_match and f_match:
+                    t_val = float(t_match.group(1))
+                    f_val = float(f_match.group(1))
+                    # Show as "T0.62F0.38" but truncated if needed
+                    formatted = f"T{t_val:.2f}F{f_val:.2f}"
+                    if len(formatted) > max_width:
+                        # Truncate to fit, showing dominant value
+                        if t_val > f_val:
+                            return f"T{t_val:.1f}"
+                        else:
+                            return f"F{f_val:.1f}"
+                    return formatted
+
+            # Try to parse as a number for better formatting
+            try:
+                # Try to convert to float first
+                num_val = float(str_val)
+
+                # Handle special cases
+                if num_val == 0:
+                    return "0"
+                elif abs(num_val) == int(abs(num_val)) and abs(num_val) < 10000:
+                    # Simple integers under 10000
+                    return str(int(num_val))
+                elif abs(num_val) >= 10000000 and abs(num_val) < 100000000:
+                    # Likely dates in YYYYMMDD format - format as date-like
+                    int_val = int(num_val)
+                    if 19000101 <= int_val <= 29991231:  # Reasonable date range
+                        str_date = str(int_val)
+                        if len(str_date) == 8:
+                            return (
+                                f"{str_date[:4]}-{str_date[4:6]}-{str_date[6:]}"[: max_width - 1]
+                                + "…"
+                            )
+                    # Otherwise treat as large number
+                    return f"{num_val / 1000000:.1f}M"
+                elif abs(num_val) >= 1000000:
+                    # Large numbers - use scientific notation or M/k notation
+
+                    if abs(num_val) >= 1000000000:
+                        return f"{num_val:.1e}"
+                    else:
+                        return f"{num_val / 1000000:.1f}M"
+                elif abs(num_val) >= 10000:
+                    # Numbers >= 10k - use compact notation
+                    return f"{num_val / 1000:.1f}k"
+                elif abs(num_val) >= 100:
+                    # Numbers 100-9999 - show with minimal decimals
+                    return f"{num_val:.1f}"
+                elif abs(num_val) >= 10:
+                    # Numbers 10-99 - show with one decimal
+                    return f"{num_val:.1f}"
+                elif abs(num_val) >= 1:
+                    # Numbers 1-9 - show with two decimals
+                    return f"{num_val:.2f}"
+                elif abs(num_val) >= 0.01:
+                    # Small numbers - show with appropriate precision
+                    return f"{num_val:.2f}"
+                else:
+                    # Very small numbers - use scientific notation
+
+                    return f"{num_val:.1e}"
+
+            except (ValueError, TypeError):
+                # Not a number, handle as string
+                pass
+
+            # Handle date/datetime strings - show abbreviated format
+            if len(str_val) > 10 and any(char in str_val for char in ["-", "/", ":"]):
+                # Likely a date/datetime, show abbreviated
+                if len(str_val) > max_width:
+                    return str_val[: max_width - 1] + "…"
+
+            # General string truncation with ellipsis
+            if len(str_val) > max_width:
+                return str_val[: max_width - 1] + "…"
+
+            return str_val
+
+        # Populate table rows
+        num_rows = len(data_dict["colname"])
+        for i in range(num_rows):
+            row_data = []
+
+            # Column name and type from HTML content
+            colname_html = data_dict["colname"][i]
+            column_name, data_type = extract_column_info(colname_html)
+            row_data.append(column_name)
+            row_data.append(data_type)
+
+            # Missing values (NA)
+            missing_val = data_dict.get("n_missing", [None] * num_rows)[i]
+            row_data.append(format_value(missing_val, is_missing=True, max_width=6))
+
+            # Unique values (UQ)
+            unique_val = data_dict.get("n_unique", [None] * num_rows)[i]
+            row_data.append(format_value(unique_val, is_unique=True, max_width=8))
+
+            # Statistical columns
+            for stat_col in stat_columns:
+                stat_val = data_dict.get(stat_col, [None] * num_rows)[i]
+                # Use appropriate width based on column type
+                if stat_col in ["q_1", "iqr"]:
+                    width = 8
+                elif stat_col in ["mean", "std", "min", "median", "max", "q_3"]:
+                    width = 9
+                else:
+                    width = 8
+                row_data.append(format_value(stat_val, max_width=width))
+
+            scan_table.add_row(*row_data)
+
+        # Display the results
+        console.print()
+        console.print(scan_table)
+
+    except Exception as e:
+        # Fallback to simple message if table creation fails
+        console.print(f"[yellow]Scan results available for {data_source}[/yellow]")
+        console.print(f"[red]Error displaying table: {str(e)}[/red]")
+
+
+def _rich_print_gt_table(
+    gt_table: Any, preview_info: dict | None = None, show_summary: bool = True
+) -> None:
+    """Convert a GT table to Rich table and display it in the terminal.
+
+    Args:
+        gt_table: The GT table object to display
+        preview_info: Optional dict with preview context info:
+            - total_rows: Total rows in the dataset
+            - head_rows: Number of head rows shown
+            - tail_rows: Number of tail rows shown
+            - is_complete: Whether the entire dataset is shown
+        show_summary: Whether to show the row count summary at the bottom
+    """
+    try:
+        # Try to extract the underlying data from the GT table
+        df = None
+
+        # Great Tables stores the original data in different places depending on how it was created
+        # Let's try multiple approaches to get the data
+        if hasattr(gt_table, "_tbl_data") and gt_table._tbl_data is not None:
+            df = gt_table._tbl_data
+        elif (
+            hasattr(gt_table, "_body")
+            and hasattr(gt_table._body, "body")
+            and gt_table._body.body is not None
+        ):
+            df = gt_table._body.body
+        elif hasattr(gt_table, "_data") and gt_table._data is not None:
+            df = gt_table._data
+        elif hasattr(gt_table, "data") and gt_table.data is not None:
+            df = gt_table.data
+
+        if df is not None:
+            # Create a Rich table with horizontal lines
+            from rich.box import SIMPLE_HEAD
+
+            # Create enhanced title if preview_info contains metadata
+            table_title = None
+            if preview_info and "source_type" in preview_info and "table_type" in preview_info:
+                source_type = preview_info["source_type"]
+                table_type = preview_info["table_type"]
+                table_title = f"Data Preview / {source_type} / {table_type}"
+
+            rich_table = Table(
+                title=table_title,
+                show_header=True,
+                header_style="bold magenta",
+                box=SIMPLE_HEAD,
+                title_style="bold cyan",
+                title_justify="left",
+            )
+
+            # Get column names
+            columns = []
+            if hasattr(df, "columns"):
+                columns = list(df.columns)
+            elif hasattr(df, "schema"):  # pragma: no cover
+                columns = list(df.schema.names)
+            elif hasattr(df, "column_names"):  # pragma: no cover
+                columns = list(df.column_names)
+
+            if not columns:  # pragma: no cover
+                # Fallback: try to determine columns from first row
+                try:
+                    if hasattr(df, "to_dicts") and len(df) > 0:
+                        first_dict = df.to_dicts()[0]
+                        columns = list(first_dict.keys())
+                    elif hasattr(df, "to_dict") and len(df) > 0:
+                        first_dict = df.to_dict("records")[0]
+                        columns = list(first_dict.keys())
+                except Exception:  # pragma: no cover
+                    columns = [f"Column {i + 1}" for i in range(10)]  # Default fallback
+
+            # Add columns to Rich table
+            # Handle wide tables by limiting columns displayed
+            max_terminal_cols = 15  # Reasonable limit for terminal display
+
+            # Get terminal width to adjust column behavior
+            try:
+                terminal_width = console.size.width
+                # Estimate max column width based on terminal size and number of columns
+                if len(columns) <= 5:
+                    max_col_width = min(60, terminal_width // 4)
+                elif len(columns) <= 10:
+                    max_col_width = min(40, terminal_width // 6)
+                else:
+                    max_col_width = min(30, terminal_width // 8)
+            except Exception:  # pragma: no cover
+                # Fallback if we can't get terminal width
+                max_col_width = 40 if len(columns) <= 10 else 25
+
+            if len(columns) > max_terminal_cols:
+                # For wide tables, show first few, middle indicator, and last few columns
+                first_cols = 7
+                last_cols = 7
+
+                display_columns = columns[:first_cols] + ["...more..."] + columns[-last_cols:]
+
+                console.print(
+                    f"\n[yellow]⚠ Table has {len(columns)} columns. Showing first {first_cols} and last {last_cols} columns.[/yellow]"
+                )
+                console.print("[dim]Use --columns to specify which columns to display.[/dim]")
+                console.print(
+                    f"[dim]Full column list: {', '.join(columns[:5])}...{', '.join(columns[-5:])}[/dim]\n"
+                )
+            else:
+                display_columns = columns
+
+            # Get data types for columns
+            dtypes_dict = _get_column_dtypes(df, columns)
+
+            # Calculate row number column width if needed
+            row_num_width = 6  # Default width
+            if "_row_num_" in columns:
+                try:
+                    # Get the maximum row number to calculate appropriate width
+                    if hasattr(df, "to_dicts"):
+                        data_dict = df.to_dicts()
+                        if data_dict:
+                            row_nums = [row.get("_row_num_", 0) for row in data_dict]
+                            max_row_num = max(row_nums) if row_nums else 0
+                            row_num_width = max(len(str(max_row_num)) + 1, 6)  # +1 for padding
+                    elif hasattr(df, "to_dict"):
+                        data_dict = df.to_dict("records")
+                        if data_dict:
+                            row_nums = [row.get("_row_num_", 0) for row in data_dict]
+                            max_row_num = max(row_nums) if row_nums else 0
+                            row_num_width = max(len(str(max_row_num)) + 1, 6)  # +1 for padding
+                except Exception:  # pragma: no cover
+                    # If we can't determine max row number, use default
+                    row_num_width = 8  # Slightly larger default for safety
+
+            for i, col in enumerate(display_columns):
+                if col == "...more...":
+                    # Add a special indicator column
+                    rich_table.add_column("···", style="dim", width=3, no_wrap=True)
+                else:
+                    # Handle row number column specially
+                    if col == "_row_num_":
+                        # Row numbers get no header, right alignment, and dim gray style
+                        # Use dynamic width to prevent truncation
+                        rich_table.add_column(
+                            "", style="dim", justify="right", no_wrap=True, width=row_num_width
+                        )
+                    else:
+                        display_col = str(col)
+
+                        # Get data type for this column (if available)
+                        if col in dtypes_dict:
+                            dtype_display = f"<{dtypes_dict[col]}>"
+                            # Create header with column name and data type
+                            header_text = f"{display_col}\n[dim yellow]{dtype_display}[/dim yellow]"
+                        else:
+                            header_text = display_col
+
+                        rich_table.add_column(
+                            header_text,
+                            style="cyan",
+                            no_wrap=False,
+                            overflow="ellipsis",
+                            max_width=max_col_width,
+                        )
+
+            # Convert data to list of rows
+            rows = []
+            try:
+                if hasattr(df, "to_dicts"):
+                    # Polars interface
+                    data_dict = df.to_dicts()
                     if len(columns) > max_terminal_cols:
                         # For wide tables, extract only the displayed columns
                         display_data_columns = (
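The format_value helper in the hunk above compacts cell values so they fit fixed-width terminal columns. A condensed, runnable restatement of just its magnitude rules (an assumption-level simplification: the HTML cleanup, T/F boolean handling, and YYYYMMDD date special case are omitted):

def compact(num: float) -> str:
    # Mirrors the numeric branches of format_value above.
    if num == 0:
        return "0"
    a = abs(num)
    if a == int(a) and a < 10_000:
        return str(int(num))  # plain integers under 10k pass through
    if a >= 1_000_000_000:
        return f"{num:.1e}"  # billions: scientific notation
    if a >= 1_000_000:
        return f"{num / 1_000_000:.1f}M"  # millions: M suffix
    if a >= 10_000:
        return f"{num / 1_000:.1f}k"  # tens of thousands: k suffix
    if a >= 10:
        return f"{num:.1f}"  # 10-9999 (non-integer): one decimal
    if a >= 0.01:
        return f"{num:.2f}"  # 0.01-10: two decimals
    return f"{num:.1e}"  # very small values: scientific notation


assert compact(0) == "0"
assert compact(9_999) == "9999"
assert compact(12_345.6) == "12.3k"
assert compact(2_500_000) == "2.5M"
assert compact(0.0004) == "4.0e-04"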
@@ -519,44 +864,45 @@ def _rich_print_gt_table(gt_table: Any, preview_info: dict | None = None) -> None:
             console.print()
             console.print(rich_table)

-            # Show summary info
-
+            # Show summary info (conditionally)
+            if show_summary:
+                total_rows = len(rows)

-
-
-
-
-
-
+                # Use preview info if available, otherwise fall back to old logic
+                if preview_info:
+                    total_dataset_rows = preview_info.get("total_rows", total_rows)
+                    head_rows = preview_info.get("head_rows", 0)
+                    tail_rows = preview_info.get("tail_rows", 0)
+                    is_complete = preview_info.get("is_complete", False)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            else:
-                # Original logic as fallback
-                max_rows = 50  # This should match the limit used above
-                if total_rows > max_rows:
-                    console.print(
-                        f"\n[dim]Showing first {max_rows} of {total_rows} rows. Use --output-html to see all data.[/dim]"
-                    )
+                    if is_complete:
+                        console.print(f"\n[dim]Showing all {total_rows} rows.[/dim]")
+                    elif head_rows > 0 and tail_rows > 0:
+                        console.print(
+                            f"\n[dim]Showing first {head_rows} and last {tail_rows} rows from {total_dataset_rows:,} total rows.[/dim]"
+                        )
+                    elif head_rows > 0:
+                        console.print(
+                            f"\n[dim]Showing first {head_rows} rows from {total_dataset_rows:,} total rows.[/dim]"
+                        )
+                    elif tail_rows > 0:
+                        console.print(
+                            f"\n[dim]Showing last {tail_rows} rows from {total_dataset_rows:,} total rows.[/dim]"
+                        )
+                    else:
+                        # Fallback for other cases
+                        console.print(
+                            f"\n[dim]Showing {total_rows} rows from {total_dataset_rows:,} total rows.[/dim]"
+                        )
                 else:
-
+                    # Original logic as fallback
+                    max_rows = 50  # This should match the limit used above
+                    if total_rows > max_rows:
+                        console.print(
+                            f"\n[dim]Showing first {max_rows} of {total_rows} rows. Use --output-html to see all data.[/dim]"
+                        )
+                    else:
+                        console.print(f"\n[dim]Showing all {total_rows} rows.[/dim]")

         else:
             # If we can't extract data, show the success message
@@ -693,73 +1039,71 @@ def _display_validation_summary(validation: Any) -> None:
         console.print(f"[dim]{traceback.format_exc()}[/dim]")  # pragma: no cover


-@click.group()
+@click.group(cls=OrderedGroup)
 @click.version_option(version=pb.__version__, prog_name="pb")
 def cli():
     """
     Pointblank CLI - Data validation and quality tools for data engineers.

-    Use this CLI to
+    Use this CLI to run validation scripts, preview tables, and generate reports
     directly from the command line.
     """
     pass


 @cli.command()
-
-
-    List available built-in datasets.
+@click.argument("data_source", type=str)
+def info(data_source: str):
     """
-
-        ("small_table", "13 rows × 8 columns", "Small demo dataset for testing"),
-        ("game_revenue", "2,000 rows × 11 columns", "Game development company revenue data"),
-        ("nycflights", "336,776 rows × 18 columns", "NYC airport flights data from 2013"),
-        ("global_sales", "50,000 rows × 20 columns", "Global sales data across regions"),
-    ]
+    Display information about a data source.

-    table
-        title="Available Pointblank Datasets", show_header=True, header_style="bold magenta"
-    )
-    table.add_column("Dataset Name", style="cyan", no_wrap=True)
-    table.add_column("Dimensions", style="green")
-    table.add_column("Description", style="white")
+    Shows table type, dimensions, column names, and data types.

-
-        table.add_row(name, dims, desc)
+    DATA_SOURCE can be:

-
-
-
+    \b
+    - CSV file path (e.g., data.csv)
+    - Parquet file path or pattern (e.g., data.parquet, data/*.parquet)
+    - GitHub URL to CSV/Parquet (e.g., https://github.com/user/repo/blob/main/data.csv)
+    - Database connection string (e.g., duckdb:///path/to/db.ddb::table_name)
+    - Dataset name from pointblank (small_table, game_revenue, nycflights, global_sales)
+    """
+    try:
+        with console.status("[bold green]Loading data..."):
+            # Load the data source using the centralized function
+            data = _load_data_source(data_source)

+            # Get table information
+            tbl_type = _get_tbl_type(data)
+            row_count = pb.get_row_count(data)
+            col_count = pb.get_column_count(data)

-
-
-    """
-    Check installed dependencies and their availability.
-    """
-    dependencies = [
-        ("polars", "Polars DataFrame support"),
-        ("pandas", "Pandas DataFrame support"),
-        ("ibis", "Ibis backend support (DuckDB, etc.)"),
-        ("duckdb", "DuckDB database support"),
-        ("pyarrow", "Parquet file support"),
-    ]
+        # Import the box style
+        from rich.box import SIMPLE_HEAD

-
-
-
-
+        # Create info table
+        info_table = Table(
+            title="Data Source Information",
+            show_header=True,
+            header_style="bold magenta",
+            box=SIMPLE_HEAD,
+            title_style="bold cyan",
+            title_justify="left",
+        )
+        info_table.add_column("Property", style="cyan", no_wrap=True)
+        info_table.add_column("Value", style="green")

-
-
-
-
-        status = "[red]✗ Not installed[/red]"
+        info_table.add_row("Source", data_source)
+        info_table.add_row("Table Type", tbl_type)
+        info_table.add_row("Rows", f"{row_count:,}")
+        info_table.add_row("Columns", f"{col_count:,}")

-
+        console.print()
+        console.print(info_table)

-
-
+    except Exception as e:
+        console.print(f"[red]Error:[/red] {e}")
+        sys.exit(1)


 @cli.command()
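With info now loading through _load_data_source, the command accepts the same source strings as preview and scan. A hedged sketch of exercising it with click's test runner (assumes the cli group is importable from pointblank.cli, as this diff implies, and that the bundled small_table dataset is available):

from click.testing import CliRunner

from pointblank.cli import cli

runner = CliRunner()
result = runner.invoke(cli, ["info", "small_table"])
print(result.output)     # expected: the "Data Source Information" table with Source/Table Type/Rows/Columns
print(result.exit_code)  # 0 on success; the command calls sys.exit(1) on error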
@@ -799,6 +1143,7 @@ def preview(
     \b
     - CSV file path (e.g., data.csv)
     - Parquet file path or pattern (e.g., data.parquet, data/*.parquet)
+    - GitHub URL to CSV/Parquet (e.g., https://github.com/user/repo/blob/main/data.csv)
     - Database connection string (e.g., duckdb:///path/to/db.ddb::table_name)
     - Dataset name from pointblank (small_table, game_revenue, nycflights, global_sales)

@@ -816,14 +1161,10 @@ def preview(
     """
     try:
         with console.status("[bold green]Loading data..."):
-            #
-
-
-
-            else:
-                # Assume it's a file path or connection string
-                data = data_source
-                console.print(f"[green]✓[/green] Loaded data source: {data_source}")
+            # Load the data source using the centralized function
+            data = _load_data_source(data_source)
+
+        console.print(f"[green]✓[/green] Loaded data source: {data_source}")

         # Parse columns if provided
         columns_list = None
@@ -832,18 +1173,8 @@ def preview(

         # If data has _row_num_ and it's not explicitly included, add it at the beginning
         try:
-
-                _process_connection_string,
-                _process_csv_input,
-                _process_parquet_input,
-            )
-
-            # Process the data source to get actual data object to check for _row_num_
+            # Data is already processed, just use it directly
             processed_data = data
-            if isinstance(data, str):
-                processed_data = _process_connection_string(data)
-                processed_data = _process_csv_input(processed_data)
-                processed_data = _process_parquet_input(processed_data)

             # Get column names from the processed data
             all_columns = []
@@ -860,19 +1191,8 @@ def preview(
             pass
         elif col_range or col_first or col_last:
             # Need to get column names to apply range/first/last selection
-            #
-            from pointblank.validate import (
-                _process_connection_string,
-                _process_csv_input,
-                _process_parquet_input,
-            )
-
-            # Process the data source to get actual data object
+            # Data is already processed, just use it directly
             processed_data = data
-            if isinstance(data, str):
-                processed_data = _process_connection_string(data)
-                processed_data = _process_csv_input(processed_data)
-                processed_data = _process_parquet_input(processed_data)

             # Get column names from the processed data
             all_columns = []
@@ -934,20 +1254,11 @@ def preview(
         with console.status("[bold green]Generating preview..."):
             # Get total dataset size before preview and gather metadata
             try:
-                #
-                from pointblank.validate import (
-                    _process_connection_string,
-                    _process_csv_input,
-                    _process_parquet_input,
-                )
-
+                # Data is already processed, just use it directly
                 processed_data = data
-                if isinstance(data, str):
-                    processed_data = _process_connection_string(data)
-                    processed_data = _process_csv_input(processed_data)
-                    processed_data = _process_parquet_input(processed_data)

                 total_dataset_rows = pb.get_row_count(processed_data)
+                total_dataset_columns = pb.get_column_count(processed_data)

             # Determine source type and table type for enhanced preview title
             if data_source in ["small_table", "game_revenue", "nycflights", "global_sales"]:
@@ -959,6 +1270,7 @@ def preview(
             except Exception:
                 # If we can't get metadata, set defaults
                 total_dataset_rows = None
+                total_dataset_columns = None
                 source_type = f"Data source: {data_source}"
                 table_type = "unknown"

@@ -989,6 +1301,7 @@ def preview(

             preview_info = {
                 "total_rows": total_dataset_rows,
+                "total_columns": total_dataset_columns,
                 "head_rows": head,
                 "tail_rows": tail,
                 "is_complete": is_complete,
@@ -1003,71 +1316,6 @@ def preview(
         sys.exit(1)  # pragma: no cover


-@cli.command()
-@click.argument("data_source", type=str)
-def info(data_source: str):
-    """
-    Display information about a data source.
-
-    Shows table type, dimensions, column names, and data types.
-    """
-    try:
-        with console.status("[bold green]Loading data..."):
-            # Try to load as a pointblank dataset first
-            if data_source in ["small_table", "game_revenue", "nycflights", "global_sales"]:
-                data = pb.load_dataset(data_source)
-                source_type = f"Pointblank dataset: {data_source}"
-                console.print(f"[green]✓[/green] Loaded dataset: {data_source}")
-            else:
-                # Assume it's a file path or connection string
-                data = data_source
-                source_type = f"External source: {data_source}"
-
-            # Process the data to get actual table object for inspection
-            from pointblank.validate import (
-                _process_connection_string,
-                _process_csv_input,
-                _process_parquet_input,
-            )
-
-            data = _process_connection_string(data)
-            data = _process_csv_input(data)
-            data = _process_parquet_input(data)
-            console.print(f"[green]✓[/green] Loaded data source: {data_source}")
-
-            # Get table information
-            tbl_type = _get_tbl_type(data)
-            row_count = pb.get_row_count(data)
-            col_count = pb.get_column_count(data)
-
-        # Import the box style for consistent styling with scan table
-        from rich.box import SIMPLE_HEAD
-
-        # Create info table with same styling as scan table
-        info_table = Table(
-            title="Data Source Information",
-            show_header=True,
-            header_style="bold magenta",
-            box=SIMPLE_HEAD,
-            title_style="bold cyan",
-            title_justify="left",
-        )
-        info_table.add_column("Property", style="cyan", no_wrap=True)
-        info_table.add_column("Value", style="green")
-
-        info_table.add_row("Source", source_type)
-        info_table.add_row("Table Type", tbl_type)
-        info_table.add_row("Rows", f"{row_count:,}")
-        info_table.add_row("Columns", f"{col_count:,}")
-
-        console.print()
-        console.print(info_table)
-
-    except Exception as e:
-        console.print(f"[red]Error:[/red] {e}")
-        sys.exit(1)
-
-
 @cli.command()
 @click.argument("data_source", type=str)
 @click.option("--output-html", type=click.Path(), help="Save HTML scan report to file")
@@ -1093,6 +1341,7 @@ def scan(
     \b
     - CSV file path (e.g., data.csv)
     - Parquet file path or pattern (e.g., data.parquet, data/*.parquet)
+    - GitHub URL to CSV/Parquet (e.g., https://github.com/user/repo/blob/main/data.csv)
     - Database connection string (e.g., duckdb:///path/to/db.ddb::table_name)
     - Dataset name from pointblank (small_table, game_revenue, nycflights, global_sales)
     """
@@ -1102,14 +1351,10 @@ def scan(
         start_time = time.time()

         with console.status("[bold green]Loading data..."):
-            #
-
-
-
-            else:
-                # Assume it's a file path or connection string
-                data = data_source
-                console.print(f"[green]✓[/green] Loaded data source: {data_source}")
+            # Load the data source using the centralized function
+            data = _load_data_source(data_source)
+
+        console.print(f"[green]✓[/green] Loaded data source: {data_source}")

         # Parse columns if provided
         columns_list = None
@@ -1119,35 +1364,22 @@ def scan(
         # Generate data scan
         with console.status("[bold green]Generating data scan..."):
             # Use col_summary_tbl for comprehensive column scanning
+            # Data is already processed by _load_data_source
+            scan_result = pb.col_summary_tbl(data=data)
+
             if data_source in ["small_table", "game_revenue", "nycflights", "global_sales"]:
-                # For pointblank datasets, data is already the loaded dataframe
-                scan_result = pb.col_summary_tbl(data=data)
                 source_type = f"Pointblank dataset: {data_source}"
-                table_type = _get_tbl_type(data)
-                # Get row count for footer
-                try:
-                    total_rows = pb.get_row_count(data)
-                except Exception:
-                    total_rows = None
             else:
-                # For file paths and connection strings, load the data first
-                from pointblank.validate import (
-                    _process_connection_string,
-                    _process_csv_input,
-                    _process_parquet_input,
-                )
-
-                processed_data = _process_connection_string(data)
-                processed_data = _process_csv_input(processed_data)
-                processed_data = _process_parquet_input(processed_data)
-                scan_result = pb.col_summary_tbl(data=processed_data)
                 source_type = f"External source: {data_source}"
-
-
-
-
-
-
+
+            table_type = _get_tbl_type(data)
+            # Get row count and column count for header
+            try:
+                total_rows = pb.get_row_count(data)
+                total_columns = pb.get_column_count(data)
+            except Exception:
+                total_rows = None
+                total_columns = None

         scan_time = time.time() - start_time

@@ -1167,7 +1399,7 @@ def scan(
         # Display detailed column summary using rich formatting
         try:
             _rich_print_scan_table(
-                scan_result, data_source, source_type, table_type, total_rows
+                scan_result, data_source, source_type, table_type, total_rows, total_columns
             )

         except Exception as e:
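Stripped of the console status spinners and error handling, the scan command's reworked flow in the hunks above reduces to roughly this sequence (a sketch; the calls are the ones visible in the diff):

import pointblank as pb

data = pb.load_dataset("small_table")        # _load_data_source short-circuits known dataset names
scan_result = pb.col_summary_tbl(data=data)  # built once, up front, for every source type
total_rows = pb.get_row_count(data)          # row/column counts now feed the table header
total_columns = pb.get_column_count(data)
print(total_rows, total_columns)             # 13 8 for small_table, per the dataset listing removed earlier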
@@ -1190,40 +1422,23 @@ def missing(data_source: str, output_html: str | None):
     \b
     - CSV file path (e.g., data.csv)
     - Parquet file path or pattern (e.g., data.parquet, data/*.parquet)
+    - GitHub URL to CSV/Parquet (e.g., https://github.com/user/repo/blob/main/data.csv)
     - Database connection string (e.g., duckdb:///path/to/db.ddb::table_name)
     - Dataset name from pointblank (small_table, game_revenue, nycflights, global_sales)
     """
     try:
         with console.status("[bold green]Loading data..."):
-            #
-
-
-
-            else:
-                # Assume it's a file path or connection string
-                data = data_source
-                console.print(f"[green]✓[/green] Loaded data source: {data_source}")
+            # Load the data source using the centralized function
+            data = _load_data_source(data_source)
+
+        console.print(f"[green]✓[/green] Loaded data source: {data_source}")

         # Generate missing values table
         with console.status("[bold green]Analyzing missing values..."):
             gt_table = pb.missing_vals_tbl(data)

-            #
+            # Data is already processed, just use it directly
             original_data = data
-            if isinstance(data, str):
-                # Process the data to get the actual data object
-                from pointblank.validate import (
-                    _process_connection_string,
-                    _process_csv_input,
-                    _process_parquet_input,
-                )
-
-                try:
-                    original_data = _process_connection_string(data)
-                    original_data = _process_csv_input(original_data)
-                    original_data = _process_parquet_input(original_data)
-                except Exception:  # pragma: no cover
-                    pass  # Use the string data as fallback

         if output_html:
             # Save HTML to file
@@ -1239,556 +1454,505 @@ def missing(data_source: str, output_html: str | None):
             sys.exit(1)


-@cli.command()
-@click.argument("output_file", type=click.Path())
-def validate_example(output_file: str):
-    """
-    Generate an example validation script.
-
-    Creates a sample Python script showing how to use Pointblank for validation.
-    """
-    example_script = '''"""
-Example Pointblank validation script.
-
-This script demonstrates how to create validation rules for your data.
-Modify the validation rules below to match your data requirements.
-"""
-
-import pointblank as pb
-
-# Create a validation object
-# The 'data' variable is automatically provided by the CLI
-validation = (
-    pb.Validate(
-        data=data,
-        tbl_name="Example Data",
-        label="CLI Validation Example",
-        thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
-    )
-    # Add your validation rules here
-    # Example rules (modify these based on your data structure):
-
-    # Check that specific columns exist
-    # .col_exists(["column1", "column2"])
-
-    # Check for null values
-    # .col_vals_not_null(columns="important_column")
-
-    # Check value ranges
-    # .col_vals_gt(columns="amount", value=0)
-    # .col_vals_between(columns="score", left=0, right=100)
-
-    # Check string patterns
-    # .col_vals_regex(columns="email", pattern=r"^[\\w\\.-]+@[\\w\\.-]+\\.[a-zA-Z]{2,}$")
-
-    # Check unique values
-    # .col_vals_unique(columns="id")
-
-    # Finalize the validation
-    .interrogate()
-)
-
-# The validation object will be automatically used by the CLI
-'''
-
-    Path(output_file).write_text(example_script)
-    console.print(f"[green]✓[/green] Example validation script created: {output_file}")
-    console.print("\nEdit the script to add your validation rules, then run:")
-    console.print(f"[cyan]pb validate your_data.csv {output_file}[/cyan]")
-
-
-@cli.command()
+@cli.command(name="validate")
 @click.argument("data_source", type=str)
-@click.
-
-
-
+@click.option(
+    "--check",
+    "checks",  # Changed to collect multiple values
+    type=click.Choice(
+        [
+            "rows-distinct",
+            "col-vals-not-null",
+            "rows-complete",
+            "col-exists",
+            "col-vals-in-set",
+            "col-vals-gt",
+            "col-vals-ge",
+            "col-vals-lt",
+            "col-vals-le",
+        ]
+    ),
+    multiple=True,  # Allow multiple --check options
+    help="Type of validation check to perform. Can be used multiple times for multiple checks.",
+)
+@click.option("--list-checks", is_flag=True, help="List available validation checks and exit")
+@click.option(
+    "--column",
+    "columns",  # Changed to collect multiple values
+    multiple=True,  # Allow multiple --column options
+    help="Column name or integer position as #N (1-based index) for validation.",
+)
+@click.option(
+    "--set",
+    "sets",  # Changed to collect multiple values
+    multiple=True,  # Allow multiple --set options
+    help="Comma-separated allowed values for col-vals-in-set checks.",
+)
+@click.option(
+    "--value",
+    "values",  # Changed to collect multiple values
+    type=float,
+    multiple=True,  # Allow multiple --value options
+    help="Numeric value for comparison checks.",
+)
+@click.option(
+    "--show-extract", is_flag=True, help="Show extract of failing rows if validation fails"
+)
+@click.option(
+    "--write-extract", type=str, help="Save failing rows to folder. Provide base name for folder."
+)
+@click.option(
+    "--limit", "-l", default=10, help="Maximum number of failing rows to show/save (default: 10)"
+)
+@click.option("--exit-code", is_flag=True, help="Exit with non-zero code if validation fails")
+@click.pass_context
 def validate(
+    ctx: click.Context,
     data_source: str,
-
-
-
-
+    checks: tuple[str, ...],  # Changed to tuple
+    columns: tuple[str, ...],  # Changed to tuple
+    sets: tuple[str, ...],  # Changed to tuple
+    values: tuple[float, ...],  # Changed to tuple
+    show_extract: bool,
+    write_extract: str | None,
+    limit: int,
+    exit_code: bool,
+    list_checks: bool,
 ):
     """
-
+    Perform single or multiple data validations.
+
+    Run one or more validation checks on your data in a single command.
+    Use multiple --check options to perform multiple validations.

     DATA_SOURCE can be:

     \b
     - CSV file path (e.g., data.csv)
     - Parquet file path or pattern (e.g., data.parquet, data/*.parquet)
+    - GitHub URL to CSV/Parquet (e.g., https://github.com/user/repo/blob/main/data.csv)
     - Database connection string (e.g., duckdb:///path/to/db.ddb::table_name)
     - Dataset name from pointblank (small_table, game_revenue, nycflights, global_sales)

-
-    See 'pb validate-example' for a sample script.
-    """
-    try:
-        with console.status("[bold green]Loading data..."):
-            # Try to load as a pointblank dataset first
-            if data_source in ["small_table", "game_revenue", "nycflights", "global_sales"]:
-                data = pb.load_dataset(data_source)
-                console.print(f"[green]✓[/green] Loaded dataset: {data_source}")
-            else:
-                # Assume it's a file path or connection string
-                data = data_source
-                console.print(f"[green]✓[/green] Loaded data source: {data_source}")
-
-        # Execute the validation script
-        with console.status("[bold green]Running validation..."):
-            # Read and execute the validation script
-            script_content = Path(validation_script).read_text()
-
-            # Create a namespace with pointblank and the data
-            namespace = {
-                "pb": pb,
-                "pointblank": pb,
-                "data": data,
-                "__name__": "__main__",
-            }
-
-            # Execute the script
-            try:
-                exec(script_content, namespace)
-            except Exception as e:
-                console.print(f"[red]Error executing validation script:[/red] {e}")
-                sys.exit(1)
-
-            # Look for a validation object in the namespace
-            validation = None
-
-            # Try to find the 'validation' variable specifically first
-            if "validation" in namespace:
-                validation = namespace["validation"]
-            else:
-                # Look for any validation object in the namespace
-                for key, value in namespace.items():
-                    if hasattr(value, "interrogate") and hasattr(value, "validation_info"):
-                        validation = value
-                        break
-                    # Also check if it's a Validate object that has been interrogated
-                    elif str(type(value)).find("Validate") != -1:
-                        validation = value
-                        break
-
-            if validation is None:
-                raise ValueError(
-                    "No validation object found in script. "
-                    "Script should create a Validate object and assign it to a variable named 'validation'."
-                )
-
-        console.print("[green]✓[/green] Validation completed")
-
-        # Display summary
-        _display_validation_summary(validation)
-
-        # Save outputs
-        if output_html:
-            try:
-                # Get HTML representation
-                html_content = validation._repr_html_()
-                Path(output_html).write_text(html_content, encoding="utf-8")
-                console.print(f"[green]✓[/green] HTML report saved to: {output_html}")
-            except Exception as e:
-                console.print(f"[yellow]Warning: Could not save HTML report: {e}[/yellow]")
-
-        if output_json:
-            try:
-                # Get JSON report
-                json_report = validation.get_json_report()
-                Path(output_json).write_text(json_report, encoding="utf-8")
-                console.print(f"[green]✓[/green] JSON summary saved to: {output_json}")
-            except Exception as e:
-                console.print(f"[yellow]Warning: Could not save JSON report: {e}[/yellow]")
-
-        # Check if we should fail on error
-        if fail_on_error:
-            try:
-                if (
-                    hasattr(validation, "validation_info")
-                    and validation.validation_info is not None
-                ):
-                    info = validation.validation_info
-                    n_critical = sum(1 for step in info if step.critical)
-                    n_error = sum(1 for step in info if step.error)
-
-                    if n_critical > 0 or n_error > 0:
-                        severity = "critical" if n_critical > 0 else "error"
-                        console.print(
-                            f"[red]Exiting with error due to {severity} validation failures[/red]"
-                        )
-                        sys.exit(1)
-            except Exception as e:
-                console.print(
-                    f"[yellow]Warning: Could not check validation status for fail-on-error: {e}[/yellow]"
-                )
-
-    except Exception as e:
-        console.print(f"[red]Error:[/red] {e}")
-        sys.exit(1)
-
-
-@cli.command()
-@click.argument("data_source", type=str)
-@click.argument("validation_script", type=click.Path(exists=True))
-@click.argument("step_number", type=int)
-@click.option(
-    "--limit", "-l", default=100, help="Maximum number of failing rows to show (default: 100)"
-)
-@click.option("--output-csv", type=click.Path(), help="Save failing rows to CSV file")
-@click.option("--output-html", type=click.Path(), help="Save failing rows table to HTML file")
-def extract(
-    data_source: str,
-    validation_script: str,
-    step_number: int,
-    limit: int,
-    output_csv: str | None,
-    output_html: str | None,
-):
-    """
-    Extract failing rows from a specific validation step.
-
-    This command runs a validation and extracts the rows that failed
-    a specific validation step, which is useful for debugging data quality issues.
-
-    DATA_SOURCE: Same as validate command
-    VALIDATION_SCRIPT: Path to validation script
-    STEP_NUMBER: The step number to extract failing rows from (1-based)
-    """
-    try:
-        with console.status("[bold green]Loading data..."):
-            # Try to load as a pointblank dataset first
|
|
1462
|
-
if data_source in ["small_table", "game_revenue", "nycflights", "global_sales"]:
|
|
1463
|
-
data = pb.load_dataset(data_source)
|
|
1464
|
-
console.print(f"[green]✓[/green] Loaded dataset: {data_source}")
|
|
1465
|
-
else:
|
|
1466
|
-
# Assume it's a file path or connection string
|
|
1467
|
-
data = data_source
|
|
1468
|
-
console.print(f"[green]✓[/green] Loaded data source: {data_source}")
|
|
1469
|
-
|
|
1470
|
-
# Execute the validation script
|
|
1471
|
-
with console.status("[bold green]Running validation..."):
|
|
1472
|
-
# Read and execute the validation script
|
|
1473
|
-
script_content = Path(validation_script).read_text()
|
|
1474
|
-
|
|
1475
|
-
# Create a namespace with pointblank and the data
|
|
1476
|
-
namespace = {
|
|
1477
|
-
"pb": pb,
|
|
1478
|
-
"pointblank": pb,
|
|
1479
|
-
"data": data,
|
|
1480
|
-
"__name__": "__main__",
|
|
1481
|
-
}
|
|
1482
|
-
|
|
1483
|
-
# Execute the script
|
|
1484
|
-
try:
|
|
1485
|
-
exec(script_content, namespace)
|
|
1486
|
-
except Exception as e:
|
|
1487
|
-
console.print(f"[red]Error executing validation script:[/red] {e}")
|
|
1488
|
-
sys.exit(1)
|
|
1489
|
-
|
|
1490
|
-
# Look for a validation object in the namespace
|
|
1491
|
-
validation = None
|
|
1492
|
-
if "validation" in namespace:
|
|
1493
|
-
validation = namespace["validation"]
|
|
1494
|
-
else:
|
|
1495
|
-
# Look for any validation object in the namespace
|
|
1496
|
-
for key, value in namespace.items():
|
|
1497
|
-
if hasattr(value, "interrogate") and hasattr(value, "validation_info"):
|
|
1498
|
-
validation = value
|
|
1499
|
-
break
|
|
1500
|
-
elif str(type(value)).find("Validate") != -1:
|
|
1501
|
-
validation = value
|
|
1502
|
-
break
|
|
1503
|
-
|
|
1504
|
-
if validation is None:
|
|
1505
|
-
raise ValueError(
|
|
1506
|
-
"No validation object found in script. "
|
|
1507
|
-
"Script should create a Validate object and assign it to a variable named 'validation'."
|
|
1508
|
-
)
|
|
1509
|
-
|
|
1510
|
-
console.print("[green]✓[/green] Validation completed")
|
|
1511
|
-
|
|
1512
|
-
# Extract failing rows from the specified step
|
|
1513
|
-
with console.status(f"[bold green]Extracting failing rows from step {step_number}..."):
|
|
1514
|
-
try:
|
|
1515
|
-
# Get the data extracts for the specific step
|
|
1516
|
-
step_extract = validation.get_data_extracts(i=step_number, frame=True)
|
|
1537
|
+
AVAILABLE CHECKS:
|
|
1517
1538
|
|
|
1518
|
-
|
|
1519
|
-
console.print(f"[yellow]No failing rows found for step {step_number}[/yellow]")
|
|
1520
|
-
return
|
|
1539
|
+
Use --list-checks to see all available validation methods with examples.
|
|
1521
1540
|
|
|
1522
|
-
|
|
1523
|
-
if len(step_extract) > limit:
|
|
1524
|
-
step_extract = step_extract.head(limit)
|
|
1525
|
-
console.print(f"[yellow]Limited to first {limit} failing rows[/yellow]")
|
|
1541
|
+
The default check is 'rows-distinct' which checks for duplicate rows.
|
|
1526
1542
|
|
|
1527
|
-
|
|
1543
|
+
\b
|
|
1544
|
+
- rows-distinct: Check if all rows in the dataset are unique (no duplicates)
|
|
1545
|
+
- rows-complete: Check if all rows are complete (no missing values in any column)
|
|
1546
|
+
- col-exists: Check if a specific column exists in the dataset (requires --column)
|
|
1547
|
+
- col-vals-not-null: Check if all values in a column are not null/missing (requires --column)
|
|
1548
|
+
- col-vals-gt: Check if all values in a column are greater than a threshold (requires --column and --value)
|
|
1549
|
+
- col-vals-ge: Check if all values in a column are greater than or equal to a threshold (requires --column and --value)
|
|
1550
|
+
- col-vals-lt: Check if all values in a column are less than a threshold (requires --column and --value)
|
|
1551
|
+
- col-vals-le: Check if all values in a column are less than or equal to a threshold (requires --column and --value)
|
|
1552
|
+
- col-vals-in-set: Check if all values in a column are in an allowed set (requires --column and --set)
|
|
1528
1553
|
|
|
1529
|
-
|
|
1530
|
-
if output_csv:
|
|
1531
|
-
if hasattr(step_extract, "write_csv"):
|
|
1532
|
-
step_extract.write_csv(output_csv)
|
|
1533
|
-
else:
|
|
1534
|
-
step_extract.to_csv(output_csv, index=False)
|
|
1535
|
-
console.print(f"[green]✓[/green] Failing rows saved to CSV: {output_csv}")
|
|
1554
|
+
Examples:
|
|
1536
1555
|
|
|
1537
|
-
|
|
1538
|
-
|
|
1539
|
-
|
|
1540
|
-
|
|
1541
|
-
|
|
1542
|
-
|
|
1543
|
-
|
|
1544
|
-
|
|
1545
|
-
|
|
1546
|
-
|
|
1556
|
+
\b
|
|
1557
|
+
pb validate data.csv # Uses default validation (rows-distinct)
|
|
1558
|
+
pb validate data.csv --list-checks # Show all available checks
|
|
1559
|
+
pb validate data.csv --check rows-distinct
|
|
1560
|
+
pb validate data.csv --check rows-distinct --show-extract
|
|
1561
|
+
pb validate data.csv --check rows-distinct --write-extract failing_rows_folder
|
|
1562
|
+
pb validate data.csv --check rows-distinct --exit-code
|
|
1563
|
+
pb validate data.csv --check rows-complete
|
|
1564
|
+
pb validate data.csv --check col-exists --column price
|
|
1565
|
+
pb validate data.csv --check col-vals-not-null --column email
|
|
1566
|
+
pb validate data.csv --check col-vals-gt --column score --value 50
|
|
1567
|
+
pb validate data.csv --check col-vals-in-set --column status --set "active,inactive,pending"
|
|
1568
|
+
|
|
1569
|
+
Multiple validations in one command:
|
|
1570
|
+
pb validate data.csv --check rows-distinct --check rows-complete
|
|
1571
|
+
pb validate data.csv --check col-vals-not-null --column email --check col-vals-gt --column age --value 18
|
|
1572
|
+
"""
|
|
1573
|
+
try:
|
|
1574
|
+
# Handle backward compatibility and parameter conversion
|
|
1575
|
+
import sys
|
|
1576
|
+
|
|
1577
|
+
# Convert parameter tuples to lists, handling default case
|
|
1578
|
+
if not checks:
|
|
1579
|
+
# No --check options provided, use default
|
|
1580
|
+
checks_list = ["rows-distinct"]
|
|
1581
|
+
is_using_default_check = True
|
|
1582
|
+
else:
|
|
1583
|
+
checks_list = list(checks)
|
|
1584
|
+
is_using_default_check = False
|
|
1547
1585
|
|
|
1548
|
-
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
title=f"Failing Rows - Step {step_number}",
|
|
1552
|
-
show_header=True,
|
|
1553
|
-
header_style="bold red",
|
|
1554
|
-
)
|
|
1555
|
-
info_table.add_column("Property", style="cyan")
|
|
1556
|
-
info_table.add_column("Value", style="white")
|
|
1586
|
+
columns_list = list(columns) if columns else []
|
|
1587
|
+
sets_list = list(sets) if sets else []
|
|
1588
|
+
values_list = list(values) if values else []
|
|
1557
1589
|
|
|
1558
|
-
|
|
1559
|
-
|
|
1560
|
-
|
|
1561
|
-
|
|
1562
|
-
)
|
|
1590
|
+
# Map parameters to checks intelligently
|
|
1591
|
+
mapped_columns, mapped_sets, mapped_values = _map_parameters_to_checks(
|
|
1592
|
+
checks_list, columns_list, sets_list, values_list
|
|
1593
|
+
)
|
|
1563
1594
|
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1595
|
+
# Handle --list-checks option
|
|
1596
|
+
if list_checks:
|
|
1597
|
+
console.print("[bold bright_cyan]Available Validation Checks:[/bold bright_cyan]")
|
|
1598
|
+
console.print()
|
|
1599
|
+
console.print("[bold magenta]Basic checks:[/bold magenta]")
|
|
1600
|
+
console.print(
|
|
1601
|
+
" • [bold cyan]rows-distinct[/bold cyan] Check for duplicate rows [yellow](default)[/yellow]"
|
|
1602
|
+
)
|
|
1603
|
+
console.print(
|
|
1604
|
+
" • [bold cyan]rows-complete[/bold cyan] Check for missing values in any column"
|
|
1605
|
+
)
|
|
1606
|
+
console.print()
|
|
1607
|
+
console.print(
|
|
1608
|
+
"[bold magenta]Column-specific checks [bright_black](require --column)[/bright_black]:[/bold magenta]"
|
|
1609
|
+
)
|
|
1610
|
+
console.print(" • [bold cyan]col-exists[/bold cyan] Check if a column exists")
|
|
1611
|
+
console.print(
|
|
1612
|
+
" • [bold cyan]col-vals-not-null[/bold cyan] Check for null values in a column"
|
|
1613
|
+
)
|
|
1614
|
+
console.print()
|
|
1615
|
+
console.print(
|
|
1616
|
+
"[bold magenta]Value comparison checks [bright_black](require --column and --value)[/bright_black]:[/bold magenta]"
|
|
1617
|
+
)
|
|
1618
|
+
console.print(
|
|
1619
|
+
" • [bold cyan]col-vals-gt[/bold cyan] Values greater than threshold"
|
|
1620
|
+
)
|
|
1621
|
+
console.print(
|
|
1622
|
+
" • [bold cyan]col-vals-ge[/bold cyan] Values greater than or equal to threshold"
|
|
1623
|
+
)
|
|
1624
|
+
console.print(" • [bold cyan]col-vals-lt[/bold cyan] Values less than threshold")
|
|
1625
|
+
console.print(
|
|
1626
|
+
" • [bold cyan]col-vals-le[/bold cyan] Values less than or equal to threshold"
|
|
1627
|
+
)
|
|
1628
|
+
console.print()
|
|
1629
|
+
console.print(
|
|
1630
|
+
"[bold magenta]Set validation check [bright_black](requires --column and --set)[/bright_black]:[/bold magenta]"
|
|
1631
|
+
)
|
|
1632
|
+
console.print(
|
|
1633
|
+
" • [bold cyan]col-vals-in-set[/bold cyan] Values must be in allowed set"
|
|
1634
|
+
)
|
|
1635
|
+
console.print()
|
|
1636
|
+
console.print("[bold bright_yellow]Examples:[/bold bright_yellow]")
|
|
1637
|
+
console.print(
|
|
1638
|
+
f" [bright_blue]pb validate {data_source} --check rows-distinct[/bright_blue]"
|
|
1639
|
+
)
|
|
1640
|
+
console.print(
|
|
1641
|
+
f" [bright_blue]pb validate {data_source} --check col-vals-not-null --column price[/bright_blue]"
|
|
1642
|
+
)
|
|
1643
|
+
console.print(
|
|
1644
|
+
f" [bright_blue]pb validate {data_source} --check col-vals-gt --column age --value 18[/bright_blue]"
|
|
1645
|
+
)
|
|
1646
|
+
import sys
|
|
1568
1647
|
|
|
1569
|
-
|
|
1570
|
-
console.print(f"[red]Error extracting failing rows:[/red] {e}")
|
|
1571
|
-
# Try to provide helpful information
|
|
1572
|
-
if hasattr(validation, "validation_info") and validation.validation_info:
|
|
1573
|
-
max_step = len(validation.validation_info)
|
|
1574
|
-
console.print(f"[yellow]Available steps: 1 to {max_step}[/yellow]")
|
|
1575
|
-
|
|
1576
|
-
# Show step information
|
|
1577
|
-
steps_table = Table(title="Available Validation Steps", show_header=True)
|
|
1578
|
-
steps_table.add_column("Step", style="cyan")
|
|
1579
|
-
steps_table.add_column("Type", style="white")
|
|
1580
|
-
steps_table.add_column("Column", style="green")
|
|
1581
|
-
steps_table.add_column("Has Failures", style="yellow")
|
|
1582
|
-
|
|
1583
|
-
for i, step in enumerate(validation.validation_info, 1):
|
|
1584
|
-
has_failures = "Yes" if not step.all_passed else "No"
|
|
1585
|
-
steps_table.add_row(
|
|
1586
|
-
str(i),
|
|
1587
|
-
step.assertion_type,
|
|
1588
|
-
str(step.column) if step.column else "—",
|
|
1589
|
-
has_failures,
|
|
1590
|
-
)
|
|
1648
|
+
sys.exit(0)
|
|
1591
1649
|
|
|
1592
|
-
|
|
1650
|
+
# Validate required parameters for different check types
|
|
1651
|
+
# Check parameters for each check in the list using mapped parameters
|
|
1652
|
+
for i, check in enumerate(checks_list):
|
|
1653
|
+
# Get corresponding mapped parameters for this check
|
|
1654
|
+
column = mapped_columns[i] if i < len(mapped_columns) else None
|
|
1655
|
+
set_val = mapped_sets[i] if i < len(mapped_sets) else None
|
|
1656
|
+
value = mapped_values[i] if i < len(mapped_values) else None
|
|
1657
|
+
|
|
1658
|
+
if check == "col-vals-not-null" and not column:
|
|
1659
|
+
console.print(f"[red]Error:[/red] --column is required for {check} check")
|
|
1660
|
+
console.print(
|
|
1661
|
+
"Example: pb validate data.csv --check col-vals-not-null --column email"
|
|
1662
|
+
)
|
|
1593
1663
|
sys.exit(1)
|
|
1594
1664
|
|
|
1595
|
-
|
|
1596
|
-
|
|
1597
|
-
|
|
1665
|
+
if check == "col-exists" and not column:
|
|
1666
|
+
console.print(f"[red]Error:[/red] --column is required for {check} check")
|
|
1667
|
+
console.print("Example: pb validate data.csv --check col-exists --column price")
|
|
1668
|
+
sys.exit(1)
|
|
1598
1669
|
|
|
1670
|
+
if check == "col-vals-in-set" and not column:
|
|
1671
|
+
console.print(f"[red]Error:[/red] --column is required for {check} check")
|
|
1672
|
+
console.print(
|
|
1673
|
+
"Example: pb validate data.csv --check col-vals-in-set --column status --set 'active,inactive'"
|
|
1674
|
+
)
|
|
1675
|
+
sys.exit(1)
|
|
1599
1676
|
|
|
1600
|
-
|
|
1601
|
-
|
|
1677
|
+
if check == "col-vals-in-set" and not set_val:
|
|
1678
|
+
console.print(f"[red]Error:[/red] --set is required for {check} check")
|
|
1679
|
+
console.print(
|
|
1680
|
+
"Example: pb validate data.csv --check col-vals-in-set --column status --set 'active,inactive'"
|
|
1681
|
+
)
|
|
1682
|
+
sys.exit(1)
|
|
1602
1683
|
|
|
1603
|
-
|
|
1604
|
-
|
|
1684
|
+
if check in ["col-vals-gt", "col-vals-ge", "col-vals-lt", "col-vals-le"] and not column:
|
|
1685
|
+
console.print(f"[red]Error:[/red] --column is required for {check} check")
|
|
1686
|
+
console.print(
|
|
1687
|
+
f"Example: pb validate data.csv --check {check} --column score --value 50"
|
|
1688
|
+
)
|
|
1689
|
+
sys.exit(1)
|
|
1605
1690
|
|
|
1606
|
-
|
|
1607
|
-
|
|
1608
|
-
|
|
1609
|
-
|
|
1610
|
-
|
|
1611
|
-
|
|
1612
|
-
|
|
1613
|
-
|
|
1614
|
-
|
|
1615
|
-
elif value > 99.0 and value < 100.0:
|
|
1616
|
-
return ">99%" # More than 99%
|
|
1617
|
-
else:
|
|
1618
|
-
return f"{int(round(value))}%" # Round to nearest integer with % sign
|
|
1691
|
+
if (
|
|
1692
|
+
check in ["col-vals-gt", "col-vals-ge", "col-vals-lt", "col-vals-le"]
|
|
1693
|
+
and value is None
|
|
1694
|
+
):
|
|
1695
|
+
console.print(f"[red]Error:[/red] --value is required for {check} check")
|
|
1696
|
+
console.print(
|
|
1697
|
+
f"Example: pb validate data.csv --check {check} --column score --value 50"
|
|
1698
|
+
)
|
|
1699
|
+
sys.exit(1)
|
|
1619
1700
|
|
|
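The removed fragment just above (the `>99%` branch) is the tail of `_format_missing_percentage`, whose full body does not appear in this hunk. A hedged reconstruction, inferring the remaining branches from the missing-values table's footer legend later in this diff (green dot = no missing values, red dot = completely missing, `<1%` and `>99%` for the near-extremes), might look like:

    def _format_missing_percentage(value: float) -> str:
        # Sketch only: the last two branches are visible in the removed lines
        # above; the dot and "<1%" branches are inferred from the footer
        # legend, not confirmed by this diff.
        if value == 0.0:
            return "[green]●[/green]"  # no missing values
        elif value == 100.0:
            return "[red]●[/red]"  # completely missing
        elif 0.0 < value < 1.0:
            return "<1%"  # less than 1% missing
        elif 99.0 < value < 100.0:
            return ">99%"  # more than 99% missing
        else:
            return f"{int(round(value))}%"  # round to nearest integer with % sign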
+        with console.status("[bold green]Loading data..."):
+            # Load the data source using the centralized function
+            data = _load_data_source(data_source)
+
+            # Get all column names for error reporting
+            if hasattr(data, "columns"):
+                all_columns = list(data.columns)
+            elif hasattr(data, "schema"):
+                all_columns = list(data.schema.names)
+            else:
+                all_columns = []

-def _rich_print_missing_table(gt_table: Any, original_data: Any = None) -> None:
-    """Convert a missing values GT table to Rich table with special formatting.
+            # Resolve any '#N' column references to actual column names
+            columns_list = _resolve_column_indices(columns_list, data)

-    Args:
-        gt_table: The GT table object for missing values
-        original_data: The original data source to extract column types
-    """
-    try:
-        # Extract the underlying data from the GT table
-        df = None
+            # Check for out-of-range #N columns and provide a helpful error
+            for col in columns_list:
+                if isinstance(col, str) and col.startswith("#"):
+                    try:
+                        idx = int(col[1:])
+                        if idx < 1 or idx > len(all_columns):
+                            console.print(
+                                f"[red]Error:[/red] There is no column {idx} (the column position "
+                                f"range is 1 to {len(all_columns)})"
+                            )
+                            sys.exit(1)
+                    except Exception:
+                        pass  # Let later validation handle other errors

-        if hasattr(gt_table, "_tbl_data") and gt_table._tbl_data is not None:
-            df = gt_table._tbl_data
-        elif hasattr(gt_table, "_data") and gt_table._data is not None:
-            df = gt_table._data
-        elif hasattr(gt_table, "data") and gt_table.data is not None:
-            df = gt_table.data
+            # Update mapped_columns to use resolved column names
+            mapped_columns, mapped_sets, mapped_values = _map_parameters_to_checks(
+                checks_list, columns_list, sets_list, values_list
+            )

-        if df is not None:
-            # Create a Rich table with horizontal lines
-            from rich.box import SIMPLE_HEAD
+        console.print(f"[green]✓[/green] Loaded data source: {data_source}")

-            rich_table = Table(show_header=True, header_style="bold magenta", box=SIMPLE_HEAD)
+        # Build a single validation object with chained checks
+        with console.status(f"[bold green]Running {len(checks_list)} validation check(s)..."):
+            # Initialize validation object
+            validation = pb.Validate(
+                data=data,
+                tbl_name=f"Data from {data_source}",
+                label=f"CLI Validation: {', '.join(checks_list)}",
+            )

-            # Get column names
-            columns = []
-            try:
-                if hasattr(df, "columns"):
-                    columns = list(df.columns)
-                elif hasattr(df, "schema"):
-                    columns = list(df.schema.names)
-            except Exception as e:
-                console.print(f"[red]Error getting columns:[/red] {e}")
-                columns = []
+            # Add each check to the validation chain
+            for i, check in enumerate(checks_list):
+                # Get corresponding mapped parameters for this check
+                column = mapped_columns[i] if i < len(mapped_columns) else None
+                set_val = mapped_sets[i] if i < len(mapped_sets) else None
+                value = mapped_values[i] if i < len(mapped_values) else None

-            if not columns:
-                columns = [f"Column {i + 1}" for i in range(10)]  # Fallback
+                if check == "rows-distinct":
+                    validation = validation.rows_distinct()
+                elif check == "col-vals-not-null":
+                    validation = validation.col_vals_not_null(columns=column)
+                elif check == "rows-complete":
+                    validation = validation.rows_complete()
+                elif check == "col-exists":
+                    validation = validation.col_exists(columns=column)
+                elif check == "col-vals-in-set":
+                    # Parse the comma-separated set values
+                    allowed_values = [v.strip() for v in set_val.split(",")]
+                    validation = validation.col_vals_in_set(columns=column, set=allowed_values)
+                elif check == "col-vals-gt":
+                    validation = validation.col_vals_gt(columns=column, value=value)
+                elif check == "col-vals-ge":
+                    validation = validation.col_vals_ge(columns=column, value=value)
+                elif check == "col-vals-lt":
+                    validation = validation.col_vals_lt(columns=column, value=value)
+                elif check == "col-vals-le":
+                    validation = validation.col_vals_le(columns=column, value=value)
+                else:
+                    console.print(f"[red]Error:[/red] Unknown check type: {check}")
+                    sys.exit(1)

-            # Get original data to extract column types
-            column_types = {}
-            if original_data is not None:
-                try:
-                    # Get column types from original data
-                    if hasattr(original_data, "columns"):
-                        original_columns = list(original_data.columns)
-                        column_types = _get_column_dtypes(original_data, original_columns)
-                except Exception as e:
-                    console.print(f"[red]Error getting column types:[/red] {e}")
-                    pass  # Use empty dict as fallback
+            # Execute all validations
+            validation = validation.interrogate()
+            all_passed = validation.all_passed()
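Chaining plus a single `interrogate()` is the whole execution model here: each `--check` adds one step to the same `Validate` object, and `all_passed()` then summarizes every step at once. A sketch of the object the loop builds for `--check rows-distinct --check col-vals-not-null --column email` (assuming a loaded table with an `email` column):

    validation = (
        pb.Validate(data=data, tbl_name="Data from data.csv", label="CLI Validation: rows-distinct, col-vals-not-null")
        .rows_distinct()
        .col_vals_not_null(columns="email")
        .interrogate()
    )
    all_passed = validation.all_passed()  # False if any step had failing rows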

-            # Add columns to Rich table with special formatting for missing values table
-            sector_columns = [col for col in columns if col != "columns" and col.isdigit()]
+        # Display completion message
+        if len(checks_list) == 1:
+            if is_using_default_check:
+                console.print(
+                    f"[green]✓[/green] {checks_list[0]} validation completed [dim](default validation)[/dim]"
+                )
+            else:
+                console.print(f"[green]✓[/green] {checks_list[0]} validation completed")
+        else:
+            console.print(f"[green]✓[/green] {len(checks_list)} validations completed")
+
+        # Display results based on whether we have single or multiple checks
+        if len(checks_list) == 1:
+            # Single check - use current display format
+            _display_validation_result(
+                validation,
+                checks_list,
+                mapped_columns,
+                mapped_sets,
+                mapped_values,
+                data_source,
+                0,
+                1,
+                show_extract,
+                write_extract,
+                limit,
+            )
+        else:
+            # Multiple checks - use stacked display format
+            any_failed = False
+            for i in range(len(checks_list)):
+                console.print()  # Add spacing between results
+                _display_validation_result(
+                    validation,
+                    checks_list,
+                    mapped_columns,
+                    mapped_sets,
+                    mapped_values,
+                    data_source,
+                    i,
+                    len(checks_list),
+                    show_extract,
+                    write_extract,
+                    limit,
+                )

-            # Two separate columns: Column name (20 chars) and Data type (10 chars)
-            rich_table.add_column("Column", style="cyan", no_wrap=True, width=20)
-            rich_table.add_column("Type", style="yellow", no_wrap=True, width=10)
+                # Check if this validation failed
+                if hasattr(validation, "validation_info") and len(validation.validation_info) > i:
+                    step_info = validation.validation_info[i]
+                    if step_info.n_failed > 0:
+                        any_failed = True

-            # Sector columns: All same width, optimized for "100%" (4 chars + padding)
-            for sector in sector_columns:
-                rich_table.add_column(
-                    sector,
-                    style="cyan",
-                    justify="center",
-                    no_wrap=True,
-                    width=5,  # Fixed width optimized for percentage values
+            # Show tip about --show-extract if any failed and not already used
+            if any_failed and not show_extract:
+                console.print()
+                console.print(
+                    "[bright_blue]💡 Tip:[/bright_blue] [cyan]Use --show-extract to see the failing rows[/cyan]"
                )

-            # Convert data to rows with special formatting
-            rows = []
-            try:
-                if hasattr(df, "to_dicts"):
-                    data_dict = df.to_dicts()
-                elif hasattr(df, "to_dict"):
-                    data_dict = df.to_dict("records")
-                else:
-                    data_dict = []
+        # Add informational hints when using default validation (only for single check)
+        if len(checks_list) == 1 and is_using_default_check:
+            console.print()
+            console.print("[bold blue]ℹ️ Information:[/bold blue] Using default validation method")
+            console.print("To specify a different validation, use the --check option.")
+            console.print()
+            console.print("[bold magenta]Common validation options:[/bold magenta]")
+            console.print(
+                " • [bold cyan]--check rows-complete[/bold cyan] Check for rows with missing values"
+            )
+            console.print(
+                " • [bold cyan]--check col-vals-not-null[/bold cyan] Check for null values in a column [bright_black](requires --column)[/bright_black]"
+            )
+            console.print(
+                " • [bold cyan]--check col-exists[/bold cyan] Check if a column exists [bright_black](requires --column)[/bright_black]"
+            )
+            console.print()
+            console.print("[bold bright_yellow]Examples:[/bold bright_yellow]")
+            console.print(
+                f" [bright_blue]pb validate {data_source} --check rows-complete[/bright_blue]"
+            )
+            console.print(
+                f" [bright_blue]pb validate {data_source} --check col-vals-not-null --column price[/bright_blue]"
+            )

-            for i, row in enumerate(data_dict):
-                try:
-                    # Each row should have: [column_name, data_type, sector1, sector2, ...]
-                    column_name = str(row.get("columns", ""))
+        # Exit with appropriate code if requested
+        if exit_code and not all_passed:
+            console.print("[dim]Exiting with non-zero code due to validation failure[/dim]")
+            import sys

-                    # Truncate column name to 20 characters with ellipsis if needed
-                    if len(column_name) > 20:
-                        truncated_name = column_name[:17] + "…"
-                    else:
-                        truncated_name = column_name
+            sys.exit(1)

-                    # Get data type for this column
-                    if column_name in column_types:
-                        dtype = column_types[column_name]
-                        if len(dtype) > 10:
-                            truncated_dtype = dtype[:9] + "…"
-                        else:
-                            truncated_dtype = dtype
-                    else:
-                        truncated_dtype = "?"
+    except Exception as e:
+        console.print(f"[red]Error:[/red] {e}")
+        sys.exit(1)

-                    # Start building the row with column name and type
-                    formatted_row = [truncated_name, truncated_dtype]

-                    # Add sector values (formatted percentages)
-                    for sector in sector_columns:
-                        value = row.get(sector, 0.0)
-                        if isinstance(value, (int, float)):
-                            formatted_row.append(_format_missing_percentage(float(value)))
-                        else:
-                            formatted_row.append(str(value))
+@cli.command()
+def datasets():
+    """
+    List available built-in datasets.
+    """
+    from rich.box import SIMPLE_HEAD

-                    rows.append(formatted_row)
+    datasets_info = [
+        ("small_table", "13 rows × 8 columns", "Small demo dataset for testing"),
+        ("game_revenue", "2,000 rows × 11 columns", "Game development company revenue data"),
+        ("nycflights", "336,776 rows × 18 columns", "NYC airport flights data from 2013"),
+        ("global_sales", "50,000 rows × 20 columns", "Global sales data across regions"),
+    ]

-                except Exception as e:
-                    console.print(f"[red]Error processing row {i}:[/red] {e}")
-                    continue
+    table = Table(
+        title="Available Pointblank Datasets", show_header=True, header_style="bold magenta"
+    )

-            except Exception as e:
-                console.print(f"[red]Error extracting data:[/red] {e}")
-                rows = [["Error extracting data", "?", *["" for _ in sector_columns]]]
+    # Create the datasets table
+    table = Table(
+        title="Available Pointblank Datasets",
+        show_header=True,
+        header_style="bold magenta",
+        box=SIMPLE_HEAD,
+        title_style="bold cyan",
+        title_justify="left",
+    )

-            # Add rows to Rich table
-            for row in rows:
-                try:
-                    rich_table.add_row(*row)
-                except Exception as e:
-                    console.print(f"[red]Error adding row:[/red] {e}")
-                    break
+    table.add_column("Dataset Name", style="cyan", no_wrap=True)
+    table.add_column("Dimensions", style="green")
+    table.add_column("Description", style="white")

-            # Show the table with custom spanner header if we have sector columns
-            if sector_columns:
-                # Create a custom header line that shows the spanner
-                header_parts = []
-                header_parts.append(" " * 20)  # Space for Column header
-                header_parts.append(" " * 10)  # Space for Type header
+    for name, dims, desc in datasets_info:
+        table.add_row(name, dims, desc)

-                # Left-align "Row Sectors" with the first numbered column
-                row_sectors_text = "Row Sectors"
-                header_parts.append(row_sectors_text)
+    console.print(table)
+    console.print("\n[dim]Use these dataset names directly with any pb CLI command.[/dim]")
+    console.print("[dim]Example: pb preview small_table[/dim]")
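These names are the same ones `pb.load_dataset()` accepts (the CLI routes them through `_load_data_source()`), so any of them can stand in for a file path; for instance:

    import pointblank as pb

    tbl = pb.load_dataset("small_table")  # 13 rows × 8 columns, per the table above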
1914
|
+
console.print("[dim]Example: pb preview small_table[/dim]")
|
|
1758
1915
|
|
|
1759
|
-
# Print the custom spanner header
|
|
1760
|
-
console.print("[dim]" + " ".join(header_parts) + "[/dim]")
|
|
1761
1916
|
|
|
1762
|
-
|
|
1763
|
-
|
|
1764
|
-
|
|
1765
|
-
|
|
1917
|
+
@cli.command()
|
|
1918
|
+
def requirements():
|
|
1919
|
+
"""
|
|
1920
|
+
Check installed dependencies and their availability.
|
|
1921
|
+
"""
|
|
1922
|
+
from rich.box import SIMPLE_HEAD
|
|
1766
1923
|
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1924
|
+
dependencies = [
|
|
1925
|
+
("polars", "Polars DataFrame support"),
|
|
1926
|
+
("pandas", "Pandas DataFrame support"),
|
|
1927
|
+
("ibis", "Ibis backend support (DuckDB, etc.)"),
|
|
1928
|
+
("duckdb", "DuckDB database support"),
|
|
1929
|
+
("pyarrow", "Parquet file support"),
|
|
1930
|
+
]
|
|
1770
1931
|
|
|
1771
|
-
|
|
1772
|
-
|
|
1932
|
+
# Create requirements table
|
|
1933
|
+
table = Table(
|
|
1934
|
+
title="Dependency Status",
|
|
1935
|
+
show_header=True,
|
|
1936
|
+
header_style="bold magenta",
|
|
1937
|
+
box=SIMPLE_HEAD,
|
|
1938
|
+
title_style="bold cyan",
|
|
1939
|
+
title_justify="left",
|
|
1940
|
+
)
|
|
1773
1941
|
|
|
1774
|
-
|
|
1775
|
-
|
|
1776
|
-
|
|
1777
|
-
"[dim]Symbols: [green]●[/green] = no missing values, "
|
|
1778
|
-
"[red]●[/red] = completely missing, "
|
|
1779
|
-
"<1% = less than 1% missing, "
|
|
1780
|
-
">99% = more than 99% missing[/dim]"
|
|
1781
|
-
)
|
|
1782
|
-
console.print(footer_text)
|
|
1942
|
+
table.add_column("Package", style="cyan", no_wrap=True)
|
|
1943
|
+
table.add_column("Status", style="white")
|
|
1944
|
+
table.add_column("Description", style="dim")
|
|
1783
1945
|
|
|
1946
|
+
for package, description in dependencies:
|
|
1947
|
+
if _is_lib_present(package):
|
|
1948
|
+
status = "[green]✓ Installed[/green]"
|
|
1784
1949
|
else:
|
|
1785
|
-
|
|
1786
|
-
_rich_print_gt_table(gt_table)
|
|
1950
|
+
status = "[red]✗ Not installed[/red]"
|
|
1787
1951
|
|
|
1788
|
-
|
|
1789
|
-
|
|
1790
|
-
|
|
1791
|
-
|
|
1952
|
+
table.add_row(package, status, description)
|
|
1953
|
+
|
|
1954
|
+
console.print(table)
|
|
1955
|
+
console.print("\n[dim]Install missing packages to enable additional functionality.[/dim]")
|
|
1792
1956
|
|
|
1793
1957
|
|
|
1794
1958
|
def _rich_print_scan_table(
|
|
@@ -1797,6 +1961,7 @@ def _rich_print_scan_table(
|
|
|
1797
1961
|
source_type: str,
|
|
1798
1962
|
table_type: str,
|
|
1799
1963
|
total_rows: int | None = None,
|
|
1964
|
+
total_columns: int | None = None,
|
|
1800
1965
|
) -> None:
|
|
1801
1966
|
"""
|
|
1802
1967
|
Display scan results as a Rich table in the terminal with statistical measures.
|
|
@@ -1807,6 +1972,7 @@ def _rich_print_scan_table(
|
|
|
1807
1972
|
source_type: Type of data source (e.g., "Pointblank dataset: small_table")
|
|
1808
1973
|
table_type: Type of table (e.g., "polars.LazyFrame")
|
|
1809
1974
|
total_rows: Total number of rows in the dataset
|
|
1975
|
+
total_columns: Total number of columns in the dataset
|
|
1810
1976
|
"""
|
|
1811
1977
|
try:
|
|
1812
1978
|
import re
|
|
@@ -1828,6 +1994,11 @@ def _rich_print_scan_table(
|
|
|
1828
1994
|
# Create a comprehensive title with data source, source type, and table type
|
|
1829
1995
|
title_text = f"Column Summary / {source_type} / {table_type}"
|
|
1830
1996
|
|
|
1997
|
+
# Add dimensions subtitle in gray if available
|
|
1998
|
+
if total_rows is not None and total_columns is not None:
|
|
1999
|
+
title_text += f"\n[dim]{total_rows:,} rows / {total_columns} columns[/dim]"
|
|
2000
|
+
|
|
2001
|
+
# Create the scan table
|
|
1831
2002
|
scan_table = Table(
|
|
1832
2003
|
title=title_text,
|
|
1833
2004
|
show_header=True,
|
|
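The `:,` format spec groups thousands, so for the nycflights dimensions the new subtitle renders as shown in this quick check:

    total_rows, total_columns = 336776, 18
    print(f"{total_rows:,} rows / {total_columns} columns")
    # 336,776 rows / 18 columns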
@@ -1990,6 +2161,7 @@ def _rich_print_scan_table(
                 return f"{num_val:.2f}"
             else:
                 # Very small numbers - use scientific notation
+
                 return f"{num_val:.1e}"

     except (ValueError, TypeError):
@@ -2002,83 +2174,260 @@ def _rich_print_scan_table(
         if len(str_val) > max_width:
             return str_val[: max_width - 1] + "…"

-        # General string truncation with ellipsis
-        if len(str_val) > max_width:
-            return str_val[: max_width - 1] + "…"
+        # General string truncation with ellipsis
+        if len(str_val) > max_width:
+            return str_val[: max_width - 1] + "…"
+
+        return str_val
+
+        # Populate table rows
+        num_rows = len(data_dict["colname"])
+        for i in range(num_rows):
+            row_data = []
+
+            # Column name and type from HTML content
+            colname_html = data_dict["colname"][i]
+            column_name, data_type = extract_column_info(colname_html)
+            row_data.append(column_name)
+            row_data.append(data_type)
+
+            # Missing values (NA)
+            missing_val = data_dict.get("n_missing", [None] * num_rows)[i]
+            row_data.append(format_value(missing_val, is_missing=True, max_width=6))
+
+            # Unique values (UQ)
+            unique_val = data_dict.get("n_unique", [None] * num_rows)[i]
+            row_data.append(format_value(unique_val, is_unique=True, max_width=8))
+
+            # Statistical columns
+            for stat_col in stat_columns:
+                stat_val = data_dict.get(stat_col, [None] * num_rows)[i]
+                # Use appropriate width based on column type
+                if stat_col in ["q_1", "iqr"]:
+                    width = 8
+                elif stat_col in ["mean", "std", "min", "median", "max", "q_3"]:
+                    width = 9
+                else:
+                    width = 8
+                row_data.append(format_value(stat_val, max_width=width))
+
+            scan_table.add_row(*row_data)
+
+        # Display the results
+        console.print()
+        console.print(scan_table)
+
+    except Exception as e:
+        # Fallback to simple message if table creation fails
+        console.print(f"[yellow]Scan results available for {data_source}[/yellow]")
+        console.print(f"[red]Error displaying table: {str(e)}[/red]")
+
+
+def _rich_print_missing_table(gt_table: Any, original_data: Any = None) -> None:
+    """Convert a missing values GT table to Rich table with special formatting.
+
+    Args:
+        gt_table: The GT table object for missing values
+        original_data: The original data source to extract column types
+    """
+    try:
+        # Extract the underlying data from the GT table
+        df = None
+
+        if hasattr(gt_table, "_tbl_data") and gt_table._tbl_data is not None:
+            df = gt_table._tbl_data
+        elif hasattr(gt_table, "_data") and gt_table._data is not None:
+            df = gt_table._data
+        elif hasattr(gt_table, "data") and gt_table.data is not None:
+            df = gt_table.data
+
+        if df is not None:
+            from rich.box import SIMPLE_HEAD
+
+            # Create the missing values table
+            rich_table = Table(show_header=True, header_style="bold magenta", box=SIMPLE_HEAD)
+
+            # Get column names
+            columns = []
+            try:
+                if hasattr(df, "columns"):
+                    columns = list(df.columns)
+                elif hasattr(df, "schema"):
+                    columns = list(df.schema.names)
+            except Exception as e:
+                console.print(f"[red]Error getting columns:[/red] {e}")
+                columns = []
+
+            if not columns:
+                columns = [f"Column {i + 1}" for i in range(10)]  # Fallback
+
+            # Get original data to extract column types
+            column_types = {}
+            if original_data is not None:
+                try:
+                    # Get column types from original data
+                    if hasattr(original_data, "columns"):
+                        original_columns = list(original_data.columns)
+                        column_types = _get_column_dtypes(original_data, original_columns)
+                except Exception as e:
+                    console.print(f"[red]Error getting column types:[/red] {e}")
+                    pass  # Use empty dict as fallback
+
+            # Add columns to Rich table with special formatting for missing values table
+            sector_columns = [col for col in columns if col != "columns" and col.isdigit()]
+
+            # Two separate columns: Column name (20 chars) and Data type (10 chars)
+            rich_table.add_column("Column", style="cyan", no_wrap=True, width=20)
+            rich_table.add_column("Type", style="yellow", no_wrap=True, width=10)
+
+            # Sector columns: All same width, optimized for "100%" (4 chars + padding)
+            for sector in sector_columns:
+                rich_table.add_column(
+                    sector,
+                    style="cyan",
+                    justify="center",
+                    no_wrap=True,
+                    width=5,  # Fixed width optimized for percentage values
+                )
+
+            # Convert data to rows with special formatting
+            rows = []
+            try:
+                if hasattr(df, "to_dicts"):
+                    data_dict = df.to_dicts()
+                elif hasattr(df, "to_dict"):
+                    data_dict = df.to_dict("records")
+                else:
+                    data_dict = []
+
+                for i, row in enumerate(data_dict):
+                    try:
+                        # Each row should have: [column_name, data_type, sector1, sector2, ...]
+                        column_name = str(row.get("columns", ""))
+
+                        # Truncate column name to 20 characters with ellipsis if needed
+                        if len(column_name) > 20:
+                            truncated_name = column_name[:17] + "…"
+                        else:
+                            truncated_name = column_name
+
+                        # Get data type for this column
+                        if column_name in column_types:
+                            dtype = column_types[column_name]
+                            if len(dtype) > 10:
+                                truncated_dtype = dtype[:9] + "…"
+                            else:
+                                truncated_dtype = dtype
+                        else:
+                            truncated_dtype = "?"
+
+                        # Start building the row with column name and type
+                        formatted_row = [truncated_name, truncated_dtype]
+
+                        # Add sector values (formatted percentages)
+                        for sector in sector_columns:
+                            value = row.get(sector, 0.0)
+                            if isinstance(value, (int, float)):
+                                formatted_row.append(_format_missing_percentage(float(value)))
+                            else:
+                                formatted_row.append(str(value))
+
+                        rows.append(formatted_row)
+
+                    except Exception as e:
+                        console.print(f"[red]Error processing row {i}:[/red] {e}")
+                        continue
+
+            except Exception as e:
+                console.print(f"[red]Error extracting data:[/red] {e}")
+                rows = [["Error extracting data", "?", *["" for _ in sector_columns]]]
+
+            # Add rows to Rich table
+            for row in rows:
+                try:
+                    rich_table.add_row(*row)
+                except Exception as e:
+                    console.print(f"[red]Error adding row:[/red] {e}")
+                    break
+
+            # Show the table with custom spanner header if we have sector columns
+            if sector_columns:
+                # Create a custom header line that shows the spanner
+                header_parts = []
+                header_parts.append(" " * 20)  # Space for Column header
+                header_parts.append(" " * 10)  # Space for Type header
+
+                # Left-align "Row Sectors" with the first numbered column
+                row_sectors_text = "Row Sectors"
+                header_parts.append(row_sectors_text)

-        return str_val
+                # Print the custom spanner header
+                console.print("[dim]" + " ".join(header_parts) + "[/dim]")

-        # Populate table rows
-        num_rows = len(data_dict["colname"])
-        for i in range(num_rows):
-            row_data = []
+                # Add a horizontal rule below the spanner
+                rule_parts = []
+                rule_parts.append(" " * 20)  # Space for Column header
+                rule_parts.append(" " * 10)  # Space for Type header

-            # Column name and type from HTML content
-            colname_html = data_dict["colname"][i]
-            column_name, data_type = extract_column_info(colname_html)
-            row_data.append(column_name)
-            row_data.append(data_type)
+                # Use a fixed width horizontal rule for "Row Sectors"
+                horizontal_rule = "─" * 20
+                rule_parts.append(horizontal_rule)

-            # Missing values (NA)
-            missing_val = data_dict.get("n_missing", [None] * num_rows)[i]
-            row_data.append(format_value(missing_val, is_missing=True, max_width=6))
+                # Print the horizontal rule
+                console.print("[dim]" + " ".join(rule_parts) + "[/dim]")

-            # Unique values (UQ)
-            unique_val = data_dict.get("n_unique", [None] * num_rows)[i]
-            row_data.append(format_value(unique_val, is_unique=True, max_width=8))
+            # Print the Rich table (will handle terminal width automatically)
+            console.print(rich_table)
+            footer_text = (
+                "[dim]Symbols: [green]●[/green] = no missing values, "
+                "[red]●[/red] = completely missing, "
+                "<1% = less than 1% missing, "
+                ">99% = more than 99% missing[/dim]"
+            )
+            console.print(footer_text)

-            # Statistical columns
-            for stat_col in stat_columns:
-                stat_val = data_dict.get(stat_col, [None] * num_rows)[i]
-                # Use appropriate width based on column type
-                if stat_col in ["q_1", "iqr"]:
-                    width = 8
-                elif stat_col in ["mean", "std", "min", "median", "max", "q_3"]:
-                    width = 9
-                else:
-                    width = 8
-                row_data.append(format_value(stat_val, max_width=width))
+        else:
+            # Fallback to regular table display
+            _rich_print_gt_table(gt_table)

-            scan_table.add_row(*row_data)
+    except Exception as e:
+        console.print(f"[red]Error rendering missing values table:[/red] {e}")
+        # Fallback to regular table display
+        _rich_print_gt_table(gt_table)

-        # Display the results
-        console.print()
-        console.print(scan_table)  # Add informational footer about the scan scope
-        try:
-            if total_rows is not None:
-                # Full table scan
-                footer_text = f"[dim]Scan from all {total_rows:,} rows in the table.[/dim]"
-
-                # Create a simple footer
-                footer_table = Table(
-                    show_header=False,
-                    show_lines=False,
-                    box=None,
-                    padding=(0, 0),
-                )
-                footer_table.add_column("", style="dim", width=80)
-                footer_table.add_row(footer_text)
-                console.print(footer_table)

-
-
-
+def _map_parameters_to_checks(
+    checks_list: list[str], columns_list: list[str], sets_list: list[str], values_list: list[float]
+) -> tuple[list[str], list[str], list[float]]:
+    """
+    Map parameters to checks intelligently, handling flexible parameter ordering.

-        except Exception as e:
-            # Fallback to simple message if table creation fails
-            console.print(f"[yellow]Scan results available for {data_source}[/yellow]")
-            console.print(f"[red]Error displaying table: {str(e)}[/red]")
+    This function distributes the provided parameters across checks based on what each check needs.
+    For checks that don't need certain parameters, None/empty values are assigned.

+    Args:
+        checks_list: List of validation check types
+        columns_list: List of column names provided by user
+        sets_list: List of set values provided by user
+        values_list: List of numeric values provided by user

-
-
-
-    "
-
-
-
+    Returns:
+        Tuple of (mapped_columns, mapped_sets, mapped_values) where each list
+        has the same length as checks_list
+    """
+    mapped_columns = []
+    mapped_sets = []
+    mapped_values = []
+
+    # Keep track of which parameters we've used
+    column_index = 0
+    set_index = 0
+    value_index = 0
+
+    for check in checks_list:
+        # Determine what parameters this check needs
+        needs_column = check in [
            "col-vals-not-null",
-            "rows-complete",
            "col-exists",
            "col-vals-in-set",
            "col-vals-gt",
@@ -2086,652 +2435,1210 @@ def _rich_print_scan_table(
            "col-vals-lt",
            "col-vals-le",
        ]
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    )
-
-
-    )
-
-
+        needs_set = check == "col-vals-in-set"
+        needs_value = check in ["col-vals-gt", "col-vals-ge", "col-vals-lt", "col-vals-le"]
+
+        # Assign column parameter if needed
+        if needs_column:
+            if column_index < len(columns_list):
+                mapped_columns.append(columns_list[column_index])
+                column_index += 1
+            else:
+                mapped_columns.append(None)  # Will cause validation error later
+        else:
+            mapped_columns.append(None)
+
+        # Assign set parameter if needed
+        if needs_set:
+            if set_index < len(sets_list):
+                mapped_sets.append(sets_list[set_index])
+                set_index += 1
+            else:
+                mapped_sets.append(None)  # Will cause validation error later
+        else:
+            mapped_sets.append(None)
+
+        # Assign value parameter if needed
+        if needs_value:
+            if value_index < len(values_list):
+                mapped_values.append(values_list[value_index])
+                value_index += 1
+            else:
+                mapped_values.append(None)  # Will cause validation error later
+        else:
+            mapped_values.append(None)
+
+    return mapped_columns, mapped_sets, mapped_values
+
+
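Because each parameter list is consumed in order, and only by the checks that declare a need for it, a mixed command line maps deterministically. A small worked example of the function above:

    checks = ["rows-distinct", "col-vals-gt", "col-vals-in-set"]
    cols, sets_, vals = _map_parameters_to_checks(
        checks, ["age", "status"], ["active,inactive"], [18.0]
    )
    # rows-distinct consumes nothing; col-vals-gt takes the first --column and
    # the first --value; col-vals-in-set takes the next --column and first --set.
    assert cols == [None, "age", "status"]
    assert sets_ == [None, None, "active,inactive"]
    assert vals == [None, 18.0, None]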
2473
|
+
|
|
2474
|
+
def _resolve_column_indices(columns_list, data):
|
|
2475
|
+
"""
|
|
2476
|
+
Replace any '#N' entries in columns_list with the actual column name from data (1-based).
|
|
2477
|
+
"""
|
|
2478
|
+
# Get column names from the data
|
|
2479
|
+
if hasattr(data, "columns"):
|
|
2480
|
+
all_columns = list(data.columns)
|
|
2481
|
+
elif hasattr(data, "schema"):
|
|
2482
|
+
all_columns = list(data.schema.names)
|
|
2483
|
+
else:
|
|
2484
|
+
return columns_list # Can't resolve, return as-is
|
|
2485
|
+
|
|
2486
|
+
resolved = []
|
|
2487
|
+
for col in columns_list:
|
|
2488
|
+
if isinstance(col, str) and col.startswith("#"):
|
|
2489
|
+
try:
|
|
2490
|
+
idx = int(col[1:]) - 1 # 1-based to 0-based
|
|
2491
|
+
if 0 <= idx < len(all_columns):
|
|
2492
|
+
resolved.append(all_columns[idx])
|
|
2493
|
+
else:
|
|
2494
|
+
resolved.append(col) # Out of range, keep as-is
|
|
2495
|
+
except Exception:
|
|
2496
|
+
resolved.append(col) # Not a valid number, keep as-is
|
|
2497
|
+
else:
|
|
2498
|
+
resolved.append(col)
|
|
2499
|
+
return resolved
|
|
2500
|
+
|
|
2501
|
+
|
|
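The `#N` references are 1-based column positions, resolved against anything exposing `.columns` or `.schema.names`. A quick example against a small Polars frame:

    import polars as pl

    df = pl.DataFrame({"id": [1], "email": ["a@b.co"], "score": [10]})
    resolved = _resolve_column_indices(["#2", "score", "#9"], df)
    # "#2" -> second column; plain names pass through; out-of-range "#9" stays as-is
    assert resolved == ["email", "score", "#9"]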
2502
|
+
def _display_validation_result(
|
|
2503
|
+
validation: Any,
|
|
2504
|
+
checks_list: list[str],
|
|
2505
|
+
columns_list: list[str],
|
|
2506
|
+
sets_list: list[str],
|
|
2507
|
+
values_list: list[float],
|
|
2508
|
+
data_source: str,
|
|
2509
|
+
step_index: int,
|
|
2510
|
+
total_checks: int,
|
|
2511
|
+
show_extract: bool,
|
|
2512
|
+
write_extract: str | None,
|
|
2513
|
+
limit: int,
|
|
2514
|
+
) -> None:
|
|
2515
|
+
"""Display a single validation result with proper formatting for single or multiple checks."""
|
|
2516
|
+
from rich.box import SIMPLE_HEAD
|
|
2517
|
+
|
|
2518
|
+
# Get parameters for this specific check
|
|
2519
|
+
check = checks_list[step_index]
|
|
2520
|
+
column = columns_list[step_index] if step_index < len(columns_list) else None
|
|
2521
|
+
set_val = sets_list[step_index] if step_index < len(sets_list) else None
|
|
2522
|
+
value = values_list[step_index] if step_index < len(values_list) else None
|
|
2523
|
+
|
|
2524
|
+
# Get validation step info
|
|
2525
|
+
step_info = None
|
|
2526
|
+
if hasattr(validation, "validation_info") and len(validation.validation_info) > step_index:
|
|
2527
|
+
step_info = validation.validation_info[step_index]
|
|
2528
|
+
|
|
2529
|
+
# Create friendly title for table
|
|
2530
|
+
if total_checks == 1:
|
|
2531
|
+
# Single check - use original title format
|
|
2532
|
+
if check == "rows-distinct":
|
|
2533
|
+
table_title = "Validation Result: Rows Distinct"
|
|
2534
|
+
elif check == "col-vals-not-null":
|
|
2535
|
+
table_title = "Validation Result: Column Values Not Null"
|
|
2536
|
+
elif check == "rows-complete":
|
|
2537
|
+
table_title = "Validation Result: Rows Complete"
|
|
2538
|
+
elif check == "col-exists":
|
|
2539
|
+
table_title = "Validation Result: Column Exists"
|
|
2540
|
+
elif check == "col-vals-in-set":
|
|
2541
|
+
table_title = "Validation Result: Column Values In Set"
|
|
2542
|
+
elif check == "col-vals-gt":
|
|
2543
|
+
table_title = "Validation Result: Column Values Greater Than"
|
|
2544
|
+
elif check == "col-vals-ge":
|
|
2545
|
+
table_title = "Validation Result: Column Values Greater Than Or Equal"
|
|
2546
|
+
elif check == "col-vals-lt":
|
|
2547
|
+
table_title = "Validation Result: Column Values Less Than"
|
|
2548
|
+
elif check == "col-vals-le":
|
|
2549
|
+
table_title = "Validation Result: Column Values Less Than Or Equal"
|
|
2550
|
+
else:
|
|
2551
|
+
table_title = f"Validation Result: {check.replace('-', ' ').title()}"
|
|
2552
|
+
else:
|
|
2553
|
+
# Multiple checks - add numbering
|
|
2554
|
+
if check == "rows-distinct":
|
|
2555
|
+
base_title = "Rows Distinct"
|
|
2556
|
+
elif check == "col-vals-not-null":
|
|
2557
|
+
base_title = "Column Values Not Null"
|
|
2558
|
+
elif check == "rows-complete":
|
|
2559
|
+
base_title = "Rows Complete"
|
|
2560
|
+
elif check == "col-exists":
|
|
2561
|
+
base_title = "Column Exists"
|
|
2562
|
+
elif check == "col-vals-in-set":
|
|
2563
|
+
base_title = "Column Values In Set"
|
|
2564
|
+
elif check == "col-vals-gt":
|
|
2565
|
+
base_title = "Column Values Greater Than"
|
|
2566
|
+
elif check == "col-vals-ge":
|
|
2567
|
+
base_title = "Column Values Greater Than Or Equal"
|
|
2568
|
+
elif check == "col-vals-lt":
|
|
2569
|
+
base_title = "Column Values Less Than"
|
|
2570
|
+
elif check == "col-vals-le":
|
|
2571
|
+
base_title = "Column Values Less Than Or Equal"
|
|
2572
|
+
else:
|
|
2573
|
+
base_title = check.replace("-", " ").title()
|
|
2574
|
+
|
|
2575
|
+
table_title = f"Validation Result ({step_index + 1} of {total_checks}): {base_title}"
|
|
2576
|
+
|
|
2577
|
+
# Create the validation results table
|
|
2578
|
+
result_table = Table(
|
|
2579
|
+
title=table_title,
|
|
2580
|
+
show_header=True,
|
|
2581
|
+
header_style="bold magenta",
|
|
2582
|
+
box=SIMPLE_HEAD,
|
|
2583
|
+
title_style="bold cyan",
|
|
2584
|
+
title_justify="left",
|
|
2585
|
+
)
|
|
2586
|
+
result_table.add_column("Property", style="cyan", no_wrap=True)
|
|
2587
|
+
result_table.add_column("Value", style="white")
|
|
2588
|
+
|
|
2589 +     # Add basic info
2590 +     result_table.add_row("Data Source", data_source)
2591 +     result_table.add_row("Check Type", check)
2592 +
2593 +     # Add column info for column-specific checks
2594 +     if check in [
2595 +         "col-vals-not-null",
2596 +         "col-exists",
2597 +         "col-vals-in-set",
2598 +         "col-vals-gt",
2599 +         "col-vals-ge",
2600 +         "col-vals-lt",
2601 +         "col-vals-le",
2602 +     ]:
2603 +         result_table.add_row("Column", column)
2604 +
2605 +     # Add set info for col-vals-in-set check
2606 +     if check == "col-vals-in-set" and set_val:
2607 +         allowed_values = [v.strip() for v in set_val.split(",")]
2608 +         result_table.add_row("Allowed Values", ", ".join(allowed_values))
2609 +
2610 +     # Add value info for range checks
2611 +     if check in ["col-vals-gt", "col-vals-ge", "col-vals-lt", "col-vals-le"] and value is not None:
2612 +         if check == "col-vals-gt":
2613 +             operator = ">"
2614 +         elif check == "col-vals-ge":
2615 +             operator = ">="
2616 +         elif check == "col-vals-lt":
2617 +             operator = "<"
2618 +         elif check == "col-vals-le":
2619 +             operator = "<="
2620 +         result_table.add_row("Threshold", f"{operator} {value}")
2621 +
2622 +     # Get validation details
2623 +     if step_info:
2624 +         result_table.add_row("Total Rows Tested", f"{step_info.n:,}")
2625 +         result_table.add_row("Passing Rows", f"{step_info.n_passed:,}")
2626 +         result_table.add_row("Failing Rows", f"{step_info.n_failed:,}")
2627 +
2628 +         # Check if this step passed
2629 +         step_passed = step_info.n_failed == 0
2630 +
2631 +         # Overall result with color coding
2632 +         if step_passed:
2633 +             result_table.add_row("Result", "[green]✓ PASSED[/green]")
2634 +             if check == "rows-distinct":
2635 +                 result_table.add_row("Duplicate Rows", "[green]None found[/green]")
2636 +             elif check == "col-vals-not-null":
2637 +                 result_table.add_row("Null Values", "[green]None found[/green]")
2638 +             elif check == "rows-complete":
2639 +                 result_table.add_row("Incomplete Rows", "[green]None found[/green]")
2640 +             elif check == "col-exists":
2641 +                 result_table.add_row("Column Status", "[green]Column exists[/green]")
2642 +             elif check == "col-vals-in-set":
2643 +                 result_table.add_row("Values Status", "[green]All values in allowed set[/green]")
2644 +             elif check == "col-vals-gt":
2645 +                 result_table.add_row("Values Status", f"[green]All values > {value}[/green]")
2646 +             elif check == "col-vals-ge":
2647 +                 result_table.add_row("Values Status", f"[green]All values >= {value}[/green]")
2648 +             elif check == "col-vals-lt":
2649 +                 result_table.add_row("Values Status", f"[green]All values < {value}[/green]")
2650 +             elif check == "col-vals-le":
2651 +                 result_table.add_row("Values Status", f"[green]All values <= {value}[/green]")
2652 +         else:
2653 +             result_table.add_row("Result", "[red]✗ FAILED[/red]")
2654 +             if check == "rows-distinct":
2655 +                 result_table.add_row("Duplicate Rows", f"[red]{step_info.n_failed:,} found[/red]")
2656 +             elif check == "col-vals-not-null":
2657 +                 result_table.add_row("Null Values", f"[red]{step_info.n_failed:,} found[/red]")
2658 +             elif check == "rows-complete":
2659 +                 result_table.add_row("Incomplete Rows", f"[red]{step_info.n_failed:,} found[/red]")
2660 +             elif check == "col-exists":
2661 +                 result_table.add_row("Column Status", "[red]Column does not exist[/red]")
2662 +             elif check == "col-vals-in-set":
2663 +                 result_table.add_row("Invalid Values", f"[red]{step_info.n_failed:,} found[/red]")
2664 +             elif check == "col-vals-gt":
2665 +                 result_table.add_row(
2666 +                     "Invalid Values", f"[red]{step_info.n_failed:,} values <= {value}[/red]"
2667 +                 )
2668 +             elif check == "col-vals-ge":
2669 +                 result_table.add_row(
2670 +                     "Invalid Values", f"[red]{step_info.n_failed:,} values < {value}[/red]"
2671 +                 )
2672 +             elif check == "col-vals-lt":
2673 +                 result_table.add_row(
2674 +                     "Invalid Values", f"[red]{step_info.n_failed:,} values >= {value}[/red]"
2675 +                 )
2676 +             elif check == "col-vals-le":
2677 +                 result_table.add_row(
2678 +                     "Invalid Values", f"[red]{step_info.n_failed:,} values > {value}[/red]"
2679 +                 )
2680 +
2681 +     console.print()
2682 +     console.print(result_table)
2683 +
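
The operator selection above is a four-way if/elif on the check name. An equivalent table-driven form (a sketch for illustration, not code from this release) would be:

    OPERATOR_FOR_CHECK = {
        "col-vals-gt": ">",
        "col-vals-ge": ">=",
        "col-vals-lt": "<",
        "col-vals-le": "<=",
    }

    # Same behavior as the if/elif chain in the hunk above
    operator = OPERATOR_FOR_CHECK.get(check)
    if operator is not None and value is not None:
        result_table.add_row("Threshold", f"{operator} {value}")
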
2684 +     # Show extract and summary for single check only, or if this is a failed step in multiple checks
2685 +     if total_checks == 1:
2686 +         # For single check, show extract and summary as before
2687 +         _show_extract_and_summary(
2688 +             validation,
2689 +             check,
2690 +             column,
2691 +             set_val,
2692 +             value,
2693 +             data_source,
2694 +             step_index,
2695 +             step_info,
2696 +             show_extract,
2697 +             write_extract,
2698 +             limit,
2699 +         )
2700 +     else:
2701 +         # For multiple checks, show summary panel and handle extract if needed
2702 +         if step_info:
2703 +             step_passed = step_info.n_failed == 0
2704 +             if step_passed:
2705 +                 # Create success message for this step
2706 +                 if check == "rows-distinct":
2707 +                     success_message = f"[green]✓ Validation PASSED: No duplicate rows found in {data_source}[/green]"
2708 +                 elif check == "col-vals-not-null":
2709 +                     success_message = f"[green]✓ Validation PASSED: No null values found in column '{column}' in {data_source}[/green]"
2710 +                 elif check == "rows-complete":
2711 +                     success_message = f"[green]✓ Validation PASSED: All rows are complete (no missing values) in {data_source}[/green]"
2712 +                 elif check == "col-exists":
2713 +                     success_message = f"[green]✓ Validation PASSED: Column '{column}' exists in {data_source}[/green]"
2714 +                 elif check == "col-vals-in-set":
2715 +                     success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are in the allowed set in {data_source}[/green]"
2716 +                 elif check == "col-vals-gt":
2717 +                     success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are > {value} in {data_source}[/green]"
2718 +                 elif check == "col-vals-ge":
2719 +                     success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are >= {value} in {data_source}[/green]"
2720 +                 elif check == "col-vals-lt":
2721 +                     success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are < {value} in {data_source}[/green]"
2722 +                 elif check == "col-vals-le":
2723 +                     success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are <= {value} in {data_source}[/green]"
2724 +                 else:
2725 +                     success_message = f"[green]✓ Validation PASSED: {check} check passed for {data_source}[/green]"
2726 +
2727 +                 console.print(
2728 +                     Panel(
2729 +                         success_message,
2730 +                         border_style="green",
2731 +                     )
2732 +                 )
2733 +             else:
2734 +                 # Create failure message for this step (without tip)
2735 +                 if check == "rows-distinct":
2736 +                     failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} duplicate rows found in {data_source}[/red]"
2737 +                 elif check == "col-vals-not-null":
2738 +                     failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} null values found in column '{column}' in {data_source}[/red]"
2739 +                 elif check == "rows-complete":
2740 +                     failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} incomplete rows found in {data_source}[/red]"
2741 +                 elif check == "col-exists":
2742 +                     failure_message = f"[red]✗ Validation FAILED: Column '{column}' does not exist in {data_source}[/red]"
2743 +                 elif check == "col-vals-in-set":
2744 +                     failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} invalid values found in column '{column}' in {data_source}[/red]"
2745 +                 elif check == "col-vals-gt":
2746 +                     failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} values <= {value} found in column '{column}' in {data_source}[/red]"
2747 +                 elif check == "col-vals-ge":
2748 +                     failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} values < {value} found in column '{column}' in {data_source}[/red]"
2749 +                 elif check == "col-vals-lt":
2750 +                     failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} values >= {value} found in column '{column}' in {data_source}[/red]"
2751 +                 elif check == "col-vals-le":
2752 +                     failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} values > {value} found in column '{column}' in {data_source}[/red]"
2753 +                 else:
2754 +                     failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} failing rows found in {data_source}[/red]"
2755 +
2756 +                 console.print(
2757 +                     Panel(
2758 +                         failure_message,
2759 +                         border_style="red",
2760 +                     )
2761 +                 )
2762 +
2763 +         # For multiple checks, show extract if requested and this step failed
2764 +         if (show_extract or write_extract) and not step_passed:
2765 +             _show_extract_for_multi_check(
2766 +                 validation,
2767 +                 check,
2768 +                 column,
2769 +                 set_val,
2770 +                 value,
2771 +                 data_source,
2772 +                 step_index,
2773 +                 step_info,
2774 +                 show_extract,
2775 +                 write_extract,
2776 +                 limit,
2777 +             )
2778 +
2779 +
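
The success/failure branches above wrap their messages in Rich panels. A minimal self-contained reproduction of that output pattern, assuming only that the `rich` package is installed:

    from rich.console import Console
    from rich.panel import Panel

    console = Console()
    # Green-bordered panel, as in the PASSED branch above
    console.print(Panel("[green]✓ Validation PASSED[/green]", border_style="green"))
    # Red-bordered panel, as in the FAILED branch above
    console.print(Panel("[red]✗ Validation FAILED[/red]", border_style="red"))
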
2780 + def _show_extract_for_multi_check(
2781 +     validation: Any,
2782 +     check: str,
2783 +     column: str | None,
2784 +     set_val: str | None,
2785 +     value: float | None,
2111 2786     data_source: str,
2787 +     step_index: int,
2788 +     step_info: Any,
2789 +     show_extract: bool,
2790 +     write_extract: str | None,
2791 +     limit: int,
2792 + ) -> None:
2793 +     """Show extract for a single validation step in multiple checks scenario."""
2794 +     # Dynamic message based on check type
2795 +     if check == "rows-distinct":
2796 +         extract_message = "[yellow]Extract of failing rows (duplicates):[/yellow]"
2797 +         row_type = "duplicate rows"
2798 +     elif check == "rows-complete":
2799 +         extract_message = "[yellow]Extract of failing rows (incomplete rows):[/yellow]"
2800 +         row_type = "incomplete rows"
2801 +     elif check == "col-exists":
2802 +         extract_message = f"[yellow]Column '{column}' does not exist in the dataset[/yellow]"
2803 +         row_type = "missing column"
2804 +     elif check == "col-vals-not-null":
2805 +         extract_message = f"[yellow]Extract of failing rows (null values in '{column}'):[/yellow]"
2806 +         row_type = "rows with null values"
2807 +     elif check == "col-vals-in-set":
2808 +         extract_message = (
2809 +             f"[yellow]Extract of failing rows (invalid values in '{column}'):[/yellow]"
2810 +         )
2811 +         row_type = "rows with invalid values"
2812 +     elif check == "col-vals-gt":
2813 +         extract_message = (
2814 +             f"[yellow]Extract of failing rows (values in '{column}' <= {value}):[/yellow]"
2815 +         )
2816 +         row_type = f"rows with values <= {value}"
2817 +     elif check == "col-vals-ge":
2818 +         extract_message = (
2819 +             f"[yellow]Extract of failing rows (values in '{column}' < {value}):[/yellow]"
2820 +         )
2821 +         row_type = f"rows with values < {value}"
2822 +     elif check == "col-vals-lt":
2823 +         extract_message = (
2824 +             f"[yellow]Extract of failing rows (values in '{column}' >= {value}):[/yellow]"
2825 +         )
2826 +         row_type = f"rows with values >= {value}"
2827 +     elif check == "col-vals-le":
2828 +         extract_message = (
2829 +             f"[yellow]Extract of failing rows (values in '{column}' > {value}):[/yellow]"
2830 +         )
2831 +         row_type = f"rows with values > {value}"
2832 +     else:
2833 +         extract_message = "[yellow]Extract of failing rows:[/yellow]"
2834 +         row_type = "failing rows"
2835 +
2836 +     if show_extract:
2837 +         console.print()
2838 +         console.print(extract_message)
2839 +
2840 +     # Special handling for col-exists check - no rows to show when column doesn't exist
2841 +     if check == "col-exists":
2842 +         if show_extract:
2843 +             console.print(f"[dim]The column '{column}' was not found in the dataset.[/dim]")
2844 +             console.print(
2845 +                 "[dim]Use --show-extract with other check types to see failing data rows.[/dim]"
2846 +             )
2847 +         if write_extract:
2848 +             console.print("[yellow]Cannot save failing rows when column doesn't exist[/yellow]")
2849 +     else:
2850 +         try:
2851 +             # Get failing rows extract - use step_index + 1 since extracts are 1-indexed
2852 +             failing_rows = validation.get_data_extracts(i=step_index + 1, frame=True)
2853 +
2854 +             if failing_rows is not None and len(failing_rows) > 0:
2855 +                 if show_extract:
2856 +                     # Limit the number of rows shown
2857 +                     if len(failing_rows) > limit:
2858 +                         display_rows = failing_rows.head(limit)
2859 +                         console.print(
2860 +                             f"[dim]Showing first {limit} of {len(failing_rows)} {row_type}[/dim]"
2861 +                         )
2862 +                     else:
2863 +                         display_rows = failing_rows
2864 +                         console.print(f"[dim]Showing all {len(failing_rows)} {row_type}[/dim]")
2865 +
2866 +                     # Create a preview table using pointblank's preview function
2867 +                     import pointblank as pb
2868 +
2869 +                     preview_table = pb.preview(
2870 +                         data=display_rows,
2871 +                         n_head=min(limit, len(display_rows)),
2872 +                         n_tail=0,
2873 +                         limit=limit,
2874 +                         show_row_numbers=True,
2875 +                     )
2876 +
2877 +                     # Display using our Rich table function
2878 +                     _rich_print_gt_table(preview_table, show_summary=False)
2879 +
2880 +                 if write_extract:
2881 +                     try:
2882 +                         from pathlib import Path
2883 +
2884 +                         folder_name = write_extract
2885 +
2886 +                         # Create the output folder
2887 +                         output_folder = Path(folder_name)
2888 +                         output_folder.mkdir(parents=True, exist_ok=True)
2889 +
2890 +                         # Create safe filename from check type
2891 +                         safe_check_type = check.replace("-", "_")
2892 +                         filename = f"step_{step_index + 1:02d}_{safe_check_type}.csv"
2893 +                         filepath = output_folder / filename
2894 +
2895 +                         # Limit the output if needed
2896 +                         write_rows = failing_rows
2897 +                         if len(failing_rows) > limit:
2898 +                             write_rows = failing_rows.head(limit)
2899 +
2900 +                         # Save to CSV
2901 +                         if hasattr(write_rows, "write_csv"):
2902 +                             # Polars
2903 +                             write_rows.write_csv(str(filepath))
2904 +                         elif hasattr(write_rows, "to_csv"):
2905 +                             # Pandas
2906 +                             write_rows.to_csv(str(filepath), index=False)
2907 +                         else:
2908 +                             # Try converting to pandas as fallback
2909 +                             import pandas as pd
2910 +
2911 +                             pd_data = pd.DataFrame(write_rows)
2912 +                             pd_data.to_csv(str(filepath), index=False)
2913 +
2914 +                         rows_saved = len(write_rows) if hasattr(write_rows, "__len__") else limit
2915 +                         console.print(
2916 +                             f"[green]✓[/green] Failing rows saved to folder: {output_folder}"
2917 +                         )
2918 +                         console.print(f"[dim]  - {filename}: {rows_saved} rows[/dim]")
2919 +                     except Exception as e:
2920 +                         console.print(f"[yellow]Warning: Could not save failing rows: {e}[/yellow]")
2921 +             else:
2922 +                 if show_extract:
2923 +                     console.print("[yellow]No failing rows could be extracted[/yellow]")
2924 +                 if write_extract:
2925 +                     console.print("[yellow]No failing rows could be extracted to save[/yellow]")
2926 +         except Exception as e:
2927 +             if show_extract:
2928 +                 console.print(f"[yellow]Could not extract failing rows: {e}[/yellow]")
2929 +             if write_extract:
2930 +                 console.print(f"[yellow]Could not extract failing rows to save: {e}[/yellow]")
2931 +
2932 +
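
`_show_extract_for_multi_check()` leans on `Validate.get_data_extracts()`, which the code treats as 1-indexed per step (hence `step_index + 1`). A minimal sketch of that call outside the CLI, using the built-in `small_table` dataset (the rule itself is a made-up example):

    import pointblank as pb

    validation = (
        pb.Validate(data=pb.load_dataset("small_table"))
        .col_vals_gt(columns="d", value=1000)  # hypothetical rule; adjust to your data
        .interrogate()
    )

    # Extract step indices are 1-based, matching the i=step_index + 1 calls above
    failing_rows = validation.get_data_extracts(i=1, frame=True)
    if failing_rows is not None and len(failing_rows) > 0:
        print(failing_rows.head(5))
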
2933 + def _show_extract_and_summary(
2934 +     validation: Any,
2112 2935     check: str,
2113 2936     column: str | None,
2114 -
2937 +     set_val: str | None,
2115 2938     value: float | None,
2939 +     data_source: str,
2940 +     step_index: int,
2941 +     step_info: Any,
2116 2942     show_extract: bool,
2943 +     write_extract: str | None,
2117 2944     limit: int,
2118 -
2119 - )
2120 -
2121 - Perform simple, single-step validations directly from the command line.
2945 + ) -> None:
2946 +     """Show extract and summary for a validation step (used for single checks)."""
2947 +     step_passed = step_info.n_failed == 0 if step_info else True
2122 2948
2123 -
2124 -
2949 +     # Show extract if requested and validation failed
2950 +     if (show_extract or write_extract) and not step_passed:
2951 +         console.print()
2125 2952
2126 -
2953 +         # Dynamic message based on check type
2954 +         if check == "rows-distinct":
2955 +             extract_message = "[yellow]Extract of failing rows (duplicates):[/yellow]"
2956 +             row_type = "duplicate rows"
2957 +         elif check == "rows-complete":
2958 +             extract_message = "[yellow]Extract of failing rows (incomplete rows):[/yellow]"
2959 +             row_type = "incomplete rows"
2960 +         elif check == "col-exists":
2961 +             extract_message = f"[yellow]Column '{column}' does not exist in the dataset[/yellow]"
2962 +             row_type = "missing column"
2963 +         elif check == "col-vals-not-null":
2964 +             extract_message = (
2965 +                 f"[yellow]Extract of failing rows (null values in '{column}'):[/yellow]"
2966 +             )
2967 +             row_type = "rows with null values"
2968 +         elif check == "col-vals-in-set":
2969 +             extract_message = (
2970 +                 f"[yellow]Extract of failing rows (invalid values in '{column}'):[/yellow]"
2971 +             )
2972 +             row_type = "rows with invalid values"
2973 +         elif check == "col-vals-gt":
2974 +             extract_message = (
2975 +                 f"[yellow]Extract of failing rows (values in '{column}' <= {value}):[/yellow]"
2976 +             )
2977 +             row_type = f"rows with values <= {value}"
2978 +         elif check == "col-vals-ge":
2979 +             extract_message = (
2980 +                 f"[yellow]Extract of failing rows (values in '{column}' < {value}):[/yellow]"
2981 +             )
2982 +             row_type = f"rows with values < {value}"
2983 +         elif check == "col-vals-lt":
2984 +             extract_message = (
2985 +                 f"[yellow]Extract of failing rows (values in '{column}' >= {value}):[/yellow]"
2986 +             )
2987 +             row_type = f"rows with values >= {value}"
2988 +         elif check == "col-vals-le":
2989 +             extract_message = (
2990 +                 f"[yellow]Extract of failing rows (values in '{column}' > {value}):[/yellow]"
2991 +             )
2992 +             row_type = f"rows with values > {value}"
2993 +         else:
2994 +             extract_message = "[yellow]Extract of failing rows:[/yellow]"
2995 +             row_type = "failing rows"
2127 2996
2128 -
2129 -
2130 - - Parquet file path or pattern (e.g., data.parquet, data/*.parquet)
2131 - - Database connection string (e.g., duckdb:///path/to/db.ddb::table_name)
2132 - - Dataset name from pointblank (small_table, game_revenue, nycflights, global_sales)
2997 +         if show_extract:
2998 +             console.print(extract_message)
2133 2999
2134 -
3000 +         # Special handling for col-exists check - no rows to show when column doesn't exist
3001 +         if check == "col-exists" and not step_passed:
3002 +             if show_extract:
3003 +                 console.print(f"[dim]The column '{column}' was not found in the dataset.[/dim]")
3004 +                 console.print(
3005 +                     "[dim]Use --show-extract with other check types to see failing data rows.[/dim]"
3006 +                 )
3007 +             if write_extract:
3008 +                 console.print("[yellow]Cannot save failing rows when column doesn't exist[/yellow]")
3009 +         else:
3010 +             try:
3011 +                 # Get failing rows extract - use step_index + 1 since extracts are 1-indexed
3012 +                 failing_rows = validation.get_data_extracts(i=step_index + 1, frame=True)
2135 3013
2136-2145 -
3014 +                 if failing_rows is not None and len(failing_rows) > 0:
3015 +                     if show_extract:
3016 +                         # Limit the number of rows shown
3017 +                         if len(failing_rows) > limit:
3018 +                             display_rows = failing_rows.head(limit)
3019 +                             console.print(
3020 +                                 f"[dim]Showing first {limit} of {len(failing_rows)} {row_type}[/dim]"
3021 +                             )
3022 +                         else:
3023 +                             display_rows = failing_rows
3024 +                             console.print(f"[dim]Showing all {len(failing_rows)} {row_type}[/dim]")
2146 3025
2147 -
3026 +                         # Create a preview table using pointblank's preview function
3027 +                         import pointblank as pb
2148 3028
2149-2155 -
2156 - pb validate-simple data.csv --check col-vals-gt --column score --value 50
2157 - pb validate-simple data.csv --check col-vals-in-set --column status --set "active,inactive,pending"
2158 - """
2159 - try:
2160 - # Validate required parameters for different check types
2161 - if check == "col-vals-not-null" and not column:
2162 - console.print(f"[red]Error:[/red] --column is required for {check} check")
2163 - console.print(
2164 - "Example: pb validate-simple data.csv --check col-vals-not-null --column email"
2165 - )
2166 - sys.exit(1)
2167 - sys.exit(1)
3029 +                         preview_table = pb.preview(
3030 +                             data=display_rows,
3031 +                             n_head=min(limit, len(display_rows)),
3032 +                             n_tail=0,
3033 +                             limit=limit,
3034 +                             show_row_numbers=True,
3035 +                         )
2168 3036
2169-2172 -
3037 +                         # Display using our Rich table function
3038 +                         _rich_print_gt_table(preview_table, show_summary=False)
3039 +
3040 +                     if write_extract:
3041 +                         try:
3042 +                             from pathlib import Path
3043 +
3044 +                             folder_name = write_extract
3045 +
3046 +                             # Create the output folder
3047 +                             output_folder = Path(folder_name)
3048 +                             output_folder.mkdir(parents=True, exist_ok=True)
3049 +
3050 +                             # Create safe filename from check type
3051 +                             safe_check_type = check.replace("-", "_")
3052 +                             filename = f"step_{step_index + 1:02d}_{safe_check_type}.csv"
3053 +                             filepath = output_folder / filename
3054 +
3055 +                             # Limit the output if needed
3056 +                             write_rows = failing_rows
3057 +                             if len(failing_rows) > limit:
3058 +                                 write_rows = failing_rows.head(limit)
3059 +
3060 +                             # Save to CSV
3061 +                             if hasattr(write_rows, "write_csv"):
3062 +                                 # Polars
3063 +                                 write_rows.write_csv(str(filepath))
3064 +                             elif hasattr(write_rows, "to_csv"):
3065 +                                 # Pandas
3066 +                                 write_rows.to_csv(str(filepath), index=False)
3067 +                             else:
3068 +                                 # Try converting to pandas as fallback
3069 +                                 import pandas as pd
2173 3070
2174 -
2175 -
2176 - console.print(
2177 - "Example: pb validate-simple data.csv --check col-vals-in-set --column status --set 'active,inactive'"
2178 - )
2179 - sys.exit(1)
3071 +                             pd_data = pd.DataFrame(write_rows)
3072 +                             pd_data.to_csv(str(filepath), index=False)
2180 3073
2181-2186 -
3074 +                             rows_saved = (
3075 +                                 len(write_rows) if hasattr(write_rows, "__len__") else limit
3076 +                             )
3077 +                             console.print(
3078 +                                 f"[green]✓[/green] Failing rows saved to folder: {output_folder}"
3079 +                             )
3080 +                             console.print(f"[dim]  - {filename}: {rows_saved} rows[/dim]")
3081 +                         except Exception as e:
3082 +                             console.print(
3083 +                                 f"[yellow]Warning: Could not save failing rows: {e}[/yellow]"
3084 +                             )
3085 +                 else:
3086 +                     if show_extract:
3087 +                         console.print("[yellow]No failing rows could be extracted[/yellow]")
3088 +                     if write_extract:
3089 +                         console.print("[yellow]No failing rows could be extracted to save[/yellow]")
3090 +             except Exception as e:
3091 +                 if show_extract:
3092 +                     console.print(f"[yellow]Could not extract failing rows: {e}[/yellow]")
3093 +                 if write_extract:
3094 +                     console.print(f"[yellow]Could not extract failing rows to save: {e}[/yellow]")
2187 3095
2188-2191 -
3096 +     # Summary message
3097 +     console.print()
3098 +     if step_passed:
3099 +         if check == "rows-distinct":
3100 +             success_message = (
3101 +                 f"[green]✓ Validation PASSED: No duplicate rows found in {data_source}[/green]"
2192 3102             )
2193-2198 -
3103 +         elif check == "col-vals-not-null":
3104 +             success_message = f"[green]✓ Validation PASSED: No null values found in column '{column}' in {data_source}[/green]"
3105 +         elif check == "rows-complete":
3106 +             success_message = f"[green]✓ Validation PASSED: All rows are complete (no missing values) in {data_source}[/green]"
3107 +         elif check == "col-exists":
3108 +             success_message = (
3109 +                 f"[green]✓ Validation PASSED: Column '{column}' exists in {data_source}[/green]"
2199 3110             )
2200-2205 -
3111 +         elif check == "col-vals-in-set":
3112 +             success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are in the allowed set in {data_source}[/green]"
3113 +         elif check == "col-vals-gt":
3114 +             success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are > {value} in {data_source}[/green]"
3115 +         elif check == "col-vals-ge":
3116 +             success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are >= {value} in {data_source}[/green]"
3117 +         elif check == "col-vals-lt":
3118 +             success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are < {value} in {data_source}[/green]"
3119 +         elif check == "col-vals-le":
3120 +             success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are <= {value} in {data_source}[/green]"
3121 +         else:
3122 +             success_message = (
3123 +                 f"[green]✓ Validation PASSED: {check} check passed for {data_source}[/green]"
2206 3124             )
2207 - sys.exit(1)
2208 3125
2209-2214 -
3126 +         console.print(Panel(success_message, border_style="green"))
3127 +     else:
3128 +         if step_info:
3129 +             if check == "rows-distinct":
3130 +                 failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} duplicate rows found in {data_source}[/red]"
3131 +             elif check == "col-vals-not-null":
3132 +                 failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} null values found in column '{column}' in {data_source}[/red]"
3133 +             elif check == "rows-complete":
3134 +                 failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} incomplete rows found in {data_source}[/red]"
3135 +             elif check == "col-exists":
3136 +                 failure_message = f"[red]✗ Validation FAILED: Column '{column}' does not exist in {data_source}[/red]"
3137 +             elif check == "col-vals-in-set":
3138 +                 failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} invalid values found in column '{column}' in {data_source}[/red]"
3139 +             elif check == "col-vals-gt":
3140 +                 failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} values <= {value} found in column '{column}' in {data_source}[/red]"
3141 +             elif check == "col-vals-ge":
3142 +                 failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} values < {value} found in column '{column}' in {data_source}[/red]"
3143 +             elif check == "col-vals-lt":
3144 +                 failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} values >= {value} found in column '{column}' in {data_source}[/red]"
3145 +             elif check == "col-vals-le":
3146 +                 failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} values > {value} found in column '{column}' in {data_source}[/red]"
3147 +             else:
3148 +                 failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} failing rows found in {data_source}[/red]"
2215 3149
2216-2218 -
2219 - "Example: pb validate-simple data.csv --check col-vals-lt --column age --value 65"
2220 - )
2221 - sys.exit(1)
3150 +             # Add hint about --show-extract if not already used (except for col-exists which has no rows to show)
3151 +             if not show_extract and check != "col-exists":
3152 +                 failure_message += "\n[bright_blue]💡 Tip:[/bright_blue] [cyan]Use --show-extract to see the failing rows[/cyan]"
2222 3153
2223-2228 -
3154 +             console.print(Panel(failure_message, border_style="red"))
3155 +         else:
3156 +             if check == "rows-distinct":
3157 +                 failure_message = (
3158 +                     f"[red]✗ Validation FAILED: Duplicate rows found in {data_source}[/red]"
3159 +                 )
3160 +             elif check == "rows-complete":
3161 +                 failure_message = (
3162 +                     f"[red]✗ Validation FAILED: Incomplete rows found in {data_source}[/red]"
3163 +                 )
3164 +             else:
3165 +                 failure_message = (
3166 +                     f"[red]✗ Validation FAILED: {check} check failed for {data_source}[/red]"
3167 +                 )
2229 3168
2230-2232 -
2233 - "Example: pb validate-simple data.csv --check col-vals-le --column score --value 100"
2234 - )
2235 - sys.exit(1)
3169 +             # Add hint about --show-extract if not already used
3170 +             if not show_extract:
3171 +                 failure_message += "\n[bright_blue]💡 Tip:[/bright_blue] [cyan]Use --show-extract to see the failing rows[/cyan]"
2236 3172
2237 -
2238 - console.print(f"[red]Error:[/red] --value is required for {check} check")
2239 - console.print(
2240 - "Example: pb validate-simple data.csv --check col-vals-le --column score --value 100"
2241 - )
2242 - sys.exit(1)
3173 +             console.print(Panel(failure_message, border_style="red"))
2243 3174
2244 - with console.status("[bold green]Loading data..."):
2245 - # Try to load as a pointblank dataset first
2246 - if data_source in ["small_table", "game_revenue", "nycflights", "global_sales"]:
2247 - data = pb.load_dataset(data_source)
2248 - console.print(f"[green]✓[/green] Loaded dataset: {data_source}")
2249 - else:
2250 - # Assume it's a file path or connection string
2251 - data = data_source
2252 - console.print(f"[green]✓[/green] Loaded data source: {data_source}")
2253 3175
2254-2258 -
2259 - pb.Validate(
2260 - data=data,
2261 - tbl_name=f"Data from {data_source}",
2262 - label=f"CLI Simple Validation: {check}",
2263 - )
2264 - .rows_distinct()
2265 - .interrogate()
2266 - )
3176 + @cli.command()
3177 + @click.argument("output_file", type=click.Path())
3178 + def make_template(output_file: str):
3179 +     """
3180 +     Create a validation script template.
2267 3181
2268 -
2269 -
3182 +     Creates a sample Python script with examples showing how to use Pointblank
3183 +     for data validation. Edit the template to add your own data loading and
3184 +     validation rules, then run it with 'pb run'.
2270 3185
2271 -
2272 - f"[green]✓[/green] {check.replace('-', ' ').title()} validation completed"
2273 - )
2274 - elif check == "col-vals-not-null":
2275 - # Create validation for not null values in specified column
2276 - validation = (
2277 - pb.Validate(
2278 - data=data,
2279 - tbl_name=f"Data from {data_source}",
2280 - label=f"CLI Simple Validation: {check} on column '{column}'",
2281 - )
2282 - .col_vals_not_null(columns=column)
2283 - .interrogate()
2284 - )
3186 +     OUTPUT_FILE is the path where the template script will be created.
2285 3187
2286 -
2287 - all_passed = validation.all_passed()
3188 +     Examples:
2288 3189
2289-2294 -
2295 - pb.Validate(
2296 - data=data,
2297 - tbl_name=f"Data from {data_source}",
2298 - label=f"CLI Simple Validation: {check}",
2299 - )
2300 - .rows_complete()
2301 - .interrogate()
2302 - )
3190 +     \b
3191 +     pb make-template my_validation.py
3192 +     pb make-template validation_template.py
3193 +     """
3194 +     example_script = '''"""
3195 + Example Pointblank validation script.
2303 3196
2304 -
2305 -
3197 + This script demonstrates how to create validation rules for your data.
3198 + Modify the data loading and validation rules below to match your requirements.
3199 + """
2306 3200
2307 -
2308 - f"[green]✓[/green] {check.replace('-', ' ').title()} validation completed"
2309 - )
2310 - elif check == "col-exists":
2311 - # Create validation for column existence
2312 - validation = (
2313 - pb.Validate(
2314 - data=data,
2315 - tbl_name=f"Data from {data_source}",
2316 - label=f"CLI Simple Validation: {check} for column '{column}'",
2317 - )
2318 - .col_exists(columns=column)
2319 - .interrogate()
2320 - )
3201 + import pointblank as pb
2321 3202
2322 -
2323 -
3203 + # Load your data (replace this with your actual data source)
3204 + # You can load from various sources:
3205 + # data = pb.load_dataset("small_table")  # Built-in dataset
3206 + # data = pd.read_csv("your_data.csv")  # CSV file
3207 + # data = pl.read_parquet("data.parquet")  # Parquet file
3208 + # data = pb.load_data("database://connection")  # Database
2324 3209
2325 -
2326 - f"[green]✓[/green] {check.replace('-', ' ').title()} validation completed"
2327 - )
2328 - elif check == "col-vals-in-set":
2329 - # Parse the comma-separated set values
2330 - allowed_values = [value.strip() for value in set.split(",")]
2331 -
2332 - # Create validation for values in set
2333 - validation = (
2334 - pb.Validate(
2335 - data=data,
2336 - tbl_name=f"Data from {data_source}",
2337 - label=f"CLI Simple Validation: {check} for column '{column}'",
2338 - )
2339 - .col_vals_in_set(columns=column, set=allowed_values)
2340 - .interrogate()
2341 - )
3210 + data = pb.load_dataset("small_table")  # Example with built-in dataset
2342 3211
2343 -
2344 -
3212 + # Create a validation object
3213 + validation = (
3214 +     pb.Validate(
3215 +         data=data,
3216 +         tbl_name="Example Data",
3217 +         label="Validation Example",
3218 +         thresholds=pb.Thresholds(warning=0.05, error=0.10, critical=0.15),
3219 +     )
3220 +     # Add your validation rules here
3221 +     # Example rules (modify these based on your data structure):
2345 3222
2346-2359 -
3223 +     # Check that specific columns exist
3224 +     # .col_exists(["column1", "column2"])
3225 +
3226 +     # Check for null values
3227 +     # .col_vals_not_null(columns="important_column")
3228 +
3229 +     # Check value ranges
3230 +     # .col_vals_gt(columns="amount", value=0)
3231 +     # .col_vals_between(columns="score", left=0, right=100)
3232 +
3233 +     # Check string patterns
3234 +     # .col_vals_regex(columns="email", pattern=r"^[\\w\\.-]+@[\\w\\.-]+\\.[a-zA-Z]{2,}$")
3235 +
3236 +     # Check unique values
3237 +     # .col_vals_unique(columns="id")
3238 +
3239 +     # Finalize the validation
3240 +     .interrogate()
3241 + )
3242 +
3243 + # The validation object will be automatically used by the CLI
3244 + # You can also access results programmatically:
3245 + # print(f"All passed: {validation.all_passed()}")
3246 + # print(f"Failed steps: {validation.n_failed()}")
3247 + '''
3248 +
3249 +     Path(output_file).write_text(example_script)
3250 +     console.print(f"[green]✓[/green] Validation script template created: {output_file}")
3251 +     console.print("\nEdit the template to add your data loading and validation rules, then run:")
3252 +     console.print(f"[cyan]pb run {output_file}[/cyan]")
3253 +     console.print(
3254 +         f"[cyan]pb run {output_file} --data your_data.csv[/cyan] [dim]# Override data source[/dim]"
3255 +     )
3256 +
3257 +
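
The template written by `make_template` boils down to one contract, visible in the `run` command below: the script must leave an interrogated Validate object in its module namespace, ideally under the name `validation`. The smallest script satisfying that contract (a sketch, not the generated template itself) is:

    import pointblank as pb

    validation = (
        pb.Validate(data=pb.load_dataset("small_table"), tbl_name="small_table")
        .col_vals_not_null(columns="a")
        .interrogate()
    )
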
3258 + @cli.command()
3259 + @click.argument("validation_script", type=click.Path(exists=True))
3260 + @click.option("--data", type=str, help="Optional data source to override script's data loading")
3261 + @click.option("--output-html", type=click.Path(), help="Save HTML validation report to file")
3262 + @click.option("--output-json", type=click.Path(), help="Save JSON validation summary to file")
3263 + @click.option(
3264 +     "--show-extract", is_flag=True, help="Show extract of failing rows if validation fails"
3265 + )
3266 + @click.option(
3267 +     "--write-extract",
3268 +     type=str,
3269 +     help="Save failing rows to folders (one CSV per step). Provide base name for folder.",
3270 + )
3271 + @click.option(
3272 +     "--limit", "-l", default=10, help="Maximum number of failing rows to show/save (default: 10)"
3273 + )
3274 + @click.option(
3275 +     "--fail-on",
3276 +     type=click.Choice(["critical", "error", "warning", "any"], case_sensitive=False),
3277 +     help="Exit with non-zero code when validation reaches this threshold level",
3278 + )
3279 + def run(
3280 +     validation_script: str,
3281 +     data: str | None,
3282 +     output_html: str | None,
3283 +     output_json: str | None,
3284 +     show_extract: bool,
3285 +     write_extract: str | None,
3286 +     limit: int,
3287 +     fail_on: str | None,
3288 + ):
3289 +     """
3290 +     Run a Pointblank validation script.
2360 3291
2361 -
2362 -
3292 +     VALIDATION_SCRIPT should be a Python file that defines validation logic.
3293 +     The script should load its own data and create validation objects.
2363 3294
2364 -
2365 -
2366 - )
2367 - elif check == "col-vals-ge":
2368 - # Create validation for values greater than or equal to threshold
2369 - validation = (
2370 - pb.Validate(
2371 - data=data,
2372 - tbl_name=f"Data from {data_source}",
2373 - label=f"CLI Simple Validation: {check} for column '{column}' >= {value}",
2374 - )
2375 - .col_vals_ge(columns=column, value=value)
2376 - .interrogate()
2377 - )
3295 +     If --data is provided, it will be available as a 'cli_data' variable in the script,
3296 +     allowing you to optionally override your script's data loading.
2378 3297
2379 -
2380 - all_passed = validation.all_passed()
3298 +     DATA can be:
2381 3299
2382-2387 -
2388 - pb.Validate(
2389 - data=data,
2390 - tbl_name=f"Data from {data_source}",
2391 - label=f"CLI Simple Validation: {check} for column '{column}' < {value}",
2392 - )
2393 - .col_vals_lt(columns=column, value=value)
2394 - .interrogate()
2395 - )
3300 +     \b
3301 +     - CSV file path (e.g., data.csv)
3302 +     - Parquet file path or pattern (e.g., data.parquet, data/*.parquet)
3303 +     - GitHub URL to CSV/Parquet (e.g., https://github.com/user/repo/blob/main/data.csv)
3304 +     - Database connection string (e.g., duckdb:///path/to/db.ddb::table_name)
3305 +     - Dataset name from pointblank (small_table, game_revenue, nycflights, global_sales)
2396 3306
2397 -
2398 - all_passed = validation.all_passed()
3307 +     Examples:
2399 3308
2400-2412 -
2413 - )
3309 +     \b
3310 +     pb run validation_script.py
3311 +     pb run validation_script.py --data data.csv
3312 +     pb run validation_script.py --data small_table --output-html report.html
3313 +     pb run validation_script.py --show-extract --fail-on error
3314 +     pb run validation_script.py --write-extract extracts_folder --fail-on critical
3315 +     """
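
Since the command injects `cli_data` into every script it executes (see the namespace construction below), a script can honor a `--data` override with a one-line guard. A sketch; note that the `cli_data` name only exists when the file is run via `pb run`:

    import pointblank as pb

    # cli_data is provided by `pb run`; it is None unless --data was passed
    data = cli_data if cli_data is not None else pb.load_dataset("small_table")  # noqa: F821

    validation = pb.Validate(data=data).rows_distinct().interrogate()
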
3316 +     try:
3317 +         # Load optional data override if provided
3318 +         cli_data = None
3319 +         if data:
3320 +             with console.status(f"[bold green]Loading data from {data}..."):
3321 +                 cli_data = _load_data_source(data)
3322 +             console.print(f"[green]✓[/green] Loaded data override: {data}")
2414 3323
2415 -
2416 -
3324 +         # Execute the validation script
3325 +         with console.status("[bold green]Running validation script..."):
3326 +             # Read and execute the validation script
3327 +             script_content = Path(validation_script).read_text()
2417 3328
2418-2421 -
2422 - #
2423 -
3329 +             # Create a namespace with pointblank and optional CLI data
3330 +             namespace = {
3331 +                 "pb": pb,
3332 +                 "pointblank": pb,
3333 +                 "cli_data": cli_data,  # Available if --data was provided
3334 +                 "__name__": "__main__",
3335 +                 "__file__": str(Path(validation_script).resolve()),
3336 +             }
3337 +
3338 +             # Execute the script
3339 +             try:
3340 +                 exec(script_content, namespace)
3341 +             except Exception as e:
3342 +                 console.print(f"[red]Error executing validation script:[/red] {e}")
2424 3343                 sys.exit(1)
2425 3344
2426 -
2427 -
3345 +         # Look for validation objects in the namespace
3346 +         validations = []
2428 3347
2429-2431 -
2432 - elif check == "col-vals-not-null":
2433 - table_title = "Validation Result: Column Values Not Null"
2434 - elif check == "rows-complete":
2435 - table_title = "Validation Result: Rows Complete"
2436 - elif check == "col-exists":
2437 - table_title = "Validation Result: Column Exists"
2438 - elif check == "col-vals-in-set":
2439 - table_title = "Validation Result: Column Values In Set"
2440 - elif check == "col-vals-gt":
2441 - table_title = "Validation Result: Column Values Greater Than"
2442 - elif check == "col-vals-ge":
2443 - table_title = "Validation Result: Column Values Greater Than Or Equal"
2444 - elif check == "col-vals-lt":
2445 - table_title = "Validation Result: Column Values Less Than"
2446 - elif check == "col-vals-le":
2447 - table_title = "Validation Result: Column Values Less Than Or Equal"
2448 - else:
2449 - table_title = f"Validation Result: {check.replace('-', ' ').title()}"
3348 +         # Look for the 'validation' variable specifically first
3349 +         if "validation" in namespace:
3350 +             validations.append(namespace["validation"])
2450 3351
2451-2460 -
3352 +         # Also look for any other validation objects
3353 +         for key, value in namespace.items():
3354 +             if (
3355 +                 key != "validation"
3356 +                 and hasattr(value, "interrogate")
3357 +                 and hasattr(value, "validation_info")
3358 +             ):
3359 +                 validations.append(value)
3360 +             # Also check if it's a Validate object that has been interrogated
3361 +             elif key != "validation" and str(type(value)).find("Validate") != -1:
3362 +                 validations.append(value)
2461 3363
2462-2464 -
3364 +         if not validations:
3365 +             raise ValueError(
3366 +                 "No validation objects found in script. "
3367 +                 "Script should create Validate objects and call .interrogate() on them."
3368 +             )
2465 3369
2466 -
2467 - if check in [
2468 - "col-vals-not-null",
2469 - "col-exists",
2470 - "col-vals-in-set",
2471 - "col-vals-gt",
2472 - "col-vals-ge",
2473 - "col-vals-lt",
2474 - "col-vals-le",
2475 - ]:
2476 - result_table.add_row("Column", column)
2477 -
2478 - # Add set info for col-vals-in-set check
2479 - if check == "col-vals-in-set":
2480 - allowed_values = [value.strip() for value in set.split(",")]
2481 - result_table.add_row("Allowed Values", ", ".join(allowed_values))
2482 -
2483 - # Add value info for range checks
2484 - if check in ["col-vals-gt", "col-vals-ge", "col-vals-lt", "col-vals-le"]:
2485 - if check == "col-vals-gt":
2486 - operator = ">"
2487 - elif check == "col-vals-ge":
2488 - operator = ">="
2489 - elif check == "col-vals-lt":
2490 - operator = "<"
2491 - elif check == "col-vals-le":
2492 - operator = "<="
2493 - result_table.add_row("Threshold", f"{operator} {value}")
2494 -
2495 - # Get validation details
2496 - if hasattr(validation, "validation_info") and validation.validation_info:
2497 - step_info = validation.validation_info[0]  # Should only be one step
2498 - result_table.add_row("Total Rows Tested", f"{step_info.n:,}")
2499 - result_table.add_row("Passing Rows", f"{step_info.n_passed:,}")
2500 - result_table.add_row("Failing Rows", f"{step_info.n_failed:,}")
2501 -
2502 - # Overall result with color coding
2503 - if all_passed:
2504 - result_table.add_row("Result", "[green]✓ PASSED[/green]")
2505 - if check == "rows-distinct":
2506 - result_table.add_row("Duplicate Rows", "[green]None found[/green]")
2507 - elif check == "col-vals-not-null":
2508 - result_table.add_row("Null Values", "[green]None found[/green]")
2509 - elif check == "rows-complete":
2510 - result_table.add_row("Incomplete Rows", "[green]None found[/green]")
2511 - elif check == "col-exists":
2512 - result_table.add_row("Column Status", "[green]Column exists[/green]")
2513 - elif check == "col-vals-in-set":
2514 - result_table.add_row(
2515 - "Values Status", "[green]All values in allowed set[/green]"
2516 - )
2517 - elif check == "col-vals-gt":
2518 - result_table.add_row("Values Status", f"[green]All values > {value}[/green]")
2519 - elif check == "col-vals-ge":
2520 - result_table.add_row("Values Status", f"[green]All values >= {value}[/green]")
2521 - else:
2522 - result_table.add_row("Result", "[red]✗ FAILED[/red]")
2523 - if check == "rows-distinct":
2524 - result_table.add_row(
2525 - "Duplicate Rows", f"[red]{step_info.n_failed:,} found[/red]"
2526 - )
2527 - elif check == "col-vals-not-null":
2528 - result_table.add_row("Null Values", f"[red]{step_info.n_failed:,} found[/red]")
2529 - elif check == "rows-complete":
2530 - result_table.add_row(
2531 - "Incomplete Rows", f"[red]{step_info.n_failed:,} found[/red]"
2532 - )
2533 - elif check == "col-exists":
2534 - result_table.add_row("Column Status", "[red]Column does not exist[/red]")
2535 - elif check == "col-vals-in-set":
2536 - result_table.add_row(
2537 - "Invalid Values", f"[red]{step_info.n_failed:,} found[/red]"
2538 - )
2539 - elif check == "col-vals-gt":
2540 - result_table.add_row(
2541 - "Invalid Values", f"[red]{step_info.n_failed:,} values <= {value}[/red]"
2542 - )
2543 - elif check == "col-vals-ge":
2544 - result_table.add_row(
2545 - "Invalid Values", f"[red]{step_info.n_failed:,} values < {value}[/red]"
2546 - )
3370 +         console.print(f"[green]✓[/green] Found {len(validations)} validation object(s)")
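
The discovery logic above can be exercised on its own: executing source into a dict namespace and then inspecting that dict is plain Python. A compact, self-contained illustration of the same pattern:

    import pointblank as pb

    script = 'validation = pb.Validate(data=pb.load_dataset("small_table")).interrogate()'
    namespace = {"pb": pb, "__name__": "__main__"}
    exec(script, namespace)

    # The same lookup `run` performs first
    assert "validation" in namespace
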
2547 3371
2548 -
2549 -
3372 +         # Process each validation
3373 +         overall_failed = False
3374 +         overall_critical = False
3375 +         overall_error = False
3376 +         overall_warning = False
2550 3377
2551-2553 -
3378 +         for i, validation in enumerate(validations, 1):
3379 +             if len(validations) > 1:
3380 +                 console.print(f"\n[bold cyan]Validation {i}:[/bold cyan]")
2554 3381
2555 - #
2556 -
2557 - extract_message = "[yellow]Preview of failing rows (duplicates):[/yellow]"
2558 - row_type = "duplicate rows"
2559 - elif check == "rows-complete":
2560 - extract_message = "[yellow]Preview of failing rows (incomplete rows):[/yellow]"
2561 - row_type = "incomplete rows"
2562 - elif check == "col-exists":
2563 - extract_message = (
2564 - f"[yellow]Column '{column}' does not exist in the dataset[/yellow]"
2565 - )
2566 - row_type = "missing column"
2567 - elif check == "col-vals-in-set":
2568 - extract_message = (
2569 - f"[yellow]Preview of failing rows (invalid values in '{column}'):[/yellow]"
2570 - )
2571 - row_type = "rows with invalid values"
2572 - elif check == "col-vals-gt":
2573 - extract_message = (
2574 - f"[yellow]Preview of failing rows (values in '{column}' <= {value}):[/yellow]"
2575 - )
2576 - row_type = f"rows with values <= {value}"
2577 - elif check == "col-vals-ge":
2578 - extract_message = (
2579 - f"[yellow]Preview of failing rows (values in '{column}' < {value}):[/yellow]"
2580 - )
2581 - row_type = f"rows with values < {value}"
2582 - else:
2583 - extract_message = "[yellow]Preview of failing rows:[/yellow]"
2584 - row_type = "failing rows"
3382 +             # Display summary
3383 +             _display_validation_summary(validation)
2585 3384
2586 -
3385 +             # Check failure status
3386 +             validation_failed = False
3387 +             has_critical = False
3388 +             has_error = False
3389 +             has_warning = False
2587 3390
2588-2597 -
3391 +             if hasattr(validation, "validation_info") and validation.validation_info:
3392 +                 for step_info in validation.validation_info:
3393 +                     if step_info.critical:
3394 +                         has_critical = True
3395 +                         overall_critical = True
3396 +                     if step_info.error:
3397 +                         has_error = True
3398 +                         overall_error = True
3399 +                     if step_info.warning:
3400 +                         has_warning = True
3401 +                         overall_warning = True
3402 +                     if step_info.n_failed > 0:
3403 +                         validation_failed = True
3404 +                         overall_failed = True
3405 +
3406 +             # Handle extract functionality for failed validations
3407 +             failed_steps = []
3408 +             if (
3409 +                 validation_failed
3410 +                 and hasattr(validation, "validation_info")
3411 +                 and validation.validation_info
3412 +             ):
3413 +                 for j, step_info in enumerate(validation.validation_info, 1):
3414 +                     if step_info.n_failed > 0:
3415 +                         failed_steps.append((j, step_info))
3416 +
3417 +             if validation_failed and failed_steps and (show_extract or write_extract):
3418 +                 console.print()
3419 +
3420 +                 if show_extract:
3421 +                     extract_title = "Extract of failing rows from validation steps"
3422 +                     if len(validations) > 1:
3423 +                         extract_title += f" (Validation {i})"
3424 +                     console.print(f"[yellow]{extract_title}:[/yellow]")
3425 +
3426 +                     for step_num, step_info in failed_steps:
3427 +                         try:
3428 +                             failing_rows = validation.get_data_extracts(i=step_num, frame=True)
3429 +
3430 +                             if failing_rows is not None and len(failing_rows) > 0:
3431 +                                 console.print(
3432 +                                     f"\n[cyan]Step {step_num}:[/cyan] {step_info.assertion_type}"
3433 +                                 )
2598 3434
2599-2602 -
3435 +                                 # Limit the number of rows shown
3436 +                                 if len(failing_rows) > limit:
3437 +                                     display_rows = failing_rows.head(limit)
3438 +                                     console.print(
3439 +                                         f"[dim]Showing first {limit} of {len(failing_rows)} failing rows[/dim]"
3440 +                                     )
3441 +                                 else:
3442 +                                     display_rows = failing_rows
3443 +                                     console.print(
3444 +                                         f"[dim]Showing all {len(failing_rows)} failing rows[/dim]"
3445 +                                     )
3446 +
3447 +                                 # Create a preview table using pointblank's preview function
3448 +                                 preview_table = pb.preview(
3449 +                                     data=display_rows,
3450 +                                     n_head=min(limit, len(display_rows)),
3451 +                                     n_tail=0,
3452 +                                     limit=limit,
3453 +                                     show_row_numbers=True,
3454 +                                 )
3455 +
3456 +                                 # Display using our Rich table function
3457 +                                 _rich_print_gt_table(preview_table, show_summary=False)
3458 +                             else:
3459 +                                 console.print(
3460 +                                     f"\n[cyan]Step {step_num}:[/cyan] {step_info.assertion_type}"
3461 +                                 )
3462 +                                 console.print("[yellow]No failing rows could be extracted[/yellow]")
3463 +                         except Exception as e:
2603 3464                             console.print(
2604 - f"[
3465 +                                 f"\n[cyan]Step {step_num}:[/cyan] {step_info.assertion_type}"
3466 +                             )
3467 +                             console.print(f"[yellow]Could not extract failing rows: {e}[/yellow]")
3468 +
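
The per-step `critical`/`error`/`warning` flags gathered above feed the `--fail-on` option. The exit policy itself sits outside this hunk, but a plausible reading of the flag semantics (an assumption about intent, not the shipped implementation) is a severity cascade:

    def should_exit_nonzero(fail_on, critical, error, warning, failed):
        # Hypothetical helper: each level also triggers on anything more severe.
        if fail_on is None:
            return False
        level = fail_on.lower()
        if level == "critical":
            return critical
        if level == "error":
            return critical or error
        if level == "warning":
            return critical or error or warning
        return failed  # "any"
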
3469 +                 if write_extract:
3470 +                     try:
3471 +                         folder_name = write_extract
3472 +
3473 +                         # Add validation number if multiple validations
3474 +                         if len(validations) > 1:
3475 +                             folder_name = f"{folder_name}_validation_{i}"
3476 +
3477 +                         # Create the output folder
3478 +                         output_folder = Path(folder_name)
3479 +                         output_folder.mkdir(parents=True, exist_ok=True)
3480 +
3481 +                         saved_files = []
3482 +
3483 +                         # Save each failing step to its own CSV file
3484 +                         for step_num, step_info in failed_steps:
3485 +                             try:
3486 +                                 failing_rows = validation.get_data_extracts(i=step_num, frame=True)
3487 +                                 if failing_rows is not None and len(failing_rows) > 0:
3488 +                                     # Create safe filename from assertion type
3489 +                                     safe_assertion_type = (
3490 +                                         step_info.assertion_type.replace(" ", "_")
3491 +                                         .replace("/", "_")
3492 +                                         .replace("\\", "_")
3493 +                                         .replace(":", "_")
3494 +                                         .replace("<", "_")
3495 +                                         .replace(">", "_")
3496 +                                         .replace("|", "_")
3497 +                                         .replace("?", "_")
3498 +                                         .replace("*", "_")
3499 +                                         .replace('"', "_")
3500 +                                     )
3501 +
3502 +                                     filename = f"step_{step_num:02d}_{safe_assertion_type}.csv"
3503 +                                     filepath = output_folder / filename
3504 +
3505 +                                     # Limit the output if needed
3506 +                                     save_rows = failing_rows
3507 +                                     if hasattr(failing_rows, "head") and len(failing_rows) > limit:
3508 +                                         save_rows = failing_rows.head(limit)
3509 +
3510 +                                     # Save to CSV
3511 +                                     if hasattr(save_rows, "write_csv"):
3512 +                                         # Polars
3513 +                                         save_rows.write_csv(str(filepath))
3514 +                                     elif hasattr(save_rows, "to_csv"):
3515 +                                         # Pandas
3516 +                                         save_rows.to_csv(str(filepath), index=False)
3517 +                                     else:
3518 +                                         # Try converting to pandas as fallback
3519 +                                         import pandas as pd
3520 +
3521 +                                         pd_data = pd.DataFrame(save_rows)
3522 +                                         pd_data.to_csv(str(filepath), index=False)
3523 +
3524 +                                     saved_files.append((filename, len(failing_rows)))
3525 +
3526 +                             except Exception as e:
3527 +                                 console.print(
3528 +                                     f"[yellow]Warning: Could not save failing rows from step {step_num}: {e}[/yellow]"
3529 +                                 )
3530 +
3531 +                         if saved_files:
3532 +                             console.print(
3533 +                                 f"[green]✓[/green] Failing rows saved to folder: {output_folder}"
2605 3534                             )
3535 +                             for filename, row_count in saved_files:
3536 +                                 console.print(f"[dim]  - {filename}: {row_count} rows[/dim]")
2606 3537                         else:
2607 -
2608 -
3538 +                             console.print(
3539 +                                 "[yellow]No failing rows could be extracted to save[/yellow]"
3540 +                             )
2609 3541
2610-2612 -
2613 - n_head=min(limit, len(display_rows)),
2614 - n_tail=0,
2615 - limit=limit,
2616 - show_row_numbers=True,
3542 +                     except Exception as e:
3543 +                         console.print(
3544 +                             f"[yellow]Warning: Could not save failing rows to CSV: {e}[/yellow]"
2617 3545                         )
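
The Polars-vs-pandas save logic appears three times in this file (twice in the helpers above, once here). Factored out, the dispatch is just duck typing on the frame's write method; a sketch of such a helper, which is not present in the release:

    from pathlib import Path

    def _write_rows_csv(rows, filepath: Path) -> None:
        if hasattr(rows, "write_csv"):      # Polars frame
            rows.write_csv(str(filepath))
        elif hasattr(rows, "to_csv"):       # pandas frame
            rows.to_csv(str(filepath), index=False)
        else:                               # fall back to pandas coercion
            import pandas as pd

            pd.DataFrame(rows).to_csv(str(filepath), index=False)
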
2618
3546
|
|
|
-
-
-
-
-
-
+        # Save HTML and JSON outputs (combine multiple validations if needed)
+        if output_html:
+            try:
+                if len(validations) == 1:
+                    # Single validation - save directly
+                    html_content = validations[0]._repr_html_()
+                    Path(output_html).write_text(html_content, encoding="utf-8")
+                else:
+                    # Multiple validations - combine them
+                    html_parts = []
+                    html_parts.append("<html><body>")
+                    html_parts.append("<h1>Pointblank Validation Report</h1>")

-
-
-
-            if check == "rows-distinct":
-                success_message = (
-                    f"[green]✓ Validation PASSED: No duplicate rows found in {data_source}[/green]"
-                )
-            elif check == "col-vals-not-null":
-                success_message = f"[green]✓ Validation PASSED: No null values found in column '{column}' in {data_source}[/green]"
-            elif check == "rows-complete":
-                success_message = f"[green]✓ Validation PASSED: All rows are complete (no missing values) in {data_source}[/green]"
-            elif check == "col-exists":
-                success_message = (
-                    f"[green]✓ Validation PASSED: Column '{column}' exists in {data_source}[/green]"
-                )
-            elif check == "col-vals-in-set":
-                success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are in the allowed set in {data_source}[/green]"
-            elif check == "col-vals-gt":
-                success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are > {value} in {data_source}[/green]"
-            elif check == "col-vals-ge":
-                success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are >= {value} in {data_source}[/green]"
-            elif check == "col-vals-lt":
-                success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are < {value} in {data_source}[/green]"
-            elif check == "col-vals-le":
-                success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are <= {value} in {data_source}[/green]"
-            else:
-                success_message = (
-                    f"[green]✓ Validation PASSED: {check} check passed for {data_source}[/green]"
-                )
+                    for i, validation in enumerate(validations, 1):
+                        html_parts.append(f"<h2>Validation {i}</h2>")
+                        html_parts.append(validation._repr_html_())

-
-
-
-                    border_style="green",
-                )
-            )
-        else:
-            if hasattr(validation, "validation_info") and validation.validation_info:
-                step_info = validation.validation_info[0]
+                    html_parts.append("</body></html>")
+                    html_content = "\n".join(html_parts)
+                    Path(output_html).write_text(html_content, encoding="utf-8")

-
-
-
-
-
-
-
-
-
-
-            elif check == "col-vals-gt":
-                failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} values <= {value} found in column '{column}' in {data_source}[/red]"
-            elif check == "col-vals-ge":
-                failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} values < {value} found in column '{column}' in {data_source}[/red]"
-            elif check == "col-vals-lt":
-                failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} values >= {value} found in column '{column}' in {data_source}[/red]"
-            elif check == "col-vals-le":
-                failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} values > {value} found in column '{column}' in {data_source}[/red]"
+                console.print(f"[green]✓[/green] HTML report saved to: {output_html}")
+            except Exception as e:
+                console.print(f"[yellow]Warning: Could not save HTML report: {e}[/yellow]")
+
|
+
if output_json:
|
|
3573
|
+
try:
|
|
3574
|
+
if len(validations) == 1:
|
|
3575
|
+
# Single validation - save directly
|
|
3576
|
+
json_report = validations[0].get_json_report()
|
|
3577
|
+
Path(output_json).write_text(json_report, encoding="utf-8")
|
|
2684
3578
|
else:
|
|
2685
|
-
|
|
3579
|
+
# Multiple validations - combine them
|
|
3580
|
+
import json
|
|
2686
3581
|
|
|
2687
|
-
|
|
2688
|
-
if not show_extract and check != "col-exists":
|
|
2689
|
-
failure_message += (
|
|
2690
|
-
"\n[dim]💡 Tip: Use --show-extract to see the failing rows[/dim]"
|
|
2691
|
-
)
|
|
3582
|
+
combined_report = {"validations": []}
|
|
2692
3583
|
|
|
2693
|
-
|
|
2694
|
-
|
|
2695
|
-
|
|
2696
|
-
|
|
2697
|
-
)
|
|
2698
|
-
)
|
|
2699
|
-
else:
|
|
2700
|
-
if check == "rows-distinct":
|
|
2701
|
-
failure_message = (
|
|
2702
|
-
f"[red]✗ Validation FAILED: Duplicate rows found in {data_source}[/red]"
|
|
2703
|
-
)
|
|
2704
|
-
elif check == "rows-complete":
|
|
2705
|
-
failure_message = (
|
|
2706
|
-
f"[red]✗ Validation FAILED: Incomplete rows found in {data_source}[/red]"
|
|
2707
|
-
)
|
|
2708
|
-
else:
|
|
2709
|
-
failure_message = (
|
|
2710
|
-
f"[red]✗ Validation FAILED: {check} check failed for {data_source}[/red]"
|
|
2711
|
-
)
|
|
3584
|
+
for i, validation in enumerate(validations, 1):
|
|
3585
|
+
validation_json = json.loads(validation.get_json_report())
|
|
3586
|
+
validation_json["validation_id"] = i
|
|
3587
|
+
combined_report["validations"].append(validation_json)
|
|
2712
3588
|
|
|
2713
|
-
|
|
2714
|
-
|
|
2715
|
-
failure_message += (
|
|
2716
|
-
"\n[dim]💡 Tip: Use --show-extract to see the failing rows[/dim]"
|
|
3589
|
+
Path(output_json).write_text(
|
|
3590
|
+
json.dumps(combined_report, indent=2), encoding="utf-8"
|
|
2717
3591
|
)
|
|
2718
3592
|
|
|
2719
|
-
console.print(
|
|
2720
|
-
|
|
2721
|
-
|
|
2722
|
-
border_style="red",
|
|
2723
|
-
)
|
|
2724
|
-
)
|
|
3593
|
+
console.print(f"[green]✓[/green] JSON summary saved to: {output_json}")
|
|
3594
|
+
except Exception as e:
|
|
3595
|
+
console.print(f"[yellow]Warning: Could not save JSON report: {e}[/yellow]")
|
|
2725
3596
|
|
|
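The JSON path mirrors the HTML one: each report comes back from `get_json_report()` as a JSON string, is parsed, tagged with a 1-based `validation_id`, and nested under a top-level `"validations"` key. A minimal sketch of that combining logic, with `write_combined_json` as an illustrative name:

import json
from pathlib import Path


def write_combined_json(validations, output_json: str) -> None:
    if len(validations) == 1:
        # One report: its JSON string can be written as-is.
        Path(output_json).write_text(validations[0].get_json_report(), encoding="utf-8")
        return
    # Several reports: parse, tag, and nest them under one document.
    combined = {"validations": []}
    for i, validation in enumerate(validations, 1):
        report = json.loads(validation.get_json_report())
        report["validation_id"] = i
        combined["validations"].append(report)
    Path(output_json).write_text(json.dumps(combined, indent=2), encoding="utf-8")

Round-tripping through json.loads/json.dumps (rather than concatenating the strings) keeps the combined file a single valid JSON document that downstream tooling can parse.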
-        #
-        if
-
-
+        # Check if we should fail based on threshold
+        if fail_on:
+            should_exit = False
+            exit_reason = ""
+
+            if fail_on.lower() == "critical" and overall_critical:
+                should_exit = True
+                exit_reason = "critical validation failures"
+            elif fail_on.lower() == "error" and (overall_critical or overall_error):
+                should_exit = True
+                exit_reason = "error or critical validation failures"
+            elif fail_on.lower() == "warning" and (
+                overall_critical or overall_error or overall_warning
+            ):
+                should_exit = True
+                exit_reason = "warning, error, or critical validation failures"
+            elif fail_on.lower() == "any" and overall_failed:
+                should_exit = True
+                exit_reason = "validation failures"
+
+            if should_exit:
+                console.print(f"[red]Exiting with error due to {exit_reason}[/red]")
+                sys.exit(1)

     except Exception as e:
         console.print(f"[red]Error:[/red] {e}")
         sys.exit(1)


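The `--fail-on` branches form a strict severity ladder: "critical" exits only on critical failures, "error" also covers critical, "warning" covers all three, and "any" covers any failure at all. The elif chain can therefore be read as a numeric threshold comparison; the sketch below is an equivalent reformulation under that reading, not the CLI's code, and the boolean parameters stand in for the `overall_*` flags in the diff.

import sys

# Higher rank = more severe; "any" accepts every failure.
_LEVELS = {"critical": 3, "error": 2, "warning": 1, "any": 0}


def should_fail(fail_on: str, critical: bool, error: bool, warning: bool, failed: bool) -> bool:
    # Rank the worst observed outcome; -1 means nothing failed.
    worst = 3 if critical else 2 if error else 1 if warning else 0 if failed else -1
    return worst >= _LEVELS[fail_on.lower()]


print(should_fail("error", critical=False, error=True, warning=False, failed=True))   # True
print(should_fail("critical", critical=False, error=True, warning=False, failed=True))  # False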
-
-
+def _format_missing_percentage(value: float) -> str:
+    """Format missing value percentages for display.
+
+    Args:
+        value: The percentage value (0-100)
+
+    Returns:
+        Formatted string with proper percentage display
+    """
+    if value == 0.0:
+        return "[green]●[/green]"  # Large green circle for no missing values
+    elif value == 100.0:
+        return "[red]●[/red]"  # Large red circle for completely missing values
+    elif value < 1.0 and value > 0:
+        return "<1%"  # Less than 1%
+    elif value > 99.0 and value < 100.0:
+        return ">99%"  # More than 99%
+    else:
+        return f"{int(round(value))}%"  # Round to nearest integer with % sign
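The helper pins the edge cases so a column that is almost, but not entirely, complete is never rounded to a misleading 0% or 100%: exact extremes get colored circles, near-extremes get "<1%" / ">99%", and everything else rounds to a whole percent. Illustrative boundary checks, assuming the function as defined in the diff above:

# Expected outputs at the boundary values (illustrative calls only).
assert _format_missing_percentage(0.0) == "[green]●[/green]"
assert _format_missing_percentage(100.0) == "[red]●[/red]"
assert _format_missing_percentage(0.4) == "<1%"
assert _format_missing_percentage(99.5) == ">99%"
assert _format_missing_percentage(42.4) == "42%"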