pointblank 0.11.1__py3-none-any.whl → 0.11.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pointblank/cli.py +1842 -298
- {pointblank-0.11.1.dist-info → pointblank-0.11.3.dist-info}/METADATA +60 -15
- {pointblank-0.11.1.dist-info → pointblank-0.11.3.dist-info}/RECORD +7 -7
- {pointblank-0.11.1.dist-info → pointblank-0.11.3.dist-info}/WHEEL +0 -0
- {pointblank-0.11.1.dist-info → pointblank-0.11.3.dist-info}/entry_points.txt +0 -0
- {pointblank-0.11.1.dist-info → pointblank-0.11.3.dist-info}/licenses/LICENSE +0 -0
- {pointblank-0.11.1.dist-info → pointblank-0.11.3.dist-info}/top_level.txt +0 -0
pointblank/cli.py
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import copy
|
|
4
|
+
import os
|
|
5
|
+
import shutil
|
|
3
6
|
import sys
|
|
4
7
|
from pathlib import Path
|
|
5
8
|
from typing import Any
|
|
@@ -31,6 +34,8 @@ class OrderedGroup(click.Group):
|
|
|
31
34
|
"validate",
|
|
32
35
|
"run",
|
|
33
36
|
"make-template",
|
|
37
|
+
# Data Manipulation
|
|
38
|
+
"pl",
|
|
34
39
|
# Utilities
|
|
35
40
|
"datasets",
|
|
36
41
|
"requirements",
|
|
@@ -90,6 +95,15 @@ def _load_data_source(data_source: str) -> Any:
|
|
|
90
95
|
return _process_data(data_source)
|
|
91
96
|
|
|
92
97
|
|
|
98
|
+
def _is_piped_data_source(data_source: str) -> bool:
|
|
99
|
+
"""Check if the data source is from a piped pb command."""
|
|
100
|
+
return (
|
|
101
|
+
data_source
|
|
102
|
+
and ("pb_pipe_" in data_source)
|
|
103
|
+
and (data_source.startswith("/var/folders/") or data_source.startswith("/tmp/"))
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
|
|
93
107
|
def _format_cell_value(
|
|
94
108
|
value: Any, is_row_number: bool = False, max_width: int = 50, num_columns: int = 10
|
|
95
109
|
) -> str:
|
|
@@ -274,7 +288,7 @@ def _format_dtype_compact(dtype_str: str) -> str:
|
|
|
274
288
|
elif "str" in dtype_str:
|
|
275
289
|
return "str"
|
|
276
290
|
|
|
277
|
-
# Unknown or complex types
|
|
291
|
+
# Unknown or complex types: truncate if too long
|
|
278
292
|
elif len(dtype_str) > 8:
|
|
279
293
|
return dtype_str[:8] + "…"
|
|
280
294
|
else:
|
|
@@ -395,7 +409,7 @@ def _rich_print_scan_table(
|
|
|
395
409
|
# Clean up HTML formatting from the raw data
|
|
396
410
|
str_val = str(value)
|
|
397
411
|
|
|
398
|
-
# Handle multi-line values with <br> tags FIRST
|
|
412
|
+
# Handle multi-line values with <br> tags FIRST: take the first line (absolute number)
|
|
399
413
|
if "<br>" in str_val:
|
|
400
414
|
str_val = str_val.split("<br>")[0].strip()
|
|
401
415
|
# For unique values, we want just the integer part
|
|
@@ -414,14 +428,14 @@ def _rich_print_scan_table(
|
|
|
414
428
|
# Clean up extra whitespace
|
|
415
429
|
str_val = re.sub(r"\s+", " ", str_val).strip()
|
|
416
430
|
|
|
417
|
-
# Handle values like "2<.01"
|
|
431
|
+
# Handle values like "2<.01": extract the first number
|
|
418
432
|
if "<" in str_val and not (str_val.startswith("<") and str_val.endswith(">")):
|
|
419
433
|
# Extract number before the < symbol
|
|
420
434
|
before_lt = str_val.split("<")[0].strip()
|
|
421
435
|
if before_lt and before_lt.replace(".", "").replace("-", "").isdigit():
|
|
422
436
|
str_val = before_lt
|
|
423
437
|
|
|
424
|
-
# Handle boolean unique values like "T0.62F0.38"
|
|
438
|
+
# Handle boolean unique values like "T0.62F0.38": extract the more readable format
|
|
425
439
|
if re.match(r"^[TF]\d+\.\d+[TF]\d+\.\d+$", str_val):
|
|
426
440
|
# Extract T and F values
|
|
427
441
|
t_match = re.search(r"T(\d+\.\d+)", str_val)
|
|
@@ -451,7 +465,7 @@ def _rich_print_scan_table(
|
|
|
451
465
|
# Simple integers under 10000
|
|
452
466
|
return str(int(num_val))
|
|
453
467
|
elif abs(num_val) >= 10000000 and abs(num_val) < 100000000:
|
|
454
|
-
# Likely dates in YYYYMMDD format
|
|
468
|
+
# Likely dates in YYYYMMDD format: format as date-like
|
|
455
469
|
int_val = int(num_val)
|
|
456
470
|
if 19000101 <= int_val <= 29991231: # Reasonable date range
|
|
457
471
|
str_date = str(int_val)
|
|
@@ -463,29 +477,29 @@ def _rich_print_scan_table(
|
|
|
463
477
|
# Otherwise treat as large number
|
|
464
478
|
return f"{num_val / 1000000:.1f}M"
|
|
465
479
|
elif abs(num_val) >= 1000000:
|
|
466
|
-
# Large numbers
|
|
480
|
+
# Large numbers: use scientific notation or M/k notation
|
|
467
481
|
|
|
468
482
|
if abs(num_val) >= 1000000000:
|
|
469
483
|
return f"{num_val:.1e}"
|
|
470
484
|
else:
|
|
471
485
|
return f"{num_val / 1000000:.1f}M"
|
|
472
486
|
elif abs(num_val) >= 10000:
|
|
473
|
-
# Numbers >= 10k
|
|
487
|
+
# Numbers >= 10k: use compact notation
|
|
474
488
|
return f"{num_val / 1000:.1f}k"
|
|
475
489
|
elif abs(num_val) >= 100:
|
|
476
|
-
# Numbers 100-9999
|
|
490
|
+
# Numbers 100-9999: show with minimal decimals
|
|
477
491
|
return f"{num_val:.1f}"
|
|
478
492
|
elif abs(num_val) >= 10:
|
|
479
|
-
# Numbers 10-99
|
|
493
|
+
# Numbers 10-99: show with one decimal
|
|
480
494
|
return f"{num_val:.1f}"
|
|
481
495
|
elif abs(num_val) >= 1:
|
|
482
|
-
# Numbers 1-9
|
|
496
|
+
# Numbers 1-9: show with two decimals
|
|
483
497
|
return f"{num_val:.2f}"
|
|
484
498
|
elif abs(num_val) >= 0.01:
|
|
485
|
-
# Small numbers
|
|
499
|
+
# Small numbers: show with appropriate precision
|
|
486
500
|
return f"{num_val:.2f}"
|
|
487
501
|
else:
|
|
488
|
-
# Very small numbers
|
|
502
|
+
# Very small numbers: use scientific notation
|
|
489
503
|
|
|
490
504
|
return f"{num_val:.1e}"
|
|
491
505
|
|
|
@@ -493,7 +507,7 @@ def _rich_print_scan_table(
|
|
|
493
507
|
# Not a number, handle as string
|
|
494
508
|
pass
|
|
495
509
|
|
|
496
|
-
# Handle date/datetime strings
|
|
510
|
+
# Handle date/datetime strings: show abbreviated format
|
|
497
511
|
if len(str_val) > 10 and any(char in str_val for char in ["-", "/", ":"]):
|
|
498
512
|
# Likely a date/datetime, show abbreviated
|
|
499
513
|
if len(str_val) > max_width:
|
|
@@ -557,9 +571,12 @@ def _rich_print_gt_table(
|
|
|
557
571
|
gt_table: The GT table object to display
|
|
558
572
|
preview_info: Optional dict with preview context info:
|
|
559
573
|
- total_rows: Total rows in the dataset
|
|
574
|
+
- total_columns: Total columns in the dataset
|
|
560
575
|
- head_rows: Number of head rows shown
|
|
561
576
|
- tail_rows: Number of tail rows shown
|
|
562
577
|
- is_complete: Whether the entire dataset is shown
|
|
578
|
+
- source_type: Type of data source (e.g., "External source: worldcities_new.csv")
|
|
579
|
+
- table_type: Type of table (e.g., "polars")
|
|
563
580
|
show_summary: Whether to show the row count summary at the bottom
|
|
564
581
|
"""
|
|
565
582
|
try:
|
|
@@ -592,6 +609,12 @@ def _rich_print_gt_table(
|
|
|
592
609
|
table_type = preview_info["table_type"]
|
|
593
610
|
table_title = f"Data Preview / {source_type} / {table_type}"
|
|
594
611
|
|
|
612
|
+
# Add dimensions subtitle in gray if available
|
|
613
|
+
total_rows = preview_info.get("total_rows")
|
|
614
|
+
total_columns = preview_info.get("total_columns")
|
|
615
|
+
if total_rows is not None and total_columns is not None:
|
|
616
|
+
table_title += f"\n[dim]{total_rows:,} rows / {total_columns} columns[/dim]"
|
|
617
|
+
|
|
595
618
|
rich_table = Table(
|
|
596
619
|
title=table_title,
|
|
597
620
|
show_header=True,
|
|
@@ -933,14 +956,19 @@ def _rich_print_gt_table(
|
|
|
933
956
|
|
|
934
957
|
|
|
935
958
|
def _display_validation_summary(validation: Any) -> None:
|
|
936
|
-
"""Display a validation summary in a Rich table format."""
|
|
959
|
+
"""Display a validation summary in a compact Rich table format."""
|
|
937
960
|
try:
|
|
938
961
|
# Try to get the summary from the validation report
|
|
939
962
|
if hasattr(validation, "validation_info") and validation.validation_info is not None:
|
|
940
963
|
# Use the validation_info to create a summary
|
|
941
964
|
info = validation.validation_info
|
|
942
965
|
n_steps = len(info)
|
|
943
|
-
|
|
966
|
+
|
|
967
|
+
# Count steps based on their threshold status
|
|
968
|
+
n_passed = sum(
|
|
969
|
+
1 for step in info if not step.warning and not step.error and not step.critical
|
|
970
|
+
)
|
|
971
|
+
n_all_passed = sum(1 for step in info if step.all_passed)
|
|
944
972
|
n_failed = n_steps - n_passed
|
|
945
973
|
|
|
946
974
|
# Calculate severity counts
|
|
@@ -950,64 +978,213 @@ def _display_validation_summary(validation: Any) -> None:
|
|
|
950
978
|
|
|
951
979
|
all_passed = n_failed == 0
|
|
952
980
|
|
|
953
|
-
# Determine highest severity
|
|
981
|
+
# Determine highest severity and its color
|
|
954
982
|
if n_critical > 0:
|
|
955
983
|
highest_severity = "critical"
|
|
984
|
+
severity_color = "red"
|
|
956
985
|
elif n_error > 0:
|
|
957
986
|
highest_severity = "error"
|
|
987
|
+
severity_color = "yellow"
|
|
958
988
|
elif n_warning > 0:
|
|
959
989
|
highest_severity = "warning"
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
990
|
+
severity_color = "bright_black" # gray
|
|
991
|
+
elif n_all_passed == n_steps:
|
|
992
|
+
# All steps passed AND all steps had 100% pass rate
|
|
963
993
|
highest_severity = "all passed"
|
|
994
|
+
severity_color = "bold green"
|
|
995
|
+
else:
|
|
996
|
+
# Steps passed (no threshold exceedances) but some had failing test units
|
|
997
|
+
highest_severity = "passed"
|
|
998
|
+
severity_color = "green"
|
|
999
|
+
|
|
1000
|
+
# Create compact summary header
|
|
1001
|
+
# Format: Steps: 6 / P: 3 (3 AP) / W: 3 / E: 0 / C: 0 / warning
|
|
1002
|
+
summary_header = (
|
|
1003
|
+
f"Steps: {n_steps} / P: {n_passed} ({n_all_passed} AP) / "
|
|
1004
|
+
f"W: {n_warning} / E: {n_error} / C: {n_critical} / "
|
|
1005
|
+
f"[{severity_color}]{highest_severity}[/{severity_color}]"
|
|
1006
|
+
)
|
|
964
1007
|
|
|
965
|
-
#
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
# Add summary statistics
|
|
971
|
-
table.add_row("Total Steps", str(n_steps))
|
|
972
|
-
table.add_row("Passing Steps", str(n_passed))
|
|
973
|
-
table.add_row("Failing Steps", str(n_failed))
|
|
974
|
-
table.add_row("Warning Steps", str(n_warning))
|
|
975
|
-
table.add_row("Error Steps", str(n_error))
|
|
976
|
-
table.add_row("Critical Steps", str(n_critical))
|
|
977
|
-
table.add_row("All Passed", str(all_passed))
|
|
978
|
-
table.add_row("Highest Severity", highest_severity)
|
|
979
|
-
|
|
980
|
-
console.print(table)
|
|
1008
|
+
# Print the report title and summary
|
|
1009
|
+
console.print()
|
|
1010
|
+
console.print("[blue]Validation Report[/blue]")
|
|
1011
|
+
console.print(f"[white]{summary_header}[/white]")
|
|
981
1012
|
|
|
982
1013
|
# Display step details
|
|
983
1014
|
if n_steps > 0:
|
|
1015
|
+
from rich.box import SIMPLE_HEAD
|
|
1016
|
+
|
|
984
1017
|
steps_table = Table(
|
|
985
|
-
|
|
1018
|
+
show_header=True,
|
|
1019
|
+
header_style="bold cyan",
|
|
1020
|
+
box=SIMPLE_HEAD,
|
|
986
1021
|
)
|
|
987
|
-
steps_table.add_column("
|
|
988
|
-
steps_table.add_column("
|
|
1022
|
+
steps_table.add_column("", style="dim")
|
|
1023
|
+
steps_table.add_column("Step", style="white")
|
|
989
1024
|
steps_table.add_column("Column", style="cyan")
|
|
990
|
-
steps_table.add_column("
|
|
991
|
-
steps_table.add_column("
|
|
1025
|
+
steps_table.add_column("Values", style="yellow")
|
|
1026
|
+
steps_table.add_column("Units", style="blue")
|
|
1027
|
+
steps_table.add_column("Pass", style="green")
|
|
1028
|
+
steps_table.add_column("Fail", style="red")
|
|
1029
|
+
steps_table.add_column("W", style="bright_black")
|
|
1030
|
+
steps_table.add_column("E", style="yellow")
|
|
1031
|
+
steps_table.add_column("C", style="red")
|
|
1032
|
+
steps_table.add_column("Ext", style="blue", justify="center")
|
|
1033
|
+
|
|
1034
|
+
def format_units(n: int) -> str:
|
|
1035
|
+
"""Format large numbers with K, M, B abbreviations for values above 10,000."""
|
|
1036
|
+
if n is None:
|
|
1037
|
+
return "—"
|
|
1038
|
+
if n >= 1000000000: # Billions
|
|
1039
|
+
return f"{n / 1000000000:.1f}B"
|
|
1040
|
+
elif n >= 1000000: # Millions
|
|
1041
|
+
return f"{n / 1000000:.1f}M"
|
|
1042
|
+
elif n >= 10000: # Use K for 10,000 and above
|
|
1043
|
+
return f"{n / 1000:.0f}K"
|
|
1044
|
+
else:
|
|
1045
|
+
return str(n)
|
|
1046
|
+
|
|
1047
|
+
def format_pass_fail(passed: int, total: int) -> str:
|
|
1048
|
+
"""Format pass/fail counts with abbreviated numbers and fractions."""
|
|
1049
|
+
if passed is None or total is None or total == 0:
|
|
1050
|
+
return "—/—"
|
|
1051
|
+
|
|
1052
|
+
# Calculate fraction
|
|
1053
|
+
fraction = passed / total
|
|
1054
|
+
|
|
1055
|
+
# Format fraction with special handling for very small and very large values
|
|
1056
|
+
if fraction == 0.0:
|
|
1057
|
+
fraction_str = "0.00"
|
|
1058
|
+
elif fraction == 1.0:
|
|
1059
|
+
fraction_str = "1.00"
|
|
1060
|
+
elif fraction < 0.005: # Less than 0.005 rounds to 0.00
|
|
1061
|
+
fraction_str = "<0.01"
|
|
1062
|
+
elif fraction > 0.995: # Greater than 0.995 rounds to 1.00
|
|
1063
|
+
fraction_str = ">0.99"
|
|
1064
|
+
else:
|
|
1065
|
+
fraction_str = f"{fraction:.2f}"
|
|
1066
|
+
|
|
1067
|
+
# Format absolute number with abbreviations
|
|
1068
|
+
absolute_str = format_units(passed)
|
|
1069
|
+
|
|
1070
|
+
return f"{absolute_str}/{fraction_str}"
|
|
992
1071
|
|
|
993
1072
|
for step in info:
|
|
994
|
-
|
|
995
|
-
|
|
1073
|
+
# Extract values information for the Values column
|
|
1074
|
+
values_str = "—" # Default to em dash if no values
|
|
1075
|
+
|
|
1076
|
+
# Handle different validation types
|
|
1077
|
+
if step.assertion_type == "col_schema_match":
|
|
1078
|
+
values_str = "—" # Schema is too complex to display inline
|
|
1079
|
+
elif step.assertion_type == "col_vals_between":
|
|
1080
|
+
# For between validations, try to get left and right bounds
|
|
1081
|
+
if (
|
|
1082
|
+
hasattr(step, "left")
|
|
1083
|
+
and hasattr(step, "right")
|
|
1084
|
+
and step.left is not None
|
|
1085
|
+
and step.right is not None
|
|
1086
|
+
):
|
|
1087
|
+
values_str = f"[{step.left}, {step.right}]"
|
|
1088
|
+
elif hasattr(step, "values") and step.values is not None:
|
|
1089
|
+
if isinstance(step.values, (list, tuple)) and len(step.values) >= 2:
|
|
1090
|
+
values_str = f"[{step.values[0]}, {step.values[1]}]"
|
|
1091
|
+
else:
|
|
1092
|
+
values_str = str(step.values)
|
|
1093
|
+
elif step.assertion_type in ["row_count_match", "col_count_match"]:
|
|
1094
|
+
# For count match validations, extract the 'count' value from the dictionary
|
|
1095
|
+
if hasattr(step, "values") and step.values is not None:
|
|
1096
|
+
if isinstance(step.values, dict) and "count" in step.values:
|
|
1097
|
+
values_str = str(step.values["count"])
|
|
1098
|
+
else:
|
|
1099
|
+
values_str = str(step.values)
|
|
1100
|
+
else:
|
|
1101
|
+
values_str = "—"
|
|
1102
|
+
elif step.assertion_type in ["col_vals_expr", "conjointly"]:
|
|
1103
|
+
values_str = "COLUMN EXPR"
|
|
1104
|
+
elif step.assertion_type == "specially":
|
|
1105
|
+
values_str = "EXPR"
|
|
1106
|
+
elif hasattr(step, "values") and step.values is not None:
|
|
1107
|
+
if isinstance(step.values, (list, tuple)):
|
|
1108
|
+
if len(step.values) <= 3:
|
|
1109
|
+
values_str = ", ".join(str(v) for v in step.values)
|
|
1110
|
+
else:
|
|
1111
|
+
values_str = f"{', '.join(str(v) for v in step.values[:3])}..."
|
|
1112
|
+
else:
|
|
1113
|
+
values_str = str(step.values)
|
|
1114
|
+
elif hasattr(step, "value") and step.value is not None:
|
|
1115
|
+
values_str = str(step.value)
|
|
1116
|
+
elif hasattr(step, "set") and step.set is not None:
|
|
1117
|
+
if isinstance(step.set, (list, tuple)):
|
|
1118
|
+
if len(step.set) <= 3:
|
|
1119
|
+
values_str = ", ".join(str(v) for v in step.set)
|
|
1120
|
+
else:
|
|
1121
|
+
values_str = f"{', '.join(str(v) for v in step.set[:3])}..."
|
|
1122
|
+
else:
|
|
1123
|
+
values_str = str(step.set)
|
|
1124
|
+
|
|
1125
|
+
# Determine threshold status for W, E, C columns
|
|
1126
|
+
# Check if thresholds are set and whether they were exceeded
|
|
1127
|
+
|
|
1128
|
+
# Warning threshold
|
|
1129
|
+
if (
|
|
1130
|
+
hasattr(step, "thresholds")
|
|
1131
|
+
and step.thresholds
|
|
1132
|
+
and hasattr(step.thresholds, "warning")
|
|
1133
|
+
and step.thresholds.warning is not None
|
|
1134
|
+
):
|
|
1135
|
+
w_status = (
|
|
1136
|
+
"[bright_black]●[/bright_black]"
|
|
1137
|
+
if step.warning
|
|
1138
|
+
else "[bright_black]○[/bright_black]"
|
|
1139
|
+
)
|
|
1140
|
+
else:
|
|
1141
|
+
w_status = "—"
|
|
1142
|
+
|
|
1143
|
+
# Error threshold
|
|
1144
|
+
if (
|
|
1145
|
+
hasattr(step, "thresholds")
|
|
1146
|
+
and step.thresholds
|
|
1147
|
+
and hasattr(step.thresholds, "error")
|
|
1148
|
+
and step.thresholds.error is not None
|
|
1149
|
+
):
|
|
1150
|
+
e_status = "[yellow]●[/yellow]" if step.error else "[yellow]○[/yellow]"
|
|
1151
|
+
else:
|
|
1152
|
+
e_status = "—"
|
|
1153
|
+
|
|
1154
|
+
# Critical threshold
|
|
1155
|
+
if (
|
|
1156
|
+
hasattr(step, "thresholds")
|
|
1157
|
+
and step.thresholds
|
|
1158
|
+
and hasattr(step.thresholds, "critical")
|
|
1159
|
+
and step.thresholds.critical is not None
|
|
1160
|
+
):
|
|
1161
|
+
c_status = "[red]●[/red]" if step.critical else "[red]○[/red]"
|
|
1162
|
+
else:
|
|
1163
|
+
c_status = "—"
|
|
996
1164
|
|
|
997
|
-
|
|
998
|
-
if
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1165
|
+
# Extract status, here we check if the step has any extract data
|
|
1166
|
+
if (
|
|
1167
|
+
hasattr(step, "extract")
|
|
1168
|
+
and step.extract is not None
|
|
1169
|
+
and hasattr(step.extract, "__len__")
|
|
1170
|
+
and len(step.extract) > 0
|
|
1171
|
+
):
|
|
1172
|
+
ext_status = "[blue]✓[/blue]"
|
|
1173
|
+
else:
|
|
1174
|
+
ext_status = "[bright_black]—[/bright_black]"
|
|
1004
1175
|
|
|
1005
1176
|
steps_table.add_row(
|
|
1006
1177
|
str(step.i),
|
|
1007
1178
|
step.assertion_type,
|
|
1008
1179
|
str(step.column) if step.column else "—",
|
|
1009
|
-
|
|
1010
|
-
|
|
1180
|
+
values_str,
|
|
1181
|
+
format_units(step.n),
|
|
1182
|
+
format_pass_fail(step.n_passed, step.n),
|
|
1183
|
+
format_pass_fail(step.n - step.n_passed, step.n),
|
|
1184
|
+
w_status,
|
|
1185
|
+
e_status,
|
|
1186
|
+
c_status,
|
|
1187
|
+
ext_status,
|
|
1011
1188
|
)
|
|
1012
1189
|
|
|
1013
1190
|
console.print(steps_table)
|
|
@@ -1015,18 +1192,32 @@ def _display_validation_summary(validation: Any) -> None:
|
|
|
1015
1192
|
# Display status with appropriate color
|
|
1016
1193
|
if highest_severity == "all passed":
|
|
1017
1194
|
console.print(
|
|
1018
|
-
Panel(
|
|
1195
|
+
Panel(
|
|
1196
|
+
"[green]✓ All validations passed![/green]",
|
|
1197
|
+
border_style="green",
|
|
1198
|
+
expand=False,
|
|
1199
|
+
)
|
|
1019
1200
|
)
|
|
1020
|
-
elif highest_severity == "
|
|
1201
|
+
elif highest_severity == "passed":
|
|
1021
1202
|
console.print(
|
|
1022
|
-
Panel(
|
|
1203
|
+
Panel(
|
|
1204
|
+
"[dim green]⚠ Some steps had failing test units[/dim green]",
|
|
1205
|
+
border_style="dim green",
|
|
1206
|
+
expand=False,
|
|
1207
|
+
)
|
|
1023
1208
|
)
|
|
1024
1209
|
elif highest_severity in ["warning", "error", "critical"]:
|
|
1025
|
-
|
|
1210
|
+
if highest_severity == "warning":
|
|
1211
|
+
color = "bright_black" # gray
|
|
1212
|
+
elif highest_severity == "error":
|
|
1213
|
+
color = "yellow"
|
|
1214
|
+
else: # critical
|
|
1215
|
+
color = "red"
|
|
1026
1216
|
console.print(
|
|
1027
1217
|
Panel(
|
|
1028
1218
|
f"[{color}]✗ Validation failed with {highest_severity} severity[/{color}]",
|
|
1029
1219
|
border_style=color,
|
|
1220
|
+
expand=False,
|
|
1030
1221
|
)
|
|
1031
1222
|
)
|
|
1032
1223
|
else:
|
|
@@ -1040,20 +1231,31 @@ def _display_validation_summary(validation: Any) -> None:
|
|
|
1040
1231
|
|
|
1041
1232
|
|
|
1042
1233
|
@click.group(cls=OrderedGroup)
|
|
1043
|
-
@click.version_option(
|
|
1234
|
+
@click.version_option(pb.__version__, "-v", "--version", prog_name="pb")
|
|
1235
|
+
@click.help_option("-h", "--help")
|
|
1044
1236
|
def cli():
|
|
1045
1237
|
"""
|
|
1046
|
-
Pointblank CLI
|
|
1238
|
+
Pointblank CLI: Data validation and quality tools for data engineers.
|
|
1239
|
+
|
|
1240
|
+
Use this CLI to validate data quality, explore datasets, and generate comprehensive
|
|
1241
|
+
reports for CSV, Parquet, and database sources. Suitable for data pipelines, ETL
|
|
1242
|
+
validation, and exploratory data analysis from the command line.
|
|
1243
|
+
|
|
1244
|
+
Quick Examples:
|
|
1047
1245
|
|
|
1048
|
-
|
|
1049
|
-
|
|
1246
|
+
\b
|
|
1247
|
+
pb preview data.csv Preview your data
|
|
1248
|
+
pb scan data.csv Generate data profile
|
|
1249
|
+
pb validate data.csv Run basic validation
|
|
1250
|
+
|
|
1251
|
+
Use pb COMMAND --help for detailed help on any command.
|
|
1050
1252
|
"""
|
|
1051
1253
|
pass
|
|
1052
1254
|
|
|
1053
1255
|
|
|
1054
1256
|
@cli.command()
|
|
1055
|
-
@click.argument("data_source", type=str)
|
|
1056
|
-
def info(data_source: str):
|
|
1257
|
+
@click.argument("data_source", type=str, required=False)
|
|
1258
|
+
def info(data_source: str | None):
|
|
1057
1259
|
"""
|
|
1058
1260
|
Display information about a data source.
|
|
1059
1261
|
|
|
@@ -1069,6 +1271,11 @@ def info(data_source: str):
|
|
|
1069
1271
|
- Dataset name from pointblank (small_table, game_revenue, nycflights, global_sales)
|
|
1070
1272
|
"""
|
|
1071
1273
|
try:
|
|
1274
|
+
# Handle missing data_source with concise help
|
|
1275
|
+
if data_source is None:
|
|
1276
|
+
_show_concise_help("info", None)
|
|
1277
|
+
return
|
|
1278
|
+
|
|
1072
1279
|
with console.status("[bold green]Loading data..."):
|
|
1073
1280
|
# Load the data source using the centralized function
|
|
1074
1281
|
data = _load_data_source(data_source)
|
|
@@ -1107,21 +1314,21 @@ def info(data_source: str):
|
|
|
1107
1314
|
|
|
1108
1315
|
|
|
1109
1316
|
@cli.command()
|
|
1110
|
-
@click.argument("data_source", type=str)
|
|
1111
|
-
@click.option("--columns",
|
|
1317
|
+
@click.argument("data_source", type=str, required=False)
|
|
1318
|
+
@click.option("--columns", help="Comma-separated list of columns to display")
|
|
1112
1319
|
@click.option("--col-range", help="Column range like '1:10' or '5:' or ':15' (1-based indexing)")
|
|
1113
1320
|
@click.option("--col-first", type=int, help="Show first N columns")
|
|
1114
1321
|
@click.option("--col-last", type=int, help="Show last N columns")
|
|
1115
|
-
@click.option("--head",
|
|
1116
|
-
@click.option("--tail",
|
|
1117
|
-
@click.option("--limit",
|
|
1322
|
+
@click.option("--head", default=5, help="Number of rows from the top (default: 5)")
|
|
1323
|
+
@click.option("--tail", default=5, help="Number of rows from the bottom (default: 5)")
|
|
1324
|
+
@click.option("--limit", default=50, help="Maximum total rows to display (default: 50)")
|
|
1118
1325
|
@click.option("--no-row-numbers", is_flag=True, help="Hide row numbers")
|
|
1119
1326
|
@click.option("--max-col-width", default=250, help="Maximum column width in pixels (default: 250)")
|
|
1120
1327
|
@click.option("--min-table-width", default=500, help="Minimum table width in pixels (default: 500)")
|
|
1121
1328
|
@click.option("--no-header", is_flag=True, help="Hide table header")
|
|
1122
1329
|
@click.option("--output-html", type=click.Path(), help="Save HTML output to file")
|
|
1123
1330
|
def preview(
|
|
1124
|
-
data_source: str,
|
|
1331
|
+
data_source: str | None,
|
|
1125
1332
|
columns: str | None,
|
|
1126
1333
|
col_range: str | None,
|
|
1127
1334
|
col_first: int | None,
|
|
@@ -1146,6 +1353,7 @@ def preview(
|
|
|
1146
1353
|
- GitHub URL to CSV/Parquet (e.g., https://github.com/user/repo/blob/main/data.csv)
|
|
1147
1354
|
- Database connection string (e.g., duckdb:///path/to/db.ddb::table_name)
|
|
1148
1355
|
- Dataset name from pointblank (small_table, game_revenue, nycflights, global_sales)
|
|
1356
|
+
- Piped data from pb pl command
|
|
1149
1357
|
|
|
1150
1358
|
COLUMN SELECTION OPTIONS:
|
|
1151
1359
|
|
|
@@ -1160,11 +1368,52 @@ def preview(
|
|
|
1160
1368
|
Tables with >15 columns automatically show first 7 and last 7 columns with indicators.
|
|
1161
1369
|
"""
|
|
1162
1370
|
try:
|
|
1371
|
+
import sys
|
|
1372
|
+
|
|
1373
|
+
# Handle piped input
|
|
1374
|
+
if data_source is None:
|
|
1375
|
+
if not sys.stdin.isatty():
|
|
1376
|
+
# Data is being piped in - read the file path from stdin
|
|
1377
|
+
piped_input = sys.stdin.read().strip()
|
|
1378
|
+
if piped_input:
|
|
1379
|
+
data_source = piped_input
|
|
1380
|
+
|
|
1381
|
+
# Determine the format from the file extension
|
|
1382
|
+
if piped_input.endswith(".parquet"):
|
|
1383
|
+
format_type = "Parquet"
|
|
1384
|
+
elif piped_input.endswith(".csv"):
|
|
1385
|
+
format_type = "CSV"
|
|
1386
|
+
else:
|
|
1387
|
+
format_type = "unknown"
|
|
1388
|
+
|
|
1389
|
+
console.print(f"[dim]Using piped data source in {format_type} format.[/dim]")
|
|
1390
|
+
else:
|
|
1391
|
+
console.print("[red]Error:[/red] No data provided via pipe")
|
|
1392
|
+
sys.exit(1)
|
|
1393
|
+
else:
|
|
1394
|
+
# Show concise help and exit
|
|
1395
|
+
_show_concise_help("preview", None)
|
|
1396
|
+
return
|
|
1397
|
+
|
|
1163
1398
|
with console.status("[bold green]Loading data..."):
|
|
1164
1399
|
# Load the data source using the centralized function
|
|
1165
1400
|
data = _load_data_source(data_source)
|
|
1166
1401
|
|
|
1167
|
-
|
|
1402
|
+
# Check if this is a piped data source and create friendly display name
|
|
1403
|
+
is_piped_data = _is_piped_data_source(data_source)
|
|
1404
|
+
|
|
1405
|
+
if is_piped_data:
|
|
1406
|
+
if data_source.endswith(".parquet"):
|
|
1407
|
+
display_source = "Parquet file via `pb pl`"
|
|
1408
|
+
elif data_source.endswith(".csv"):
|
|
1409
|
+
display_source = "CSV file via `pb pl`"
|
|
1410
|
+
else:
|
|
1411
|
+
display_source = "File via `pb pl`"
|
|
1412
|
+
console.print(
|
|
1413
|
+
f"[green]✓[/green] Loaded data source: {display_source} ({data_source})"
|
|
1414
|
+
)
|
|
1415
|
+
else:
|
|
1416
|
+
console.print(f"[green]✓[/green] Loaded data source: {data_source}")
|
|
1168
1417
|
|
|
1169
1418
|
# Parse columns if provided
|
|
1170
1419
|
columns_list = None
|
|
@@ -1186,7 +1435,7 @@ def preview(
|
|
|
1186
1435
|
# If _row_num_ exists in data but not in user selection, add it at beginning
|
|
1187
1436
|
if all_columns and "_row_num_" in all_columns and "_row_num_" not in columns_list:
|
|
1188
1437
|
columns_list = ["_row_num_"] + columns_list
|
|
1189
|
-
except Exception:
|
|
1438
|
+
except Exception:
|
|
1190
1439
|
# If we can't process the data, just use the user's column list as-is
|
|
1191
1440
|
pass
|
|
1192
1441
|
elif col_range or col_first or col_last:
|
|
@@ -1261,7 +1510,14 @@ def preview(
|
|
|
1261
1510
|
total_dataset_columns = pb.get_column_count(processed_data)
|
|
1262
1511
|
|
|
1263
1512
|
# Determine source type and table type for enhanced preview title
|
|
1264
|
-
if
|
|
1513
|
+
if is_piped_data:
|
|
1514
|
+
if data_source.endswith(".parquet"):
|
|
1515
|
+
source_type = "Polars expression (serialized to Parquet) from `pb pl`"
|
|
1516
|
+
elif data_source.endswith(".csv"):
|
|
1517
|
+
source_type = "Polars expression (serialized to CSV) from `pb pl`"
|
|
1518
|
+
else:
|
|
1519
|
+
source_type = "Polars expression from `pb pl`"
|
|
1520
|
+
elif data_source in ["small_table", "game_revenue", "nycflights", "global_sales"]:
|
|
1265
1521
|
source_type = f"Pointblank dataset: {data_source}"
|
|
1266
1522
|
else:
|
|
1267
1523
|
source_type = f"External source: {data_source}"
|
|
@@ -1311,17 +1567,17 @@ def preview(
|
|
|
1311
1567
|
|
|
1312
1568
|
_rich_print_gt_table(gt_table, preview_info)
|
|
1313
1569
|
|
|
1314
|
-
except Exception as e:
|
|
1570
|
+
except Exception as e:
|
|
1315
1571
|
console.print(f"[red]Error:[/red] {e}")
|
|
1316
|
-
sys.exit(1)
|
|
1572
|
+
sys.exit(1)
|
|
1317
1573
|
|
|
1318
1574
|
|
|
1319
1575
|
@cli.command()
|
|
1320
|
-
@click.argument("data_source", type=str)
|
|
1576
|
+
@click.argument("data_source", type=str, required=False)
|
|
1321
1577
|
@click.option("--output-html", type=click.Path(), help="Save HTML scan report to file")
|
|
1322
1578
|
@click.option("--columns", "-c", help="Comma-separated list of columns to scan")
|
|
1323
1579
|
def scan(
|
|
1324
|
-
data_source: str,
|
|
1580
|
+
data_source: str | None,
|
|
1325
1581
|
output_html: str | None,
|
|
1326
1582
|
columns: str | None,
|
|
1327
1583
|
):
|
|
@@ -1344,17 +1600,58 @@ def scan(
|
|
|
1344
1600
|
- GitHub URL to CSV/Parquet (e.g., https://github.com/user/repo/blob/main/data.csv)
|
|
1345
1601
|
- Database connection string (e.g., duckdb:///path/to/db.ddb::table_name)
|
|
1346
1602
|
- Dataset name from pointblank (small_table, game_revenue, nycflights, global_sales)
|
|
1603
|
+
- Piped data from pb pl command
|
|
1347
1604
|
"""
|
|
1348
1605
|
try:
|
|
1606
|
+
import sys
|
|
1349
1607
|
import time
|
|
1350
1608
|
|
|
1351
1609
|
start_time = time.time()
|
|
1352
1610
|
|
|
1611
|
+
# Handle piped input
|
|
1612
|
+
if data_source is None:
|
|
1613
|
+
if not sys.stdin.isatty():
|
|
1614
|
+
# Data is being piped in - read the file path from stdin
|
|
1615
|
+
piped_input = sys.stdin.read().strip()
|
|
1616
|
+
if piped_input:
|
|
1617
|
+
data_source = piped_input
|
|
1618
|
+
|
|
1619
|
+
# Determine the format from the file extension
|
|
1620
|
+
if piped_input.endswith(".parquet"):
|
|
1621
|
+
format_type = "Parquet"
|
|
1622
|
+
elif piped_input.endswith(".csv"):
|
|
1623
|
+
format_type = "CSV"
|
|
1624
|
+
else:
|
|
1625
|
+
format_type = "unknown"
|
|
1626
|
+
|
|
1627
|
+
console.print(f"[dim]Using piped data source in {format_type} format.[/dim]")
|
|
1628
|
+
else:
|
|
1629
|
+
console.print("[red]Error:[/red] No data provided via pipe")
|
|
1630
|
+
sys.exit(1)
|
|
1631
|
+
else:
|
|
1632
|
+
# Show concise help and exit
|
|
1633
|
+
_show_concise_help("scan", None)
|
|
1634
|
+
return
|
|
1635
|
+
|
|
1353
1636
|
with console.status("[bold green]Loading data..."):
|
|
1354
1637
|
# Load the data source using the centralized function
|
|
1355
1638
|
data = _load_data_source(data_source)
|
|
1356
1639
|
|
|
1357
|
-
|
|
1640
|
+
# Check if this is a piped data source and create friendly display name
|
|
1641
|
+
is_piped_data = _is_piped_data_source(data_source)
|
|
1642
|
+
|
|
1643
|
+
if is_piped_data:
|
|
1644
|
+
if data_source.endswith(".parquet"):
|
|
1645
|
+
display_source = "Parquet file via `pb pl`"
|
|
1646
|
+
elif data_source.endswith(".csv"):
|
|
1647
|
+
display_source = "CSV file via `pb pl`"
|
|
1648
|
+
else:
|
|
1649
|
+
display_source = "File via `pb pl`"
|
|
1650
|
+
console.print(
|
|
1651
|
+
f"[green]✓[/green] Loaded data source: {display_source} ({data_source})"
|
|
1652
|
+
)
|
|
1653
|
+
else:
|
|
1654
|
+
console.print(f"[green]✓[/green] Loaded data source: {data_source}")
|
|
1358
1655
|
|
|
1359
1656
|
# Parse columns if provided
|
|
1360
1657
|
columns_list = None
|
|
@@ -1367,7 +1664,15 @@ def scan(
|
|
|
1367
1664
|
# Data is already processed by _load_data_source
|
|
1368
1665
|
scan_result = pb.col_summary_tbl(data=data)
|
|
1369
1666
|
|
|
1370
|
-
|
|
1667
|
+
# Create friendly source type for display
|
|
1668
|
+
if is_piped_data:
|
|
1669
|
+
if data_source.endswith(".parquet"):
|
|
1670
|
+
source_type = "Polars expression (serialized to Parquet) from `pb pl`"
|
|
1671
|
+
elif data_source.endswith(".csv"):
|
|
1672
|
+
source_type = "Polars expression (serialized to CSV) from `pb pl`"
|
|
1673
|
+
else:
|
|
1674
|
+
source_type = "Polars expression from `pb pl`"
|
|
1675
|
+
elif data_source in ["small_table", "game_revenue", "nycflights", "global_sales"]:
|
|
1371
1676
|
source_type = f"Pointblank dataset: {data_source}"
|
|
1372
1677
|
else:
|
|
1373
1678
|
source_type = f"External source: {data_source}"
|
|
@@ -1399,7 +1704,12 @@ def scan(
|
|
|
1399
1704
|
# Display detailed column summary using rich formatting
|
|
1400
1705
|
try:
|
|
1401
1706
|
_rich_print_scan_table(
|
|
1402
|
-
scan_result,
|
|
1707
|
+
scan_result,
|
|
1708
|
+
display_source if is_piped_data else data_source,
|
|
1709
|
+
source_type,
|
|
1710
|
+
table_type,
|
|
1711
|
+
total_rows,
|
|
1712
|
+
total_columns,
|
|
1403
1713
|
)
|
|
1404
1714
|
|
|
1405
1715
|
except Exception as e:
|
|
@@ -1411,9 +1721,9 @@ def scan(
|
|
|
1411
1721
|
|
|
1412
1722
|
|
|
1413
1723
|
@cli.command()
|
|
1414
|
-
@click.argument("data_source", type=str)
|
|
1724
|
+
@click.argument("data_source", type=str, required=False)
|
|
1415
1725
|
@click.option("--output-html", type=click.Path(), help="Save HTML output to file")
|
|
1416
|
-
def missing(data_source: str, output_html: str | None):
|
|
1726
|
+
def missing(data_source: str | None, output_html: str | None):
|
|
1417
1727
|
"""
|
|
1418
1728
|
Generate a missing values report for a data table.
|
|
1419
1729
|
|
|
@@ -1425,13 +1735,55 @@ def missing(data_source: str, output_html: str | None):
|
|
|
1425
1735
|
- GitHub URL to CSV/Parquet (e.g., https://github.com/user/repo/blob/main/data.csv)
|
|
1426
1736
|
- Database connection string (e.g., duckdb:///path/to/db.ddb::table_name)
|
|
1427
1737
|
- Dataset name from pointblank (small_table, game_revenue, nycflights, global_sales)
|
|
1738
|
+
- Piped data from pb pl command
|
|
1428
1739
|
"""
|
|
1429
1740
|
try:
|
|
1741
|
+
import sys
|
|
1742
|
+
|
|
1743
|
+
# Handle piped input
|
|
1744
|
+
if data_source is None:
|
|
1745
|
+
if not sys.stdin.isatty():
|
|
1746
|
+
# Data is being piped in - read the file path from stdin
|
|
1747
|
+
piped_input = sys.stdin.read().strip()
|
|
1748
|
+
if piped_input:
|
|
1749
|
+
data_source = piped_input
|
|
1750
|
+
|
|
1751
|
+
# Determine the format from the file extension
|
|
1752
|
+
if piped_input.endswith(".parquet"):
|
|
1753
|
+
format_type = "Parquet"
|
|
1754
|
+
elif piped_input.endswith(".csv"):
|
|
1755
|
+
format_type = "CSV"
|
|
1756
|
+
else:
|
|
1757
|
+
format_type = "unknown"
|
|
1758
|
+
|
|
1759
|
+
console.print(f"[dim]Using piped data source in {format_type} format.[/dim]")
|
|
1760
|
+
else:
|
|
1761
|
+
console.print("[red]Error:[/red] No data provided via pipe")
|
|
1762
|
+
sys.exit(1)
|
|
1763
|
+
else:
|
|
1764
|
+
# Show concise help and exit
|
|
1765
|
+
_show_concise_help("missing", None)
|
|
1766
|
+
return
|
|
1767
|
+
|
|
1430
1768
|
with console.status("[bold green]Loading data..."):
|
|
1431
1769
|
# Load the data source using the centralized function
|
|
1432
1770
|
data = _load_data_source(data_source)
|
|
1433
1771
|
|
|
1434
|
-
|
|
1772
|
+
# Check if this is a piped data source and create friendly display name
|
|
1773
|
+
is_piped_data = _is_piped_data_source(data_source)
|
|
1774
|
+
|
|
1775
|
+
if is_piped_data:
|
|
1776
|
+
if data_source.endswith(".parquet"):
|
|
1777
|
+
display_source = "Parquet file via `pb pl`"
|
|
1778
|
+
elif data_source.endswith(".csv"):
|
|
1779
|
+
display_source = "CSV file via `pb pl`"
|
|
1780
|
+
else:
|
|
1781
|
+
display_source = "File via `pb pl`"
|
|
1782
|
+
console.print(
|
|
1783
|
+
f"[green]✓[/green] Loaded data source: {display_source} ({data_source})"
|
|
1784
|
+
)
|
|
1785
|
+
else:
|
|
1786
|
+
console.print(f"[green]✓[/green] Loaded data source: {data_source}")
|
|
1435
1787
|
|
|
1436
1788
|
# Generate missing values table
|
|
1437
1789
|
with console.status("[bold green]Analyzing missing values..."):
|
|
@@ -1447,7 +1799,38 @@ def missing(data_source: str, output_html: str | None):
|
|
|
1447
1799
|
console.print(f"[green]✓[/green] Missing values report saved to: {output_html}")
|
|
1448
1800
|
else:
|
|
1449
1801
|
# Display in terminal with special missing values formatting
|
|
1450
|
-
|
|
1802
|
+
# Create enhanced context info for missing table display
|
|
1803
|
+
missing_info = {}
|
|
1804
|
+
try:
|
|
1805
|
+
# Determine source type and table type for enhanced preview title
|
|
1806
|
+
if is_piped_data:
|
|
1807
|
+
if data_source.endswith(".parquet"):
|
|
1808
|
+
source_type = "Polars expression (serialized to Parquet) from `pb pl`"
|
|
1809
|
+
elif data_source.endswith(".csv"):
|
|
1810
|
+
source_type = "Polars expression (serialized to CSV) from `pb pl`"
|
|
1811
|
+
else:
|
|
1812
|
+
source_type = "Polars expression from `pb pl`"
|
|
1813
|
+
elif data_source in ["small_table", "game_revenue", "nycflights", "global_sales"]:
|
|
1814
|
+
source_type = f"Pointblank dataset: {data_source}"
|
|
1815
|
+
else:
|
|
1816
|
+
source_type = f"External source: {data_source}"
|
|
1817
|
+
|
|
1818
|
+
missing_info = {
|
|
1819
|
+
"source_type": source_type,
|
|
1820
|
+
"table_type": _get_tbl_type(original_data),
|
|
1821
|
+
"total_rows": pb.get_row_count(original_data),
|
|
1822
|
+
"total_columns": pb.get_column_count(original_data),
|
|
1823
|
+
}
|
|
1824
|
+
except Exception:
|
|
1825
|
+
# Use defaults if metadata extraction fails
|
|
1826
|
+
missing_info = {
|
|
1827
|
+
"source_type": f"Data source: {data_source}",
|
|
1828
|
+
"table_type": "unknown",
|
|
1829
|
+
"total_rows": None,
|
|
1830
|
+
"total_columns": None,
|
|
1831
|
+
}
|
|
1832
|
+
|
|
1833
|
+
_rich_print_missing_table_enhanced(gt_table, original_data, missing_info)
|
|
1451
1834
|
|
|
1452
1835
|
except Exception as e:
|
|
1453
1836
|
console.print(f"[red]Error:[/red] {e}")
|
|
@@ -1455,10 +1838,11 @@ def missing(data_source: str, output_html: str | None):
|
|
|
1455
1838
|
|
|
1456
1839
|
|
|
1457
1840
|
@cli.command(name="validate")
|
|
1458
|
-
@click.argument("data_source", type=str)
|
|
1841
|
+
@click.argument("data_source", type=str, required=False)
|
|
1842
|
+
@click.option("--list-checks", is_flag=True, help="List available validation checks and exit")
|
|
1459
1843
|
@click.option(
|
|
1460
1844
|
"--check",
|
|
1461
|
-
"checks",
|
|
1845
|
+
"checks",
|
|
1462
1846
|
type=click.Choice(
|
|
1463
1847
|
[
|
|
1464
1848
|
"rows-distinct",
|
|
@@ -1472,25 +1856,25 @@ def missing(data_source: str, output_html: str | None):
|
|
|
1472
1856
|
"col-vals-le",
|
|
1473
1857
|
]
|
|
1474
1858
|
),
|
|
1859
|
+
metavar="CHECK_TYPE",
|
|
1475
1860
|
multiple=True, # Allow multiple --check options
|
|
1476
1861
|
help="Type of validation check to perform. Can be used multiple times for multiple checks.",
|
|
1477
1862
|
)
|
|
1478
|
-
@click.option("--list-checks", is_flag=True, help="List available validation checks and exit")
|
|
1479
1863
|
@click.option(
|
|
1480
1864
|
"--column",
|
|
1481
|
-
"columns",
|
|
1865
|
+
"columns",
|
|
1482
1866
|
multiple=True, # Allow multiple --column options
|
|
1483
1867
|
help="Column name or integer position as #N (1-based index) for validation.",
|
|
1484
1868
|
)
|
|
1485
1869
|
@click.option(
|
|
1486
1870
|
"--set",
|
|
1487
|
-
"sets",
|
|
1871
|
+
"sets",
|
|
1488
1872
|
multiple=True, # Allow multiple --set options
|
|
1489
1873
|
help="Comma-separated allowed values for col-vals-in-set checks.",
|
|
1490
1874
|
)
|
|
1491
1875
|
@click.option(
|
|
1492
1876
|
"--value",
|
|
1493
|
-
"values",
|
|
1877
|
+
"values",
|
|
1494
1878
|
type=float,
|
|
1495
1879
|
multiple=True, # Allow multiple --value options
|
|
1496
1880
|
help="Numeric value for comparison checks.",
|
|
@@ -1502,17 +1886,17 @@ def missing(data_source: str, output_html: str | None):
|
|
|
1502
1886
|
"--write-extract", type=str, help="Save failing rows to folder. Provide base name for folder."
|
|
1503
1887
|
)
|
|
1504
1888
|
@click.option(
|
|
1505
|
-
"--limit",
|
|
1889
|
+
"--limit", default=500, help="Maximum number of failing rows to save to CSV (default: 500)"
|
|
1506
1890
|
)
|
|
1507
1891
|
@click.option("--exit-code", is_flag=True, help="Exit with non-zero code if validation fails")
|
|
1508
1892
|
@click.pass_context
|
|
1509
1893
|
def validate(
|
|
1510
1894
|
ctx: click.Context,
|
|
1511
|
-
data_source: str,
|
|
1512
|
-
checks: tuple[str, ...],
|
|
1513
|
-
columns: tuple[str, ...],
|
|
1514
|
-
sets: tuple[str, ...],
|
|
1515
|
-
values: tuple[float, ...],
|
|
1895
|
+
data_source: str | None,
|
|
1896
|
+
checks: tuple[str, ...],
|
|
1897
|
+
columns: tuple[str, ...],
|
|
1898
|
+
sets: tuple[str, ...],
|
|
1899
|
+
values: tuple[float, ...],
|
|
1516
1900
|
show_extract: bool,
|
|
1517
1901
|
write_extract: str | None,
|
|
1518
1902
|
limit: int,
|
|
@@ -1534,21 +1918,21 @@ def validate(
|
|
|
1534
1918
|
- Database connection string (e.g., duckdb:///path/to/db.ddb::table_name)
|
|
1535
1919
|
- Dataset name from pointblank (small_table, game_revenue, nycflights, global_sales)
|
|
1536
1920
|
|
|
1537
|
-
AVAILABLE
|
|
1921
|
+
AVAILABLE CHECK_TYPES:
|
|
1538
1922
|
|
|
1539
1923
|
Use --list-checks to see all available validation methods with examples.
|
|
1540
1924
|
|
|
1541
|
-
The default
|
|
1925
|
+
The default CHECK_TYPE is 'rows-distinct' which checks for duplicate rows.
|
|
1542
1926
|
|
|
1543
1927
|
\b
|
|
1544
1928
|
- rows-distinct: Check if all rows in the dataset are unique (no duplicates)
|
|
1545
1929
|
- rows-complete: Check if all rows are complete (no missing values in any column)
|
|
1546
1930
|
- col-exists: Check if a specific column exists in the dataset (requires --column)
|
|
1547
1931
|
- col-vals-not-null: Check if all values in a column are not null/missing (requires --column)
|
|
1548
|
-
- col-vals-gt: Check if all values in a column are greater than a
|
|
1549
|
-
- col-vals-ge: Check if all values in a column are greater than or equal to a
|
|
1550
|
-
- col-vals-lt: Check if all values in a column are less than a
|
|
1551
|
-
- col-vals-le: Check if all values in a column are less than or equal to a
|
|
1932
|
+
- col-vals-gt: Check if all values in a column are greater than a comparison value (requires --column and --value)
|
|
1933
|
+
- col-vals-ge: Check if all values in a column are greater than or equal to a comparison value (requires --column and --value)
|
|
1934
|
+
- col-vals-lt: Check if all values in a column are less than a comparison value (requires --column and --value)
|
|
1935
|
+
- col-vals-le: Check if all values in a column are less than or equal to a comparison value (requires --column and --value)
|
|
1552
1936
|
- col-vals-in-set: Check if all values in a column are in an allowed set (requires --column and --set)
|
|
1553
1937
|
|
|
1554
1938
|
Examples:
|
|
@@ -1571,28 +1955,9 @@ def validate(
|
|
|
1571
1955
|
pb validate data.csv --check col-vals-not-null --column email --check col-vals-gt --column age --value 18
|
|
1572
1956
|
"""
|
|
1573
1957
|
try:
|
|
1574
|
-
# Handle backward compatibility and parameter conversion
|
|
1575
1958
|
import sys
|
|
1576
1959
|
|
|
1577
|
-
#
|
|
1578
|
-
if not checks:
|
|
1579
|
-
# No --check options provided, use default
|
|
1580
|
-
checks_list = ["rows-distinct"]
|
|
1581
|
-
is_using_default_check = True
|
|
1582
|
-
else:
|
|
1583
|
-
checks_list = list(checks)
|
|
1584
|
-
is_using_default_check = False
|
|
1585
|
-
|
|
1586
|
-
columns_list = list(columns) if columns else []
|
|
1587
|
-
sets_list = list(sets) if sets else []
|
|
1588
|
-
values_list = list(values) if values else []
|
|
1589
|
-
|
|
1590
|
-
# Map parameters to checks intelligently
|
|
1591
|
-
mapped_columns, mapped_sets, mapped_values = _map_parameters_to_checks(
|
|
1592
|
-
checks_list, columns_list, sets_list, values_list
|
|
1593
|
-
)
|
|
1594
|
-
|
|
1595
|
-
# Handle --list-checks option
|
|
1960
|
+
# Handle --list-checks option early (doesn't need data source)
|
|
1596
1961
|
if list_checks:
|
|
1597
1962
|
console.print("[bold bright_cyan]Available Validation Checks:[/bold bright_cyan]")
|
|
1598
1963
|
console.print()
|
|
@@ -1616,14 +1981,16 @@ def validate(
|
|
|
1616
1981
|
"[bold magenta]Value comparison checks [bright_black](require --column and --value)[/bright_black]:[/bold magenta]"
|
|
1617
1982
|
)
|
|
1618
1983
|
console.print(
|
|
1619
|
-
" • [bold cyan]col-vals-gt[/bold cyan] Values greater than
|
|
1984
|
+
" • [bold cyan]col-vals-gt[/bold cyan] Values greater than comparison value"
|
|
1620
1985
|
)
|
|
1621
1986
|
console.print(
|
|
1622
|
-
" • [bold cyan]col-vals-ge[/bold cyan] Values greater than or equal to
|
|
1987
|
+
" • [bold cyan]col-vals-ge[/bold cyan] Values greater than or equal to comparison value"
|
|
1623
1988
|
)
|
|
1624
|
-
console.print(" • [bold cyan]col-vals-lt[/bold cyan] Values less than threshold")
|
|
1625
1989
|
console.print(
|
|
1626
|
-
" • [bold cyan]col-vals-
|
|
1990
|
+
" • [bold cyan]col-vals-lt[/bold cyan] Values less than comparison value"
|
|
1991
|
+
)
|
|
1992
|
+
console.print(
|
|
1993
|
+
" • [bold cyan]col-vals-le[/bold cyan] Values less than or equal to comparison value"
|
|
1627
1994
|
)
|
|
1628
1995
|
console.print()
|
|
1629
1996
|
console.print(
|
|
@@ -1634,19 +2001,65 @@ def validate(
|
|
|
1634
2001
|
)
|
|
1635
2002
|
console.print()
|
|
1636
2003
|
console.print("[bold bright_yellow]Examples:[/bold bright_yellow]")
|
|
2004
|
+
console.print(" [bright_blue]pb validate data.csv --check rows-distinct[/bright_blue]")
|
|
1637
2005
|
console.print(
|
|
1638
|
-
|
|
1639
|
-
)
|
|
1640
|
-
console.print(
|
|
1641
|
-
f" [bright_blue]pb validate {data_source} --check col-vals-not-null --column price[/bright_blue]"
|
|
2006
|
+
" [bright_blue]pb validate data.csv --check col-vals-not-null --column price[/bright_blue]"
|
|
1642
2007
|
)
|
|
1643
2008
|
console.print(
|
|
1644
|
-
|
|
2009
|
+
" [bright_blue]pb validate data.csv --check col-vals-gt --column age --value 18[/bright_blue]"
|
|
1645
2010
|
)
|
|
1646
2011
|
import sys
|
|
1647
2012
|
|
|
1648
2013
|
sys.exit(0)
|
|
1649
2014
|
|
|
2015
|
+
# Check if data_source is provided (required for all operations except --list-checks)
|
|
2016
|
+
# or if we have piped input
|
|
2017
|
+
if data_source is None:
|
|
2018
|
+
# Check if we have piped input
|
|
2019
|
+
if not sys.stdin.isatty():
|
|
2020
|
+
# Data is being piped in: read the file path from stdin
|
|
2021
|
+
piped_input = sys.stdin.read().strip()
|
|
2022
|
+
if piped_input:
|
|
2023
|
+
data_source = piped_input
|
|
2024
|
+
|
|
2025
|
+
# Determine the format from the file extension
|
|
2026
|
+
if piped_input.endswith(".parquet"):
|
|
2027
|
+
format_type = "Parquet"
|
|
2028
|
+
elif piped_input.endswith(".csv"):
|
|
2029
|
+
format_type = "CSV"
|
|
2030
|
+
else:
|
|
2031
|
+
format_type = "unknown"
|
|
2032
|
+
|
|
2033
|
+
console.print(f"[dim]Using piped data source in {format_type} format.[/dim]")
|
|
2034
|
+
else:
|
|
2035
|
+
console.print("[red]Error:[/red] No data provided via pipe")
|
|
2036
|
+
sys.exit(1)
|
|
2037
|
+
else:
|
|
2038
|
+
# Show concise help and exit
|
|
2039
|
+
_show_concise_help("validate", None)
|
|
2040
|
+
return
|
|
2041
|
+
|
|
2042
|
+
# Handle backward compatibility and parameter conversion
|
|
2043
|
+
import sys
|
|
2044
|
+
|
|
2045
|
+
# Convert parameter tuples to lists, handling default case
|
|
2046
|
+
if not checks:
|
|
2047
|
+
# No --check options provided, use default
|
|
2048
|
+
checks_list = ["rows-distinct"]
|
|
2049
|
+
is_using_default_check = True
|
|
2050
|
+
else:
|
|
2051
|
+
checks_list = list(checks)
|
|
2052
|
+
is_using_default_check = False
|
|
2053
|
+
|
|
2054
|
+
columns_list = list(columns) if columns else []
|
|
2055
|
+
sets_list = list(sets) if sets else []
|
|
2056
|
+
values_list = list(values) if values else []
|
|
2057
|
+
|
|
2058
|
+
# Map parameters to checks intelligently
|
|
2059
|
+
mapped_columns, mapped_sets, mapped_values = _map_parameters_to_checks(
|
|
2060
|
+
checks_list, columns_list, sets_list, values_list
|
|
2061
|
+
)
|
|
2062
|
+
|
|
1650
2063
|
# Validate required parameters for different check types
|
|
1651
2064
|
# Check parameters for each check in the list using mapped parameters
|
|
1652
2065
|
for i, check in enumerate(checks_list):
|
|
@@ -1732,7 +2145,25 @@ def validate(
|
|
|
1732
2145
|
checks_list, columns_list, sets_list, values_list
|
|
1733
2146
|
)
|
|
1734
2147
|
|
|
1735
|
-
|
|
2148
|
+
# Check if this is a piped data source and create friendly display name
|
|
2149
|
+
is_piped_data = (
|
|
2150
|
+
data_source
|
|
2151
|
+
and data_source.startswith("/var/folders/")
|
|
2152
|
+
and ("pb_pipe_" in data_source or "/T/" in data_source)
|
|
2153
|
+
)
|
|
2154
|
+
|
|
2155
|
+
if is_piped_data:
|
|
2156
|
+
if data_source.endswith(".parquet"):
|
|
2157
|
+
display_source = "Parquet file via `pb pl`"
|
|
2158
|
+
elif data_source.endswith(".csv"):
|
|
2159
|
+
display_source = "CSV file via `pb pl`"
|
|
2160
|
+
else:
|
|
2161
|
+
display_source = "File via `pb pl`"
|
|
2162
|
+
console.print(
|
|
2163
|
+
f"[green]✓[/green] Loaded data source: {display_source} ({data_source})"
|
|
2164
|
+
)
|
|
2165
|
+
else:
|
|
2166
|
+
console.print(f"[green]✓[/green] Loaded data source: {data_source}")
|
|
1736
2167
|
|
|
1737
2168
|
# Build a single validation object with chained checks
|
|
1738
2169
|
with console.status(f"[bold green]Running {len(checks_list)} validation check(s)..."):
|
|
@@ -1791,7 +2222,7 @@ def validate(
|
|
|
1791
2222
|
|
|
1792
2223
|
# Display results based on whether we have single or multiple checks
|
|
1793
2224
|
if len(checks_list) == 1:
|
|
1794
|
-
# Single check
|
|
2225
|
+
# Single check: use current display format
|
|
1795
2226
|
_display_validation_result(
|
|
1796
2227
|
validation,
|
|
1797
2228
|
checks_list,
|
|
@@ -1806,7 +2237,7 @@ def validate(
|
|
|
1806
2237
|
limit,
|
|
1807
2238
|
)
|
|
1808
2239
|
else:
|
|
1809
|
-
# Multiple checks
|
|
2240
|
+
# Multiple checks: use stacked display format
|
|
1810
2241
|
any_failed = False
|
|
1811
2242
|
for i in range(len(checks_list)):
|
|
1812
2243
|
console.print() # Add spacing between results
|
|
@@ -1845,7 +2276,7 @@ def validate(
|
|
|
1845
2276
|
console.print()
|
|
1846
2277
|
console.print("[bold magenta]Common validation options:[/bold magenta]")
|
|
1847
2278
|
console.print(
|
|
1848
|
-
" • [bold cyan]--check rows-complete[/bold cyan]
|
|
2279
|
+
" • [bold cyan]--check rows-complete[/bold cyan] Check for rows with missing values"
|
|
1849
2280
|
)
|
|
1850
2281
|
console.print(
|
|
1851
2282
|
" • [bold cyan]--check col-vals-not-null[/bold cyan] Check for null values in a column [bright_black](requires --column)[/bright_black]"
|
|
@@ -1955,81 +2386,284 @@ def requirements():
|
|
|
1955
2386
|
console.print("\n[dim]Install missing packages to enable additional functionality.[/dim]")
|
|
1956
2387
|
|
|
1957
2388
|
|
|
1958
|
-
def
|
|
1959
|
-
|
|
1960
|
-
data_source: str,
|
|
1961
|
-
source_type: str,
|
|
1962
|
-
table_type: str,
|
|
1963
|
-
total_rows: int | None = None,
|
|
1964
|
-
total_columns: int | None = None,
|
|
2389
|
+
def _rich_print_missing_table_enhanced(
|
|
2390
|
+
gt_table: Any, original_data: Any = None, missing_info: dict = None
|
|
1965
2391
|
) -> None:
|
|
1966
|
-
"""
|
|
1967
|
-
Display scan results as a Rich table in the terminal with statistical measures.
|
|
2392
|
+
"""Convert a missing values GT table to Rich table with enhanced formatting and metadata.
|
|
1968
2393
|
|
|
1969
2394
|
Args:
|
|
1970
|
-
|
|
1971
|
-
|
|
1972
|
-
|
|
1973
|
-
table_type: Type of table (e.g., "polars.LazyFrame")
|
|
1974
|
-
total_rows: Total number of rows in the dataset
|
|
1975
|
-
total_columns: Total number of columns in the dataset
|
|
2395
|
+
gt_table: The GT table object for missing values
|
|
2396
|
+
original_data: The original data source to extract column types
|
|
2397
|
+
missing_info: Dict with metadata including source_type, table_type, total_rows, total_columns
|
|
1976
2398
|
"""
|
|
1977
2399
|
try:
|
|
1978
|
-
|
|
1979
|
-
|
|
1980
|
-
import narwhals as nw
|
|
1981
|
-
from rich.box import SIMPLE_HEAD
|
|
2400
|
+
# Extract the underlying data from the GT table
|
|
2401
|
+
df = None
|
|
1982
2402
|
|
|
1983
|
-
|
|
1984
|
-
|
|
1985
|
-
|
|
2403
|
+
if hasattr(gt_table, "_tbl_data") and gt_table._tbl_data is not None:
|
|
2404
|
+
df = gt_table._tbl_data
|
|
2405
|
+
elif hasattr(gt_table, "_data") and gt_table._data is not None:
|
|
2406
|
+
df = gt_table._data
|
|
2407
|
+
elif hasattr(gt_table, "data") and gt_table.data is not None:
|
|
2408
|
+
df = gt_table.data
|
|
1986
2409
|
|
|
1987
|
-
|
|
1988
|
-
|
|
2410
|
+
if df is not None:
|
|
2411
|
+
from rich.box import SIMPLE_HEAD
|
|
1989
2412
|
|
|
1990
|
-
|
|
1991
|
-
|
|
2413
|
+
# Extract metadata from missing_info or use defaults
|
|
2414
|
+
source_type = "Data source"
|
|
2415
|
+
table_type = "unknown"
|
|
2416
|
+
total_rows = None
|
|
2417
|
+
total_columns = None
|
|
1992
2418
|
|
|
1993
|
-
|
|
1994
|
-
|
|
1995
|
-
|
|
2419
|
+
if missing_info:
|
|
2420
|
+
source_type = missing_info.get("source_type", "Data source")
|
|
2421
|
+
table_type = missing_info.get("table_type", "unknown")
|
|
2422
|
+
total_rows = missing_info.get("total_rows")
|
|
2423
|
+
total_columns = missing_info.get("total_columns")
|
|
1996
2424
|
|
|
1997
|
-
|
|
1998
|
-
|
|
1999
|
-
title_text += f"\n[dim]{total_rows:,} rows / {total_columns} columns[/dim]"
|
|
2425
|
+
# Create enhanced title matching the scan table format
|
|
2426
|
+
title_text = f"Missing Values / {source_type} / {table_type}"
|
|
2000
2427
|
|
|
2001
|
-
|
|
2002
|
-
|
|
2003
|
-
|
|
2004
|
-
show_header=True,
|
|
2005
|
-
header_style="bold magenta",
|
|
2006
|
-
box=SIMPLE_HEAD,
|
|
2007
|
-
title_style="bold cyan",
|
|
2008
|
-
title_justify="left",
|
|
2009
|
-
)
|
|
2428
|
+
# Add dimensions subtitle in gray if available
|
|
2429
|
+
if total_rows is not None and total_columns is not None:
|
|
2430
|
+
title_text += f"\n[dim]{total_rows:,} rows / {total_columns} columns[/dim]"
|
|
2010
2431
|
|
|
2011
|
-
|
|
2012
|
-
|
|
2013
|
-
|
|
2014
|
-
|
|
2015
|
-
|
|
2016
|
-
|
|
2017
|
-
|
|
2018
|
-
|
|
2019
|
-
|
|
2432
|
+
# Get column names
|
|
2433
|
+
columns = []
|
|
2434
|
+
try:
|
|
2435
|
+
if hasattr(df, "columns"):
|
|
2436
|
+
columns = list(df.columns)
|
|
2437
|
+
elif hasattr(df, "schema"):
|
|
2438
|
+
columns = list(df.schema.names)
|
|
2439
|
+
except Exception as e:
|
|
2440
|
+
console.print(f"[red]Error getting columns:[/red] {e}")
|
|
2441
|
+
columns = []
|
|
2020
2442
|
|
|
2021
|
-
|
|
2022
|
-
|
|
2023
|
-
|
|
2024
|
-
|
|
2025
|
-
|
|
2026
|
-
|
|
2027
|
-
|
|
2028
|
-
|
|
2029
|
-
|
|
2030
|
-
|
|
2031
|
-
|
|
2032
|
-
|
|
2443
|
+
if not columns:
|
|
2444
|
+
columns = [f"Column {i + 1}" for i in range(10)] # Fallback
|
|
2445
|
+
|
|
2446
|
+
# Get original data to extract column types
|
|
2447
|
+
column_types = {}
|
|
2448
|
+
if original_data is not None:
|
|
2449
|
+
try:
|
|
2450
|
+
# Get column types from original data
|
|
2451
|
+
if hasattr(original_data, "columns"):
|
|
2452
|
+
original_columns = list(original_data.columns)
|
|
2453
|
+
column_types = _get_column_dtypes(original_data, original_columns)
|
|
2454
|
+
except Exception as e:
|
|
2455
|
+
console.print(f"[red]Error getting column types:[/red] {e}")
|
|
2456
|
+
pass # Use empty dict as fallback
|
|
2457
|
+
|
|
2458
|
+
# Add columns to Rich table with special formatting for missing values table
|
|
2459
|
+
sector_columns = [col for col in columns if col != "columns" and col.isdigit()]
|
|
2460
|
+
|
|
2461
|
+
# Print the title first
|
|
2462
|
+
console.print()
|
|
2463
|
+
console.print(f"[bold cyan]{title_text}[/bold cyan]")
|
|
2464
|
+
|
|
2465
|
+
# Show the custom spanner header if we have sector columns
|
|
2466
|
+
if sector_columns:
|
|
2467
|
+
# Create a custom header line that shows the spanner
|
|
2468
|
+
header_parts = []
|
|
2469
|
+
header_parts.append(" " * 20) # Space for Column header
|
|
2470
|
+
header_parts.append(" " * 10) # Space for Type header
|
|
2471
|
+
|
|
2472
|
+
# Left-align "Row Sectors" with the first numbered column
|
|
2473
|
+
row_sectors_text = "Row Sectors"
|
|
2474
|
+
header_parts.append(row_sectors_text)
|
|
2475
|
+
|
|
2476
|
+
# Print the custom spanner header
|
|
2477
|
+
console.print("[dim]" + " ".join(header_parts) + "[/dim]")
|
|
2478
|
+
|
|
2479
|
+
# Add a horizontal rule below the spanner
|
|
2480
|
+
rule_parts = []
|
|
2481
|
+
rule_parts.append(" " * 20) # Space for Column header
|
|
2482
|
+
rule_parts.append(" " * 10) # Space for Type header
|
|
2483
|
+
|
|
2484
|
+
# Use a fixed width horizontal rule for "Row Sectors"
|
|
2485
|
+
horizontal_rule = "─" * 20
|
|
2486
|
+
rule_parts.append(horizontal_rule)
|
|
2487
|
+
|
|
2488
|
+
# Print the horizontal rule
|
|
2489
|
+
console.print("[dim]" + " ".join(rule_parts) + "[/dim]")
|
|
2490
|
+
|
|
2491
|
+
# Create the missing values table WITHOUT the title (since we printed it above)
|
|
2492
|
+
rich_table = Table(
|
|
2493
|
+
show_header=True,
|
|
2494
|
+
header_style="bold magenta",
|
|
2495
|
+
box=SIMPLE_HEAD,
|
|
2496
|
+
)
|
|
2497
|
+
|
|
2498
|
+
# Two separate columns: Column name (20 chars) and Data type (10 chars)
|
|
2499
|
+
rich_table.add_column("Column", style="cyan", no_wrap=True, width=20)
|
|
2500
|
+
rich_table.add_column("Type", style="yellow", no_wrap=True, width=10)
|
|
2501
|
+
|
|
2502
|
+
# Sector columns: All same width, optimized for "100%" (4 chars + padding)
|
|
2503
|
+
for sector in sector_columns:
|
|
2504
|
+
rich_table.add_column(
|
|
2505
|
+
sector,
|
|
2506
|
+
style="cyan",
|
|
2507
|
+
justify="center",
|
|
2508
|
+
no_wrap=True,
|
|
2509
|
+
width=5, # Fixed width optimized for percentage values
|
|
2510
|
+
)
|
|
2511
|
+
|
|
2512
|
+
# Convert data to rows with special formatting
|
|
2513
|
+
rows = []
|
|
2514
|
+
try:
|
|
2515
|
+
if hasattr(df, "to_dicts"):
|
|
2516
|
+
data_dict = df.to_dicts()
|
|
2517
|
+
elif hasattr(df, "to_dict"):
|
|
2518
|
+
data_dict = df.to_dict("records")
|
|
2519
|
+
else:
|
|
2520
|
+
data_dict = []
|
|
2521
|
+
|
|
2522
|
+
for i, row in enumerate(data_dict):
|
|
2523
|
+
try:
|
|
2524
|
+
# Each row should have: [column_name, data_type, sector1, sector2, ...]
|
|
2525
|
+
column_name = str(row.get("columns", ""))
|
|
2526
|
+
|
|
2527
|
+
# Truncate column name to 20 characters with ellipsis if needed
|
|
2528
|
+
if len(column_name) > 20:
|
|
2529
|
+
truncated_name = column_name[:17] + "…"
|
|
2530
|
+
else:
|
|
2531
|
+
truncated_name = column_name
|
|
2532
|
+
|
|
2533
|
+
# Get data type for this column
|
|
2534
|
+
if column_name in column_types:
|
|
2535
|
+
dtype = column_types[column_name]
|
|
2536
|
+
if len(dtype) > 10:
|
|
2537
|
+
truncated_dtype = dtype[:9] + "…"
|
|
2538
|
+
else:
|
|
2539
|
+
truncated_dtype = dtype
|
|
2540
|
+
else:
|
|
2541
|
+
truncated_dtype = "?"
|
|
2542
|
+
|
|
2543
|
+
# Start building the row with column name and type
|
|
2544
|
+
formatted_row = [truncated_name, truncated_dtype]
|
|
2545
|
+
|
|
2546
|
+
# Add sector values (formatted percentages)
|
|
2547
|
+
for sector in sector_columns:
|
|
2548
|
+
value = row.get(sector, 0.0)
|
|
2549
|
+
if isinstance(value, (int, float)):
|
|
2550
|
+
formatted_row.append(_format_missing_percentage(float(value)))
|
|
2551
|
+
else:
|
|
2552
|
+
formatted_row.append(str(value))
|
|
2553
|
+
|
|
2554
|
+
rows.append(formatted_row)
|
|
2555
|
+
|
|
2556
|
+
except Exception as e:
|
|
2557
|
+
console.print(f"[red]Error processing row {i}:[/red] {e}")
|
|
2558
|
+
continue
|
|
2559
|
+
|
|
2560
|
+
except Exception as e:
|
|
2561
|
+
console.print(f"[red]Error extracting data:[/red] {e}")
|
|
2562
|
+
rows = [["Error extracting data", "?", *["" for _ in sector_columns]]]
|
|
2563
|
+
|
|
2564
|
+
# Add rows to Rich table
|
|
2565
|
+
for row in rows:
|
|
2566
|
+
try:
|
|
2567
|
+
rich_table.add_row(*row)
|
|
2568
|
+
except Exception as e:
|
|
2569
|
+
console.print(f"[red]Error adding row:[/red] {e}")
|
|
2570
|
+
break
|
|
2571
|
+
|
|
2572
|
+
# Print the Rich table (without title since we already printed it)
|
|
2573
|
+
console.print(rich_table)
|
|
2574
|
+
|
|
2575
|
+
footer_text = (
|
|
2576
|
+
"[dim]Symbols: [green]●[/green] = no missing vals in sector, "
|
|
2577
|
+
"[red]●[/red] = all vals completely missing, "
|
|
2578
|
+
"[cyan]x%[/cyan] = percentage missing[/dim]"
|
|
2579
|
+
)
|
|
2580
|
+
console.print(footer_text)
|
|
2581
|
+
|
|
2582
|
+
else:
|
|
2583
|
+
# Fallback to regular table display
|
|
2584
|
+
_rich_print_gt_table(gt_table)
|
|
2585
|
+
|
|
2586
|
+
except Exception as e:
|
|
2587
|
+
console.print(f"[red]Error rendering missing values table:[/red] {e}")
|
|
2588
|
+
# Fallback to regular table display
|
|
2589
|
+
_rich_print_gt_table(gt_table)
|
|
2590
|
+
|
|
2591
|
+
|
|
2592
|
+
def _rich_print_scan_table(
|
|
2593
|
+
scan_result: Any,
|
|
2594
|
+
data_source: str,
|
|
2595
|
+
source_type: str,
|
|
2596
|
+
table_type: str,
|
|
2597
|
+
total_rows: int | None = None,
|
|
2598
|
+
total_columns: int | None = None,
|
|
2599
|
+
) -> None:
|
|
2600
|
+
"""
|
|
2601
|
+
Display scan results as a Rich table in the terminal with statistical measures.
|
|
2602
|
+
|
|
2603
|
+
Args:
|
|
2604
|
+
scan_result: The GT object from col_summary_tbl()
|
|
2605
|
+
data_source: Name of the data source being scanned
|
|
2606
|
+
source_type: Type of data source (e.g., "Pointblank dataset: small_table")
|
|
2607
|
+
table_type: Type of table (e.g., "polars.LazyFrame")
|
|
2608
|
+
total_rows: Total number of rows in the dataset
|
|
2609
|
+
total_columns: Total number of columns in the dataset
|
|
2610
|
+
"""
|
|
2611
|
+
try:
|
|
2612
|
+
import re
|
|
2613
|
+
|
|
2614
|
+
import narwhals as nw
|
|
2615
|
+
from rich.box import SIMPLE_HEAD
|
|
2616
|
+
|
|
2617
|
+
# Extract the underlying DataFrame from the GT object
|
|
2618
|
+
# The GT object has a _tbl_data attribute that contains the DataFrame
|
|
2619
|
+
gt_data = scan_result._tbl_data
|
|
2620
|
+
|
|
2621
|
+
# Convert to Narwhals DataFrame for consistent handling
|
|
2622
|
+
nw_data = nw.from_native(gt_data)
|
|
2623
|
+
|
|
2624
|
+
# Convert to dictionary for easier access
|
|
2625
|
+
data_dict = nw_data.to_dict(as_series=False)
|
|
2626
|
+
|
|
2627
|
+
# Create main scan table with missing data table styling
|
|
2628
|
+
# Create a comprehensive title with data source, source type, and table type
|
|
2629
|
+
title_text = f"Column Summary / {source_type} / {table_type}"
|
|
2630
|
+
|
|
2631
|
+
# Add dimensions subtitle in gray if available
|
|
2632
|
+
if total_rows is not None and total_columns is not None:
|
|
2633
|
+
title_text += f"\n[dim]{total_rows:,} rows / {total_columns} columns[/dim]"
|
|
2634
|
+
|
|
2635
|
+
# Create the scan table
|
|
2636
|
+
scan_table = Table(
|
|
2637
|
+
title=title_text,
|
|
2638
|
+
show_header=True,
|
|
2639
|
+
header_style="bold magenta",
|
|
2640
|
+
box=SIMPLE_HEAD,
|
|
2641
|
+
title_style="bold cyan",
|
|
2642
|
+
title_justify="left",
|
|
2643
|
+
)
|
|
2644
|
+
|
|
2645
|
+
# Add columns with specific styling and appropriate widths
|
|
2646
|
+
scan_table.add_column("Column", style="cyan", no_wrap=True, width=20)
|
|
2647
|
+
scan_table.add_column("Type", style="yellow", no_wrap=True, width=10)
|
|
2648
|
+
scan_table.add_column(
|
|
2649
|
+
"NA", style="red", width=6, justify="right"
|
|
2650
|
+
) # Adjusted for better formatting
|
|
2651
|
+
scan_table.add_column(
|
|
2652
|
+
"UQ", style="green", width=8, justify="right"
|
|
2653
|
+
) # Adjusted for boolean values
|
|
2654
|
+
|
|
2655
|
+
# Add statistical columns if they exist with appropriate widths
|
|
2656
|
+
stat_columns = []
|
|
2657
|
+
column_mapping = {
|
|
2658
|
+
"mean": ("Mean", "blue", 9),
|
|
2659
|
+
"std": ("SD", "blue", 9),
|
|
2660
|
+
"min": ("Min", "yellow", 9),
|
|
2661
|
+
"median": ("Med", "yellow", 9),
|
|
2662
|
+
"max": ("Max", "yellow", 9),
|
|
2663
|
+
"q_1": ("Q₁", "magenta", 8),
|
|
2664
|
+
"q_3": ("Q₃", "magenta", 9),
|
|
2665
|
+
"iqr": ("IQR", "magenta", 8),
|
|
2666
|
+
}
|
|
2033
2667
|
|
|
2034
2668
|
for col_key, (display_name, color, width) in column_mapping.items():
|
|
2035
2669
|
if col_key in data_dict:
|
|
@@ -2070,7 +2704,7 @@ def _rich_print_scan_table(
|
|
|
2070
2704
|
# Clean up HTML formatting from the raw data
|
|
2071
2705
|
str_val = str(value)
|
|
2072
2706
|
|
|
2073
|
-
# Handle multi-line values with <br> tags FIRST
|
|
2707
|
+
# Handle multi-line values with <br> tags FIRST: take the first line (absolute number)
|
|
2074
2708
|
if "<br>" in str_val:
|
|
2075
2709
|
str_val = str_val.split("<br>")[0].strip()
|
|
2076
2710
|
# For unique values, we want just the integer part
|
|
@@ -2089,14 +2723,14 @@ def _rich_print_scan_table(
|
|
|
2089
2723
|
# Clean up extra whitespace
|
|
2090
2724
|
str_val = re.sub(r"\s+", " ", str_val).strip()
|
|
2091
2725
|
|
|
2092
|
-
# Handle values like "2<.01"
|
|
2726
|
+
# Handle values like "2<.01": extract the first number
|
|
2093
2727
|
if "<" in str_val and not (str_val.startswith("<") and str_val.endswith(">")):
|
|
2094
2728
|
# Extract number before the < symbol
|
|
2095
2729
|
before_lt = str_val.split("<")[0].strip()
|
|
2096
2730
|
if before_lt and before_lt.replace(".", "").replace("-", "").isdigit():
|
|
2097
2731
|
str_val = before_lt
|
|
2098
2732
|
|
|
2099
|
-
# Handle boolean unique values like "T0.62F0.38"
|
|
2733
|
+
# Handle boolean unique values like "T0.62F0.38": extract the more readable format
|
|
2100
2734
|
if re.match(r"^[TF]\d+\.\d+[TF]\d+\.\d+$", str_val):
|
|
2101
2735
|
# Extract T and F values
|
|
2102
2736
|
t_match = re.search(r"T(\d+\.\d+)", str_val)
|
|
@@ -2126,7 +2760,7 @@ def _rich_print_scan_table(
|
|
|
2126
2760
|
# Simple integers under 10000
|
|
2127
2761
|
return str(int(num_val))
|
|
2128
2762
|
elif abs(num_val) >= 10000000 and abs(num_val) < 100000000:
|
|
2129
|
-
# Likely dates in YYYYMMDD format
|
|
2763
|
+
# Likely dates in YYYYMMDD format: format as date-like
|
|
2130
2764
|
int_val = int(num_val)
|
|
2131
2765
|
if 19000101 <= int_val <= 29991231: # Reasonable date range
|
|
2132
2766
|
str_date = str(int_val)
|
|
@@ -2138,29 +2772,29 @@ def _rich_print_scan_table(
|
|
|
2138
2772
|
# Otherwise treat as large number
|
|
2139
2773
|
return f"{num_val / 1000000:.1f}M"
|
|
2140
2774
|
elif abs(num_val) >= 1000000:
|
|
2141
|
-
# Large numbers
|
|
2775
|
+
# Large numbers: use scientific notation or M/k notation
|
|
2142
2776
|
|
|
2143
2777
|
if abs(num_val) >= 1000000000:
|
|
2144
2778
|
return f"{num_val:.1e}"
|
|
2145
2779
|
else:
|
|
2146
2780
|
return f"{num_val / 1000000:.1f}M"
|
|
2147
2781
|
elif abs(num_val) >= 10000:
|
|
2148
|
-
# Numbers >= 10k
|
|
2782
|
+
# Numbers >= 10k: use compact notation
|
|
2149
2783
|
return f"{num_val / 1000:.1f}k"
|
|
2150
2784
|
elif abs(num_val) >= 100:
|
|
2151
|
-
# Numbers 100-9999
|
|
2785
|
+
# Numbers 100-9999: show with minimal decimals
|
|
2152
2786
|
return f"{num_val:.1f}"
|
|
2153
2787
|
elif abs(num_val) >= 10:
|
|
2154
|
-
# Numbers 10-99
|
|
2788
|
+
# Numbers 10-99: show with one decimal
|
|
2155
2789
|
return f"{num_val:.1f}"
|
|
2156
2790
|
elif abs(num_val) >= 1:
|
|
2157
|
-
# Numbers 1-9
|
|
2791
|
+
# Numbers 1-9: show with two decimals
|
|
2158
2792
|
return f"{num_val:.2f}"
|
|
2159
2793
|
elif abs(num_val) >= 0.01:
|
|
2160
|
-
# Small numbers
|
|
2794
|
+
# Small numbers: show with appropriate precision
|
|
2161
2795
|
return f"{num_val:.2f}"
|
|
2162
2796
|
else:
|
|
2163
|
-
# Very small numbers
|
|
2797
|
+
# Very small numbers: use scientific notation
|
|
2164
2798
|
|
|
2165
2799
|
return f"{num_val:.1e}"
|
|
2166
2800
|
|
|
@@ -2168,7 +2802,7 @@ def _rich_print_scan_table(
|
|
|
2168
2802
|
# Not a number, handle as string
|
|
2169
2803
|
pass
|
|
2170
2804
|
|
|
2171
|
-
# Handle date/datetime strings
|
|
2805
|
+
# Handle date/datetime strings: show abbreviated format
|
|
2172
2806
|
if len(str_val) > 10 and any(char in str_val for char in ["-", "/", ":"]):
|
|
2173
2807
|
# Likely a date/datetime, show abbreviated
|
|
2174
2808
|
if len(str_val) > max_width:
|
|
@@ -2244,8 +2878,36 @@ def _rich_print_missing_table(gt_table: Any, original_data: Any = None) -> None:
|
|
|
2244
2878
|
if df is not None:
|
|
2245
2879
|
from rich.box import SIMPLE_HEAD
|
|
2246
2880
|
|
|
2247
|
-
#
|
|
2248
|
-
|
|
2881
|
+
# Get metadata for enhanced missing table title
|
|
2882
|
+
total_rows = None
|
|
2883
|
+
total_columns = None
|
|
2884
|
+
source_type = "Data source"
|
|
2885
|
+
table_type = "unknown"
|
|
2886
|
+
|
|
2887
|
+
if original_data is not None:
|
|
2888
|
+
try:
|
|
2889
|
+
total_rows = pb.get_row_count(original_data)
|
|
2890
|
+
total_columns = pb.get_column_count(original_data)
|
|
2891
|
+
table_type = _get_tbl_type(original_data)
|
|
2892
|
+
except Exception:
|
|
2893
|
+
pass
|
|
2894
|
+
|
|
2895
|
+
# Create enhanced title matching the scan table format
|
|
2896
|
+
title_text = f"Missing Values / {source_type} / {table_type}"
|
|
2897
|
+
|
|
2898
|
+
# Add dimensions subtitle in gray if available
|
|
2899
|
+
if total_rows is not None and total_columns is not None:
|
|
2900
|
+
title_text += f"\n[dim]{total_rows:,} rows / {total_columns} columns[/dim]"
|
|
2901
|
+
|
|
2902
|
+
# Create the missing values table with enhanced title
|
|
2903
|
+
rich_table = Table(
|
|
2904
|
+
title=title_text,
|
|
2905
|
+
show_header=True,
|
|
2906
|
+
header_style="bold magenta",
|
|
2907
|
+
box=SIMPLE_HEAD,
|
|
2908
|
+
title_style="bold cyan",
|
|
2909
|
+
title_justify="left",
|
|
2910
|
+
)
|
|
2249
2911
|
|
|
2250
2912
|
# Get column names
|
|
2251
2913
|
columns = []
|
|
@@ -2377,12 +3039,12 @@ def _rich_print_missing_table(gt_table: Any, original_data: Any = None) -> None:
|
|
|
2377
3039
|
console.print("[dim]" + " ".join(rule_parts) + "[/dim]")
|
|
2378
3040
|
|
|
2379
3041
|
# Print the Rich table (will handle terminal width automatically)
|
|
3042
|
+
console.print()
|
|
2380
3043
|
console.print(rich_table)
|
|
2381
3044
|
footer_text = (
|
|
2382
|
-
"[dim]Symbols: [green]●[/green] = no missing
|
|
2383
|
-
"[red]●[/red] = completely missing, "
|
|
2384
|
-
"
|
|
2385
|
-
">99% = more than 99% missing[/dim]"
|
|
3045
|
+
"[dim]Symbols: [green]●[/green] = no missing vals in sector, "
|
|
3046
|
+
"[red]●[/red] = all vals completely missing, "
|
|
3047
|
+
"[cyan]x%[/cyan] = percentage missing[/dim]"
|
|
2386
3048
|
)
|
|
2387
3049
|
console.print(footer_text)
|
|
2388
3050
|
|
|
@@ -2521,6 +3183,20 @@ def _display_validation_result(
|
|
|
2521
3183
|
set_val = sets_list[step_index] if step_index < len(sets_list) else None
|
|
2522
3184
|
value = values_list[step_index] if step_index < len(values_list) else None
|
|
2523
3185
|
|
|
3186
|
+
# Check if this is piped data
|
|
3187
|
+
is_piped_data = _is_piped_data_source(data_source)
|
|
3188
|
+
|
|
3189
|
+
# Create friendly display name for data source
|
|
3190
|
+
if is_piped_data:
|
|
3191
|
+
if data_source.endswith(".parquet"):
|
|
3192
|
+
display_source = "Polars expression (serialized to Parquet) from `pb pl`"
|
|
3193
|
+
elif data_source.endswith(".csv"):
|
|
3194
|
+
display_source = "Polars expression (serialized to CSV) from `pb pl`"
|
|
3195
|
+
else:
|
|
3196
|
+
display_source = "Polars expression from `pb pl`"
|
|
3197
|
+
else:
|
|
3198
|
+
display_source = data_source
|
|
3199
|
+
|
|
2524
3200
|
# Get validation step info
|
|
2525
3201
|
step_info = None
|
|
2526
3202
|
if hasattr(validation, "validation_info") and len(validation.validation_info) > step_index:
|
|
@@ -2528,7 +3204,7 @@ def _display_validation_result(
|
|
|
2528
3204
|
|
|
2529
3205
|
# Create friendly title for table
|
|
2530
3206
|
if total_checks == 1:
|
|
2531
|
-
# Single check
|
|
3207
|
+
# Single check: use original title format
|
|
2532
3208
|
if check == "rows-distinct":
|
|
2533
3209
|
table_title = "Validation Result: Rows Distinct"
|
|
2534
3210
|
elif check == "col-vals-not-null":
|
|
@@ -2550,7 +3226,7 @@ def _display_validation_result(
|
|
|
2550
3226
|
else:
|
|
2551
3227
|
table_title = f"Validation Result: {check.replace('-', ' ').title()}"
|
|
2552
3228
|
else:
|
|
2553
|
-
# Multiple checks
|
|
3229
|
+
# Multiple checks: add numbering
|
|
2554
3230
|
if check == "rows-distinct":
|
|
2555
3231
|
base_title = "Rows Distinct"
|
|
2556
3232
|
elif check == "col-vals-not-null":
|
|
@@ -2587,7 +3263,7 @@ def _display_validation_result(
|
|
|
2587
3263
|
result_table.add_column("Value", style="white")
|
|
2588
3264
|
|
|
2589
3265
|
# Add basic info
|
|
2590
|
-
result_table.add_row("Data Source",
|
|
3266
|
+
result_table.add_row("Data Source", display_source)
|
|
2591
3267
|
result_table.add_row("Check Type", check)
|
|
2592
3268
|
|
|
2593
3269
|
# Add column info for column-specific checks
|
|
@@ -2617,7 +3293,7 @@ def _display_validation_result(
|
|
|
2617
3293
|
operator = "<"
|
|
2618
3294
|
elif check == "col-vals-le":
|
|
2619
3295
|
operator = "<="
|
|
2620
|
-
result_table.add_row("
|
|
3296
|
+
result_table.add_row("Comparison Value", f"{operator} {value}")
|
|
2621
3297
|
|
|
2622
3298
|
# Get validation details
|
|
2623
3299
|
if step_info:
|
|
@@ -2728,6 +3404,7 @@ def _display_validation_result(
|
|
|
2728
3404
|
Panel(
|
|
2729
3405
|
success_message,
|
|
2730
3406
|
border_style="green",
|
|
3407
|
+
expand=False,
|
|
2731
3408
|
)
|
|
2732
3409
|
)
|
|
2733
3410
|
else:
|
|
@@ -2757,6 +3434,7 @@ def _display_validation_result(
|
|
|
2757
3434
|
Panel(
|
|
2758
3435
|
failure_message,
|
|
2759
3436
|
border_style="red",
|
|
3437
|
+
expand=False,
|
|
2760
3438
|
)
|
|
2761
3439
|
)
|
|
2762
3440
|
|
|
@@ -2837,7 +3515,7 @@ def _show_extract_for_multi_check(
|
|
|
2837
3515
|
console.print()
|
|
2838
3516
|
console.print(extract_message)
|
|
2839
3517
|
|
|
2840
|
-
# Special handling for col-exists check
|
|
3518
|
+
# Special handling for col-exists check: no rows to show when column doesn't exist
|
|
2841
3519
|
if check == "col-exists":
|
|
2842
3520
|
if show_extract:
|
|
2843
3521
|
console.print(f"[dim]The column '{column}' was not found in the dataset.[/dim]")
|
|
@@ -2848,16 +3526,17 @@ def _show_extract_for_multi_check(
|
|
|
2848
3526
|
console.print("[yellow]Cannot save failing rows when column doesn't exist[/yellow]")
|
|
2849
3527
|
else:
|
|
2850
3528
|
try:
|
|
2851
|
-
# Get failing rows extract
|
|
3529
|
+
# Get failing rows extract: use step_index + 1 since extracts are 1-indexed
|
|
2852
3530
|
failing_rows = validation.get_data_extracts(i=step_index + 1, frame=True)
|
|
2853
3531
|
|
|
2854
3532
|
if failing_rows is not None and len(failing_rows) > 0:
|
|
2855
3533
|
if show_extract:
|
|
2856
|
-
#
|
|
2857
|
-
|
|
2858
|
-
|
|
3534
|
+
# Always limit to 10 rows for display, regardless of limit option
|
|
3535
|
+
display_limit = 10
|
|
3536
|
+
if len(failing_rows) > display_limit:
|
|
3537
|
+
display_rows = failing_rows.head(display_limit)
|
|
2859
3538
|
console.print(
|
|
2860
|
-
f"[dim]Showing first {
|
|
3539
|
+
f"[dim]Showing first {display_limit} of {len(failing_rows)} {row_type}[/dim]"
|
|
2861
3540
|
)
|
|
2862
3541
|
else:
|
|
2863
3542
|
display_rows = failing_rows
|
|
@@ -2868,9 +3547,9 @@ def _show_extract_for_multi_check(
|
|
|
2868
3547
|
|
|
2869
3548
|
preview_table = pb.preview(
|
|
2870
3549
|
data=display_rows,
|
|
2871
|
-
n_head=min(
|
|
3550
|
+
n_head=min(display_limit, len(display_rows)),
|
|
2872
3551
|
n_tail=0,
|
|
2873
|
-
limit=
|
|
3552
|
+
limit=display_limit,
|
|
2874
3553
|
show_row_numbers=True,
|
|
2875
3554
|
)
|
|
2876
3555
|
|
|
@@ -2892,7 +3571,7 @@ def _show_extract_for_multi_check(
|
|
|
2892
3571
|
filename = f"step_{step_index + 1:02d}_{safe_check_type}.csv"
|
|
2893
3572
|
filepath = output_folder / filename
|
|
2894
3573
|
|
|
2895
|
-
#
|
|
3574
|
+
# Use limit option for write_extract
|
|
2896
3575
|
write_rows = failing_rows
|
|
2897
3576
|
if len(failing_rows) > limit:
|
|
2898
3577
|
write_rows = failing_rows.head(limit)
|
|
@@ -2946,6 +3625,18 @@ def _show_extract_and_summary(
|
|
|
2946
3625
|
"""Show extract and summary for a validation step (used for single checks)."""
|
|
2947
3626
|
step_passed = step_info.n_failed == 0 if step_info else True
|
|
2948
3627
|
|
|
3628
|
+
# Get the friendly display name
|
|
3629
|
+
is_piped_data = _is_piped_data_source(data_source)
|
|
3630
|
+
if is_piped_data:
|
|
3631
|
+
if data_source.endswith(".parquet"):
|
|
3632
|
+
display_source = "Polars expression (serialized to Parquet) from `pb pl`"
|
|
3633
|
+
elif data_source.endswith(".csv"):
|
|
3634
|
+
display_source = "Polars expression (serialized to CSV) from `pb pl`"
|
|
3635
|
+
else:
|
|
3636
|
+
display_source = "Polars expression from `pb pl`"
|
|
3637
|
+
else:
|
|
3638
|
+
display_source = data_source
|
|
3639
|
+
|
|
2949
3640
|
# Show extract if requested and validation failed
|
|
2950
3641
|
if (show_extract or write_extract) and not step_passed:
|
|
2951
3642
|
console.print()
|
|
@@ -2997,7 +3688,7 @@ def _show_extract_and_summary(
|
|
|
2997
3688
|
if show_extract:
|
|
2998
3689
|
console.print(extract_message)
|
|
2999
3690
|
|
|
3000
|
-
# Special handling for col-exists check
|
|
3691
|
+
# Special handling for col-exists check: no rows to show when column doesn't exist
|
|
3001
3692
|
if check == "col-exists" and not step_passed:
|
|
3002
3693
|
if show_extract:
|
|
3003
3694
|
console.print(f"[dim]The column '{column}' was not found in the dataset.[/dim]")
|
|
@@ -3008,16 +3699,17 @@ def _show_extract_and_summary(
|
|
|
3008
3699
|
console.print("[yellow]Cannot save failing rows when column doesn't exist[/yellow]")
|
|
3009
3700
|
else:
|
|
3010
3701
|
try:
|
|
3011
|
-
# Get failing rows extract
|
|
3702
|
+
# Get failing rows extract: use step_index + 1 since extracts are 1-indexed
|
|
3012
3703
|
failing_rows = validation.get_data_extracts(i=step_index + 1, frame=True)
|
|
3013
3704
|
|
|
3014
3705
|
if failing_rows is not None and len(failing_rows) > 0:
|
|
3015
3706
|
if show_extract:
|
|
3016
|
-
#
|
|
3017
|
-
|
|
3018
|
-
|
|
3707
|
+
# Always limit to 10 rows for display, regardless of limit option
|
|
3708
|
+
display_limit = 10
|
|
3709
|
+
if len(failing_rows) > display_limit:
|
|
3710
|
+
display_rows = failing_rows.head(display_limit)
|
|
3019
3711
|
console.print(
|
|
3020
|
-
f"[dim]Showing first {
|
|
3712
|
+
f"[dim]Showing first {display_limit} of {len(failing_rows)} {row_type}[/dim]"
|
|
3021
3713
|
)
|
|
3022
3714
|
else:
|
|
3023
3715
|
display_rows = failing_rows
|
|
@@ -3028,9 +3720,9 @@ def _show_extract_and_summary(
|
|
|
3028
3720
|
|
|
3029
3721
|
preview_table = pb.preview(
|
|
3030
3722
|
data=display_rows,
|
|
3031
|
-
n_head=min(
|
|
3723
|
+
n_head=min(display_limit, len(display_rows)),
|
|
3032
3724
|
n_tail=0,
|
|
3033
|
-
limit=
|
|
3725
|
+
limit=display_limit,
|
|
3034
3726
|
show_row_numbers=True,
|
|
3035
3727
|
)
|
|
3036
3728
|
|
|
@@ -3052,7 +3744,7 @@ def _show_extract_and_summary(
|
|
|
3052
3744
|
filename = f"step_{step_index + 1:02d}_{safe_check_type}.csv"
|
|
3053
3745
|
filepath = output_folder / filename
|
|
3054
3746
|
|
|
3055
|
-
#
|
|
3747
|
+
# Use limit option for write_extract
|
|
3056
3748
|
write_rows = failing_rows
|
|
3057
3749
|
if len(failing_rows) > limit:
|
|
3058
3750
|
write_rows = failing_rows.head(limit)
|
|
@@ -3098,84 +3790,84 @@ def _show_extract_and_summary(
|
|
|
3098
3790
|
if step_passed:
|
|
3099
3791
|
if check == "rows-distinct":
|
|
3100
3792
|
success_message = (
|
|
3101
|
-
f"[green]✓ Validation PASSED: No duplicate rows found in {
|
|
3793
|
+
f"[green]✓ Validation PASSED: No duplicate rows found in {display_source}[/green]"
|
|
3102
3794
|
)
|
|
3103
3795
|
elif check == "col-vals-not-null":
|
|
3104
|
-
success_message = f"[green]✓ Validation PASSED: No null values found in column '{column}' in {
|
|
3796
|
+
success_message = f"[green]✓ Validation PASSED: No null values found in column '{column}' in {display_source}[/green]"
|
|
3105
3797
|
elif check == "rows-complete":
|
|
3106
|
-
success_message = f"[green]✓ Validation PASSED: All rows are complete (no missing values) in {
|
|
3798
|
+
success_message = f"[green]✓ Validation PASSED: All rows are complete (no missing values) in {display_source}[/green]"
|
|
3107
3799
|
elif check == "col-exists":
|
|
3108
3800
|
success_message = (
|
|
3109
|
-
f"[green]✓ Validation PASSED: Column '{column}' exists in {
|
|
3801
|
+
f"[green]✓ Validation PASSED: Column '{column}' exists in {display_source}[/green]"
|
|
3110
3802
|
)
|
|
3111
3803
|
elif check == "col-vals-in-set":
|
|
3112
|
-
success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are in the allowed set in {
|
|
3804
|
+
success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are in the allowed set in {display_source}[/green]"
|
|
3113
3805
|
elif check == "col-vals-gt":
|
|
3114
|
-
success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are > {value} in {
|
|
3806
|
+
success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are > {value} in {display_source}[/green]"
|
|
3115
3807
|
elif check == "col-vals-ge":
|
|
3116
|
-
success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are >= {value} in {
|
|
3808
|
+
success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are >= {value} in {display_source}[/green]"
|
|
3117
3809
|
elif check == "col-vals-lt":
|
|
3118
|
-
success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are < {value} in {
|
|
3810
|
+
success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are < {value} in {display_source}[/green]"
|
|
3119
3811
|
elif check == "col-vals-le":
|
|
3120
|
-
success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are <= {value} in {
|
|
3812
|
+
success_message = f"[green]✓ Validation PASSED: All values in column '{column}' are <= {value} in {display_source}[/green]"
|
|
3121
3813
|
else:
|
|
3122
3814
|
success_message = (
|
|
3123
|
-
f"[green]✓ Validation PASSED: {check} check passed for {
|
|
3815
|
+
f"[green]✓ Validation PASSED: {check} check passed for {display_source}[/green]"
|
|
3124
3816
|
)
|
|
3125
3817
|
|
|
3126
|
-
console.print(Panel(success_message, border_style="green"))
|
|
3818
|
+
console.print(Panel(success_message, border_style="green", expand=False))
|
|
3127
3819
|
else:
|
|
3128
3820
|
if step_info:
|
|
3129
3821
|
if check == "rows-distinct":
|
|
3130
|
-
failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} duplicate rows found in {
|
|
3822
|
+
failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} duplicate rows found in {display_source}[/red]"
|
|
3131
3823
|
elif check == "col-vals-not-null":
|
|
3132
|
-
failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} null values found in column '{column}' in {
|
|
3824
|
+
failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} null values found in column '{column}' in {display_source}[/red]"
|
|
3133
3825
|
elif check == "rows-complete":
|
|
3134
|
-
failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} incomplete rows found in {
|
|
3826
|
+
failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} incomplete rows found in {display_source}[/red]"
|
|
3135
3827
|
elif check == "col-exists":
|
|
3136
|
-
failure_message = f"[red]✗ Validation FAILED: Column '{column}' does not exist in {
|
|
3828
|
+
failure_message = f"[red]✗ Validation FAILED: Column '{column}' does not exist in {display_source}[/red]"
|
|
3137
3829
|
elif check == "col-vals-in-set":
|
|
3138
|
-
failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} invalid values found in column '{column}' in {
|
|
3830
|
+
failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} invalid values found in column '{column}' in {display_source}[/red]"
|
|
3139
3831
|
elif check == "col-vals-gt":
|
|
3140
|
-
failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} values <= {value} found in column '{column}' in {
|
|
3832
|
+
failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} values <= {value} found in column '{column}' in {display_source}[/red]"
|
|
3141
3833
|
elif check == "col-vals-ge":
|
|
3142
|
-
failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} values < {value} found in column '{column}' in {
|
|
3834
|
+
failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} values < {value} found in column '{column}' in {display_source}[/red]"
|
|
3143
3835
|
elif check == "col-vals-lt":
|
|
3144
|
-
failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} values >= {value} found in column '{column}' in {
|
|
3836
|
+
failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} values >= {value} found in column '{column}' in {display_source}[/red]"
|
|
3145
3837
|
elif check == "col-vals-le":
|
|
3146
|
-
failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} values > {value} found in column '{column}' in {
|
|
3838
|
+
failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} values > {value} found in column '{column}' in {display_source}[/red]"
|
|
3147
3839
|
else:
|
|
3148
|
-
failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} failing rows found in {
|
|
3840
|
+
failure_message = f"[red]✗ Validation FAILED: {step_info.n_failed:,} failing rows found in {display_source}[/red]"
|
|
3149
3841
|
|
|
3150
3842
|
# Add hint about --show-extract if not already used (except for col-exists which has no rows to show)
|
|
3151
3843
|
if not show_extract and check != "col-exists":
|
|
3152
3844
|
failure_message += "\n[bright_blue]💡 Tip:[/bright_blue] [cyan]Use --show-extract to see the failing rows[/cyan]"
|
|
3153
3845
|
|
|
3154
|
-
console.print(Panel(failure_message, border_style="red"))
|
|
3846
|
+
console.print(Panel(failure_message, border_style="red", expand=False))
|
|
3155
3847
|
else:
|
|
3156
3848
|
if check == "rows-distinct":
|
|
3157
3849
|
failure_message = (
|
|
3158
|
-
f"[red]✗ Validation FAILED: Duplicate rows found in {
|
|
3850
|
+
f"[red]✗ Validation FAILED: Duplicate rows found in {display_source}[/red]"
|
|
3159
3851
|
)
|
|
3160
3852
|
elif check == "rows-complete":
|
|
3161
3853
|
failure_message = (
|
|
3162
|
-
f"[red]✗ Validation FAILED: Incomplete rows found in {
|
|
3854
|
+
f"[red]✗ Validation FAILED: Incomplete rows found in {display_source}[/red]"
|
|
3163
3855
|
)
|
|
3164
3856
|
else:
|
|
3165
3857
|
failure_message = (
|
|
3166
|
-
f"[red]✗ Validation FAILED: {check} check failed for {
|
|
3858
|
+
f"[red]✗ Validation FAILED: {check} check failed for {display_source}[/red]"
|
|
3167
3859
|
)
|
|
3168
3860
|
|
|
3169
3861
|
# Add hint about --show-extract if not already used
|
|
3170
3862
|
if not show_extract:
|
|
3171
3863
|
failure_message += "\n[bright_blue]💡 Tip:[/bright_blue] [cyan]Use --show-extract to see the failing rows[/cyan]"
|
|
3172
3864
|
|
|
3173
|
-
console.print(Panel(failure_message, border_style="red"))
|
|
3865
|
+
console.print(Panel(failure_message, border_style="red", expand=False))
|
|
3174
3866
|
|
|
3175
3867
|
|
|
3176
3868
|
@cli.command()
|
|
3177
|
-
@click.argument("output_file", type=click.Path())
|
|
3178
|
-
def make_template(output_file: str):
|
|
3869
|
+
@click.argument("output_file", type=click.Path(), required=False)
|
|
3870
|
+
def make_template(output_file: str | None):
|
|
3179
3871
|
"""
|
|
3180
3872
|
Create a validation script template.
|
|
3181
3873
|
|
|
@@ -3191,11 +3883,19 @@ def make_template(output_file: str):
|
|
|
3191
3883
|
pb make-template my_validation.py
|
|
3192
3884
|
pb make-template validation_template.py
|
|
3193
3885
|
"""
|
|
3886
|
+
# Handle missing output_file with concise help
|
|
3887
|
+
if output_file is None:
|
|
3888
|
+
_show_concise_help("make-template", None)
|
|
3889
|
+
return
|
|
3890
|
+
|
|
3194
3891
|
example_script = '''"""
|
|
3195
3892
|
Example Pointblank validation script.
|
|
3196
3893
|
|
|
3197
3894
|
This script demonstrates how to create validation rules for your data.
|
|
3198
3895
|
Modify the data loading and validation rules below to match your requirements.
|
|
3896
|
+
|
|
3897
|
+
When using 'pb run' with --data option, the CLI will automatically replace
|
|
3898
|
+
the data source in your validation object with the provided data.
|
|
3199
3899
|
"""
|
|
3200
3900
|
|
|
3201
3901
|
import pointblank as pb
|
|
@@ -3239,11 +3939,6 @@ validation = (
|
|
|
3239
3939
|
# Finalize the validation
|
|
3240
3940
|
.interrogate()
|
|
3241
3941
|
)
|
|
3242
|
-
|
|
3243
|
-
# The validation object will be automatically used by the CLI
|
|
3244
|
-
# You can also access results programmatically:
|
|
3245
|
-
# print(f"All passed: {validation.all_passed()}")
|
|
3246
|
-
# print(f"Failed steps: {validation.n_failed()}")
|
|
3247
3942
|
'''
|
|
3248
3943
|
|
|
3249
3944
|
Path(output_file).write_text(example_script)
|
|
@@ -3251,13 +3946,17 @@ validation = (
|
|
|
3251
3946
|
console.print("\nEdit the template to add your data loading and validation rules, then run:")
|
|
3252
3947
|
console.print(f"[cyan]pb run {output_file}[/cyan]")
|
|
3253
3948
|
console.print(
|
|
3254
|
-
f"[cyan]pb run {output_file} --data your_data.csv[/cyan] [dim]#
|
|
3949
|
+
f"[cyan]pb run {output_file} --data your_data.csv[/cyan] [dim]# Replace data source automatically[/dim]"
|
|
3255
3950
|
)
|
|
3256
3951
|
|
|
3257
3952
|
|
|
3258
3953
|
@cli.command()
|
|
3259
|
-
@click.argument("validation_script", type=click.Path(exists=True))
|
|
3260
|
-
@click.option(
|
|
3954
|
+
@click.argument("validation_script", type=click.Path(exists=True), required=False)
|
|
3955
|
+
@click.option(
|
|
3956
|
+
"--data",
|
|
3957
|
+
type=str,
|
|
3958
|
+
help="Data source to replace in validation objects (single validation scripts only)",
|
|
3959
|
+
)
|
|
3261
3960
|
@click.option("--output-html", type=click.Path(), help="Save HTML validation report to file")
|
|
3262
3961
|
@click.option("--output-json", type=click.Path(), help="Save JSON validation summary to file")
|
|
3263
3962
|
@click.option(
|
|
@@ -3269,7 +3968,7 @@ validation = (
|
|
|
3269
3968
|
help="Save failing rows to folders (one CSV per step). Provide base name for folder.",
|
|
3270
3969
|
)
|
|
3271
3970
|
@click.option(
|
|
3272
|
-
"--limit",
|
|
3971
|
+
"--limit", default=500, help="Maximum number of failing rows to save to CSV (default: 500)"
|
|
3273
3972
|
)
|
|
3274
3973
|
@click.option(
|
|
3275
3974
|
"--fail-on",
|
|
@@ -3277,7 +3976,7 @@ validation = (
|
|
|
3277
3976
|
help="Exit with non-zero code when validation reaches this threshold level",
|
|
3278
3977
|
)
|
|
3279
3978
|
def run(
|
|
3280
|
-
validation_script: str,
|
|
3979
|
+
validation_script: str | None,
|
|
3281
3980
|
data: str | None,
|
|
3282
3981
|
output_html: str | None,
|
|
3283
3982
|
output_json: str | None,
|
|
@@ -3292,8 +3991,11 @@ def run(
|
|
|
3292
3991
|
VALIDATION_SCRIPT should be a Python file that defines validation logic.
|
|
3293
3992
|
The script should load its own data and create validation objects.
|
|
3294
3993
|
|
|
3295
|
-
If --data is provided, it will
|
|
3296
|
-
|
|
3994
|
+
If --data is provided, it will automatically replace the data source in your
|
|
3995
|
+
validation objects. This works with scripts containing a single validation.
|
|
3996
|
+
For scripts with multiple validations, use separate script files or remove --data.
|
|
3997
|
+
|
|
3998
|
+
To get started quickly, use 'pb make-template' to create a validation script template.
|
|
3297
3999
|
|
|
3298
4000
|
DATA can be:
|
|
3299
4001
|
|
|
@@ -3307,6 +4009,7 @@ def run(
|
|
|
3307
4009
|
Examples:
|
|
3308
4010
|
|
|
3309
4011
|
\b
|
|
4012
|
+
pb make-template my_validation.py # Create a template first
|
|
3310
4013
|
pb run validation_script.py
|
|
3311
4014
|
pb run validation_script.py --data data.csv
|
|
3312
4015
|
pb run validation_script.py --data small_table --output-html report.html
|
|
@@ -3314,6 +4017,11 @@ def run(
|
|
|
3314
4017
|
pb run validation_script.py --write-extract extracts_folder --fail-on critical
|
|
3315
4018
|
"""
|
|
3316
4019
|
try:
|
|
4020
|
+
# Handle missing validation_script with concise help
|
|
4021
|
+
if validation_script is None:
|
|
4022
|
+
_show_concise_help("run", None)
|
|
4023
|
+
return
|
|
4024
|
+
|
|
3317
4025
|
# Load optional data override if provided
|
|
3318
4026
|
cli_data = None
|
|
3319
4027
|
if data:
|
|
@@ -3369,19 +4077,85 @@ def run(
|
|
|
3369
4077
|
|
|
3370
4078
|
console.print(f"[green]✓[/green] Found {len(validations)} validation object(s)")
|
|
3371
4079
|
|
|
3372
|
-
#
|
|
3373
|
-
|
|
3374
|
-
|
|
3375
|
-
overall_error = False
|
|
3376
|
-
overall_warning = False
|
|
3377
|
-
|
|
3378
|
-
for i, validation in enumerate(validations, 1):
|
|
4080
|
+
# Implement automatic data replacement for Validate objects if --data was provided
|
|
4081
|
+
if cli_data is not None:
|
|
4082
|
+
# Check if we have multiple validations (this is not supported)
|
|
3379
4083
|
if len(validations) > 1:
|
|
3380
|
-
console.print(
|
|
3381
|
-
|
|
3382
|
-
|
|
3383
|
-
|
|
3384
|
-
|
|
4084
|
+
console.print(
|
|
4085
|
+
f"[red]Error: Found {len(validations)} validation objects in the script.[/red]"
|
|
4086
|
+
)
|
|
4087
|
+
console.print(
|
|
4088
|
+
"[yellow]The --data option replaces data in ALL validation objects,[/yellow]"
|
|
4089
|
+
)
|
|
4090
|
+
console.print(
|
|
4091
|
+
"[yellow]which may cause failures if validations expect different schemas.[/yellow]"
|
|
4092
|
+
)
|
|
4093
|
+
console.print("\n[cyan]Options:[/cyan]")
|
|
4094
|
+
console.print(" 1. Split your script into separate files with one validation each")
|
|
4095
|
+
console.print(
|
|
4096
|
+
" 2. Remove the --data option to use each validation's original data"
|
|
4097
|
+
)
|
|
4098
|
+
sys.exit(1)
|
|
4099
|
+
|
|
4100
|
+
console.print(
|
|
4101
|
+
f"[yellow]Replacing data in {len(validations)} validation object(s) with CLI data[/yellow]"
|
|
4102
|
+
)
|
|
4103
|
+
|
|
4104
|
+
for idx, validation in enumerate(validations, 1):
|
|
4105
|
+
# Check if it's a Validate object with data attribute
|
|
4106
|
+
if hasattr(validation, "data") and hasattr(validation, "interrogate"):
|
|
4107
|
+
console.print("[cyan]Updating validation with new data source...[/cyan]")
|
|
4108
|
+
|
|
4109
|
+
# Store the original validation_info as our "plan"
|
|
4110
|
+
original_validation_info = validation.validation_info.copy()
|
|
4111
|
+
|
|
4112
|
+
# Replace the data
|
|
4113
|
+
validation.data = cli_data
|
|
4114
|
+
|
|
4115
|
+
# Re-process the data (same as what happens in __post_init__)
|
|
4116
|
+
from pointblank.validate import _process_data
|
|
4117
|
+
|
|
4118
|
+
validation.data = _process_data(validation.data)
|
|
4119
|
+
|
|
4120
|
+
# Reset validation results but keep the plan
|
|
4121
|
+
validation.validation_info = []
|
|
4122
|
+
|
|
4123
|
+
# Re-add each validation step from the original plan
|
|
4124
|
+
for val_info in original_validation_info:
|
|
4125
|
+
# Create a copy and reset any interrogation results
|
|
4126
|
+
new_val_info = copy.deepcopy(val_info)
|
|
4127
|
+
# Reset interrogation-specific attributes if they exist
|
|
4128
|
+
if hasattr(new_val_info, "n_passed"):
|
|
4129
|
+
new_val_info.n_passed = None
|
|
4130
|
+
if hasattr(new_val_info, "n_failed"):
|
|
4131
|
+
new_val_info.n_failed = None
|
|
4132
|
+
if hasattr(new_val_info, "all_passed"):
|
|
4133
|
+
new_val_info.all_passed = None
|
|
4134
|
+
if hasattr(new_val_info, "warning"):
|
|
4135
|
+
new_val_info.warning = None
|
|
4136
|
+
if hasattr(new_val_info, "error"):
|
|
4137
|
+
new_val_info.error = None
|
|
4138
|
+
if hasattr(new_val_info, "critical"):
|
|
4139
|
+
new_val_info.critical = None
|
|
4140
|
+
validation.validation_info.append(new_val_info)
|
|
4141
|
+
|
|
4142
|
+
# Re-interrogate with the new data
|
|
4143
|
+
console.print("[cyan]Re-interrogating with new data...[/cyan]")
|
|
4144
|
+
validation.interrogate()
|
|
4145
|
+
|
|
4146
|
+
# Process each validation
|
|
4147
|
+
overall_failed = False
|
|
4148
|
+
overall_critical = False
|
|
4149
|
+
overall_error = False
|
|
4150
|
+
overall_warning = False
|
|
4151
|
+
|
|
4152
|
+
for i, validation in enumerate(validations, 1):
|
|
4153
|
+
if len(validations) > 1:
|
|
4154
|
+
console.print(f"\n[bold cyan]Validation {i}:[/bold cyan]")
|
|
4155
|
+
|
|
4156
|
+
# Display summary
|
|
4157
|
+
_display_validation_summary(validation)
|
|
4158
|
+
|
|
3385
4159
|
# Check failure status
|
|
3386
4160
|
validation_failed = False
|
|
3387
4161
|
has_critical = False
|
|
@@ -3432,11 +4206,12 @@ def run(
|
|
|
3432
4206
|
f"\n[cyan]Step {step_num}:[/cyan] {step_info.assertion_type}"
|
|
3433
4207
|
)
|
|
3434
4208
|
|
|
3435
|
-
#
|
|
3436
|
-
|
|
3437
|
-
|
|
4209
|
+
# Always limit to 10 rows for display, regardless of limit option
|
|
4210
|
+
display_limit = 10
|
|
4211
|
+
if len(failing_rows) > display_limit:
|
|
4212
|
+
display_rows = failing_rows.head(display_limit)
|
|
3438
4213
|
console.print(
|
|
3439
|
-
f"[dim]Showing first {
|
|
4214
|
+
f"[dim]Showing first {display_limit} of {len(failing_rows)} failing rows[/dim]"
|
|
3440
4215
|
)
|
|
3441
4216
|
else:
|
|
3442
4217
|
display_rows = failing_rows
|
|
@@ -3447,9 +4222,9 @@ def run(
|
|
|
3447
4222
|
# Create a preview table using pointblank's preview function
|
|
3448
4223
|
preview_table = pb.preview(
|
|
3449
4224
|
data=display_rows,
|
|
3450
|
-
n_head=min(
|
|
4225
|
+
n_head=min(display_limit, len(display_rows)),
|
|
3451
4226
|
n_tail=0,
|
|
3452
|
-
limit=
|
|
4227
|
+
limit=display_limit,
|
|
3453
4228
|
show_row_numbers=True,
|
|
3454
4229
|
)
|
|
3455
4230
|
|
|
@@ -3502,7 +4277,7 @@ def run(
|
|
|
3502
4277
|
filename = f"step_{step_num:02d}_{safe_assertion_type}.csv"
|
|
3503
4278
|
filepath = output_folder / filename
|
|
3504
4279
|
|
|
3505
|
-
#
|
|
4280
|
+
# Use limit for CSV output
|
|
3506
4281
|
save_rows = failing_rows
|
|
3507
4282
|
if hasattr(failing_rows, "head") and len(failing_rows) > limit:
|
|
3508
4283
|
save_rows = failing_rows.head(limit)
|
|
@@ -3521,7 +4296,11 @@ def run(
|
|
|
3521
4296
|
pd_data = pd.DataFrame(save_rows)
|
|
3522
4297
|
pd_data.to_csv(str(filepath), index=False)
|
|
3523
4298
|
|
|
3524
|
-
|
|
4299
|
+
# Record the actual number of rows saved
|
|
4300
|
+
rows_saved = (
|
|
4301
|
+
len(save_rows) if hasattr(save_rows, "__len__") else limit
|
|
4302
|
+
)
|
|
4303
|
+
saved_files.append((filename, rows_saved))
|
|
3525
4304
|
|
|
3526
4305
|
except Exception as e:
|
|
3527
4306
|
console.print(
|
|
@@ -3548,11 +4327,11 @@ def run(
|
|
|
3548
4327
|
if output_html:
|
|
3549
4328
|
try:
|
|
3550
4329
|
if len(validations) == 1:
|
|
3551
|
-
# Single validation
|
|
4330
|
+
# Single validation: save directly
|
|
3552
4331
|
html_content = validations[0]._repr_html_()
|
|
3553
4332
|
Path(output_html).write_text(html_content, encoding="utf-8")
|
|
3554
4333
|
else:
|
|
3555
|
-
# Multiple validations
|
|
4334
|
+
# Multiple validations: combine them
|
|
3556
4335
|
html_parts = []
|
|
3557
4336
|
html_parts.append("<html><body>")
|
|
3558
4337
|
html_parts.append("<h1>Pointblank Validation Report</h1>")
|
|
@@ -3572,11 +4351,11 @@ def run(
|
|
|
3572
4351
|
if output_json:
|
|
3573
4352
|
try:
|
|
3574
4353
|
if len(validations) == 1:
|
|
3575
|
-
# Single validation
|
|
4354
|
+
# Single validation: save directly
|
|
3576
4355
|
json_report = validations[0].get_json_report()
|
|
3577
4356
|
Path(output_json).write_text(json_report, encoding="utf-8")
|
|
3578
4357
|
else:
|
|
3579
|
-
# Multiple validations
|
|
4358
|
+
# Multiple validations: combine them
|
|
3580
4359
|
import json
|
|
3581
4360
|
|
|
3582
4361
|
combined_report = {"validations": []}
|
|
@@ -3642,3 +4421,768 @@ def _format_missing_percentage(value: float) -> str:
|
|
|
3642
4421
|
return ">99%" # More than 99%
|
|
3643
4422
|
else:
|
|
3644
4423
|
return f"{int(round(value))}%" # Round to nearest integer with % sign
|
|
4424
|
+
|
|
4425
|
+
|
|
4426
|
+
@cli.command()
@click.argument("polars_expression", type=str, required=False)
@click.option("--edit", "-e", is_flag=True, help="Open editor for multi-line input")
@click.option("--file", "-f", type=click.Path(exists=True), help="Read query from file")
@click.option(
    "--editor", help="Editor to use for --edit mode (overrides $EDITOR and auto-detection)"
)
@click.option(
    "--output-format",
    "-o",
    type=click.Choice(["preview", "scan", "missing", "info"]),
    default="preview",
    help="Output format for the result",
)
@click.option("--preview-head", default=5, help="Number of head rows for preview")
@click.option("--preview-tail", default=5, help="Number of tail rows for preview")
@click.option("--output-html", type=click.Path(), help="Save HTML output to file")
@click.option(
    "--pipe", is_flag=True, help="Output data in a format suitable for piping to other pb commands"
)
@click.option(
    "--pipe-format",
    type=click.Choice(["parquet", "csv"]),
    default="parquet",
    help="Format for piped output (default: parquet)",
)
def pl(
    polars_expression: str | None,
    edit: bool,
    file: str | None,
    editor: str | None,
    output_format: str,
    preview_head: int,
    preview_tail: int,
    output_html: str | None,
    pipe: bool,
    pipe_format: str,
):
    """
    Execute Polars expressions and display results.

    Execute Polars DataFrame operations from the command line and display
    the results using Pointblank's visualization tools.

    POLARS_EXPRESSION should be a valid Polars expression that returns a DataFrame.
    The 'pl' module is automatically imported and available.

    Examples:

    \b
    # Direct expression
    pb pl "pl.read_csv('data.csv')"
    pb pl "pl.read_csv('data.csv').select(['name', 'age'])"
    pb pl "pl.read_csv('data.csv').filter(pl.col('age') > 25)"

    # Multi-line with editor (supports multiple statements)
    pb pl --edit

    # Multi-statement code example in editor:
    # csv = pl.read_csv('data.csv')
    # result = csv.select(['name', 'age']).filter(pl.col('age') > 25)

    # Multi-line with a specific editor
    pb pl --edit --editor nano
    pb pl --edit --editor code
    pb pl --edit --editor micro

    # From file
    pb pl --file query.py

    # Piping to other pb commands
    pb pl "pl.read_csv('data.csv').filter(pl.col('age') > 25)" --pipe | pb validate --check rows-distinct
    pb pl --edit --pipe | pb preview --head 10
    pb pl --edit --pipe | pb scan --output-html report.html
    pb pl --edit --pipe | pb missing --output-html missing_report.html

    Use --output-format to change how results are displayed:

    \b
    pb pl "pl.read_csv('data.csv')" --output-format scan
    pb pl "pl.read_csv('data.csv')" --output-format missing
    pb pl "pl.read_csv('data.csv')" --output-format info

    Note: For multi-statement code, assign your final result to a variable like
    'result', 'df', 'data', or ensure it's the last expression.
    """
    # NOTE(review): the query text is executed with exec()/eval() below. This is
    # by design (the CLI runs the user's own code), but the command must never be
    # fed untrusted input.
    try:
        # Check if Polars is available
        if not _is_lib_present("polars"):
            console.print("[red]Error:[/red] Polars is not installed")
            console.print("\nThe 'pb pl' command requires Polars to be installed.")
            console.print("Install it with: [cyan]pip install polars[/cyan]")
            console.print("\nTo check all dependency status, run: [cyan]pb requirements[/cyan]")
            sys.exit(1)

        import polars as pl

        # Determine the source of the query, in priority order:
        # --file, then --edit, then the positional argument, then stdin.
        query_code = None

        if file:
            # Read from file
            query_code = Path(file).read_text()
        elif edit:
            # Determine which editor to use
            chosen_editor = editor or _get_best_editor()

            # When piping, send editor message to stderr
            if pipe:
                print(f"Using editor: {chosen_editor}", file=sys.stderr)
            else:
                console.print(f"[dim]Using editor: {chosen_editor}[/dim]")

            # Interactive editor with custom editor
            if chosen_editor == "code":
                # Special handling for VS Code
                query_code = _edit_with_vscode()
            else:
                # Use click.edit() for terminal editors
                query_code = click.edit(
                    "# Enter your Polars query here\n"
                    "# Example:\n"
                    "# pl.read_csv('data.csv').select(['name', 'age'])\n"
                    "# pl.read_csv('data.csv').filter(pl.col('age') > 25)\n"
                    "# \n"
                    "# The result should be a Polars DataFrame or LazyFrame\n"
                    "\n",
                    editor=chosen_editor,
                )

            if query_code is None:
                if pipe:
                    print("No query entered", file=sys.stderr)
                else:
                    console.print("[yellow]No query entered[/yellow]")
                sys.exit(1)
        elif polars_expression:
            # Direct argument
            query_code = polars_expression
        else:
            # Try to read from stdin (for piping)
            if not sys.stdin.isatty():
                # Data is being piped in
                query_code = sys.stdin.read().strip()
            else:
                # No input provided and stdin is a terminal - show concise help
                _show_concise_help("pl", None)
                return

        if not query_code or not query_code.strip():
            console.print("[red]Error:[/red] Empty query")
            sys.exit(1)

        # Execute the query
        with console.status("[bold green]Executing Polars expression..."):
            # Execution namespace exposing polars under both common names.
            namespace = {
                "pl": pl,
                "polars": pl,
                "__builtins__": __builtins__,
            }

            try:
                # Check if this is a single expression or multiple statements.
                # NOTE(review): this keyword heuristic can misfire (e.g. "if " or
                # " = " occurring inside a string literal forces the exec() path);
                # harmless in practice since exec() + result lookup still works.
                if "\n" in query_code.strip() or any(
                    keyword in query_code
                    for keyword in [
                        " = ",
                        "import",
                        "for ",
                        "if ",
                        "def ",
                        "class ",
                        "with ",
                        "try:",
                    ]
                ):
                    # Multiple statements - use exec()
                    exec(query_code, namespace)

                    # Look for the result in the namespace
                    # Try common variable names first
                    result = None
                    for var_name in ["result", "df", "data", "table", "output"]:
                        if var_name in namespace:
                            result = namespace[var_name]
                            break

                    # If no common names found, look for any DataFrame/LazyFrame
                    if result is None:
                        for key, value in namespace.items():
                            if (
                                hasattr(value, "collect") or hasattr(value, "columns")
                            ) and not key.startswith("_"):
                                result = value
                                break

                    # If still no result, get the last assigned variable (excluding builtins)
                    if result is None:
                        # Get variables that were added to namespace (excluding our imports)
                        user_vars = {
                            k: v
                            for k, v in namespace.items()
                            if k not in ["pl", "polars", "__builtins__"] and not k.startswith("_")
                        }
                        if user_vars:
                            # Get the last variable (this is a heuristic)
                            last_var = list(user_vars.keys())[-1]
                            result = user_vars[last_var]

                    if result is None:
                        # NOTE(review): these plain print() calls emit rich markup
                        # tags ("[red]...", "[dim]...") literally to stderr, since
                        # print() does not render console markup — confirm intent.
                        if pipe:
                            print(
                                "[red]Error:[/red] Could not find result variable", file=sys.stderr
                            )
                            print(
                                "[dim]Assign your final result to a variable like 'result', 'df', or 'data'[/dim]",
                                file=sys.stderr,
                            )
                            print(
                                "[dim]Or ensure your last line returns a DataFrame[/dim]",
                                file=sys.stderr,
                            )
                        else:
                            console.print("[red]Error:[/red] Could not find result variable")
                            console.print(
                                "[dim]Assign your final result to a variable like 'result', 'df', or 'data'[/dim]"
                            )
                            console.print("[dim]Or ensure your last line returns a DataFrame[/dim]")
                        sys.exit(1)

                else:
                    # Single expression - use eval()
                    result = eval(query_code, namespace)

                # Validate result: anything with .collect (LazyFrame) or .columns
                # (DataFrame) is accepted.
                if not hasattr(result, "collect") and not hasattr(result, "columns"):
                    if pipe:
                        print(
                            "[red]Error:[/red] Expression must return a Polars DataFrame or LazyFrame",
                            file=sys.stderr,
                        )
                        print(f"[dim]Got: {type(result)}[/dim]", file=sys.stderr)
                    else:
                        console.print(
                            "[red]Error:[/red] Expression must return a Polars DataFrame or LazyFrame"
                        )
                        console.print(f"[dim]Got: {type(result)}[/dim]")
                    sys.exit(1)

            except Exception as e:
                # When piping, send errors to stderr so they don't interfere with the pipe
                if pipe:
                    print(f"Error executing Polars expression: {e}", file=sys.stderr)
                    print(file=sys.stderr)

                    # Create a panel with the expression(s) for better readability
                    if "\n" in query_code.strip():
                        # Multi-line expression
                        print(f"Expression(s) provided:\n{query_code}", file=sys.stderr)
                    else:
                        # Single line expression
                        print(f"Expression provided: {query_code}", file=sys.stderr)
                else:
                    # Normal error handling when not piping
                    console.print(f"[red]Error executing Polars expression:[/red] {e}")
                    console.print()

                    # Create a panel with the expression(s) for better readability
                    if "\n" in query_code.strip():
                        # Multi-line expression
                        console.print(
                            Panel(
                                query_code,
                                title="Expression(s) provided",
                                border_style="red",
                                expand=False,
                                title_align="left",
                            )
                        )
                    else:
                        # Single line expression
                        console.print(
                            Panel(
                                query_code,
                                title="Expression provided",
                                border_style="red",
                                expand=False,
                                title_align="left",
                            )
                        )

                sys.exit(1)

        # Only print success message when not piping (so it doesn't interfere with pipe output)
        if not pipe:
            console.print("[green]✓[/green] Polars expression executed successfully")

        # Process output
        if pipe:
            # Output data for piping to other commands
            _handle_pl_pipe(result, pipe_format)
        elif output_format == "preview":
            _handle_pl_preview(result, preview_head, preview_tail, output_html)
        elif output_format == "scan":
            _handle_pl_scan(result, query_code, output_html)
        elif output_format == "missing":
            _handle_pl_missing(result, query_code, output_html)
        elif output_format == "info":
            _handle_pl_info(result, query_code, output_html)
        elif output_format == "validate":
            # NOTE(review): unreachable — "validate" is not in the click.Choice
            # list for --output-format above; confirm whether it should be added.
            console.print("[yellow]Validation output format not yet implemented[/yellow]")
            console.print("Use 'pb validate' with a data file for now")

    except Exception as e:
        console.print(f"[red]Error:[/red] {e}")
        sys.exit(1)
+
|
|
4743
|
+
|
|
4744
|
+
def _handle_pl_preview(result: Any, head: int, tail: int, output_html: str | None) -> None:
    """Render a head/tail preview of a Polars result, to the terminal or as HTML.

    Builds a GT preview table via ``pb.preview``; writes it to *output_html*
    when given, otherwise prints it to the console with table metadata when
    that metadata can be gathered.
    """
    try:
        # Build the preview table with pointblank's existing helper.
        gt_table = pb.preview(
            data=result,
            n_head=head,
            n_tail=tail,
            show_row_numbers=True,
        )

        if output_html:
            # File output path: dump raw HTML and confirm.
            Path(output_html).write_text(gt_table.as_raw_html(), encoding="utf-8")
            console.print(f"[green]✓[/green] HTML saved to: {output_html}")
            return

        # Terminal path: try to enrich the display with table metadata.
        try:
            n_rows = pb.get_row_count(result)
            n_cols = pb.get_column_count(result)
            meta = {
                "total_rows": n_rows,
                "total_columns": n_cols,
                "head_rows": head,
                "tail_rows": tail,
                "is_complete": n_rows <= (head + tail),
                "source_type": "Polars expression",
                "table_type": _get_tbl_type(result),
            }
        except Exception:
            # Metadata gathering failed: fall back to the bare table.
            _rich_print_gt_table(gt_table)
        else:
            _rich_print_gt_table(gt_table, meta)

    except Exception as e:
        console.print(f"[red]Error creating preview:[/red] {e}")
        sys.exit(1)
|
+
|
|
4786
|
+
def _handle_pl_scan(result: Any, expression: str, output_html: str | None) -> None:
    """Render a column-summary scan of a Polars result, to the terminal or HTML.

    Produces the scan with ``pb.col_summary_tbl``; saves it to *output_html*
    when given, otherwise prints a rich summary table annotated with the
    originating expression and basic table metadata.
    """
    try:
        summary = pb.col_summary_tbl(data=result)

        if output_html:
            # File output path: serialize the GT table and confirm.
            Path(output_html).write_text(summary.as_raw_html(), encoding="utf-8")
            console.print(f"[green]✓[/green] Data scan report saved to: {output_html}")
            return

        # Terminal path: gather metadata, then hand off to the rich printer.
        try:
            _rich_print_scan_table(
                summary,
                expression,
                "Polars expression",
                _get_tbl_type(result),
                pb.get_row_count(result),
                pb.get_column_count(result),
            )
        except Exception as e:
            console.print(f"[yellow]Could not display scan summary: {e}[/yellow]")

    except Exception as e:
        console.print(f"[red]Error creating scan:[/red] {e}")
        sys.exit(1)
|
4816
|
+
|
|
4817
|
+
|
|
4818
|
+
def _handle_pl_missing(result: Any, expression: str, output_html: str | None) -> None:
    """Render a missing-values report for a Polars result.

    Saves the ``pb.missing_vals_tbl`` report to *output_html* when given,
    otherwise prints it to the console. *expression* is accepted for
    signature parity with the other ``_handle_pl_*`` helpers.
    """
    try:
        report = pb.missing_vals_tbl(data=result)

        if output_html:
            # File output path: serialize and confirm.
            Path(output_html).write_text(report.as_raw_html(), encoding="utf-8")
            console.print(f"[green]✓[/green] Missing values report saved to: {output_html}")
        else:
            # Terminal path.
            _rich_print_missing_table(report, result)

    except Exception as e:
        console.print(f"[red]Error creating missing values report:[/red] {e}")
        sys.exit(1)
|
+
|
|
4834
|
+
|
|
4835
|
+
def _handle_pl_info(result: Any, expression: str, output_html: str | None) -> None:
    """Handle info output for Polars results.

    Shows the source expression, table type, row/column counts and per-column
    dtypes — either as a minimal HTML page (when *output_html* is given) or as
    a rich console table.
    """
    try:
        # Get basic info
        tbl_type = _get_tbl_type(result)
        row_count = pb.get_row_count(result)
        col_count = pb.get_column_count(result)

        # Get column names and types
        if hasattr(result, "columns"):
            columns = list(result.columns)
        elif hasattr(result, "schema"):
            # NOTE(review): polars exposes Schema.names() as a method, not an
            # attribute — confirm this branch works for the inputs it can see
            # (DataFrame/LazyFrame both have .columns, so it is rarely reached).
            columns = list(result.schema.names)
        else:
            columns = []

        dtypes_dict = _get_column_dtypes(result, columns)

        if output_html:
            # Create a simple HTML info page
            # TODO: Implement an improved version of this in the Python API and then
            # use that here
            # NOTE(review): expression/column text is interpolated unescaped into
            # the HTML; fine for local reports, but worth escaping if ever shared.
            html_content = f"""
            <html><body>
            <h2>Polars Expression Info</h2>
            <p><strong>Expression:</strong> {expression}</p>
            <p><strong>Table Type:</strong> {tbl_type}</p>
            <p><strong>Rows:</strong> {row_count:,}</p>
            <p><strong>Columns:</strong> {col_count:,}</p>
            <h3>Column Details</h3>
            <ul>
            {"".join(f"<li>{col}: {dtypes_dict.get(col, '?')}</li>" for col in columns)}
            </ul>
            </body></html>
            """
            Path(output_html).write_text(html_content, encoding="utf-8")
            console.print(f"[green]✓[/green] HTML info saved to: {output_html}")
        else:
            # Display info table
            from rich.box import SIMPLE_HEAD

            info_table = Table(
                title="Polars Expression Info",
                show_header=True,
                header_style="bold magenta",
                box=SIMPLE_HEAD,
                title_style="bold cyan",
                title_justify="left",
            )
            info_table.add_column("Property", style="cyan", no_wrap=True)
            info_table.add_column("Value", style="green")

            info_table.add_row("Expression", expression)
            # Capitalize "polars" to "Polars" for consistency with pb info command
            display_tbl_type = (
                tbl_type.replace("polars", "Polars") if "polars" in tbl_type.lower() else tbl_type
            )
            info_table.add_row("Table Type", display_tbl_type)
            info_table.add_row("Rows", f"{row_count:,}")
            info_table.add_row("Columns", f"{col_count:,}")

            console.print()
            console.print(info_table)

            # Show column details
            if columns:
                console.print("\n[bold cyan]Column Details:[/bold cyan]")
                for col in columns[:10]:  # Show first 10 columns
                    dtype = dtypes_dict.get(col, "?")
                    console.print(f"  • {col}: [yellow]{dtype}[/yellow]")

                if len(columns) > 10:
                    console.print(f"  ... and {len(columns) - 10} more columns")

    except Exception as e:
        console.print(f"[red]Error creating info:[/red] {e}")
        sys.exit(1)
4912
|
+
|
|
4913
|
+
|
|
4914
|
+
def _handle_pl_pipe(result: Any, pipe_format: str) -> None:
|
|
4915
|
+
"""Handle piped output from Polars results."""
|
|
4916
|
+
try:
|
|
4917
|
+
import sys
|
|
4918
|
+
import tempfile
|
|
4919
|
+
|
|
4920
|
+
# Create a temporary file to store the data
|
|
4921
|
+
with tempfile.NamedTemporaryFile(
|
|
4922
|
+
mode="w", suffix=f".{pipe_format}", prefix="pb_pipe_", delete=False
|
|
4923
|
+
) as temp_file:
|
|
4924
|
+
temp_path = temp_file.name
|
|
4925
|
+
|
|
4926
|
+
# Write the data to the temporary file
|
|
4927
|
+
if pipe_format == "parquet":
|
|
4928
|
+
if hasattr(result, "write_parquet"):
|
|
4929
|
+
# Polars
|
|
4930
|
+
result.write_parquet(temp_path)
|
|
4931
|
+
elif hasattr(result, "to_parquet"):
|
|
4932
|
+
# Pandas
|
|
4933
|
+
result.to_parquet(temp_path)
|
|
4934
|
+
else:
|
|
4935
|
+
# Convert to pandas and write
|
|
4936
|
+
import pandas as pd
|
|
4937
|
+
|
|
4938
|
+
pd_result = pd.DataFrame(result)
|
|
4939
|
+
pd_result.to_parquet(temp_path)
|
|
4940
|
+
else: # CSV
|
|
4941
|
+
if hasattr(result, "write_csv"):
|
|
4942
|
+
# Polars
|
|
4943
|
+
result.write_csv(temp_path)
|
|
4944
|
+
elif hasattr(result, "to_csv"):
|
|
4945
|
+
# Pandas
|
|
4946
|
+
result.to_csv(temp_path, index=False)
|
|
4947
|
+
else:
|
|
4948
|
+
# Convert to pandas and write
|
|
4949
|
+
import pandas as pd
|
|
4950
|
+
|
|
4951
|
+
pd_result = pd.DataFrame(result)
|
|
4952
|
+
pd_result.to_csv(temp_path, index=False)
|
|
4953
|
+
|
|
4954
|
+
# Output the temporary file path to stdout for the next command
|
|
4955
|
+
print(temp_path)
|
|
4956
|
+
|
|
4957
|
+
except Exception as e:
|
|
4958
|
+
print(f"[red]Error creating pipe output:[/red] {e}", file=sys.stderr)
|
|
4959
|
+
sys.exit(1)
|
|
4960
|
+
|
|
4961
|
+
|
|
4962
|
+
def _get_best_editor() -> str:
|
|
4963
|
+
"""Detect the best available editor on the system."""
|
|
4964
|
+
|
|
4965
|
+
# Check environment variable first
|
|
4966
|
+
if "EDITOR" in os.environ:
|
|
4967
|
+
return os.environ["EDITOR"]
|
|
4968
|
+
|
|
4969
|
+
# Check for common editors in order of preference
|
|
4970
|
+
editors = [
|
|
4971
|
+
"code", # VS Code
|
|
4972
|
+
"micro", # Modern terminal editor
|
|
4973
|
+
"nano", # User-friendly terminal editor
|
|
4974
|
+
"vim", # Vim
|
|
4975
|
+
"vi", # Vi (fallback)
|
|
4976
|
+
]
|
|
4977
|
+
|
|
4978
|
+
for editor in editors:
|
|
4979
|
+
if shutil.which(editor):
|
|
4980
|
+
return editor
|
|
4981
|
+
|
|
4982
|
+
# Ultimate fallback
|
|
4983
|
+
return "nano"
|
|
4984
|
+
|
|
4985
|
+
|
|
4986
|
+
def _edit_with_vscode() -> str | None:
    """Edit Polars query using VS Code.

    Writes a commented template to a temp .py file, opens it in VS Code with
    ``--wait`` (blocking until the tab is closed), then returns the edited
    content with comments, blank lines and polars imports stripped. Returns
    None when nothing usable was entered or VS Code could not be used.
    """
    import subprocess
    import tempfile

    # Create a temporary Python file with instructions for the user.
    # delete=False: the file must survive past this with-block so VS Code
    # can open it; cleanup happens in the finally below.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".py", prefix="pb_pl_", delete=False) as f:
        f.write("import polars as pl\n")
        f.write("\n")
        f.write("# Enter your Polars query here\n")
        f.write("# Examples:\n")
        f.write("# \n")
        f.write("# Single expression:\n")
        f.write("# pl.read_csv('data.csv').select(['name', 'age'])\n")
        f.write("# \n")
        f.write("# Multiple statements:\n")
        f.write("# csv = pl.read_csv('data.csv')\n")
        f.write("# result = csv.select(['name', 'age']).filter(pl.col('age') > 25)\n")
        f.write("# \n")
        f.write("# For multi-statement code, assign your final result to a variable\n")
        f.write("# like 'result', 'df', 'data', or just ensure it's the last line\n")
        f.write("# \n")
        f.write("# Save and then close this file in VS Code to execute the query\n")
        f.write("\n")
        temp_file = f.name

    try:
        # Open in VS Code and wait for it to close (5-minute timeout).
        result = subprocess.run(
            ["code", "--wait", temp_file], capture_output=True, text=True, timeout=300
        )

        if result.returncode != 0:
            console.print(f"[yellow]VS Code exited with code {result.returncode}[/yellow]")

        # Read the edited content
        with open(temp_file, "r") as f:
            content = f.read()

        # Remove comments, empty lines, and import statements for cleaner execution
        # (the `pl` name is injected by the caller's exec/eval namespace).
        lines = []
        for line in content.split("\n"):
            stripped = line.strip()
            # NOTE(review): the second startswith check is redundant — the
            # "import polars" prefix already matches "import polars as pl".
            if (
                stripped
                and not stripped.startswith("#")
                and not stripped.startswith("import polars")
                and not stripped.startswith("import polars as pl")
            ):
                lines.append(line)

        return "\n".join(lines) if lines else None

    except subprocess.TimeoutExpired:
        console.print("[red]Timeout:[/red] VS Code took too long to respond")
        return None
    except subprocess.CalledProcessError as e:
        console.print(f"[red]Error:[/red] Could not open VS Code: {e}")
        return None
    except FileNotFoundError:
        console.print("[red]Error:[/red] VS Code not found in PATH")
        return None
    finally:
        # Clean up the temp file regardless of outcome.
        Path(temp_file).unlink(missing_ok=True)
|
5051
|
+
|
|
5052
|
+
|
|
5053
|
+
def _show_concise_help(command_name: str, ctx: click.Context | None) -> None:
    """Print a short usage summary for ``command_name``, then exit with code 1.

    Shown when a command is invoked without its required arguments: instead of
    a bare error, the user sees the most common invocations and key options,
    plus a pointer to the command's full ``--help`` output.

    Parameters
    ----------
    command_name
        Name of the CLI command (e.g. ``"info"``, ``"preview"``). Unknown
        names print nothing but still exit with status 1.
    ctx
        The active Click context, if any. When provided, ``ctx.exit(1)`` is
        used so Click can run its teardown; otherwise ``sys.exit(1)``.
    """
    # Per-command help text. Each entry is the sequence of lines printed for
    # one command; ``None`` stands for a blank separator line. Keeping the
    # text in a table avoids one long if/elif chain of console.print calls.
    help_text: dict[str, list[str | None]] = {
        "info": [
            "[bold cyan]pb info[/bold cyan] - Display information about a data source",
            None,
            "[bold yellow]Usage:[/bold yellow]",
            " pb info data.csv",
            " pb info small_table",
            None,
            "[dim]Shows table type, dimensions, column names, and data types[/dim]",
            None,
            "[dim]Use [bold]pb info --help[/bold] for complete options and examples[/dim]",
        ],
        "preview": [
            "[bold cyan]pb preview[/bold cyan] - Preview a data table showing head and tail rows",
            None,
            "[bold yellow]Usage:[/bold yellow]",
            " pb preview data.csv",
            " pb preview data.parquet --head 10 --tail 5",
            None,
            "[bold yellow]Key Options:[/bold yellow]",
            " --head N Number of rows from the top (default: 5)",
            " --tail N Number of rows from the bottom (default: 5)",
            " --columns LIST Comma-separated list of columns to display",
            " --output-html Save HTML output to file",
            None,
            "[dim]Use [bold]pb preview --help[/bold] for complete options and examples[/dim]",
        ],
        "scan": [
            "[bold cyan]pb scan[/bold cyan] - Generate a comprehensive data profile report",
            None,
            "[bold yellow]Usage:[/bold yellow]",
            " pb scan data.csv",
            " pb scan data.parquet --output-html report.html",
            None,
            "[bold yellow]Key Options:[/bold yellow]",
            " --output-html Save HTML scan report to file",
            " --columns LIST Comma-separated list of columns to scan",
            None,
            "[dim]Use [bold]pb scan --help[/bold] for complete options and examples[/dim]",
        ],
        "missing": [
            "[bold cyan]pb missing[/bold cyan] - Generate a missing values report",
            None,
            "[bold yellow]Usage:[/bold yellow]",
            " pb missing data.csv",
            " pb missing data.parquet --output-html missing_report.html",
            None,
            "[bold yellow]Key Options:[/bold yellow]",
            " --output-html Save HTML output to file",
            None,
            "[dim]Use [bold]pb missing --help[/bold] for complete options and examples[/dim]",
        ],
        "validate": [
            "[bold cyan]pb validate[/bold cyan] - Perform data validation checks",
            None,
            "[bold yellow]Usage:[/bold yellow]",
            " pb validate data.csv",
            " pb validate data.csv --check col-vals-not-null --column email",
            None,
            "[bold yellow]Key Options:[/bold yellow]",
            " --check TYPE Validation check type (default: rows-distinct)",
            " --column COL Column name for column-specific checks",
            " --show-extract Show failing rows if validation fails",
            " --list-checks List all available validation checks",
            None,
            "[dim]Use [bold]pb validate --help[/bold] for complete options and examples[/dim]",
        ],
        "run": [
            "[bold cyan]pb run[/bold cyan] - Run a Pointblank validation script",
            None,
            "[bold yellow]Usage:[/bold yellow]",
            " pb run validation_script.py",
            " pb run validation_script.py --data data.csv",
            None,
            "[bold yellow]Key Options:[/bold yellow]",
            " --data SOURCE Replace data source in validation objects",
            " --output-html Save HTML validation report to file",
            " --show-extract Show failing rows if validation fails",
            " --fail-on LEVEL Exit with error on critical/error/warning/any",
            None,
            "[dim]Use [bold]pb run --help[/bold] for complete options and examples[/dim]",
        ],
        "make-template": [
            "[bold cyan]pb make-template[/bold cyan] - Create a validation script template",
            None,
            "[bold yellow]Usage:[/bold yellow]",
            " pb make-template my_validation.py",
            " pb make-template validation_template.py",
            None,
            "[dim]Creates a sample Python script with validation examples[/dim]",
            "[dim]Edit the template and run with [bold]pb run[/bold][/dim]",
            None,
            "[dim]Use [bold]pb make-template --help[/bold] for complete options and examples[/dim]",
        ],
        "pl": [
            "[bold cyan]pb pl[/bold cyan] - Execute Polars expressions and display results",
            None,
            "[bold yellow]Usage:[/bold yellow]",
            " pb pl \"pl.read_csv('data.csv')\"",
            " pb pl --edit",
            None,
            "[bold yellow]Key Options:[/bold yellow]",
            " --edit Open editor for multi-line input",
            " --file FILE Read query from file",
            " --output-format Output format: preview, scan, missing, info",
            " --pipe Output for piping to other pb commands",
            None,
            "[dim]Use [bold]pb pl --help[/bold] for complete options and examples[/dim]",
        ],
    }

    for line in help_text.get(command_name, []):
        if line is None:
            console.print()
        else:
            console.print(line)

    # Prefer Click's own exit so context teardown still runs; fall back to a
    # plain process exit when called outside of a Click context.
    if ctx is not None:
        ctx.exit(1)
    else:
        sys.exit(1)