PyPI - duckguard - Versions diffs - 2.0.0__py3-none-any.whl → 2.3.0__py3-none-any.whl - Mend

duckguard 2.0.0-py3-none-any.whl → 2.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71)

duckguard/__init__.py +55 -28
duckguard/anomaly/__init__.py +29 -1
duckguard/anomaly/baselines.py +294 -0
duckguard/anomaly/detector.py +1 -5
duckguard/anomaly/methods.py +17 -5
duckguard/anomaly/ml_methods.py +724 -0
duckguard/cli/main.py +561 -56
duckguard/connectors/__init__.py +2 -2
duckguard/connectors/bigquery.py +1 -1
duckguard/connectors/databricks.py +1 -1
duckguard/connectors/factory.py +2 -3
duckguard/connectors/files.py +1 -1
duckguard/connectors/kafka.py +2 -2
duckguard/connectors/mongodb.py +1 -1
duckguard/connectors/mysql.py +1 -1
duckguard/connectors/oracle.py +1 -1
duckguard/connectors/postgres.py +1 -2
duckguard/connectors/redshift.py +1 -1
duckguard/connectors/snowflake.py +1 -2
duckguard/connectors/sqlite.py +1 -1
duckguard/connectors/sqlserver.py +10 -13
duckguard/contracts/__init__.py +6 -6
duckguard/contracts/diff.py +1 -1
duckguard/contracts/generator.py +5 -6
duckguard/contracts/loader.py +4 -4
duckguard/contracts/validator.py +3 -4
duckguard/core/__init__.py +3 -3
duckguard/core/column.py +588 -5
duckguard/core/dataset.py +708 -3
duckguard/core/result.py +328 -1
duckguard/core/scoring.py +1 -2
duckguard/errors.py +362 -0
duckguard/freshness/__init__.py +33 -0
duckguard/freshness/monitor.py +429 -0
duckguard/history/__init__.py +44 -0
duckguard/history/schema.py +301 -0
duckguard/history/storage.py +479 -0
duckguard/history/trends.py +348 -0
duckguard/integrations/__init__.py +31 -0
duckguard/integrations/airflow.py +387 -0
duckguard/integrations/dbt.py +458 -0
duckguard/notifications/__init__.py +61 -0
duckguard/notifications/email.py +508 -0
duckguard/notifications/formatter.py +118 -0
duckguard/notifications/notifiers.py +357 -0
duckguard/profiler/auto_profile.py +3 -3
duckguard/pytest_plugin/__init__.py +1 -1
duckguard/pytest_plugin/plugin.py +1 -1
duckguard/reporting/console.py +2 -2
duckguard/reports/__init__.py +42 -0
duckguard/reports/html_reporter.py +514 -0
duckguard/reports/pdf_reporter.py +114 -0
duckguard/rules/__init__.py +3 -3
duckguard/rules/executor.py +3 -4
duckguard/rules/generator.py +8 -5
duckguard/rules/loader.py +5 -5
duckguard/rules/schema.py +23 -0
duckguard/schema_history/__init__.py +40 -0
duckguard/schema_history/analyzer.py +414 -0
duckguard/schema_history/tracker.py +288 -0
duckguard/semantic/__init__.py +1 -1
duckguard/semantic/analyzer.py +0 -2
duckguard/semantic/detector.py +17 -1
duckguard/semantic/validators.py +2 -1
duckguard-2.3.0.dist-info/METADATA +953 -0
duckguard-2.3.0.dist-info/RECORD +77 -0
duckguard-2.0.0.dist-info/METADATA +0 -221
duckguard-2.0.0.dist-info/RECORD +0 -55
{duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/WHEEL +0 -0
{duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/entry_points.txt +0 -0
{duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/licenses/LICENSE +0 -0

duckguard/cli/main.py CHANGED

@@ -5,21 +5,14 @@ A modern, beautiful CLI for data quality that just works.
 from __future__ import annotations
-import sys
 from pathlib import Path
-from typing import Optional
 import typer
 from rich.console import Console
 from rich.panel import Panel
-from rich.table import Table
-from rich.syntax import Syntax
 from rich.progress import Progress, SpinnerColumn, TextColumn
-from rich import print as rprint
-from rich.tree import Tree
-from rich.text import Text
-from rich.columns import Columns
-from rich.markdown import Markdown
+from rich.syntax import Syntax
+from rich.table import Table
 from duckguard import __version__
@@ -45,7 +38,7 @@ def version_callback(value: bool) -> None:
 @app.callback()
 def main(
-    version: Optional[bool] = typer.Option(
+    version: bool | None = typer.Option(
         None,
         "--version",
         "-v",
@@ -61,11 +54,11 @@ def main(
 @app.command()
 def check(
     source: str = typer.Argument(..., help="Path to file or connection string"),
-    config: Optional[str] = typer.Option(None, "--config", "-c", help="Path to duckguard.yaml rules file"),
-    table: Optional[str] = typer.Option(None, "--table", "-t", help="Table name (for databases)"),
-    not_null: Optional[list[str]] = typer.Option(None, "--not-null", "-n", help="Columns that must not be null"),
-    unique: Optional[list[str]] = typer.Option(None, "--unique", "-u", help="Columns that must be unique"),
-    output: Optional[str] = typer.Option(None, "--output", "-o", help="Output file (json)"),
+    config: str | None = typer.Option(None, "--config", "-c", help="Path to duckguard.yaml rules file"),
+    table: str | None = typer.Option(None, "--table", "-t", help="Table name (for databases)"),
+    not_null: list[str] | None = typer.Option(None, "--not-null", "-n", help="Columns that must not be null"),
+    unique: list[str] | None = typer.Option(None, "--unique", "-u", help="Columns that must be unique"),
+    output: str | None = typer.Option(None, "--output", "-o", help="Output file (json)"),
     verbose: bool = typer.Option(False, "--verbose", "-V", help="Verbose output"),
 ) -> None:
     """
@@ -78,8 +71,8 @@ def check(
         duckguard check postgres://localhost/db --table orders
     """
     from duckguard.connectors import connect
-    from duckguard.rules import load_rules, execute_rules
     from duckguard.core.scoring import score
+    from duckguard.rules import execute_rules, load_rules
     console.print(f"\n[bold blue]DuckGuard[/bold blue] Checking: [cyan]{source}[/cyan]\n")
@@ -185,8 +178,8 @@ def check(
 @app.command()
 def discover(
     source: str = typer.Argument(..., help="Path to file or connection string"),
-    table: Optional[str] = typer.Option(None, "--table", "-t", help="Table name"),
-    output: Optional[str] = typer.Option(None, "--output", "-o", help="Output file for rules (duckguard.yaml)"),
+    table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
+    output: str | None = typer.Option(None, "--output", "-o", help="Output file for rules (duckguard.yaml)"),
     format: str = typer.Option("yaml", "--format", "-f", help="Output format: yaml, python"),
 ) -> None:
     """
@@ -213,7 +206,7 @@ def discover(
             console=console,
             transient=True,
         ) as progress:
-            task = progress.add_task("Analyzing data...", total=None)
+            _task = progress.add_task("Analyzing data...", total=None)  # noqa: F841
             dataset = connect(source, table=table)
             # Semantic analysis
@@ -230,7 +223,7 @@ def discover(
         if output:
             yaml_content = ruleset_to_yaml(ruleset)
             Path(output).write_text(yaml_content, encoding="utf-8")
-            console.print(f"\n[green]✓[/green] Rules saved to [cyan]{output}[/cyan]")
+            console.print(f"\n[green]SAVED[/green] Rules saved to [cyan]{output}[/cyan]")
             console.print(f"[dim]Run: duckguard check {source} --config {output}[/dim]")
         else:
             # Display YAML
@@ -250,8 +243,8 @@ def discover(
 def contract(
     action: str = typer.Argument(..., help="Action: generate, validate, diff"),
     source: str = typer.Argument(None, help="Data source or contract file"),
-    contract_file: Optional[str] = typer.Option(None, "--contract", "-c", help="Contract file path"),
-    output: Optional[str] = typer.Option(None, "--output", "-o", help="Output file"),
+    contract_file: str | None = typer.Option(None, "--contract", "-c", help="Contract file path"),
+    output: str | None = typer.Option(None, "--output", "-o", help="Output file"),
     strict: bool = typer.Option(False, "--strict", help="Strict validation mode"),
 ) -> None:
     """
@@ -268,13 +261,12 @@ def contract(
         duckguard contract diff old.contract.yaml new.contract.yaml
     """
     from duckguard.contracts import (
+        diff_contracts,
+        generate_contract,
         load_contract,
         validate_contract,
-        generate_contract,
-        diff_contracts,
     )
     from duckguard.contracts.loader import contract_to_yaml
-    from duckguard.connectors import connect
     try:
         if action == "generate":
@@ -298,14 +290,14 @@ def contract(
             if output:
                 yaml_content = contract_to_yaml(contract_obj)
                 Path(output).write_text(yaml_content, encoding="utf-8")
-                console.print(f"\n[green]✓[/green] Contract saved to [cyan]{output}[/cyan]")
+                console.print(f"\n[green]SAVED[/green] Contract saved to [cyan]{output}[/cyan]")
         elif action == "validate":
             if not source or not contract_file:
                 console.print("[red]Error:[/red] Both source and --contract required for validate")
                 raise typer.Exit(1)
-            console.print(f"\n[bold blue]DuckGuard[/bold blue] Validating against contract\n")
+            console.print("\n[bold blue]DuckGuard[/bold blue] Validating against contract\n")
             with Progress(
                 SpinnerColumn(),
@@ -345,21 +337,32 @@ def contract(
 @app.command()
 def anomaly(
     source: str = typer.Argument(..., help="Path to file or connection string"),
-    table: Optional[str] = typer.Option(None, "--table", "-t", help="Table name"),
-    method: str = typer.Option("zscore", "--method", "-m", help="Detection method: zscore, iqr, percent_change"),
-    threshold: Optional[float] = typer.Option(None, "--threshold", help="Detection threshold"),
-    columns: Optional[list[str]] = typer.Option(None, "--column", "-c", help="Specific columns to check"),
+    table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
+    method: str = typer.Option("zscore", "--method", "-m", help="Method: zscore, iqr, percent_change, baseline, ks_test"),
+    threshold: float | None = typer.Option(None, "--threshold", help="Detection threshold"),
+    columns: list[str] | None = typer.Option(None, "--column", "-c", help="Specific columns to check"),
+    learn_baseline: bool = typer.Option(False, "--learn-baseline", "-L", help="Learn and store baseline from current data"),
 ) -> None:
     """
     Detect anomalies in data.
+    [bold]Methods:[/bold]
+        zscore         - Z-score based detection (default)
+        iqr            - Interquartile range detection
+        percent_change - Percent change from baseline
+        baseline       - Compare to learned baseline (ML)
+        ks_test        - Distribution drift detection (ML)
     [bold]Examples:[/bold]
         duckguard anomaly data.csv
         duckguard anomaly data.csv --method iqr --threshold 2.0
         duckguard anomaly data.csv --column amount --column quantity
+        duckguard anomaly data.csv --learn-baseline     # Store baseline
+        duckguard anomaly data.csv --method baseline    # Compare to baseline
+        duckguard anomaly data.csv --method ks_test     # Detect drift
     """
-    from duckguard.connectors import connect
     from duckguard.anomaly import detect_anomalies
+    from duckguard.connectors import connect
     console.print(f"\n[bold blue]DuckGuard[/bold blue] Detecting anomalies in: [cyan]{source}[/cyan]\n")
@@ -370,8 +373,38 @@ def anomaly(
             console=console,
             transient=True,
         ) as progress:
-            progress.add_task("Analyzing data...", total=None)
+            if learn_baseline:
+                progress.add_task("Learning baseline...", total=None)
+            else:
+                progress.add_task("Analyzing data...", total=None)
             dataset = connect(source, table=table)
+            # Handle baseline learning
+            if learn_baseline:
+                from duckguard.anomaly import BaselineMethod
+                from duckguard.history import HistoryStorage
+                storage = HistoryStorage()
+                baseline_method = BaselineMethod(storage=storage)
+                # Get numeric columns to learn baselines for
+                target_columns = columns if columns else dataset.columns
+                learned = 0
+                for col_name in target_columns:
+                    col = dataset[col_name]
+                    if col.mean is not None:  # Numeric column
+                        values = col.values
+                        baseline_method.fit(values)
+                        baseline_method.save_baseline(source, col_name)
+                        learned += 1
+                console.print(f"[green]LEARNED[/green] Baselines stored for {learned} columns")
+                console.print("[dim]Use --method baseline to compare against stored baselines[/dim]")
+                return
+            # Regular anomaly detection
             report = detect_anomalies(
                 dataset,
                 method=method,
@@ -392,7 +425,7 @@ def anomaly(
 @app.command()
 def info(
     source: str = typer.Argument(..., help="Path to file or connection string"),
-    table: Optional[str] = typer.Option(None, "--table", "-t", help="Table name"),
+    table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
 ) -> None:
     """
     Display information about a data source.
@@ -441,7 +474,7 @@ def info(
             if sem_type == "unknown":
                 sem_type = "-"
             if col_analysis.is_pii:
-                sem_type = f"🔒 {sem_type}"
+                sem_type = f"[PII] {sem_type}"
             col_table.add_row(
                 col_name,
@@ -472,11 +505,11 @@ def _display_execution_result(result, verbose: bool = False) -> None:
     for check_result in result.results:
         if check_result.passed:
-            status = "[green]✓ PASS[/green]"
+            status = "[green]PASS[/green]"
         elif check_result.severity.value == "warning":
-            status = "[yellow]⚠ WARN[/yellow]"
+            status = "[yellow]WARN[/yellow]"
         else:
-            status = "[red]✗ FAIL[/red]"
+            status = "[red]FAIL[/red]"
         col_str = f"[{check_result.column}] " if check_result.column else ""
         table.add_row(
@@ -490,10 +523,10 @@ def _display_execution_result(result, verbose: bool = False) -> None:
     # Summary
     console.print()
     if result.passed:
-        console.print(f"[green]✓ All {result.total_checks} checks passed[/green]")
+        console.print(f"[green]All {result.total_checks} checks passed[/green]")
     else:
         console.print(
-            f"[red]✗ {result.failed_count} failed[/red], "
+            f"[red]{result.failed_count} failed[/red], "
             f"[yellow]{result.warning_count} warnings[/yellow], "
             f"[green]{result.passed_count} passed[/green]"
         )
@@ -507,7 +540,7 @@ def _display_quick_results(results: list) -> None:
     table.add_column("Details")
     for check_name, passed, details, _ in results:
-        status = "[green]✓ PASS[/green]" if passed else "[red]✗ FAIL[/red]"
+        status = "[green]PASS[/green]" if passed else "[red]FAIL[/red]"
         table.add_row(check_name, status, details)
     console.print(table)
@@ -534,8 +567,8 @@ def _display_discovery_results(analysis, ruleset) -> None:
     # PII warning
     if analysis.pii_columns:
         console.print(Panel(
-            "[yellow]⚠️ PII Detected[/yellow]\n" +
-            "\n".join(f"  • {col}" for col in analysis.pii_columns),
+            "[yellow]WARNING: PII Detected[/yellow]\n" +
+            "\n".join(f"  - {col}" for col in analysis.pii_columns),
             border_style="yellow",
         ))
         console.print()
@@ -549,7 +582,7 @@ def _display_discovery_results(analysis, ruleset) -> None:
     for col in analysis.columns[:15]:
         sem = col.semantic_type.value
         if col.is_pii:
-            sem = f"🔒 {sem}"
+            sem = f"[PII] {sem}"
         rules = ", ".join(col.suggested_validations[:3])
         if len(col.suggested_validations) > 3:
@@ -582,9 +615,9 @@ def _display_contract(contract) -> None:
         table.add_row(
             field_obj.name,
             type_str,
-            "✓" if field_obj.required else "",
-            "✓" if field_obj.unique else "",
-            "🔒" if field_obj.pii else "",
+            "Y" if field_obj.required else "",
+            "Y" if field_obj.unique else "",
+            "[PII]" if field_obj.pii else "",
         )
     console.print(table)
@@ -593,14 +626,14 @@ def _display_contract(contract) -> None:
     if contract.quality:
         console.print("\n[bold]Quality SLA:[/bold]")
         if contract.quality.completeness:
-            console.print(f"  • Completeness: {contract.quality.completeness}%")
+            console.print(f"  - Completeness: {contract.quality.completeness}%")
         if contract.quality.row_count_min:
-            console.print(f"  • Min rows: {contract.quality.row_count_min:,}")
+            console.print(f"  - Min rows: {contract.quality.row_count_min:,}")
 def _display_contract_validation(result) -> None:
     """Display contract validation results."""
-    status = "[green]✓ PASSED[/green]" if result.passed else "[red]✗ FAILED[/red]"
+    status = "[green]PASSED[/green]" if result.passed else "[red]FAILED[/red]"
     console.print(f"Contract: [bold]{result.contract.name}[/bold] v{result.contract.version}")
     console.print(f"Status: {status}\n")
@@ -627,7 +660,7 @@ def _display_contract_validation(result) -> None:
 def _display_contract_diff(diff) -> None:
     """Display contract diff."""
-    console.print(f"[bold]Comparing contracts[/bold]")
+    console.print("[bold]Comparing contracts[/bold]")
     console.print(f"  Old: v{diff.old_contract.version}")
     console.print(f"  New: v{diff.new_contract.version}\n")
@@ -640,19 +673,19 @@ def _display_contract_diff(diff) -> None:
     if diff.breaking_changes:
         console.print("[red bold]Breaking Changes:[/red bold]")
         for change in diff.breaking_changes:
-            console.print(f"  ❌ {change.message}")
+            console.print(f"  [red]X[/red] {change.message}")
         console.print()
     if diff.minor_changes:
         console.print("[yellow bold]Minor Changes:[/yellow bold]")
         for change in diff.minor_changes:
-            console.print(f"  ⚠️ {change.message}")
+            console.print(f"  [yellow]![/yellow] {change.message}")
         console.print()
     if diff.non_breaking_changes:
         console.print("[dim]Non-breaking Changes:[/dim]")
         for change in diff.non_breaking_changes:
-            console.print(f"  • {change.message}")
+            console.print(f"  - {change.message}")
     console.print(f"\n[dim]Suggested version bump: {diff.suggest_version_bump()}[/dim]")
@@ -660,10 +693,10 @@ def _display_contract_diff(diff) -> None:
 def _display_anomaly_report(report) -> None:
     """Display anomaly detection report."""
     if not report.has_anomalies:
-        console.print("[green]✓ No anomalies detected[/green]")
+        console.print("[green]No anomalies detected[/green]")
         return
-    console.print(f"[yellow bold]⚠️ {report.anomaly_count} anomalies detected[/yellow bold]\n")
+    console.print(f"[yellow bold]WARNING: {report.anomaly_count} anomalies detected[/yellow bold]\n")
     table = Table(title="Anomalies")
     table.add_column("Column", style="cyan")
@@ -702,5 +735,477 @@ def _save_results(output: str, dataset, results) -> None:
     Path(output).write_text(json.dumps(data, indent=2))
+@app.command()
+def history(
+    source: str | None = typer.Argument(None, help="Data source to query history for (optional)"),
+    last: str = typer.Option("30d", "--last", "-l", help="Time period: 7d, 30d, 90d"),
+    output_format: str = typer.Option("table", "--format", "-f", help="Output format: table, json"),
+    trend: bool = typer.Option(False, "--trend", "-t", help="Show quality trend analysis"),
+    db_path: str | None = typer.Option(None, "--db", help="Path to history database"),
+) -> None:
+    """
+    Query historical validation results.
+    Shows past validation runs and quality score trends over time.
+    [bold]Examples:[/bold]
+        duckguard history                        # Show all recent runs
+        duckguard history data.csv               # Show runs for specific source
+        duckguard history data.csv --last 7d    # Last 7 days
+        duckguard history data.csv --trend      # Show trend analysis
+        duckguard history --format json         # Output as JSON
+    """
+    import json as json_module
+    from duckguard.history import HistoryStorage, TrendAnalyzer
+    try:
+        storage = HistoryStorage(db_path=db_path)
+        # Parse time period
+        days = int(last.rstrip("d"))
+        if trend and source:
+            # Show trend analysis
+            console.print(f"\n[bold blue]DuckGuard[/bold blue] Trend Analysis: [cyan]{source}[/cyan]\n")
+            analyzer = TrendAnalyzer(storage)
+            analysis = analyzer.analyze(source, days=days)
+            if analysis.total_runs == 0:
+                console.print("[yellow]No historical data found for this source.[/yellow]")
+                console.print("[dim]Run some validations first, then check history.[/dim]")
+                return
+            # Display trend summary
+            trend_color = {
+                "improving": "green",
+                "declining": "red",
+                "stable": "yellow",
+            }.get(analysis.score_trend, "white")
+            trend_symbol = {
+                "improving": "[+]",
+                "declining": "[-]",
+                "stable": "[=]",
+            }.get(analysis.score_trend, "[=]")
+            console.print(Panel(
+                f"[bold]Quality Trend: [{trend_color}]{trend_symbol} {analysis.score_trend.upper()}[/{trend_color}][/bold]\n\n"
+                f"Current Score: [cyan]{analysis.current_score:.1f}%[/cyan]\n"
+                f"Average Score: [cyan]{analysis.average_score:.1f}%[/cyan]\n"
+                f"Min/Max: [dim]{analysis.min_score:.1f}% - {analysis.max_score:.1f}%[/dim]\n"
+                f"Change: [{trend_color}]{analysis.trend_change:+.1f}%[/{trend_color}]\n"
+                f"Total Runs: [cyan]{analysis.total_runs}[/cyan]\n"
+                f"Pass Rate: [cyan]{analysis.pass_rate:.1f}%[/cyan]",
+                title=f"Last {days} Days",
+                border_style=trend_color,
+            ))
+            if analysis.anomalies:
+                console.print(f"\n[yellow]Anomalies detected on: {', '.join(analysis.anomalies)}[/yellow]")
+            # Show daily data if available
+            if analysis.daily_data and len(analysis.daily_data) <= 14:
+                console.print()
+                table = Table(title="Daily Quality Scores")
+                table.add_column("Date", style="cyan")
+                table.add_column("Score", justify="right")
+                table.add_column("Runs", justify="right")
+                table.add_column("Pass Rate", justify="right")
+                for day in analysis.daily_data:
+                    pass_rate = (day.passed_count / day.run_count * 100) if day.run_count > 0 else 0
+                    score_style = "green" if day.avg_score >= 80 else "yellow" if day.avg_score >= 60 else "red"
+                    table.add_row(
+                        day.date,
+                        f"[{score_style}]{day.avg_score:.1f}%[/{score_style}]",
+                        str(day.run_count),
+                        f"{pass_rate:.0f}%",
+                    )
+                console.print(table)
+        else:
+            # Show run history
+            if source:
+                console.print(f"\n[bold blue]DuckGuard[/bold blue] History: [cyan]{source}[/cyan]\n")
+                runs = storage.get_runs(source, limit=20)
+            else:
+                console.print("\n[bold blue]DuckGuard[/bold blue] Recent Validation History\n")
+                runs = storage.get_runs(limit=20)
+            if not runs:
+                console.print("[yellow]No historical data found.[/yellow]")
+                console.print("[dim]Run some validations first, then check history.[/dim]")
+                return
+            if output_format == "json":
+                # JSON output
+                data = [
+                    {
+                        "run_id": run.run_id,
+                        "source": run.source,
+                        "started_at": run.started_at.isoformat(),
+                        "quality_score": run.quality_score,
+                        "passed": run.passed,
+                        "total_checks": run.total_checks,
+                        "passed_count": run.passed_count,
+                        "failed_count": run.failed_count,
+                        "warning_count": run.warning_count,
+                    }
+                    for run in runs
+                ]
+                console.print(json_module.dumps(data, indent=2))
+            else:
+                # Table output
+                table = Table(title=f"Validation Runs (Last {days} days)")
+                table.add_column("Date", style="cyan")
+                table.add_column("Source", style="dim", max_width=40)
+                table.add_column("Score", justify="right")
+                table.add_column("Status", justify="center")
+                table.add_column("Checks", justify="right")
+                for run in runs:
+                    score_style = "green" if run.quality_score >= 80 else "yellow" if run.quality_score >= 60 else "red"
+                    status = "[green]PASS[/green]" if run.passed else "[red]FAIL[/red]"
+                    table.add_row(
+                        run.started_at.strftime("%Y-%m-%d %H:%M"),
+                        run.source[:40],
+                        f"[{score_style}]{run.quality_score:.1f}%[/{score_style}]",
+                        status,
+                        f"{run.passed_count}/{run.total_checks}",
+                    )
+                console.print(table)
+                # Show sources summary
+                sources = storage.get_sources()
+                if len(sources) > 1:
+                    console.print(f"\n[dim]Tracked sources: {len(sources)}[/dim]")
+    except Exception as e:
+        console.print(f"[red]Error:[/red] {e}")
+        raise typer.Exit(1)
+@app.command()
+def report(
+    source: str = typer.Argument(..., help="Data source path or connection string"),
+    config: str | None = typer.Option(None, "--config", "-c", help="Path to duckguard.yaml rules file"),
+    table: str | None = typer.Option(None, "--table", "-t", help="Table name (for databases)"),
+    output_format: str = typer.Option("html", "--format", "-f", help="Output format: html, pdf"),
+    output: str = typer.Option("report.html", "--output", "-o", help="Output file path"),
+    title: str = typer.Option("DuckGuard Data Quality Report", "--title", help="Report title"),
+    include_passed: bool = typer.Option(True, "--include-passed/--no-passed", help="Include passed checks"),
+    store: bool = typer.Option(False, "--store", "-s", help="Store results in history"),
+) -> None:
+    """
+    Generate a data quality report (HTML or PDF).
+    Runs validation checks and generates a beautiful, shareable report.
+    [bold]Examples:[/bold]
+        duckguard report data.csv
+        duckguard report data.csv --format pdf --output report.pdf
+        duckguard report data.csv --config rules.yaml --title "Orders Quality"
+        duckguard report data.csv --store  # Also save to history
+    """
+    from duckguard.connectors import connect
+    from duckguard.reports import generate_html_report, generate_pdf_report
+    from duckguard.rules import execute_rules, generate_rules, load_rules
+    # Determine output path based on format
+    if output == "report.html" and output_format == "pdf":
+        output = "report.pdf"
+    console.print(f"\n[bold blue]DuckGuard[/bold blue] Generating {output_format.upper()} report\n")
+    try:
+        with Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            console=console,
+            transient=True,
+        ) as progress:
+            progress.add_task("Connecting to data source...", total=None)
+            dataset = connect(source, table=table)
+        console.print(f"[dim]Source: {source}[/dim]")
+        console.print(f"[dim]Rows: {dataset.row_count:,} | Columns: {dataset.column_count}[/dim]\n")
+        with Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            console=console,
+            transient=True,
+        ) as progress:
+            progress.add_task("Running validation checks...", total=None)
+            if config:
+                ruleset = load_rules(config)
+            else:
+                ruleset = generate_rules(dataset, as_yaml=False)
+            result = execute_rules(ruleset, dataset=dataset)
+        # Store in history if requested
+        if store:
+            from duckguard.history import HistoryStorage
+            storage = HistoryStorage()
+            run_id = storage.store(result)
+            console.print(f"[dim]Stored in history: {run_id[:8]}...[/dim]\n")
+        # Display summary
+        status = "[green]PASSED[/green]" if result.passed else "[red]FAILED[/red]"
+        console.print(f"Validation: {status}")
+        console.print(f"Quality Score: [cyan]{result.quality_score:.1f}%[/cyan]")
+        console.print(f"Checks: {result.passed_count}/{result.total_checks} passed\n")
+        # Generate report
+        with Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            console=console,
+            transient=True,
+        ) as progress:
+            progress.add_task(f"Generating {output_format.upper()} report...", total=None)
+            if output_format.lower() == "pdf":
+                generate_pdf_report(result, output, title=title, include_passed=include_passed)
+            else:
+                generate_html_report(result, output, title=title, include_passed=include_passed)
+        console.print(f"[green]SAVED[/green] Report saved to [cyan]{output}[/cyan]")
+        console.print("[dim]Open in browser to view the report[/dim]")
+    except ImportError as e:
+        if "weasyprint" in str(e).lower():
+            console.print("[red]Error:[/red] PDF generation requires weasyprint.")
+            console.print("[dim]Install with: pip install duckguard[reports][/dim]")
+        else:
+            console.print(f"[red]Error:[/red] {e}")
+        raise typer.Exit(1)
+    except Exception as e:
+        console.print(f"[red]Error:[/red] {e}")
+        raise typer.Exit(1)
+@app.command()
+def freshness(
+    source: str = typer.Argument(..., help="Data source path"),
+    column: str | None = typer.Option(None, "--column", "-c", help="Timestamp column to check"),
+    max_age: str = typer.Option("24h", "--max-age", "-m", help="Maximum acceptable age: 1h, 6h, 24h, 7d"),
+    output_format: str = typer.Option("table", "--format", "-f", help="Output format: table, json"),
+) -> None:
+    """
+    Check data freshness.
+    Monitors how recently data was updated using file modification time
+    or timestamp columns.
+    [bold]Examples:[/bold]
+        duckguard freshness data.csv
+        duckguard freshness data.csv --max-age 6h
+        duckguard freshness data.csv --column updated_at
+        duckguard freshness data.csv --format json
+    """
+    # NOTE: the docstring above doubles as the command's --help text, so
+    # maintainer notes live in comments instead.
+    #
+    # Flow: parse --max-age into a threshold, run one freshness check, render
+    # the result (JSON or a panel), then exit with status 1 when the data is
+    # stale or when any step fails.
+    import json as json_module
+    from pathlib import Path
+    from duckguard.connectors import connect
+    from duckguard.freshness import FreshnessMonitor
+    from duckguard.freshness.monitor import parse_age_string
+    console.print(f"\n[bold blue]DuckGuard[/bold blue] Checking freshness: [cyan]{source}[/cyan]\n")
+    try:
+        threshold = parse_age_string(max_age)
+        monitor = FreshnessMonitor(threshold=threshold)
+        with Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            console=console,
+            transient=True,
+        ) as progress:
+            progress.add_task("Checking freshness...", total=None)
+            if column:
+                # An explicit timestamp column always wins over file metadata.
+                dataset = connect(source)
+                result = monitor.check_column_timestamp(dataset, column)
+            elif Path(source).exists():
+                # Local path: use the file's modification time.
+                result = monitor.check_file_mtime(source)
+            else:
+                # Not a local path (e.g. a connection string): let the monitor
+                # decide how to check the connected dataset.
+                dataset = connect(source)
+                result = monitor.check(dataset)
+        # Compare case-insensitively so "--format JSON" works, matching how the
+        # report command normalises its own --format value.
+        if output_format.lower() == "json":
+            console.print(json_module.dumps(result.to_dict(), indent=2))
+        else:
+            # Display table
+            status_color = "green" if result.is_fresh else "red"
+            status_text = "FRESH" if result.is_fresh else "STALE"
+            console.print(Panel(
+                f"[bold {status_color}]{status_text}[/bold {status_color}]\n\n"
+                f"Last Modified: [cyan]{result.last_modified.strftime('%Y-%m-%d %H:%M:%S') if result.last_modified else 'Unknown'}[/cyan]\n"
+                f"Age: [cyan]{result.age_human}[/cyan]\n"
+                f"Threshold: [dim]{max_age}[/dim]\n"
+                f"Method: [dim]{result.method.value}[/dim]",
+                title="Freshness Check",
+                border_style=status_color,
+            ))
+        if not result.is_fresh:
+            # Non-zero exit lets shell scripts / CI gate on staleness.
+            raise typer.Exit(1)
+    except typer.Exit:
+        # typer.Exit is click's Exit, which derives from RuntimeError. Without
+        # this clause the generic handler below would catch the deliberate
+        # "stale" exit and print a bogus "Error: 1" line before exiting.
+        raise
+    except Exception as e:
+        console.print(f"[red]Error:[/red] {e}")
+        raise typer.Exit(1)
+@app.command()
+def schema(
+    source: str = typer.Argument(..., help="Data source path"),
+    action: str = typer.Option("show", "--action", "-a", help="Action: show, capture, history, changes"),
+    table: str | None = typer.Option(None, "--table", "-t", help="Table name (for databases)"),
+    output_format: str = typer.Option("table", "--format", "-f", help="Output format: table, json"),
+    limit: int = typer.Option(10, "--limit", "-l", help="Number of results to show"),
+) -> None:
+    """
+    Track schema evolution over time.
+    Captures schema snapshots and detects changes between versions.
+    [bold]Actions:[/bold]
+        show     - Show current schema
+        capture  - Capture a schema snapshot
+        history  - Show schema snapshot history
+        changes  - Detect changes from last snapshot
+    [bold]Examples:[/bold]
+        duckguard schema data.csv                    # Show current schema
+        duckguard schema data.csv --action capture  # Capture snapshot
+        duckguard schema data.csv --action history  # View history
+        duckguard schema data.csv --action changes  # Detect changes
+    """
+    # NOTE: the docstring above doubles as the command's --help text, so
+    # maintainer notes live in comments instead.
+    #
+    # Exit status: 1 on an unknown action, on breaking schema changes, or on
+    # any error; otherwise the command returns normally.
+    # --format is only consulted by the "history" action; --limit likewise.
+    import json as json_module
+    from duckguard.connectors import connect
+    from duckguard.schema_history import SchemaChangeAnalyzer, SchemaTracker
+    console.print(f"\n[bold blue]DuckGuard[/bold blue] Schema: [cyan]{source}[/cyan]\n")
+    try:
+        dataset = connect(source, table=table)
+        tracker = SchemaTracker()
+        analyzer = SchemaChangeAnalyzer()
+        if action == "show":
+            # Display current schema
+            col_table = Table(title="Current Schema")
+            col_table.add_column("Column", style="cyan")
+            col_table.add_column("Type", style="magenta")
+            col_table.add_column("Position", justify="right")
+            # The reference is produced by the engine itself, not typed by the
+            # user, before being interpolated into the DESCRIBE statement.
+            ref = dataset.engine.get_source_reference(dataset.source)
+            result = dataset.engine.execute(f"DESCRIBE {ref}")
+            # Position is the zero-based row index of the DESCRIBE output.
+            for i, row in enumerate(result.fetchall()):
+                col_table.add_row(row[0], row[1], str(i))
+            console.print(col_table)
+            console.print(f"\n[dim]Total columns: {dataset.column_count}[/dim]")
+        elif action == "capture":
+            with Progress(
+                SpinnerColumn(),
+                TextColumn("[progress.description]{task.description}"),
+                console=console,
+                transient=True,
+            ) as progress:
+                progress.add_task("Capturing schema snapshot...", total=None)
+                snapshot = tracker.capture(dataset)
+            console.print(f"[green]CAPTURED[/green] Schema snapshot: [cyan]{snapshot.snapshot_id[:8]}...[/cyan]")
+            # The history view below already guards row_count before applying
+            # the thousands separator; do the same here so a missing count
+            # (presumably None - verify against SchemaTracker) prints "-"
+            # instead of raising a format TypeError. A real 0 still prints "0".
+            rows_text = f"{snapshot.row_count:,}" if snapshot.row_count is not None else "-"
+            console.print(f"[dim]Columns: {snapshot.column_count} | Rows: {rows_text}[/dim]")
+            console.print(f"[dim]Captured at: {snapshot.captured_at.strftime('%Y-%m-%d %H:%M:%S')}[/dim]")
+        elif action == "history":
+            history = tracker.get_history(source, limit=limit)
+            if not history:
+                console.print("[yellow]No schema history found for this source.[/yellow]")
+                console.print("[dim]Use --action capture to create a snapshot first.[/dim]")
+                return
+            # Case-insensitive so "--format JSON" works, matching how the
+            # report command normalises its own --format value.
+            if output_format.lower() == "json":
+                data = [s.to_dict() for s in history]
+                console.print(json_module.dumps(data, indent=2))
+            else:
+                table_obj = Table(title="Schema History")
+                table_obj.add_column("Snapshot ID", style="cyan")
+                table_obj.add_column("Captured At", style="dim")
+                table_obj.add_column("Columns", justify="right")
+                table_obj.add_column("Rows", justify="right")
+                for snapshot in history:
+                    table_obj.add_row(
+                        snapshot.snapshot_id[:8] + "...",
+                        snapshot.captured_at.strftime("%Y-%m-%d %H:%M"),
+                        str(snapshot.column_count),
+                        f"{snapshot.row_count:,}" if snapshot.row_count else "-",
+                    )
+                console.print(table_obj)
+        elif action == "changes":
+            with Progress(
+                SpinnerColumn(),
+                TextColumn("[progress.description]{task.description}"),
+                console=console,
+                transient=True,
+            ) as progress:
+                progress.add_task("Detecting schema changes...", total=None)
+                report = analyzer.detect_changes(dataset)
+            if not report.has_changes:
+                console.print("[green]No schema changes detected[/green]")
+                console.print(f"[dim]Snapshot captured: {report.current_snapshot.snapshot_id[:8]}...[/dim]")
+                return
+            # Display changes
+            console.print(f"[yellow bold]{len(report.changes)} schema changes detected[/yellow bold]\n")
+            if report.has_breaking_changes:
+                console.print("[red bold]BREAKING CHANGES:[/red bold]")
+                for change in report.breaking_changes:
+                    console.print(f"  [red]X[/red] {change}")
+                console.print()
+            non_breaking = report.non_breaking_changes
+            if non_breaking:
+                console.print("[dim]Non-breaking changes:[/dim]")
+                for change in non_breaking:
+                    console.print(f"  - {change}")
+            if report.has_breaking_changes:
+                # Non-zero exit lets CI fail the build on breaking changes.
+                raise typer.Exit(1)
+        else:
+            console.print(f"[red]Error:[/red] Unknown action: {action}")
+            console.print("[dim]Valid actions: show, capture, history, changes[/dim]")
+            raise typer.Exit(1)
+    except typer.Exit:
+        # typer.Exit is click's Exit, which derives from RuntimeError. Without
+        # this clause the generic handler below would catch the deliberate
+        # exits above and print a bogus "Error: 1" line before exiting.
+        raise
+    except Exception as e:
+        console.print(f"[red]Error:[/red] {e}")
+        raise typer.Exit(1)
 # Script entry point: invoke the CLI app when this module is executed directly.
 if __name__ == "__main__":
     app()

duckguard 2.0.0__py3-none-any.whl → 2.3.0__py3-none-any.whl

duckguard 2.0.0__py3-none-any.whl → 2.3.0__py3-none-any.whl