PyPI - duckguard - Versions diffs - 3.1.0__py3-none-any.whl → 3.2.0__py3-none-any.whl - Mend

duckguard 3.1.0__py3-none-any.whl → 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

duckguard/__init__.py +1 -1
duckguard/ai/__init__.py +33 -0
duckguard/ai/config.py +201 -0
duckguard/ai/explainer.py +109 -0
duckguard/ai/fixer.py +105 -0
duckguard/ai/natural_language.py +119 -0
duckguard/ai/rules_generator.py +121 -0
duckguard/checks/conditional.py +4 -3
duckguard/cli/main.py +156 -4
duckguard/core/column.py +15 -5
duckguard/py.typed +0 -0
duckguard/reports/html_reporter.py +522 -37
duckguard/reports/pdf_reporter.py +33 -5
duckguard/semantic/detector.py +18 -7
{duckguard-3.1.0.dist-info → duckguard-3.2.0.dist-info}/METADATA +98 -25
{duckguard-3.1.0.dist-info → duckguard-3.2.0.dist-info}/RECORD +20 -12
duckguard-3.2.0.dist-info/licenses/LICENSE +190 -0
duckguard-3.2.0.dist-info/licenses/NOTICE +7 -0
duckguard-3.1.0.dist-info/licenses/LICENSE +0 -55
{duckguard-3.1.0.dist-info → duckguard-3.2.0.dist-info}/WHEEL +0 -0
{duckguard-3.1.0.dist-info → duckguard-3.2.0.dist-info}/entry_points.txt +0 -0

duckguard/checks/conditional.py CHANGED Viewed

@@ -609,9 +609,9 @@ class ConditionalCheckHandler:
         # Normalize path for DuckDB (forward slashes work on all platforms)
         source_path = dataset._source.replace('\\', '/')
-        # Format allowed values for SQL IN clause
+        # Format allowed values for SQL IN clause (with proper escaping)
         if isinstance(allowed_values[0], str):
-            values_str = ", ".join(f"'{v}'" for v in allowed_values)
+            values_str = ", ".join(f"'{v.replace(chr(39), chr(39)+chr(39))}'" for v in allowed_values)
         else:
             values_str = ", ".join(str(v) for v in allowed_values)
@@ -701,11 +701,12 @@ class ConditionalCheckHandler:
         # Normalize path for DuckDB (forward slashes work on all platforms)
         source_path = dataset._source.replace('\\', '/')
+        safe_pattern = pattern.replace("'", "''")
         sql = f"""
             SELECT COUNT(*) as violations
             FROM '{source_path}'
             WHERE ({condition})
-              AND NOT regexp_matches({column}::VARCHAR, '{pattern}')
+              AND NOT regexp_matches({column}::VARCHAR, '{safe_pattern}')
         """
         try:

duckguard/cli/main.py CHANGED Viewed

@@ -1118,20 +1118,31 @@ def report(
         True, "--include-passed/--no-passed", help="Include passed checks"
     ),
     store: bool = typer.Option(False, "--store", "-s", help="Store results in history"),
+    trends: bool = typer.Option(
+        False, "--trends", help="Include quality trend charts from history"
+    ),
+    trend_days: int = typer.Option(
+        30, "--trend-days", help="Number of days of history for trend charts"
+    ),
+    dark_mode: str = typer.Option("auto", "--dark-mode", help="Theme mode: auto, light, dark"),
+    logo: str | None = typer.Option(None, "--logo", help="Logo URL or data URI for report header"),
 ) -> None:
     """
     Generate a data quality report (HTML or PDF).
-    Runs validation checks and generates a beautiful, shareable report.
+    Runs validation checks and generates a beautiful, shareable report
+    with dark mode, interactive tables, and optional trend charts.
     [bold]Examples:[/bold]
         duckguard report data.csv
         duckguard report data.csv --format pdf --output report.pdf
         duckguard report data.csv --config rules.yaml --title "Orders Quality"
         duckguard report data.csv --store  # Also save to history
+        duckguard report data.csv --trends  # Include quality trend charts
+        duckguard report data.csv --dark-mode dark  # Force dark theme
     """
     from duckguard.connectors import connect
-    from duckguard.reports import generate_html_report, generate_pdf_report
+    from duckguard.reports import HTMLReporter, PDFReporter, ReportConfig
     from duckguard.rules import execute_rules, generate_rules, load_rules
     # Determine output path based on format
@@ -1182,6 +1193,31 @@ def report(
         console.print(f"Quality Score: [cyan]{result.quality_score:.1f}%[/cyan]")
         console.print(f"Checks: {result.passed_count}/{result.total_checks} passed\n")
+        # Load trend data if requested
+        trend_data = None
+        history_runs = None
+        if trends:
+            from duckguard.history import HistoryStorage
+            try:
+                storage_for_trends = HistoryStorage()
+                trend_data = storage_for_trends.get_trend(source, days=trend_days)
+                history_runs = storage_for_trends.get_runs(source, limit=20)
+                if not trend_data:
+                    console.print("[dim]No historical data found for trend charts[/dim]")
+            except Exception:
+                console.print("[dim]No historical data found for trend charts[/dim]")
+        # Build report config
+        report_config = ReportConfig(
+            title=title,
+            include_passed=include_passed,
+            include_trends=trends,
+            trend_days=trend_days,
+            dark_mode=dark_mode,
+            logo_url=logo,
+        )
         # Generate report
         with Progress(
             SpinnerColumn(),
@@ -1192,9 +1228,18 @@ def report(
             progress.add_task(f"Generating {output_format.upper()} report...", total=None)
             if output_format.lower() == "pdf":
-                generate_pdf_report(result, output, title=title, include_passed=include_passed)
+                reporter = PDFReporter(config=report_config)
             else:
-                generate_html_report(result, output, title=title, include_passed=include_passed)
+                reporter = HTMLReporter(config=report_config)
+            reporter.generate(
+                result,
+                output,
+                history=history_runs,
+                trend_data=trend_data,
+                row_count=dataset.row_count,
+                column_count=dataset.column_count,
+            )
         console.print(f"[green]SAVED[/green] Report saved to [cyan]{output}[/cyan]")
         console.print("[dim]Open in browser to view the report[/dim]")
@@ -1442,5 +1487,112 @@ def schema(
         raise typer.Exit(1)
+# =========================================================================
+# AI-Powered Commands
+# =========================================================================
+@app.command()
+def explain(
+    source: str = typer.Argument(..., help="Path to file or connection string"),
+    table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
+    focus: str | None = typer.Option(None, "--focus", "-f", help="Column or aspect to focus on"),
+    detail: str = typer.Option("medium", "--detail", "-d", help="Detail level: brief, medium, detailed"),
+) -> None:
+    """Explain data quality issues in plain English (AI-powered).
+    Requires: pip install duckguard[llm]
+    """
+    try:
+        from duckguard.ai import explain as ai_explain
+        from duckguard.connectors import connect as dg_connect
+        with Progress(SpinnerColumn(), TextColumn("[progress.description]{task.description}"), console=console):
+            dataset = dg_connect(source, table=table)
+        with console.status("[bold green]Analyzing with AI..."):
+            result = ai_explain(dataset, focus=focus, detail=detail)
+        console.print()
+        console.print(Panel(result, title="[bold]Data Quality Explanation[/bold]", border_style="green"))
+    except ImportError:
+        console.print("[red]Error:[/red] AI features require LLM packages.")
+        console.print("Install with: [bold]pip install duckguard[llm][/bold]")
+        raise typer.Exit(1)
+    except Exception as e:
+        console.print(f"[red]Error:[/red] {e}")
+        raise typer.Exit(1)
+@app.command()
+def suggest(
+    source: str = typer.Argument(..., help="Path to file or connection string"),
+    table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
+    output: str | None = typer.Option(None, "--output", "-o", help="Output file (default: stdout)"),
+    strict: bool = typer.Option(False, "--strict", help="Generate stricter rules"),
+) -> None:
+    """Generate validation rules using AI (AI-powered).
+    Requires: pip install duckguard[llm]
+    """
+    try:
+        from duckguard.ai import suggest_rules
+        from duckguard.connectors import connect as dg_connect
+        with Progress(SpinnerColumn(), TextColumn("[progress.description]{task.description}"), console=console):
+            dataset = dg_connect(source, table=table)
+        with console.status("[bold green]Generating rules with AI..."):
+            rules_yaml = suggest_rules(dataset, strict=strict)
+        if output:
+            with open(output, "w") as f:
+                f.write(rules_yaml)
+            console.print(f"[green]Rules written to {output}[/green]")
+        else:
+            console.print()
+            console.print(Syntax(rules_yaml, "yaml", theme="monokai"))
+    except ImportError:
+        console.print("[red]Error:[/red] AI features require LLM packages.")
+        console.print("Install with: [bold]pip install duckguard[llm][/bold]")
+        raise typer.Exit(1)
+    except Exception as e:
+        console.print(f"[red]Error:[/red] {e}")
+        raise typer.Exit(1)
+@app.command()
+def fix(
+    source: str = typer.Argument(..., help="Path to file or connection string"),
+    table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
+) -> None:
+    """Suggest data quality fixes using AI (AI-powered).
+    Requires: pip install duckguard[llm]
+    """
+    try:
+        from duckguard.ai import suggest_fixes
+        from duckguard.connectors import connect as dg_connect
+        with Progress(SpinnerColumn(), TextColumn("[progress.description]{task.description}"), console=console):
+            dataset = dg_connect(source, table=table)
+        with console.status("[bold green]Analyzing fixes with AI..."):
+            result = suggest_fixes(dataset)
+        console.print()
+        console.print(Panel(result, title="[bold]Suggested Fixes[/bold]", border_style="yellow"))
+    except ImportError:
+        console.print("[red]Error:[/red] AI features require LLM packages.")
+        console.print("Install with: [bold]pip install duckguard[llm][/bold]")
+        raise typer.Exit(1)
+    except Exception as e:
+        console.print(f"[red]Error:[/red] {e}")
+        raise typer.Exit(1)
 if __name__ == "__main__":
     app()

duckguard/core/column.py CHANGED Viewed

@@ -13,6 +13,14 @@ if TYPE_CHECKING:
 DEFAULT_SAMPLE_SIZE = 10
+def _escape_sql_string(value: str) -> str:
+    """Escape a string value for safe use in SQL queries.
+    Replaces single quotes with doubled single quotes (SQL standard escaping).
+    """
+    return value.replace("'", "''")
 class Column:
     """
     Represents a column in a dataset with validation capabilities.
@@ -246,11 +254,12 @@ class Column:
         col = f'"{self._name}"'
         # DuckDB uses regexp_matches for regex
+        safe_pattern = _escape_sql_string(pattern)
         sql = f"""
         SELECT COUNT(*) as non_matching
         FROM {ref}
         WHERE {col} IS NOT NULL
-          AND NOT regexp_matches({col}::VARCHAR, '{pattern}')
+          AND NOT regexp_matches({col}::VARCHAR, '{safe_pattern}')
         """
         non_matching = self._dataset.engine.fetch_value(sql) or 0
@@ -275,12 +284,13 @@ class Column:
         """Get sample of rows that failed pattern match."""
         ref = self._dataset.engine.get_source_reference(self._dataset.source)
         col = f'"{self._name}"'
+        safe_pattern = _escape_sql_string(pattern)
         sql = f"""
         SELECT row_number() OVER () as row_idx, {col} as val
         FROM {ref}
         WHERE {col} IS NOT NULL
-          AND NOT regexp_matches({col}::VARCHAR, '{pattern}')
+          AND NOT regexp_matches({col}::VARCHAR, '{safe_pattern}')
         LIMIT {limit}
         """
@@ -310,9 +320,9 @@ class Column:
         ref = self._dataset.engine.get_source_reference(self._dataset.source)
         col = f'"{self._name}"'
-        # Build value list for SQL
+        # Build value list for SQL (with proper escaping)
         formatted_values = ", ".join(
-            f"'{v}'" if isinstance(v, str) else str(v) for v in values
+            f"'{_escape_sql_string(str(v))}'" if isinstance(v, str) else str(v) for v in values
         )
         sql = f"""
@@ -346,7 +356,7 @@ class Column:
         col = f'"{self._name}"'
         formatted_values = ", ".join(
-            f"'{v}'" if isinstance(v, str) else str(v) for v in values
+            f"'{_escape_sql_string(str(v))}'" if isinstance(v, str) else str(v) for v in values
         )
         sql = f"""

duckguard/py.typed ADDED Viewed

File without changes

duckguard 3.1.0__py3-none-any.whl → 3.2.0__py3-none-any.whl

duckguard 3.1.0__py3-none-any.whl → 3.2.0__py3-none-any.whl