duckguard-3.1.0-py3-none-any.whl → duckguard-3.2.0-py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -609,9 +609,9 @@ class ConditionalCheckHandler:
609
609
  # Normalize path for DuckDB (forward slashes work on all platforms)
610
610
  source_path = dataset._source.replace('\\', '/')
611
611
 
612
- # Format allowed values for SQL IN clause
612
+ # Format allowed values for SQL IN clause (with proper escaping)
613
613
  if isinstance(allowed_values[0], str):
614
- values_str = ", ".join(f"'{v}'" for v in allowed_values)
614
+ values_str = ", ".join(f"'{v.replace(chr(39), chr(39)+chr(39))}'" for v in allowed_values)
615
615
  else:
616
616
  values_str = ", ".join(str(v) for v in allowed_values)
617
617
 
@@ -701,11 +701,12 @@ class ConditionalCheckHandler:
701
701
  # Normalize path for DuckDB (forward slashes work on all platforms)
702
702
  source_path = dataset._source.replace('\\', '/')
703
703
 
704
+ safe_pattern = pattern.replace("'", "''")
704
705
  sql = f"""
705
706
  SELECT COUNT(*) as violations
706
707
  FROM '{source_path}'
707
708
  WHERE ({condition})
708
- AND NOT regexp_matches({column}::VARCHAR, '{pattern}')
709
+ AND NOT regexp_matches({column}::VARCHAR, '{safe_pattern}')
709
710
  """
710
711
 
711
712
  try:
duckguard/cli/main.py CHANGED
@@ -1118,20 +1118,31 @@ def report(
1118
1118
  True, "--include-passed/--no-passed", help="Include passed checks"
1119
1119
  ),
1120
1120
  store: bool = typer.Option(False, "--store", "-s", help="Store results in history"),
1121
+ trends: bool = typer.Option(
1122
+ False, "--trends", help="Include quality trend charts from history"
1123
+ ),
1124
+ trend_days: int = typer.Option(
1125
+ 30, "--trend-days", help="Number of days of history for trend charts"
1126
+ ),
1127
+ dark_mode: str = typer.Option("auto", "--dark-mode", help="Theme mode: auto, light, dark"),
1128
+ logo: str | None = typer.Option(None, "--logo", help="Logo URL or data URI for report header"),
1121
1129
  ) -> None:
1122
1130
  """
1123
1131
  Generate a data quality report (HTML or PDF).
1124
1132
 
1125
- Runs validation checks and generates a beautiful, shareable report.
1133
+ Runs validation checks and generates a beautiful, shareable report
1134
+ with dark mode, interactive tables, and optional trend charts.
1126
1135
 
1127
1136
  [bold]Examples:[/bold]
1128
1137
  duckguard report data.csv
1129
1138
  duckguard report data.csv --format pdf --output report.pdf
1130
1139
  duckguard report data.csv --config rules.yaml --title "Orders Quality"
1131
1140
  duckguard report data.csv --store # Also save to history
1141
+ duckguard report data.csv --trends # Include quality trend charts
1142
+ duckguard report data.csv --dark-mode dark # Force dark theme
1132
1143
  """
1133
1144
  from duckguard.connectors import connect
1134
- from duckguard.reports import generate_html_report, generate_pdf_report
1145
+ from duckguard.reports import HTMLReporter, PDFReporter, ReportConfig
1135
1146
  from duckguard.rules import execute_rules, generate_rules, load_rules
1136
1147
 
1137
1148
  # Determine output path based on format
@@ -1182,6 +1193,31 @@ def report(
1182
1193
  console.print(f"Quality Score: [cyan]{result.quality_score:.1f}%[/cyan]")
1183
1194
  console.print(f"Checks: {result.passed_count}/{result.total_checks} passed\n")
1184
1195
 
1196
+ # Load trend data if requested
1197
+ trend_data = None
1198
+ history_runs = None
1199
+ if trends:
1200
+ from duckguard.history import HistoryStorage
1201
+
1202
+ try:
1203
+ storage_for_trends = HistoryStorage()
1204
+ trend_data = storage_for_trends.get_trend(source, days=trend_days)
1205
+ history_runs = storage_for_trends.get_runs(source, limit=20)
1206
+ if not trend_data:
1207
+ console.print("[dim]No historical data found for trend charts[/dim]")
1208
+ except Exception:
1209
+ console.print("[dim]No historical data found for trend charts[/dim]")
1210
+
1211
+ # Build report config
1212
+ report_config = ReportConfig(
1213
+ title=title,
1214
+ include_passed=include_passed,
1215
+ include_trends=trends,
1216
+ trend_days=trend_days,
1217
+ dark_mode=dark_mode,
1218
+ logo_url=logo,
1219
+ )
1220
+
1185
1221
  # Generate report
1186
1222
  with Progress(
1187
1223
  SpinnerColumn(),
@@ -1192,9 +1228,18 @@ def report(
1192
1228
  progress.add_task(f"Generating {output_format.upper()} report...", total=None)
1193
1229
 
1194
1230
  if output_format.lower() == "pdf":
1195
- generate_pdf_report(result, output, title=title, include_passed=include_passed)
1231
+ reporter = PDFReporter(config=report_config)
1196
1232
  else:
1197
- generate_html_report(result, output, title=title, include_passed=include_passed)
1233
+ reporter = HTMLReporter(config=report_config)
1234
+
1235
+ reporter.generate(
1236
+ result,
1237
+ output,
1238
+ history=history_runs,
1239
+ trend_data=trend_data,
1240
+ row_count=dataset.row_count,
1241
+ column_count=dataset.column_count,
1242
+ )
1198
1243
 
1199
1244
  console.print(f"[green]SAVED[/green] Report saved to [cyan]{output}[/cyan]")
1200
1245
  console.print("[dim]Open in browser to view the report[/dim]")
@@ -1442,5 +1487,112 @@ def schema(
1442
1487
  raise typer.Exit(1)
1443
1488
 
1444
1489
 
1490
+ # =========================================================================
1491
+ # AI-Powered Commands
1492
+ # =========================================================================
1493
+
1494
+
1495
+ @app.command()
1496
+ def explain(
1497
+ source: str = typer.Argument(..., help="Path to file or connection string"),
1498
+ table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
1499
+ focus: str | None = typer.Option(None, "--focus", "-f", help="Column or aspect to focus on"),
1500
+ detail: str = typer.Option("medium", "--detail", "-d", help="Detail level: brief, medium, detailed"),
1501
+ ) -> None:
1502
+ """Explain data quality issues in plain English (AI-powered).
1503
+
1504
+ Requires: pip install duckguard[llm]
1505
+ """
1506
+ try:
1507
+ from duckguard.ai import explain as ai_explain
1508
+ from duckguard.connectors import connect as dg_connect
1509
+
1510
+ with Progress(SpinnerColumn(), TextColumn("[progress.description]{task.description}"), console=console):
1511
+ dataset = dg_connect(source, table=table)
1512
+
1513
+ with console.status("[bold green]Analyzing with AI..."):
1514
+ result = ai_explain(dataset, focus=focus, detail=detail)
1515
+
1516
+ console.print()
1517
+ console.print(Panel(result, title="[bold]Data Quality Explanation[/bold]", border_style="green"))
1518
+
1519
+ except ImportError:
1520
+ console.print("[red]Error:[/red] AI features require LLM packages.")
1521
+ console.print("Install with: [bold]pip install duckguard[llm][/bold]")
1522
+ raise typer.Exit(1)
1523
+ except Exception as e:
1524
+ console.print(f"[red]Error:[/red] {e}")
1525
+ raise typer.Exit(1)
1526
+
1527
+
1528
+ @app.command()
1529
+ def suggest(
1530
+ source: str = typer.Argument(..., help="Path to file or connection string"),
1531
+ table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
1532
+ output: str | None = typer.Option(None, "--output", "-o", help="Output file (default: stdout)"),
1533
+ strict: bool = typer.Option(False, "--strict", help="Generate stricter rules"),
1534
+ ) -> None:
1535
+ """Generate validation rules using AI (AI-powered).
1536
+
1537
+ Requires: pip install duckguard[llm]
1538
+ """
1539
+ try:
1540
+ from duckguard.ai import suggest_rules
1541
+ from duckguard.connectors import connect as dg_connect
1542
+
1543
+ with Progress(SpinnerColumn(), TextColumn("[progress.description]{task.description}"), console=console):
1544
+ dataset = dg_connect(source, table=table)
1545
+
1546
+ with console.status("[bold green]Generating rules with AI..."):
1547
+ rules_yaml = suggest_rules(dataset, strict=strict)
1548
+
1549
+ if output:
1550
+ with open(output, "w") as f:
1551
+ f.write(rules_yaml)
1552
+ console.print(f"[green]Rules written to {output}[/green]")
1553
+ else:
1554
+ console.print()
1555
+ console.print(Syntax(rules_yaml, "yaml", theme="monokai"))
1556
+
1557
+ except ImportError:
1558
+ console.print("[red]Error:[/red] AI features require LLM packages.")
1559
+ console.print("Install with: [bold]pip install duckguard[llm][/bold]")
1560
+ raise typer.Exit(1)
1561
+ except Exception as e:
1562
+ console.print(f"[red]Error:[/red] {e}")
1563
+ raise typer.Exit(1)
1564
+
1565
+
1566
+ @app.command()
1567
+ def fix(
1568
+ source: str = typer.Argument(..., help="Path to file or connection string"),
1569
+ table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
1570
+ ) -> None:
1571
+ """Suggest data quality fixes using AI (AI-powered).
1572
+
1573
+ Requires: pip install duckguard[llm]
1574
+ """
1575
+ try:
1576
+ from duckguard.ai import suggest_fixes
1577
+ from duckguard.connectors import connect as dg_connect
1578
+
1579
+ with Progress(SpinnerColumn(), TextColumn("[progress.description]{task.description}"), console=console):
1580
+ dataset = dg_connect(source, table=table)
1581
+
1582
+ with console.status("[bold green]Analyzing fixes with AI..."):
1583
+ result = suggest_fixes(dataset)
1584
+
1585
+ console.print()
1586
+ console.print(Panel(result, title="[bold]Suggested Fixes[/bold]", border_style="yellow"))
1587
+
1588
+ except ImportError:
1589
+ console.print("[red]Error:[/red] AI features require LLM packages.")
1590
+ console.print("Install with: [bold]pip install duckguard[llm][/bold]")
1591
+ raise typer.Exit(1)
1592
+ except Exception as e:
1593
+ console.print(f"[red]Error:[/red] {e}")
1594
+ raise typer.Exit(1)
1595
+
1596
+
1445
1597
  if __name__ == "__main__":
1446
1598
  app()
duckguard/core/column.py CHANGED
@@ -13,6 +13,14 @@ if TYPE_CHECKING:
13
13
  DEFAULT_SAMPLE_SIZE = 10
14
14
 
15
15
 
16
+ def _escape_sql_string(value: str) -> str:
17
+ """Escape a string value for safe use in SQL queries.
18
+
19
+ Replaces single quotes with doubled single quotes (SQL standard escaping).
20
+ """
21
+ return value.replace("'", "''")
22
+
23
+
16
24
  class Column:
17
25
  """
18
26
  Represents a column in a dataset with validation capabilities.
@@ -246,11 +254,12 @@ class Column:
246
254
  col = f'"{self._name}"'
247
255
 
248
256
  # DuckDB uses regexp_matches for regex
257
+ safe_pattern = _escape_sql_string(pattern)
249
258
  sql = f"""
250
259
  SELECT COUNT(*) as non_matching
251
260
  FROM {ref}
252
261
  WHERE {col} IS NOT NULL
253
- AND NOT regexp_matches({col}::VARCHAR, '{pattern}')
262
+ AND NOT regexp_matches({col}::VARCHAR, '{safe_pattern}')
254
263
  """
255
264
 
256
265
  non_matching = self._dataset.engine.fetch_value(sql) or 0
@@ -275,12 +284,13 @@ class Column:
275
284
  """Get sample of rows that failed pattern match."""
276
285
  ref = self._dataset.engine.get_source_reference(self._dataset.source)
277
286
  col = f'"{self._name}"'
287
+ safe_pattern = _escape_sql_string(pattern)
278
288
 
279
289
  sql = f"""
280
290
  SELECT row_number() OVER () as row_idx, {col} as val
281
291
  FROM {ref}
282
292
  WHERE {col} IS NOT NULL
283
- AND NOT regexp_matches({col}::VARCHAR, '{pattern}')
293
+ AND NOT regexp_matches({col}::VARCHAR, '{safe_pattern}')
284
294
  LIMIT {limit}
285
295
  """
286
296
 
@@ -310,9 +320,9 @@ class Column:
310
320
  ref = self._dataset.engine.get_source_reference(self._dataset.source)
311
321
  col = f'"{self._name}"'
312
322
 
313
- # Build value list for SQL
323
+ # Build value list for SQL (with proper escaping)
314
324
  formatted_values = ", ".join(
315
- f"'{v}'" if isinstance(v, str) else str(v) for v in values
325
+ f"'{_escape_sql_string(str(v))}'" if isinstance(v, str) else str(v) for v in values
316
326
  )
317
327
 
318
328
  sql = f"""
@@ -346,7 +356,7 @@ class Column:
346
356
  col = f'"{self._name}"'
347
357
 
348
358
  formatted_values = ", ".join(
349
- f"'{v}'" if isinstance(v, str) else str(v) for v in values
359
+ f"'{_escape_sql_string(str(v))}'" if isinstance(v, str) else str(v) for v in values
350
360
  )
351
361
 
352
362
  sql = f"""
duckguard/py.typed ADDED
File without changes