duckguard 2.0.0__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. duckguard/__init__.py +55 -28
  2. duckguard/anomaly/__init__.py +1 -1
  3. duckguard/anomaly/detector.py +1 -5
  4. duckguard/anomaly/methods.py +1 -3
  5. duckguard/cli/main.py +304 -54
  6. duckguard/connectors/__init__.py +2 -2
  7. duckguard/connectors/bigquery.py +1 -1
  8. duckguard/connectors/databricks.py +1 -1
  9. duckguard/connectors/factory.py +2 -3
  10. duckguard/connectors/files.py +1 -1
  11. duckguard/connectors/kafka.py +2 -2
  12. duckguard/connectors/mongodb.py +1 -1
  13. duckguard/connectors/mysql.py +1 -1
  14. duckguard/connectors/oracle.py +1 -1
  15. duckguard/connectors/postgres.py +1 -2
  16. duckguard/connectors/redshift.py +1 -1
  17. duckguard/connectors/snowflake.py +1 -2
  18. duckguard/connectors/sqlite.py +1 -1
  19. duckguard/connectors/sqlserver.py +10 -13
  20. duckguard/contracts/__init__.py +6 -6
  21. duckguard/contracts/diff.py +1 -1
  22. duckguard/contracts/generator.py +5 -6
  23. duckguard/contracts/loader.py +4 -4
  24. duckguard/contracts/validator.py +3 -4
  25. duckguard/core/__init__.py +3 -3
  26. duckguard/core/column.py +110 -5
  27. duckguard/core/dataset.py +3 -3
  28. duckguard/core/result.py +92 -1
  29. duckguard/core/scoring.py +1 -2
  30. duckguard/errors.py +362 -0
  31. duckguard/history/__init__.py +44 -0
  32. duckguard/history/schema.py +183 -0
  33. duckguard/history/storage.py +479 -0
  34. duckguard/history/trends.py +348 -0
  35. duckguard/integrations/__init__.py +31 -0
  36. duckguard/integrations/airflow.py +387 -0
  37. duckguard/integrations/dbt.py +458 -0
  38. duckguard/notifications/__init__.py +43 -0
  39. duckguard/notifications/formatter.py +118 -0
  40. duckguard/notifications/notifiers.py +357 -0
  41. duckguard/profiler/auto_profile.py +3 -3
  42. duckguard/pytest_plugin/__init__.py +1 -1
  43. duckguard/pytest_plugin/plugin.py +1 -1
  44. duckguard/reporting/console.py +2 -2
  45. duckguard/reports/__init__.py +42 -0
  46. duckguard/reports/html_reporter.py +515 -0
  47. duckguard/reports/pdf_reporter.py +114 -0
  48. duckguard/rules/__init__.py +3 -3
  49. duckguard/rules/executor.py +3 -4
  50. duckguard/rules/generator.py +4 -4
  51. duckguard/rules/loader.py +5 -5
  52. duckguard/semantic/__init__.py +1 -1
  53. duckguard/semantic/analyzer.py +0 -2
  54. duckguard/semantic/validators.py +2 -1
  55. {duckguard-2.0.0.dist-info → duckguard-2.2.0.dist-info}/METADATA +135 -5
  56. duckguard-2.2.0.dist-info/RECORD +69 -0
  57. duckguard-2.0.0.dist-info/RECORD +0 -55
  58. {duckguard-2.0.0.dist-info → duckguard-2.2.0.dist-info}/WHEEL +0 -0
  59. {duckguard-2.0.0.dist-info → duckguard-2.2.0.dist-info}/entry_points.txt +0 -0
  60. {duckguard-2.0.0.dist-info → duckguard-2.2.0.dist-info}/licenses/LICENSE +0 -0
duckguard/cli/main.py CHANGED
@@ -5,21 +5,14 @@ A modern, beautiful CLI for data quality that just works.
5
5
 
6
6
  from __future__ import annotations
7
7
 
8
- import sys
9
8
  from pathlib import Path
10
- from typing import Optional
11
9
 
12
10
  import typer
13
11
  from rich.console import Console
14
12
  from rich.panel import Panel
15
- from rich.table import Table
16
- from rich.syntax import Syntax
17
13
  from rich.progress import Progress, SpinnerColumn, TextColumn
18
- from rich import print as rprint
19
- from rich.tree import Tree
20
- from rich.text import Text
21
- from rich.columns import Columns
22
- from rich.markdown import Markdown
14
+ from rich.syntax import Syntax
15
+ from rich.table import Table
23
16
 
24
17
  from duckguard import __version__
25
18
 
@@ -45,7 +38,7 @@ def version_callback(value: bool) -> None:
45
38
 
46
39
  @app.callback()
47
40
  def main(
48
- version: Optional[bool] = typer.Option(
41
+ version: bool | None = typer.Option(
49
42
  None,
50
43
  "--version",
51
44
  "-v",
@@ -61,11 +54,11 @@ def main(
61
54
  @app.command()
62
55
  def check(
63
56
  source: str = typer.Argument(..., help="Path to file or connection string"),
64
- config: Optional[str] = typer.Option(None, "--config", "-c", help="Path to duckguard.yaml rules file"),
65
- table: Optional[str] = typer.Option(None, "--table", "-t", help="Table name (for databases)"),
66
- not_null: Optional[list[str]] = typer.Option(None, "--not-null", "-n", help="Columns that must not be null"),
67
- unique: Optional[list[str]] = typer.Option(None, "--unique", "-u", help="Columns that must be unique"),
68
- output: Optional[str] = typer.Option(None, "--output", "-o", help="Output file (json)"),
57
+ config: str | None = typer.Option(None, "--config", "-c", help="Path to duckguard.yaml rules file"),
58
+ table: str | None = typer.Option(None, "--table", "-t", help="Table name (for databases)"),
59
+ not_null: list[str] | None = typer.Option(None, "--not-null", "-n", help="Columns that must not be null"),
60
+ unique: list[str] | None = typer.Option(None, "--unique", "-u", help="Columns that must be unique"),
61
+ output: str | None = typer.Option(None, "--output", "-o", help="Output file (json)"),
69
62
  verbose: bool = typer.Option(False, "--verbose", "-V", help="Verbose output"),
70
63
  ) -> None:
71
64
  """
@@ -78,8 +71,8 @@ def check(
78
71
  duckguard check postgres://localhost/db --table orders
79
72
  """
80
73
  from duckguard.connectors import connect
81
- from duckguard.rules import load_rules, execute_rules
82
74
  from duckguard.core.scoring import score
75
+ from duckguard.rules import execute_rules, load_rules
83
76
 
84
77
  console.print(f"\n[bold blue]DuckGuard[/bold blue] Checking: [cyan]{source}[/cyan]\n")
85
78
 
@@ -185,8 +178,8 @@ def check(
185
178
  @app.command()
186
179
  def discover(
187
180
  source: str = typer.Argument(..., help="Path to file or connection string"),
188
- table: Optional[str] = typer.Option(None, "--table", "-t", help="Table name"),
189
- output: Optional[str] = typer.Option(None, "--output", "-o", help="Output file for rules (duckguard.yaml)"),
181
+ table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
182
+ output: str | None = typer.Option(None, "--output", "-o", help="Output file for rules (duckguard.yaml)"),
190
183
  format: str = typer.Option("yaml", "--format", "-f", help="Output format: yaml, python"),
191
184
  ) -> None:
192
185
  """
@@ -213,7 +206,7 @@ def discover(
213
206
  console=console,
214
207
  transient=True,
215
208
  ) as progress:
216
- task = progress.add_task("Analyzing data...", total=None)
209
+ _task = progress.add_task("Analyzing data...", total=None) # noqa: F841
217
210
  dataset = connect(source, table=table)
218
211
 
219
212
  # Semantic analysis
@@ -230,7 +223,7 @@ def discover(
230
223
  if output:
231
224
  yaml_content = ruleset_to_yaml(ruleset)
232
225
  Path(output).write_text(yaml_content, encoding="utf-8")
233
- console.print(f"\n[green][/green] Rules saved to [cyan]{output}[/cyan]")
226
+ console.print(f"\n[green]SAVED[/green] Rules saved to [cyan]{output}[/cyan]")
234
227
  console.print(f"[dim]Run: duckguard check {source} --config {output}[/dim]")
235
228
  else:
236
229
  # Display YAML
@@ -250,8 +243,8 @@ def discover(
250
243
  def contract(
251
244
  action: str = typer.Argument(..., help="Action: generate, validate, diff"),
252
245
  source: str = typer.Argument(None, help="Data source or contract file"),
253
- contract_file: Optional[str] = typer.Option(None, "--contract", "-c", help="Contract file path"),
254
- output: Optional[str] = typer.Option(None, "--output", "-o", help="Output file"),
246
+ contract_file: str | None = typer.Option(None, "--contract", "-c", help="Contract file path"),
247
+ output: str | None = typer.Option(None, "--output", "-o", help="Output file"),
255
248
  strict: bool = typer.Option(False, "--strict", help="Strict validation mode"),
256
249
  ) -> None:
257
250
  """
@@ -268,13 +261,12 @@ def contract(
268
261
  duckguard contract diff old.contract.yaml new.contract.yaml
269
262
  """
270
263
  from duckguard.contracts import (
264
+ diff_contracts,
265
+ generate_contract,
271
266
  load_contract,
272
267
  validate_contract,
273
- generate_contract,
274
- diff_contracts,
275
268
  )
276
269
  from duckguard.contracts.loader import contract_to_yaml
277
- from duckguard.connectors import connect
278
270
 
279
271
  try:
280
272
  if action == "generate":
@@ -298,14 +290,14 @@ def contract(
298
290
  if output:
299
291
  yaml_content = contract_to_yaml(contract_obj)
300
292
  Path(output).write_text(yaml_content, encoding="utf-8")
301
- console.print(f"\n[green][/green] Contract saved to [cyan]{output}[/cyan]")
293
+ console.print(f"\n[green]SAVED[/green] Contract saved to [cyan]{output}[/cyan]")
302
294
 
303
295
  elif action == "validate":
304
296
  if not source or not contract_file:
305
297
  console.print("[red]Error:[/red] Both source and --contract required for validate")
306
298
  raise typer.Exit(1)
307
299
 
308
- console.print(f"\n[bold blue]DuckGuard[/bold blue] Validating against contract\n")
300
+ console.print("\n[bold blue]DuckGuard[/bold blue] Validating against contract\n")
309
301
 
310
302
  with Progress(
311
303
  SpinnerColumn(),
@@ -345,10 +337,10 @@ def contract(
345
337
  @app.command()
346
338
  def anomaly(
347
339
  source: str = typer.Argument(..., help="Path to file or connection string"),
348
- table: Optional[str] = typer.Option(None, "--table", "-t", help="Table name"),
340
+ table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
349
341
  method: str = typer.Option("zscore", "--method", "-m", help="Detection method: zscore, iqr, percent_change"),
350
- threshold: Optional[float] = typer.Option(None, "--threshold", help="Detection threshold"),
351
- columns: Optional[list[str]] = typer.Option(None, "--column", "-c", help="Specific columns to check"),
342
+ threshold: float | None = typer.Option(None, "--threshold", help="Detection threshold"),
343
+ columns: list[str] | None = typer.Option(None, "--column", "-c", help="Specific columns to check"),
352
344
  ) -> None:
353
345
  """
354
346
  Detect anomalies in data.
@@ -358,8 +350,8 @@ def anomaly(
358
350
  duckguard anomaly data.csv --method iqr --threshold 2.0
359
351
  duckguard anomaly data.csv --column amount --column quantity
360
352
  """
361
- from duckguard.connectors import connect
362
353
  from duckguard.anomaly import detect_anomalies
354
+ from duckguard.connectors import connect
363
355
 
364
356
  console.print(f"\n[bold blue]DuckGuard[/bold blue] Detecting anomalies in: [cyan]{source}[/cyan]\n")
365
357
 
@@ -392,7 +384,7 @@ def anomaly(
392
384
  @app.command()
393
385
  def info(
394
386
  source: str = typer.Argument(..., help="Path to file or connection string"),
395
- table: Optional[str] = typer.Option(None, "--table", "-t", help="Table name"),
387
+ table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
396
388
  ) -> None:
397
389
  """
398
390
  Display information about a data source.
@@ -441,7 +433,7 @@ def info(
441
433
  if sem_type == "unknown":
442
434
  sem_type = "-"
443
435
  if col_analysis.is_pii:
444
- sem_type = f"🔒 {sem_type}"
436
+ sem_type = f"[PII] {sem_type}"
445
437
 
446
438
  col_table.add_row(
447
439
  col_name,
@@ -472,11 +464,11 @@ def _display_execution_result(result, verbose: bool = False) -> None:
472
464
 
473
465
  for check_result in result.results:
474
466
  if check_result.passed:
475
- status = "[green]PASS[/green]"
467
+ status = "[green]PASS[/green]"
476
468
  elif check_result.severity.value == "warning":
477
- status = "[yellow]WARN[/yellow]"
469
+ status = "[yellow]WARN[/yellow]"
478
470
  else:
479
- status = "[red]FAIL[/red]"
471
+ status = "[red]FAIL[/red]"
480
472
 
481
473
  col_str = f"[{check_result.column}] " if check_result.column else ""
482
474
  table.add_row(
@@ -490,10 +482,10 @@ def _display_execution_result(result, verbose: bool = False) -> None:
490
482
  # Summary
491
483
  console.print()
492
484
  if result.passed:
493
- console.print(f"[green]All {result.total_checks} checks passed[/green]")
485
+ console.print(f"[green]All {result.total_checks} checks passed[/green]")
494
486
  else:
495
487
  console.print(
496
- f"[red]{result.failed_count} failed[/red], "
488
+ f"[red]{result.failed_count} failed[/red], "
497
489
  f"[yellow]{result.warning_count} warnings[/yellow], "
498
490
  f"[green]{result.passed_count} passed[/green]"
499
491
  )
@@ -507,7 +499,7 @@ def _display_quick_results(results: list) -> None:
507
499
  table.add_column("Details")
508
500
 
509
501
  for check_name, passed, details, _ in results:
510
- status = "[green]PASS[/green]" if passed else "[red]FAIL[/red]"
502
+ status = "[green]PASS[/green]" if passed else "[red]FAIL[/red]"
511
503
  table.add_row(check_name, status, details)
512
504
 
513
505
  console.print(table)
@@ -534,8 +526,8 @@ def _display_discovery_results(analysis, ruleset) -> None:
534
526
  # PII warning
535
527
  if analysis.pii_columns:
536
528
  console.print(Panel(
537
- "[yellow]⚠️ PII Detected[/yellow]\n" +
538
- "\n".join(f" {col}" for col in analysis.pii_columns),
529
+ "[yellow]WARNING: PII Detected[/yellow]\n" +
530
+ "\n".join(f" - {col}" for col in analysis.pii_columns),
539
531
  border_style="yellow",
540
532
  ))
541
533
  console.print()
@@ -549,7 +541,7 @@ def _display_discovery_results(analysis, ruleset) -> None:
549
541
  for col in analysis.columns[:15]:
550
542
  sem = col.semantic_type.value
551
543
  if col.is_pii:
552
- sem = f"🔒 {sem}"
544
+ sem = f"[PII] {sem}"
553
545
 
554
546
  rules = ", ".join(col.suggested_validations[:3])
555
547
  if len(col.suggested_validations) > 3:
@@ -582,9 +574,9 @@ def _display_contract(contract) -> None:
582
574
  table.add_row(
583
575
  field_obj.name,
584
576
  type_str,
585
- "" if field_obj.required else "",
586
- "" if field_obj.unique else "",
587
- "🔒" if field_obj.pii else "",
577
+ "Y" if field_obj.required else "",
578
+ "Y" if field_obj.unique else "",
579
+ "[PII]" if field_obj.pii else "",
588
580
  )
589
581
 
590
582
  console.print(table)
@@ -593,14 +585,14 @@ def _display_contract(contract) -> None:
593
585
  if contract.quality:
594
586
  console.print("\n[bold]Quality SLA:[/bold]")
595
587
  if contract.quality.completeness:
596
- console.print(f" Completeness: {contract.quality.completeness}%")
588
+ console.print(f" - Completeness: {contract.quality.completeness}%")
597
589
  if contract.quality.row_count_min:
598
- console.print(f" Min rows: {contract.quality.row_count_min:,}")
590
+ console.print(f" - Min rows: {contract.quality.row_count_min:,}")
599
591
 
600
592
 
601
593
  def _display_contract_validation(result) -> None:
602
594
  """Display contract validation results."""
603
- status = "[green]PASSED[/green]" if result.passed else "[red]FAILED[/red]"
595
+ status = "[green]PASSED[/green]" if result.passed else "[red]FAILED[/red]"
604
596
  console.print(f"Contract: [bold]{result.contract.name}[/bold] v{result.contract.version}")
605
597
  console.print(f"Status: {status}\n")
606
598
 
@@ -627,7 +619,7 @@ def _display_contract_validation(result) -> None:
627
619
 
628
620
  def _display_contract_diff(diff) -> None:
629
621
  """Display contract diff."""
630
- console.print(f"[bold]Comparing contracts[/bold]")
622
+ console.print("[bold]Comparing contracts[/bold]")
631
623
  console.print(f" Old: v{diff.old_contract.version}")
632
624
  console.print(f" New: v{diff.new_contract.version}\n")
633
625
 
@@ -640,19 +632,19 @@ def _display_contract_diff(diff) -> None:
640
632
  if diff.breaking_changes:
641
633
  console.print("[red bold]Breaking Changes:[/red bold]")
642
634
  for change in diff.breaking_changes:
643
- console.print(f" {change.message}")
635
+ console.print(f" [red]X[/red] {change.message}")
644
636
  console.print()
645
637
 
646
638
  if diff.minor_changes:
647
639
  console.print("[yellow bold]Minor Changes:[/yellow bold]")
648
640
  for change in diff.minor_changes:
649
- console.print(f" ⚠️ {change.message}")
641
+ console.print(f" [yellow]![/yellow] {change.message}")
650
642
  console.print()
651
643
 
652
644
  if diff.non_breaking_changes:
653
645
  console.print("[dim]Non-breaking Changes:[/dim]")
654
646
  for change in diff.non_breaking_changes:
655
- console.print(f" {change.message}")
647
+ console.print(f" - {change.message}")
656
648
 
657
649
  console.print(f"\n[dim]Suggested version bump: {diff.suggest_version_bump()}[/dim]")
658
650
 
@@ -660,10 +652,10 @@ def _display_contract_diff(diff) -> None:
660
652
  def _display_anomaly_report(report) -> None:
661
653
  """Display anomaly detection report."""
662
654
  if not report.has_anomalies:
663
- console.print("[green]No anomalies detected[/green]")
655
+ console.print("[green]No anomalies detected[/green]")
664
656
  return
665
657
 
666
- console.print(f"[yellow bold]⚠️ {report.anomaly_count} anomalies detected[/yellow bold]\n")
658
+ console.print(f"[yellow bold]WARNING: {report.anomaly_count} anomalies detected[/yellow bold]\n")
667
659
 
668
660
  table = Table(title="Anomalies")
669
661
  table.add_column("Column", style="cyan")
@@ -702,5 +694,263 @@ def _save_results(output: str, dataset, results) -> None:
702
694
  Path(output).write_text(json.dumps(data, indent=2))
703
695
 
704
696
 
697
+ @app.command()
698
+ def history(
699
+ source: str | None = typer.Argument(None, help="Data source to query history for (optional)"),
700
+ last: str = typer.Option("30d", "--last", "-l", help="Time period: 7d, 30d, 90d"),
701
+ output_format: str = typer.Option("table", "--format", "-f", help="Output format: table, json"),
702
+ trend: bool = typer.Option(False, "--trend", "-t", help="Show quality trend analysis"),
703
+ db_path: str | None = typer.Option(None, "--db", help="Path to history database"),
704
+ ) -> None:
705
+ """
706
+ Query historical validation results.
707
+
708
+ Shows past validation runs and quality score trends over time.
709
+
710
+ [bold]Examples:[/bold]
711
+ duckguard history # Show all recent runs
712
+ duckguard history data.csv # Show runs for specific source
713
+ duckguard history data.csv --last 7d # Last 7 days
714
+ duckguard history data.csv --trend # Show trend analysis
715
+ duckguard history --format json # Output as JSON
716
+ """
717
+ import json as json_module
718
+
719
+ from duckguard.history import HistoryStorage, TrendAnalyzer
720
+
721
+ try:
722
+ storage = HistoryStorage(db_path=db_path)
723
+
724
+ # Parse time period
725
+ days = int(last.rstrip("d"))
726
+
727
+ if trend and source:
728
+ # Show trend analysis
729
+ console.print(f"\n[bold blue]DuckGuard[/bold blue] Trend Analysis: [cyan]{source}[/cyan]\n")
730
+
731
+ analyzer = TrendAnalyzer(storage)
732
+ analysis = analyzer.analyze(source, days=days)
733
+
734
+ if analysis.total_runs == 0:
735
+ console.print("[yellow]No historical data found for this source.[/yellow]")
736
+ console.print("[dim]Run some validations first, then check history.[/dim]")
737
+ return
738
+
739
+ # Display trend summary
740
+ trend_color = {
741
+ "improving": "green",
742
+ "declining": "red",
743
+ "stable": "yellow",
744
+ }.get(analysis.score_trend, "white")
745
+
746
+ trend_symbol = {
747
+ "improving": "[+]",
748
+ "declining": "[-]",
749
+ "stable": "[=]",
750
+ }.get(analysis.score_trend, "[=]")
751
+
752
+ console.print(Panel(
753
+ f"[bold]Quality Trend: [{trend_color}]{trend_symbol} {analysis.score_trend.upper()}[/{trend_color}][/bold]\n\n"
754
+ f"Current Score: [cyan]{analysis.current_score:.1f}%[/cyan]\n"
755
+ f"Average Score: [cyan]{analysis.average_score:.1f}%[/cyan]\n"
756
+ f"Min/Max: [dim]{analysis.min_score:.1f}% - {analysis.max_score:.1f}%[/dim]\n"
757
+ f"Change: [{trend_color}]{analysis.trend_change:+.1f}%[/{trend_color}]\n"
758
+ f"Total Runs: [cyan]{analysis.total_runs}[/cyan]\n"
759
+ f"Pass Rate: [cyan]{analysis.pass_rate:.1f}%[/cyan]",
760
+ title=f"Last {days} Days",
761
+ border_style=trend_color,
762
+ ))
763
+
764
+ if analysis.anomalies:
765
+ console.print(f"\n[yellow]Anomalies detected on: {', '.join(analysis.anomalies)}[/yellow]")
766
+
767
+ # Show daily data if available
768
+ if analysis.daily_data and len(analysis.daily_data) <= 14:
769
+ console.print()
770
+ table = Table(title="Daily Quality Scores")
771
+ table.add_column("Date", style="cyan")
772
+ table.add_column("Score", justify="right")
773
+ table.add_column("Runs", justify="right")
774
+ table.add_column("Pass Rate", justify="right")
775
+
776
+ for day in analysis.daily_data:
777
+ pass_rate = (day.passed_count / day.run_count * 100) if day.run_count > 0 else 0
778
+ score_style = "green" if day.avg_score >= 80 else "yellow" if day.avg_score >= 60 else "red"
779
+ table.add_row(
780
+ day.date,
781
+ f"[{score_style}]{day.avg_score:.1f}%[/{score_style}]",
782
+ str(day.run_count),
783
+ f"{pass_rate:.0f}%",
784
+ )
785
+
786
+ console.print(table)
787
+
788
+ else:
789
+ # Show run history
790
+ if source:
791
+ console.print(f"\n[bold blue]DuckGuard[/bold blue] History: [cyan]{source}[/cyan]\n")
792
+ runs = storage.get_runs(source, limit=20)
793
+ else:
794
+ console.print("\n[bold blue]DuckGuard[/bold blue] Recent Validation History\n")
795
+ runs = storage.get_runs(limit=20)
796
+
797
+ if not runs:
798
+ console.print("[yellow]No historical data found.[/yellow]")
799
+ console.print("[dim]Run some validations first, then check history.[/dim]")
800
+ return
801
+
802
+ if output_format == "json":
803
+ # JSON output
804
+ data = [
805
+ {
806
+ "run_id": run.run_id,
807
+ "source": run.source,
808
+ "started_at": run.started_at.isoformat(),
809
+ "quality_score": run.quality_score,
810
+ "passed": run.passed,
811
+ "total_checks": run.total_checks,
812
+ "passed_count": run.passed_count,
813
+ "failed_count": run.failed_count,
814
+ "warning_count": run.warning_count,
815
+ }
816
+ for run in runs
817
+ ]
818
+ console.print(json_module.dumps(data, indent=2))
819
+ else:
820
+ # Table output
821
+ table = Table(title=f"Validation Runs (Last {days} days)")
822
+ table.add_column("Date", style="cyan")
823
+ table.add_column("Source", style="dim", max_width=40)
824
+ table.add_column("Score", justify="right")
825
+ table.add_column("Status", justify="center")
826
+ table.add_column("Checks", justify="right")
827
+
828
+ for run in runs:
829
+ score_style = "green" if run.quality_score >= 80 else "yellow" if run.quality_score >= 60 else "red"
830
+ status = "[green]PASS[/green]" if run.passed else "[red]FAIL[/red]"
831
+
832
+ table.add_row(
833
+ run.started_at.strftime("%Y-%m-%d %H:%M"),
834
+ run.source[:40],
835
+ f"[{score_style}]{run.quality_score:.1f}%[/{score_style}]",
836
+ status,
837
+ f"{run.passed_count}/{run.total_checks}",
838
+ )
839
+
840
+ console.print(table)
841
+
842
+ # Show sources summary
843
+ sources = storage.get_sources()
844
+ if len(sources) > 1:
845
+ console.print(f"\n[dim]Tracked sources: {len(sources)}[/dim]")
846
+
847
+ except Exception as e:
848
+ console.print(f"[red]Error:[/red] {e}")
849
+ raise typer.Exit(1)
850
+
851
+
852
+ @app.command()
853
+ def report(
854
+ source: str = typer.Argument(..., help="Data source path or connection string"),
855
+ config: str | None = typer.Option(None, "--config", "-c", help="Path to duckguard.yaml rules file"),
856
+ table: str | None = typer.Option(None, "--table", "-t", help="Table name (for databases)"),
857
+ output_format: str = typer.Option("html", "--format", "-f", help="Output format: html, pdf"),
858
+ output: str = typer.Option("report.html", "--output", "-o", help="Output file path"),
859
+ title: str = typer.Option("DuckGuard Data Quality Report", "--title", help="Report title"),
860
+ include_passed: bool = typer.Option(True, "--include-passed/--no-passed", help="Include passed checks"),
861
+ store: bool = typer.Option(False, "--store", "-s", help="Store results in history"),
862
+ ) -> None:
863
+ """
864
+ Generate a data quality report (HTML or PDF).
865
+
866
+ Runs validation checks and generates a beautiful, shareable report.
867
+
868
+ [bold]Examples:[/bold]
869
+ duckguard report data.csv
870
+ duckguard report data.csv --format pdf --output report.pdf
871
+ duckguard report data.csv --config rules.yaml --title "Orders Quality"
872
+ duckguard report data.csv --store # Also save to history
873
+ """
874
+ from duckguard.connectors import connect
875
+ from duckguard.reports import generate_html_report, generate_pdf_report
876
+ from duckguard.rules import execute_rules, generate_rules, load_rules
877
+
878
+ # Determine output path based on format
879
+ if output == "report.html" and output_format == "pdf":
880
+ output = "report.pdf"
881
+
882
+ console.print(f"\n[bold blue]DuckGuard[/bold blue] Generating {output_format.upper()} report\n")
883
+
884
+ try:
885
+ with Progress(
886
+ SpinnerColumn(),
887
+ TextColumn("[progress.description]{task.description}"),
888
+ console=console,
889
+ transient=True,
890
+ ) as progress:
891
+ progress.add_task("Connecting to data source...", total=None)
892
+ dataset = connect(source, table=table)
893
+
894
+ console.print(f"[dim]Source: {source}[/dim]")
895
+ console.print(f"[dim]Rows: {dataset.row_count:,} | Columns: {dataset.column_count}[/dim]\n")
896
+
897
+ with Progress(
898
+ SpinnerColumn(),
899
+ TextColumn("[progress.description]{task.description}"),
900
+ console=console,
901
+ transient=True,
902
+ ) as progress:
903
+ progress.add_task("Running validation checks...", total=None)
904
+
905
+ if config:
906
+ ruleset = load_rules(config)
907
+ else:
908
+ ruleset = generate_rules(dataset, as_yaml=False)
909
+
910
+ result = execute_rules(ruleset, dataset=dataset)
911
+
912
+ # Store in history if requested
913
+ if store:
914
+ from duckguard.history import HistoryStorage
915
+
916
+ storage = HistoryStorage()
917
+ run_id = storage.store(result)
918
+ console.print(f"[dim]Stored in history: {run_id[:8]}...[/dim]\n")
919
+
920
+ # Display summary
921
+ status = "[green]PASSED[/green]" if result.passed else "[red]FAILED[/red]"
922
+ console.print(f"Validation: {status}")
923
+ console.print(f"Quality Score: [cyan]{result.quality_score:.1f}%[/cyan]")
924
+ console.print(f"Checks: {result.passed_count}/{result.total_checks} passed\n")
925
+
926
+ # Generate report
927
+ with Progress(
928
+ SpinnerColumn(),
929
+ TextColumn("[progress.description]{task.description}"),
930
+ console=console,
931
+ transient=True,
932
+ ) as progress:
933
+ progress.add_task(f"Generating {output_format.upper()} report...", total=None)
934
+
935
+ if output_format.lower() == "pdf":
936
+ generate_pdf_report(result, output, title=title, include_passed=include_passed)
937
+ else:
938
+ generate_html_report(result, output, title=title, include_passed=include_passed)
939
+
940
+ console.print(f"[green]SAVED[/green] Report saved to [cyan]{output}[/cyan]")
941
+ console.print("[dim]Open in browser to view the report[/dim]")
942
+
943
+ except ImportError as e:
944
+ if "weasyprint" in str(e).lower():
945
+ console.print("[red]Error:[/red] PDF generation requires weasyprint.")
946
+ console.print("[dim]Install with: pip install duckguard[reports][/dim]")
947
+ else:
948
+ console.print(f"[red]Error:[/red] {e}")
949
+ raise typer.Exit(1)
950
+ except Exception as e:
951
+ console.print(f"[red]Error:[/red] {e}")
952
+ raise typer.Exit(1)
953
+
954
+
705
955
  if __name__ == "__main__":
706
956
  app()
@@ -1,8 +1,8 @@
1
1
  """Connectors for various data sources."""
2
2
 
3
- from duckguard.connectors.base import Connector, ConnectionConfig
4
- from duckguard.connectors.files import FileConnector, S3Connector, GCSConnector, AzureConnector
3
+ from duckguard.connectors.base import ConnectionConfig, Connector
5
4
  from duckguard.connectors.factory import connect, register_connector
5
+ from duckguard.connectors.files import AzureConnector, FileConnector, GCSConnector, S3Connector
6
6
 
7
7
  # Database connectors (imported lazily to avoid import errors)
8
8
  __all__ = [
@@ -5,7 +5,7 @@ from __future__ import annotations
5
5
  from typing import Any
6
6
  from urllib.parse import urlparse
7
7
 
8
- from duckguard.connectors.base import Connector, ConnectionConfig
8
+ from duckguard.connectors.base import ConnectionConfig, Connector
9
9
  from duckguard.core.dataset import Dataset
10
10
  from duckguard.core.engine import DuckGuardEngine
11
11
 
@@ -5,7 +5,7 @@ from __future__ import annotations
5
5
  from typing import Any
6
6
  from urllib.parse import parse_qs, urlparse
7
7
 
8
- from duckguard.connectors.base import Connector, ConnectionConfig
8
+ from duckguard.connectors.base import ConnectionConfig, Connector
9
9
  from duckguard.core.dataset import Dataset
10
10
  from duckguard.core.engine import DuckGuardEngine
11
11
 
@@ -4,12 +4,11 @@ from __future__ import annotations
4
4
 
5
5
  from typing import Any
6
6
 
7
- from duckguard.connectors.base import Connector, ConnectionConfig
8
- from duckguard.connectors.files import FileConnector, S3Connector, GCSConnector, AzureConnector
7
+ from duckguard.connectors.base import ConnectionConfig, Connector
8
+ from duckguard.connectors.files import AzureConnector, FileConnector, GCSConnector, S3Connector
9
9
  from duckguard.core.dataset import Dataset
10
10
  from duckguard.core.engine import DuckGuardEngine
11
11
 
12
-
13
12
  # Registry of available connectors
14
13
  _CONNECTORS: list[type[Connector]] = [
15
14
  S3Connector,
@@ -5,7 +5,7 @@ from __future__ import annotations
5
5
  import os
6
6
  from pathlib import Path
7
7
 
8
- from duckguard.connectors.base import Connector, ConnectionConfig
8
+ from duckguard.connectors.base import ConnectionConfig, Connector
9
9
  from duckguard.core.dataset import Dataset
10
10
  from duckguard.core.engine import DuckGuardEngine
11
11
 
@@ -6,7 +6,7 @@ import json
6
6
  from typing import Any
7
7
  from urllib.parse import parse_qs, urlparse
8
8
 
9
- from duckguard.connectors.base import Connector, ConnectionConfig
9
+ from duckguard.connectors.base import ConnectionConfig, Connector
10
10
  from duckguard.core.dataset import Dataset
11
11
  from duckguard.core.engine import DuckGuardEngine
12
12
 
@@ -320,7 +320,7 @@ class KafkaStreamValidator:
320
320
  "messages_failed": 0,
321
321
  }
322
322
 
323
- def add_rule(self, rule: callable) -> "KafkaStreamValidator":
323
+ def add_rule(self, rule: callable) -> KafkaStreamValidator:
324
324
  """Add a validation rule."""
325
325
  self.rules.append(rule)
326
326
  return self
@@ -5,7 +5,7 @@ from __future__ import annotations
5
5
  from typing import Any
6
6
  from urllib.parse import urlparse
7
7
 
8
- from duckguard.connectors.base import Connector, ConnectionConfig
8
+ from duckguard.connectors.base import ConnectionConfig, Connector
9
9
  from duckguard.core.dataset import Dataset
10
10
  from duckguard.core.engine import DuckGuardEngine
11
11
 
@@ -4,7 +4,7 @@ from __future__ import annotations
4
4
 
5
5
  from urllib.parse import urlparse
6
6
 
7
- from duckguard.connectors.base import Connector, ConnectionConfig
7
+ from duckguard.connectors.base import ConnectionConfig, Connector
8
8
  from duckguard.core.dataset import Dataset
9
9
  from duckguard.core.engine import DuckGuardEngine
10
10
 
@@ -5,7 +5,7 @@ from __future__ import annotations
5
5
  from typing import Any
6
6
  from urllib.parse import urlparse
7
7
 
8
- from duckguard.connectors.base import Connector, ConnectionConfig
8
+ from duckguard.connectors.base import ConnectionConfig, Connector
9
9
  from duckguard.core.dataset import Dataset
10
10
  from duckguard.core.engine import DuckGuardEngine
11
11