duckguard 3.0.1__py3-none-any.whl → 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
duckguard/cli/main.py CHANGED
@@ -6,6 +6,7 @@ A modern, beautiful CLI for data quality that just works.
6
6
  from __future__ import annotations
7
7
 
8
8
  from pathlib import Path
9
+ from typing import Any
9
10
 
10
11
  import typer
11
12
  from rich.console import Console
@@ -28,11 +29,13 @@ console = Console()
28
29
  def version_callback(value: bool) -> None:
29
30
  """Print version and exit."""
30
31
  if value:
31
- console.print(Panel(
32
- f"[bold blue]DuckGuard[/bold blue] v{__version__}\n"
33
- "[dim]The fast, simple data quality tool[/dim]",
34
- border_style="blue"
35
- ))
32
+ console.print(
33
+ Panel(
34
+ f"[bold blue]DuckGuard[/bold blue] v{__version__}\n"
35
+ "[dim]The fast, simple data quality tool[/dim]",
36
+ border_style="blue",
37
+ )
38
+ )
36
39
  raise typer.Exit()
37
40
 
38
41
 
@@ -54,10 +57,16 @@ def main(
54
57
  @app.command()
55
58
  def check(
56
59
  source: str = typer.Argument(..., help="Path to file or connection string"),
57
- config: str | None = typer.Option(None, "--config", "-c", help="Path to duckguard.yaml rules file"),
60
+ config: str | None = typer.Option(
61
+ None, "--config", "-c", help="Path to duckguard.yaml rules file"
62
+ ),
58
63
  table: str | None = typer.Option(None, "--table", "-t", help="Table name (for databases)"),
59
- not_null: list[str] | None = typer.Option(None, "--not-null", "-n", help="Columns that must not be null"),
60
- unique: list[str] | None = typer.Option(None, "--unique", "-u", help="Columns that must be unique"),
64
+ not_null: list[str] | None = typer.Option(
65
+ None, "--not-null", "-n", help="Columns that must not be null"
66
+ ),
67
+ unique: list[str] | None = typer.Option(
68
+ None, "--unique", "-u", help="Columns that must be unique"
69
+ ),
61
70
  output: str | None = typer.Option(None, "--output", "-o", help="Output file (json)"),
62
71
  verbose: bool = typer.Option(False, "--verbose", "-V", help="Verbose output"),
63
72
  ) -> None:
@@ -115,7 +124,9 @@ def check(
115
124
  results = []
116
125
 
117
126
  # Row count check
118
- results.append(("Row count > 0", dataset.row_count > 0, f"{dataset.row_count:,} rows", None))
127
+ results.append(
128
+ ("Row count > 0", dataset.row_count > 0, f"{dataset.row_count:,} rows", None)
129
+ )
119
130
 
120
131
  # Not null checks
121
132
  if not_null:
@@ -123,14 +134,18 @@ def check(
123
134
  if col_name in dataset.columns:
124
135
  col = dataset[col_name]
125
136
  passed = col.null_count == 0
126
- results.append((
127
- f"{col_name} not null",
128
- passed,
129
- f"{col.null_count:,} nulls ({col.null_percent:.1f}%)",
130
- col_name
131
- ))
137
+ results.append(
138
+ (
139
+ f"{col_name} not null",
140
+ passed,
141
+ f"{col.null_count:,} nulls ({col.null_percent:.1f}%)",
142
+ col_name,
143
+ )
144
+ )
132
145
  else:
133
- results.append((f"{col_name} not null", False, "Column not found", col_name))
146
+ results.append(
147
+ (f"{col_name} not null", False, "Column not found", col_name)
148
+ )
134
149
 
135
150
  # Unique checks
136
151
  if unique:
@@ -139,12 +154,14 @@ def check(
139
154
  col = dataset[col_name]
140
155
  passed = col.unique_percent == 100
141
156
  dup_count = col.total_count - col.unique_count
142
- results.append((
143
- f"{col_name} unique",
144
- passed,
145
- f"{col.unique_percent:.1f}% unique ({dup_count:,} duplicates)",
146
- col_name
147
- ))
157
+ results.append(
158
+ (
159
+ f"{col_name} unique",
160
+ passed,
161
+ f"{col.unique_percent:.1f}% unique ({dup_count:,} duplicates)",
162
+ col_name,
163
+ )
164
+ )
148
165
  else:
149
166
  results.append((f"{col_name} unique", False, "Column not found", col_name))
150
167
 
@@ -179,7 +196,9 @@ def check(
179
196
  def discover(
180
197
  source: str = typer.Argument(..., help="Path to file or connection string"),
181
198
  table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
182
- output: str | None = typer.Option(None, "--output", "-o", help="Output file for rules (duckguard.yaml)"),
199
+ output: str | None = typer.Option(
200
+ None, "--output", "-o", help="Output file for rules (duckguard.yaml)"
201
+ ),
183
202
  format: str = typer.Option("yaml", "--format", "-f", help="Output format: yaml, python"),
184
203
  ) -> None:
185
204
  """
@@ -228,17 +247,181 @@ def discover(
228
247
  else:
229
248
  # Display YAML
230
249
  yaml_content = ruleset_to_yaml(ruleset)
231
- console.print(Panel(
232
- Syntax(yaml_content, "yaml", theme="monokai"),
233
- title="Generated Rules (duckguard.yaml)",
234
- border_style="green"
235
- ))
250
+ console.print(
251
+ Panel(
252
+ Syntax(yaml_content, "yaml", theme="monokai"),
253
+ title="Generated Rules (duckguard.yaml)",
254
+ border_style="green",
255
+ )
256
+ )
257
+
258
+ except Exception as e:
259
+ console.print(f"[red]Error:[/red] {e}")
260
+ raise typer.Exit(1)
261
+
262
+
263
+ @app.command(name="profile")
264
+ def profile_command(
265
+ source: str = typer.Argument(..., help="Path to file or connection string"),
266
+ table: str | None = typer.Option(None, "--table", "-t", help="Table name (for databases)"),
267
+ deep: bool = typer.Option(
268
+ False, "--deep", "-d", help="Enable deep profiling (distribution, outliers)"
269
+ ),
270
+ output: str | None = typer.Option(None, "--output", "-o", help="Output file (json)"),
271
+ output_format: str = typer.Option("table", "--format", "-f", help="Output format: table, json"),
272
+ ) -> None:
273
+ """
274
+ Profile a data source and suggest validation rules.
275
+
276
+ Analyzes data patterns, statistics, and quality to generate
277
+ a comprehensive profile with rule suggestions.
278
+
279
+ [bold]Examples:[/bold]
280
+ duckguard profile data.csv
281
+ duckguard profile data.csv --deep
282
+ duckguard profile data.csv --format json
283
+ duckguard profile postgres://localhost/db --table orders
284
+ """
285
+ import json as json_module
286
+
287
+ from duckguard.connectors import connect
288
+ from duckguard.profiler import AutoProfiler
289
+
290
+ if output_format != "json":
291
+ console.print(f"\n[bold blue]DuckGuard[/bold blue] Profiling: [cyan]{source}[/cyan]\n")
292
+
293
+ try:
294
+ with Progress(
295
+ SpinnerColumn(),
296
+ TextColumn("[progress.description]{task.description}"),
297
+ console=console,
298
+ transient=True,
299
+ ) as progress:
300
+ _task = progress.add_task("Profiling data...", total=None) # noqa: F841
301
+ dataset = connect(source, table=table)
302
+ profiler = AutoProfiler(deep=deep)
303
+ result = profiler.profile(dataset)
304
+
305
+ if output_format == "json":
306
+ data = _profile_to_dict(result)
307
+ json_str = json_module.dumps(data, indent=2, default=str)
308
+ if output:
309
+ Path(output).write_text(json_str, encoding="utf-8")
310
+ console.print(f"[green]SAVED[/green] Profile saved to [cyan]{output}[/cyan]")
311
+ else:
312
+ print(json_str)
313
+ else:
314
+ _display_profile_result(result)
315
+
316
+ if output:
317
+ data = _profile_to_dict(result)
318
+ Path(output).write_text(
319
+ json_module.dumps(data, indent=2, default=str), encoding="utf-8"
320
+ )
321
+ console.print(f"\n[green]SAVED[/green] Profile saved to [cyan]{output}[/cyan]")
236
322
 
237
323
  except Exception as e:
238
324
  console.print(f"[red]Error:[/red] {e}")
239
325
  raise typer.Exit(1)
240
326
 
241
327
 
328
+ def _display_profile_result(result: Any) -> None:
329
+ """Display profiling results in a rich table."""
330
+ _grade_colors = {"A": "green", "B": "blue", "C": "yellow", "D": "orange1", "F": "red"}
331
+
332
+ summary_parts = [
333
+ f"Rows: [cyan]{result.row_count:,}[/cyan]",
334
+ f"Columns: [cyan]{result.column_count}[/cyan]",
335
+ f"Rules Suggested: [cyan]{len(result.suggested_rules)}[/cyan]",
336
+ ]
337
+ if result.overall_quality_score is not None:
338
+ color = _grade_colors.get(result.overall_quality_grade, "white")
339
+ summary_parts.append(
340
+ f"Quality: [{color}]{result.overall_quality_score:.0f}/100 "
341
+ f"({result.overall_quality_grade})[/{color}]"
342
+ )
343
+
344
+ console.print(Panel("\n".join(summary_parts), title="Profile Summary", border_style="blue"))
345
+ console.print()
346
+
347
+ col_table = Table(title="Column Profiles")
348
+ col_table.add_column("Column", style="cyan")
349
+ col_table.add_column("Type", style="magenta")
350
+ col_table.add_column("Nulls", justify="right")
351
+ col_table.add_column("Unique", justify="right")
352
+ col_table.add_column("Min", justify="right")
353
+ col_table.add_column("Max", justify="right")
354
+ col_table.add_column("Grade", justify="center")
355
+ col_table.add_column("Rules", justify="right")
356
+
357
+ for col in result.columns:
358
+ grade_str = ""
359
+ if col.quality_grade:
360
+ color = _grade_colors.get(col.quality_grade, "white")
361
+ grade_str = f"[{color}]{col.quality_grade}[/{color}]"
362
+
363
+ col_table.add_row(
364
+ col.name,
365
+ col.dtype,
366
+ f"{col.null_percent:.1f}%",
367
+ f"{col.unique_percent:.1f}%",
368
+ str(col.min_value) if col.min_value is not None else "-",
369
+ str(col.max_value) if col.max_value is not None else "-",
370
+ grade_str or "-",
371
+ str(len(col.suggested_rules)),
372
+ )
373
+
374
+ console.print(col_table)
375
+
376
+ if result.suggested_rules:
377
+ console.print()
378
+ console.print(f"[bold]Suggested Rules ({len(result.suggested_rules)}):[/bold]")
379
+ for rule in result.suggested_rules[:20]:
380
+ console.print(f" {rule}")
381
+ if len(result.suggested_rules) > 20:
382
+ console.print(f" [dim]... and {len(result.suggested_rules) - 20} more[/dim]")
383
+
384
+
385
+ def _profile_to_dict(result: Any) -> dict[str, Any]:
386
+ """Convert ProfileResult to a JSON-serializable dict."""
387
+
388
+ return {
389
+ "source": result.source,
390
+ "row_count": result.row_count,
391
+ "column_count": result.column_count,
392
+ "overall_quality_score": result.overall_quality_score,
393
+ "overall_quality_grade": result.overall_quality_grade,
394
+ "columns": [
395
+ {
396
+ "name": col.name,
397
+ "dtype": col.dtype,
398
+ "null_count": col.null_count,
399
+ "null_percent": col.null_percent,
400
+ "unique_count": col.unique_count,
401
+ "unique_percent": col.unique_percent,
402
+ "min_value": col.min_value,
403
+ "max_value": col.max_value,
404
+ "mean_value": col.mean_value,
405
+ "stddev_value": col.stddev_value,
406
+ "median_value": col.median_value,
407
+ "p25_value": col.p25_value,
408
+ "p75_value": col.p75_value,
409
+ "quality_score": col.quality_score,
410
+ "quality_grade": col.quality_grade,
411
+ "distribution_type": col.distribution_type,
412
+ "skewness": col.skewness,
413
+ "kurtosis": col.kurtosis,
414
+ "is_normal": col.is_normal,
415
+ "outlier_count": col.outlier_count,
416
+ "outlier_percentage": col.outlier_percentage,
417
+ "suggested_rules": col.suggested_rules,
418
+ }
419
+ for col in result.columns
420
+ ],
421
+ "suggested_rules": result.suggested_rules,
422
+ }
423
+
424
+
242
425
  @app.command()
243
426
  def contract(
244
427
  action: str = typer.Argument(..., help="Action: generate, validate, diff"),
@@ -274,7 +457,9 @@ def contract(
274
457
  console.print("[red]Error:[/red] Source required for generate")
275
458
  raise typer.Exit(1)
276
459
 
277
- console.print(f"\n[bold blue]DuckGuard[/bold blue] Generating contract for: [cyan]{source}[/cyan]\n")
460
+ console.print(
461
+ f"\n[bold blue]DuckGuard[/bold blue] Generating contract for: [cyan]{source}[/cyan]\n"
462
+ )
278
463
 
279
464
  with Progress(
280
465
  SpinnerColumn(),
@@ -338,10 +523,16 @@ def contract(
338
523
  def anomaly(
339
524
  source: str = typer.Argument(..., help="Path to file or connection string"),
340
525
  table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
341
- method: str = typer.Option("zscore", "--method", "-m", help="Method: zscore, iqr, percent_change, baseline, ks_test"),
526
+ method: str = typer.Option(
527
+ "zscore", "--method", "-m", help="Method: zscore, iqr, percent_change, baseline, ks_test"
528
+ ),
342
529
  threshold: float | None = typer.Option(None, "--threshold", help="Detection threshold"),
343
- columns: list[str] | None = typer.Option(None, "--column", "-c", help="Specific columns to check"),
344
- learn_baseline: bool = typer.Option(False, "--learn-baseline", "-L", help="Learn and store baseline from current data"),
530
+ columns: list[str] | None = typer.Option(
531
+ None, "--column", "-c", help="Specific columns to check"
532
+ ),
533
+ learn_baseline: bool = typer.Option(
534
+ False, "--learn-baseline", "-L", help="Learn and store baseline from current data"
535
+ ),
345
536
  ) -> None:
346
537
  """
347
538
  Detect anomalies in data.
@@ -364,7 +555,9 @@ def anomaly(
364
555
  from duckguard.anomaly import detect_anomalies
365
556
  from duckguard.connectors import connect
366
557
 
367
- console.print(f"\n[bold blue]DuckGuard[/bold blue] Detecting anomalies in: [cyan]{source}[/cyan]\n")
558
+ console.print(
559
+ f"\n[bold blue]DuckGuard[/bold blue] Detecting anomalies in: [cyan]{source}[/cyan]\n"
560
+ )
368
561
 
369
562
  try:
370
563
  with Progress(
@@ -401,7 +594,9 @@ def anomaly(
401
594
  learned += 1
402
595
 
403
596
  console.print(f"[green]LEARNED[/green] Baselines stored for {learned} columns")
404
- console.print("[dim]Use --method baseline to compare against stored baselines[/dim]")
597
+ console.print(
598
+ "[dim]Use --method baseline to compare against stored baselines[/dim]"
599
+ )
405
600
  return
406
601
 
407
602
  # Regular anomaly detection
@@ -441,10 +636,7 @@ def info(
441
636
  dataset = connect(source, table=table)
442
637
  analyzer = SemanticAnalyzer()
443
638
 
444
- console.print(Panel(
445
- f"[bold]{dataset.name}[/bold]",
446
- border_style="blue"
447
- ))
639
+ console.print(Panel(f"[bold]{dataset.name}[/bold]", border_style="blue"))
448
640
 
449
641
  # Basic info
450
642
  info_table = Table(show_header=False, box=None)
@@ -496,6 +688,7 @@ def info(
496
688
 
497
689
  # Helper display functions
498
690
 
691
+
499
692
  def _display_execution_result(result, verbose: bool = False) -> None:
500
693
  """Display rule execution results."""
501
694
  table = Table(title="Validation Results")
@@ -552,11 +745,13 @@ def _display_quality_score(quality) -> None:
552
745
  color = grade_colors.get(quality.grade, "white")
553
746
 
554
747
  console.print()
555
- console.print(Panel(
556
- f"[bold]Quality Score: [{color}]{quality.overall:.0f}/100[/{color}] "
557
- f"(Grade: [{color}]{quality.grade}[/{color}])[/bold]",
558
- border_style=color,
559
- ))
748
+ console.print(
749
+ Panel(
750
+ f"[bold]Quality Score: [{color}]{quality.overall:.0f}/100[/{color}] "
751
+ f"(Grade: [{color}]{quality.grade}[/{color}])[/bold]",
752
+ border_style=color,
753
+ )
754
+ )
560
755
 
561
756
 
562
757
  def _display_discovery_results(analysis, ruleset) -> None:
@@ -566,11 +761,13 @@ def _display_discovery_results(analysis, ruleset) -> None:
566
761
 
567
762
  # PII warning
568
763
  if analysis.pii_columns:
569
- console.print(Panel(
570
- "[yellow]WARNING: PII Detected[/yellow]\n" +
571
- "\n".join(f" - {col}" for col in analysis.pii_columns),
572
- border_style="yellow",
573
- ))
764
+ console.print(
765
+ Panel(
766
+ "[yellow]WARNING: PII Detected[/yellow]\n"
767
+ + "\n".join(f" - {col}" for col in analysis.pii_columns),
768
+ border_style="yellow",
769
+ )
770
+ )
574
771
  console.print()
575
772
 
576
773
  # Column analysis table
@@ -611,7 +808,7 @@ def _display_contract(contract) -> None:
611
808
  table.add_column("PII")
612
809
 
613
810
  for field_obj in contract.schema[:15]:
614
- type_str = field_obj.type.value if hasattr(field_obj.type, 'value') else str(field_obj.type)
811
+ type_str = field_obj.type.value if hasattr(field_obj.type, "value") else str(field_obj.type)
615
812
  table.add_row(
616
813
  field_obj.name,
617
814
  type_str,
@@ -645,7 +842,9 @@ def _display_contract_validation(result) -> None:
645
842
  table.add_column("Severity")
646
843
 
647
844
  for v in result.violations[:20]:
648
- sev_style = {"error": "red", "warning": "yellow", "info": "dim"}.get(v.severity.value, "white")
845
+ sev_style = {"error": "red", "warning": "yellow", "info": "dim"}.get(
846
+ v.severity.value, "white"
847
+ )
649
848
  table.add_row(
650
849
  v.type.value,
651
850
  v.field or "-",
@@ -696,7 +895,9 @@ def _display_anomaly_report(report) -> None:
696
895
  console.print("[green]No anomalies detected[/green]")
697
896
  return
698
897
 
699
- console.print(f"[yellow bold]WARNING: {report.anomaly_count} anomalies detected[/yellow bold]\n")
898
+ console.print(
899
+ f"[yellow bold]WARNING: {report.anomaly_count} anomalies detected[/yellow bold]\n"
900
+ )
700
901
 
701
902
  table = Table(title="Anomalies")
702
903
  table.add_column("Column", style="cyan")
@@ -727,10 +928,7 @@ def _save_results(output: str, dataset, results) -> None:
727
928
  }
728
929
 
729
930
  if results:
730
- data["checks"] = [
731
- {"name": r[0], "passed": r[1], "details": r[2]}
732
- for r in results
733
- ]
931
+ data["checks"] = [{"name": r[0], "passed": r[1], "details": r[2]} for r in results]
734
932
 
735
933
  Path(output).write_text(json.dumps(data, indent=2))
736
934
 
@@ -767,7 +965,9 @@ def history(
767
965
 
768
966
  if trend and source:
769
967
  # Show trend analysis
770
- console.print(f"\n[bold blue]DuckGuard[/bold blue] Trend Analysis: [cyan]{source}[/cyan]\n")
968
+ console.print(
969
+ f"\n[bold blue]DuckGuard[/bold blue] Trend Analysis: [cyan]{source}[/cyan]\n"
970
+ )
771
971
 
772
972
  analyzer = TrendAnalyzer(storage)
773
973
  analysis = analyzer.analyze(source, days=days)
@@ -790,20 +990,24 @@ def history(
790
990
  "stable": "[=]",
791
991
  }.get(analysis.score_trend, "[=]")
792
992
 
793
- console.print(Panel(
794
- f"[bold]Quality Trend: [{trend_color}]{trend_symbol} {analysis.score_trend.upper()}[/{trend_color}][/bold]\n\n"
795
- f"Current Score: [cyan]{analysis.current_score:.1f}%[/cyan]\n"
796
- f"Average Score: [cyan]{analysis.average_score:.1f}%[/cyan]\n"
797
- f"Min/Max: [dim]{analysis.min_score:.1f}% - {analysis.max_score:.1f}%[/dim]\n"
798
- f"Change: [{trend_color}]{analysis.trend_change:+.1f}%[/{trend_color}]\n"
799
- f"Total Runs: [cyan]{analysis.total_runs}[/cyan]\n"
800
- f"Pass Rate: [cyan]{analysis.pass_rate:.1f}%[/cyan]",
801
- title=f"Last {days} Days",
802
- border_style=trend_color,
803
- ))
993
+ console.print(
994
+ Panel(
995
+ f"[bold]Quality Trend: [{trend_color}]{trend_symbol} {analysis.score_trend.upper()}[/{trend_color}][/bold]\n\n"
996
+ f"Current Score: [cyan]{analysis.current_score:.1f}%[/cyan]\n"
997
+ f"Average Score: [cyan]{analysis.average_score:.1f}%[/cyan]\n"
998
+ f"Min/Max: [dim]{analysis.min_score:.1f}% - {analysis.max_score:.1f}%[/dim]\n"
999
+ f"Change: [{trend_color}]{analysis.trend_change:+.1f}%[/{trend_color}]\n"
1000
+ f"Total Runs: [cyan]{analysis.total_runs}[/cyan]\n"
1001
+ f"Pass Rate: [cyan]{analysis.pass_rate:.1f}%[/cyan]",
1002
+ title=f"Last {days} Days",
1003
+ border_style=trend_color,
1004
+ )
1005
+ )
804
1006
 
805
1007
  if analysis.anomalies:
806
- console.print(f"\n[yellow]Anomalies detected on: {', '.join(analysis.anomalies)}[/yellow]")
1008
+ console.print(
1009
+ f"\n[yellow]Anomalies detected on: {', '.join(analysis.anomalies)}[/yellow]"
1010
+ )
807
1011
 
808
1012
  # Show daily data if available
809
1013
  if analysis.daily_data and len(analysis.daily_data) <= 14:
@@ -816,7 +1020,11 @@ def history(
816
1020
 
817
1021
  for day in analysis.daily_data:
818
1022
  pass_rate = (day.passed_count / day.run_count * 100) if day.run_count > 0 else 0
819
- score_style = "green" if day.avg_score >= 80 else "yellow" if day.avg_score >= 60 else "red"
1023
+ score_style = (
1024
+ "green"
1025
+ if day.avg_score >= 80
1026
+ else "yellow" if day.avg_score >= 60 else "red"
1027
+ )
820
1028
  table.add_row(
821
1029
  day.date,
822
1030
  f"[{score_style}]{day.avg_score:.1f}%[/{score_style}]",
@@ -829,7 +1037,9 @@ def history(
829
1037
  else:
830
1038
  # Show run history
831
1039
  if source:
832
- console.print(f"\n[bold blue]DuckGuard[/bold blue] History: [cyan]{source}[/cyan]\n")
1040
+ console.print(
1041
+ f"\n[bold blue]DuckGuard[/bold blue] History: [cyan]{source}[/cyan]\n"
1042
+ )
833
1043
  runs = storage.get_runs(source, limit=20)
834
1044
  else:
835
1045
  console.print("\n[bold blue]DuckGuard[/bold blue] Recent Validation History\n")
@@ -867,7 +1077,11 @@ def history(
867
1077
  table.add_column("Checks", justify="right")
868
1078
 
869
1079
  for run in runs:
870
- score_style = "green" if run.quality_score >= 80 else "yellow" if run.quality_score >= 60 else "red"
1080
+ score_style = (
1081
+ "green"
1082
+ if run.quality_score >= 80
1083
+ else "yellow" if run.quality_score >= 60 else "red"
1084
+ )
871
1085
  status = "[green]PASS[/green]" if run.passed else "[red]FAIL[/red]"
872
1086
 
873
1087
  table.add_row(
@@ -893,27 +1107,42 @@ def history(
893
1107
  @app.command()
894
1108
  def report(
895
1109
  source: str = typer.Argument(..., help="Data source path or connection string"),
896
- config: str | None = typer.Option(None, "--config", "-c", help="Path to duckguard.yaml rules file"),
1110
+ config: str | None = typer.Option(
1111
+ None, "--config", "-c", help="Path to duckguard.yaml rules file"
1112
+ ),
897
1113
  table: str | None = typer.Option(None, "--table", "-t", help="Table name (for databases)"),
898
1114
  output_format: str = typer.Option("html", "--format", "-f", help="Output format: html, pdf"),
899
1115
  output: str = typer.Option("report.html", "--output", "-o", help="Output file path"),
900
1116
  title: str = typer.Option("DuckGuard Data Quality Report", "--title", help="Report title"),
901
- include_passed: bool = typer.Option(True, "--include-passed/--no-passed", help="Include passed checks"),
1117
+ include_passed: bool = typer.Option(
1118
+ True, "--include-passed/--no-passed", help="Include passed checks"
1119
+ ),
902
1120
  store: bool = typer.Option(False, "--store", "-s", help="Store results in history"),
1121
+ trends: bool = typer.Option(
1122
+ False, "--trends", help="Include quality trend charts from history"
1123
+ ),
1124
+ trend_days: int = typer.Option(
1125
+ 30, "--trend-days", help="Number of days of history for trend charts"
1126
+ ),
1127
+ dark_mode: str = typer.Option("auto", "--dark-mode", help="Theme mode: auto, light, dark"),
1128
+ logo: str | None = typer.Option(None, "--logo", help="Logo URL or data URI for report header"),
903
1129
  ) -> None:
904
1130
  """
905
1131
  Generate a data quality report (HTML or PDF).
906
1132
 
907
- Runs validation checks and generates a beautiful, shareable report.
1133
+ Runs validation checks and generates a beautiful, shareable report
1134
+ with dark mode, interactive tables, and optional trend charts.
908
1135
 
909
1136
  [bold]Examples:[/bold]
910
1137
  duckguard report data.csv
911
1138
  duckguard report data.csv --format pdf --output report.pdf
912
1139
  duckguard report data.csv --config rules.yaml --title "Orders Quality"
913
1140
  duckguard report data.csv --store # Also save to history
1141
+ duckguard report data.csv --trends # Include quality trend charts
1142
+ duckguard report data.csv --dark-mode dark # Force dark theme
914
1143
  """
915
1144
  from duckguard.connectors import connect
916
- from duckguard.reports import generate_html_report, generate_pdf_report
1145
+ from duckguard.reports import HTMLReporter, PDFReporter, ReportConfig
917
1146
  from duckguard.rules import execute_rules, generate_rules, load_rules
918
1147
 
919
1148
  # Determine output path based on format
@@ -964,6 +1193,31 @@ def report(
964
1193
  console.print(f"Quality Score: [cyan]{result.quality_score:.1f}%[/cyan]")
965
1194
  console.print(f"Checks: {result.passed_count}/{result.total_checks} passed\n")
966
1195
 
1196
+ # Load trend data if requested
1197
+ trend_data = None
1198
+ history_runs = None
1199
+ if trends:
1200
+ from duckguard.history import HistoryStorage
1201
+
1202
+ try:
1203
+ storage_for_trends = HistoryStorage()
1204
+ trend_data = storage_for_trends.get_trend(source, days=trend_days)
1205
+ history_runs = storage_for_trends.get_runs(source, limit=20)
1206
+ if not trend_data:
1207
+ console.print("[dim]No historical data found for trend charts[/dim]")
1208
+ except Exception:
1209
+ console.print("[dim]No historical data found for trend charts[/dim]")
1210
+
1211
+ # Build report config
1212
+ report_config = ReportConfig(
1213
+ title=title,
1214
+ include_passed=include_passed,
1215
+ include_trends=trends,
1216
+ trend_days=trend_days,
1217
+ dark_mode=dark_mode,
1218
+ logo_url=logo,
1219
+ )
1220
+
967
1221
  # Generate report
968
1222
  with Progress(
969
1223
  SpinnerColumn(),
@@ -974,9 +1228,18 @@ def report(
974
1228
  progress.add_task(f"Generating {output_format.upper()} report...", total=None)
975
1229
 
976
1230
  if output_format.lower() == "pdf":
977
- generate_pdf_report(result, output, title=title, include_passed=include_passed)
1231
+ reporter = PDFReporter(config=report_config)
978
1232
  else:
979
- generate_html_report(result, output, title=title, include_passed=include_passed)
1233
+ reporter = HTMLReporter(config=report_config)
1234
+
1235
+ reporter.generate(
1236
+ result,
1237
+ output,
1238
+ history=history_runs,
1239
+ trend_data=trend_data,
1240
+ row_count=dataset.row_count,
1241
+ column_count=dataset.column_count,
1242
+ )
980
1243
 
981
1244
  console.print(f"[green]SAVED[/green] Report saved to [cyan]{output}[/cyan]")
982
1245
  console.print("[dim]Open in browser to view the report[/dim]")
@@ -997,7 +1260,9 @@ def report(
997
1260
  def freshness(
998
1261
  source: str = typer.Argument(..., help="Data source path"),
999
1262
  column: str | None = typer.Option(None, "--column", "-c", help="Timestamp column to check"),
1000
- max_age: str = typer.Option("24h", "--max-age", "-m", help="Maximum acceptable age: 1h, 6h, 24h, 7d"),
1263
+ max_age: str = typer.Option(
1264
+ "24h", "--max-age", "-m", help="Maximum acceptable age: 1h, 6h, 24h, 7d"
1265
+ ),
1001
1266
  output_format: str = typer.Option("table", "--format", "-f", help="Output format: table, json"),
1002
1267
  ) -> None:
1003
1268
  """
@@ -1038,6 +1303,7 @@ def freshness(
1038
1303
  else:
1039
1304
  # Try file mtime first, fallback to dataset
1040
1305
  from pathlib import Path
1306
+
1041
1307
  if Path(source).exists():
1042
1308
  result = monitor.check_file_mtime(source)
1043
1309
  else:
@@ -1051,15 +1317,17 @@ def freshness(
1051
1317
  status_color = "green" if result.is_fresh else "red"
1052
1318
  status_text = "FRESH" if result.is_fresh else "STALE"
1053
1319
 
1054
- console.print(Panel(
1055
- f"[bold {status_color}]{status_text}[/bold {status_color}]\n\n"
1056
- f"Last Modified: [cyan]{result.last_modified.strftime('%Y-%m-%d %H:%M:%S') if result.last_modified else 'Unknown'}[/cyan]\n"
1057
- f"Age: [cyan]{result.age_human}[/cyan]\n"
1058
- f"Threshold: [dim]{max_age}[/dim]\n"
1059
- f"Method: [dim]{result.method.value}[/dim]",
1060
- title="Freshness Check",
1061
- border_style=status_color,
1062
- ))
1320
+ console.print(
1321
+ Panel(
1322
+ f"[bold {status_color}]{status_text}[/bold {status_color}]\n\n"
1323
+ f"Last Modified: [cyan]{result.last_modified.strftime('%Y-%m-%d %H:%M:%S') if result.last_modified else 'Unknown'}[/cyan]\n"
1324
+ f"Age: [cyan]{result.age_human}[/cyan]\n"
1325
+ f"Threshold: [dim]{max_age}[/dim]\n"
1326
+ f"Method: [dim]{result.method.value}[/dim]",
1327
+ title="Freshness Check",
1328
+ border_style=status_color,
1329
+ )
1330
+ )
1063
1331
 
1064
1332
  if not result.is_fresh:
1065
1333
  raise typer.Exit(1)
@@ -1072,7 +1340,9 @@ def freshness(
1072
1340
  @app.command()
1073
1341
  def schema(
1074
1342
  source: str = typer.Argument(..., help="Data source path"),
1075
- action: str = typer.Option("show", "--action", "-a", help="Action: show, capture, history, changes"),
1343
+ action: str = typer.Option(
1344
+ "show", "--action", "-a", help="Action: show, capture, history, changes"
1345
+ ),
1076
1346
  table: str | None = typer.Option(None, "--table", "-t", help="Table name (for databases)"),
1077
1347
  output_format: str = typer.Option("table", "--format", "-f", help="Output format: table, json"),
1078
1348
  limit: int = typer.Option(10, "--limit", "-l", help="Number of results to show"),
@@ -1132,9 +1402,15 @@ def schema(
1132
1402
  progress.add_task("Capturing schema snapshot...", total=None)
1133
1403
  snapshot = tracker.capture(dataset)
1134
1404
 
1135
- console.print(f"[green]CAPTURED[/green] Schema snapshot: [cyan]{snapshot.snapshot_id[:8]}...[/cyan]")
1136
- console.print(f"[dim]Columns: {snapshot.column_count} | Rows: {snapshot.row_count:,}[/dim]")
1137
- console.print(f"[dim]Captured at: {snapshot.captured_at.strftime('%Y-%m-%d %H:%M:%S')}[/dim]")
1405
+ console.print(
1406
+ f"[green]CAPTURED[/green] Schema snapshot: [cyan]{snapshot.snapshot_id[:8]}...[/cyan]"
1407
+ )
1408
+ console.print(
1409
+ f"[dim]Columns: {snapshot.column_count} | Rows: {snapshot.row_count:,}[/dim]"
1410
+ )
1411
+ console.print(
1412
+ f"[dim]Captured at: {snapshot.captured_at.strftime('%Y-%m-%d %H:%M:%S')}[/dim]"
1413
+ )
1138
1414
 
1139
1415
  elif action == "history":
1140
1416
  history = tracker.get_history(source, limit=limit)
@@ -1176,11 +1452,15 @@ def schema(
1176
1452
 
1177
1453
  if not report.has_changes:
1178
1454
  console.print("[green]No schema changes detected[/green]")
1179
- console.print(f"[dim]Snapshot captured: {report.current_snapshot.snapshot_id[:8]}...[/dim]")
1455
+ console.print(
1456
+ f"[dim]Snapshot captured: {report.current_snapshot.snapshot_id[:8]}...[/dim]"
1457
+ )
1180
1458
  return
1181
1459
 
1182
1460
  # Display changes
1183
- console.print(f"[yellow bold]{len(report.changes)} schema changes detected[/yellow bold]\n")
1461
+ console.print(
1462
+ f"[yellow bold]{len(report.changes)} schema changes detected[/yellow bold]\n"
1463
+ )
1184
1464
 
1185
1465
  if report.has_breaking_changes:
1186
1466
  console.print("[red bold]BREAKING CHANGES:[/red bold]")
@@ -1207,5 +1487,112 @@ def schema(
1207
1487
  raise typer.Exit(1)
1208
1488
 
1209
1489
 
1490
+ # =========================================================================
1491
+ # AI-Powered Commands
1492
+ # =========================================================================
1493
+
1494
+
1495
+ @app.command()
1496
+ def explain(
1497
+ source: str = typer.Argument(..., help="Path to file or connection string"),
1498
+ table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
1499
+ focus: str | None = typer.Option(None, "--focus", "-f", help="Column or aspect to focus on"),
1500
+ detail: str = typer.Option("medium", "--detail", "-d", help="Detail level: brief, medium, detailed"),
1501
+ ) -> None:
1502
+ """Explain data quality issues in plain English (AI-powered).
1503
+
1504
+ Requires: pip install duckguard[llm]
1505
+ """
1506
+ try:
1507
+ from duckguard.ai import explain as ai_explain
1508
+ from duckguard.connectors import connect as dg_connect
1509
+
1510
+ with Progress(SpinnerColumn(), TextColumn("[progress.description]{task.description}"), console=console):
1511
+ dataset = dg_connect(source, table=table)
1512
+
1513
+ with console.status("[bold green]Analyzing with AI..."):
1514
+ result = ai_explain(dataset, focus=focus, detail=detail)
1515
+
1516
+ console.print()
1517
+ console.print(Panel(result, title="[bold]Data Quality Explanation[/bold]", border_style="green"))
1518
+
1519
+ except ImportError:
1520
+ console.print("[red]Error:[/red] AI features require LLM packages.")
1521
+ console.print("Install with: [bold]pip install duckguard[llm][/bold]")
1522
+ raise typer.Exit(1)
1523
+ except Exception as e:
1524
+ console.print(f"[red]Error:[/red] {e}")
1525
+ raise typer.Exit(1)
1526
+
1527
+
1528
+ @app.command()
1529
+ def suggest(
1530
+ source: str = typer.Argument(..., help="Path to file or connection string"),
1531
+ table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
1532
+ output: str | None = typer.Option(None, "--output", "-o", help="Output file (default: stdout)"),
1533
+ strict: bool = typer.Option(False, "--strict", help="Generate stricter rules"),
1534
+ ) -> None:
1535
+ """Generate validation rules using AI (AI-powered).
1536
+
1537
+ Requires: pip install duckguard[llm]
1538
+ """
1539
+ try:
1540
+ from duckguard.ai import suggest_rules
1541
+ from duckguard.connectors import connect as dg_connect
1542
+
1543
+ with Progress(SpinnerColumn(), TextColumn("[progress.description]{task.description}"), console=console):
1544
+ dataset = dg_connect(source, table=table)
1545
+
1546
+ with console.status("[bold green]Generating rules with AI..."):
1547
+ rules_yaml = suggest_rules(dataset, strict=strict)
1548
+
1549
+ if output:
1550
+ with open(output, "w") as f:
1551
+ f.write(rules_yaml)
1552
+ console.print(f"[green]Rules written to {output}[/green]")
1553
+ else:
1554
+ console.print()
1555
+ console.print(Syntax(rules_yaml, "yaml", theme="monokai"))
1556
+
1557
+ except ImportError:
1558
+ console.print("[red]Error:[/red] AI features require LLM packages.")
1559
+ console.print("Install with: [bold]pip install duckguard[llm][/bold]")
1560
+ raise typer.Exit(1)
1561
+ except Exception as e:
1562
+ console.print(f"[red]Error:[/red] {e}")
1563
+ raise typer.Exit(1)
1564
+
1565
+
1566
+ @app.command()
1567
+ def fix(
1568
+ source: str = typer.Argument(..., help="Path to file or connection string"),
1569
+ table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
1570
+ ) -> None:
1571
+ """Suggest data quality fixes using AI (AI-powered).
1572
+
1573
+ Requires: pip install duckguard[llm]
1574
+ """
1575
+ try:
1576
+ from duckguard.ai import suggest_fixes
1577
+ from duckguard.connectors import connect as dg_connect
1578
+
1579
+ with Progress(SpinnerColumn(), TextColumn("[progress.description]{task.description}"), console=console):
1580
+ dataset = dg_connect(source, table=table)
1581
+
1582
+ with console.status("[bold green]Analyzing fixes with AI..."):
1583
+ result = suggest_fixes(dataset)
1584
+
1585
+ console.print()
1586
+ console.print(Panel(result, title="[bold]Suggested Fixes[/bold]", border_style="yellow"))
1587
+
1588
+ except ImportError:
1589
+ console.print("[red]Error:[/red] AI features require LLM packages.")
1590
+ console.print("Install with: [bold]pip install duckguard[llm][/bold]")
1591
+ raise typer.Exit(1)
1592
+ except Exception as e:
1593
+ console.print(f"[red]Error:[/red] {e}")
1594
+ raise typer.Exit(1)
1595
+
1596
+
1210
1597
  if __name__ == "__main__":
1211
1598
  app()