duckguard-2.0.0-py3-none-any.whl → duckguard-2.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- duckguard/__init__.py +55 -28
- duckguard/anomaly/__init__.py +1 -1
- duckguard/anomaly/detector.py +1 -5
- duckguard/anomaly/methods.py +1 -3
- duckguard/cli/main.py +304 -54
- duckguard/connectors/__init__.py +2 -2
- duckguard/connectors/bigquery.py +1 -1
- duckguard/connectors/databricks.py +1 -1
- duckguard/connectors/factory.py +2 -3
- duckguard/connectors/files.py +1 -1
- duckguard/connectors/kafka.py +2 -2
- duckguard/connectors/mongodb.py +1 -1
- duckguard/connectors/mysql.py +1 -1
- duckguard/connectors/oracle.py +1 -1
- duckguard/connectors/postgres.py +1 -2
- duckguard/connectors/redshift.py +1 -1
- duckguard/connectors/snowflake.py +1 -2
- duckguard/connectors/sqlite.py +1 -1
- duckguard/connectors/sqlserver.py +10 -13
- duckguard/contracts/__init__.py +6 -6
- duckguard/contracts/diff.py +1 -1
- duckguard/contracts/generator.py +5 -6
- duckguard/contracts/loader.py +4 -4
- duckguard/contracts/validator.py +3 -4
- duckguard/core/__init__.py +3 -3
- duckguard/core/column.py +110 -5
- duckguard/core/dataset.py +3 -3
- duckguard/core/result.py +92 -1
- duckguard/core/scoring.py +1 -2
- duckguard/errors.py +362 -0
- duckguard/history/__init__.py +44 -0
- duckguard/history/schema.py +183 -0
- duckguard/history/storage.py +479 -0
- duckguard/history/trends.py +348 -0
- duckguard/integrations/__init__.py +31 -0
- duckguard/integrations/airflow.py +387 -0
- duckguard/integrations/dbt.py +458 -0
- duckguard/notifications/__init__.py +43 -0
- duckguard/notifications/formatter.py +118 -0
- duckguard/notifications/notifiers.py +357 -0
- duckguard/profiler/auto_profile.py +3 -3
- duckguard/pytest_plugin/__init__.py +1 -1
- duckguard/pytest_plugin/plugin.py +1 -1
- duckguard/reporting/console.py +2 -2
- duckguard/reports/__init__.py +42 -0
- duckguard/reports/html_reporter.py +515 -0
- duckguard/reports/pdf_reporter.py +114 -0
- duckguard/rules/__init__.py +3 -3
- duckguard/rules/executor.py +3 -4
- duckguard/rules/generator.py +4 -4
- duckguard/rules/loader.py +5 -5
- duckguard/semantic/__init__.py +1 -1
- duckguard/semantic/analyzer.py +0 -2
- duckguard/semantic/validators.py +2 -1
- {duckguard-2.0.0.dist-info → duckguard-2.2.0.dist-info}/METADATA +135 -5
- duckguard-2.2.0.dist-info/RECORD +69 -0
- duckguard-2.0.0.dist-info/RECORD +0 -55
- {duckguard-2.0.0.dist-info → duckguard-2.2.0.dist-info}/WHEEL +0 -0
- {duckguard-2.0.0.dist-info → duckguard-2.2.0.dist-info}/entry_points.txt +0 -0
- {duckguard-2.0.0.dist-info → duckguard-2.2.0.dist-info}/licenses/LICENSE +0 -0
duckguard/cli/main.py
CHANGED

@@ -5,21 +5,14 @@ A modern, beautiful CLI for data quality that just works.
 
 from __future__ import annotations
 
-import sys
 from pathlib import Path
-from typing import Optional
 
 import typer
 from rich.console import Console
 from rich.panel import Panel
-from rich.table import Table
-from rich.syntax import Syntax
 from rich.progress import Progress, SpinnerColumn, TextColumn
-from rich import
-from rich.
-from rich.text import Text
-from rich.columns import Columns
-from rich.markdown import Markdown
+from rich.syntax import Syntax
+from rich.table import Table
 
 from duckguard import __version__
 
@@ -45,7 +38,7 @@ def version_callback(value: bool) -> None:
 
 @app.callback()
 def main(
-    version:
+    version: bool | None = typer.Option(
         None,
         "--version",
         "-v",
@@ -61,11 +54,11 @@ def main(
 @app.command()
 def check(
     source: str = typer.Argument(..., help="Path to file or connection string"),
-    config:
-    table:
-    not_null:
-    unique:
-    output:
+    config: str | None = typer.Option(None, "--config", "-c", help="Path to duckguard.yaml rules file"),
+    table: str | None = typer.Option(None, "--table", "-t", help="Table name (for databases)"),
+    not_null: list[str] | None = typer.Option(None, "--not-null", "-n", help="Columns that must not be null"),
+    unique: list[str] | None = typer.Option(None, "--unique", "-u", help="Columns that must be unique"),
+    output: str | None = typer.Option(None, "--output", "-o", help="Output file (json)"),
     verbose: bool = typer.Option(False, "--verbose", "-V", help="Verbose output"),
 ) -> None:
     """
@@ -78,8 +71,8 @@ def check(
         duckguard check postgres://localhost/db --table orders
     """
     from duckguard.connectors import connect
-    from duckguard.rules import load_rules, execute_rules
     from duckguard.core.scoring import score
+    from duckguard.rules import execute_rules, load_rules
 
     console.print(f"\n[bold blue]DuckGuard[/bold blue] Checking: [cyan]{source}[/cyan]\n")
 
@@ -185,8 +178,8 @@ def check(
 @app.command()
 def discover(
     source: str = typer.Argument(..., help="Path to file or connection string"),
-    table:
-    output:
+    table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
+    output: str | None = typer.Option(None, "--output", "-o", help="Output file for rules (duckguard.yaml)"),
     format: str = typer.Option("yaml", "--format", "-f", help="Output format: yaml, python"),
 ) -> None:
     """
@@ -213,7 +206,7 @@ def discover(
         console=console,
         transient=True,
     ) as progress:
-
+        _task = progress.add_task("Analyzing data...", total=None)  # noqa: F841
         dataset = connect(source, table=table)
 
         # Semantic analysis
@@ -230,7 +223,7 @@ def discover(
     if output:
         yaml_content = ruleset_to_yaml(ruleset)
         Path(output).write_text(yaml_content, encoding="utf-8")
-        console.print(f"\n[green]
+        console.print(f"\n[green]SAVED[/green] Rules saved to [cyan]{output}[/cyan]")
         console.print(f"[dim]Run: duckguard check {source} --config {output}[/dim]")
     else:
         # Display YAML
@@ -250,8 +243,8 @@ def discover(
 def contract(
     action: str = typer.Argument(..., help="Action: generate, validate, diff"),
     source: str = typer.Argument(None, help="Data source or contract file"),
-    contract_file:
-    output:
+    contract_file: str | None = typer.Option(None, "--contract", "-c", help="Contract file path"),
+    output: str | None = typer.Option(None, "--output", "-o", help="Output file"),
     strict: bool = typer.Option(False, "--strict", help="Strict validation mode"),
 ) -> None:
     """
@@ -268,13 +261,12 @@ def contract(
         duckguard contract diff old.contract.yaml new.contract.yaml
     """
     from duckguard.contracts import (
+        diff_contracts,
+        generate_contract,
         load_contract,
         validate_contract,
-        generate_contract,
-        diff_contracts,
     )
     from duckguard.contracts.loader import contract_to_yaml
-    from duckguard.connectors import connect
 
     try:
         if action == "generate":
@@ -298,14 +290,14 @@ def contract(
             if output:
                 yaml_content = contract_to_yaml(contract_obj)
                 Path(output).write_text(yaml_content, encoding="utf-8")
-                console.print(f"\n[green]
+                console.print(f"\n[green]SAVED[/green] Contract saved to [cyan]{output}[/cyan]")
 
         elif action == "validate":
             if not source or not contract_file:
                 console.print("[red]Error:[/red] Both source and --contract required for validate")
                 raise typer.Exit(1)
 
-            console.print(
+            console.print("\n[bold blue]DuckGuard[/bold blue] Validating against contract\n")
 
             with Progress(
                 SpinnerColumn(),
@@ -345,10 +337,10 @@ def contract(
 @app.command()
 def anomaly(
     source: str = typer.Argument(..., help="Path to file or connection string"),
-    table:
+    table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
     method: str = typer.Option("zscore", "--method", "-m", help="Detection method: zscore, iqr, percent_change"),
-    threshold:
-    columns:
+    threshold: float | None = typer.Option(None, "--threshold", help="Detection threshold"),
+    columns: list[str] | None = typer.Option(None, "--column", "-c", help="Specific columns to check"),
 ) -> None:
     """
     Detect anomalies in data.
@@ -358,8 +350,8 @@ def anomaly(
         duckguard anomaly data.csv --method iqr --threshold 2.0
         duckguard anomaly data.csv --column amount --column quantity
     """
-    from duckguard.connectors import connect
     from duckguard.anomaly import detect_anomalies
+    from duckguard.connectors import connect
 
     console.print(f"\n[bold blue]DuckGuard[/bold blue] Detecting anomalies in: [cyan]{source}[/cyan]\n")
 
@@ -392,7 +384,7 @@ def anomaly(
 @app.command()
 def info(
     source: str = typer.Argument(..., help="Path to file or connection string"),
-    table:
+    table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
 ) -> None:
     """
     Display information about a data source.
@@ -441,7 +433,7 @@ def info(
         if sem_type == "unknown":
             sem_type = "-"
         if col_analysis.is_pii:
-            sem_type = f"
+            sem_type = f"[PII] {sem_type}"
 
         col_table.add_row(
             col_name,
@@ -472,11 +464,11 @@ def _display_execution_result(result, verbose: bool = False) -> None:
 
     for check_result in result.results:
         if check_result.passed:
-            status = "[green]
+            status = "[green]PASS[/green]"
         elif check_result.severity.value == "warning":
-            status = "[yellow]
+            status = "[yellow]WARN[/yellow]"
         else:
-            status = "[red]
+            status = "[red]FAIL[/red]"
 
         col_str = f"[{check_result.column}] " if check_result.column else ""
         table.add_row(
@@ -490,10 +482,10 @@ def _display_execution_result(result, verbose: bool = False) -> None:
     # Summary
     console.print()
     if result.passed:
-        console.print(f"[green]
+        console.print(f"[green]All {result.total_checks} checks passed[/green]")
    else:
         console.print(
-            f"[red]
+            f"[red]{result.failed_count} failed[/red], "
             f"[yellow]{result.warning_count} warnings[/yellow], "
             f"[green]{result.passed_count} passed[/green]"
         )
@@ -507,7 +499,7 @@ def _display_quick_results(results: list) -> None:
     table.add_column("Details")
 
     for check_name, passed, details, _ in results:
-        status = "[green]
+        status = "[green]PASS[/green]" if passed else "[red]FAIL[/red]"
         table.add_row(check_name, status, details)
 
     console.print(table)
@@ -534,8 +526,8 @@ def _display_discovery_results(analysis, ruleset) -> None:
     # PII warning
     if analysis.pii_columns:
         console.print(Panel(
-            "[yellow]
-            "\n".join(f"
+            "[yellow]WARNING: PII Detected[/yellow]\n" +
+            "\n".join(f"  - {col}" for col in analysis.pii_columns),
             border_style="yellow",
         ))
         console.print()
@@ -549,7 +541,7 @@ def _display_discovery_results(analysis, ruleset) -> None:
     for col in analysis.columns[:15]:
         sem = col.semantic_type.value
         if col.is_pii:
-            sem = f"
+            sem = f"[PII] {sem}"
 
         rules = ", ".join(col.suggested_validations[:3])
         if len(col.suggested_validations) > 3:
@@ -582,9 +574,9 @@ def _display_contract(contract) -> None:
         table.add_row(
             field_obj.name,
             type_str,
-            "
-            "
-            "
+            "Y" if field_obj.required else "",
+            "Y" if field_obj.unique else "",
+            "[PII]" if field_obj.pii else "",
         )
 
     console.print(table)
@@ -593,14 +585,14 @@ def _display_contract(contract) -> None:
     if contract.quality:
         console.print("\n[bold]Quality SLA:[/bold]")
         if contract.quality.completeness:
-            console.print(f"
+            console.print(f"  - Completeness: {contract.quality.completeness}%")
         if contract.quality.row_count_min:
-            console.print(f"
+            console.print(f"  - Min rows: {contract.quality.row_count_min:,}")
 
 
 def _display_contract_validation(result) -> None:
     """Display contract validation results."""
-    status = "[green]
+    status = "[green]PASSED[/green]" if result.passed else "[red]FAILED[/red]"
     console.print(f"Contract: [bold]{result.contract.name}[/bold] v{result.contract.version}")
     console.print(f"Status: {status}\n")
 
@@ -627,7 +619,7 @@ def _display_contract_validation(result) -> None:
 
 def _display_contract_diff(diff) -> None:
     """Display contract diff."""
-    console.print(
+    console.print("[bold]Comparing contracts[/bold]")
     console.print(f"  Old: v{diff.old_contract.version}")
     console.print(f"  New: v{diff.new_contract.version}\n")
 
@@ -640,19 +632,19 @@ def _display_contract_diff(diff) -> None:
     if diff.breaking_changes:
         console.print("[red bold]Breaking Changes:[/red bold]")
         for change in diff.breaking_changes:
-            console.print(f"
+            console.print(f"  [red]X[/red] {change.message}")
         console.print()
 
     if diff.minor_changes:
         console.print("[yellow bold]Minor Changes:[/yellow bold]")
         for change in diff.minor_changes:
-            console.print(f"
+            console.print(f"  [yellow]![/yellow] {change.message}")
         console.print()
 
     if diff.non_breaking_changes:
         console.print("[dim]Non-breaking Changes:[/dim]")
         for change in diff.non_breaking_changes:
-            console.print(f"
+            console.print(f"  - {change.message}")
 
     console.print(f"\n[dim]Suggested version bump: {diff.suggest_version_bump()}[/dim]")
 
@@ -660,10 +652,10 @@ def _display_contract_diff(diff) -> None:
 def _display_anomaly_report(report) -> None:
     """Display anomaly detection report."""
     if not report.has_anomalies:
-        console.print("[green]
+        console.print("[green]No anomalies detected[/green]")
         return
 
-    console.print(f"[yellow bold]
+    console.print(f"[yellow bold]WARNING: {report.anomaly_count} anomalies detected[/yellow bold]\n")
 
     table = Table(title="Anomalies")
     table.add_column("Column", style="cyan")
@@ -702,5 +694,263 @@ def _save_results(output: str, dataset, results) -> None:
     Path(output).write_text(json.dumps(data, indent=2))
 
 
+@app.command()
+def history(
+    source: str | None = typer.Argument(None, help="Data source to query history for (optional)"),
+    last: str = typer.Option("30d", "--last", "-l", help="Time period: 7d, 30d, 90d"),
+    output_format: str = typer.Option("table", "--format", "-f", help="Output format: table, json"),
+    trend: bool = typer.Option(False, "--trend", "-t", help="Show quality trend analysis"),
+    db_path: str | None = typer.Option(None, "--db", help="Path to history database"),
+) -> None:
+    """
+    Query historical validation results.
+
+    Shows past validation runs and quality score trends over time.
+
+    [bold]Examples:[/bold]
+        duckguard history                       # Show all recent runs
+        duckguard history data.csv              # Show runs for specific source
+        duckguard history data.csv --last 7d    # Last 7 days
+        duckguard history data.csv --trend      # Show trend analysis
+        duckguard history --format json         # Output as JSON
+    """
+    import json as json_module
+
+    from duckguard.history import HistoryStorage, TrendAnalyzer
+
+    try:
+        storage = HistoryStorage(db_path=db_path)
+
+        # Parse time period
+        days = int(last.rstrip("d"))
+
+        if trend and source:
+            # Show trend analysis
+            console.print(f"\n[bold blue]DuckGuard[/bold blue] Trend Analysis: [cyan]{source}[/cyan]\n")
+
+            analyzer = TrendAnalyzer(storage)
+            analysis = analyzer.analyze(source, days=days)
+
+            if analysis.total_runs == 0:
+                console.print("[yellow]No historical data found for this source.[/yellow]")
+                console.print("[dim]Run some validations first, then check history.[/dim]")
+                return
+
+            # Display trend summary
+            trend_color = {
+                "improving": "green",
+                "declining": "red",
+                "stable": "yellow",
+            }.get(analysis.score_trend, "white")
+
+            trend_symbol = {
+                "improving": "[+]",
+                "declining": "[-]",
+                "stable": "[=]",
+            }.get(analysis.score_trend, "[=]")
+
+            console.print(Panel(
+                f"[bold]Quality Trend: [{trend_color}]{trend_symbol} {analysis.score_trend.upper()}[/{trend_color}][/bold]\n\n"
+                f"Current Score: [cyan]{analysis.current_score:.1f}%[/cyan]\n"
+                f"Average Score: [cyan]{analysis.average_score:.1f}%[/cyan]\n"
+                f"Min/Max: [dim]{analysis.min_score:.1f}% - {analysis.max_score:.1f}%[/dim]\n"
+                f"Change: [{trend_color}]{analysis.trend_change:+.1f}%[/{trend_color}]\n"
+                f"Total Runs: [cyan]{analysis.total_runs}[/cyan]\n"
+                f"Pass Rate: [cyan]{analysis.pass_rate:.1f}%[/cyan]",
+                title=f"Last {days} Days",
+                border_style=trend_color,
+            ))
+
+            if analysis.anomalies:
+                console.print(f"\n[yellow]Anomalies detected on: {', '.join(analysis.anomalies)}[/yellow]")
+
+            # Show daily data if available
+            if analysis.daily_data and len(analysis.daily_data) <= 14:
+                console.print()
+                table = Table(title="Daily Quality Scores")
+                table.add_column("Date", style="cyan")
+                table.add_column("Score", justify="right")
+                table.add_column("Runs", justify="right")
+                table.add_column("Pass Rate", justify="right")
+
+                for day in analysis.daily_data:
+                    pass_rate = (day.passed_count / day.run_count * 100) if day.run_count > 0 else 0
+                    score_style = "green" if day.avg_score >= 80 else "yellow" if day.avg_score >= 60 else "red"
+                    table.add_row(
+                        day.date,
+                        f"[{score_style}]{day.avg_score:.1f}%[/{score_style}]",
+                        str(day.run_count),
+                        f"{pass_rate:.0f}%",
+                    )
+
+                console.print(table)
+
+        else:
+            # Show run history
+            if source:
+                console.print(f"\n[bold blue]DuckGuard[/bold blue] History: [cyan]{source}[/cyan]\n")
+                runs = storage.get_runs(source, limit=20)
+            else:
+                console.print("\n[bold blue]DuckGuard[/bold blue] Recent Validation History\n")
+                runs = storage.get_runs(limit=20)
+
+            if not runs:
+                console.print("[yellow]No historical data found.[/yellow]")
+                console.print("[dim]Run some validations first, then check history.[/dim]")
+                return
+
+            if output_format == "json":
+                # JSON output
+                data = [
+                    {
+                        "run_id": run.run_id,
+                        "source": run.source,
+                        "started_at": run.started_at.isoformat(),
+                        "quality_score": run.quality_score,
+                        "passed": run.passed,
+                        "total_checks": run.total_checks,
+                        "passed_count": run.passed_count,
+                        "failed_count": run.failed_count,
+                        "warning_count": run.warning_count,
+                    }
+                    for run in runs
+                ]
+                console.print(json_module.dumps(data, indent=2))
+            else:
+                # Table output
+                table = Table(title=f"Validation Runs (Last {days} days)")
+                table.add_column("Date", style="cyan")
+                table.add_column("Source", style="dim", max_width=40)
+                table.add_column("Score", justify="right")
+                table.add_column("Status", justify="center")
+                table.add_column("Checks", justify="right")
+
+                for run in runs:
+                    score_style = "green" if run.quality_score >= 80 else "yellow" if run.quality_score >= 60 else "red"
+                    status = "[green]PASS[/green]" if run.passed else "[red]FAIL[/red]"
+
+                    table.add_row(
+                        run.started_at.strftime("%Y-%m-%d %H:%M"),
+                        run.source[:40],
+                        f"[{score_style}]{run.quality_score:.1f}%[/{score_style}]",
+                        status,
+                        f"{run.passed_count}/{run.total_checks}",
+                    )
+
+                console.print(table)
+
+                # Show sources summary
+                sources = storage.get_sources()
+                if len(sources) > 1:
+                    console.print(f"\n[dim]Tracked sources: {len(sources)}[/dim]")
+
+    except Exception as e:
+        console.print(f"[red]Error:[/red] {e}")
+        raise typer.Exit(1)
+
+
+@app.command()
+def report(
+    source: str = typer.Argument(..., help="Data source path or connection string"),
+    config: str | None = typer.Option(None, "--config", "-c", help="Path to duckguard.yaml rules file"),
+    table: str | None = typer.Option(None, "--table", "-t", help="Table name (for databases)"),
+    output_format: str = typer.Option("html", "--format", "-f", help="Output format: html, pdf"),
+    output: str = typer.Option("report.html", "--output", "-o", help="Output file path"),
+    title: str = typer.Option("DuckGuard Data Quality Report", "--title", help="Report title"),
+    include_passed: bool = typer.Option(True, "--include-passed/--no-passed", help="Include passed checks"),
+    store: bool = typer.Option(False, "--store", "-s", help="Store results in history"),
+) -> None:
+    """
+    Generate a data quality report (HTML or PDF).
+
+    Runs validation checks and generates a beautiful, shareable report.
+
+    [bold]Examples:[/bold]
+        duckguard report data.csv
+        duckguard report data.csv --format pdf --output report.pdf
+        duckguard report data.csv --config rules.yaml --title "Orders Quality"
+        duckguard report data.csv --store    # Also save to history
+    """
+    from duckguard.connectors import connect
+    from duckguard.reports import generate_html_report, generate_pdf_report
+    from duckguard.rules import execute_rules, generate_rules, load_rules
+
+    # Determine output path based on format
+    if output == "report.html" and output_format == "pdf":
+        output = "report.pdf"
+
+    console.print(f"\n[bold blue]DuckGuard[/bold blue] Generating {output_format.upper()} report\n")
+
+    try:
+        with Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            console=console,
+            transient=True,
+        ) as progress:
+            progress.add_task("Connecting to data source...", total=None)
+            dataset = connect(source, table=table)
+
+        console.print(f"[dim]Source: {source}[/dim]")
+        console.print(f"[dim]Rows: {dataset.row_count:,} | Columns: {dataset.column_count}[/dim]\n")
+
+        with Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            console=console,
+            transient=True,
+        ) as progress:
+            progress.add_task("Running validation checks...", total=None)
+
+            if config:
+                ruleset = load_rules(config)
+            else:
+                ruleset = generate_rules(dataset, as_yaml=False)
+
+            result = execute_rules(ruleset, dataset=dataset)
+
+        # Store in history if requested
+        if store:
+            from duckguard.history import HistoryStorage
+
+            storage = HistoryStorage()
+            run_id = storage.store(result)
+            console.print(f"[dim]Stored in history: {run_id[:8]}...[/dim]\n")
+
+        # Display summary
+        status = "[green]PASSED[/green]" if result.passed else "[red]FAILED[/red]"
+        console.print(f"Validation: {status}")
+        console.print(f"Quality Score: [cyan]{result.quality_score:.1f}%[/cyan]")
+        console.print(f"Checks: {result.passed_count}/{result.total_checks} passed\n")
+
+        # Generate report
+        with Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            console=console,
+            transient=True,
+        ) as progress:
+            progress.add_task(f"Generating {output_format.upper()} report...", total=None)
+
+            if output_format.lower() == "pdf":
+                generate_pdf_report(result, output, title=title, include_passed=include_passed)
+            else:
+                generate_html_report(result, output, title=title, include_passed=include_passed)
+
+        console.print(f"[green]SAVED[/green] Report saved to [cyan]{output}[/cyan]")
+        console.print("[dim]Open in browser to view the report[/dim]")
+
+    except ImportError as e:
+        if "weasyprint" in str(e).lower():
+            console.print("[red]Error:[/red] PDF generation requires weasyprint.")
+            console.print("[dim]Install with: pip install duckguard[reports][/dim]")
+        else:
+            console.print(f"[red]Error:[/red] {e}")
+        raise typer.Exit(1)
+    except Exception as e:
+        console.print(f"[red]Error:[/red] {e}")
+        raise typer.Exit(1)
+
+
 if __name__ == "__main__":
     app()
duckguard/connectors/__init__.py
CHANGED

@@ -1,8 +1,8 @@
 """Connectors for various data sources."""
 
-from duckguard.connectors.base import
-from duckguard.connectors.files import FileConnector, S3Connector, GCSConnector, AzureConnector
+from duckguard.connectors.base import ConnectionConfig, Connector
 from duckguard.connectors.factory import connect, register_connector
+from duckguard.connectors.files import AzureConnector, FileConnector, GCSConnector, S3Connector
 
 # Database connectors (imported lazily to avoid import errors)
 __all__ = [
duckguard/connectors/bigquery.py
CHANGED

@@ -5,7 +5,7 @@ from __future__ import annotations
 from typing import Any
 from urllib.parse import urlparse
 
-from duckguard.connectors.base import
+from duckguard.connectors.base import ConnectionConfig, Connector
 from duckguard.core.dataset import Dataset
 from duckguard.core.engine import DuckGuardEngine
 
duckguard/connectors/databricks.py
CHANGED

@@ -5,7 +5,7 @@ from __future__ import annotations
 from typing import Any
 from urllib.parse import parse_qs, urlparse
 
-from duckguard.connectors.base import
+from duckguard.connectors.base import ConnectionConfig, Connector
 from duckguard.core.dataset import Dataset
 from duckguard.core.engine import DuckGuardEngine
 
duckguard/connectors/factory.py
CHANGED

@@ -4,12 +4,11 @@ from __future__ import annotations
 
 from typing import Any
 
-from duckguard.connectors.base import
-from duckguard.connectors.files import
+from duckguard.connectors.base import ConnectionConfig, Connector
+from duckguard.connectors.files import AzureConnector, FileConnector, GCSConnector, S3Connector
 from duckguard.core.dataset import Dataset
 from duckguard.core.engine import DuckGuardEngine
 
-
 # Registry of available connectors
 _CONNECTORS: list[type[Connector]] = [
     S3Connector,
duckguard/connectors/files.py
CHANGED

@@ -5,7 +5,7 @@ from __future__ import annotations
 import os
 from pathlib import Path
 
-from duckguard.connectors.base import
+from duckguard.connectors.base import ConnectionConfig, Connector
 from duckguard.core.dataset import Dataset
 from duckguard.core.engine import DuckGuardEngine
 
duckguard/connectors/kafka.py
CHANGED

@@ -6,7 +6,7 @@ import json
 from typing import Any
 from urllib.parse import parse_qs, urlparse
 
-from duckguard.connectors.base import
+from duckguard.connectors.base import ConnectionConfig, Connector
 from duckguard.core.dataset import Dataset
 from duckguard.core.engine import DuckGuardEngine
 
@@ -320,7 +320,7 @@ class KafkaStreamValidator:
             "messages_failed": 0,
         }
 
-    def add_rule(self, rule: callable) ->
+    def add_rule(self, rule: callable) -> KafkaStreamValidator:
         """Add a validation rule."""
         self.rules.append(rule)
         return self
duckguard/connectors/mongodb.py
CHANGED

@@ -5,7 +5,7 @@ from __future__ import annotations
 from typing import Any
 from urllib.parse import urlparse
 
-from duckguard.connectors.base import
+from duckguard.connectors.base import ConnectionConfig, Connector
 from duckguard.core.dataset import Dataset
 from duckguard.core.engine import DuckGuardEngine
 
duckguard/connectors/mysql.py
CHANGED

@@ -4,7 +4,7 @@ from __future__ import annotations
 
 from urllib.parse import urlparse
 
-from duckguard.connectors.base import
+from duckguard.connectors.base import ConnectionConfig, Connector
 from duckguard.core.dataset import Dataset
 from duckguard.core.engine import DuckGuardEngine
 
duckguard/connectors/oracle.py
CHANGED

@@ -5,7 +5,7 @@ from __future__ import annotations
 from typing import Any
 from urllib.parse import urlparse
 
-from duckguard.connectors.base import
+from duckguard.connectors.base import ConnectionConfig, Connector
 from duckguard.core.dataset import Dataset
 from duckguard.core.engine import DuckGuardEngine
 