duckguard 3.0.0-py3-none-any.whl → 3.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- duckguard/__init__.py +1 -1
- duckguard/anomaly/methods.py +47 -0
- duckguard/anomaly/ml_methods.py +146 -21
- duckguard/cli/main.py +324 -89
- duckguard/core/result.py +40 -14
- duckguard/notifications/email.py +9 -0
- duckguard/notifications/notifiers.py +39 -1
- duckguard/profiler/auto_profile.py +217 -64
- duckguard-3.1.0.dist-info/METADATA +1133 -0
- {duckguard-3.0.0.dist-info → duckguard-3.1.0.dist-info}/RECORD +13 -13
- duckguard-3.0.0.dist-info/METADATA +0 -1072
- {duckguard-3.0.0.dist-info → duckguard-3.1.0.dist-info}/WHEEL +0 -0
- {duckguard-3.0.0.dist-info → duckguard-3.1.0.dist-info}/entry_points.txt +0 -0
- {duckguard-3.0.0.dist-info → duckguard-3.1.0.dist-info}/licenses/LICENSE +0 -0
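The bulk of the change sits in `duckguard/cli/main.py`, which gains a new `profile` command on top of the expanded `AutoProfiler` (see the diff below). As a quick orientation, the command body reduces to roughly the following library calls; the import paths, the `deep` flag, and the result attributes are read off this diff rather than taken from documented API, so treat them as assumptions:

```python
# Rough sketch of what the new `duckguard profile` command does internally,
# based only on the calls visible in the cli/main.py diff below (assumed, not a documented API).
from duckguard.connectors import connect      # accepts a file path or connection string
from duckguard.profiler import AutoProfiler   # deep=True adds distribution/outlier statistics

dataset = connect("data.csv")                 # for databases: connect("postgres://...", table="orders")
result = AutoProfiler(deep=True).profile(dataset)

print(result.row_count, result.column_count, result.overall_quality_grade)
for col in result.columns:
    print(col.name, col.dtype, f"{col.null_percent:.1f}% null", len(col.suggested_rules), "suggested rules")
```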
duckguard/cli/main.py
CHANGED
```diff
@@ -6,6 +6,7 @@ A modern, beautiful CLI for data quality that just works.
 from __future__ import annotations
 
 from pathlib import Path
+from typing import Any
 
 import typer
 from rich.console import Console
@@ -28,11 +29,13 @@ console = Console()
 def version_callback(value: bool) -> None:
     """Print version and exit."""
     if value:
-        console.print(
-
-
-
-
+        console.print(
+            Panel(
+                f"[bold blue]DuckGuard[/bold blue] v{__version__}\n"
+                "[dim]The fast, simple data quality tool[/dim]",
+                border_style="blue",
+            )
+        )
         raise typer.Exit()
 
 
@@ -54,10 +57,16 @@ def main(
 @app.command()
 def check(
     source: str = typer.Argument(..., help="Path to file or connection string"),
-    config: str | None = typer.Option(
+    config: str | None = typer.Option(
+        None, "--config", "-c", help="Path to duckguard.yaml rules file"
+    ),
     table: str | None = typer.Option(None, "--table", "-t", help="Table name (for databases)"),
-    not_null: list[str] | None = typer.Option(
-
+    not_null: list[str] | None = typer.Option(
+        None, "--not-null", "-n", help="Columns that must not be null"
+    ),
+    unique: list[str] | None = typer.Option(
+        None, "--unique", "-u", help="Columns that must be unique"
+    ),
     output: str | None = typer.Option(None, "--output", "-o", help="Output file (json)"),
     verbose: bool = typer.Option(False, "--verbose", "-V", help="Verbose output"),
 ) -> None:
@@ -115,7 +124,9 @@ def check(
     results = []
 
     # Row count check
-    results.append(
+    results.append(
+        ("Row count > 0", dataset.row_count > 0, f"{dataset.row_count:,} rows", None)
+    )
 
     # Not null checks
     if not_null:
@@ -123,14 +134,18 @@ def check(
             if col_name in dataset.columns:
                 col = dataset[col_name]
                 passed = col.null_count == 0
-                results.append(
-
-
-
-
-
+                results.append(
+                    (
+                        f"{col_name} not null",
+                        passed,
+                        f"{col.null_count:,} nulls ({col.null_percent:.1f}%)",
+                        col_name,
+                    )
+                )
             else:
-                results.append(
+                results.append(
+                    (f"{col_name} not null", False, "Column not found", col_name)
+                )
 
     # Unique checks
     if unique:
@@ -139,12 +154,14 @@ def check(
                 col = dataset[col_name]
                 passed = col.unique_percent == 100
                 dup_count = col.total_count - col.unique_count
-                results.append(
-
-
-
-
-
+                results.append(
+                    (
+                        f"{col_name} unique",
+                        passed,
+                        f"{col.unique_percent:.1f}% unique ({dup_count:,} duplicates)",
+                        col_name,
+                    )
+                )
             else:
                 results.append((f"{col_name} unique", False, "Column not found", col_name))
 
@@ -179,7 +196,9 @@ def check(
 def discover(
     source: str = typer.Argument(..., help="Path to file or connection string"),
     table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
-    output: str | None = typer.Option(
+    output: str | None = typer.Option(
+        None, "--output", "-o", help="Output file for rules (duckguard.yaml)"
+    ),
     format: str = typer.Option("yaml", "--format", "-f", help="Output format: yaml, python"),
 ) -> None:
     """
@@ -228,17 +247,181 @@ def discover(
     else:
         # Display YAML
        yaml_content = ruleset_to_yaml(ruleset)
-        console.print(
-
-
-
-
+        console.print(
+            Panel(
+                Syntax(yaml_content, "yaml", theme="monokai"),
+                title="Generated Rules (duckguard.yaml)",
+                border_style="green",
+            )
+        )
+
+    except Exception as e:
+        console.print(f"[red]Error:[/red] {e}")
+        raise typer.Exit(1)
+
+
+@app.command(name="profile")
+def profile_command(
+    source: str = typer.Argument(..., help="Path to file or connection string"),
+    table: str | None = typer.Option(None, "--table", "-t", help="Table name (for databases)"),
+    deep: bool = typer.Option(
+        False, "--deep", "-d", help="Enable deep profiling (distribution, outliers)"
+    ),
+    output: str | None = typer.Option(None, "--output", "-o", help="Output file (json)"),
+    output_format: str = typer.Option("table", "--format", "-f", help="Output format: table, json"),
+) -> None:
+    """
+    Profile a data source and suggest validation rules.
+
+    Analyzes data patterns, statistics, and quality to generate
+    a comprehensive profile with rule suggestions.
+
+    [bold]Examples:[/bold]
+        duckguard profile data.csv
+        duckguard profile data.csv --deep
+        duckguard profile data.csv --format json
+        duckguard profile postgres://localhost/db --table orders
+    """
+    import json as json_module
+
+    from duckguard.connectors import connect
+    from duckguard.profiler import AutoProfiler
+
+    if output_format != "json":
+        console.print(f"\n[bold blue]DuckGuard[/bold blue] Profiling: [cyan]{source}[/cyan]\n")
+
+    try:
+        with Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            console=console,
+            transient=True,
+        ) as progress:
+            _task = progress.add_task("Profiling data...", total=None) # noqa: F841
+            dataset = connect(source, table=table)
+            profiler = AutoProfiler(deep=deep)
+            result = profiler.profile(dataset)
+
+        if output_format == "json":
+            data = _profile_to_dict(result)
+            json_str = json_module.dumps(data, indent=2, default=str)
+            if output:
+                Path(output).write_text(json_str, encoding="utf-8")
+                console.print(f"[green]SAVED[/green] Profile saved to [cyan]{output}[/cyan]")
+            else:
+                print(json_str)
+        else:
+            _display_profile_result(result)
+
+            if output:
+                data = _profile_to_dict(result)
+                Path(output).write_text(
+                    json_module.dumps(data, indent=2, default=str), encoding="utf-8"
+                )
+                console.print(f"\n[green]SAVED[/green] Profile saved to [cyan]{output}[/cyan]")
 
     except Exception as e:
         console.print(f"[red]Error:[/red] {e}")
         raise typer.Exit(1)
 
 
+def _display_profile_result(result: Any) -> None:
+    """Display profiling results in a rich table."""
+    _grade_colors = {"A": "green", "B": "blue", "C": "yellow", "D": "orange1", "F": "red"}
+
+    summary_parts = [
+        f"Rows: [cyan]{result.row_count:,}[/cyan]",
+        f"Columns: [cyan]{result.column_count}[/cyan]",
+        f"Rules Suggested: [cyan]{len(result.suggested_rules)}[/cyan]",
+    ]
+    if result.overall_quality_score is not None:
+        color = _grade_colors.get(result.overall_quality_grade, "white")
+        summary_parts.append(
+            f"Quality: [{color}]{result.overall_quality_score:.0f}/100 "
+            f"({result.overall_quality_grade})[/{color}]"
+        )
+
+    console.print(Panel("\n".join(summary_parts), title="Profile Summary", border_style="blue"))
+    console.print()
+
+    col_table = Table(title="Column Profiles")
+    col_table.add_column("Column", style="cyan")
+    col_table.add_column("Type", style="magenta")
+    col_table.add_column("Nulls", justify="right")
+    col_table.add_column("Unique", justify="right")
+    col_table.add_column("Min", justify="right")
+    col_table.add_column("Max", justify="right")
+    col_table.add_column("Grade", justify="center")
+    col_table.add_column("Rules", justify="right")
+
+    for col in result.columns:
+        grade_str = ""
+        if col.quality_grade:
+            color = _grade_colors.get(col.quality_grade, "white")
+            grade_str = f"[{color}]{col.quality_grade}[/{color}]"
+
+        col_table.add_row(
+            col.name,
+            col.dtype,
+            f"{col.null_percent:.1f}%",
+            f"{col.unique_percent:.1f}%",
+            str(col.min_value) if col.min_value is not None else "-",
+            str(col.max_value) if col.max_value is not None else "-",
+            grade_str or "-",
+            str(len(col.suggested_rules)),
+        )
+
+    console.print(col_table)
+
+    if result.suggested_rules:
+        console.print()
+        console.print(f"[bold]Suggested Rules ({len(result.suggested_rules)}):[/bold]")
+        for rule in result.suggested_rules[:20]:
+            console.print(f" {rule}")
+        if len(result.suggested_rules) > 20:
+            console.print(f" [dim]... and {len(result.suggested_rules) - 20} more[/dim]")
+
+
+def _profile_to_dict(result: Any) -> dict[str, Any]:
+    """Convert ProfileResult to a JSON-serializable dict."""
+
+    return {
+        "source": result.source,
+        "row_count": result.row_count,
+        "column_count": result.column_count,
+        "overall_quality_score": result.overall_quality_score,
+        "overall_quality_grade": result.overall_quality_grade,
+        "columns": [
+            {
+                "name": col.name,
+                "dtype": col.dtype,
+                "null_count": col.null_count,
+                "null_percent": col.null_percent,
+                "unique_count": col.unique_count,
+                "unique_percent": col.unique_percent,
+                "min_value": col.min_value,
+                "max_value": col.max_value,
+                "mean_value": col.mean_value,
+                "stddev_value": col.stddev_value,
+                "median_value": col.median_value,
+                "p25_value": col.p25_value,
+                "p75_value": col.p75_value,
+                "quality_score": col.quality_score,
+                "quality_grade": col.quality_grade,
+                "distribution_type": col.distribution_type,
+                "skewness": col.skewness,
+                "kurtosis": col.kurtosis,
+                "is_normal": col.is_normal,
+                "outlier_count": col.outlier_count,
+                "outlier_percentage": col.outlier_percentage,
+                "suggested_rules": col.suggested_rules,
+            }
+            for col in result.columns
+        ],
+        "suggested_rules": result.suggested_rules,
+    }
+
+
 @app.command()
 def contract(
     action: str = typer.Argument(..., help="Action: generate, validate, diff"),
@@ -274,7 +457,9 @@ def contract(
             console.print("[red]Error:[/red] Source required for generate")
             raise typer.Exit(1)
 
-        console.print(
+        console.print(
+            f"\n[bold blue]DuckGuard[/bold blue] Generating contract for: [cyan]{source}[/cyan]\n"
+        )
 
         with Progress(
             SpinnerColumn(),
@@ -338,10 +523,16 @@ def contract(
 def anomaly(
     source: str = typer.Argument(..., help="Path to file or connection string"),
     table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
-    method: str = typer.Option(
+    method: str = typer.Option(
+        "zscore", "--method", "-m", help="Method: zscore, iqr, percent_change, baseline, ks_test"
+    ),
     threshold: float | None = typer.Option(None, "--threshold", help="Detection threshold"),
-    columns: list[str] | None = typer.Option(
-
+    columns: list[str] | None = typer.Option(
+        None, "--column", "-c", help="Specific columns to check"
+    ),
+    learn_baseline: bool = typer.Option(
+        False, "--learn-baseline", "-L", help="Learn and store baseline from current data"
+    ),
 ) -> None:
     """
     Detect anomalies in data.
@@ -364,7 +555,9 @@ def anomaly(
     from duckguard.anomaly import detect_anomalies
     from duckguard.connectors import connect
 
-    console.print(
+    console.print(
+        f"\n[bold blue]DuckGuard[/bold blue] Detecting anomalies in: [cyan]{source}[/cyan]\n"
+    )
 
     try:
         with Progress(
@@ -401,7 +594,9 @@ def anomaly(
                 learned += 1
 
         console.print(f"[green]LEARNED[/green] Baselines stored for {learned} columns")
-        console.print(
+        console.print(
+            "[dim]Use --method baseline to compare against stored baselines[/dim]"
+        )
         return
 
     # Regular anomaly detection
@@ -441,10 +636,7 @@ def info(
     dataset = connect(source, table=table)
     analyzer = SemanticAnalyzer()
 
-    console.print(Panel(
-        f"[bold]{dataset.name}[/bold]",
-        border_style="blue"
-    ))
+    console.print(Panel(f"[bold]{dataset.name}[/bold]", border_style="blue"))
 
     # Basic info
     info_table = Table(show_header=False, box=None)
@@ -496,6 +688,7 @@ def info(
 
 # Helper display functions
 
+
 def _display_execution_result(result, verbose: bool = False) -> None:
     """Display rule execution results."""
     table = Table(title="Validation Results")
@@ -552,11 +745,13 @@ def _display_quality_score(quality) -> None:
     color = grade_colors.get(quality.grade, "white")
 
     console.print()
-    console.print(
-
-
-
-
+    console.print(
+        Panel(
+            f"[bold]Quality Score: [{color}]{quality.overall:.0f}/100[/{color}] "
+            f"(Grade: [{color}]{quality.grade}[/{color}])[/bold]",
+            border_style=color,
+        )
+    )
 
 
 def _display_discovery_results(analysis, ruleset) -> None:
@@ -566,11 +761,13 @@ def _display_discovery_results(analysis, ruleset) -> None:
 
     # PII warning
     if analysis.pii_columns:
-        console.print(
-
-
-
-
+        console.print(
+            Panel(
+                "[yellow]WARNING: PII Detected[/yellow]\n"
+                + "\n".join(f" - {col}" for col in analysis.pii_columns),
+                border_style="yellow",
+            )
+        )
         console.print()
 
     # Column analysis table
@@ -611,7 +808,7 @@ def _display_contract(contract) -> None:
     table.add_column("PII")
 
     for field_obj in contract.schema[:15]:
-        type_str = field_obj.type.value if hasattr(field_obj.type,
+        type_str = field_obj.type.value if hasattr(field_obj.type, "value") else str(field_obj.type)
         table.add_row(
             field_obj.name,
             type_str,
@@ -645,7 +842,9 @@ def _display_contract_validation(result) -> None:
     table.add_column("Severity")
 
     for v in result.violations[:20]:
-        sev_style = {"error": "red", "warning": "yellow", "info": "dim"}.get(
+        sev_style = {"error": "red", "warning": "yellow", "info": "dim"}.get(
+            v.severity.value, "white"
+        )
         table.add_row(
             v.type.value,
             v.field or "-",
@@ -696,7 +895,9 @@ def _display_anomaly_report(report) -> None:
         console.print("[green]No anomalies detected[/green]")
         return
 
-    console.print(
+    console.print(
+        f"[yellow bold]WARNING: {report.anomaly_count} anomalies detected[/yellow bold]\n"
+    )
 
     table = Table(title="Anomalies")
     table.add_column("Column", style="cyan")
@@ -727,10 +928,7 @@ def _save_results(output: str, dataset, results) -> None:
     }
 
     if results:
-        data["checks"] = [
-            {"name": r[0], "passed": r[1], "details": r[2]}
-            for r in results
-        ]
+        data["checks"] = [{"name": r[0], "passed": r[1], "details": r[2]} for r in results]
 
     Path(output).write_text(json.dumps(data, indent=2))
 
@@ -767,7 +965,9 @@ def history(
 
     if trend and source:
         # Show trend analysis
-        console.print(
+        console.print(
+            f"\n[bold blue]DuckGuard[/bold blue] Trend Analysis: [cyan]{source}[/cyan]\n"
+        )
 
         analyzer = TrendAnalyzer(storage)
         analysis = analyzer.analyze(source, days=days)
@@ -790,20 +990,24 @@ def history(
             "stable": "[=]",
         }.get(analysis.score_trend, "[=]")
 
-        console.print(
-
-
-
-
-
-
-
-
-
-
+        console.print(
+            Panel(
+                f"[bold]Quality Trend: [{trend_color}]{trend_symbol} {analysis.score_trend.upper()}[/{trend_color}][/bold]\n\n"
+                f"Current Score: [cyan]{analysis.current_score:.1f}%[/cyan]\n"
+                f"Average Score: [cyan]{analysis.average_score:.1f}%[/cyan]\n"
+                f"Min/Max: [dim]{analysis.min_score:.1f}% - {analysis.max_score:.1f}%[/dim]\n"
+                f"Change: [{trend_color}]{analysis.trend_change:+.1f}%[/{trend_color}]\n"
+                f"Total Runs: [cyan]{analysis.total_runs}[/cyan]\n"
+                f"Pass Rate: [cyan]{analysis.pass_rate:.1f}%[/cyan]",
+                title=f"Last {days} Days",
+                border_style=trend_color,
+            )
+        )
 
         if analysis.anomalies:
-            console.print(
+            console.print(
+                f"\n[yellow]Anomalies detected on: {', '.join(analysis.anomalies)}[/yellow]"
+            )
 
         # Show daily data if available
         if analysis.daily_data and len(analysis.daily_data) <= 14:
@@ -816,7 +1020,11 @@ def history(
 
         for day in analysis.daily_data:
             pass_rate = (day.passed_count / day.run_count * 100) if day.run_count > 0 else 0
-            score_style =
+            score_style = (
+                "green"
+                if day.avg_score >= 80
+                else "yellow" if day.avg_score >= 60 else "red"
+            )
             table.add_row(
                 day.date,
                 f"[{score_style}]{day.avg_score:.1f}%[/{score_style}]",
@@ -829,7 +1037,9 @@ def history(
     else:
         # Show run history
         if source:
-            console.print(
+            console.print(
+                f"\n[bold blue]DuckGuard[/bold blue] History: [cyan]{source}[/cyan]\n"
+            )
             runs = storage.get_runs(source, limit=20)
         else:
            console.print("\n[bold blue]DuckGuard[/bold blue] Recent Validation History\n")
@@ -867,7 +1077,11 @@ def history(
         table.add_column("Checks", justify="right")
 
         for run in runs:
-            score_style =
+            score_style = (
+                "green"
+                if run.quality_score >= 80
+                else "yellow" if run.quality_score >= 60 else "red"
+            )
             status = "[green]PASS[/green]" if run.passed else "[red]FAIL[/red]"
 
             table.add_row(
@@ -893,12 +1107,16 @@ def history(
 @app.command()
 def report(
     source: str = typer.Argument(..., help="Data source path or connection string"),
-    config: str | None = typer.Option(
+    config: str | None = typer.Option(
+        None, "--config", "-c", help="Path to duckguard.yaml rules file"
+    ),
     table: str | None = typer.Option(None, "--table", "-t", help="Table name (for databases)"),
     output_format: str = typer.Option("html", "--format", "-f", help="Output format: html, pdf"),
     output: str = typer.Option("report.html", "--output", "-o", help="Output file path"),
     title: str = typer.Option("DuckGuard Data Quality Report", "--title", help="Report title"),
-    include_passed: bool = typer.Option(
+    include_passed: bool = typer.Option(
+        True, "--include-passed/--no-passed", help="Include passed checks"
+    ),
     store: bool = typer.Option(False, "--store", "-s", help="Store results in history"),
 ) -> None:
     """
@@ -997,7 +1215,9 @@ def report(
 def freshness(
     source: str = typer.Argument(..., help="Data source path"),
     column: str | None = typer.Option(None, "--column", "-c", help="Timestamp column to check"),
-    max_age: str = typer.Option(
+    max_age: str = typer.Option(
+        "24h", "--max-age", "-m", help="Maximum acceptable age: 1h, 6h, 24h, 7d"
+    ),
     output_format: str = typer.Option("table", "--format", "-f", help="Output format: table, json"),
 ) -> None:
     """
@@ -1038,6 +1258,7 @@ def freshness(
         else:
             # Try file mtime first, fallback to dataset
             from pathlib import Path
+
             if Path(source).exists():
                 result = monitor.check_file_mtime(source)
             else:
@@ -1051,15 +1272,17 @@ def freshness(
     status_color = "green" if result.is_fresh else "red"
     status_text = "FRESH" if result.is_fresh else "STALE"
 
-    console.print(
-
-
-
-
-
-
-
-
+    console.print(
+        Panel(
+            f"[bold {status_color}]{status_text}[/bold {status_color}]\n\n"
+            f"Last Modified: [cyan]{result.last_modified.strftime('%Y-%m-%d %H:%M:%S') if result.last_modified else 'Unknown'}[/cyan]\n"
+            f"Age: [cyan]{result.age_human}[/cyan]\n"
+            f"Threshold: [dim]{max_age}[/dim]\n"
+            f"Method: [dim]{result.method.value}[/dim]",
+            title="Freshness Check",
+            border_style=status_color,
+        )
+    )
 
     if not result.is_fresh:
         raise typer.Exit(1)
@@ -1072,7 +1295,9 @@ def freshness(
 @app.command()
 def schema(
     source: str = typer.Argument(..., help="Data source path"),
-    action: str = typer.Option(
+    action: str = typer.Option(
+        "show", "--action", "-a", help="Action: show, capture, history, changes"
+    ),
     table: str | None = typer.Option(None, "--table", "-t", help="Table name (for databases)"),
     output_format: str = typer.Option("table", "--format", "-f", help="Output format: table, json"),
     limit: int = typer.Option(10, "--limit", "-l", help="Number of results to show"),
@@ -1132,9 +1357,15 @@ def schema(
             progress.add_task("Capturing schema snapshot...", total=None)
             snapshot = tracker.capture(dataset)
 
-        console.print(
-
-
+        console.print(
+            f"[green]CAPTURED[/green] Schema snapshot: [cyan]{snapshot.snapshot_id[:8]}...[/cyan]"
+        )
+        console.print(
+            f"[dim]Columns: {snapshot.column_count} | Rows: {snapshot.row_count:,}[/dim]"
+        )
+        console.print(
+            f"[dim]Captured at: {snapshot.captured_at.strftime('%Y-%m-%d %H:%M:%S')}[/dim]"
+        )
 
     elif action == "history":
         history = tracker.get_history(source, limit=limit)
@@ -1176,11 +1407,15 @@ def schema(
 
         if not report.has_changes:
             console.print("[green]No schema changes detected[/green]")
-            console.print(
+            console.print(
+                f"[dim]Snapshot captured: {report.current_snapshot.snapshot_id[:8]}...[/dim]"
+            )
             return
 
         # Display changes
-        console.print(
+        console.print(
+            f"[yellow bold]{len(report.changes)} schema changes detected[/yellow bold]\n"
+        )
 
         if report.has_breaking_changes:
             console.print("[red bold]BREAKING CHANGES:[/red bold]")
```