duckguard 2.0.0__py3-none-any.whl → 2.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- duckguard/__init__.py +55 -28
- duckguard/anomaly/__init__.py +29 -1
- duckguard/anomaly/baselines.py +294 -0
- duckguard/anomaly/detector.py +1 -5
- duckguard/anomaly/methods.py +17 -5
- duckguard/anomaly/ml_methods.py +724 -0
- duckguard/cli/main.py +561 -56
- duckguard/connectors/__init__.py +2 -2
- duckguard/connectors/bigquery.py +1 -1
- duckguard/connectors/databricks.py +1 -1
- duckguard/connectors/factory.py +2 -3
- duckguard/connectors/files.py +1 -1
- duckguard/connectors/kafka.py +2 -2
- duckguard/connectors/mongodb.py +1 -1
- duckguard/connectors/mysql.py +1 -1
- duckguard/connectors/oracle.py +1 -1
- duckguard/connectors/postgres.py +1 -2
- duckguard/connectors/redshift.py +1 -1
- duckguard/connectors/snowflake.py +1 -2
- duckguard/connectors/sqlite.py +1 -1
- duckguard/connectors/sqlserver.py +10 -13
- duckguard/contracts/__init__.py +6 -6
- duckguard/contracts/diff.py +1 -1
- duckguard/contracts/generator.py +5 -6
- duckguard/contracts/loader.py +4 -4
- duckguard/contracts/validator.py +3 -4
- duckguard/core/__init__.py +3 -3
- duckguard/core/column.py +588 -5
- duckguard/core/dataset.py +708 -3
- duckguard/core/result.py +328 -1
- duckguard/core/scoring.py +1 -2
- duckguard/errors.py +362 -0
- duckguard/freshness/__init__.py +33 -0
- duckguard/freshness/monitor.py +429 -0
- duckguard/history/__init__.py +44 -0
- duckguard/history/schema.py +301 -0
- duckguard/history/storage.py +479 -0
- duckguard/history/trends.py +348 -0
- duckguard/integrations/__init__.py +31 -0
- duckguard/integrations/airflow.py +387 -0
- duckguard/integrations/dbt.py +458 -0
- duckguard/notifications/__init__.py +61 -0
- duckguard/notifications/email.py +508 -0
- duckguard/notifications/formatter.py +118 -0
- duckguard/notifications/notifiers.py +357 -0
- duckguard/profiler/auto_profile.py +3 -3
- duckguard/pytest_plugin/__init__.py +1 -1
- duckguard/pytest_plugin/plugin.py +1 -1
- duckguard/reporting/console.py +2 -2
- duckguard/reports/__init__.py +42 -0
- duckguard/reports/html_reporter.py +514 -0
- duckguard/reports/pdf_reporter.py +114 -0
- duckguard/rules/__init__.py +3 -3
- duckguard/rules/executor.py +3 -4
- duckguard/rules/generator.py +8 -5
- duckguard/rules/loader.py +5 -5
- duckguard/rules/schema.py +23 -0
- duckguard/schema_history/__init__.py +40 -0
- duckguard/schema_history/analyzer.py +414 -0
- duckguard/schema_history/tracker.py +288 -0
- duckguard/semantic/__init__.py +1 -1
- duckguard/semantic/analyzer.py +0 -2
- duckguard/semantic/detector.py +17 -1
- duckguard/semantic/validators.py +2 -1
- duckguard-2.3.0.dist-info/METADATA +953 -0
- duckguard-2.3.0.dist-info/RECORD +77 -0
- duckguard-2.0.0.dist-info/METADATA +0 -221
- duckguard-2.0.0.dist-info/RECORD +0 -55
- {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/WHEEL +0 -0
- {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/entry_points.txt +0 -0
- {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/licenses/LICENSE +0 -0
duckguard/cli/main.py
CHANGED
|
@@ -5,21 +5,14 @@ A modern, beautiful CLI for data quality that just works.
|
|
|
5
5
|
|
|
6
6
|
from __future__ import annotations
|
|
7
7
|
|
|
8
|
-
import sys
|
|
9
8
|
from pathlib import Path
|
|
10
|
-
from typing import Optional
|
|
11
9
|
|
|
12
10
|
import typer
|
|
13
11
|
from rich.console import Console
|
|
14
12
|
from rich.panel import Panel
|
|
15
|
-
from rich.table import Table
|
|
16
|
-
from rich.syntax import Syntax
|
|
17
13
|
from rich.progress import Progress, SpinnerColumn, TextColumn
|
|
18
|
-
from rich import
|
|
19
|
-
from rich.
|
|
20
|
-
from rich.text import Text
|
|
21
|
-
from rich.columns import Columns
|
|
22
|
-
from rich.markdown import Markdown
|
|
14
|
+
from rich.syntax import Syntax
|
|
15
|
+
from rich.table import Table
|
|
23
16
|
|
|
24
17
|
from duckguard import __version__
|
|
25
18
|
|
|
@@ -45,7 +38,7 @@ def version_callback(value: bool) -> None:
|
|
|
45
38
|
|
|
46
39
|
@app.callback()
|
|
47
40
|
def main(
|
|
48
|
-
version:
|
|
41
|
+
version: bool | None = typer.Option(
|
|
49
42
|
None,
|
|
50
43
|
"--version",
|
|
51
44
|
"-v",
|
|
@@ -61,11 +54,11 @@ def main(
|
|
|
61
54
|
@app.command()
|
|
62
55
|
def check(
|
|
63
56
|
source: str = typer.Argument(..., help="Path to file or connection string"),
|
|
64
|
-
config:
|
|
65
|
-
table:
|
|
66
|
-
not_null:
|
|
67
|
-
unique:
|
|
68
|
-
output:
|
|
57
|
+
config: str | None = typer.Option(None, "--config", "-c", help="Path to duckguard.yaml rules file"),
|
|
58
|
+
table: str | None = typer.Option(None, "--table", "-t", help="Table name (for databases)"),
|
|
59
|
+
not_null: list[str] | None = typer.Option(None, "--not-null", "-n", help="Columns that must not be null"),
|
|
60
|
+
unique: list[str] | None = typer.Option(None, "--unique", "-u", help="Columns that must be unique"),
|
|
61
|
+
output: str | None = typer.Option(None, "--output", "-o", help="Output file (json)"),
|
|
69
62
|
verbose: bool = typer.Option(False, "--verbose", "-V", help="Verbose output"),
|
|
70
63
|
) -> None:
|
|
71
64
|
"""
|
|
@@ -78,8 +71,8 @@ def check(
|
|
|
78
71
|
duckguard check postgres://localhost/db --table orders
|
|
79
72
|
"""
|
|
80
73
|
from duckguard.connectors import connect
|
|
81
|
-
from duckguard.rules import load_rules, execute_rules
|
|
82
74
|
from duckguard.core.scoring import score
|
|
75
|
+
from duckguard.rules import execute_rules, load_rules
|
|
83
76
|
|
|
84
77
|
console.print(f"\n[bold blue]DuckGuard[/bold blue] Checking: [cyan]{source}[/cyan]\n")
|
|
85
78
|
|
|
@@ -185,8 +178,8 @@ def check(
|
|
|
185
178
|
@app.command()
|
|
186
179
|
def discover(
|
|
187
180
|
source: str = typer.Argument(..., help="Path to file or connection string"),
|
|
188
|
-
table:
|
|
189
|
-
output:
|
|
181
|
+
table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
|
|
182
|
+
output: str | None = typer.Option(None, "--output", "-o", help="Output file for rules (duckguard.yaml)"),
|
|
190
183
|
format: str = typer.Option("yaml", "--format", "-f", help="Output format: yaml, python"),
|
|
191
184
|
) -> None:
|
|
192
185
|
"""
|
|
@@ -213,7 +206,7 @@ def discover(
|
|
|
213
206
|
console=console,
|
|
214
207
|
transient=True,
|
|
215
208
|
) as progress:
|
|
216
|
-
|
|
209
|
+
_task = progress.add_task("Analyzing data...", total=None) # noqa: F841
|
|
217
210
|
dataset = connect(source, table=table)
|
|
218
211
|
|
|
219
212
|
# Semantic analysis
|
|
@@ -230,7 +223,7 @@ def discover(
|
|
|
230
223
|
if output:
|
|
231
224
|
yaml_content = ruleset_to_yaml(ruleset)
|
|
232
225
|
Path(output).write_text(yaml_content, encoding="utf-8")
|
|
233
|
-
console.print(f"\n[green]
|
|
226
|
+
console.print(f"\n[green]SAVED[/green] Rules saved to [cyan]{output}[/cyan]")
|
|
234
227
|
console.print(f"[dim]Run: duckguard check {source} --config {output}[/dim]")
|
|
235
228
|
else:
|
|
236
229
|
# Display YAML
|
|
@@ -250,8 +243,8 @@ def discover(
|
|
|
250
243
|
def contract(
|
|
251
244
|
action: str = typer.Argument(..., help="Action: generate, validate, diff"),
|
|
252
245
|
source: str = typer.Argument(None, help="Data source or contract file"),
|
|
253
|
-
contract_file:
|
|
254
|
-
output:
|
|
246
|
+
contract_file: str | None = typer.Option(None, "--contract", "-c", help="Contract file path"),
|
|
247
|
+
output: str | None = typer.Option(None, "--output", "-o", help="Output file"),
|
|
255
248
|
strict: bool = typer.Option(False, "--strict", help="Strict validation mode"),
|
|
256
249
|
) -> None:
|
|
257
250
|
"""
|
|
@@ -268,13 +261,12 @@ def contract(
|
|
|
268
261
|
duckguard contract diff old.contract.yaml new.contract.yaml
|
|
269
262
|
"""
|
|
270
263
|
from duckguard.contracts import (
|
|
264
|
+
diff_contracts,
|
|
265
|
+
generate_contract,
|
|
271
266
|
load_contract,
|
|
272
267
|
validate_contract,
|
|
273
|
-
generate_contract,
|
|
274
|
-
diff_contracts,
|
|
275
268
|
)
|
|
276
269
|
from duckguard.contracts.loader import contract_to_yaml
|
|
277
|
-
from duckguard.connectors import connect
|
|
278
270
|
|
|
279
271
|
try:
|
|
280
272
|
if action == "generate":
|
|
@@ -298,14 +290,14 @@ def contract(
|
|
|
298
290
|
if output:
|
|
299
291
|
yaml_content = contract_to_yaml(contract_obj)
|
|
300
292
|
Path(output).write_text(yaml_content, encoding="utf-8")
|
|
301
|
-
console.print(f"\n[green]
|
|
293
|
+
console.print(f"\n[green]SAVED[/green] Contract saved to [cyan]{output}[/cyan]")
|
|
302
294
|
|
|
303
295
|
elif action == "validate":
|
|
304
296
|
if not source or not contract_file:
|
|
305
297
|
console.print("[red]Error:[/red] Both source and --contract required for validate")
|
|
306
298
|
raise typer.Exit(1)
|
|
307
299
|
|
|
308
|
-
console.print(
|
|
300
|
+
console.print("\n[bold blue]DuckGuard[/bold blue] Validating against contract\n")
|
|
309
301
|
|
|
310
302
|
with Progress(
|
|
311
303
|
SpinnerColumn(),
|
|
@@ -345,21 +337,32 @@ def contract(
|
|
|
345
337
|
@app.command()
|
|
346
338
|
def anomaly(
|
|
347
339
|
source: str = typer.Argument(..., help="Path to file or connection string"),
|
|
348
|
-
table:
|
|
349
|
-
method: str = typer.Option("zscore", "--method", "-m", help="
|
|
350
|
-
threshold:
|
|
351
|
-
columns:
|
|
340
|
+
table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
|
|
341
|
+
method: str = typer.Option("zscore", "--method", "-m", help="Method: zscore, iqr, percent_change, baseline, ks_test"),
|
|
342
|
+
threshold: float | None = typer.Option(None, "--threshold", help="Detection threshold"),
|
|
343
|
+
columns: list[str] | None = typer.Option(None, "--column", "-c", help="Specific columns to check"),
|
|
344
|
+
learn_baseline: bool = typer.Option(False, "--learn-baseline", "-L", help="Learn and store baseline from current data"),
|
|
352
345
|
) -> None:
|
|
353
346
|
"""
|
|
354
347
|
Detect anomalies in data.
|
|
355
348
|
|
|
349
|
+
[bold]Methods:[/bold]
|
|
350
|
+
zscore - Z-score based detection (default)
|
|
351
|
+
iqr - Interquartile range detection
|
|
352
|
+
percent_change - Percent change from baseline
|
|
353
|
+
baseline - Compare to learned baseline (ML)
|
|
354
|
+
ks_test - Distribution drift detection (ML)
|
|
355
|
+
|
|
356
356
|
[bold]Examples:[/bold]
|
|
357
357
|
duckguard anomaly data.csv
|
|
358
358
|
duckguard anomaly data.csv --method iqr --threshold 2.0
|
|
359
359
|
duckguard anomaly data.csv --column amount --column quantity
|
|
360
|
+
duckguard anomaly data.csv --learn-baseline # Store baseline
|
|
361
|
+
duckguard anomaly data.csv --method baseline # Compare to baseline
|
|
362
|
+
duckguard anomaly data.csv --method ks_test # Detect drift
|
|
360
363
|
"""
|
|
361
|
-
from duckguard.connectors import connect
|
|
362
364
|
from duckguard.anomaly import detect_anomalies
|
|
365
|
+
from duckguard.connectors import connect
|
|
363
366
|
|
|
364
367
|
console.print(f"\n[bold blue]DuckGuard[/bold blue] Detecting anomalies in: [cyan]{source}[/cyan]\n")
|
|
365
368
|
|
|
@@ -370,8 +373,38 @@ def anomaly(
|
|
|
370
373
|
console=console,
|
|
371
374
|
transient=True,
|
|
372
375
|
) as progress:
|
|
373
|
-
|
|
376
|
+
if learn_baseline:
|
|
377
|
+
progress.add_task("Learning baseline...", total=None)
|
|
378
|
+
else:
|
|
379
|
+
progress.add_task("Analyzing data...", total=None)
|
|
380
|
+
|
|
374
381
|
dataset = connect(source, table=table)
|
|
382
|
+
|
|
383
|
+
# Handle baseline learning
|
|
384
|
+
if learn_baseline:
|
|
385
|
+
from duckguard.anomaly import BaselineMethod
|
|
386
|
+
from duckguard.history import HistoryStorage
|
|
387
|
+
|
|
388
|
+
storage = HistoryStorage()
|
|
389
|
+
baseline_method = BaselineMethod(storage=storage)
|
|
390
|
+
|
|
391
|
+
# Get numeric columns to learn baselines for
|
|
392
|
+
target_columns = columns if columns else dataset.columns
|
|
393
|
+
learned = 0
|
|
394
|
+
|
|
395
|
+
for col_name in target_columns:
|
|
396
|
+
col = dataset[col_name]
|
|
397
|
+
if col.mean is not None: # Numeric column
|
|
398
|
+
values = col.values
|
|
399
|
+
baseline_method.fit(values)
|
|
400
|
+
baseline_method.save_baseline(source, col_name)
|
|
401
|
+
learned += 1
|
|
402
|
+
|
|
403
|
+
console.print(f"[green]LEARNED[/green] Baselines stored for {learned} columns")
|
|
404
|
+
console.print("[dim]Use --method baseline to compare against stored baselines[/dim]")
|
|
405
|
+
return
|
|
406
|
+
|
|
407
|
+
# Regular anomaly detection
|
|
375
408
|
report = detect_anomalies(
|
|
376
409
|
dataset,
|
|
377
410
|
method=method,
|
|
@@ -392,7 +425,7 @@ def anomaly(
|
|
|
392
425
|
@app.command()
|
|
393
426
|
def info(
|
|
394
427
|
source: str = typer.Argument(..., help="Path to file or connection string"),
|
|
395
|
-
table:
|
|
428
|
+
table: str | None = typer.Option(None, "--table", "-t", help="Table name"),
|
|
396
429
|
) -> None:
|
|
397
430
|
"""
|
|
398
431
|
Display information about a data source.
|
|
@@ -441,7 +474,7 @@ def info(
|
|
|
441
474
|
if sem_type == "unknown":
|
|
442
475
|
sem_type = "-"
|
|
443
476
|
if col_analysis.is_pii:
|
|
444
|
-
sem_type = f"
|
|
477
|
+
sem_type = f"[PII] {sem_type}"
|
|
445
478
|
|
|
446
479
|
col_table.add_row(
|
|
447
480
|
col_name,
|
|
@@ -472,11 +505,11 @@ def _display_execution_result(result, verbose: bool = False) -> None:
|
|
|
472
505
|
|
|
473
506
|
for check_result in result.results:
|
|
474
507
|
if check_result.passed:
|
|
475
|
-
status = "[green]
|
|
508
|
+
status = "[green]PASS[/green]"
|
|
476
509
|
elif check_result.severity.value == "warning":
|
|
477
|
-
status = "[yellow]
|
|
510
|
+
status = "[yellow]WARN[/yellow]"
|
|
478
511
|
else:
|
|
479
|
-
status = "[red]
|
|
512
|
+
status = "[red]FAIL[/red]"
|
|
480
513
|
|
|
481
514
|
col_str = f"[{check_result.column}] " if check_result.column else ""
|
|
482
515
|
table.add_row(
|
|
@@ -490,10 +523,10 @@ def _display_execution_result(result, verbose: bool = False) -> None:
|
|
|
490
523
|
# Summary
|
|
491
524
|
console.print()
|
|
492
525
|
if result.passed:
|
|
493
|
-
console.print(f"[green]
|
|
526
|
+
console.print(f"[green]All {result.total_checks} checks passed[/green]")
|
|
494
527
|
else:
|
|
495
528
|
console.print(
|
|
496
|
-
f"[red]
|
|
529
|
+
f"[red]{result.failed_count} failed[/red], "
|
|
497
530
|
f"[yellow]{result.warning_count} warnings[/yellow], "
|
|
498
531
|
f"[green]{result.passed_count} passed[/green]"
|
|
499
532
|
)
|
|
@@ -507,7 +540,7 @@ def _display_quick_results(results: list) -> None:
|
|
|
507
540
|
table.add_column("Details")
|
|
508
541
|
|
|
509
542
|
for check_name, passed, details, _ in results:
|
|
510
|
-
status = "[green]
|
|
543
|
+
status = "[green]PASS[/green]" if passed else "[red]FAIL[/red]"
|
|
511
544
|
table.add_row(check_name, status, details)
|
|
512
545
|
|
|
513
546
|
console.print(table)
|
|
@@ -534,8 +567,8 @@ def _display_discovery_results(analysis, ruleset) -> None:
|
|
|
534
567
|
# PII warning
|
|
535
568
|
if analysis.pii_columns:
|
|
536
569
|
console.print(Panel(
|
|
537
|
-
"[yellow]
|
|
538
|
-
"\n".join(f"
|
|
570
|
+
"[yellow]WARNING: PII Detected[/yellow]\n" +
|
|
571
|
+
"\n".join(f" - {col}" for col in analysis.pii_columns),
|
|
539
572
|
border_style="yellow",
|
|
540
573
|
))
|
|
541
574
|
console.print()
|
|
@@ -549,7 +582,7 @@ def _display_discovery_results(analysis, ruleset) -> None:
|
|
|
549
582
|
for col in analysis.columns[:15]:
|
|
550
583
|
sem = col.semantic_type.value
|
|
551
584
|
if col.is_pii:
|
|
552
|
-
sem = f"
|
|
585
|
+
sem = f"[PII] {sem}"
|
|
553
586
|
|
|
554
587
|
rules = ", ".join(col.suggested_validations[:3])
|
|
555
588
|
if len(col.suggested_validations) > 3:
|
|
@@ -582,9 +615,9 @@ def _display_contract(contract) -> None:
|
|
|
582
615
|
table.add_row(
|
|
583
616
|
field_obj.name,
|
|
584
617
|
type_str,
|
|
585
|
-
"
|
|
586
|
-
"
|
|
587
|
-
"
|
|
618
|
+
"Y" if field_obj.required else "",
|
|
619
|
+
"Y" if field_obj.unique else "",
|
|
620
|
+
"[PII]" if field_obj.pii else "",
|
|
588
621
|
)
|
|
589
622
|
|
|
590
623
|
console.print(table)
|
|
@@ -593,14 +626,14 @@ def _display_contract(contract) -> None:
|
|
|
593
626
|
if contract.quality:
|
|
594
627
|
console.print("\n[bold]Quality SLA:[/bold]")
|
|
595
628
|
if contract.quality.completeness:
|
|
596
|
-
console.print(f"
|
|
629
|
+
console.print(f" - Completeness: {contract.quality.completeness}%")
|
|
597
630
|
if contract.quality.row_count_min:
|
|
598
|
-
console.print(f"
|
|
631
|
+
console.print(f" - Min rows: {contract.quality.row_count_min:,}")
|
|
599
632
|
|
|
600
633
|
|
|
601
634
|
def _display_contract_validation(result) -> None:
|
|
602
635
|
"""Display contract validation results."""
|
|
603
|
-
status = "[green]
|
|
636
|
+
status = "[green]PASSED[/green]" if result.passed else "[red]FAILED[/red]"
|
|
604
637
|
console.print(f"Contract: [bold]{result.contract.name}[/bold] v{result.contract.version}")
|
|
605
638
|
console.print(f"Status: {status}\n")
|
|
606
639
|
|
|
@@ -627,7 +660,7 @@ def _display_contract_validation(result) -> None:
|
|
|
627
660
|
|
|
628
661
|
def _display_contract_diff(diff) -> None:
|
|
629
662
|
"""Display contract diff."""
|
|
630
|
-
console.print(
|
|
663
|
+
console.print("[bold]Comparing contracts[/bold]")
|
|
631
664
|
console.print(f" Old: v{diff.old_contract.version}")
|
|
632
665
|
console.print(f" New: v{diff.new_contract.version}\n")
|
|
633
666
|
|
|
@@ -640,19 +673,19 @@ def _display_contract_diff(diff) -> None:
|
|
|
640
673
|
if diff.breaking_changes:
|
|
641
674
|
console.print("[red bold]Breaking Changes:[/red bold]")
|
|
642
675
|
for change in diff.breaking_changes:
|
|
643
|
-
console.print(f"
|
|
676
|
+
console.print(f" [red]X[/red] {change.message}")
|
|
644
677
|
console.print()
|
|
645
678
|
|
|
646
679
|
if diff.minor_changes:
|
|
647
680
|
console.print("[yellow bold]Minor Changes:[/yellow bold]")
|
|
648
681
|
for change in diff.minor_changes:
|
|
649
|
-
console.print(f"
|
|
682
|
+
console.print(f" [yellow]![/yellow] {change.message}")
|
|
650
683
|
console.print()
|
|
651
684
|
|
|
652
685
|
if diff.non_breaking_changes:
|
|
653
686
|
console.print("[dim]Non-breaking Changes:[/dim]")
|
|
654
687
|
for change in diff.non_breaking_changes:
|
|
655
|
-
console.print(f"
|
|
688
|
+
console.print(f" - {change.message}")
|
|
656
689
|
|
|
657
690
|
console.print(f"\n[dim]Suggested version bump: {diff.suggest_version_bump()}[/dim]")
|
|
658
691
|
|
|
@@ -660,10 +693,10 @@ def _display_contract_diff(diff) -> None:
|
|
|
660
693
|
def _display_anomaly_report(report) -> None:
|
|
661
694
|
"""Display anomaly detection report."""
|
|
662
695
|
if not report.has_anomalies:
|
|
663
|
-
console.print("[green]
|
|
696
|
+
console.print("[green]No anomalies detected[/green]")
|
|
664
697
|
return
|
|
665
698
|
|
|
666
|
-
console.print(f"[yellow bold]
|
|
699
|
+
console.print(f"[yellow bold]WARNING: {report.anomaly_count} anomalies detected[/yellow bold]\n")
|
|
667
700
|
|
|
668
701
|
table = Table(title="Anomalies")
|
|
669
702
|
table.add_column("Column", style="cyan")
|
|
@@ -702,5 +735,477 @@ def _save_results(output: str, dataset, results) -> None:
|
|
|
702
735
|
Path(output).write_text(json.dumps(data, indent=2))
|
|
703
736
|
|
|
704
737
|
|
|
738
|
+
@app.command()
|
|
739
|
+
def history(
|
|
740
|
+
source: str | None = typer.Argument(None, help="Data source to query history for (optional)"),
|
|
741
|
+
last: str = typer.Option("30d", "--last", "-l", help="Time period: 7d, 30d, 90d"),
|
|
742
|
+
output_format: str = typer.Option("table", "--format", "-f", help="Output format: table, json"),
|
|
743
|
+
trend: bool = typer.Option(False, "--trend", "-t", help="Show quality trend analysis"),
|
|
744
|
+
db_path: str | None = typer.Option(None, "--db", help="Path to history database"),
|
|
745
|
+
) -> None:
|
|
746
|
+
"""
|
|
747
|
+
Query historical validation results.
|
|
748
|
+
|
|
749
|
+
Shows past validation runs and quality score trends over time.
|
|
750
|
+
|
|
751
|
+
[bold]Examples:[/bold]
|
|
752
|
+
duckguard history # Show all recent runs
|
|
753
|
+
duckguard history data.csv # Show runs for specific source
|
|
754
|
+
duckguard history data.csv --last 7d # Last 7 days
|
|
755
|
+
duckguard history data.csv --trend # Show trend analysis
|
|
756
|
+
duckguard history --format json # Output as JSON
|
|
757
|
+
"""
|
|
758
|
+
import json as json_module
|
|
759
|
+
|
|
760
|
+
from duckguard.history import HistoryStorage, TrendAnalyzer
|
|
761
|
+
|
|
762
|
+
try:
|
|
763
|
+
storage = HistoryStorage(db_path=db_path)
|
|
764
|
+
|
|
765
|
+
# Parse time period
|
|
766
|
+
days = int(last.rstrip("d"))
|
|
767
|
+
|
|
768
|
+
if trend and source:
|
|
769
|
+
# Show trend analysis
|
|
770
|
+
console.print(f"\n[bold blue]DuckGuard[/bold blue] Trend Analysis: [cyan]{source}[/cyan]\n")
|
|
771
|
+
|
|
772
|
+
analyzer = TrendAnalyzer(storage)
|
|
773
|
+
analysis = analyzer.analyze(source, days=days)
|
|
774
|
+
|
|
775
|
+
if analysis.total_runs == 0:
|
|
776
|
+
console.print("[yellow]No historical data found for this source.[/yellow]")
|
|
777
|
+
console.print("[dim]Run some validations first, then check history.[/dim]")
|
|
778
|
+
return
|
|
779
|
+
|
|
780
|
+
# Display trend summary
|
|
781
|
+
trend_color = {
|
|
782
|
+
"improving": "green",
|
|
783
|
+
"declining": "red",
|
|
784
|
+
"stable": "yellow",
|
|
785
|
+
}.get(analysis.score_trend, "white")
|
|
786
|
+
|
|
787
|
+
trend_symbol = {
|
|
788
|
+
"improving": "[+]",
|
|
789
|
+
"declining": "[-]",
|
|
790
|
+
"stable": "[=]",
|
|
791
|
+
}.get(analysis.score_trend, "[=]")
|
|
792
|
+
|
|
793
|
+
console.print(Panel(
|
|
794
|
+
f"[bold]Quality Trend: [{trend_color}]{trend_symbol} {analysis.score_trend.upper()}[/{trend_color}][/bold]\n\n"
|
|
795
|
+
f"Current Score: [cyan]{analysis.current_score:.1f}%[/cyan]\n"
|
|
796
|
+
f"Average Score: [cyan]{analysis.average_score:.1f}%[/cyan]\n"
|
|
797
|
+
f"Min/Max: [dim]{analysis.min_score:.1f}% - {analysis.max_score:.1f}%[/dim]\n"
|
|
798
|
+
f"Change: [{trend_color}]{analysis.trend_change:+.1f}%[/{trend_color}]\n"
|
|
799
|
+
f"Total Runs: [cyan]{analysis.total_runs}[/cyan]\n"
|
|
800
|
+
f"Pass Rate: [cyan]{analysis.pass_rate:.1f}%[/cyan]",
|
|
801
|
+
title=f"Last {days} Days",
|
|
802
|
+
border_style=trend_color,
|
|
803
|
+
))
|
|
804
|
+
|
|
805
|
+
if analysis.anomalies:
|
|
806
|
+
console.print(f"\n[yellow]Anomalies detected on: {', '.join(analysis.anomalies)}[/yellow]")
|
|
807
|
+
|
|
808
|
+
# Show daily data if available
|
|
809
|
+
if analysis.daily_data and len(analysis.daily_data) <= 14:
|
|
810
|
+
console.print()
|
|
811
|
+
table = Table(title="Daily Quality Scores")
|
|
812
|
+
table.add_column("Date", style="cyan")
|
|
813
|
+
table.add_column("Score", justify="right")
|
|
814
|
+
table.add_column("Runs", justify="right")
|
|
815
|
+
table.add_column("Pass Rate", justify="right")
|
|
816
|
+
|
|
817
|
+
for day in analysis.daily_data:
|
|
818
|
+
pass_rate = (day.passed_count / day.run_count * 100) if day.run_count > 0 else 0
|
|
819
|
+
score_style = "green" if day.avg_score >= 80 else "yellow" if day.avg_score >= 60 else "red"
|
|
820
|
+
table.add_row(
|
|
821
|
+
day.date,
|
|
822
|
+
f"[{score_style}]{day.avg_score:.1f}%[/{score_style}]",
|
|
823
|
+
str(day.run_count),
|
|
824
|
+
f"{pass_rate:.0f}%",
|
|
825
|
+
)
|
|
826
|
+
|
|
827
|
+
console.print(table)
|
|
828
|
+
|
|
829
|
+
else:
|
|
830
|
+
# Show run history
|
|
831
|
+
if source:
|
|
832
|
+
console.print(f"\n[bold blue]DuckGuard[/bold blue] History: [cyan]{source}[/cyan]\n")
|
|
833
|
+
runs = storage.get_runs(source, limit=20)
|
|
834
|
+
else:
|
|
835
|
+
console.print("\n[bold blue]DuckGuard[/bold blue] Recent Validation History\n")
|
|
836
|
+
runs = storage.get_runs(limit=20)
|
|
837
|
+
|
|
838
|
+
if not runs:
|
|
839
|
+
console.print("[yellow]No historical data found.[/yellow]")
|
|
840
|
+
console.print("[dim]Run some validations first, then check history.[/dim]")
|
|
841
|
+
return
|
|
842
|
+
|
|
843
|
+
if output_format == "json":
|
|
844
|
+
# JSON output
|
|
845
|
+
data = [
|
|
846
|
+
{
|
|
847
|
+
"run_id": run.run_id,
|
|
848
|
+
"source": run.source,
|
|
849
|
+
"started_at": run.started_at.isoformat(),
|
|
850
|
+
"quality_score": run.quality_score,
|
|
851
|
+
"passed": run.passed,
|
|
852
|
+
"total_checks": run.total_checks,
|
|
853
|
+
"passed_count": run.passed_count,
|
|
854
|
+
"failed_count": run.failed_count,
|
|
855
|
+
"warning_count": run.warning_count,
|
|
856
|
+
}
|
|
857
|
+
for run in runs
|
|
858
|
+
]
|
|
859
|
+
console.print(json_module.dumps(data, indent=2))
|
|
860
|
+
else:
|
|
861
|
+
# Table output
|
|
862
|
+
table = Table(title=f"Validation Runs (Last {days} days)")
|
|
863
|
+
table.add_column("Date", style="cyan")
|
|
864
|
+
table.add_column("Source", style="dim", max_width=40)
|
|
865
|
+
table.add_column("Score", justify="right")
|
|
866
|
+
table.add_column("Status", justify="center")
|
|
867
|
+
table.add_column("Checks", justify="right")
|
|
868
|
+
|
|
869
|
+
for run in runs:
|
|
870
|
+
score_style = "green" if run.quality_score >= 80 else "yellow" if run.quality_score >= 60 else "red"
|
|
871
|
+
status = "[green]PASS[/green]" if run.passed else "[red]FAIL[/red]"
|
|
872
|
+
|
|
873
|
+
table.add_row(
|
|
874
|
+
run.started_at.strftime("%Y-%m-%d %H:%M"),
|
|
875
|
+
run.source[:40],
|
|
876
|
+
f"[{score_style}]{run.quality_score:.1f}%[/{score_style}]",
|
|
877
|
+
status,
|
|
878
|
+
f"{run.passed_count}/{run.total_checks}",
|
|
879
|
+
)
|
|
880
|
+
|
|
881
|
+
console.print(table)
|
|
882
|
+
|
|
883
|
+
# Show sources summary
|
|
884
|
+
sources = storage.get_sources()
|
|
885
|
+
if len(sources) > 1:
|
|
886
|
+
console.print(f"\n[dim]Tracked sources: {len(sources)}[/dim]")
|
|
887
|
+
|
|
888
|
+
except Exception as e:
|
|
889
|
+
console.print(f"[red]Error:[/red] {e}")
|
|
890
|
+
raise typer.Exit(1)
|
|
891
|
+
|
|
892
|
+
|
|
893
|
+
@app.command()
|
|
894
|
+
def report(
|
|
895
|
+
source: str = typer.Argument(..., help="Data source path or connection string"),
|
|
896
|
+
config: str | None = typer.Option(None, "--config", "-c", help="Path to duckguard.yaml rules file"),
|
|
897
|
+
table: str | None = typer.Option(None, "--table", "-t", help="Table name (for databases)"),
|
|
898
|
+
output_format: str = typer.Option("html", "--format", "-f", help="Output format: html, pdf"),
|
|
899
|
+
output: str = typer.Option("report.html", "--output", "-o", help="Output file path"),
|
|
900
|
+
title: str = typer.Option("DuckGuard Data Quality Report", "--title", help="Report title"),
|
|
901
|
+
include_passed: bool = typer.Option(True, "--include-passed/--no-passed", help="Include passed checks"),
|
|
902
|
+
store: bool = typer.Option(False, "--store", "-s", help="Store results in history"),
|
|
903
|
+
) -> None:
|
|
904
|
+
"""
|
|
905
|
+
Generate a data quality report (HTML or PDF).
|
|
906
|
+
|
|
907
|
+
Runs validation checks and generates a beautiful, shareable report.
|
|
908
|
+
|
|
909
|
+
[bold]Examples:[/bold]
|
|
910
|
+
duckguard report data.csv
|
|
911
|
+
duckguard report data.csv --format pdf --output report.pdf
|
|
912
|
+
duckguard report data.csv --config rules.yaml --title "Orders Quality"
|
|
913
|
+
duckguard report data.csv --store # Also save to history
|
|
914
|
+
"""
|
|
915
|
+
from duckguard.connectors import connect
|
|
916
|
+
from duckguard.reports import generate_html_report, generate_pdf_report
|
|
917
|
+
from duckguard.rules import execute_rules, generate_rules, load_rules
|
|
918
|
+
|
|
919
|
+
# Determine output path based on format
|
|
920
|
+
if output == "report.html" and output_format == "pdf":
|
|
921
|
+
output = "report.pdf"
|
|
922
|
+
|
|
923
|
+
console.print(f"\n[bold blue]DuckGuard[/bold blue] Generating {output_format.upper()} report\n")
|
|
924
|
+
|
|
925
|
+
try:
|
|
926
|
+
with Progress(
|
|
927
|
+
SpinnerColumn(),
|
|
928
|
+
TextColumn("[progress.description]{task.description}"),
|
|
929
|
+
console=console,
|
|
930
|
+
transient=True,
|
|
931
|
+
) as progress:
|
|
932
|
+
progress.add_task("Connecting to data source...", total=None)
|
|
933
|
+
dataset = connect(source, table=table)
|
|
934
|
+
|
|
935
|
+
console.print(f"[dim]Source: {source}[/dim]")
|
|
936
|
+
console.print(f"[dim]Rows: {dataset.row_count:,} | Columns: {dataset.column_count}[/dim]\n")
|
|
937
|
+
|
|
938
|
+
with Progress(
|
|
939
|
+
SpinnerColumn(),
|
|
940
|
+
TextColumn("[progress.description]{task.description}"),
|
|
941
|
+
console=console,
|
|
942
|
+
transient=True,
|
|
943
|
+
) as progress:
|
|
944
|
+
progress.add_task("Running validation checks...", total=None)
|
|
945
|
+
|
|
946
|
+
if config:
|
|
947
|
+
ruleset = load_rules(config)
|
|
948
|
+
else:
|
|
949
|
+
ruleset = generate_rules(dataset, as_yaml=False)
|
|
950
|
+
|
|
951
|
+
result = execute_rules(ruleset, dataset=dataset)
|
|
952
|
+
|
|
953
|
+
# Store in history if requested
|
|
954
|
+
if store:
|
|
955
|
+
from duckguard.history import HistoryStorage
|
|
956
|
+
|
|
957
|
+
storage = HistoryStorage()
|
|
958
|
+
run_id = storage.store(result)
|
|
959
|
+
console.print(f"[dim]Stored in history: {run_id[:8]}...[/dim]\n")
|
|
960
|
+
|
|
961
|
+
# Display summary
|
|
962
|
+
status = "[green]PASSED[/green]" if result.passed else "[red]FAILED[/red]"
|
|
963
|
+
console.print(f"Validation: {status}")
|
|
964
|
+
console.print(f"Quality Score: [cyan]{result.quality_score:.1f}%[/cyan]")
|
|
965
|
+
console.print(f"Checks: {result.passed_count}/{result.total_checks} passed\n")
|
|
966
|
+
|
|
967
|
+
# Generate report
|
|
968
|
+
with Progress(
|
|
969
|
+
SpinnerColumn(),
|
|
970
|
+
TextColumn("[progress.description]{task.description}"),
|
|
971
|
+
console=console,
|
|
972
|
+
transient=True,
|
|
973
|
+
) as progress:
|
|
974
|
+
progress.add_task(f"Generating {output_format.upper()} report...", total=None)
|
|
975
|
+
|
|
976
|
+
if output_format.lower() == "pdf":
|
|
977
|
+
generate_pdf_report(result, output, title=title, include_passed=include_passed)
|
|
978
|
+
else:
|
|
979
|
+
generate_html_report(result, output, title=title, include_passed=include_passed)
|
|
980
|
+
|
|
981
|
+
console.print(f"[green]SAVED[/green] Report saved to [cyan]{output}[/cyan]")
|
|
982
|
+
console.print("[dim]Open in browser to view the report[/dim]")
|
|
983
|
+
|
|
984
|
+
except ImportError as e:
|
|
985
|
+
if "weasyprint" in str(e).lower():
|
|
986
|
+
console.print("[red]Error:[/red] PDF generation requires weasyprint.")
|
|
987
|
+
console.print("[dim]Install with: pip install duckguard[reports][/dim]")
|
|
988
|
+
else:
|
|
989
|
+
console.print(f"[red]Error:[/red] {e}")
|
|
990
|
+
raise typer.Exit(1)
|
|
991
|
+
except Exception as e:
|
|
992
|
+
console.print(f"[red]Error:[/red] {e}")
|
|
993
|
+
raise typer.Exit(1)
|
|
994
|
+
|
|
995
|
+
|
|
996
|
+
@app.command()
def freshness(
    source: str = typer.Argument(..., help="Data source path"),
    column: str | None = typer.Option(None, "--column", "-c", help="Timestamp column to check"),
    max_age: str = typer.Option("24h", "--max-age", "-m", help="Maximum acceptable age: 1h, 6h, 24h, 7d"),
    output_format: str = typer.Option("table", "--format", "-f", help="Output format: table, json"),
) -> None:
    """
    Check data freshness.

    Monitors how recently data was updated using file modification time
    or timestamp columns.

    [bold]Examples:[/bold]
        duckguard freshness data.csv
        duckguard freshness data.csv --max-age 6h
        duckguard freshness data.csv --column updated_at
        duckguard freshness data.csv --format json
    """
    import json as json_module

    from duckguard.connectors import connect
    from duckguard.freshness import FreshnessMonitor
    from duckguard.freshness.monitor import parse_age_string

    console.print(f"\n[bold blue]DuckGuard[/bold blue] Checking freshness: [cyan]{source}[/cyan]\n")

    try:
        # Translate "24h"/"7d"-style strings into a concrete threshold.
        threshold = parse_age_string(max_age)
        monitor = FreshnessMonitor(threshold=threshold)

        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
            transient=True,
        ) as progress:
            progress.add_task("Checking freshness...", total=None)

            if column:
                # Explicit timestamp column requested: query the dataset.
                dataset = connect(source)
                result = monitor.check_column_timestamp(dataset, column)
            else:
                # Try file mtime first, fallback to dataset
                from pathlib import Path
                if Path(source).exists():
                    result = monitor.check_file_mtime(source)
                else:
                    dataset = connect(source)
                    result = monitor.check(dataset)

        if output_format == "json":
            console.print(json_module.dumps(result.to_dict(), indent=2))
        else:
            # Display table
            status_color = "green" if result.is_fresh else "red"
            status_text = "FRESH" if result.is_fresh else "STALE"

            console.print(Panel(
                f"[bold {status_color}]{status_text}[/bold {status_color}]\n\n"
                f"Last Modified: [cyan]{result.last_modified.strftime('%Y-%m-%d %H:%M:%S') if result.last_modified else 'Unknown'}[/cyan]\n"
                f"Age: [cyan]{result.age_human}[/cyan]\n"
                f"Threshold: [dim]{max_age}[/dim]\n"
                f"Method: [dim]{result.method.value}[/dim]",
                title="Freshness Check",
                border_style=status_color,
            ))

        # Non-zero exit signals staleness to callers (CI pipelines etc.).
        if not result.is_fresh:
            raise typer.Exit(1)

    except typer.Exit:
        # typer.Exit subclasses RuntimeError (via click.exceptions.Exit), so
        # without this re-raise the deliberate staleness exit above would be
        # caught by the generic handler below and reported as "Error: 1".
        raise
    except Exception as e:
        console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1) from e
|
|
1070
|
+
|
|
1071
|
+
|
|
1072
|
+
@app.command()
def schema(
    source: str = typer.Argument(..., help="Data source path"),
    action: str = typer.Option("show", "--action", "-a", help="Action: show, capture, history, changes"),
    table: str | None = typer.Option(None, "--table", "-t", help="Table name (for databases)"),
    output_format: str = typer.Option("table", "--format", "-f", help="Output format: table, json"),
    limit: int = typer.Option(10, "--limit", "-l", help="Number of results to show"),
) -> None:
    """
    Track schema evolution over time.

    Captures schema snapshots and detects changes between versions.

    [bold]Actions:[/bold]
        show - Show current schema
        capture - Capture a schema snapshot
        history - Show schema snapshot history
        changes - Detect changes from last snapshot

    [bold]Examples:[/bold]
        duckguard schema data.csv # Show current schema
        duckguard schema data.csv --action capture # Capture snapshot
        duckguard schema data.csv --action history # View history
        duckguard schema data.csv --action changes # Detect changes
    """
    import json as json_module

    from duckguard.connectors import connect
    from duckguard.schema_history import SchemaChangeAnalyzer, SchemaTracker

    console.print(f"\n[bold blue]DuckGuard[/bold blue] Schema: [cyan]{source}[/cyan]\n")

    try:
        dataset = connect(source, table=table)
        tracker = SchemaTracker()
        analyzer = SchemaChangeAnalyzer()

        if action == "show":
            # Display current schema
            col_table = Table(title="Current Schema")
            col_table.add_column("Column", style="cyan")
            col_table.add_column("Type", style="magenta")
            col_table.add_column("Position", justify="right")

            # DESCRIBE the underlying source via the engine's canonical reference.
            ref = dataset.engine.get_source_reference(dataset.source)
            result = dataset.engine.execute(f"DESCRIBE {ref}")

            for i, row in enumerate(result.fetchall()):
                col_table.add_row(row[0], row[1], str(i))

            console.print(col_table)
            console.print(f"\n[dim]Total columns: {dataset.column_count}[/dim]")

        elif action == "capture":
            with Progress(
                SpinnerColumn(),
                TextColumn("[progress.description]{task.description}"),
                console=console,
                transient=True,
            ) as progress:
                progress.add_task("Capturing schema snapshot...", total=None)
                snapshot = tracker.capture(dataset)

            console.print(f"[green]CAPTURED[/green] Schema snapshot: [cyan]{snapshot.snapshot_id[:8]}...[/cyan]")
            console.print(f"[dim]Columns: {snapshot.column_count} | Rows: {snapshot.row_count:,}[/dim]")
            console.print(f"[dim]Captured at: {snapshot.captured_at.strftime('%Y-%m-%d %H:%M:%S')}[/dim]")

        elif action == "history":
            history = tracker.get_history(source, limit=limit)

            if not history:
                console.print("[yellow]No schema history found for this source.[/yellow]")
                console.print("[dim]Use --action capture to create a snapshot first.[/dim]")
                return

            if output_format == "json":
                data = [s.to_dict() for s in history]
                console.print(json_module.dumps(data, indent=2))
            else:
                table_obj = Table(title="Schema History")
                table_obj.add_column("Snapshot ID", style="cyan")
                table_obj.add_column("Captured At", style="dim")
                table_obj.add_column("Columns", justify="right")
                table_obj.add_column("Rows", justify="right")

                for snapshot in history:
                    table_obj.add_row(
                        snapshot.snapshot_id[:8] + "...",
                        snapshot.captured_at.strftime("%Y-%m-%d %H:%M"),
                        str(snapshot.column_count),
                        f"{snapshot.row_count:,}" if snapshot.row_count else "-",
                    )

                console.print(table_obj)

        elif action == "changes":
            with Progress(
                SpinnerColumn(),
                TextColumn("[progress.description]{task.description}"),
                console=console,
                transient=True,
            ) as progress:
                progress.add_task("Detecting schema changes...", total=None)
                report = analyzer.detect_changes(dataset)

            if not report.has_changes:
                console.print("[green]No schema changes detected[/green]")
                console.print(f"[dim]Snapshot captured: {report.current_snapshot.snapshot_id[:8]}...[/dim]")
                return

            # Display changes
            console.print(f"[yellow bold]{len(report.changes)} schema changes detected[/yellow bold]\n")

            if report.has_breaking_changes:
                console.print("[red bold]BREAKING CHANGES:[/red bold]")
                for change in report.breaking_changes:
                    console.print(f" [red]X[/red] {change}")
                console.print()

            non_breaking = report.non_breaking_changes
            if non_breaking:
                console.print("[dim]Non-breaking changes:[/dim]")
                for change in non_breaking:
                    console.print(f" - {change}")

            # Breaking changes fail the command so CI can gate on them.
            if report.has_breaking_changes:
                raise typer.Exit(1)

        else:
            console.print(f"[red]Error:[/red] Unknown action: {action}")
            console.print("[dim]Valid actions: show, capture, history, changes[/dim]")
            raise typer.Exit(1)

    except typer.Exit:
        # typer.Exit subclasses RuntimeError (via click.exceptions.Exit); the
        # deliberate exits above (breaking changes / unknown action) must not
        # be caught by the generic handler below, which would print "Error: 1".
        raise
    except Exception as e:
        console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1) from e
|
|
1208
|
+
|
|
1209
|
+
|
|
705
1210
|
# Entry point when the module is executed directly (e.g. `python main.py`);
# installed console-script entry points call `app()` through packaging metadata.
if __name__ == "__main__":
    app()
|