duckguard 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. duckguard/__init__.py +110 -0
  2. duckguard/anomaly/__init__.py +34 -0
  3. duckguard/anomaly/detector.py +394 -0
  4. duckguard/anomaly/methods.py +432 -0
  5. duckguard/cli/__init__.py +5 -0
  6. duckguard/cli/main.py +706 -0
  7. duckguard/connectors/__init__.py +58 -0
  8. duckguard/connectors/base.py +80 -0
  9. duckguard/connectors/bigquery.py +171 -0
  10. duckguard/connectors/databricks.py +201 -0
  11. duckguard/connectors/factory.py +292 -0
  12. duckguard/connectors/files.py +135 -0
  13. duckguard/connectors/kafka.py +343 -0
  14. duckguard/connectors/mongodb.py +236 -0
  15. duckguard/connectors/mysql.py +121 -0
  16. duckguard/connectors/oracle.py +196 -0
  17. duckguard/connectors/postgres.py +99 -0
  18. duckguard/connectors/redshift.py +154 -0
  19. duckguard/connectors/snowflake.py +226 -0
  20. duckguard/connectors/sqlite.py +112 -0
  21. duckguard/connectors/sqlserver.py +242 -0
  22. duckguard/contracts/__init__.py +48 -0
  23. duckguard/contracts/diff.py +432 -0
  24. duckguard/contracts/generator.py +334 -0
  25. duckguard/contracts/loader.py +367 -0
  26. duckguard/contracts/schema.py +242 -0
  27. duckguard/contracts/validator.py +453 -0
  28. duckguard/core/__init__.py +8 -0
  29. duckguard/core/column.py +437 -0
  30. duckguard/core/dataset.py +284 -0
  31. duckguard/core/engine.py +261 -0
  32. duckguard/core/result.py +119 -0
  33. duckguard/core/scoring.py +508 -0
  34. duckguard/profiler/__init__.py +5 -0
  35. duckguard/profiler/auto_profile.py +350 -0
  36. duckguard/pytest_plugin/__init__.py +5 -0
  37. duckguard/pytest_plugin/plugin.py +161 -0
  38. duckguard/reporting/__init__.py +6 -0
  39. duckguard/reporting/console.py +88 -0
  40. duckguard/reporting/json_report.py +96 -0
  41. duckguard/rules/__init__.py +28 -0
  42. duckguard/rules/executor.py +616 -0
  43. duckguard/rules/generator.py +341 -0
  44. duckguard/rules/loader.py +483 -0
  45. duckguard/rules/schema.py +289 -0
  46. duckguard/semantic/__init__.py +31 -0
  47. duckguard/semantic/analyzer.py +270 -0
  48. duckguard/semantic/detector.py +459 -0
  49. duckguard/semantic/validators.py +354 -0
  50. duckguard/validators/__init__.py +7 -0
  51. duckguard-2.0.0.dist-info/METADATA +221 -0
  52. duckguard-2.0.0.dist-info/RECORD +55 -0
  53. duckguard-2.0.0.dist-info/WHEEL +4 -0
  54. duckguard-2.0.0.dist-info/entry_points.txt +5 -0
  55. duckguard-2.0.0.dist-info/licenses/LICENSE +55 -0
duckguard/cli/main.py ADDED
@@ -0,0 +1,706 @@
1
+ """DuckGuard CLI - Command line interface for data quality validation.
2
+
3
+ A modern, beautiful CLI for data quality that just works.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import sys
9
+ from pathlib import Path
10
+ from typing import Optional
11
+
12
+ import typer
13
+ from rich.console import Console
14
+ from rich.panel import Panel
15
+ from rich.table import Table
16
+ from rich.syntax import Syntax
17
+ from rich.progress import Progress, SpinnerColumn, TextColumn
18
+ from rich import print as rprint
19
+ from rich.tree import Tree
20
+ from rich.text import Text
21
+ from rich.columns import Columns
22
+ from rich.markdown import Markdown
23
+
24
+ from duckguard import __version__
25
+
26
# Root Typer application; the commands defined below register themselves on it.
app = typer.Typer(
    name="duckguard",
    help="DuckGuard - Data quality that just works. Fast, simple, Pythonic.",
    rich_markup_mode="rich",
    add_completion=False,
)

# Single Rich console shared by every command and display helper in this module.
console = Console()
33
+
34
+
35
def version_callback(value: bool) -> None:
    """Show the DuckGuard version banner and exit when the flag is set.

    Wired in as the eager callback of the ``--version`` option. A falsy
    ``value`` is a no-op; otherwise the banner is printed and ``typer.Exit``
    is raised so no command runs.
    """
    if not value:
        return

    banner = (
        f"[bold blue]DuckGuard[/bold blue] v{__version__}\n"
        "[dim]The fast, simple data quality tool[/dim]"
    )
    console.print(Panel(banner, border_style="blue"))
    raise typer.Exit()
44
+
45
+
46
@app.callback()
def main(
    version: Optional[bool] = typer.Option(
        None,
        "--version",
        "-v",
        callback=version_callback,
        is_eager=True,
        help="Show version and exit.",
    ),
) -> None:
    """DuckGuard - Data quality made clear."""
    # Nothing to do here: the callback exists only to host the global
    # --version option, which is handled eagerly by version_callback.
59
+
60
+
61
@app.command()
def check(
    source: str = typer.Argument(..., help="Path to file or connection string"),
    config: Optional[str] = typer.Option(None, "--config", "-c", help="Path to duckguard.yaml rules file"),
    table: Optional[str] = typer.Option(None, "--table", "-t", help="Table name (for databases)"),
    not_null: Optional[list[str]] = typer.Option(None, "--not-null", "-n", help="Columns that must not be null"),
    unique: Optional[list[str]] = typer.Option(None, "--unique", "-u", help="Columns that must be unique"),
    output: Optional[str] = typer.Option(None, "--output", "-o", help="Output file (json)"),
    verbose: bool = typer.Option(False, "--verbose", "-V", help="Verbose output"),
) -> None:
    """
    Run data quality checks on a data source.

    [bold]Examples:[/bold]
        duckguard check data.csv
        duckguard check data.csv --not-null id --unique email
        duckguard check data.csv --config duckguard.yaml
        duckguard check postgres://localhost/db --table orders
    """
    from duckguard.connectors import connect
    from duckguard.rules import load_rules, execute_rules
    from duckguard.core.scoring import score

    console.print(f"\n[bold blue]DuckGuard[/bold blue] Checking: [cyan]{source}[/cyan]\n")

    try:
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
            transient=True,
        ) as progress:
            progress.add_task("Connecting to data source...", total=None)
            dataset = connect(source, table=table)

        # Display basic info
        info_table = Table(show_header=False, box=None, padding=(0, 2))
        info_table.add_column("", style="dim")
        info_table.add_column("")
        info_table.add_row("Rows", f"[green]{dataset.row_count:,}[/green]")
        info_table.add_row("Columns", f"[green]{dataset.column_count}[/green]")
        console.print(info_table)
        console.print()

        # Execute checks. `quick_results` stays None in config mode so the
        # JSON output omits the ad-hoc "checks" section, as before.
        quick_results = None
        if config:
            # Use YAML rules
            with Progress(
                SpinnerColumn(),
                TextColumn("[progress.description]{task.description}"),
                console=console,
                transient=True,
            ) as progress:
                progress.add_task("Running checks...", total=None)
                ruleset = load_rules(config)
                result = execute_rules(ruleset, dataset=dataset)

            _display_execution_result(result, verbose)
            all_passed = bool(result.passed)
        else:
            # Quick checks from CLI arguments
            quick_results = _collect_quick_checks(dataset, not_null, unique)
            _display_quick_results(quick_results)
            all_passed = all(entry[1] for entry in quick_results)

        # Calculate quality score
        quality = score(dataset)
        _display_quality_score(quality)

        # Output to file
        if output:
            _save_results(output, dataset, quick_results)
            console.print(f"\n[dim]Results saved to {output}[/dim]")

        # Exit with error if any checks failed
        if not all_passed:
            raise typer.Exit(1)

    except typer.Exit:
        # typer.Exit derives from RuntimeError, so without this clause the
        # generic handler below would report the exit code as "Error: 1"
        # (and dump a traceback in verbose mode) whenever a check fails.
        raise
    except FileNotFoundError as e:
        console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)
    except Exception as e:
        console.print(f"[red]Error:[/red] {e}")
        if verbose:
            console.print_exception()
        raise typer.Exit(1)


def _collect_quick_checks(dataset, not_null, unique) -> list:
    """Build the ad-hoc check tuples driven by --not-null / --unique.

    Each entry is ``(check name, passed, details, column name or None)``.
    The row-count check is always first; a requested column that is not in
    ``dataset.columns`` yields a failed "Column not found" entry.
    """
    outcomes = [
        ("Row count > 0", dataset.row_count > 0, f"{dataset.row_count:,} rows", None),
    ]

    # Not null checks
    for col_name in not_null or []:
        if col_name not in dataset.columns:
            outcomes.append((f"{col_name} not null", False, "Column not found", col_name))
            continue
        col = dataset[col_name]
        outcomes.append((
            f"{col_name} not null",
            col.null_count == 0,
            f"{col.null_count:,} nulls ({col.null_percent:.1f}%)",
            col_name,
        ))

    # Unique checks
    for col_name in unique or []:
        if col_name not in dataset.columns:
            outcomes.append((f"{col_name} unique", False, "Column not found", col_name))
            continue
        col = dataset[col_name]
        dup_count = col.total_count - col.unique_count
        outcomes.append((
            f"{col_name} unique",
            col.unique_percent == 100,
            f"{col.unique_percent:.1f}% unique ({dup_count:,} duplicates)",
            col_name,
        ))

    return outcomes
183
+
184
+
185
@app.command()
def discover(
    source: str = typer.Argument(..., help="Path to file or connection string"),
    table: Optional[str] = typer.Option(None, "--table", "-t", help="Table name"),
    output: Optional[str] = typer.Option(None, "--output", "-o", help="Output file for rules (duckguard.yaml)"),
    format: str = typer.Option("yaml", "--format", "-f", help="Output format: yaml, python"),
) -> None:
    """
    Discover data and auto-generate validation rules.

    Analyzes your data and suggests appropriate validation rules.

    [bold]Examples:[/bold]
        duckguard discover data.csv
        duckguard discover data.csv --output duckguard.yaml
        duckguard discover postgres://localhost/db --table users
    """
    # NOTE(review): `format` is accepted but never read in this function;
    # the rules are always rendered as YAML.
    from duckguard.connectors import connect
    from duckguard.rules import generate_rules
    from duckguard.rules.generator import ruleset_to_yaml
    from duckguard.semantic import SemanticAnalyzer

    console.print(f"\n[bold blue]DuckGuard[/bold blue] Discovering: [cyan]{source}[/cyan]\n")

    try:
        spinner = Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
            transient=True,
        )
        with spinner:
            spinner.add_task("Analyzing data...", total=None)
            dataset = connect(source, table=table)

            # Semantic analysis of every column
            analysis = SemanticAnalyzer().analyze(dataset)

            # Rules are requested as a RuleSet object rather than YAML text
            ruleset = generate_rules(dataset, as_yaml=False)

        _display_discovery_results(analysis, ruleset)

        # Either branch needs the YAML text, so render it once up front.
        yaml_content = ruleset_to_yaml(ruleset)
        if output:
            Path(output).write_text(yaml_content, encoding="utf-8")
            console.print(f"\n[green]✓[/green] Rules saved to [cyan]{output}[/cyan]")
            console.print(f"[dim]Run: duckguard check {source} --config {output}[/dim]")
        else:
            highlighted = Syntax(yaml_content, "yaml", theme="monokai")
            console.print(Panel(
                highlighted,
                title="Generated Rules (duckguard.yaml)",
                border_style="green",
            ))

    except Exception as e:
        console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)
247
+
248
+
249
@app.command()
def contract(
    action: str = typer.Argument(..., help="Action: generate, validate, diff"),
    source: Optional[str] = typer.Argument(None, help="Data source or contract file"),
    contract_file: Optional[str] = typer.Option(None, "--contract", "-c", help="Contract file path"),
    output: Optional[str] = typer.Option(None, "--output", "-o", help="Output file"),
    strict: bool = typer.Option(False, "--strict", help="Strict validation mode"),
) -> None:
    """
    Manage data contracts.

    [bold]Actions:[/bold]
        generate - Generate a contract from data
        validate - Validate data against a contract
        diff - Compare two contract versions

    [bold]Examples:[/bold]
        duckguard contract generate data.csv --output orders.contract.yaml
        duckguard contract validate data.csv --contract orders.contract.yaml
        duckguard contract diff old.contract.yaml new.contract.yaml
    """
    from duckguard.contracts import (
        load_contract,
        validate_contract,
        generate_contract,
        diff_contracts,
    )
    from duckguard.contracts.loader import contract_to_yaml
    from duckguard.connectors import connect

    try:
        if action == "generate":
            if not source:
                console.print("[red]Error:[/red] Source required for generate")
                raise typer.Exit(1)

            console.print(f"\n[bold blue]DuckGuard[/bold blue] Generating contract for: [cyan]{source}[/cyan]\n")

            with Progress(
                SpinnerColumn(),
                TextColumn("[progress.description]{task.description}"),
                console=console,
                transient=True,
            ) as progress:
                progress.add_task("Analyzing data...", total=None)
                contract_obj = generate_contract(source)

            _display_contract(contract_obj)

            if output:
                yaml_content = contract_to_yaml(contract_obj)
                Path(output).write_text(yaml_content, encoding="utf-8")
                console.print(f"\n[green]✓[/green] Contract saved to [cyan]{output}[/cyan]")

        elif action == "validate":
            if not source or not contract_file:
                console.print("[red]Error:[/red] Both source and --contract required for validate")
                raise typer.Exit(1)

            console.print("\n[bold blue]DuckGuard[/bold blue] Validating against contract\n")

            with Progress(
                SpinnerColumn(),
                TextColumn("[progress.description]{task.description}"),
                console=console,
                transient=True,
            ) as progress:
                progress.add_task("Validating...", total=None)
                contract_obj = load_contract(contract_file)
                result = validate_contract(contract_obj, source, strict_mode=strict)

            _display_contract_validation(result)

            if not result.passed:
                raise typer.Exit(1)

        elif action == "diff":
            # For diff, `source` is the old contract and --contract the new one.
            if not source or not contract_file:
                console.print("[red]Error:[/red] Two contract files required for diff")
                raise typer.Exit(1)

            old_contract = load_contract(source)
            new_contract = load_contract(contract_file)

            diff_result = diff_contracts(old_contract, new_contract)
            _display_contract_diff(diff_result)

        else:
            console.print(f"[red]Error:[/red] Unknown action: {action}")
            raise typer.Exit(1)

    except typer.Exit:
        # typer.Exit derives from RuntimeError; let the deliberate exits above
        # through instead of letting the generic handler print a second,
        # misleading "Error: 1" line.
        raise
    except Exception as e:
        console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)
343
+
344
+
345
@app.command()
def anomaly(
    source: str = typer.Argument(..., help="Path to file or connection string"),
    table: Optional[str] = typer.Option(None, "--table", "-t", help="Table name"),
    method: str = typer.Option("zscore", "--method", "-m", help="Detection method: zscore, iqr, percent_change"),
    threshold: Optional[float] = typer.Option(None, "--threshold", help="Detection threshold"),
    columns: Optional[list[str]] = typer.Option(None, "--column", "-c", help="Specific columns to check"),
) -> None:
    """
    Detect anomalies in data.

    [bold]Examples:[/bold]
        duckguard anomaly data.csv
        duckguard anomaly data.csv --method iqr --threshold 2.0
        duckguard anomaly data.csv --column amount --column quantity
    """
    from duckguard.connectors import connect
    from duckguard.anomaly import detect_anomalies

    console.print(f"\n[bold blue]DuckGuard[/bold blue] Detecting anomalies in: [cyan]{source}[/cyan]\n")

    try:
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
            transient=True,
        ) as progress:
            progress.add_task("Analyzing data...", total=None)
            dataset = connect(source, table=table)
            report = detect_anomalies(
                dataset,
                method=method,
                threshold=threshold,
                columns=columns,
            )

        _display_anomaly_report(report)

        # A non-zero exit status signals that anomalies were found.
        if report.has_anomalies:
            raise typer.Exit(1)

    except typer.Exit:
        # typer.Exit derives from RuntimeError; without this clause the
        # handler below would print "Error: 1" after the anomaly report.
        raise
    except Exception as e:
        console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)
390
+
391
+
392
@app.command()
def info(
    source: str = typer.Argument(..., help="Path to file or connection string"),
    table: Optional[str] = typer.Option(None, "--table", "-t", help="Table name"),
) -> None:
    """
    Display information about a data source.

    [bold]Examples:[/bold]
        duckguard info data.csv
        duckguard info postgres://localhost/db --table users
    """
    from duckguard.connectors import connect
    from duckguard.semantic import SemanticAnalyzer

    # Only this many columns are listed in detail; the rest are summarised.
    max_listed = 20

    try:
        dataset = connect(source, table=table)
        analyzer = SemanticAnalyzer()

        console.print(Panel(f"[bold]{dataset.name}[/bold]", border_style="blue"))

        # Headline facts about the source
        summary = Table(show_header=False, box=None)
        summary.add_column("Property", style="cyan")
        summary.add_column("Value", style="green")
        summary_rows = (
            ("Source", source),
            ("Rows", f"{dataset.row_count:,}"),
            ("Columns", str(dataset.column_count)),
        )
        for label, value in summary_rows:
            summary.add_row(label, value)

        console.print(summary)
        console.print()

        # Per-column details
        col_table = Table(title="Columns")
        header_spec = (
            ("Name", {"style": "cyan"}),
            ("Type", {"style": "magenta"}),
            ("Nulls", {"justify": "right"}),
            ("Unique", {"justify": "right"}),
            ("Semantic", {"style": "yellow"}),
        )
        for heading, options in header_spec:
            col_table.add_column(heading, **options)

        for col_name in dataset.columns[:max_listed]:
            col = dataset[col_name]
            col_analysis = analyzer.analyze_column(dataset, col_name)

            sem_type = col_analysis.semantic_type.value
            if sem_type == "unknown":
                sem_type = "-"
            if col_analysis.is_pii:
                sem_type = f"🔒 {sem_type}"

            # A column that reports a mean is shown as numeric.
            kind = "string" if col.mean is None else "numeric"

            col_table.add_row(
                col_name,
                kind,
                f"{col.null_percent:.1f}%",
                f"{col.unique_percent:.1f}%",
                sem_type,
            )

        if dataset.column_count > max_listed:
            col_table.add_row(f"... and {dataset.column_count - max_listed} more", "", "", "", "")

        console.print(col_table)

    except Exception as e:
        console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)
462
+
463
+
464
+ # Helper display functions
465
+
466
def _display_execution_result(result, verbose: bool = False) -> None:
    """Render the outcome of a rule-set execution as a table plus a summary.

    Args:
        result: Execution result exposing ``results`` (per-check outcomes),
            ``passed``, ``total_checks``, ``failed_count``, ``warning_count``
            and ``passed_count``.
        verbose: Accepted for call-site compatibility; not used here.
    """
    # Imported locally so this helper does not depend on a new module import.
    from rich.markup import escape

    table = Table(title="Validation Results")
    table.add_column("Check", style="cyan")
    table.add_column("Status", justify="center")
    table.add_column("Details")

    for check_result in result.results:
        if check_result.passed:
            status = "[green]✓ PASS[/green]"
        elif check_result.severity.value == "warning":
            status = "[yellow]⚠ WARN[/yellow]"
        else:
            status = "[red]✗ FAIL[/red]"

        # The "[column] " prefix has the shape of a Rich markup tag, so it
        # must be escaped or Rich consumes it (e.g. "[email]" vanishes and
        # "[red]" recolours the cell) instead of printing it.
        col_str = escape(f"[{check_result.column}] ") if check_result.column else ""
        # NOTE(review): the message is still rendered as markup and truncated
        # at 60 characters; confirm messages never contain bracketed text.
        table.add_row(
            f"{col_str}{check_result.check.type.value}",
            status,
            check_result.message[:60],
        )

    console.print(table)

    # Summary
    console.print()
    if result.passed:
        console.print(f"[green]✓ All {result.total_checks} checks passed[/green]")
    else:
        console.print(
            f"[red]✗ {result.failed_count} failed[/red], "
            f"[yellow]{result.warning_count} warnings[/yellow], "
            f"[green]{result.passed_count} passed[/green]"
        )
500
+
501
+
502
def _display_quick_results(results: list) -> None:
    """Print the ad-hoc CLI checks as a three-column table.

    Each item of ``results`` is a 4-tuple ``(name, passed, details, column)``;
    the last element is ignored here.
    """
    pass_markup = "[green]✓ PASS[/green]"
    fail_markup = "[red]✗ FAIL[/red]"

    table = Table()
    table.add_column("Check", style="cyan")
    table.add_column("Status", justify="center")
    table.add_column("Details")

    for entry in results:
        check_name, passed, details, _ = entry
        if passed:
            table.add_row(check_name, pass_markup, details)
        else:
            table.add_row(check_name, fail_markup, details)

    console.print(table)
514
+
515
+
516
def _display_quality_score(quality) -> None:
    """Print the overall quality score and grade in a grade-coloured panel.

    ``quality`` must expose ``overall`` (number) and ``grade``; grades
    outside A/B/C/D/F fall back to white.
    """
    color = {
        "A": "green",
        "B": "blue",
        "C": "yellow",
        "D": "orange1",
        "F": "red",
    }.get(quality.grade, "white")

    score_markup = f"[{color}]{quality.overall:.0f}/100[/{color}]"
    grade_markup = f"[{color}]{quality.grade}[/{color}]"
    body = f"[bold]Quality Score: {score_markup} (Grade: {grade_markup})[/bold]"

    console.print()
    console.print(Panel(body, border_style=color))
527
+
528
+
529
def _display_discovery_results(analysis, ruleset) -> None:
    """Summarise a discovery run: column count, PII warning, per-column table.

    At most 15 columns are listed, each with up to three suggested
    validations; anything beyond that is summarised as a count.
    """
    max_columns = 15
    max_rules = 3

    # Summary
    console.print(f"[bold]Discovered {analysis.column_count} columns[/bold]\n")

    # PII warning
    pii_columns = analysis.pii_columns
    if pii_columns:
        bullets = "\n".join(f" • {col}" for col in pii_columns)
        console.print(Panel(
            "[yellow]⚠️ PII Detected[/yellow]\n" + bullets,
            border_style="yellow",
        ))
        console.print()

    # Column analysis table
    table = Table(title="Column Analysis")
    table.add_column("Column", style="cyan")
    table.add_column("Semantic Type", style="magenta")
    table.add_column("Suggested Rules")

    for col in analysis.columns[:max_columns]:
        sem = col.semantic_type.value
        if col.is_pii:
            sem = f"🔒 {sem}"

        suggestions = col.suggested_validations
        rules = ", ".join(suggestions[:max_rules])
        surplus = len(suggestions) - max_rules
        if surplus > 0:
            rules += f" (+{surplus})"

        table.add_row(col.name, sem, rules or "-")

    remaining = len(analysis.columns) - max_columns
    if remaining > 0:
        table.add_row(f"... and {remaining} more", "", "")

    console.print(table)
    console.print()
    console.print(f"[dim]Generated {ruleset.total_checks} validation rules[/dim]")
566
+
567
+
568
def _display_contract(contract) -> None:
    """Print a contract's header, schema table and quality SLA.

    Only the first 15 schema fields are listed; when more exist a trailing
    "... and N more" row says so, matching the other tables in this module.
    """
    max_fields = 15

    console.print(f"[bold]Contract: {contract.name}[/bold] v{contract.version}\n")

    # Schema
    table = Table(title="Schema")
    table.add_column("Field", style="cyan")
    table.add_column("Type", style="magenta")
    table.add_column("Required")
    table.add_column("Unique")
    table.add_column("PII")

    for field_obj in contract.schema[:max_fields]:
        # The field type may be an enum-like object (with .value) or a plain value.
        type_str = field_obj.type.value if hasattr(field_obj.type, 'value') else str(field_obj.type)
        table.add_row(
            field_obj.name,
            type_str,
            "✓" if field_obj.required else "",
            "✓" if field_obj.unique else "",
            "🔒" if field_obj.pii else "",
        )

    # Make truncation visible instead of silently dropping fields.
    hidden = len(contract.schema) - max_fields
    if hidden > 0:
        table.add_row(f"... and {hidden} more", "", "", "", "")

    console.print(table)

    # Quality SLA
    if contract.quality:
        console.print("\n[bold]Quality SLA:[/bold]")
        if contract.quality.completeness:
            console.print(f" • Completeness: {contract.quality.completeness}%")
        if contract.quality.row_count_min:
            console.print(f" • Min rows: {contract.quality.row_count_min:,}")
599
+
600
+
601
def _display_contract_validation(result) -> None:
    """Print the pass/fail status of a contract validation and its violations.

    At most 20 violations are listed, each message cut to 50 characters;
    when more exist a trailing "... and N more" row reports how many were
    left out.
    """
    max_violations = 20

    status = "[green]✓ PASSED[/green]" if result.passed else "[red]✗ FAILED[/red]"
    console.print(f"Contract: [bold]{result.contract.name}[/bold] v{result.contract.version}")
    console.print(f"Status: {status}\n")

    if result.violations:
        table = Table(title="Violations")
        table.add_column("Type", style="magenta")
        table.add_column("Field", style="cyan")
        table.add_column("Message")
        table.add_column("Severity")

        severity_styles = {"error": "red", "warning": "yellow", "info": "dim"}
        for v in result.violations[:max_violations]:
            sev_style = severity_styles.get(v.severity.value, "white")
            table.add_row(
                v.type.value,
                v.field or "-",
                v.message[:50],
                f"[{sev_style}]{v.severity.value}[/{sev_style}]",
            )

        # Make truncation visible instead of silently dropping violations.
        hidden = len(result.violations) - max_violations
        if hidden > 0:
            table.add_row(f"... and {hidden} more", "", "", "")

        console.print(table)
    else:
        console.print("[green]No violations found[/green]")
626
+
627
+
628
def _display_contract_diff(diff) -> None:
    """Print a comparison of two contract versions, grouped by severity.

    Shows both version numbers, then (if anything changed) the breaking,
    minor and non-breaking changes followed by the suggested version bump.
    """
    console.print("[bold]Comparing contracts[/bold]")
    console.print(f" Old: v{diff.old_contract.version}")
    console.print(f" New: v{diff.new_contract.version}\n")

    if not diff.has_changes:
        console.print("[green]No changes detected[/green]")
        return

    console.print(f"[bold]{len(diff.changes)} changes detected[/bold]\n")

    # (attribute on diff, heading, line prefix, blank line after section)
    sections = (
        ("breaking_changes", "[red bold]Breaking Changes:[/red bold]", " ❌ ", True),
        ("minor_changes", "[yellow bold]Minor Changes:[/yellow bold]", " ⚠️ ", True),
        ("non_breaking_changes", "[dim]Non-breaking Changes:[/dim]", " • ", False),
    )
    for attr_name, heading, prefix, blank_after in sections:
        changes = getattr(diff, attr_name)
        if not changes:
            continue
        console.print(heading)
        for change in changes:
            console.print(f"{prefix}{change.message}")
        if blank_after:
            console.print()

    console.print(f"\n[dim]Suggested version bump: {diff.suggest_version_bump()}[/dim]")
658
+
659
+
660
def _display_anomaly_report(report) -> None:
    """Print the anomaly report: an all-clear line, or a count and a table.

    Each table row shows the column (or "-"), the anomaly type, the score
    to two decimals and the first 50 characters of the message.
    """
    if report.has_anomalies:
        console.print(f"[yellow bold]⚠️ {report.anomaly_count} anomalies detected[/yellow bold]\n")

        table = Table(title="Anomalies")
        table.add_column("Column", style="cyan")
        table.add_column("Type", style="magenta")
        table.add_column("Score", justify="right")
        table.add_column("Message")

        for finding in report.get_anomalies():
            column_label = finding.column or "-"
            score_text = f"{finding.score:.2f}"
            table.add_row(
                column_label,
                finding.anomaly_type.value,
                score_text,
                finding.message[:50],
            )

        console.print(table)
    else:
        console.print("[green]✓ No anomalies detected[/green]")
683
+
684
+
685
def _save_results(output: str, dataset, results) -> None:
    """Write a JSON summary of the dataset and optional quick-check results.

    Args:
        output: Destination file path; overwritten if it exists.
        dataset: Object exposing ``source``, ``row_count``, ``column_count``
            and ``columns``.
        results: Quick-check tuples ``(name, passed, details, ...)``; when
            falsy (e.g. ``None`` in config mode) no "checks" key is written.
    """
    import json

    data = {
        "source": dataset.source,
        "row_count": dataset.row_count,
        "column_count": dataset.column_count,
        "columns": dataset.columns,
    }

    if results:
        data["checks"] = [
            {"name": r[0], "passed": r[1], "details": r[2]}
            for r in results
        ]

    # Explicit encoding, consistent with the other writers in this module,
    # so the file does not depend on the platform's default locale.
    Path(output).write_text(json.dumps(data, indent=2), encoding="utf-8")
703
+
704
+
705
# Allow running this module directly as a script.
if __name__ == "__main__":
    app()