duckguard 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. duckguard/__init__.py +110 -0
  2. duckguard/anomaly/__init__.py +34 -0
  3. duckguard/anomaly/detector.py +394 -0
  4. duckguard/anomaly/methods.py +432 -0
  5. duckguard/cli/__init__.py +5 -0
  6. duckguard/cli/main.py +706 -0
  7. duckguard/connectors/__init__.py +58 -0
  8. duckguard/connectors/base.py +80 -0
  9. duckguard/connectors/bigquery.py +171 -0
  10. duckguard/connectors/databricks.py +201 -0
  11. duckguard/connectors/factory.py +292 -0
  12. duckguard/connectors/files.py +135 -0
  13. duckguard/connectors/kafka.py +343 -0
  14. duckguard/connectors/mongodb.py +236 -0
  15. duckguard/connectors/mysql.py +121 -0
  16. duckguard/connectors/oracle.py +196 -0
  17. duckguard/connectors/postgres.py +99 -0
  18. duckguard/connectors/redshift.py +154 -0
  19. duckguard/connectors/snowflake.py +226 -0
  20. duckguard/connectors/sqlite.py +112 -0
  21. duckguard/connectors/sqlserver.py +242 -0
  22. duckguard/contracts/__init__.py +48 -0
  23. duckguard/contracts/diff.py +432 -0
  24. duckguard/contracts/generator.py +334 -0
  25. duckguard/contracts/loader.py +367 -0
  26. duckguard/contracts/schema.py +242 -0
  27. duckguard/contracts/validator.py +453 -0
  28. duckguard/core/__init__.py +8 -0
  29. duckguard/core/column.py +437 -0
  30. duckguard/core/dataset.py +284 -0
  31. duckguard/core/engine.py +261 -0
  32. duckguard/core/result.py +119 -0
  33. duckguard/core/scoring.py +508 -0
  34. duckguard/profiler/__init__.py +5 -0
  35. duckguard/profiler/auto_profile.py +350 -0
  36. duckguard/pytest_plugin/__init__.py +5 -0
  37. duckguard/pytest_plugin/plugin.py +161 -0
  38. duckguard/reporting/__init__.py +6 -0
  39. duckguard/reporting/console.py +88 -0
  40. duckguard/reporting/json_report.py +96 -0
  41. duckguard/rules/__init__.py +28 -0
  42. duckguard/rules/executor.py +616 -0
  43. duckguard/rules/generator.py +341 -0
  44. duckguard/rules/loader.py +483 -0
  45. duckguard/rules/schema.py +289 -0
  46. duckguard/semantic/__init__.py +31 -0
  47. duckguard/semantic/analyzer.py +270 -0
  48. duckguard/semantic/detector.py +459 -0
  49. duckguard/semantic/validators.py +354 -0
  50. duckguard/validators/__init__.py +7 -0
  51. duckguard-2.0.0.dist-info/METADATA +221 -0
  52. duckguard-2.0.0.dist-info/RECORD +55 -0
  53. duckguard-2.0.0.dist-info/WHEEL +4 -0
  54. duckguard-2.0.0.dist-info/entry_points.txt +5 -0
  55. duckguard-2.0.0.dist-info/licenses/LICENSE +55 -0
@@ -0,0 +1,96 @@
1
+ """JSON reporter for DuckGuard."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from datetime import datetime
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ from duckguard.core.result import ProfileResult, ScanResult
11
+
12
+
13
class JSONReporter:
    """Render DuckGuard profile and scan results as JSON.

    ``profile_to_dict`` / ``scan_to_dict`` build plain dictionaries,
    ``to_json`` turns a dictionary into JSON text, and ``save_profile`` /
    ``save_scan`` write that text to a file.
    """

    def __init__(self, pretty: bool = True):
        """Create a reporter.

        Args:
            pretty: When true, ``to_json`` indents its output by two spaces;
                otherwise no indentation is applied.
        """
        self.pretty = pretty

    def profile_to_dict(self, profile: ProfileResult) -> dict[str, Any]:
        """Convert a profile result to a dictionary.

        Args:
            profile: Profile result to convert. Its ``timestamp`` must
                provide ``isoformat()``.

        Returns:
            A dictionary tagged ``"type": "profile"`` with dataset-level
            fields and one entry per column under ``"columns"``.
        """
        return {
            "type": "profile",
            "source": profile.source,
            "row_count": profile.row_count,
            "column_count": profile.column_count,
            "timestamp": profile.timestamp.isoformat(),
            "columns": [self._column_to_dict(col) for col in profile.columns],
            "suggested_rules": profile.suggested_rules,
        }

    def scan_to_dict(self, scan: ScanResult) -> dict[str, Any]:
        """Convert a scan result to a dictionary.

        Args:
            scan: Scan result to convert. Its ``timestamp`` must provide
                ``isoformat()``.

        Returns:
            A dictionary tagged ``"type": "scan"`` with the summary counters
            and one entry per check under ``"results"``.
        """
        return {
            "type": "scan",
            "source": scan.source,
            "row_count": scan.row_count,
            "checks_run": scan.checks_run,
            "checks_passed": scan.checks_passed,
            "checks_failed": scan.checks_failed,
            "checks_warned": scan.checks_warned,
            "pass_rate": scan.pass_rate,
            "passed": scan.passed,
            "timestamp": scan.timestamp.isoformat(),
            "results": [self._check_to_dict(r) for r in scan.results],
        }

    def to_json(self, data: dict[str, Any]) -> str:
        """Convert a dictionary to a JSON string.

        Values the ``json`` module cannot encode natively are converted with
        ``str`` (``default=str``).

        Args:
            data: Dictionary to encode.

        Returns:
            The JSON text, indented by two spaces when ``self.pretty`` is
            true.
        """
        indent = 2 if self.pretty else None
        return json.dumps(data, indent=indent, default=str)

    def save_profile(self, profile: ProfileResult, path: str | Path) -> None:
        """Save a profile result to a JSON file.

        Args:
            profile: Profile result to write.
            path: Destination file; it is overwritten if it exists.
        """
        data = self.profile_to_dict(profile)
        # Explicit encoding so the file does not depend on the platform locale.
        Path(path).write_text(self.to_json(data), encoding="utf-8")

    def save_scan(self, scan: ScanResult, path: str | Path) -> None:
        """Save a scan result to a JSON file.

        Args:
            scan: Scan result to write.
            path: Destination file; it is overwritten if it exists.
        """
        data = self.scan_to_dict(scan)
        # Explicit encoding so the file does not depend on the platform locale.
        Path(path).write_text(self.to_json(data), encoding="utf-8")

    def _column_to_dict(self, col: Any) -> dict[str, Any]:
        """Build the dictionary entry for one profiled column."""
        return {
            "name": col.name,
            "dtype": col.dtype,
            "null_count": col.null_count,
            "null_percent": col.null_percent,
            "unique_count": col.unique_count,
            "unique_percent": col.unique_percent,
            "min_value": self._serialize_value(col.min_value),
            "max_value": self._serialize_value(col.max_value),
            # Routed through the serializer like min/max so a NaN mean or
            # standard deviation cannot leak into the JSON output.
            "mean_value": self._serialize_value(col.mean_value),
            "stddev_value": self._serialize_value(col.stddev_value),
            "suggested_rules": col.suggested_rules,
        }

    def _check_to_dict(self, result: Any) -> dict[str, Any]:
        """Build the dictionary entry for one check result."""
        return {
            "name": result.name,
            # NOTE(review): ``status`` is presumably an Enum; only its
            # ``.value`` attribute is read here.
            "status": result.status.value,
            "actual_value": self._serialize_value(result.actual_value),
            "expected_value": self._serialize_value(result.expected_value),
            "message": result.message,
            "column": result.column,
        }

    def _serialize_value(self, value: Any) -> Any:
        """Serialize a value to a JSON-compatible type.

        Args:
            value: Arbitrary value taken from a profile or check result.

        Returns:
            ``None`` for ``None`` and for non-finite floats (NaN, +/-inf),
            an ISO 8601 string for ``datetime`` instances, the value itself
            for ``int``/``float``/``str``/``bool``, and ``str(value)`` for
            anything else.
        """
        # Imported locally so this class does not rely on a module-level
        # ``import math`` being present in the file.
        import math

        if value is None:
            return None
        if isinstance(value, datetime):
            return value.isoformat()
        if isinstance(value, float) and not math.isfinite(value):
            # json.dumps would emit the bare tokens NaN / Infinity, which are
            # not valid JSON; represent them as null instead.
            return None
        if isinstance(value, (int, float, str, bool)):
            return value
        return str(value)
@@ -0,0 +1,28 @@
1
"""Declarative, YAML-driven data quality rules for DuckGuard.

Rules can be described in YAML rather than written as Python code, which
suits users who prefer configuration over code. This package re-exports the
rule system's public names from its submodules so they can be imported from
one place.

Example:
    from duckguard.rules import load_rules, execute_rules

    rules = load_rules("duckguard.yaml")
    results = execute_rules(rules, "data.csv")
"""

from duckguard.rules.loader import (
    load_rules,
    load_rules_from_string,
)
from duckguard.rules.executor import (
    execute_rules,
    RuleExecutor,
)
from duckguard.rules.schema import (
    RuleSet,
    ColumnRules,
    Check,
    SimpleCheck,
)
from duckguard.rules.generator import (
    generate_rules,
)

# Public API of the package, grouped by the submodule each name comes from.
__all__ = [
    # duckguard.rules.loader
    "load_rules",
    "load_rules_from_string",
    # duckguard.rules.executor
    "execute_rules",
    "RuleExecutor",
    # duckguard.rules.schema
    "RuleSet",
    "ColumnRules",
    "Check",
    "SimpleCheck",
    # duckguard.rules.generator
    "generate_rules",
]