duckguard 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- duckguard/__init__.py +110 -0
- duckguard/anomaly/__init__.py +34 -0
- duckguard/anomaly/detector.py +394 -0
- duckguard/anomaly/methods.py +432 -0
- duckguard/cli/__init__.py +5 -0
- duckguard/cli/main.py +706 -0
- duckguard/connectors/__init__.py +58 -0
- duckguard/connectors/base.py +80 -0
- duckguard/connectors/bigquery.py +171 -0
- duckguard/connectors/databricks.py +201 -0
- duckguard/connectors/factory.py +292 -0
- duckguard/connectors/files.py +135 -0
- duckguard/connectors/kafka.py +343 -0
- duckguard/connectors/mongodb.py +236 -0
- duckguard/connectors/mysql.py +121 -0
- duckguard/connectors/oracle.py +196 -0
- duckguard/connectors/postgres.py +99 -0
- duckguard/connectors/redshift.py +154 -0
- duckguard/connectors/snowflake.py +226 -0
- duckguard/connectors/sqlite.py +112 -0
- duckguard/connectors/sqlserver.py +242 -0
- duckguard/contracts/__init__.py +48 -0
- duckguard/contracts/diff.py +432 -0
- duckguard/contracts/generator.py +334 -0
- duckguard/contracts/loader.py +367 -0
- duckguard/contracts/schema.py +242 -0
- duckguard/contracts/validator.py +453 -0
- duckguard/core/__init__.py +8 -0
- duckguard/core/column.py +437 -0
- duckguard/core/dataset.py +284 -0
- duckguard/core/engine.py +261 -0
- duckguard/core/result.py +119 -0
- duckguard/core/scoring.py +508 -0
- duckguard/profiler/__init__.py +5 -0
- duckguard/profiler/auto_profile.py +350 -0
- duckguard/pytest_plugin/__init__.py +5 -0
- duckguard/pytest_plugin/plugin.py +161 -0
- duckguard/reporting/__init__.py +6 -0
- duckguard/reporting/console.py +88 -0
- duckguard/reporting/json_report.py +96 -0
- duckguard/rules/__init__.py +28 -0
- duckguard/rules/executor.py +616 -0
- duckguard/rules/generator.py +341 -0
- duckguard/rules/loader.py +483 -0
- duckguard/rules/schema.py +289 -0
- duckguard/semantic/__init__.py +31 -0
- duckguard/semantic/analyzer.py +270 -0
- duckguard/semantic/detector.py +459 -0
- duckguard/semantic/validators.py +354 -0
- duckguard/validators/__init__.py +7 -0
- duckguard-2.0.0.dist-info/METADATA +221 -0
- duckguard-2.0.0.dist-info/RECORD +55 -0
- duckguard-2.0.0.dist-info/WHEEL +4 -0
- duckguard-2.0.0.dist-info/entry_points.txt +5 -0
- duckguard-2.0.0.dist-info/licenses/LICENSE +55 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""JSON reporter for DuckGuard."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from duckguard.core.result import ProfileResult, ScanResult
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class JSONReporter:
    """Reporter that outputs to JSON format.

    Converts profile and scan results into plain dictionaries, renders
    those dictionaries as JSON text, and optionally writes them to disk.
    """

    def __init__(self, pretty: bool = True):
        """Create a reporter.

        Args:
            pretty: When true, ``to_json`` indents its output by two spaces;
                otherwise the compact single-line form is produced.
        """
        self.pretty = pretty

    def profile_to_dict(self, profile: ProfileResult) -> dict[str, Any]:
        """Convert profile result to dictionary.

        Args:
            profile: Object exposing ``source``, ``row_count``,
                ``column_count``, ``timestamp``, ``columns`` and
                ``suggested_rules``.

        Returns:
            A dictionary tagged ``"type": "profile"`` with one entry per
            column under ``"columns"``.
        """
        return {
            "type": "profile",
            "source": profile.source,
            "row_count": profile.row_count,
            "column_count": profile.column_count,
            "timestamp": profile.timestamp.isoformat(),
            "columns": [self._column_to_dict(col) for col in profile.columns],
            "suggested_rules": profile.suggested_rules,
        }

    def _column_to_dict(self, col: Any) -> dict[str, Any]:
        """Convert a single column profile to a dictionary."""
        return {
            "name": col.name,
            "dtype": col.dtype,
            "null_count": col.null_count,
            "null_percent": col.null_percent,
            "unique_count": col.unique_count,
            "unique_percent": col.unique_percent,
            "min_value": self._serialize_value(col.min_value),
            "max_value": self._serialize_value(col.max_value),
            # Statistics go through the same serializer as min/max so that
            # a NaN mean or stddev cannot leak into the JSON output.
            "mean_value": self._serialize_value(col.mean_value),
            "stddev_value": self._serialize_value(col.stddev_value),
            "suggested_rules": col.suggested_rules,
        }

    def scan_to_dict(self, scan: ScanResult) -> dict[str, Any]:
        """Convert scan result to dictionary.

        Args:
            scan: Object exposing the check counters, ``pass_rate``,
                ``passed``, ``timestamp`` and an iterable ``results``.

        Returns:
            A dictionary tagged ``"type": "scan"`` with one entry per check
            under ``"results"``.
        """
        return {
            "type": "scan",
            "source": scan.source,
            "row_count": scan.row_count,
            "checks_run": scan.checks_run,
            "checks_passed": scan.checks_passed,
            "checks_failed": scan.checks_failed,
            "checks_warned": scan.checks_warned,
            "pass_rate": scan.pass_rate,
            "passed": scan.passed,
            "timestamp": scan.timestamp.isoformat(),
            "results": [
                {
                    "name": r.name,
                    "status": r.status.value,
                    "actual_value": self._serialize_value(r.actual_value),
                    "expected_value": self._serialize_value(r.expected_value),
                    "message": r.message,
                    "column": r.column,
                }
                for r in scan.results
            ],
        }

    def to_json(self, data: dict[str, Any]) -> str:
        """Convert dictionary to JSON string.

        Values the ``json`` module cannot encode natively are rendered with
        ``str`` (``default=str``).
        """
        indent = 2 if self.pretty else None
        return json.dumps(data, indent=indent, default=str)

    def save_profile(self, profile: ProfileResult, path: str | Path) -> None:
        """Save profile result to JSON file."""
        data = self.profile_to_dict(profile)
        # Explicit encoding keeps the file independent of the platform locale.
        Path(path).write_text(self.to_json(data), encoding="utf-8")

    def save_scan(self, scan: ScanResult, path: str | Path) -> None:
        """Save scan result to JSON file."""
        data = self.scan_to_dict(scan)
        # Explicit encoding keeps the file independent of the platform locale.
        Path(path).write_text(self.to_json(data), encoding="utf-8")

    def _serialize_value(self, value: Any) -> Any:
        """Serialize a value to JSON-compatible type.

        ``None``, ``int``, ``str``, ``bool`` and finite ``float`` values are
        returned unchanged. ``datetime`` values become ISO-8601 strings.
        Everything else, including non-finite floats, is rendered with
        ``str``.
        """
        if value is None:
            return None
        if isinstance(value, datetime):
            return value.isoformat()
        if isinstance(value, float):
            # json.dumps would emit bare NaN/Infinity tokens, which are not
            # valid JSON; fall back to the string form ("nan", "inf", "-inf").
            # NaN is the only float that compares unequal to itself.
            if value != value or value in (float("inf"), float("-inf")):
                return str(value)
            return value
        if isinstance(value, (int, str, bool)):
            return value
        return str(value)
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""YAML-based rule system for DuckGuard.
|
|
2
|
+
|
|
3
|
+
This module provides a declarative YAML syntax for defining data quality rules,
|
|
4
|
+
making DuckGuard accessible to users who prefer configuration over code.
|
|
5
|
+
|
|
6
|
+
Example:
|
|
7
|
+
from duckguard.rules import load_rules, execute_rules
|
|
8
|
+
|
|
9
|
+
rules = load_rules("duckguard.yaml")
|
|
10
|
+
results = execute_rules(rules, "data.csv")
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from duckguard.rules.loader import load_rules, load_rules_from_string
|
|
14
|
+
from duckguard.rules.executor import execute_rules, RuleExecutor
|
|
15
|
+
from duckguard.rules.schema import RuleSet, ColumnRules, Check, SimpleCheck
|
|
16
|
+
from duckguard.rules.generator import generate_rules
|
|
17
|
+
|
|
18
|
+
# Names exported by ``from duckguard.rules import *``, grouped by the
# submodule each one is imported from.
__all__ = [
    # duckguard.rules.loader
    "load_rules",
    "load_rules_from_string",
    # duckguard.rules.executor
    "execute_rules",
    "RuleExecutor",
    # duckguard.rules.schema
    "RuleSet",
    "ColumnRules",
    "Check",
    "SimpleCheck",
    # duckguard.rules.generator
    "generate_rules",
]
|