akad-framework 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- akad/__init__.py +10 -0
- akad/cli.py +121 -0
- akad/contract_loader.py +17 -0
- akad/engine.py +124 -0
- akad/models/__init__.py +0 -0
- akad/models/contract.py +111 -0
- akad/models/result.py +73 -0
- akad/notifier.py +23 -0
- akad/notifiers/__init__.py +0 -0
- akad/notifiers/base.py +13 -0
- akad/notifiers/email_notifier.py +62 -0
- akad/notifiers/webhook_notifier.py +51 -0
- akad/readers/__init__.py +0 -0
- akad/readers/base.py +23 -0
- akad/readers/parquet_reader.py +26 -0
- akad/readers/sql_reader.py +36 -0
- akad/registry_client.py +60 -0
- akad/sdk.py +90 -0
- akad/validators/__init__.py +0 -0
- akad/validators/base.py +20 -0
- akad/validators/freshness_validator.py +66 -0
- akad/validators/quality_validator.py +88 -0
- akad/validators/schema_validator.py +115 -0
- akad/validators/volume_validator.py +47 -0
- akad_framework-1.0.0.dist-info/METADATA +531 -0
- akad_framework-1.0.0.dist-info/RECORD +47 -0
- akad_framework-1.0.0.dist-info/WHEEL +5 -0
- akad_framework-1.0.0.dist-info/entry_points.txt +2 -0
- akad_framework-1.0.0.dist-info/licenses/LICENSE +21 -0
- akad_framework-1.0.0.dist-info/top_level.txt +3 -0
- dashboard/__init__.py +0 -0
- dashboard/main.py +122 -0
- dashboard/templates/_macros.html +9 -0
- dashboard/templates/base.html +44 -0
- dashboard/templates/breaches.html +55 -0
- dashboard/templates/contract_detail.html +48 -0
- dashboard/templates/discovery.html +31 -0
- dashboard/templates/index.html +86 -0
- registry/__init__.py +0 -0
- registry/database.py +36 -0
- registry/main.py +26 -0
- registry/models.py +37 -0
- registry/routers/__init__.py +0 -0
- registry/routers/contracts.py +73 -0
- registry/routers/health.py +16 -0
- registry/routers/results.py +51 -0
- registry/schemas.py +67 -0
akad/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Akad — Data Contract Framework.
|
|
2
|
+
|
|
3
|
+
Public API::
|
|
4
|
+
|
|
5
|
+
from akad import DataContractValidator, DataContractBreachError
|
|
6
|
+
"""
|
|
7
|
+
from akad.sdk import DataContractBreachError, DataContractValidator
|
|
8
|
+
|
|
9
|
+
__all__ = ["DataContractValidator", "DataContractBreachError"]
|
|
10
|
+
__version__ = "1.0.0"
|
akad/cli.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import typer
|
|
7
|
+
|
|
8
|
+
from akad.contract_loader import load_contract
|
|
9
|
+
from akad.models.result import ValidationResult
|
|
10
|
+
from akad.registry_client import RegistryClient
|
|
11
|
+
|
|
12
|
+
app = typer.Typer(name="akad", help="Akad — Data Contract Framework CLI", no_args_is_help=True)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@app.command()
def validate(
    contract: Path = typer.Option(..., "--contract", "-c", help="Path to contract YAML"),
    registry_url: str | None = typer.Option(None, "--registry-url", "-r", help="Registry URL"),
    output: str = typer.Option("text", "--output", "-o", help="Output format: text|json"),
) -> None:
    """Validate a dataset against its contract."""
    # Exit codes:
    #   0 - the result is neither a breach nor an error
    #   1 - the contract was breached (raised or returned)
    #   2 - validation could not be carried out (exception, or ERROR status)
    #
    # The SDK is imported lazily so the other sub-commands do not pay for it.
    from akad.sdk import DataContractBreachError, DataContractValidator

    try:
        validator = DataContractValidator(
            contract_path=contract,
            registry_url=registry_url,
            notifiers=[],  # CLI never sends notifications
        )
        result = validator.validate()
    except DataContractBreachError as exc:
        # The breach error carries the full result; show it before exiting.
        result = exc.result
        _print_result(result, output)
        raise typer.Exit(code=1) from exc
    except Exception as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(code=2) from exc

    _print_result(result, output)
    if result.is_breach:
        raise typer.Exit(code=1)
    if result.overall_status.value == "ERROR":
        # A result can come back with an ERROR status instead of raising
        # (e.g. the dataset could not be read). Exiting 0 here would let a
        # pipeline pass without anything having been validated, so use the
        # same exit code as the exception path above.
        raise typer.Exit(code=2)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@app.command()
def publish(
    contract: Path = typer.Option(..., "--contract", "-c", help="Path to contract YAML"),
    registry_url: str = typer.Option(..., "--registry-url", "-r", help="Registry URL"),
) -> None:
    """Publish a contract to the registry."""
    # Load/validation and registry failures are reported the same way as in
    # the sibling commands: a one-line message on stderr and exit code 1,
    # rather than an unhandled traceback.
    try:
        c = load_contract(contract)
        client = RegistryClient(registry_url)
        client.publish_contract(c)
    except Exception as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(code=1) from exc
    typer.echo(f"Published {c.metadata.name} v{c.metadata.version}")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@app.command()
def check(
    contract: Path = typer.Option(..., "--contract", "-c", help="Path to contract YAML"),
) -> None:
    """Validate contract YAML syntax without accessing data."""
    # Only the contract file itself is loaded and validated here; no reader
    # or validator is invoked. Any failure is printed to stderr and turned
    # into exit code 1.
    try:
        meta = load_contract(contract).metadata
        typer.echo(f"OK {meta.name} v{meta.version} — contract is valid")
    except Exception as failure:
        typer.echo(f"FAIL {failure}", err=True)
        raise typer.Exit(code=1) from failure
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@app.command(name="list")
def list_contracts(
    registry_url: str = typer.Option(..., "--registry-url", "-r", help="Registry URL"),
) -> None:
    """List all contracts in the registry."""
    # httpx is imported lazily so commands that never talk to the registry
    # do not need it at import time.
    import httpx

    try:
        resp = httpx.get(f"{registry_url.rstrip('/')}/contracts/", timeout=10)
        # Fail on 4xx/5xx before parsing: otherwise an error body would be
        # iterated as if it were the contract list and surface as a
        # confusing KeyError/TypeError.
        resp.raise_for_status()
        for c in resp.json():
            typer.echo(f" {c['name']:40s} v{c['version']}")
    except Exception as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(code=1) from exc
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@app.command()
def history(
    name: str = typer.Option(..., "--name", "-n", help="Contract name"),
    registry_url: str = typer.Option(..., "--registry-url", "-r", help="Registry URL"),
    limit: int = typer.Option(20, "--limit", "-l", help="Number of results"),
) -> None:
    """Show breach history for a contract."""
    import httpx

    try:
        # Pass the filters through ``params`` so httpx URL-encodes them; a
        # contract name containing '&', '#', '=' or spaces would otherwise
        # corrupt the query string.
        resp = httpx.get(
            f"{registry_url.rstrip('/')}/validation-results/",
            params={"contract_name": name, "limit": limit},
            timeout=10,
        )
        # Surface HTTP errors instead of trying to iterate an error body.
        resp.raise_for_status()
        for r in resp.json():
            icon = "✓" if r["overall_status"] == "COMPLIANT" else "✗"
            typer.echo(f" {icon} {r['validated_at']} {r['overall_status']}")
    except Exception as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(code=1) from exc
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _print_result(result: ValidationResult, output: str) -> None:
    """Write *result* to stdout as JSON or as human-readable text.

    Args:
        result: The validation outcome to display.
        output: ``"json"`` selects the JSON form; any other value selects text.

    The result's ``error_message`` is included in both forms so that a result
    with an ERROR status explains why validation could not be completed.
    """
    if output == "json":
        typer.echo(json.dumps({
            "status": result.overall_status.value,
            "row_count": result.row_count,
            "failed_clauses": [c.to_dict() for c in result.failed_clauses],
            "error_message": result.error_message,
        }, indent=2))
        return

    icon = "✓" if result.overall_status.value == "COMPLIANT" else "✗"
    typer.echo(f"{icon} {result.contract_name} v{result.contract_version}: {result.overall_status.value}")
    if result.error_message:
        typer.echo(f"Error: {result.error_message}")
    if result.failed_clauses:
        typer.echo("Failed clauses:")
        for c in result.failed_clauses:
            # The target (e.g. a column name) is optional on a clause.
            target = f" [{c.clause_target}]" if c.clause_target else ""
            typer.echo(f" - [{c.clause_type}]{target} {c.message}")
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
if __name__ == "__main__":
|
|
121
|
+
app()
|
akad/contract_loader.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import yaml
|
|
6
|
+
|
|
7
|
+
from akad.models.contract import DataContract
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def load_contract(path: str | Path) -> DataContract:
    """Read the YAML file at *path* and build a validated ``DataContract``.

    The file is opened as UTF-8 and parsed with ``yaml.safe_load``; the parsed
    document is then handed to ``DataContract.model_validate``.

    Raises pydantic.ValidationError with clear messages if the file is invalid.
    """
    with open(path, encoding="utf-8") as stream:
        document = yaml.safe_load(stream)
    contract = DataContract.model_validate(document)
    return contract
|
akad/engine.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import UTC, datetime
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
from akad.models.contract import DataContract
|
|
8
|
+
from akad.models.result import ClauseResult, ClauseStatus, OverallStatus, ValidationResult
|
|
9
|
+
from akad.readers.parquet_reader import ParquetReader
|
|
10
|
+
from akad.readers.sql_reader import SQLReader
|
|
11
|
+
from akad.validators.freshness_validator import FreshnessValidator
|
|
12
|
+
from akad.validators.quality_validator import QualityValidator
|
|
13
|
+
from akad.validators.schema_validator import SchemaValidator
|
|
14
|
+
from akad.validators.volume_validator import VolumeValidator
|
|
15
|
+
|
|
16
|
+
_READERS = {
|
|
17
|
+
"parquet": ParquetReader,
|
|
18
|
+
"sql": SQLReader,
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
_DEFAULT_VALIDATORS = [
|
|
22
|
+
SchemaValidator(),
|
|
23
|
+
FreshnessValidator(),
|
|
24
|
+
VolumeValidator(),
|
|
25
|
+
QualityValidator(),
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def validate(
    contract: DataContract,
    extra_validators: list | None = None,
) -> ValidationResult:
    """Read the dataset named by *contract* and run every validator on it.

    This is the storage-backed entry point; ``validate_dataframe()`` does the
    actual checking and can be called directly with an in-memory DataFrame.

    Returns a result with status ERROR (and ``error_message`` set) when the
    dataset format has no registered reader or the read itself fails.
    """
    started = datetime.now(UTC)
    dataset = contract.dataset
    location = str(dataset.location or dataset.table_name or "")

    def _error(message: str) -> ValidationResult:
        # Both failure paths produce the same result shape; only the text differs.
        return ValidationResult(
            contract_name=contract.metadata.name,
            contract_version=contract.metadata.version,
            dataset_location=location,
            validated_at=started,
            overall_status=OverallStatus.ERROR,
            error_message=message,
        )

    reader_cls = _READERS.get(dataset.format)
    if not reader_cls:
        return _error(f"Unsupported dataset format: {dataset.format}")

    reader = reader_cls()

    try:
        frame = reader.read(dataset)
    except Exception as exc:
        return _error(f"Failed to read dataset: {exc}")

    # The modification time is optional: any failure to obtain it is treated
    # as "unknown" and passed on as None.
    last_modified: float | None
    try:
        last_modified = reader.get_last_modified(dataset)
    except Exception:
        last_modified = None

    return validate_dataframe(frame, contract, extra_validators, last_modified, _now=started)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def validate_dataframe(
    df: pd.DataFrame,
    contract: DataContract,
    extra_validators: list | None = None,
    reader_last_modified: float | None = None,
    _now: datetime | None = None,
) -> ValidationResult:
    """Run the default validators plus *extra_validators* on a loaded DataFrame.

    No storage is touched, which makes this the convenient entry point for
    unit and integration tests.

    A validator that raises does not abort the run: the exception is recorded
    as a single ERROR clause named after the validator class. The overall
    status is BREACH if any clause failed, otherwise ERROR if any clause
    errored, otherwise COMPLIANT.
    """
    timestamp = _now or datetime.now(UTC)
    dataset = contract.dataset
    location = str(dataset.location or dataset.table_name or "")

    collected: list[ClauseResult] = []
    for validator in _DEFAULT_VALIDATORS + (extra_validators or []):
        try:
            collected.extend(validator.validate(df, contract, reader_last_modified))
        except Exception as exc:
            collected.append(ClauseResult(
                clause_type=type(validator).__name__,
                clause_target=None,
                status=ClauseStatus.ERROR,
                expected="validator to complete",
                observed="validator raised exception",
                message=str(exc),
            ))

    # FAIL takes precedence over ERROR when both are present.
    statuses = [clause.status for clause in collected]
    if ClauseStatus.FAIL in statuses:
        overall = OverallStatus.BREACH
    elif ClauseStatus.ERROR in statuses:
        overall = OverallStatus.ERROR
    else:
        overall = OverallStatus.COMPLIANT

    return ValidationResult(
        contract_name=contract.metadata.name,
        contract_version=contract.metadata.version,
        dataset_location=location,
        validated_at=timestamp,
        overall_status=overall,
        clause_results=collected,
        row_count=len(df),
    )
|
akad/models/__init__.py
ADDED
|
File without changes
|
akad/models/contract.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from enum import StrEnum
|
|
4
|
+
from typing import Literal
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, Field
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ColumnType(StrEnum):
|
|
10
|
+
STRING = "string"
|
|
11
|
+
INTEGER = "integer"
|
|
12
|
+
FLOAT = "float"
|
|
13
|
+
BOOLEAN = "boolean"
|
|
14
|
+
DATE = "date"
|
|
15
|
+
TIMESTAMP = "timestamp"
|
|
16
|
+
DECIMAL = "decimal"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ColumnSpec(BaseModel):
    # Declaration of a single column inside a contract schema.
    name: str
    type: ColumnType
    nullable: bool = True  # columns are nullable unless stated otherwise
    description: str | None = None
    allowed_values: list[str] | None = None  # None means no value list is declared
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class SchemaSpec(BaseModel):
    # Schema section of a contract: the declared columns plus a flag that,
    # judging by its name, asks for undeclared columns to be rejected
    # (the enforcing validator is not visible here - confirm there).
    enforce_no_extra_columns: bool = False
    columns: list[ColumnSpec]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class FreshnessSpec(BaseModel):
    # Freshness section of a contract.
    max_age_hours: float  # required; maximum accepted age, in hours
    check_column: str | None = None  # optional column name; semantics live in the freshness validator
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class VolumeSpec(BaseModel):
    # Volume section of a contract: optional lower and upper row-count bounds.
    # Either bound may be omitted.
    min_rows: int | None = None
    max_rows: int | None = None
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class QualityRule(BaseModel):
    # One quality rule, bound to a single column. Every threshold is optional;
    # a threshold left as None is simply not declared for that column.
    column: str
    max_null_percentage: float | None = None
    max_duplicate_percentage: float | None = None
    min_value: float | None = None
    max_value: float | None = None
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class ConsumerSpec(BaseModel):
    # A team that consumes the dataset, with its contact address.
    team: str
    email: str
    slack_webhook: str | None = None
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class OwnerSpec(BaseModel):
    # The team that owns the contract, with its contact address.
    team: str
    email: str
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class MetadataSpec(BaseModel):
    # Identifying information for a contract: name, version and owner are
    # required; description and tags are optional.
    name: str
    version: str
    description: str | None = None
    owner: OwnerSpec
    tags: list[str] = []
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class DatasetSpec(BaseModel):
    # Where the dataset lives and how to read it. Only ``format`` is required;
    # which of the optional fields matter depends on the reader chosen for
    # that format.
    format: Literal["parquet", "sql"]
    location: str | None = None
    catalog_uri: str | None = None
    catalog_type: str | None = None
    namespace: str | None = None
    table_name: str | None = None
    connection_string: str | None = None
    partition_column: str | None = None
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class WebhookSpec(BaseModel):
    # Webhook notification target: the URL plus optional extra HTTP headers.
    url: str
    headers: dict[str, str] = {}
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class EmailSpec(BaseModel):
    # SMTP settings for e-mail notifications.
    smtp_host: str
    smtp_port: int = 587
    smtp_user: str
    smtp_password_env: str  # name of the environment variable holding the password, not the password itself
    recipients: list[str] = []  # additional addresses, on top of owner and consumers
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class NotificationsSpec(BaseModel):
    # Notification channels of a contract; each channel is optional and is
    # skipped by its notifier when left unset.
    webhook: WebhookSpec | None = None
    email: EmailSpec | None = None
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class DataContract(BaseModel):
    # Root model of a contract document.
    #
    # ``apiVersion`` and ``kind`` are fixed literals, so a document with any
    # other value fails validation. Apart from ``metadata`` and ``dataset``,
    # every section is optional.
    apiVersion: Literal["datacontract/v1"]
    kind: Literal["DataContract"]
    metadata: MetadataSpec
    dataset: DatasetSpec
    on_breach: Literal["warn", "fail"] = "warn"
    consumers: list[ConsumerSpec] = []
    # Exposed as "schema" in the document; the attribute is called ``schema_``
    # (presumably to avoid clashing with a BaseModel attribute - confirm).
    schema_: SchemaSpec | None = Field(None, alias="schema")
    freshness: FreshnessSpec | None = None
    volume: VolumeSpec | None = None
    quality: list[QualityRule] = []
    notifications: NotificationsSpec | None = None

    # Lets ``schema_`` be populated by its field name as well as by its alias.
    model_config = {"populate_by_name": True}
|
akad/models/result.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from enum import StrEnum
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ClauseStatus(StrEnum):
|
|
10
|
+
PASS = "PASS"
|
|
11
|
+
FAIL = "FAIL"
|
|
12
|
+
SKIPPED = "SKIPPED"
|
|
13
|
+
ERROR = "ERROR"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class OverallStatus(StrEnum):
|
|
17
|
+
COMPLIANT = "COMPLIANT"
|
|
18
|
+
BREACH = "BREACH"
|
|
19
|
+
ERROR = "ERROR"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class ClauseResult:
    """Outcome of evaluating one clause of a contract."""

    clause_type: str
    clause_target: str | None
    status: ClauseStatus
    expected: Any
    observed: Any
    message: str

    def to_dict(self) -> dict:
        """JSON-safe dict matching the registry's ClauseResultSchema.

        ``expected`` and ``observed`` may hold arbitrary objects, so both are
        converted with ``str()``; the status is emitted as its enum value.
        """
        return dict(
            clause_type=self.clause_type,
            clause_target=self.clause_target,
            status=self.status.value,
            expected=str(self.expected),
            observed=str(self.observed),
            message=self.message,
        )
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class ValidationResult:
    """Outcome of validating one dataset against one contract."""

    contract_name: str
    contract_version: str
    dataset_location: str
    validated_at: datetime
    overall_status: OverallStatus
    clause_results: list[ClauseResult] = field(default_factory=list)
    row_count: int | None = None
    error_message: str | None = None

    @property
    def is_breach(self) -> bool:
        """True only when the overall status is BREACH."""
        return self.overall_status == OverallStatus.BREACH

    @property
    def failed_clauses(self) -> list[ClauseResult]:
        """The clause results whose status is FAIL, in their original order."""
        return [clause for clause in self.clause_results if clause.status == ClauseStatus.FAIL]

    def to_dict(self) -> dict:
        """JSON-safe dict matching the registry's ValidationResultRequest payload.

        The timestamp is rendered with ``isoformat()`` and every clause result
        with its own ``to_dict()``.
        """
        clauses = [clause.to_dict() for clause in self.clause_results]
        return dict(
            contract_name=self.contract_name,
            contract_version=self.contract_version,
            dataset_location=self.dataset_location,
            validated_at=self.validated_at.isoformat(),
            overall_status=self.overall_status.value,
            row_count=self.row_count,
            clause_results=clauses,
            error_message=self.error_message,
        )
|
akad/notifier.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from akad.models.contract import DataContract
|
|
4
|
+
from akad.models.result import ValidationResult
|
|
5
|
+
from akad.notifiers.base import Notifier
|
|
6
|
+
from akad.notifiers.email_notifier import EmailNotifier
|
|
7
|
+
from akad.notifiers.webhook_notifier import WebhookNotifier
|
|
8
|
+
|
|
9
|
+
_DEFAULT_NOTIFIERS: list[Notifier] = [WebhookNotifier(), EmailNotifier()]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def dispatch_notifications(
    contract: DataContract,
    result: ValidationResult,
    notifiers: list[Notifier] | None = None,
) -> None:
    """Hand *contract* and *result* to each notifier in turn.

    When *notifiers* is None the module's default notifier list is used. Any
    list passed explicitly, including an empty one, replaces the defaults,
    which lets tests inject mocks or disable notifications.
    """
    targets = _DEFAULT_NOTIFIERS if notifiers is None else notifiers
    for notifier in targets:
        notifier.notify(contract, result)
|
|
File without changes
|
akad/notifiers/base.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
|
|
5
|
+
from akad.models.contract import DataContract
|
|
6
|
+
from akad.models.result import ValidationResult
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Notifier(ABC):
    """Interface implemented by every breach-notification channel."""

    @abstractmethod
    def notify(self, contract: DataContract, result: ValidationResult) -> None:
        """Send a breach notification. Must never raise."""
        ...
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import smtplib
|
|
6
|
+
from email.mime.text import MIMEText
|
|
7
|
+
|
|
8
|
+
from akad.models.contract import DataContract
|
|
9
|
+
from akad.models.result import ValidationResult
|
|
10
|
+
from akad.notifiers.base import Notifier
|
|
11
|
+
|
|
12
|
+
log = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _collect_recipients(contract: DataContract) -> list[str]:
|
|
16
|
+
recipients: list[str] = []
|
|
17
|
+
recipients.append(contract.metadata.owner.email)
|
|
18
|
+
for consumer in contract.consumers:
|
|
19
|
+
recipients.append(consumer.email)
|
|
20
|
+
if contract.notifications and contract.notifications.email:
|
|
21
|
+
recipients.extend(contract.notifications.email.recipients)
|
|
22
|
+
return list(dict.fromkeys(recipients)) # deduplicate, preserve order
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _build_email_body(contract: DataContract, result: ValidationResult) -> str:
|
|
26
|
+
lines = [
|
|
27
|
+
"Akad Breach Alert",
|
|
28
|
+
"",
|
|
29
|
+
f"Contract : {result.contract_name} v{result.contract_version}",
|
|
30
|
+
f"Dataset : {result.dataset_location}",
|
|
31
|
+
f"Time : {result.validated_at.isoformat()}",
|
|
32
|
+
f"Rows : {result.row_count}",
|
|
33
|
+
"",
|
|
34
|
+
f"Failed Clauses ({len(result.failed_clauses)}):",
|
|
35
|
+
]
|
|
36
|
+
for c in result.failed_clauses:
|
|
37
|
+
target = f" [{c.clause_target}]" if c.clause_target else ""
|
|
38
|
+
lines.append(f" • {c.clause_type}{target}: {c.message}")
|
|
39
|
+
return "\n".join(lines)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class EmailNotifier(Notifier):
    """Notifier that sends a breach report by e-mail over SMTP with STARTTLS."""

    def notify(self, contract: DataContract, result: ValidationResult) -> None:
        """Send the breach e-mail; failures are logged and never raised.

        Nothing is sent when the contract has no e-mail notification settings
        or when no recipient can be collected. The SMTP password is read from
        the environment variable named by ``smtp_password_env``.
        """
        if not (contract.notifications and contract.notifications.email):
            return
        cfg = contract.notifications.email
        recipients = _collect_recipients(contract)
        if not recipients:
            return
        body = _build_email_body(contract, result)
        try:
            msg = MIMEText(body)
            msg["Subject"] = f"[Akad BREACH] {contract.metadata.name} v{contract.metadata.version}"
            msg["From"] = cfg.smtp_user
            msg["To"] = ", ".join(recipients)
            # A missing variable yields an empty password; the resulting login
            # failure is caught below and logged like any other error.
            password = os.environ.get(cfg.smtp_password_env, "")
            # Bound the connection with a timeout (same 10 s as the webhook
            # notifier): without one an unreachable SMTP host would block the
            # caller indefinitely.
            with smtplib.SMTP(cfg.smtp_host, cfg.smtp_port, timeout=10) as srv:
                srv.starttls()
                srv.login(cfg.smtp_user, password)
                srv.sendmail(cfg.smtp_user, recipients, msg.as_string())
        except Exception as exc:
            # Notifiers must never raise, so every failure ends up here.
            log.warning("Email notification failed: %s", exc)
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
import httpx
|
|
6
|
+
|
|
7
|
+
from akad.models.contract import DataContract
|
|
8
|
+
from akad.models.result import ValidationResult
|
|
9
|
+
from akad.notifiers.base import Notifier
|
|
10
|
+
|
|
11
|
+
log = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _build_payload(contract: DataContract, result: ValidationResult) -> dict:
|
|
15
|
+
return {
|
|
16
|
+
"event": "DATA_CONTRACT_BREACH",
|
|
17
|
+
"contract_name": result.contract_name,
|
|
18
|
+
"contract_version": result.contract_version,
|
|
19
|
+
"dataset_location": result.dataset_location,
|
|
20
|
+
"validated_at": result.validated_at.isoformat(),
|
|
21
|
+
"row_count": result.row_count,
|
|
22
|
+
"failed_clauses": [
|
|
23
|
+
{
|
|
24
|
+
"clause_type": c.clause_type,
|
|
25
|
+
"clause_target": c.clause_target,
|
|
26
|
+
"expected": str(c.expected),
|
|
27
|
+
"observed": str(c.observed),
|
|
28
|
+
"message": c.message,
|
|
29
|
+
}
|
|
30
|
+
for c in result.failed_clauses
|
|
31
|
+
],
|
|
32
|
+
"on_breach": contract.on_breach,
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class WebhookNotifier(Notifier):
    """Notifier that POSTs the breach payload as JSON to a configured URL."""

    def notify(self, contract: DataContract, result: ValidationResult) -> None:
        """Post the breach payload; failures are logged and never raised.

        Nothing is sent when the contract has no webhook settings. A non-2xx
        response counts as a failure.
        """
        notifications = contract.notifications
        if not (notifications and notifications.webhook):
            return
        webhook = contract.notifications.webhook
        body = _build_payload(contract, result)
        try:
            response = httpx.post(webhook.url, json=body, headers=webhook.headers, timeout=10)
            response.raise_for_status()
        except Exception as exc:
            log.warning("Webhook notification failed: %s", exc)
|
akad/readers/__init__.py
ADDED
|
File without changes
|
akad/readers/base.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
from akad.models.contract import DatasetSpec
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DataReadError(Exception):
    """Signals that a DataReader was unable to load the requested dataset."""
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class DataReader(ABC):
    """Interface for loading the dataset described by a ``DatasetSpec``."""

    @abstractmethod
    def read(self, spec: DatasetSpec) -> pd.DataFrame:
        """Read the dataset and return a DataFrame. Raise DataReadError on failure."""
        ...

    @abstractmethod
    def get_last_modified(self, spec: DatasetSpec) -> float:
        """Return Unix epoch of last modification. Raise NotImplementedError if unsupported."""
        ...
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import pyarrow.parquet as pq
|
|
7
|
+
|
|
8
|
+
from akad.models.contract import DatasetSpec
|
|
9
|
+
from akad.readers.base import DataReader, DataReadError
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ParquetReader(DataReader):
    """DataReader for Parquet data addressed by ``spec.location``."""

    def read(self, spec: DatasetSpec) -> pd.DataFrame:
        """Load the Parquet data at ``spec.location`` into a DataFrame.

        Raises:
            DataReadError: if ``location`` is missing or the data cannot be read.
        """
        # Checked up front, as in get_last_modified(), so a contract without a
        # location produces a clear message instead of
        # "Cannot read Parquet at 'None'".
        if spec.location is None:
            raise DataReadError("Parquet dataset spec is missing 'location'")
        try:
            table = pq.read_table(spec.location)
            return table.to_pandas()
        except Exception as exc:
            raise DataReadError(f"Cannot read Parquet at '{spec.location}': {exc}") from exc

    def get_last_modified(self, spec: DatasetSpec) -> float:
        """Return the modification time of a local Parquet path as a Unix epoch.

        Raises:
            DataReadError: if ``location`` is missing.
            NotImplementedError: if ``location`` does not exist as a local
                path (remote locations are not supported here).
        """
        if spec.location is None:
            raise DataReadError("Parquet dataset spec is missing 'location'")
        path = Path(spec.location)
        if path.exists():
            return path.stat().st_mtime
        raise NotImplementedError("S3/remote last-modified requires s3fs configuration")
|