akad-framework 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- akad_framework-1.0.0/LICENSE +21 -0
- akad_framework-1.0.0/PKG-INFO +531 -0
- akad_framework-1.0.0/README.md +489 -0
- akad_framework-1.0.0/akad/__init__.py +10 -0
- akad_framework-1.0.0/akad/cli.py +121 -0
- akad_framework-1.0.0/akad/contract_loader.py +17 -0
- akad_framework-1.0.0/akad/engine.py +124 -0
- akad_framework-1.0.0/akad/models/__init__.py +0 -0
- akad_framework-1.0.0/akad/models/contract.py +111 -0
- akad_framework-1.0.0/akad/models/result.py +73 -0
- akad_framework-1.0.0/akad/notifier.py +23 -0
- akad_framework-1.0.0/akad/notifiers/__init__.py +0 -0
- akad_framework-1.0.0/akad/notifiers/base.py +13 -0
- akad_framework-1.0.0/akad/notifiers/email_notifier.py +62 -0
- akad_framework-1.0.0/akad/notifiers/webhook_notifier.py +51 -0
- akad_framework-1.0.0/akad/readers/__init__.py +0 -0
- akad_framework-1.0.0/akad/readers/base.py +23 -0
- akad_framework-1.0.0/akad/readers/parquet_reader.py +26 -0
- akad_framework-1.0.0/akad/readers/sql_reader.py +36 -0
- akad_framework-1.0.0/akad/registry_client.py +60 -0
- akad_framework-1.0.0/akad/sdk.py +90 -0
- akad_framework-1.0.0/akad/validators/__init__.py +0 -0
- akad_framework-1.0.0/akad/validators/base.py +20 -0
- akad_framework-1.0.0/akad/validators/freshness_validator.py +66 -0
- akad_framework-1.0.0/akad/validators/quality_validator.py +88 -0
- akad_framework-1.0.0/akad/validators/schema_validator.py +115 -0
- akad_framework-1.0.0/akad/validators/volume_validator.py +47 -0
- akad_framework-1.0.0/akad_framework.egg-info/PKG-INFO +531 -0
- akad_framework-1.0.0/akad_framework.egg-info/SOURCES.txt +50 -0
- akad_framework-1.0.0/akad_framework.egg-info/dependency_links.txt +1 -0
- akad_framework-1.0.0/akad_framework.egg-info/entry_points.txt +2 -0
- akad_framework-1.0.0/akad_framework.egg-info/requires.txt +23 -0
- akad_framework-1.0.0/akad_framework.egg-info/top_level.txt +3 -0
- akad_framework-1.0.0/dashboard/__init__.py +0 -0
- akad_framework-1.0.0/dashboard/main.py +122 -0
- akad_framework-1.0.0/dashboard/templates/_macros.html +9 -0
- akad_framework-1.0.0/dashboard/templates/base.html +44 -0
- akad_framework-1.0.0/dashboard/templates/breaches.html +55 -0
- akad_framework-1.0.0/dashboard/templates/contract_detail.html +48 -0
- akad_framework-1.0.0/dashboard/templates/discovery.html +31 -0
- akad_framework-1.0.0/dashboard/templates/index.html +86 -0
- akad_framework-1.0.0/pyproject.toml +122 -0
- akad_framework-1.0.0/registry/__init__.py +0 -0
- akad_framework-1.0.0/registry/database.py +36 -0
- akad_framework-1.0.0/registry/main.py +26 -0
- akad_framework-1.0.0/registry/models.py +37 -0
- akad_framework-1.0.0/registry/routers/__init__.py +0 -0
- akad_framework-1.0.0/registry/routers/contracts.py +73 -0
- akad_framework-1.0.0/registry/routers/health.py +16 -0
- akad_framework-1.0.0/registry/routers/results.py +51 -0
- akad_framework-1.0.0/registry/schemas.py +67 -0
- akad_framework-1.0.0/setup.cfg +4 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Faizal Azman
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,531 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: akad-framework
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Akad — a lightweight data contract framework for automated data quality enforcement in lakehouse pipelines
|
|
5
|
+
Author-email: Faizal Azman <faizalazman88@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Source Code, https://github.com/ParmenidesSartre/Akad
|
|
8
|
+
Project-URL: Bug Tracker, https://github.com/ParmenidesSartre/Akad/issues
|
|
9
|
+
Project-URL: Documentation, https://parmenidessartre.github.io/Akad/
|
|
10
|
+
Keywords: data-contracts,data-quality,data-engineering,airflow,lakehouse
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
17
|
+
Classifier: Topic :: Database
|
|
18
|
+
Requires-Python: >=3.12
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: httpx>=0.28
|
|
22
|
+
Requires-Dist: pandas>=2.0
|
|
23
|
+
Requires-Dist: pyarrow>=14.0
|
|
24
|
+
Requires-Dist: pydantic>=2.0
|
|
25
|
+
Requires-Dist: pyyaml>=6.0
|
|
26
|
+
Requires-Dist: sqlalchemy>=2.0
|
|
27
|
+
Requires-Dist: typer>=0.9
|
|
28
|
+
Provides-Extra: registry
|
|
29
|
+
Requires-Dist: alembic>=1.18; extra == "registry"
|
|
30
|
+
Requires-Dist: fastapi>=0.136; extra == "registry"
|
|
31
|
+
Requires-Dist: psycopg2-binary>=2.9; extra == "registry"
|
|
32
|
+
Requires-Dist: uvicorn[standard]>=0.29; extra == "registry"
|
|
33
|
+
Provides-Extra: dashboard
|
|
34
|
+
Requires-Dist: fastapi>=0.136; extra == "dashboard"
|
|
35
|
+
Requires-Dist: jinja2>=3.1; extra == "dashboard"
|
|
36
|
+
Requires-Dist: uvicorn[standard]>=0.29; extra == "dashboard"
|
|
37
|
+
Requires-Dist: httpx>=0.28; extra == "dashboard"
|
|
38
|
+
Provides-Extra: all
|
|
39
|
+
Requires-Dist: akad-framework[registry]; extra == "all"
|
|
40
|
+
Requires-Dist: akad-framework[dashboard]; extra == "all"
|
|
41
|
+
Dynamic: license-file
|
|
42
|
+
|
|
43
|
+
# Akad
|
|
44
|
+
|
|
45
|
+
[PyPI version](https://pypi.org/project/akad-framework/)
|
|
46
|
+
[Python 3.12+](https://pypi.org/project/akad-framework/)
|
|
47
|
+
[License: MIT](LICENSE)
|
|
48
|
+
[Documentation](https://parmenidessartre.github.io/Akad/)
|
|
49
|
+
|
|
50
|
+
**Akad** (Malay/Arabic: *contract, covenant* — the term for the underlying contract of any Islamic finance product) is a lightweight Python library for defining, enforcing, and monitoring data quality contracts on batch datasets. Built for data engineering pipelines — works standalone, in Airflow, or any Python environment.
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install akad-framework
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Table of Contents
|
|
57
|
+
|
|
58
|
+
- [What it does](#what-it-does)
|
|
59
|
+
- [Features](#features)
|
|
60
|
+
- [Installation](#installation)
|
|
61
|
+
- [Quick Start](#quick-start)
|
|
62
|
+
- [Workflow](#workflow)
|
|
63
|
+
- [Contract YAML Reference](#contract-yaml-reference)
|
|
64
|
+
- [CLI Reference](#cli-reference)
|
|
65
|
+
- [Python SDK Reference](#python-sdk-reference)
|
|
66
|
+
- [Development Setup](#development-setup)
|
|
67
|
+
- [Contributing](#contributing)
|
|
68
|
+
- [License](#license)
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## What it does
|
|
73
|
+
|
|
74
|
+
When a producer pipeline changes a dataset (renames a column, drops rows, adds bad values), downstream consumers break silently. Akad gives you:
|
|
75
|
+
|
|
76
|
+
- A **contract file** (YAML) that declares what the dataset must look like
|
|
77
|
+
- An **enforcement engine** that validates the dataset against the contract at pipeline runtime
|
|
78
|
+
- A **registry** that stores contract versions and validation history
|
|
79
|
+
- A **CLI** for manual validation and contract management
|
|
80
|
+
- A **dashboard** to monitor all contracts across your data platform
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
## Features
|
|
85
|
+
|
|
86
|
+
### Validation Rules
|
|
87
|
+
|
|
88
|
+
| Feature | What it checks |
|
|
89
|
+
|---|---|
|
|
90
|
+
| **Schema — column existence** | Every declared column is present in the dataset |
|
|
91
|
+
| **Schema — column types** | Column dtype matches declared type (`string`, `integer`, `float`, `boolean`, `date`, `timestamp`) |
|
|
92
|
+
| **Schema — nullable** | Non-nullable columns have zero null values |
|
|
93
|
+
| **Schema — allowed values** | Column contains only the declared set of allowed values |
|
|
94
|
+
| **Schema — no extra columns** | Dataset has no undeclared columns (optional, off by default) |
|
|
95
|
+
| **Freshness** | Dataset was updated within `max_age_hours`; uses file mtime or `max(check_column)` |
|
|
96
|
+
| **Volume** | Row count is within `min_rows` / `max_rows` bounds |
|
|
97
|
+
| **Quality — null rate** | Column null percentage does not exceed `max_null_percentage` |
|
|
98
|
+
| **Quality — duplicate rate** | Column duplicate percentage does not exceed `max_duplicate_percentage` |
|
|
99
|
+
| **Quality — value range** | Column values are within `min_value` / `max_value` bounds |
|
|
100
|
+
|
|
101
|
+
### Dataset Formats
|
|
102
|
+
|
|
103
|
+
| Format | How |
|
|
104
|
+
|---|---|
|
|
105
|
+
| **Parquet** | Local path or S3 via `pyarrow` |
|
|
106
|
+
| **SQL** | Any SQLAlchemy-supported database (PostgreSQL, MySQL, SQLite) via `table_name` + `connection_string` |
|
|
107
|
+
|
|
108
|
+
### Breach Modes
|
|
109
|
+
|
|
110
|
+
| Mode | Behaviour |
|
|
111
|
+
|---|---|
|
|
112
|
+
| `on_breach: warn` | Returns result with `is_breach=True`, pipeline continues |
|
|
113
|
+
| `on_breach: fail` | Raises `DataContractBreachError`, pipeline halts |
|
|
114
|
+
|
|
115
|
+
### Contract Loading
|
|
116
|
+
|
|
117
|
+
| Method | When to use |
|
|
118
|
+
|---|---|
|
|
119
|
+
| `contract_path="contracts/sales.yaml"` | Dev machine, CI — file is local |
|
|
120
|
+
| `contract_name="daily_sales"` + `registry_url=...` | Airflow workers, remote runners — no local file needed |
|
|
121
|
+
|
|
122
|
+
### Notifications
|
|
123
|
+
|
|
124
|
+
- **Webhook** — POST JSON breach payload to any URL (Slack, Teams, PagerDuty)
|
|
125
|
+
- **Email** — SMTP with configurable recipients; password stored in env var, never in YAML
|
|
126
|
+
|
|
127
|
+
### Registry
|
|
128
|
+
|
|
129
|
+
- REST API (FastAPI) — publish contracts, fetch by name, list versions, store validation results
|
|
130
|
+
- PostgreSQL backend for production; SQLite for local dev
|
|
131
|
+
- Interactive API docs at `/docs`
|
|
132
|
+
|
|
133
|
+
### Observability Dashboard
|
|
134
|
+
|
|
135
|
+
- FastAPI + Jinja2 + Tailwind (CDN, no build step) — overview of all contracts, compliant vs breach counts, per-contract validation history, breach history with status filters, contract discovery/search
|
|
136
|
+
|
|
137
|
+
### CLI
|
|
138
|
+
|
|
139
|
+
- `akad check` — parse and validate YAML syntax without touching data (CI-safe)
|
|
140
|
+
- `akad publish` — register a contract version
|
|
141
|
+
- `akad validate` — run full validation, exit 1 on breach (CI-friendly)
|
|
142
|
+
- `akad list` — list all current contracts in registry
|
|
143
|
+
- `akad history` — show recent validation runs for a contract
|
|
144
|
+
|
|
145
|
+
### Developer Experience
|
|
146
|
+
|
|
147
|
+
- `validate_dataframe(df, contract)` — skip storage reads in unit tests, pass a DataFrame directly
|
|
148
|
+
- Injectable `_http_client` and `_registry_client` — test the full SDK without a real server
|
|
149
|
+
- Custom validator plugin API — extend with your own business rules
|
|
150
|
+
- Split dependencies — `pip install akad-framework` (core only) keeps Airflow worker environments lean
|
|
151
|
+
|
|
152
|
+
---
|
|
153
|
+
|
|
154
|
+
## Installation
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
pip install akad-framework # core — Airflow workers, pipelines
|
|
158
|
+
pip install "akad-framework[registry]" # + registry server
|
|
159
|
+
pip install "akad-framework[all]" # everything
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
## Quick Start
|
|
165
|
+
|
|
166
|
+
**1. Write a contract**
|
|
167
|
+
|
|
168
|
+
```yaml
|
|
169
|
+
# contracts/sales.yaml
|
|
170
|
+
apiVersion: datacontract/v1
|
|
171
|
+
kind: DataContract
|
|
172
|
+
metadata:
|
|
173
|
+
name: daily_sales
|
|
174
|
+
version: "1.0.0"
|
|
175
|
+
owner:
|
|
176
|
+
team: Data Engineering
|
|
177
|
+
email: data@example.com
|
|
178
|
+
dataset:
|
|
179
|
+
format: parquet
|
|
180
|
+
location: /data/sales/daily.parquet
|
|
181
|
+
on_breach: warn
|
|
182
|
+
schema:
|
|
183
|
+
columns:
|
|
184
|
+
- name: sale_id
|
|
185
|
+
type: string
|
|
186
|
+
nullable: false
|
|
187
|
+
- name: amount
|
|
188
|
+
type: float
|
|
189
|
+
nullable: false
|
|
190
|
+
- name: currency_code
|
|
191
|
+
type: string
|
|
192
|
+
allowed_values: [MYR, USD, SGD]
|
|
193
|
+
volume:
|
|
194
|
+
min_rows: 1000
|
|
195
|
+
quality:
|
|
196
|
+
- column: sale_id
|
|
197
|
+
max_null_percentage: 0.0
|
|
198
|
+
max_duplicate_percentage: 0.0
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
**2. Validate**
|
|
202
|
+
|
|
203
|
+
```bash
|
|
204
|
+
akad validate --contract contracts/sales.yaml
|
|
205
|
+
# ✓ daily_sales v1.0.0: COMPLIANT
|
|
206
|
+
|
|
207
|
+
# On breach:
|
|
208
|
+
# ✗ daily_sales v1.0.0: BREACH
|
|
209
|
+
# Failed clauses:
|
|
210
|
+
# - [schema.allowed_values] [currency_code] Contains values not in allowed list: ['JPY']
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
---
|
|
214
|
+
|
|
215
|
+
## Workflow
|
|
216
|
+
|
|
217
|
+
### Step 1 — Check contract syntax
|
|
218
|
+
|
|
219
|
+
```bash
|
|
220
|
+
akad check --contract contracts/sales.yaml
|
|
221
|
+
# OK daily_sales v1.0.0 — contract is valid
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
### Step 2 — Start the registry
|
|
225
|
+
|
|
226
|
+
```bash
|
|
227
|
+
docker compose up -d
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
- Registry API: `http://localhost:8000`
|
|
231
|
+
- Dashboard: `http://localhost:8501`
|
|
232
|
+
|
|
233
|
+
### Step 3 — Publish the contract
|
|
234
|
+
|
|
235
|
+
```bash
|
|
236
|
+
akad publish --contract contracts/sales.yaml --registry-url http://localhost:8000
|
|
237
|
+
# Published daily_sales v1.0.0
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
### Step 4 — Validate in your pipeline
|
|
241
|
+
|
|
242
|
+
**From a local file (dev / CI):**
|
|
243
|
+
|
|
244
|
+
```python
|
|
245
|
+
from akad import DataContractValidator, DataContractBreachError
|
|
246
|
+
|
|
247
|
+
result = DataContractValidator(
|
|
248
|
+
contract_path="contracts/sales.yaml",
|
|
249
|
+
registry_url="http://localhost:8000",  # optional — enables breach history
|
|
250
|
+
).validate()
|
|
251
|
+
|
|
252
|
+
print(result.overall_status) # COMPLIANT or BREACH
|
|
253
|
+
print(result.row_count) # 48203
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
**From the registry by name (Airflow workers — no local file needed):**
|
|
257
|
+
|
|
258
|
+
```python
|
|
259
|
+
result = DataContractValidator(
|
|
260
|
+
contract_name="daily_sales",
|
|
261
|
+
registry_url="http://akad-registry:8000",
|
|
262
|
+
).validate()
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
### Step 5 — Use in Airflow
|
|
266
|
+
|
|
267
|
+
```python
|
|
268
|
+
from airflow.sdk import dag, task
|
|
269
|
+
from akad import DataContractValidator
|
|
270
|
+
import os
|
|
271
|
+
|
|
272
|
+
REGISTRY_URL = os.environ.get("AKAD_REGISTRY_URL", "http://akad-registry:8000")
|
|
273
|
+
|
|
274
|
+
@dag(schedule="@daily", ...)
|
|
275
|
+
def sales_pipeline():
|
|
276
|
+
|
|
277
|
+
@task
|
|
278
|
+
def extract_and_load() -> int:
|
|
279
|
+
# write dataset to /data/sales/daily.parquet
|
|
280
|
+
...
|
|
281
|
+
|
|
282
|
+
@task
|
|
283
|
+
def validate(row_count: int) -> str:
|
|
284
|
+
result = DataContractValidator(
|
|
285
|
+
contract_name="daily_sales", # fetched from registry — no local file
|
|
286
|
+
registry_url=REGISTRY_URL,
|
|
287
|
+
notifiers=[],
|
|
288
|
+
).validate()
|
|
289
|
+
|
|
290
|
+
if result.is_breach:
|
|
291
|
+
raise ValueError("Contract breach — pipeline halted")
|
|
292
|
+
|
|
293
|
+
return result.overall_status.value
|
|
294
|
+
|
|
295
|
+
@task
|
|
296
|
+
def transform(status: str) -> None:
|
|
297
|
+
... # only runs when validation passes
|
|
298
|
+
|
|
299
|
+
rows = extract_and_load()
|
|
300
|
+
status = validate(rows)
|
|
301
|
+
transform(status)
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
On breach: `validate` raises → Airflow marks the task FAILED → `transform` never runs (it is marked `upstream_failed`) — bad data never reaches downstream consumers.
|
|
305
|
+
|
|
306
|
+
---
|
|
307
|
+
|
|
308
|
+
## Contract YAML Reference
|
|
309
|
+
|
|
310
|
+
```yaml
|
|
311
|
+
apiVersion: datacontract/v1
|
|
312
|
+
kind: DataContract
|
|
313
|
+
|
|
314
|
+
metadata:
|
|
315
|
+
name: daily_sales # unique identifier
|
|
316
|
+
version: "1.0.0" # semantic version
|
|
317
|
+
owner:
|
|
318
|
+
team: Data Engineering
|
|
319
|
+
email: data@example.com
|
|
320
|
+
tags: [finance, daily]
|
|
321
|
+
|
|
322
|
+
dataset:
|
|
323
|
+
format: parquet # parquet | sql
|
|
324
|
+
location: /data/sales/daily.parquet
|
|
325
|
+
|
|
326
|
+
# SQL datasets:
|
|
327
|
+
# format: sql
|
|
328
|
+
# connection_string: postgresql://user:pass@host:5432/db
|
|
329
|
+
# table_name: daily_sales
|
|
330
|
+
|
|
331
|
+
on_breach: warn # warn | fail
|
|
332
|
+
|
|
333
|
+
schema:
|
|
334
|
+
enforce_no_extra_columns: false
|
|
335
|
+
columns:
|
|
336
|
+
- name: sale_id
|
|
337
|
+
type: string # string | integer | float | boolean | date | timestamp
|
|
338
|
+
nullable: false
|
|
339
|
+
allowed_values: [SALE, REFUND]
|
|
340
|
+
|
|
341
|
+
freshness:
|
|
342
|
+
max_age_hours: 25
|
|
343
|
+
check_column: sale_date # optional — uses max(column) instead of file mtime
|
|
344
|
+
|
|
345
|
+
volume:
|
|
346
|
+
min_rows: 1000
|
|
347
|
+
max_rows: 10000000
|
|
348
|
+
|
|
349
|
+
quality:
|
|
350
|
+
- column: sale_id
|
|
351
|
+
max_null_percentage: 0.0
|
|
352
|
+
max_duplicate_percentage: 0.0
|
|
353
|
+
- column: amount
|
|
354
|
+
min_value: 0.01
|
|
355
|
+
max_value: 9999999.0
|
|
356
|
+
|
|
357
|
+
notifications:
|
|
358
|
+
webhook:
|
|
359
|
+
url: https://hooks.slack.com/services/YOUR/WEBHOOK/URL
|
|
360
|
+
email:
|
|
361
|
+
smtp_host: smtp.example.com
|
|
362
|
+
smtp_port: 587
|
|
363
|
+
smtp_user: alerts@example.com
|
|
364
|
+
smtp_password_env: SMTP_PASSWORD
|
|
365
|
+
recipients:
|
|
366
|
+
- data-team@example.com
|
|
367
|
+
```
|
|
368
|
+
|
|
369
|
+
---
|
|
370
|
+
|
|
371
|
+
## CLI Reference
|
|
372
|
+
|
|
373
|
+
```
|
|
374
|
+
akad check --contract PATH
|
|
375
|
+
akad publish --contract PATH --registry-url URL
|
|
376
|
+
akad validate --contract PATH [--registry-url URL] [--output text|json]
|
|
377
|
+
akad list --registry-url URL
|
|
378
|
+
akad history --name NAME --registry-url URL [--limit N]
|
|
379
|
+
```
|
|
380
|
+
|
|
381
|
+
---
|
|
382
|
+
|
|
383
|
+
## Python SDK Reference
|
|
384
|
+
|
|
385
|
+
### `DataContractValidator`
|
|
386
|
+
|
|
387
|
+
```python
|
|
388
|
+
from akad import DataContractValidator, DataContractBreachError
|
|
389
|
+
|
|
390
|
+
# Option A — from local file
|
|
391
|
+
validator = DataContractValidator(
|
|
392
|
+
contract_path="contracts/sales.yaml",
|
|
393
|
+
registry_url="http://localhost:8000", # optional — enables breach history
|
|
394
|
+
extra_validators=[MyValidator()], # optional plugins
|
|
395
|
+
notifiers=[], # [] disables notifications
|
|
396
|
+
)
|
|
397
|
+
|
|
398
|
+
# Option B — from registry by name (Airflow / remote workers)
|
|
399
|
+
validator = DataContractValidator(
|
|
400
|
+
contract_name="daily_sales",
|
|
401
|
+
registry_url="http://localhost:8000",
|
|
402
|
+
)
|
|
403
|
+
|
|
404
|
+
result = validator.validate()
|
|
405
|
+
```
|
|
406
|
+
|
|
407
|
+
### `ValidationResult`
|
|
408
|
+
|
|
409
|
+
```python
|
|
410
|
+
result.overall_status # OverallStatus.COMPLIANT | BREACH | ERROR
|
|
411
|
+
result.is_breach # bool
|
|
412
|
+
result.row_count # int
|
|
413
|
+
result.failed_clauses # List[ClauseResult]
|
|
414
|
+
|
|
415
|
+
for c in result.failed_clauses:
|
|
416
|
+
print(c.clause_type) # e.g. "schema.allowed_values"
|
|
417
|
+
print(c.clause_target) # column name
|
|
418
|
+
print(c.message) # human-readable explanation
|
|
419
|
+
```
|
|
420
|
+
|
|
421
|
+
### `validate_dataframe()` — for unit testing
|
|
422
|
+
|
|
423
|
+
```python
|
|
424
|
+
from akad.engine import validate_dataframe
|
|
425
|
+
import pandas as pd
|
|
426
|
+
|
|
427
|
+
df = pd.DataFrame({"sale_id": ["A", "B"], "amount": [10.0, 20.0]})
|
|
428
|
+
result = validate_dataframe(df, contract)
|
|
429
|
+
```
|
|
430
|
+
|
|
431
|
+
### Custom validator plugin
|
|
432
|
+
|
|
433
|
+
```python
|
|
434
|
+
from akad.validators.base import Validator
|
|
435
|
+
from akad.models.result import ClauseResult, ClauseStatus
|
|
436
|
+
|
|
437
|
+
class MyValidator(Validator):
|
|
438
|
+
def validate(self, df, contract, reader_last_modified):
|
|
439
|
+
ok = df["amount"].sum() > 0
|
|
440
|
+
return [ClauseResult(
|
|
441
|
+
clause_type="custom.positive_total",
|
|
442
|
+
clause_target="amount",
|
|
443
|
+
status=ClauseStatus.PASS if ok else ClauseStatus.FAIL,
|
|
444
|
+
expected="> 0",
|
|
445
|
+
observed=str(df["amount"].sum()),
|
|
446
|
+
message="" if ok else "Total amount must be positive",
|
|
447
|
+
)]
|
|
448
|
+
|
|
449
|
+
DataContractValidator(
|
|
450
|
+
contract_path="contracts/sales.yaml",
|
|
451
|
+
extra_validators=[MyValidator()],
|
|
452
|
+
).validate()
|
|
453
|
+
```
|
|
454
|
+
|
|
455
|
+
---
|
|
456
|
+
|
|
457
|
+
## Development Setup
|
|
458
|
+
|
|
459
|
+
```bash
|
|
460
|
+
git clone https://github.com/ParmenidesSartre/Akad.git
|
|
461
|
+
cd Akad
|
|
462
|
+
|
|
463
|
+
# Install with all extras + dev tools
|
|
464
|
+
pip install uv
|
|
465
|
+
uv sync
|
|
466
|
+
|
|
467
|
+
# Run tests
|
|
468
|
+
uv run pytest
|
|
469
|
+
|
|
470
|
+
# Unit tests only (no Docker)
|
|
471
|
+
uv run pytest tests/unit/ -v
|
|
472
|
+
|
|
473
|
+
# Integration tests (SQLite, no Docker)
|
|
474
|
+
uv run pytest tests/integration/ -v
|
|
475
|
+
|
|
476
|
+
# Start registry locally
|
|
477
|
+
uv run uvicorn registry.main:app --reload --port 8000
|
|
478
|
+
|
|
479
|
+
# Start dashboard
|
|
480
|
+
uv run uvicorn dashboard.main:app --reload --port 8501
|
|
481
|
+
|
|
482
|
+
# Start everything (registry + dashboard + postgres)
|
|
483
|
+
docker compose up -d
|
|
484
|
+
```
|
|
485
|
+
|
|
486
|
+
### Project structure
|
|
487
|
+
|
|
488
|
+
```
|
|
489
|
+
akad/
|
|
490
|
+
├── akad/ # Core package — install this on Airflow workers
|
|
491
|
+
│ ├── models/ # Contract and result Pydantic models
|
|
492
|
+
│ ├── readers/ # ParquetReader, SQLReader
|
|
493
|
+
│ ├── validators/ # Schema, Freshness, Volume, Quality validators
|
|
494
|
+
│ ├── notifiers/ # Webhook, Email notifiers
|
|
495
|
+
│ ├── engine.py # Orchestrates readers + validators
|
|
496
|
+
│ ├── sdk.py # DataContractValidator — main public API
|
|
497
|
+
│ ├── cli.py # akad CLI
|
|
498
|
+
│ └── registry_client.py # HTTP client for the registry
|
|
499
|
+
├── registry/ # FastAPI registry service
|
|
500
|
+
├── dashboard/ # FastAPI + Jinja2 + Tailwind observability dashboard
|
|
501
|
+
├── lab/ # End-to-end Docker test lab
|
|
502
|
+
├── tests/
|
|
503
|
+
│ ├── unit/ # Validator unit tests
|
|
504
|
+
│ ├── integration/ # Engine + registry API tests
|
|
505
|
+
│ └── fixtures/ # Sample contract YAML files
|
|
506
|
+
├── contracts/ # Example contracts
|
|
507
|
+
├── docker-compose.yml
|
|
508
|
+
└── pyproject.toml
|
|
509
|
+
```
|
|
510
|
+
|
|
511
|
+
---
|
|
512
|
+
|
|
513
|
+
## Contributing
|
|
514
|
+
|
|
515
|
+
Issues and pull requests are welcome. Before submitting a change:
|
|
516
|
+
|
|
517
|
+
```bash
|
|
518
|
+
uv run pytest # full suite must pass
|
|
519
|
+
```
|
|
520
|
+
|
|
521
|
+
Please keep new functionality covered by tests — the project maintains ~99% coverage.
|
|
522
|
+
|
|
523
|
+
---
|
|
524
|
+
|
|
525
|
+
## License
|
|
526
|
+
|
|
527
|
+
[MIT](LICENSE) © Faizal Azman
|
|
528
|
+
|
|
529
|
+
---
|
|
530
|
+
|
|
531
|
+
*akad-framework v1.0.0*
|