data-contract-validator 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_contract_validator/__init__.py +24 -0
- data_contract_validator/cli.py +672 -0
- data_contract_validator/core/__init__.py +0 -0
- data_contract_validator/core/models.py +115 -0
- data_contract_validator/core/validator.py +187 -0
- data_contract_validator/extractors/__init__.py +14 -0
- data_contract_validator/extractors/base.py +45 -0
- data_contract_validator/extractors/dbt.py +213 -0
- data_contract_validator/extractors/fastapi.py +200 -0
- data_contract_validator/integrations/__init__.py +0 -0
- data_contract_validator/py.typed +2 -0
- data_contract_validator/templates/github-actions-template.yml +75 -0
- data_contract_validator-1.0.0.dist-info/METADATA +344 -0
- data_contract_validator-1.0.0.dist-info/RECORD +18 -0
- data_contract_validator-1.0.0.dist-info/WHEEL +5 -0
- data_contract_validator-1.0.0.dist-info/entry_points.txt +3 -0
- data_contract_validator-1.0.0.dist-info/licenses/LICENSE +21 -0
- data_contract_validator-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
# data_contract_validator/extractors/fastapi.py
|
|
2
|
+
"""
|
|
3
|
+
FastAPI/Pydantic schema extractor - simplified version of your working code.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import ast
|
|
7
|
+
import re
|
|
8
|
+
import requests
|
|
9
|
+
import os
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Dict, List, Any, Optional, Union, get_type_hints
|
|
12
|
+
|
|
13
|
+
from .base import BaseExtractor
|
|
14
|
+
from ..core.models import Schema
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class FastAPIExtractor(BaseExtractor):
|
|
18
|
+
"""Extract schemas from FastAPI/Pydantic models."""
|
|
19
|
+
|
|
20
|
+
def __init__(self, content: str, source: str = "unknown"):
|
|
21
|
+
self.content = content
|
|
22
|
+
self.source = source
|
|
23
|
+
|
|
24
|
+
@classmethod
|
|
25
|
+
def from_local_file(cls, file_path: str) -> "FastAPIExtractor":
|
|
26
|
+
"""Create extractor from local file."""
|
|
27
|
+
file_path = Path(file_path)
|
|
28
|
+
with open(file_path, "r", encoding="utf-8") as f:
|
|
29
|
+
content = f.read()
|
|
30
|
+
return cls(content, source=f"local:{file_path}")
|
|
31
|
+
|
|
32
|
+
@classmethod
|
|
33
|
+
def from_github_repo(
|
|
34
|
+
cls, repo: str, path: str, token: str = None
|
|
35
|
+
) -> "FastAPIExtractor":
|
|
36
|
+
"""Create extractor from GitHub repository."""
|
|
37
|
+
content = cls._fetch_github_file(repo, path, token)
|
|
38
|
+
if not content:
|
|
39
|
+
raise ValueError(f"Could not fetch {repo}/{path} from GitHub")
|
|
40
|
+
return cls(content, source=f"github:{repo}/{path}")
|
|
41
|
+
|
|
42
|
+
@staticmethod
|
|
43
|
+
def _fetch_github_file(repo: str, path: str, token: str = None) -> Optional[str]:
|
|
44
|
+
"""Fetch file content from GitHub API."""
|
|
45
|
+
url = f"https://api.github.com/repos/{repo}/contents/{path}"
|
|
46
|
+
headers = {}
|
|
47
|
+
|
|
48
|
+
if token:
|
|
49
|
+
headers["Authorization"] = f"token {token}"
|
|
50
|
+
|
|
51
|
+
try:
|
|
52
|
+
response = requests.get(url, headers=headers)
|
|
53
|
+
if response.status_code == 200:
|
|
54
|
+
import base64
|
|
55
|
+
|
|
56
|
+
content = base64.b64decode(response.json()["content"]).decode("utf-8")
|
|
57
|
+
print(f" ✅ Downloaded {path} from {repo}")
|
|
58
|
+
return content
|
|
59
|
+
else:
|
|
60
|
+
print(f" ❌ GitHub API error: {response.status_code}")
|
|
61
|
+
return None
|
|
62
|
+
except Exception as e:
|
|
63
|
+
print(f" ❌ Error fetching from GitHub: {e}")
|
|
64
|
+
return None
|
|
65
|
+
|
|
66
|
+
def extract_schemas(self) -> Dict[str, Schema]:
|
|
67
|
+
"""Extract schemas from FastAPI/Pydantic models."""
|
|
68
|
+
print(f"🔍 Extracting FastAPI schemas from {self.source}")
|
|
69
|
+
|
|
70
|
+
try:
|
|
71
|
+
schemas = self._parse_pydantic_models(self.content)
|
|
72
|
+
print(f" ✅ Found {len(schemas)} models")
|
|
73
|
+
return schemas
|
|
74
|
+
except Exception as e:
|
|
75
|
+
print(f" ❌ Error parsing models: {e}")
|
|
76
|
+
return {}
|
|
77
|
+
|
|
78
|
+
def _parse_pydantic_models(self, content: str) -> Dict[str, Schema]:
|
|
79
|
+
"""Parse Pydantic models from Python code."""
|
|
80
|
+
try:
|
|
81
|
+
tree = ast.parse(content)
|
|
82
|
+
schemas = {}
|
|
83
|
+
|
|
84
|
+
for node in ast.walk(tree):
|
|
85
|
+
if isinstance(node, ast.ClassDef):
|
|
86
|
+
# Check if it's a Pydantic model
|
|
87
|
+
if self._is_pydantic_model(node):
|
|
88
|
+
schema = self._analyze_pydantic_class(node)
|
|
89
|
+
if schema:
|
|
90
|
+
table_name = schema.name
|
|
91
|
+
schemas[table_name] = schema
|
|
92
|
+
print(f" ✅ Found model: {node.name} -> {table_name}")
|
|
93
|
+
|
|
94
|
+
return schemas
|
|
95
|
+
|
|
96
|
+
except Exception as e:
|
|
97
|
+
print(f" ❌ Error parsing Python code: {e}")
|
|
98
|
+
return {}
|
|
99
|
+
|
|
100
|
+
def _is_pydantic_model(self, node: ast.ClassDef) -> bool:
|
|
101
|
+
"""Check if class inherits from BaseModel or SQLModel."""
|
|
102
|
+
for base in node.bases:
|
|
103
|
+
if isinstance(base, ast.Name) and base.id in ["BaseModel", "SQLModel"]:
|
|
104
|
+
return True
|
|
105
|
+
elif isinstance(base, ast.Attribute) and base.attr in [
|
|
106
|
+
"BaseModel",
|
|
107
|
+
"SQLModel",
|
|
108
|
+
]:
|
|
109
|
+
return True
|
|
110
|
+
return False
|
|
111
|
+
|
|
112
|
+
def _analyze_pydantic_class(self, node: ast.ClassDef) -> Optional[Schema]:
|
|
113
|
+
"""Analyze a Pydantic class to extract schema."""
|
|
114
|
+
# Convert class name to table name
|
|
115
|
+
table_name = self._class_to_table_name(node.name)
|
|
116
|
+
|
|
117
|
+
# Skip SQLModel tables (database models, not API models)
|
|
118
|
+
if self._is_sqlmodel_table(node):
|
|
119
|
+
return None
|
|
120
|
+
|
|
121
|
+
columns = []
|
|
122
|
+
|
|
123
|
+
# Parse type annotations
|
|
124
|
+
for item in node.body:
|
|
125
|
+
if isinstance(item, ast.AnnAssign) and isinstance(item.target, ast.Name):
|
|
126
|
+
field_name = item.target.id
|
|
127
|
+
field_type = self._parse_type_annotation(item.annotation)
|
|
128
|
+
is_required = not self._is_optional_type(item.annotation)
|
|
129
|
+
|
|
130
|
+
columns.append(
|
|
131
|
+
{
|
|
132
|
+
"name": field_name,
|
|
133
|
+
"type": self._python_to_sql_type(field_type),
|
|
134
|
+
"required": is_required,
|
|
135
|
+
"nullable": not is_required,
|
|
136
|
+
}
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
if not columns:
|
|
140
|
+
return None
|
|
141
|
+
|
|
142
|
+
return Schema(name=table_name, columns=columns, source=f"pydantic:{node.name}")
|
|
143
|
+
|
|
144
|
+
def _is_sqlmodel_table(self, node: ast.ClassDef) -> bool:
|
|
145
|
+
"""Check if this is a SQLModel table (database model, not API model)."""
|
|
146
|
+
# Look for table=True in the class definition
|
|
147
|
+
for base in node.bases:
|
|
148
|
+
if isinstance(base, ast.Call):
|
|
149
|
+
for keyword in base.keywords:
|
|
150
|
+
if (
|
|
151
|
+
keyword.arg == "table"
|
|
152
|
+
and isinstance(keyword.value, ast.Constant)
|
|
153
|
+
and keyword.value.value is True
|
|
154
|
+
):
|
|
155
|
+
return True
|
|
156
|
+
return False
|
|
157
|
+
|
|
158
|
+
def _class_to_table_name(self, class_name: str) -> str:
|
|
159
|
+
"""Convert CamelCase class name to snake_case table name."""
|
|
160
|
+
# Insert underscore before capital letters
|
|
161
|
+
table_name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", class_name)
|
|
162
|
+
table_name = re.sub("([a-z0-9])([A-Z])", r"\1_\2", table_name).lower()
|
|
163
|
+
|
|
164
|
+
# Remove common suffixes
|
|
165
|
+
for suffix in ["_model", "_schema", "_response", "_request"]:
|
|
166
|
+
if table_name.endswith(suffix):
|
|
167
|
+
table_name = table_name[: -len(suffix)]
|
|
168
|
+
break
|
|
169
|
+
|
|
170
|
+
# Pluralize if it doesn't end with 's'
|
|
171
|
+
# if not table_name.endswith('s') and not table_name.endswith('_data'):
|
|
172
|
+
# table_name += 's'
|
|
173
|
+
|
|
174
|
+
return table_name
|
|
175
|
+
|
|
176
|
+
def _parse_type_annotation(self, annotation) -> str:
|
|
177
|
+
"""Parse type annotation to string."""
|
|
178
|
+
if isinstance(annotation, ast.Name):
|
|
179
|
+
return annotation.id
|
|
180
|
+
elif isinstance(annotation, ast.Subscript):
|
|
181
|
+
if isinstance(annotation.value, ast.Name):
|
|
182
|
+
# Handle Optional[Type], List[Type], etc.
|
|
183
|
+
inner_type = self._parse_type_annotation(annotation.slice)
|
|
184
|
+
return f"{annotation.value.id}[{inner_type}]"
|
|
185
|
+
elif isinstance(annotation, ast.Attribute):
|
|
186
|
+
# Handle datetime.datetime, etc.
|
|
187
|
+
if hasattr(annotation.value, "id"):
|
|
188
|
+
return f"{annotation.value.id}.{annotation.attr}"
|
|
189
|
+
return annotation.attr
|
|
190
|
+
|
|
191
|
+
return "unknown"
|
|
192
|
+
|
|
193
|
+
def _is_optional_type(self, annotation) -> bool:
|
|
194
|
+
"""Check if type annotation is Optional."""
|
|
195
|
+
if isinstance(annotation, ast.Subscript):
|
|
196
|
+
if isinstance(annotation.value, ast.Name):
|
|
197
|
+
# Check for Optional[Type] or Union[Type, None]
|
|
198
|
+
if annotation.value.id in ["Optional", "Union"]:
|
|
199
|
+
return True
|
|
200
|
+
return False
|
|
File without changes
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# Template GitHub Actions workflow for data contract validation
|
|
2
|
+
# Copy this to .github/workflows/validate-contracts.yml in your repository
|
|
3
|
+
|
|
4
|
+
name: 🔍 Validate Data Contracts
|
|
5
|
+
|
|
6
|
+
on:
|
|
7
|
+
pull_request:
|
|
8
|
+
paths:
|
|
9
|
+
- 'models/**/*.sql' # DBT models
|
|
10
|
+
- 'dbt_project.yml' # DBT config
|
|
11
|
+
- '**/*models*.py' # API models
|
|
12
|
+
|
|
13
|
+
workflow_dispatch: # Manual trigger
|
|
14
|
+
|
|
15
|
+
permissions:
|
|
16
|
+
contents: read
|
|
17
|
+
pull-requests: write
|
|
18
|
+
issues: write
|
|
19
|
+
|
|
20
|
+
jobs:
|
|
21
|
+
validate-contracts:
|
|
22
|
+
name: Contract Validation
|
|
23
|
+
runs-on: ubuntu-latest
|
|
24
|
+
|
|
25
|
+
steps:
|
|
26
|
+
- name: Checkout repository
|
|
27
|
+
uses: actions/checkout@v4
|
|
28
|
+
|
|
29
|
+
- name: Setup Python
|
|
30
|
+
uses: actions/setup-python@v4
|
|
31
|
+
with:
|
|
32
|
+
python-version: '3.9'
|
|
33
|
+
|
|
34
|
+
- name: Install Data Contract Validator
|
|
35
|
+
run: pip install data-contract-validator
|
|
36
|
+
|
|
37
|
+
- name: Validate data contracts
|
|
38
|
+
env:
|
|
39
|
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
40
|
+
# For private API repos, add this secret:
|
|
41
|
+
# API_REPO_TOKEN: ${{ secrets.API_REPO_TOKEN }}
|
|
42
|
+
run: |
|
|
43
|
+
contract-validator validate \
|
|
44
|
+
--dbt-project . \
|
|
45
|
+
--fastapi-repo "YOUR-ORG/YOUR-API-REPO" \
|
|
46
|
+
--fastapi-path "app/models.py" \
|
|
47
|
+
--github-token "$GITHUB_TOKEN" \
|
|
48
|
+
--output github
|
|
49
|
+
|
|
50
|
+
- name: Comment on PR (if validation fails)
|
|
51
|
+
if: failure()
|
|
52
|
+
uses: actions/github-script@v6
|
|
53
|
+
with:
|
|
54
|
+
script: |
|
|
55
|
+
github.rest.issues.createComment({
|
|
56
|
+
issue_number: context.issue.number,
|
|
57
|
+
owner: context.repo.owner,
|
|
58
|
+
repo: context.repo.repo,
|
|
59
|
+
body: `## ❌ Data Contract Validation Failed
|
|
60
|
+
|
|
61
|
+
Your changes don't satisfy API requirements.
|
|
62
|
+
|
|
63
|
+
**Common issues:**
|
|
64
|
+
- Missing required columns in DBT model
|
|
65
|
+
- Type mismatches between DBT and API
|
|
66
|
+
- API expects tables that DBT doesn't provide
|
|
67
|
+
|
|
68
|
+
**How to fix:**
|
|
69
|
+
1. Check the validation logs above
|
|
70
|
+
2. Add missing columns to your DBT model
|
|
71
|
+
3. Or update API models to match DBT output
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
🤖 Automated by [Data Contract Validator](https://github.com/your-org/data-contract-validator)`
|
|
75
|
+
})
|
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: data-contract-validator
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Prevent production API breaks by validating data contracts between DBT models and API frameworks
|
|
5
|
+
Author-email: Ogunniran Siji <ogunniransiji@gmail.com>
|
|
6
|
+
Maintainer-email: Ogunniran Siji <ogunniransiji@gmail.com>
|
|
7
|
+
License: MIT
|
|
8
|
+
Project-URL: Homepage, https://github.com/OGsiji/retl_validator
|
|
9
|
+
Project-URL: Documentation, https://github.com/OGsiji/retl_validator/blob/main/README.md
|
|
10
|
+
Project-URL: Repository, https://github.com/OGsiji/retl_validator
|
|
11
|
+
Project-URL: Bug Reports, https://github.com/OGsiji/retl_validator/issues
|
|
12
|
+
Project-URL: Changelog, https://github.com/OGsiji/retl_validator/blob/main/CHANGELOG.md
|
|
13
|
+
Keywords: dbt,fastapi,contract-testing,api-validation,data-engineering,schema-validation,ci-cd,devops
|
|
14
|
+
Classifier: Development Status :: 4 - Beta
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
24
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
25
|
+
Classifier: Topic :: Software Development :: Testing
|
|
26
|
+
Classifier: Topic :: Database
|
|
27
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
|
|
28
|
+
Requires-Python: >=3.8
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Requires-Dist: pydantic>=2.0.0
|
|
32
|
+
Requires-Dist: PyYAML>=6.0
|
|
33
|
+
Requires-Dist: requests>=2.25.0
|
|
34
|
+
Requires-Dist: click>=8.0.0
|
|
35
|
+
Provides-Extra: dev
|
|
36
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
37
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
38
|
+
Requires-Dist: black>=22.0.0; extra == "dev"
|
|
39
|
+
Requires-Dist: flake8>=4.0.0; extra == "dev"
|
|
40
|
+
Requires-Dist: mypy>=0.991; extra == "dev"
|
|
41
|
+
Requires-Dist: pre-commit>=2.20.0; extra == "dev"
|
|
42
|
+
Requires-Dist: build>=0.8.0; extra == "dev"
|
|
43
|
+
Requires-Dist: twine>=4.0.0; extra == "dev"
|
|
44
|
+
Provides-Extra: test
|
|
45
|
+
Requires-Dist: pytest>=7.0.0; extra == "test"
|
|
46
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "test"
|
|
47
|
+
Requires-Dist: pytest-mock>=3.8.0; extra == "test"
|
|
48
|
+
Dynamic: license-file
|
|
49
|
+
|
|
50
|
+
# 🛡️ Data Contract Validator
|
|
51
|
+
|
|
52
|
+
> **Prevent production API breaks by validating data contracts between your data pipelines and API frameworks**
|
|
53
|
+
|
|
54
|
+
[](https://badge.fury.io/py/data-contract-validator)
|
|
55
|
+
[](https://github.com/your-org/data-contract-validator/actions)
|
|
56
|
+
[](https://opensource.org/licenses/MIT)
|
|
57
|
+
|
|
58
|
+
## 🎯 **What This Solves**
|
|
59
|
+
|
|
60
|
+
Ever deployed a DBT model change only to break your FastAPI in production? This tool prevents that by validating data contracts between your data pipelines and APIs **before** deployment.
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
DBT Models Contract FastAPI Models
|
|
64
|
+
(What data Validator (What APIs
|
|
65
|
+
produces) ↕️ VALIDATES ↕️ expect)
|
|
66
|
+
↓ ↓ ↓
|
|
67
|
+
Schema Finds Schema
|
|
68
|
+
Extraction Mismatches Extraction
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## ⚡ **Quick Start**
|
|
72
|
+
|
|
73
|
+
### **Installation**
|
|
74
|
+
```bash
|
|
75
|
+
pip install data-contract-validator
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### **Basic Usage**
|
|
79
|
+
```bash
|
|
80
|
+
# Validate local DBT project against FastAPI models
|
|
81
|
+
contract-validator validate \
|
|
82
|
+
--dbt-project ./my-dbt-project \
|
|
83
|
+
--fastapi-models ./my-api/models.py
|
|
84
|
+
|
|
85
|
+
# Validate across repositories (perfect for microservices)
|
|
86
|
+
contract-validator validate \
|
|
87
|
+
--dbt-project . \
|
|
88
|
+
--fastapi-repo "my-org/my-api-repo" \
|
|
89
|
+
--fastapi-path "app/models.py"
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### **GitHub Actions Integration**
|
|
93
|
+
```yaml
|
|
94
|
+
# .github/workflows/validate-contracts.yml
|
|
95
|
+
name: Validate Data Contracts
|
|
96
|
+
on: [pull_request]
|
|
97
|
+
|
|
98
|
+
jobs:
|
|
99
|
+
validate:
|
|
100
|
+
runs-on: ubuntu-latest
|
|
101
|
+
steps:
|
|
102
|
+
- uses: actions/checkout@v4
|
|
103
|
+
- uses: actions/setup-python@v4
|
|
104
|
+
with:
|
|
105
|
+
python-version: '3.9'
|
|
106
|
+
|
|
107
|
+
- name: Install validator
|
|
108
|
+
run: pip install data-contract-validator
|
|
109
|
+
|
|
110
|
+
- name: Validate contracts
|
|
111
|
+
env:
|
|
112
|
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
113
|
+
run: |
|
|
114
|
+
contract-validator validate \
|
|
115
|
+
--dbt-project . \
|
|
116
|
+
--fastapi-repo "my-org/my-api" \
|
|
117
|
+
--github-token "$GITHUB_TOKEN"
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## 🔍 **What It Validates**
|
|
121
|
+
|
|
122
|
+
### **❌ Critical Issues (Block Deployment)**
|
|
123
|
+
- **Missing tables**: API expects `user_analytics` but DBT doesn't provide it
|
|
124
|
+
- **Missing required columns**: API requires `total_revenue` but DBT model doesn't have it
|
|
125
|
+
|
|
126
|
+
### **⚠️ Warnings (Non-blocking)**
|
|
127
|
+
- **Type mismatches**: DBT provides `varchar` but API expects `integer`
|
|
128
|
+
- **Missing optional columns**: API can handle missing optional fields
|
|
129
|
+
|
|
130
|
+
### **ℹ️ Info (Good to Know)**
|
|
131
|
+
- **Extra columns**: DBT provides columns that API doesn't use
|
|
132
|
+
|
|
133
|
+
## 🎯 **Real-World Example**
|
|
134
|
+
|
|
135
|
+
### **Before (Production Breaks) 💥**
|
|
136
|
+
```sql
|
|
137
|
+
-- DBT model changes
|
|
138
|
+
select
|
|
139
|
+
user_id,
|
|
140
|
+
email,
|
|
141
|
+
-- total_orders, ❌ REMOVED this column
|
|
142
|
+
revenue
|
|
143
|
+
from users
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
# FastAPI model (unchanged)
|
|
148
|
+
class UserAnalytics(BaseModel):
|
|
149
|
+
user_id: str
|
|
150
|
+
email: str
|
|
151
|
+
total_orders: int # ❌ Still expects this!
|
|
152
|
+
revenue: float
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
**Result:** API breaks in production 💀
|
|
156
|
+
|
|
157
|
+
### **After (Caught by Validator) ✅**
|
|
158
|
+
```bash
|
|
159
|
+
❌ VALIDATION FAILED
|
|
160
|
+
💥 user_analytics.total_orders: FastAPI REQUIRES column but DBT removed it
|
|
161
|
+
🔧 Fix: Add 'total_orders' back to DBT model or update FastAPI model
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
**Result:** Issue caught in CI/CD, production safe! 🛡️
|
|
165
|
+
|
|
166
|
+
## 🚀 **Supported Frameworks**
|
|
167
|
+
|
|
168
|
+
### **Data Sources**
|
|
169
|
+
- ✅ **DBT** (dbt-core, all adapters)
|
|
170
|
+
- 🔄 **Databricks** (coming soon)
|
|
171
|
+
- 🔄 **Airflow** (coming soon)
|
|
172
|
+
|
|
173
|
+
### **API Frameworks**
|
|
174
|
+
- ✅ **FastAPI** (Pydantic + SQLModel)
|
|
175
|
+
- 🔄 **Django** (coming soon)
|
|
176
|
+
- 🔄 **Flask-SQLAlchemy** (coming soon)
|
|
177
|
+
|
|
178
|
+
*Want to add support for your framework? [See extending guide](docs/extending.md)*
|
|
179
|
+
|
|
180
|
+
## 📦 **Installation Options**
|
|
181
|
+
|
|
182
|
+
### **Option 1: PyPI (Recommended)**
|
|
183
|
+
```bash
|
|
184
|
+
pip install data-contract-validator
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
### **Option 2: From Source**
|
|
188
|
+
```bash
|
|
189
|
+
git clone https://github.com/your-org/data-contract-validator
|
|
190
|
+
cd data-contract-validator
|
|
191
|
+
pip install -e .
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
### **Option 3: GitHub Actions Only**
|
|
195
|
+
```yaml
|
|
196
|
+
- name: Validate Contracts
|
|
197
|
+
uses: your-org/data-contract-validator@v1
|
|
198
|
+
with:
|
|
199
|
+
dbt-project: '.'
|
|
200
|
+
fastapi-repo: 'my-org/my-api'
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
## 🔧 **Configuration**
|
|
204
|
+
|
|
205
|
+
### **Command Line**
|
|
206
|
+
```bash
|
|
207
|
+
contract-validator validate \
|
|
208
|
+
--dbt-project ./dbt-project \ # DBT project path
|
|
209
|
+
--fastapi-repo "org/repo" \ # GitHub repo
|
|
210
|
+
--fastapi-path "app/models.py" \ # Path to models
|
|
211
|
+
--github-token "$GITHUB_TOKEN" \ # For private repos
|
|
212
|
+
--output json # Output format
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
### **Configuration File**
|
|
216
|
+
```yaml
|
|
217
|
+
# .contract-validator.yml
|
|
218
|
+
version: '1.0'
|
|
219
|
+
sources:
|
|
220
|
+
dbt:
|
|
221
|
+
project_path: './dbt-project'
|
|
222
|
+
auto_update_schemas: true
|
|
223
|
+
|
|
224
|
+
targets:
|
|
225
|
+
fastapi:
|
|
226
|
+
repo: 'my-org/my-api'
|
|
227
|
+
path: 'app/models.py'
|
|
228
|
+
|
|
229
|
+
validation:
|
|
230
|
+
fail_on: ['missing_tables', 'missing_required_columns']
|
|
231
|
+
warn_on: ['type_mismatches', 'missing_optional_columns']
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
## 📊 **Output Formats**
|
|
235
|
+
|
|
236
|
+
### **Terminal (Default)**
|
|
237
|
+
```bash
|
|
238
|
+
🔍 Contract Validation Results:
|
|
239
|
+
|
|
240
|
+
❌ CRITICAL ISSUES:
|
|
241
|
+
💥 user_analytics.total_revenue: FastAPI expects this column but DBT doesn't provide it
|
|
242
|
+
🔧 Fix: Add 'total_revenue' to your DBT model
|
|
243
|
+
|
|
244
|
+
✅ VALIDATION PASSED (with warnings)
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
### **GitHub Actions**
|
|
248
|
+
```bash
|
|
249
|
+
::error::user_analytics.total_revenue: Missing required column
|
|
250
|
+
::warning::user_analytics.age: Type mismatch (varchar vs integer)
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
### **JSON**
|
|
254
|
+
```json
|
|
255
|
+
{
|
|
256
|
+
"success": false,
|
|
257
|
+
"issues": [
|
|
258
|
+
{
|
|
259
|
+
"severity": "error",
|
|
260
|
+
"table": "user_analytics",
|
|
261
|
+
"column": "total_revenue",
|
|
262
|
+
"message": "FastAPI expects column but DBT doesn't provide it",
|
|
263
|
+
"suggestion": "Add 'total_revenue' to your DBT model"
|
|
264
|
+
}
|
|
265
|
+
]
|
|
266
|
+
}
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
## 🏗️ **Architecture**
|
|
270
|
+
|
|
271
|
+
```python
|
|
272
|
+
# Simple, extensible architecture
|
|
273
|
+
from data_contract_validator import ContractValidator
|
|
274
|
+
from data_contract_validator.extractors import DBTExtractor, FastAPIExtractor
|
|
275
|
+
|
|
276
|
+
# Initialize extractors
|
|
277
|
+
dbt = DBTExtractor(project_path='./dbt-project')
|
|
278
|
+
fastapi = FastAPIExtractor(repo='my-org/my-api', path='app/models.py')
|
|
279
|
+
|
|
280
|
+
# Run validation
|
|
281
|
+
validator = ContractValidator(source=dbt, target=fastapi)
|
|
282
|
+
result = validator.validate()
|
|
283
|
+
|
|
284
|
+
if not result.success:
|
|
285
|
+
print(f"❌ {len(result.critical_issues)} critical issues found")
|
|
286
|
+
for issue in result.critical_issues:
|
|
287
|
+
print(f"💥 {issue.table}.{issue.column}: {issue.message}")
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
## 🤝 **Contributing**
|
|
291
|
+
|
|
292
|
+
We love contributions! See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
|
293
|
+
|
|
294
|
+
### **Quick Setup**
|
|
295
|
+
```bash
|
|
296
|
+
git clone https://github.com/your-org/data-contract-validator
|
|
297
|
+
cd data-contract-validator
|
|
298
|
+
pip install -e ".[dev]"
|
|
299
|
+
pytest
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
### **Adding New Extractors**
|
|
303
|
+
```python
|
|
304
|
+
from data_contract_validator.extractors import BaseExtractor
|
|
305
|
+
|
|
306
|
+
class MyFrameworkExtractor(BaseExtractor):
|
|
307
|
+
def extract_schemas(self) -> Dict[str, Schema]:
|
|
308
|
+
# Your implementation
|
|
309
|
+
return schemas
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
## 🎉 **Success Stories**
|
|
313
|
+
|
|
314
|
+
> *"We prevented 15 production incidents in our first month using this tool. It's now required in all our data pipeline PRs."*
|
|
315
|
+
> — Data Engineering Team, TechCorp
|
|
316
|
+
|
|
317
|
+
> *"Finally! A tool that validates the contract between our DBT models and FastAPI services. No more surprise 500 errors."*
|
|
318
|
+
> — Platform Team, StartupCo
|
|
319
|
+
|
|
320
|
+
## 📚 **Documentation**
|
|
321
|
+
|
|
322
|
+
- [Installation Guide](docs/installation.md)
|
|
323
|
+
- [Configuration Reference](docs/configuration.md)
|
|
324
|
+
- [GitHub Actions Setup](docs/github-actions.md)
|
|
325
|
+
- [Extending with New Extractors](docs/extending.md)
|
|
326
|
+
- [API Reference](docs/api-reference.md)
|
|
327
|
+
|
|
328
|
+
## 📄 **License**
|
|
329
|
+
|
|
330
|
+
MIT License - see [LICENSE](LICENSE) file for details.
|
|
331
|
+
|
|
332
|
+
## 🆘 **Support**
|
|
333
|
+
|
|
334
|
+
- 🐛 **Bug reports**: [GitHub Issues](https://github.com/your-org/data-contract-validator/issues)
|
|
335
|
+
- 💡 **Feature requests**: [GitHub Discussions](https://github.com/your-org/data-contract-validator/discussions)
|
|
336
|
+
- 📧 **Email**: your-email@example.com
|
|
337
|
+
|
|
338
|
+
## ⭐ **Star History**
|
|
339
|
+
|
|
340
|
+
If this tool helps you prevent production incidents, please star the repo! ⭐
|
|
341
|
+
|
|
342
|
+
---
|
|
343
|
+
|
|
344
|
+
**Built with ❤️ by data engineers, for data engineers.**
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
data_contract_validator/__init__.py,sha256=wC7dK5pxNGWjbXM3i1JAZzwIbPxifE9sLta61IEidQM,589
|
|
2
|
+
data_contract_validator/cli.py,sha256=kt3PorFBCWhUQjC7lC_98UjRj4yOQpP9P7hQI0dquLs,23262
|
|
3
|
+
data_contract_validator/py.typed,sha256=NmcyTpHrUDYCXZ92yGDO00lf_9gYJDpyQ2_zq1d1Jrw,60
|
|
4
|
+
data_contract_validator/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
+
data_contract_validator/core/models.py,sha256=JAOiOAfftkRRz8SppVYAFjJp9VBweYfp8RrEwS5cgGE,3512
|
|
6
|
+
data_contract_validator/core/validator.py,sha256=FjYBbkdN5upRmvvZjYeL-r8zjOA0a1VyfZehXv_cP44,7314
|
|
7
|
+
data_contract_validator/extractors/__init__.py,sha256=38_764gOoLRDqImgaqZYpjZ7cGitnEIGyXU1uNspl0E,282
|
|
8
|
+
data_contract_validator/extractors/base.py,sha256=b_NtwUoNKw0OxrvhZQILHb3DNTWfKoDIv7Qa82bYoEg,1282
|
|
9
|
+
data_contract_validator/extractors/dbt.py,sha256=R6y0-uwod68wLYlRlxZPfG2xyfAFmHdhpFB65281ijM,7417
|
|
10
|
+
data_contract_validator/extractors/fastapi.py,sha256=1RfcmxPPFS55gqRVCjVqmDWWtdrMCxKFBAjcygfVKxA,7592
|
|
11
|
+
data_contract_validator/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
+
data_contract_validator/templates/github-actions-template.yml,sha256=7MsWCZFgcyWy9x6wcDJEsnq_aWfU6HyeQiP5MGU3tV8,2271
|
|
13
|
+
data_contract_validator-1.0.0.dist-info/licenses/LICENSE,sha256=QhvdDVCpxC1HXlDaUCsgoM4xXTMKPMCO5OkiVwlM22w,1070
|
|
14
|
+
data_contract_validator-1.0.0.dist-info/METADATA,sha256=wov4IoUPbUxNmuuLIDG_9lOyuoKl4Xz7QhJyWZnYQ7Q,10377
|
|
15
|
+
data_contract_validator-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
16
|
+
data_contract_validator-1.0.0.dist-info/entry_points.txt,sha256=6lyq9Tu8r5XEGzvUiwG0GkeANnzz9JfupPhsANYKAbY,122
|
|
17
|
+
data_contract_validator-1.0.0.dist-info/top_level.txt,sha256=DGGU-8F--wthg0Nzfe1Fud2RcAB_e4u97C9Vzo365h0,24
|
|
18
|
+
data_contract_validator-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Ogunniran Siji
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
data_contract_validator
|