data-contract-validator 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_contract_validator-1.0.0/CHANGELOG.md +38 -0
- data_contract_validator-1.0.0/LICENSE +21 -0
- data_contract_validator-1.0.0/MANIFEST.in +13 -0
- data_contract_validator-1.0.0/PKG-INFO +344 -0
- data_contract_validator-1.0.0/README.md +295 -0
- data_contract_validator-1.0.0/data_contract_validator/__init__.py +24 -0
- data_contract_validator-1.0.0/data_contract_validator/cli.py +672 -0
- data_contract_validator-1.0.0/data_contract_validator/core/__init__.py +0 -0
- data_contract_validator-1.0.0/data_contract_validator/core/models.py +115 -0
- data_contract_validator-1.0.0/data_contract_validator/core/validator.py +187 -0
- data_contract_validator-1.0.0/data_contract_validator/extractors/__init__.py +14 -0
- data_contract_validator-1.0.0/data_contract_validator/extractors/base.py +45 -0
- data_contract_validator-1.0.0/data_contract_validator/extractors/dbt.py +213 -0
- data_contract_validator-1.0.0/data_contract_validator/extractors/fastapi.py +200 -0
- data_contract_validator-1.0.0/data_contract_validator/integrations/__init__.py +0 -0
- data_contract_validator-1.0.0/data_contract_validator/py.typed +2 -0
- data_contract_validator-1.0.0/data_contract_validator/templates/github-actions-template.yml +75 -0
- data_contract_validator-1.0.0/data_contract_validator.egg-info/PKG-INFO +344 -0
- data_contract_validator-1.0.0/data_contract_validator.egg-info/SOURCES.txt +24 -0
- data_contract_validator-1.0.0/data_contract_validator.egg-info/dependency_links.txt +1 -0
- data_contract_validator-1.0.0/data_contract_validator.egg-info/entry_points.txt +3 -0
- data_contract_validator-1.0.0/data_contract_validator.egg-info/requires.txt +19 -0
- data_contract_validator-1.0.0/data_contract_validator.egg-info/top_level.txt +1 -0
- data_contract_validator-1.0.0/pyproject.toml +141 -0
- data_contract_validator-1.0.0/requirements.txt +4 -0
- data_contract_validator-1.0.0/setup.cfg +4 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [1.0.0] - 2025-01-XX
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- Initial release of Data Contract Validator
|
|
14
|
+
- DBT schema extraction from SQL files and manifest.json
|
|
15
|
+
- FastAPI/Pydantic model extraction from local files and GitHub repos
|
|
16
|
+
- Command-line interface with multiple output formats
|
|
17
|
+
- GitHub Actions integration
|
|
18
|
+
- Contract validation with critical/warning/info severity levels
|
|
19
|
+
- Support for multiple repositories and complex validation scenarios
|
|
20
|
+
|
|
21
|
+
### Features
|
|
22
|
+
- ✅ DBT model schema extraction
|
|
23
|
+
- ✅ FastAPI/Pydantic schema extraction
|
|
24
|
+
- ✅ Cross-repository validation
|
|
25
|
+
- ✅ GitHub Actions workflows
|
|
26
|
+
- ✅ Multiple output formats (terminal, JSON, GitHub Actions)
|
|
27
|
+
- ✅ Comprehensive error reporting with suggested fixes
|
|
28
|
+
- ✅ Type compatibility checking
|
|
29
|
+
- ✅ Missing table/column detection
|
|
30
|
+
|
|
31
|
+
### Known Limitations
|
|
32
|
+
- Only supports DBT and FastAPI currently
|
|
33
|
+
- Requires manual installation of DBT CLI
|
|
34
|
+
- Limited type inference from SQL
|
|
35
|
+
- No support for complex nested types
|
|
36
|
+
|
|
37
|
+
[Unreleased]: https://github.com/OGsiji/retl_validator/compare/v1.0.0...HEAD
|
|
38
|
+
[1.0.0]: https://github.com/OGsiji/retl_validator/releases/tag/v1.0.0
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Ogunniran Siji
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
include README.md
|
|
2
|
+
include LICENSE
|
|
3
|
+
include requirements.txt
|
|
4
|
+
include requirements-dev.txt
|
|
5
|
+
include pyproject.toml
|
|
6
|
+
include CHANGELOG.md
|
|
7
|
+
recursive-include data_contract_validator/templates *.yml *.yaml
|
|
8
|
+
recursive-include data_contract_validator *.py
|
|
9
|
+
recursive-exclude * __pycache__
|
|
10
|
+
recursive-exclude * *.py[co]
|
|
11
|
+
recursive-exclude tests *
|
|
12
|
+
recursive-exclude examples *
|
|
13
|
+
recursive-exclude docs *
|
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: data-contract-validator
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Prevent production API breaks by validating data contracts between DBT models and API frameworks
|
|
5
|
+
Author-email: Ogunniran Siji <ogunniransiji@gmail.com>
|
|
6
|
+
Maintainer-email: Ogunniran Siji <ogunniransiji@gmail.com>
|
|
7
|
+
License: MIT
|
|
8
|
+
Project-URL: Homepage, https://github.com/OGsiji/retl_validator
|
|
9
|
+
Project-URL: Documentation, https://github.com/OGsiji/retl_validator/blob/main/README.md
|
|
10
|
+
Project-URL: Repository, https://github.com/OGsiji/retl_validator
|
|
11
|
+
Project-URL: Bug Reports, https://github.com/OGsiji/retl_validator/issues
|
|
12
|
+
Project-URL: Changelog, https://github.com/OGsiji/retl_validator/blob/main/CHANGELOG.md
|
|
13
|
+
Keywords: dbt,fastapi,contract-testing,api-validation,data-engineering,schema-validation,ci-cd,devops
|
|
14
|
+
Classifier: Development Status :: 4 - Beta
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
24
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
25
|
+
Classifier: Topic :: Software Development :: Testing
|
|
26
|
+
Classifier: Topic :: Database
|
|
27
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
|
|
28
|
+
Requires-Python: >=3.8
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Requires-Dist: pydantic>=2.0.0
|
|
32
|
+
Requires-Dist: PyYAML>=6.0
|
|
33
|
+
Requires-Dist: requests>=2.25.0
|
|
34
|
+
Requires-Dist: click>=8.0.0
|
|
35
|
+
Provides-Extra: dev
|
|
36
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
37
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
38
|
+
Requires-Dist: black>=22.0.0; extra == "dev"
|
|
39
|
+
Requires-Dist: flake8>=4.0.0; extra == "dev"
|
|
40
|
+
Requires-Dist: mypy>=0.991; extra == "dev"
|
|
41
|
+
Requires-Dist: pre-commit>=2.20.0; extra == "dev"
|
|
42
|
+
Requires-Dist: build>=0.8.0; extra == "dev"
|
|
43
|
+
Requires-Dist: twine>=4.0.0; extra == "dev"
|
|
44
|
+
Provides-Extra: test
|
|
45
|
+
Requires-Dist: pytest>=7.0.0; extra == "test"
|
|
46
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "test"
|
|
47
|
+
Requires-Dist: pytest-mock>=3.8.0; extra == "test"
|
|
48
|
+
Dynamic: license-file
|
|
49
|
+
|
|
50
|
+
# 🛡️ Data Contract Validator
|
|
51
|
+
|
|
52
|
+
> **Prevent production API breaks by validating data contracts between your data pipelines and API frameworks**
|
|
53
|
+
|
|
54
|
+
[![PyPI version](https://badge.fury.io/py/data-contract-validator.svg)](https://badge.fury.io/py/data-contract-validator)
|
|
55
|
+
[Build status](https://github.com/OGsiji/retl_validator/actions)
|
|
56
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
57
|
+
|
|
58
|
+
## 🎯 **What This Solves**
|
|
59
|
+
|
|
60
|
+
Ever deployed a DBT model change only to break your FastAPI in production? This tool prevents that by validating data contracts between your data pipelines and APIs **before** deployment.
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
DBT Models Contract FastAPI Models
|
|
64
|
+
(What data Validator (What APIs
|
|
65
|
+
produces) ↕️ VALIDATES ↕️ expect)
|
|
66
|
+
↓ ↓ ↓
|
|
67
|
+
Schema Finds Schema
|
|
68
|
+
Extraction Mismatches Extraction
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## ⚡ **Quick Start**
|
|
72
|
+
|
|
73
|
+
### **Installation**
|
|
74
|
+
```bash
|
|
75
|
+
pip install data-contract-validator
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### **Basic Usage**
|
|
79
|
+
```bash
|
|
80
|
+
# Validate local DBT project against FastAPI models
|
|
81
|
+
contract-validator validate \
|
|
82
|
+
--dbt-project ./my-dbt-project \
|
|
83
|
+
--fastapi-models ./my-api/models.py
|
|
84
|
+
|
|
85
|
+
# Validate across repositories (perfect for microservices)
|
|
86
|
+
contract-validator validate \
|
|
87
|
+
--dbt-project . \
|
|
88
|
+
--fastapi-repo "my-org/my-api-repo" \
|
|
89
|
+
--fastapi-path "app/models.py"
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### **GitHub Actions Integration**
|
|
93
|
+
```yaml
|
|
94
|
+
# .github/workflows/validate-contracts.yml
|
|
95
|
+
name: Validate Data Contracts
|
|
96
|
+
on: [pull_request]
|
|
97
|
+
|
|
98
|
+
jobs:
|
|
99
|
+
validate:
|
|
100
|
+
runs-on: ubuntu-latest
|
|
101
|
+
steps:
|
|
102
|
+
- uses: actions/checkout@v4
|
|
103
|
+
- uses: actions/setup-python@v4
|
|
104
|
+
with:
|
|
105
|
+
python-version: '3.9'
|
|
106
|
+
|
|
107
|
+
- name: Install validator
|
|
108
|
+
run: pip install data-contract-validator
|
|
109
|
+
|
|
110
|
+
- name: Validate contracts
|
|
111
|
+
env:
|
|
112
|
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
113
|
+
run: |
|
|
114
|
+
contract-validator validate \
|
|
115
|
+
--dbt-project . \
|
|
116
|
+
--fastapi-repo "my-org/my-api" \
|
|
117
|
+
--github-token "$GITHUB_TOKEN"
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## 🔍 **What It Validates**
|
|
121
|
+
|
|
122
|
+
### **❌ Critical Issues (Block Deployment)**
|
|
123
|
+
- **Missing tables**: API expects `user_analytics` but DBT doesn't provide it
|
|
124
|
+
- **Missing required columns**: API requires `total_revenue` but DBT model doesn't have it
|
|
125
|
+
|
|
126
|
+
### **⚠️ Warnings (Non-blocking)**
|
|
127
|
+
- **Type mismatches**: DBT provides `varchar` but API expects `integer`
|
|
128
|
+
- **Missing optional columns**: API can handle missing optional fields
|
|
129
|
+
|
|
130
|
+
### **ℹ️ Info (Good to Know)**
|
|
131
|
+
- **Extra columns**: DBT provides columns that API doesn't use
|
|
132
|
+
|
|
133
|
+
## 🎯 **Real-World Example**
|
|
134
|
+
|
|
135
|
+
### **Before (Production Breaks) 💥**
|
|
136
|
+
```sql
|
|
137
|
+
-- DBT model changes
|
|
138
|
+
select
|
|
139
|
+
user_id,
|
|
140
|
+
email,
|
|
141
|
+
-- total_orders, ❌ REMOVED this column
|
|
142
|
+
revenue
|
|
143
|
+
from users
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
# FastAPI model (unchanged)
|
|
148
|
+
class UserAnalytics(BaseModel):
|
|
149
|
+
user_id: str
|
|
150
|
+
email: str
|
|
151
|
+
total_orders: int # ❌ Still expects this!
|
|
152
|
+
revenue: float
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
**Result:** API breaks in production 💀
|
|
156
|
+
|
|
157
|
+
### **After (Caught by Validator) ✅**
|
|
158
|
+
```bash
|
|
159
|
+
❌ VALIDATION FAILED
|
|
160
|
+
💥 user_analytics.total_orders: FastAPI REQUIRES column but DBT removed it
|
|
161
|
+
🔧 Fix: Add 'total_orders' back to DBT model or update FastAPI model
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
**Result:** Issue caught in CI/CD, production safe! 🛡️
|
|
165
|
+
|
|
166
|
+
## 🚀 **Supported Frameworks**
|
|
167
|
+
|
|
168
|
+
### **Data Sources**
|
|
169
|
+
- ✅ **DBT** (dbt-core, all adapters)
|
|
170
|
+
- 🔄 **Databricks** (coming soon)
|
|
171
|
+
- 🔄 **Airflow** (coming soon)
|
|
172
|
+
|
|
173
|
+
### **API Frameworks**
|
|
174
|
+
- ✅ **FastAPI** (Pydantic + SQLModel)
|
|
175
|
+
- 🔄 **Django** (coming soon)
|
|
176
|
+
- 🔄 **Flask-SQLAlchemy** (coming soon)
|
|
177
|
+
|
|
178
|
+
*Want to add support for your framework? [See extending guide](docs/extending.md)*
|
|
179
|
+
|
|
180
|
+
## 📦 **Installation Options**
|
|
181
|
+
|
|
182
|
+
### **Option 1: PyPI (Recommended)**
|
|
183
|
+
```bash
|
|
184
|
+
pip install data-contract-validator
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
### **Option 2: From Source**
|
|
188
|
+
```bash
|
|
189
|
+
git clone https://github.com/OGsiji/retl_validator data-contract-validator
|
|
190
|
+
cd data-contract-validator
|
|
191
|
+
pip install -e .
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
### **Option 3: GitHub Actions Only**
|
|
195
|
+
```yaml
|
|
196
|
+
- name: Validate Contracts
|
|
197
|
+
uses: your-org/data-contract-validator@v1
|
|
198
|
+
with:
|
|
199
|
+
dbt-project: '.'
|
|
200
|
+
fastapi-repo: 'my-org/my-api'
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
## 🔧 **Configuration**
|
|
204
|
+
|
|
205
|
+
### **Command Line**
|
|
206
|
+
```bash
|
|
207
|
+
contract-validator validate \
|
|
208
|
+
--dbt-project ./dbt-project \ # DBT project path
|
|
209
|
+
--fastapi-repo "org/repo" \ # GitHub repo
|
|
210
|
+
--fastapi-path "app/models.py" \ # Path to models
|
|
211
|
+
--github-token "$GITHUB_TOKEN" \ # For private repos
|
|
212
|
+
--output json # Output format
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
### **Configuration File**
|
|
216
|
+
```yaml
|
|
217
|
+
# .contract-validator.yml
|
|
218
|
+
version: '1.0'
|
|
219
|
+
sources:
|
|
220
|
+
dbt:
|
|
221
|
+
project_path: './dbt-project'
|
|
222
|
+
auto_update_schemas: true
|
|
223
|
+
|
|
224
|
+
targets:
|
|
225
|
+
fastapi:
|
|
226
|
+
repo: 'my-org/my-api'
|
|
227
|
+
path: 'app/models.py'
|
|
228
|
+
|
|
229
|
+
validation:
|
|
230
|
+
fail_on: ['missing_tables', 'missing_required_columns']
|
|
231
|
+
warn_on: ['type_mismatches', 'missing_optional_columns']
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
## 📊 **Output Formats**
|
|
235
|
+
|
|
236
|
+
### **Terminal (Default)**
|
|
237
|
+
```bash
|
|
238
|
+
🔍 Contract Validation Results:
|
|
239
|
+
|
|
240
|
+
❌ CRITICAL ISSUES:
|
|
241
|
+
💥 user_analytics.total_revenue: FastAPI expects this column but DBT doesn't provide it
|
|
242
|
+
🔧 Fix: Add 'total_revenue' to your DBT model
|
|
243
|
+
|
|
244
|
+
❌ VALIDATION FAILED
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
### **GitHub Actions**
|
|
248
|
+
```bash
|
|
249
|
+
::error::user_analytics.total_revenue: Missing required column
|
|
250
|
+
::warning::user_analytics.age: Type mismatch (varchar vs integer)
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
### **JSON**
|
|
254
|
+
```json
|
|
255
|
+
{
|
|
256
|
+
"success": false,
|
|
257
|
+
"issues": [
|
|
258
|
+
{
|
|
259
|
+
"severity": "critical",
|
|
260
|
+
"table": "user_analytics",
|
|
261
|
+
"column": "total_revenue",
|
|
262
|
+
"message": "FastAPI expects column but DBT doesn't provide it",
|
|
263
|
+
"suggestion": "Add 'total_revenue' to your DBT model"
|
|
264
|
+
}
|
|
265
|
+
]
|
|
266
|
+
}
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
## 🏗️ **Architecture**
|
|
270
|
+
|
|
271
|
+
```python
|
|
272
|
+
# Simple, extensible architecture
|
|
273
|
+
from data_contract_validator import ContractValidator
|
|
274
|
+
from data_contract_validator.extractors import DBTExtractor, FastAPIExtractor
|
|
275
|
+
|
|
276
|
+
# Initialize extractors
|
|
277
|
+
dbt = DBTExtractor(project_path='./dbt-project')
|
|
278
|
+
fastapi = FastAPIExtractor(repo='my-org/my-api', path='app/models.py')
|
|
279
|
+
|
|
280
|
+
# Run validation
|
|
281
|
+
validator = ContractValidator(source=dbt, target=fastapi)
|
|
282
|
+
result = validator.validate()
|
|
283
|
+
|
|
284
|
+
if not result.success:
|
|
285
|
+
print(f"❌ {len(result.critical_issues)} critical issues found")
|
|
286
|
+
for issue in result.critical_issues:
|
|
287
|
+
print(f"💥 {issue.table}.{issue.column}: {issue.message}")
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
## 🤝 **Contributing**
|
|
291
|
+
|
|
292
|
+
We love contributions! See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
|
293
|
+
|
|
294
|
+
### **Quick Setup**
|
|
295
|
+
```bash
|
|
296
|
+
git clone https://github.com/OGsiji/retl_validator data-contract-validator
|
|
297
|
+
cd data-contract-validator
|
|
298
|
+
pip install -e ".[dev]"
|
|
299
|
+
pytest
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
### **Adding New Extractors**
|
|
303
|
+
```python
|
|
304
|
+
from data_contract_validator.extractors import BaseExtractor
|
|
305
|
+
|
|
306
|
+
class MyFrameworkExtractor(BaseExtractor):
|
|
307
|
+
def extract_schemas(self) -> Dict[str, Schema]:
|
|
308
|
+
# Your implementation
|
|
309
|
+
return schemas
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
## 🎉 **Success Stories**
|
|
313
|
+
|
|
314
|
+
> *"We prevented 15 production incidents in our first month using this tool. It's now required in all our data pipeline PRs."*
|
|
315
|
+
> — Data Engineering Team, TechCorp
|
|
316
|
+
|
|
317
|
+
> *"Finally! A tool that validates the contract between our DBT models and FastAPI services. No more surprise 500 errors."*
|
|
318
|
+
> — Platform Team, StartupCo
|
|
319
|
+
|
|
320
|
+
## 📚 **Documentation**
|
|
321
|
+
|
|
322
|
+
- [Installation Guide](docs/installation.md)
|
|
323
|
+
- [Configuration Reference](docs/configuration.md)
|
|
324
|
+
- [GitHub Actions Setup](docs/github-actions.md)
|
|
325
|
+
- [Extending with New Extractors](docs/extending.md)
|
|
326
|
+
- [API Reference](docs/api-reference.md)
|
|
327
|
+
|
|
328
|
+
## 📄 **License**
|
|
329
|
+
|
|
330
|
+
MIT License - see [LICENSE](LICENSE) file for details.
|
|
331
|
+
|
|
332
|
+
## 🆘 **Support**
|
|
333
|
+
|
|
334
|
+
- 🐛 **Bug reports**: [GitHub Issues](https://github.com/OGsiji/retl_validator/issues)
|
|
335
|
+
- 💡 **Feature requests**: [GitHub Discussions](https://github.com/OGsiji/retl_validator/discussions)
|
|
336
|
+
- 📧 **Email**: ogunniransiji@gmail.com
|
|
337
|
+
|
|
338
|
+
## ⭐ **Star History**
|
|
339
|
+
|
|
340
|
+
If this tool helps you prevent production incidents, please star the repo! ⭐
|
|
341
|
+
|
|
342
|
+
---
|
|
343
|
+
|
|
344
|
+
**Built with ❤️ by data engineers, for data engineers.**
|
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
# 🛡️ Data Contract Validator
|
|
2
|
+
|
|
3
|
+
> **Prevent production API breaks by validating data contracts between your data pipelines and API frameworks**
|
|
4
|
+
|
|
5
|
+
[![PyPI version](https://badge.fury.io/py/data-contract-validator.svg)](https://badge.fury.io/py/data-contract-validator)
|
|
6
|
+
[Build status](https://github.com/OGsiji/retl_validator/actions)
|
|
7
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
8
|
+
|
|
9
|
+
## 🎯 **What This Solves**
|
|
10
|
+
|
|
11
|
+
Ever deployed a DBT model change only to break your FastAPI in production? This tool prevents that by validating data contracts between your data pipelines and APIs **before** deployment.
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
DBT Models Contract FastAPI Models
|
|
15
|
+
(What data Validator (What APIs
|
|
16
|
+
produces) ↕️ VALIDATES ↕️ expect)
|
|
17
|
+
↓ ↓ ↓
|
|
18
|
+
Schema Finds Schema
|
|
19
|
+
Extraction Mismatches Extraction
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## ⚡ **Quick Start**
|
|
23
|
+
|
|
24
|
+
### **Installation**
|
|
25
|
+
```bash
|
|
26
|
+
pip install data-contract-validator
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
### **Basic Usage**
|
|
30
|
+
```bash
|
|
31
|
+
# Validate local DBT project against FastAPI models
|
|
32
|
+
contract-validator validate \
|
|
33
|
+
--dbt-project ./my-dbt-project \
|
|
34
|
+
--fastapi-models ./my-api/models.py
|
|
35
|
+
|
|
36
|
+
# Validate across repositories (perfect for microservices)
|
|
37
|
+
contract-validator validate \
|
|
38
|
+
--dbt-project . \
|
|
39
|
+
--fastapi-repo "my-org/my-api-repo" \
|
|
40
|
+
--fastapi-path "app/models.py"
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
### **GitHub Actions Integration**
|
|
44
|
+
```yaml
|
|
45
|
+
# .github/workflows/validate-contracts.yml
|
|
46
|
+
name: Validate Data Contracts
|
|
47
|
+
on: [pull_request]
|
|
48
|
+
|
|
49
|
+
jobs:
|
|
50
|
+
validate:
|
|
51
|
+
runs-on: ubuntu-latest
|
|
52
|
+
steps:
|
|
53
|
+
- uses: actions/checkout@v4
|
|
54
|
+
- uses: actions/setup-python@v4
|
|
55
|
+
with:
|
|
56
|
+
python-version: '3.9'
|
|
57
|
+
|
|
58
|
+
- name: Install validator
|
|
59
|
+
run: pip install data-contract-validator
|
|
60
|
+
|
|
61
|
+
- name: Validate contracts
|
|
62
|
+
env:
|
|
63
|
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
64
|
+
run: |
|
|
65
|
+
contract-validator validate \
|
|
66
|
+
--dbt-project . \
|
|
67
|
+
--fastapi-repo "my-org/my-api" \
|
|
68
|
+
--github-token "$GITHUB_TOKEN"
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## 🔍 **What It Validates**
|
|
72
|
+
|
|
73
|
+
### **❌ Critical Issues (Block Deployment)**
|
|
74
|
+
- **Missing tables**: API expects `user_analytics` but DBT doesn't provide it
|
|
75
|
+
- **Missing required columns**: API requires `total_revenue` but DBT model doesn't have it
|
|
76
|
+
|
|
77
|
+
### **⚠️ Warnings (Non-blocking)**
|
|
78
|
+
- **Type mismatches**: DBT provides `varchar` but API expects `integer`
|
|
79
|
+
- **Missing optional columns**: API can handle missing optional fields
|
|
80
|
+
|
|
81
|
+
### **ℹ️ Info (Good to Know)**
|
|
82
|
+
- **Extra columns**: DBT provides columns that API doesn't use
|
|
83
|
+
|
|
84
|
+
## 🎯 **Real-World Example**
|
|
85
|
+
|
|
86
|
+
### **Before (Production Breaks) 💥**
|
|
87
|
+
```sql
|
|
88
|
+
-- DBT model changes
|
|
89
|
+
select
|
|
90
|
+
user_id,
|
|
91
|
+
email,
|
|
92
|
+
-- total_orders, ❌ REMOVED this column
|
|
93
|
+
revenue
|
|
94
|
+
from users
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
# FastAPI model (unchanged)
|
|
99
|
+
class UserAnalytics(BaseModel):
|
|
100
|
+
user_id: str
|
|
101
|
+
email: str
|
|
102
|
+
total_orders: int # ❌ Still expects this!
|
|
103
|
+
revenue: float
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
**Result:** API breaks in production 💀
|
|
107
|
+
|
|
108
|
+
### **After (Caught by Validator) ✅**
|
|
109
|
+
```bash
|
|
110
|
+
❌ VALIDATION FAILED
|
|
111
|
+
💥 user_analytics.total_orders: FastAPI REQUIRES column but DBT removed it
|
|
112
|
+
🔧 Fix: Add 'total_orders' back to DBT model or update FastAPI model
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
**Result:** Issue caught in CI/CD, production safe! 🛡️
|
|
116
|
+
|
|
117
|
+
## 🚀 **Supported Frameworks**
|
|
118
|
+
|
|
119
|
+
### **Data Sources**
|
|
120
|
+
- ✅ **DBT** (dbt-core, all adapters)
|
|
121
|
+
- 🔄 **Databricks** (coming soon)
|
|
122
|
+
- 🔄 **Airflow** (coming soon)
|
|
123
|
+
|
|
124
|
+
### **API Frameworks**
|
|
125
|
+
- ✅ **FastAPI** (Pydantic + SQLModel)
|
|
126
|
+
- 🔄 **Django** (coming soon)
|
|
127
|
+
- 🔄 **Flask-SQLAlchemy** (coming soon)
|
|
128
|
+
|
|
129
|
+
*Want to add support for your framework? [See extending guide](docs/extending.md)*
|
|
130
|
+
|
|
131
|
+
## 📦 **Installation Options**
|
|
132
|
+
|
|
133
|
+
### **Option 1: PyPI (Recommended)**
|
|
134
|
+
```bash
|
|
135
|
+
pip install data-contract-validator
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### **Option 2: From Source**
|
|
139
|
+
```bash
|
|
140
|
+
git clone https://github.com/OGsiji/retl_validator data-contract-validator
|
|
141
|
+
cd data-contract-validator
|
|
142
|
+
pip install -e .
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### **Option 3: GitHub Actions Only**
|
|
146
|
+
```yaml
|
|
147
|
+
- name: Validate Contracts
|
|
148
|
+
uses: your-org/data-contract-validator@v1
|
|
149
|
+
with:
|
|
150
|
+
dbt-project: '.'
|
|
151
|
+
fastapi-repo: 'my-org/my-api'
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
## 🔧 **Configuration**
|
|
155
|
+
|
|
156
|
+
### **Command Line**
|
|
157
|
+
```bash
|
|
158
|
+
contract-validator validate \
|
|
159
|
+
--dbt-project ./dbt-project \ # DBT project path
|
|
160
|
+
--fastapi-repo "org/repo" \ # GitHub repo
|
|
161
|
+
--fastapi-path "app/models.py" \ # Path to models
|
|
162
|
+
--github-token "$GITHUB_TOKEN" \ # For private repos
|
|
163
|
+
--output json # Output format
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### **Configuration File**
|
|
167
|
+
```yaml
|
|
168
|
+
# .contract-validator.yml
|
|
169
|
+
version: '1.0'
|
|
170
|
+
sources:
|
|
171
|
+
dbt:
|
|
172
|
+
project_path: './dbt-project'
|
|
173
|
+
auto_update_schemas: true
|
|
174
|
+
|
|
175
|
+
targets:
|
|
176
|
+
fastapi:
|
|
177
|
+
repo: 'my-org/my-api'
|
|
178
|
+
path: 'app/models.py'
|
|
179
|
+
|
|
180
|
+
validation:
|
|
181
|
+
fail_on: ['missing_tables', 'missing_required_columns']
|
|
182
|
+
warn_on: ['type_mismatches', 'missing_optional_columns']
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
## 📊 **Output Formats**
|
|
186
|
+
|
|
187
|
+
### **Terminal (Default)**
|
|
188
|
+
```bash
|
|
189
|
+
🔍 Contract Validation Results:
|
|
190
|
+
|
|
191
|
+
❌ CRITICAL ISSUES:
|
|
192
|
+
💥 user_analytics.total_revenue: FastAPI expects this column but DBT doesn't provide it
|
|
193
|
+
🔧 Fix: Add 'total_revenue' to your DBT model
|
|
194
|
+
|
|
195
|
+
❌ VALIDATION FAILED
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
### **GitHub Actions**
|
|
199
|
+
```bash
|
|
200
|
+
::error::user_analytics.total_revenue: Missing required column
|
|
201
|
+
::warning::user_analytics.age: Type mismatch (varchar vs integer)
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### **JSON**
|
|
205
|
+
```json
|
|
206
|
+
{
|
|
207
|
+
"success": false,
|
|
208
|
+
"issues": [
|
|
209
|
+
{
|
|
210
|
+
"severity": "critical",
|
|
211
|
+
"table": "user_analytics",
|
|
212
|
+
"column": "total_revenue",
|
|
213
|
+
"message": "FastAPI expects column but DBT doesn't provide it",
|
|
214
|
+
"suggestion": "Add 'total_revenue' to your DBT model"
|
|
215
|
+
}
|
|
216
|
+
]
|
|
217
|
+
}
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
## 🏗️ **Architecture**
|
|
221
|
+
|
|
222
|
+
```python
|
|
223
|
+
# Simple, extensible architecture
|
|
224
|
+
from data_contract_validator import ContractValidator
|
|
225
|
+
from data_contract_validator.extractors import DBTExtractor, FastAPIExtractor
|
|
226
|
+
|
|
227
|
+
# Initialize extractors
|
|
228
|
+
dbt = DBTExtractor(project_path='./dbt-project')
|
|
229
|
+
fastapi = FastAPIExtractor(repo='my-org/my-api', path='app/models.py')
|
|
230
|
+
|
|
231
|
+
# Run validation
|
|
232
|
+
validator = ContractValidator(source=dbt, target=fastapi)
|
|
233
|
+
result = validator.validate()
|
|
234
|
+
|
|
235
|
+
if not result.success:
|
|
236
|
+
print(f"❌ {len(result.critical_issues)} critical issues found")
|
|
237
|
+
for issue in result.critical_issues:
|
|
238
|
+
print(f"💥 {issue.table}.{issue.column}: {issue.message}")
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
## 🤝 **Contributing**
|
|
242
|
+
|
|
243
|
+
We love contributions! See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
|
244
|
+
|
|
245
|
+
### **Quick Setup**
|
|
246
|
+
```bash
|
|
247
|
+
git clone https://github.com/OGsiji/retl_validator data-contract-validator
|
|
248
|
+
cd data-contract-validator
|
|
249
|
+
pip install -e ".[dev]"
|
|
250
|
+
pytest
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
### **Adding New Extractors**
|
|
254
|
+
```python
|
|
255
|
+
from data_contract_validator.extractors import BaseExtractor
|
|
256
|
+
|
|
257
|
+
class MyFrameworkExtractor(BaseExtractor):
|
|
258
|
+
def extract_schemas(self) -> Dict[str, Schema]:
|
|
259
|
+
# Your implementation
|
|
260
|
+
return schemas
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
## 🎉 **Success Stories**
|
|
264
|
+
|
|
265
|
+
> *"We prevented 15 production incidents in our first month using this tool. It's now required in all our data pipeline PRs."*
|
|
266
|
+
> — Data Engineering Team, TechCorp
|
|
267
|
+
|
|
268
|
+
> *"Finally! A tool that validates the contract between our DBT models and FastAPI services. No more surprise 500 errors."*
|
|
269
|
+
> — Platform Team, StartupCo
|
|
270
|
+
|
|
271
|
+
## 📚 **Documentation**
|
|
272
|
+
|
|
273
|
+
- [Installation Guide](docs/installation.md)
|
|
274
|
+
- [Configuration Reference](docs/configuration.md)
|
|
275
|
+
- [GitHub Actions Setup](docs/github-actions.md)
|
|
276
|
+
- [Extending with New Extractors](docs/extending.md)
|
|
277
|
+
- [API Reference](docs/api-reference.md)
|
|
278
|
+
|
|
279
|
+
## 📄 **License**
|
|
280
|
+
|
|
281
|
+
MIT License - see [LICENSE](LICENSE) file for details.
|
|
282
|
+
|
|
283
|
+
## 🆘 **Support**
|
|
284
|
+
|
|
285
|
+
- 🐛 **Bug reports**: [GitHub Issues](https://github.com/OGsiji/retl_validator/issues)
|
|
286
|
+
- 💡 **Feature requests**: [GitHub Discussions](https://github.com/OGsiji/retl_validator/discussions)
|
|
287
|
+
- 📧 **Email**: ogunniransiji@gmail.com
|
|
288
|
+
|
|
289
|
+
## ⭐ **Star History**
|
|
290
|
+
|
|
291
|
+
If this tool helps you prevent production incidents, please star the repo! ⭐
|
|
292
|
+
|
|
293
|
+
---
|
|
294
|
+
|
|
295
|
+
**Built with ❤️ by data engineers, for data engineers.**
|