dbt-vitals 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbt_vitals-0.1.0/LICENSE +21 -0
- dbt_vitals-0.1.0/PKG-INFO +221 -0
- dbt_vitals-0.1.0/README.md +189 -0
- dbt_vitals-0.1.0/dbt_vitals/__init__.py +3 -0
- dbt_vitals-0.1.0/dbt_vitals/cli.py +120 -0
- dbt_vitals-0.1.0/dbt_vitals/findings.py +22 -0
- dbt_vitals-0.1.0/dbt_vitals/graph.py +33 -0
- dbt_vitals-0.1.0/dbt_vitals/modules/__init__.py +0 -0
- dbt_vitals-0.1.0/dbt_vitals/modules/documentation.py +65 -0
- dbt_vitals-0.1.0/dbt_vitals/modules/duplicates.py +91 -0
- dbt_vitals-0.1.0/dbt_vitals/modules/incremental.py +42 -0
- dbt_vitals-0.1.0/dbt_vitals/modules/lineage.py +91 -0
- dbt_vitals-0.1.0/dbt_vitals/modules/testing.py +80 -0
- dbt_vitals-0.1.0/dbt_vitals/parser.py +160 -0
- dbt_vitals-0.1.0/dbt_vitals/report.py +110 -0
- dbt_vitals-0.1.0/dbt_vitals/scoring.py +49 -0
- dbt_vitals-0.1.0/dbt_vitals.egg-info/PKG-INFO +221 -0
- dbt_vitals-0.1.0/dbt_vitals.egg-info/SOURCES.txt +23 -0
- dbt_vitals-0.1.0/dbt_vitals.egg-info/dependency_links.txt +1 -0
- dbt_vitals-0.1.0/dbt_vitals.egg-info/entry_points.txt +2 -0
- dbt_vitals-0.1.0/dbt_vitals.egg-info/requires.txt +9 -0
- dbt_vitals-0.1.0/dbt_vitals.egg-info/top_level.txt +1 -0
- dbt_vitals-0.1.0/pyproject.toml +50 -0
- dbt_vitals-0.1.0/setup.cfg +4 -0
- dbt_vitals-0.1.0/tests/test_dbt_vitals.py +104 -0
dbt_vitals-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Shreeti
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dbt-vitals
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Static analysis and health scoring for dbt projects — lineage, test coverage, duplicate logic, and documentation checks.
|
|
5
|
+
Author: Shreeti
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/shivah12/dbt-vitals
|
|
8
|
+
Project-URL: Issues, https://github.com/shivah12/dbt-vitals/issues
|
|
9
|
+
Keywords: dbt,data-engineering,static-analysis,sql,linter,data-quality
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
19
|
+
Classifier: Topic :: Database
|
|
20
|
+
Requires-Python: >=3.9
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Requires-Dist: click>=8.1
|
|
24
|
+
Requires-Dist: rich>=13.0
|
|
25
|
+
Requires-Dist: networkx>=3.0
|
|
26
|
+
Requires-Dist: sqlglot>=23.0
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: pytest>=7.4; extra == "dev"
|
|
29
|
+
Requires-Dist: build>=1.0; extra == "dev"
|
|
30
|
+
Requires-Dist: twine>=4.0; extra == "dev"
|
|
31
|
+
Dynamic: license-file
|
|
32
|
+
|
|
33
|
+
# dbt-vitals
|
|
34
|
+
|
|
35
|
+
Static analysis and health scoring for dbt projects — the "cargo clippy for dbt."
|
|
36
|
+
|
|
37
|
+
Unlike a SQL linter (indentation, keyword casing), dbt-vitals looks at your
|
|
38
|
+
project's *structure*: dead models, missing tests, duplicated business logic,
|
|
39
|
+
documentation coverage, and (given warehouse stats) incremental-model
|
|
40
|
+
candidates. It parses `manifest.json` / `catalog.json` — the artifacts dbt
|
|
41
|
+
already generates — so it needs no warehouse credentials of its own.
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
Overall Health
|
|
45
|
+
76/100
|
|
46
|
+
Warnings: 8 Critical: 0 Info: 4
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## What it checks
|
|
50
|
+
|
|
51
|
+
| Module | What it flags | Needs `catalog.json`? |
|
|
52
|
+
|---|---|---|
|
|
53
|
+
| Lineage | Dead/unused models, circular dependencies | No |
|
|
54
|
+
| Testing | Models missing `unique`/`not_null` on their likely primary key | No |
|
|
55
|
+
| Duplicate Logic | Repeated `CASE WHEN` blocks across models (candidates for a macro) | No |
|
|
56
|
+
| Documentation | Model/column description coverage % | No |
|
|
57
|
+
| Incremental Candidates | Large `table`-materialized models that could be `incremental` | Yes |
|
|
58
|
+
|
|
59
|
+
## Local setup
|
|
60
|
+
|
|
61
|
+
Requires Python 3.9+.
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
git clone https://github.com/shivah12/dbt-vitals.git
|
|
65
|
+
cd dbt-vitals
|
|
66
|
+
|
|
67
|
+
python -m venv .venv
|
|
68
|
+
source .venv/bin/activate # Windows: .venv\Scripts\activate
|
|
69
|
+
|
|
70
|
+
pip install -e ".[dev]"
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Run the test suite against the bundled synthetic example project:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
pytest
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Try the CLI against the bundled example (no real dbt project needed):
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
dbt-vitals analyze . --manifest examples/sample_manifest.json --catalog examples/sample_catalog.json
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Run it against a real dbt project:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
cd /path/to/your/dbt/project
|
|
89
|
+
dbt compile # or `dbt docs generate` to also get catalog.json
|
|
90
|
+
dbt-vitals analyze .
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### CLI options
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
dbt-vitals analyze <target_dir> # target_dir defaults to "."
|
|
97
|
+
--manifest PATH # override manifest.json location
|
|
98
|
+
--catalog PATH # override catalog.json location
|
|
99
|
+
--json # machine-readable output
|
|
100
|
+
--ci-comment # markdown summary for a PR comment
|
|
101
|
+
--fail-under N # exit 1 if health score < N (CI gating)
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## CI integration
|
|
105
|
+
|
|
106
|
+
`.github/workflows/dbt-vitals.yml` is included — it runs `dbt-vitals` on every
|
|
107
|
+
PR, posts the health score as a comment, and fails the build if the score
|
|
108
|
+
drops below a threshold. Adjust the `dbt compile` step for your adapter/profile.
|
|
109
|
+
|
|
110
|
+
## Publishing to PyPI
|
|
111
|
+
|
|
112
|
+
This is a **Python** package, so it's published to [PyPI](https://pypi.org),
|
|
113
|
+
not npm — `pip install dbt-vitals` is the equivalent of `npm install`.
|
|
114
|
+
|
|
115
|
+
1. Create accounts at [pypi.org](https://pypi.org/account/register/) and
|
|
116
|
+
[test.pypi.org](https://test.pypi.org/account/register/) (the sandbox —
|
|
117
|
+
publish here first to make sure everything works).
|
|
118
|
+
2. Generate an API token: PyPI account settings → API tokens → scope it to
|
|
119
|
+
this project (after the first upload) or "entire account" (for the first
|
|
120
|
+
upload, since the project doesn't exist yet).
|
|
121
|
+
3. Update `pyproject.toml`: bump `version`, fix the `Homepage`/`Issues` URLs
|
|
122
|
+
to your actual GitHub repo, add a real `authors` email if you want one.
|
|
123
|
+
4. Build and check the package:
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
pip install build twine
|
|
127
|
+
python -m build # creates dist/*.whl and dist/*.tar.gz
|
|
128
|
+
twine check dist/*
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
5. Upload to TestPyPI first:
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
twine upload --repository testpypi dist/*
|
|
135
|
+
# username: __token__
|
|
136
|
+
# password: <your TestPyPI API token, including the pypi- prefix>
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
Verify it installs cleanly:
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
pip install --index-url https://test.pypi.org/simple/ --no-deps dbt-vitals
|
|
143
|
+
dbt-vitals --version
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
6. Once that works, upload for real:
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
twine upload dist/*
|
|
150
|
+
# username: __token__
|
|
151
|
+
# password: <your PyPI API token>
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
7. From then on, anyone can install it with:
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
pip install dbt-vitals
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
To ship a new version later: bump `version` in `pyproject.toml`, delete the
|
|
161
|
+
old `dist/` folder, rebuild (`python -m build`), and `twine upload dist/*`
|
|
162
|
+
again — PyPI rejects re-uploading an existing version number.
|
|
163
|
+
|
|
164
|
+
## Pushing to GitHub
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
cd dbt-vitals
|
|
168
|
+
git init
|
|
169
|
+
git add .
|
|
170
|
+
git commit -m "Initial commit: dbt-vitals MVP"
|
|
171
|
+
|
|
172
|
+
# Create the repo on GitHub first (via github.com or `gh repo create`),
|
|
173
|
+
# then:
|
|
174
|
+
git remote add origin https://github.com/YOUR_GITHUB_USERNAME/dbt-vitals.git
|
|
175
|
+
git branch -M main
|
|
176
|
+
git push -u origin main
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
If you use the [GitHub CLI](https://cli.github.com/) instead of the website:
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
gh repo create dbt-vitals --public --source=. --remote=origin --push
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
## Project layout
|
|
186
|
+
|
|
187
|
+
```
|
|
188
|
+
dbt-vitals/
|
|
189
|
+
dbt_vitals/
|
|
190
|
+
cli.py # click CLI entry point
|
|
191
|
+
parser.py # manifest.json / catalog.json loading
|
|
192
|
+
graph.py # dependency graph (networkx)
|
|
193
|
+
scoring.py # health score calculation
|
|
194
|
+
report.py # terminal + CI markdown rendering
|
|
195
|
+
findings.py # shared Finding data structure
|
|
196
|
+
modules/
|
|
197
|
+
lineage.py # dead models, circular deps
|
|
198
|
+
testing.py # missing test coverage
|
|
199
|
+
duplicates.py # repeated CASE-WHEN logic (via sqlglot AST)
|
|
200
|
+
documentation.py # doc coverage
|
|
201
|
+
incremental.py # incremental-model candidates
|
|
202
|
+
examples/ # synthetic manifest/catalog for demos & tests
|
|
203
|
+
tests/ # pytest suite
|
|
204
|
+
.github/workflows/ # CI action
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
## Known limitations (by design, for v0.1)
|
|
208
|
+
|
|
209
|
+
- **Duplicate detection** only looks at `CASE WHEN` expressions, not arbitrary
|
|
210
|
+
repeated subqueries or joins.
|
|
211
|
+
- **Incremental candidates** use a fixed row-count threshold, not a real
|
|
212
|
+
cost/runtime estimate — that would require warehouse-specific query plans.
|
|
213
|
+
- **Primary key inference** for the testing module is a naming heuristic
|
|
214
|
+
(`id`, `<model>_id`, or any `*_id` column) since dbt's manifest has no
|
|
215
|
+
first-class primary key concept.
|
|
216
|
+
- No plugin system yet (warehouse-specific checks) — deliberately deferred,
|
|
217
|
+
see the original scoping notes for why.
|
|
218
|
+
|
|
219
|
+
## License
|
|
220
|
+
|
|
221
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
# dbt-vitals
|
|
2
|
+
|
|
3
|
+
Static analysis and health scoring for dbt projects — the "cargo clippy for dbt."
|
|
4
|
+
|
|
5
|
+
Unlike a SQL linter (indentation, keyword casing), dbt-vitals looks at your
|
|
6
|
+
project's *structure*: dead models, missing tests, duplicated business logic,
|
|
7
|
+
documentation coverage, and (given warehouse stats) incremental-model
|
|
8
|
+
candidates. It parses `manifest.json` / `catalog.json` — the artifacts dbt
|
|
9
|
+
already generates — so it needs no warehouse credentials of its own.
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
Overall Health
|
|
13
|
+
76/100
|
|
14
|
+
Warnings: 8 Critical: 0 Info: 4
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## What it checks
|
|
18
|
+
|
|
19
|
+
| Module | What it flags | Needs `catalog.json`? |
|
|
20
|
+
|---|---|---|
|
|
21
|
+
| Lineage | Dead/unused models, circular dependencies | No |
|
|
22
|
+
| Testing | Models missing `unique`/`not_null` on their likely primary key | No |
|
|
23
|
+
| Duplicate Logic | Repeated `CASE WHEN` blocks across models (candidates for a macro) | No |
|
|
24
|
+
| Documentation | Model/column description coverage % | No |
|
|
25
|
+
| Incremental Candidates | Large `table`-materialized models that could be `incremental` | Yes |
|
|
26
|
+
|
|
27
|
+
## Local setup
|
|
28
|
+
|
|
29
|
+
Requires Python 3.9+.
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
git clone https://github.com/shivah12/dbt-vitals.git
|
|
33
|
+
cd dbt-vitals
|
|
34
|
+
|
|
35
|
+
python -m venv .venv
|
|
36
|
+
source .venv/bin/activate # Windows: .venv\Scripts\activate
|
|
37
|
+
|
|
38
|
+
pip install -e ".[dev]"
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Run the test suite against the bundled synthetic example project:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
pytest
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Try the CLI against the bundled example (no real dbt project needed):
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
dbt-vitals analyze . --manifest examples/sample_manifest.json --catalog examples/sample_catalog.json
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Run it against a real dbt project:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
cd /path/to/your/dbt/project
|
|
57
|
+
dbt compile # or `dbt docs generate` to also get catalog.json
|
|
58
|
+
dbt-vitals analyze .
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### CLI options
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
dbt-vitals analyze <target_dir> # target_dir defaults to "."
|
|
65
|
+
--manifest PATH # override manifest.json location
|
|
66
|
+
--catalog PATH # override catalog.json location
|
|
67
|
+
--json # machine-readable output
|
|
68
|
+
--ci-comment # markdown summary for a PR comment
|
|
69
|
+
--fail-under N # exit 1 if health score < N (CI gating)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## CI integration
|
|
73
|
+
|
|
74
|
+
`.github/workflows/dbt-vitals.yml` is included — it runs `dbt-vitals` on every
|
|
75
|
+
PR, posts the health score as a comment, and fails the build if the score
|
|
76
|
+
drops below a threshold. Adjust the `dbt compile` step for your adapter/profile.
|
|
77
|
+
|
|
78
|
+
## Publishing to PyPI
|
|
79
|
+
|
|
80
|
+
This is a **Python** package, so it's published to [PyPI](https://pypi.org),
|
|
81
|
+
not npm — `pip install dbt-vitals` is the equivalent of `npm install`.
|
|
82
|
+
|
|
83
|
+
1. Create accounts at [pypi.org](https://pypi.org/account/register/) and
|
|
84
|
+
[test.pypi.org](https://test.pypi.org/account/register/) (the sandbox —
|
|
85
|
+
publish here first to make sure everything works).
|
|
86
|
+
2. Generate an API token: PyPI account settings → API tokens → scope it to
|
|
87
|
+
this project (after the first upload) or "entire account" (for the first
|
|
88
|
+
upload, since the project doesn't exist yet).
|
|
89
|
+
3. Update `pyproject.toml`: bump `version`, fix the `Homepage`/`Issues` URLs
|
|
90
|
+
to your actual GitHub repo, add a real `authors` email if you want one.
|
|
91
|
+
4. Build and check the package:
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
pip install build twine
|
|
95
|
+
python -m build # creates dist/*.whl and dist/*.tar.gz
|
|
96
|
+
twine check dist/*
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
5. Upload to TestPyPI first:
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
twine upload --repository testpypi dist/*
|
|
103
|
+
# username: __token__
|
|
104
|
+
# password: <your TestPyPI API token, including the pypi- prefix>
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Verify it installs cleanly:
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
pip install --index-url https://test.pypi.org/simple/ --no-deps dbt-vitals
|
|
111
|
+
dbt-vitals --version
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
6. Once that works, upload for real:
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
twine upload dist/*
|
|
118
|
+
# username: __token__
|
|
119
|
+
# password: <your PyPI API token>
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
7. From then on, anyone can install it with:
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
pip install dbt-vitals
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
To ship a new version later: bump `version` in `pyproject.toml`, delete the
|
|
129
|
+
old `dist/` folder, rebuild (`python -m build`), and `twine upload dist/*`
|
|
130
|
+
again — PyPI rejects re-uploading an existing version number.
|
|
131
|
+
|
|
132
|
+
## Pushing to GitHub
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
cd dbt-vitals
|
|
136
|
+
git init
|
|
137
|
+
git add .
|
|
138
|
+
git commit -m "Initial commit: dbt-vitals MVP"
|
|
139
|
+
|
|
140
|
+
# Create the repo on GitHub first (via github.com or `gh repo create`),
|
|
141
|
+
# then:
|
|
142
|
+
git remote add origin https://github.com/YOUR_GITHUB_USERNAME/dbt-vitals.git
|
|
143
|
+
git branch -M main
|
|
144
|
+
git push -u origin main
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
If you use the [GitHub CLI](https://cli.github.com/) instead of the website:
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
gh repo create dbt-vitals --public --source=. --remote=origin --push
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## Project layout
|
|
154
|
+
|
|
155
|
+
```
|
|
156
|
+
dbt-vitals/
|
|
157
|
+
dbt_vitals/
|
|
158
|
+
cli.py # click CLI entry point
|
|
159
|
+
parser.py # manifest.json / catalog.json loading
|
|
160
|
+
graph.py # dependency graph (networkx)
|
|
161
|
+
scoring.py # health score calculation
|
|
162
|
+
report.py # terminal + CI markdown rendering
|
|
163
|
+
findings.py # shared Finding data structure
|
|
164
|
+
modules/
|
|
165
|
+
lineage.py # dead models, circular deps
|
|
166
|
+
testing.py # missing test coverage
|
|
167
|
+
duplicates.py # repeated CASE-WHEN logic (via sqlglot AST)
|
|
168
|
+
documentation.py # doc coverage
|
|
169
|
+
incremental.py # incremental-model candidates
|
|
170
|
+
examples/ # synthetic manifest/catalog for demos & tests
|
|
171
|
+
tests/ # pytest suite
|
|
172
|
+
.github/workflows/ # CI action
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
## Known limitations (by design, for v0.1)
|
|
176
|
+
|
|
177
|
+
- **Duplicate detection** only looks at `CASE WHEN` expressions, not arbitrary
|
|
178
|
+
repeated subqueries or joins.
|
|
179
|
+
- **Incremental candidates** use a fixed row-count threshold, not a real
|
|
180
|
+
cost/runtime estimate — that would require warehouse-specific query plans.
|
|
181
|
+
- **Primary key inference** for the testing module is a naming heuristic
|
|
182
|
+
(`id`, `<model>_id`, or any `*_id` column) since dbt's manifest has no
|
|
183
|
+
first-class primary key concept.
|
|
184
|
+
- No plugin system yet (warehouse-specific checks) — deliberately deferred,
|
|
185
|
+
see the original scoping notes for why.
|
|
186
|
+
|
|
187
|
+
## License
|
|
188
|
+
|
|
189
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import click
|
|
7
|
+
from rich.console import Console
|
|
8
|
+
|
|
9
|
+
from dbt_vitals import __version__
|
|
10
|
+
from dbt_vitals.graph import build_model_graph
|
|
11
|
+
from dbt_vitals.modules import documentation, duplicates, incremental, lineage, testing
|
|
12
|
+
from dbt_vitals.parser import ManifestNotFoundError, load_catalog, load_manifest
|
|
13
|
+
from dbt_vitals.report import render, render_ci_comment
|
|
14
|
+
from dbt_vitals.scoring import compute_score
|
|
15
|
+
|
|
16
|
+
# Module-level rich console shared by the commands below for terminal output.
console = Console()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Root click command group; subcommands (e.g. `analyze`) register on it via
# `@cli.command()`. The docstring is what click shows as the group's help
# text, so keep its wording user-facing.
@click.group()
@click.version_option(__version__, prog_name="dbt-vitals")
def cli() -> None:
    """dbt-vitals: static analysis and health scoring for dbt projects."""
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _build_json_payload(score, findings, doc_coverage, model_count: int) -> dict:
    """Assemble the dictionary that ``analyze --json`` serializes to stdout."""
    return {
        "score": score.score,
        "critical_count": score.critical_count,
        "warning_count": score.warning_count,
        "info_count": score.info_count,
        "model_count": model_count,
        "documentation": {
            "model_coverage_pct": doc_coverage.model_coverage_pct,
            "column_coverage_pct": doc_coverage.column_coverage_pct,
        },
        "findings": [
            {
                "module": f.module,
                "severity": f.severity.value,
                "subject": f.subject,
                "message": f.message,
                "detail": f.detail,
                "suggestion": f.suggestion,
            }
            for f in findings
        ],
    }


# NOTE: the docstring of `analyze` is rendered by click as the command's
# --help text, so parameter notes live in comments rather than the docstring.
#
# target_dir        -- dbt project root; must be an existing directory.
# manifest_override -- explicit manifest.json path, else <target_dir>/target/manifest.json.
# catalog_override  -- explicit catalog.json path, else <target_dir>/target/catalog.json.
# as_json           -- emit the JSON payload; takes precedence over --ci-comment.
# ci_comment        -- emit the markdown summary instead of the terminal report.
# fail_under        -- exit with status 1 when the score is below this value.
@cli.command()
@click.argument("target_dir", type=click.Path(exists=True, file_okay=False), default=".")
@click.option(
    "--manifest",
    "manifest_override",
    type=click.Path(exists=True, dir_okay=False),
    help="Explicit path to manifest.json (default: <target_dir>/target/manifest.json)",
)
@click.option(
    "--catalog",
    "catalog_override",
    type=click.Path(exists=True, dir_okay=False),
    help="Explicit path to catalog.json (default: <target_dir>/target/catalog.json)",
)
@click.option("--json", "as_json", is_flag=True, help="Print raw JSON instead of the terminal report.")
@click.option("--ci-comment", is_flag=True, help="Print a markdown summary suitable for a PR comment.")
@click.option(
    "--fail-under",
    type=int,
    default=None,
    help="Exit with a non-zero status code if the health score is below this threshold (useful in CI).",
)
def analyze(
    target_dir: str,
    manifest_override: str | None,
    catalog_override: str | None,
    as_json: bool,
    ci_comment: bool,
    fail_under: int | None,
) -> None:
    """Analyze a dbt project and print its health report.

    TARGET_DIR is the root of your dbt project (the directory containing
    dbt_project.yml). Defaults to the current directory. Run `dbt compile`
    or `dbt docs generate` first so target/manifest.json exists.
    """
    # Local import: keeps this block self-contained without touching the
    # module's import header (rich is already a dependency of this file).
    from rich.markup import escape

    root = Path(target_dir)
    manifest_path = Path(manifest_override) if manifest_override else root / "target" / "manifest.json"
    catalog_path = Path(catalog_override) if catalog_override else root / "target" / "catalog.json"

    try:
        project = load_manifest(manifest_path)
    except ManifestNotFoundError as e:
        # Diagnostics go to stderr so stdout stays clean for --json / --ci-comment
        # consumers. The message is escaped because it may contain "[...]"
        # sequences (e.g. from a path) that rich would otherwise parse as markup.
        Console(stderr=True).print(f"[bold red]Error:[/] {escape(str(e))}")
        raise SystemExit(1) from e

    catalog_loaded = load_catalog(catalog_path, project)

    graph = build_model_graph(project)

    # Run each analysis module in a fixed order; the order determines the
    # order of findings in every output format.
    findings = []
    findings += lineage.analyze(project, graph)
    findings += testing.analyze(project)
    findings += duplicates.analyze(project)
    doc_coverage, doc_findings = documentation.analyze(project)
    findings += doc_findings
    findings += incremental.analyze(project, catalog_loaded)

    model_count = len(project.models)
    score = compute_score(findings, model_count)

    # Exactly one output format is produced; --json wins if both flags are set.
    if as_json:
        payload = _build_json_payload(score, findings, doc_coverage, model_count)
        click.echo(json.dumps(payload, indent=2))
    elif ci_comment:
        click.echo(render_ci_comment(score, findings))
    else:
        render(console, score, findings, doc_coverage, model_count)

    # CI gating: the report is always printed first, then the exit status is set.
    if fail_under is not None and score.score < fail_under:
        raise SystemExit(1)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
# Invoke the click group when this module is executed directly as a script.
if __name__ == "__main__":
    cli()
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from enum import Enum
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Severity(str, Enum):
    """Severity level attached to a finding.

    Mixing in ``str`` makes each member compare equal to its plain string
    value (``Severity.INFO == "info"``).
    """

    CRITICAL = "critical"
    WARNING = "warning"
    INFO = "info"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class Finding:
    """A single issue reported by one of the analysis modules."""

    # Name of the module that produced the finding, e.g. "lineage",
    # "testing", "duplicates".
    module: str
    severity: Severity
    # Model/macro name this finding is about.
    subject: str
    # Short human-readable summary.
    message: str
    # Optional longer explanation / "why".
    detail: str = ""
    # Optional recommended fix.
    suggestion: str = ""
    # Deduction weight used by the scoring engine.
    weight: float = 1.0
    # Extra data; each instance gets its own fresh dict.
    meta: dict = field(default_factory=dict)
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Builds a directed dependency graph of models from a parsed DbtProject."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import networkx as nx
|
|
6
|
+
|
|
7
|
+
from dbt_vitals.parser import DbtProject
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def build_model_graph(project: DbtProject) -> nx.DiGraph:
    """Return a directed graph whose nodes are model unique_ids.

    An edge ``A -> B`` means B depends on A (A feeds into B), matching dbt's
    own lineage direction. Dependencies that are not keys of
    ``project.models`` are left out of the graph.
    """
    models = project.models
    dag = nx.DiGraph()

    # First pass: register every model as a node with its name/path attributes.
    dag.add_nodes_from(
        (model_id, {"name": model.name, "path": model.path})
        for model_id, model in models.items()
    )

    # Second pass: connect upstream -> downstream for model-to-model dependencies.
    dag.add_edges_from(
        (upstream, model_id)
        for model_id, model in models.items()
        for upstream in model.depends_on
        if upstream in models
    )

    return dag
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def models_referenced_by_tests_or_exposures(project: DbtProject) -> set[str]:
    """Collect every unique_id that some test in the project depends on.

    Used as a signal that a model is intentionally a checked, "real" asset.
    Only ``project.tests`` is consulted here, and the ids are returned as
    listed in each test's ``depends_on`` without further filtering.
    """
    return {
        dependency
        for test_node in project.tests.values()
        for dependency in test_node.depends_on
    }
|
|
File without changes
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Documentation coverage module. Reports coverage %, plus a finding per
|
|
2
|
+
undocumented model so the report can list them."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
from dbt_vitals.findings import Finding, Severity
|
|
9
|
+
from dbt_vitals.parser import DbtProject
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class DocCoverage:
    """Documentation coverage summary: percentages plus the raw counts behind them."""

    # Percentages as produced by compute_coverage (rounded to one decimal there).
    model_coverage_pct: float
    column_coverage_pct: float
    # Model counts: all models, and those with a non-blank description.
    total_models: int
    documented_models: int
    # Column counts across all models, and those with a non-blank description.
    total_columns: int
    documented_columns: int
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def compute_coverage(project: DbtProject) -> DocCoverage:
    """Count documented models and columns and return them as a DocCoverage.

    A model or column counts as documented when its description is
    non-empty after stripping whitespace.
    """
    model_nodes = list(project.models.values())

    described_models = [node for node in model_nodes if node.description.strip()]

    # One boolean per column across all models: True when the column is documented.
    column_flags = [
        bool(column.description.strip())
        for node in model_nodes
        for column in node.columns.values()
    ]

    n_models = len(model_nodes)
    n_documented_models = len(described_models)
    n_columns = len(column_flags)
    n_documented_columns = sum(column_flags)

    return DocCoverage(
        model_coverage_pct=_pct(n_documented_models, n_models),
        column_coverage_pct=_pct(n_documented_columns, n_columns),
        total_models=n_models,
        documented_models=n_documented_models,
        total_columns=n_columns,
        documented_columns=n_documented_columns,
    )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _pct(part: int, whole: int) -> float:
    """Return ``part`` as a percentage of ``whole``, rounded to one decimal.

    A falsy ``whole`` (e.g. 0) yields 0.0 instead of dividing by zero.
    """
    if not whole:
        return 0.0
    ratio = part / whole
    return round(ratio * 100, 1)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def analyze(project: DbtProject) -> tuple[DocCoverage, list[Finding]]:
    """Return documentation coverage plus one INFO finding per undocumented model.

    A model is undocumented when its description is blank after stripping
    whitespace. Each such finding carries a weight of 0.25.
    """
    undocumented: list[Finding] = [
        Finding(
            module="documentation",
            severity=Severity.INFO,
            subject=node.name,
            message="Model has no description.",
            weight=0.25,
        )
        for node in project.models.values()
        if not node.description.strip()
    ]
    return compute_coverage(project), undocumented
|