dbt-tester 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. dbt_tester-0.1.0/LICENSE +21 -0
  2. dbt_tester-0.1.0/PKG-INFO +72 -0
  3. dbt_tester-0.1.0/README.md +54 -0
  4. dbt_tester-0.1.0/dbt_tester/__init__.py +4 -0
  5. dbt_tester-0.1.0/dbt_tester/__main__.py +7 -0
  6. dbt_tester-0.1.0/dbt_tester/checks/__init__.py +24 -0
  7. dbt_tester-0.1.0/dbt_tester/checks/base_check.py +35 -0
  8. dbt_tester-0.1.0/dbt_tester/checks/doc_checks.py +46 -0
  9. dbt_tester-0.1.0/dbt_tester/checks/lineage_checks.py +35 -0
  10. dbt_tester-0.1.0/dbt_tester/checks/model_checks.py +52 -0
  11. dbt_tester-0.1.0/dbt_tester/checks/source_checks.py +24 -0
  12. dbt_tester-0.1.0/dbt_tester/checks/test_checks.py +57 -0
  13. dbt_tester-0.1.0/dbt_tester/cli.py +48 -0
  14. dbt_tester-0.1.0/dbt_tester/config.py +27 -0
  15. dbt_tester-0.1.0/dbt_tester/core/manifest_reader.py +23 -0
  16. dbt_tester-0.1.0/dbt_tester/core/project_scanner.py +121 -0
  17. dbt_tester-0.1.0/dbt_tester/core/schema_parser.py +50 -0
  18. dbt_tester-0.1.0/dbt_tester/reporters/base_reporter.py +23 -0
  19. dbt_tester-0.1.0/dbt_tester/reporters/console_reporter.py +41 -0
  20. dbt_tester-0.1.0/dbt_tester/reporters/html_reporter.py +35 -0
  21. dbt_tester-0.1.0/dbt_tester/reporters/json_reporter.py +31 -0
  22. dbt_tester-0.1.0/dbt_tester/reporters/templates/report.html +62 -0
  23. dbt_tester-0.1.0/dbt_tester/utils/file_utils.py +47 -0
  24. dbt_tester-0.1.0/dbt_tester.egg-info/PKG-INFO +72 -0
  25. dbt_tester-0.1.0/dbt_tester.egg-info/SOURCES.txt +30 -0
  26. dbt_tester-0.1.0/dbt_tester.egg-info/dependency_links.txt +1 -0
  27. dbt_tester-0.1.0/dbt_tester.egg-info/entry_points.txt +2 -0
  28. dbt_tester-0.1.0/dbt_tester.egg-info/requires.txt +8 -0
  29. dbt_tester-0.1.0/dbt_tester.egg-info/top_level.txt +1 -0
  30. dbt_tester-0.1.0/pyproject.toml +36 -0
  31. dbt_tester-0.1.0/setup.cfg +4 -0
  32. dbt_tester-0.1.0/tests/test_package.py +42 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Your Team
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,72 @@
1
+ Metadata-Version: 2.4
2
+ Name: dbt-tester
3
+ Version: 0.1.0
4
+ Summary: Static analysis and health report generator for dbt projects
5
+ Author-email: Vinoth J <career.vinothj@gmail.com>
6
+ License: MIT
7
+ Requires-Python: >=3.9
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: click>=8.0
11
+ Requires-Dist: pyyaml>=6.0
12
+ Requires-Dist: rich>=13.0
13
+ Requires-Dist: jinja2>=3.0
14
+ Provides-Extra: dev
15
+ Requires-Dist: pytest>=7.0; extra == "dev"
16
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
17
+ Dynamic: license-file
18
+
19
+ # dbt-tester
20
+
21
+ `dbt-tester` is a lightweight static analysis tool that inspects dbt projects without running dbt itself. It scans your models, schema YAML files, and compiled manifest metadata to catch missing documentation, test coverage gaps, naming inconsistencies, and lineage issues. The CLI outputs results in the terminal or exports shareable HTML/JSON reports, making it easy to embed into CI pipelines.
22
+
23
+ ## Features
24
+ - Discovers all models, sources, and tests in a dbt project directory
25
+ - Plug-in style check engine grouped by models, tests, docs, lineage, and sources
26
+ - Console, HTML, and JSON reporters with severity levels
27
+ - Configurable failure threshold for CI/CD
28
+ - Simple to extend with custom checks
29
+
30
+ ## Quick start
31
+
32
+ ```bash
33
+ python -m venv .venv
34
+ source .venv/bin/activate
35
+ pip install -e .[dev]
36
+ dbt-tester run path/to/dbt_project --format console
37
+ ```
38
+
39
+ - Quotes are required when the project path contains spaces: `dbt-tester run "/home/me/dbt project" --format console`.
40
+ - Use `--format html --output report.html` to produce a shareable artifact (`.html`), or `--format json --output findings.json` for automation payloads.
41
+ - Control CI failure behavior with `--fail-on error|warning`; defaults to `error`.
42
+
43
+ ## Common commands
44
+
45
+ | Purpose | Command |
46
+ | --- | --- |
47
+ | Install for development | `pip install -e .[dev]` |
48
+ | Run unit tests | `pytest` |
49
+ | Scan current directory | `dbt-tester run . --format console` |
50
+ | Export HTML report | `dbt-tester run . --format html --output dbt_report.html` |
51
+ | Export JSON report | `dbt-tester run . --format json --output dbt_report.json` |
52
+ | Enforce zero warnings in CI | `dbt-tester run . --fail-on warning` |
53
+
54
+ ## Development
55
+
56
+ ```bash
57
+ python -m venv .venv
58
+ source .venv/bin/activate
59
+ pip install -e .[dev]
60
+ pytest
61
+ ```
62
+
63
+ To ship the package:
64
+
65
+ ```bash
66
+ python -m build
67
+ twine upload dist/*
68
+ ```
69
+
70
+ ## License
71
+
72
+ MIT
@@ -0,0 +1,54 @@
1
+ # dbt-tester
2
+
3
+ `dbt-tester` is a lightweight static analysis tool that inspects dbt projects without running dbt itself. It scans your models, schema YAML files, and compiled manifest metadata to catch missing documentation, test coverage gaps, naming inconsistencies, and lineage issues. The CLI outputs results in the terminal or exports shareable HTML/JSON reports, making it easy to embed into CI pipelines.
4
+
5
+ ## Features
6
+ - Discovers all models, sources, and tests in a dbt project directory
7
+ - Plug-in style check engine grouped by models, tests, docs, lineage, and sources
8
+ - Console, HTML, and JSON reporters with severity levels
9
+ - Configurable failure threshold for CI/CD
10
+ - Simple to extend with custom checks
11
+
12
+ ## Quick start
13
+
14
+ ```bash
15
+ python -m venv .venv
16
+ source .venv/bin/activate
17
+ pip install -e .[dev]
18
+ dbt-tester run path/to/dbt_project --format console
19
+ ```
20
+
21
+ - Quotes are required when the project path contains spaces: `dbt-tester run "/home/me/dbt project" --format console`.
22
+ - Use `--format html --output report.html` to produce a shareable artifact (`.html`), or `--format json --output findings.json` for automation payloads.
23
+ - Control CI failure behavior with `--fail-on error|warning`; defaults to `error`.
24
+
25
+ ## Common commands
26
+
27
+ | Purpose | Command |
28
+ | --- | --- |
29
+ | Install for development | `pip install -e .[dev]` |
30
+ | Run unit tests | `pytest` |
31
+ | Scan current directory | `dbt-tester run . --format console` |
32
+ | Export HTML report | `dbt-tester run . --format html --output dbt_report.html` |
33
+ | Export JSON report | `dbt-tester run . --format json --output dbt_report.json` |
34
+ | Enforce zero warnings in CI | `dbt-tester run . --fail-on warning` |
35
+
36
+ ## Development
37
+
38
+ ```bash
39
+ python -m venv .venv
40
+ source .venv/bin/activate
41
+ pip install -e .[dev]
42
+ pytest
43
+ ```
44
+
45
+ To ship the package:
46
+
47
+ ```bash
48
+ python -m build
49
+ twine upload dist/*
50
+ ```
51
+
52
+ ## License
53
+
54
+ MIT
@@ -0,0 +1,4 @@
1
+ """dbt-tester package."""
2
+
3
+ __all__ = ["__version__"]
4
+ __version__ = "0.1.0"
@@ -0,0 +1,7 @@
1
+ """Enable python -m dbt_tester."""
2
+
3
+ from .cli import main
4
+
5
+
6
+ if __name__ == "__main__":
7
+ main()
@@ -0,0 +1,24 @@
1
+ """Check registry."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .doc_checks import ColumnDocumentationCheck, ModelDocumentationCheck
6
+ from .lineage_checks import OrphanModelCheck
7
+ from .model_checks import ModelSchemaPresenceCheck, StagingNamingCheck
8
+ from .source_checks import SourceFreshnessCheck
9
+ from .test_checks import ColumnNotNullCheck, ModelTestsCoverageCheck
10
+
11
+
12
def get_registered_checks():
    """Return instantiated checks in execution order.

    The ordering is deliberate: structural checks run before documentation,
    test-coverage, source, and lineage checks.
    """
    check_classes = (
        ModelSchemaPresenceCheck,
        StagingNamingCheck,
        ModelDocumentationCheck,
        ColumnDocumentationCheck,
        ModelTestsCoverageCheck,
        ColumnNotNullCheck,
        SourceFreshnessCheck,
        OrphanModelCheck,
    )
    return [check_cls() for check_cls in check_classes]
@@ -0,0 +1,35 @@
1
+ """Base class for all dbt-tester checks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import ABC, abstractmethod
6
+ from dataclasses import dataclass
7
+ from typing import Any, Dict, List
8
+
9
+
10
@dataclass
class Finding:
    """A single issue reported by a check."""

    level: str  # severity: "error" | "warning" | "info"
    model: str  # model or source identifier the finding refers to
    rule: str  # machine-readable rule id
    message: str  # human-readable explanation
    details: Dict[str, Any] | None = None  # optional structured payload


class BaseCheck(ABC):
    """Common plumbing shared by every check implementation."""

    # Subclasses assign their machine-readable rule identifier here.
    rule: str

    @abstractmethod
    def run(self, context: Dict[str, Any]) -> List[Finding]:
        """Execute the rule and return findings."""

    def _finding(
        self,
        *,
        level: str,
        model: str,
        message: str,
        rule: str | None = None,
        details: Dict[str, Any] | None = None,
    ) -> Finding:
        """Build a Finding, defaulting the rule id to this check's ``rule``."""
        effective_rule = rule or self.rule
        return Finding(
            level=level,
            model=model,
            rule=effective_rule,
            message=message,
            details=details,
        )
@@ -0,0 +1,46 @@
1
+ """Documentation-related checks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Dict, List
6
+
7
+ from .base_check import BaseCheck, Finding
8
+
9
+
10
class ModelDocumentationCheck(BaseCheck):
    """Warn when a schema-declared model has no description."""

    rule = "model_missing_description"

    def run(self, context: Dict) -> List[Finding]:
        results: List[Finding] = []
        for record in context["models"].values():
            # Only models declared in schema.yml can carry a description.
            if not record["defined_in_schema"]:
                continue
            description = (record.get("description") or "").strip()
            if description:
                continue
            results.append(
                self._finding(
                    level="warning",
                    model=record["name"],
                    message="Model description missing in schema.yml",
                )
            )
        return results
27
+
28
+
29
class ColumnDocumentationCheck(BaseCheck):
    """Warn for each declared column that lacks a description."""

    rule = "column_missing_description"

    def run(self, context: Dict) -> List[Finding]:
        results: List[Finding] = []
        for record in context["models"].values():
            for col in record["columns"] or []:
                if (col.get("description") or "").strip():
                    continue
                results.append(
                    self._finding(
                        level="warning",
                        model=record["name"],
                        message=f"Column '{col.get('name')}' missing description",
                    )
                )
        return results
@@ -0,0 +1,35 @@
1
+ """Lineage checks that rely on manifest data."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Dict, List
6
+
7
+ from .base_check import BaseCheck, Finding
8
+
9
+
10
class OrphanModelCheck(BaseCheck):
    """Flag manifest models with neither upstream nor downstream references."""

    rule = "orphan_model"

    def run(self, context: Dict) -> List[Finding]:
        manifest = context.get("manifest") or {}
        nodes = manifest.get("nodes") or {}
        # Without a compiled manifest there is no lineage to inspect.
        if not nodes:
            return []
        child_map = manifest.get("child_map") or {}

        results: List[Finding] = []
        for unique_id, node in nodes.items():
            if node.get("resource_type") != "model":
                continue
            upstream = node.get("depends_on", {}).get("nodes", [])
            downstream = child_map.get(unique_id, [])
            if upstream or downstream:
                continue
            results.append(
                self._finding(
                    level="info",
                    model=node.get("name", unique_id),
                    message="Model has no upstream or downstream refs",
                )
            )
        return results
@@ -0,0 +1,52 @@
1
+ """Model structure checks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Dict, List
6
+
7
+ from .base_check import BaseCheck, Finding
8
+
9
+
10
class ModelSchemaPresenceCheck(BaseCheck):
    """Cross-check SQL files against schema.yml declarations."""

    rule = "model_missing_schema"

    def run(self, context: Dict) -> List[Finding]:
        results: List[Finding] = []
        for record in context["models"].values():
            issues = []
            if not record["defined_in_schema"]:
                issues.append(
                    ("warning", "Model has SQL file but is not declared in schema.yml")
                )
            if record["missing_sql"]:
                issues.append(
                    ("error", "Model declared in schema.yml but SQL file missing")
                )
            for level, message in issues:
                results.append(
                    self._finding(level=level, model=record["name"], message=message)
                )
        return results
33
+
34
+
35
class StagingNamingCheck(BaseCheck):
    """Warn when a stg_-prefixed model lives outside models/staging/."""

    rule = "staging_naming_mismatch"

    def run(self, context: Dict) -> List[Finding]:
        results: List[Finding] = []
        for record in context["models"].values():
            model_name = record["name"]
            model_path = record.get("path") or ""
            misplaced = (
                model_name.startswith("stg_") and "models/staging" not in model_path
            )
            if not misplaced:
                continue
            results.append(
                self._finding(
                    level="warning",
                    model=model_name,
                    message="Staging model should live under models/staging/",
                    details={"path": model_path},
                )
            )
        return results
@@ -0,0 +1,24 @@
1
+ """Source-related checks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Dict, List
6
+
7
+ from .base_check import BaseCheck, Finding
8
+
9
+
10
class SourceFreshnessCheck(BaseCheck):
    """Warn for each source table that declares no freshness policy."""

    rule = "source_missing_freshness"

    def run(self, context: Dict) -> List[Finding]:
        results: List[Finding] = []
        for key, entry in context["sources"].items():
            if entry.get("freshness"):
                continue
            results.append(
                self._finding(
                    level="warning",
                    model=key,
                    message="Source freshness not defined",
                )
            )
        return results
@@ -0,0 +1,57 @@
1
+ """Test coverage checks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Dict, Iterable, List
6
+
7
+ from .base_check import BaseCheck, Finding
8
+
9
+
10
class ModelTestsCoverageCheck(BaseCheck):
    """Error when a model has no tests at either model or column level."""

    rule = "model_missing_tests"

    def run(self, context: Dict) -> List[Finding]:
        results: List[Finding] = []
        for record in context["models"].values():
            n_column_tests = 0
            for col in record["columns"]:
                n_column_tests += len(col.get("tests", []) or [])
            # Any model-level or column-level test counts as coverage.
            if len(record.get("tests", [])) + n_column_tests:
                continue
            results.append(
                self._finding(
                    level="error",
                    model=record["name"],
                    message="Model has no tests defined",
                )
            )
        return results
27
+
28
+
29
class ColumnNotNullCheck(BaseCheck):
    """Warn when an id-like column lacks a not_null test."""

    rule = "column_missing_not_null"

    def run(self, context: Dict) -> List[Finding]:
        results: List[Finding] = []
        for record in context["models"].values():
            for col in record["columns"]:
                col_name = col.get("name", "")
                if not col_name:
                    continue
                # Heuristic: columns named "id" or "*_id" are key columns.
                if not (col_name == "id" or col_name.endswith("_id")):
                    continue
                if _has_test(col.get("tests", []), "not_null"):
                    continue
                results.append(
                    self._finding(
                        level="warning",
                        model=record["name"],
                        message=f"Column '{col_name}' missing not_null test",
                    )
                )
        return results
49
+
50
+
51
+ def _has_test(tests: Iterable, expected: str) -> bool:
52
+ for test in tests or []:
53
+ if isinstance(test, str) and test == expected:
54
+ return True
55
+ if isinstance(test, dict) and expected in test:
56
+ return True
57
+ return False
@@ -0,0 +1,48 @@
1
+ """CLI entrypoint built with Click."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ import click
8
+
9
+ from .config import load_config
10
+ from .core.project_scanner import ProjectScanner
11
+ from .reporters.console_reporter import ConsoleReporter
12
+ from .reporters.html_reporter import HtmlReporter
13
+ from .reporters.json_reporter import JsonReporter
14
+
15
+
16
# Top-level Click group; subcommands (e.g. ``run``) attach via @main.command().
@click.group()
def main() -> None:
    """dbt-tester: static health checks for dbt projects."""
19
+
20
+
21
@main.command()
@click.argument("project_path", type=click.Path(file_okay=False, path_type=Path), default=Path("."))
@click.option("--format", "format_", type=click.Choice(["console", "html", "json"]), default=None)
@click.option("--output", default=None, help="Output file path for html/json reporters")
@click.option("--fail-on", type=click.Choice(["error", "warning"]), default=None)
def run(project_path: Path, format_: str | None, output: str | None, fail_on: str | None) -> None:
    """Scan a dbt project and emit a report.

    CLI flags take precedence over values from dbt-tester.yml, which in turn
    override the built-in defaults. Exits with status 1 when findings at or
    above the ``--fail-on`` level are present.
    """
    config = load_config(project_path)
    fmt = format_ or config["format"]
    fail_level = fail_on or config["fail_on"]
    output_path = output or config["output"]

    scanner = ProjectScanner(project_path)
    result = scanner.run_all_checks()

    if fmt in ("html", "json"):
        suffix = f".{fmt}"
        if output:
            # An explicit --output is honored verbatim (backward compatible).
            target = output_path
        elif output_path.endswith(suffix):
            # Bug fix: a config-supplied output that already carries the
            # extension previously became e.g. "report.html.html".
            target = output_path
        else:
            target = f"{output_path}{suffix}"
        reporter = HtmlReporter(result) if fmt == "html" else JsonReporter(result)
        reporter.write(target)
    else:
        ConsoleReporter(result).render()

    counts = result.count_by_level()
    errors = counts.get("error", 0)
    warnings = counts.get("warning", 0)
    if fail_level == "error" and errors > 0:
        raise SystemExit(1)
    # "warning" is the stricter threshold: errors also trip it.
    if fail_level == "warning" and (warnings > 0 or errors > 0):
        raise SystemExit(1)
@@ -0,0 +1,27 @@
1
+ """Configuration helpers for dbt-tester."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Any, Dict
7
+
8
+ from .utils.file_utils import load_yaml
9
+
10
+
11
# Built-in defaults; a project's dbt-tester.yml overrides these key by key.
DEFAULT_CONFIG: Dict[str, Any] = {
    "fail_on": "error",
    "format": "console",
    "output": "dbt_tester_report",
}


def load_config(project_path: str | Path) -> Dict[str, Any]:
    """Load dbt-tester.yml or dbt-tester.yaml from ``project_path``.

    Returns the defaults merged with any file-provided overrides; always a
    fresh dict so callers may mutate it safely.
    """
    root = Path(project_path)
    for filename in ("dbt-tester.yml", "dbt-tester.yaml"):
        config_file = root / filename
        if not config_file.exists():
            continue
        merged = dict(DEFAULT_CONFIG)
        merged.update(load_yaml(config_file))
        return merged
    return dict(DEFAULT_CONFIG)
@@ -0,0 +1,23 @@
1
+ """Read dbt manifest files when available."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Any, Dict
8
+
9
+
10
class ManifestReader:
    """Load manifest.json if present."""

    def __init__(self, project_root: Path):
        self.project_root = project_root

    def read(self) -> Dict[str, Any]:
        """Return the parsed manifest, or {} when absent or not valid JSON."""
        candidate = self.project_root / "target" / "manifest.json"
        if candidate.exists():
            try:
                return json.loads(candidate.read_text())
            except json.JSONDecodeError:
                # A half-written or corrupt manifest is treated as missing.
                return {}
        return {}
@@ -0,0 +1,121 @@
1
+ """Entry point for building context and executing checks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from pathlib import Path
7
+ from typing import Any, Dict, List
8
+
9
+ from ..checks import get_registered_checks
10
+ from ..checks.base_check import Finding
11
+ from ..utils.file_utils import find_files, read_text, relative_to
12
+ from .manifest_reader import ManifestReader
13
+ from .schema_parser import SchemaParser
14
+
15
+
16
@dataclass
class ScanResult:
    """Holds final findings and allows post-processing."""

    context: Dict[str, Any]  # the context dict the checks ran against
    findings: List[Finding] = field(default_factory=list)

    def has_errors(self) -> bool:
        """True when at least one finding has level "error"."""
        return any(item.level == "error" for item in self.findings)

    def count_by_level(self) -> Dict[str, int]:
        """Tally findings per severity; error/warning/info keys always present."""
        tally: Dict[str, int] = {"error": 0, "warning": 0, "info": 0}
        for item in self.findings:
            tally[item.level] = tally.get(item.level, 0) + 1
        return tally
31
+
32
+
33
class ProjectScanner:
    """Builds the dbt project context and runs all checks."""

    def __init__(self, project_path: str | Path):
        # Resolve eagerly so later relative-path computations are stable.
        self.project_path = Path(project_path).resolve()
        if not self.project_path.exists():
            raise FileNotFoundError(f"Project path {self.project_path} does not exist")

    def build_context(self) -> Dict[str, Any]:
        """Assemble the context dict shared by every check.

        Keys: ``project_path``, ``models`` (name -> model record),
        ``sources``, ``manifest`` ({} when target/manifest.json is missing
        or unreadable), and ``stats``.
        """
        schema_data = SchemaParser(self.project_path).parse()
        manifest = ManifestReader(self.project_path).read()

        models: Dict[str, Dict[str, Any]] = {}
        models_dir = self.project_path / "models"
        if models_dir.exists():
            # NOTE(review): records are keyed by SQL file stem, so two models
            # with the same file name in different subfolders would collide —
            # confirm whether that is acceptable for target projects.
            for sql_path in find_files(models_dir, {".sql"}):
                name = sql_path.stem
                schema_entry = schema_data["models"].get(name)
                models[name] = self._build_model_record(sql_path, schema_entry)

        # include schema-only definitions
        for name, schema_entry in schema_data["models"].items():
            if name not in models:
                models[name] = self._build_model_record(None, schema_entry, defined_in_schema=True)

        context: Dict[str, Any] = {
            "project_path": self.project_path,
            "models": models,
            "sources": schema_data["sources"],
            "manifest": manifest,
            "stats": {
                "model_count": len(models),
                "source_count": len(schema_data["sources"]),
            },
        }
        return context

    def run_all_checks(self) -> ScanResult:
        """Build the context once and run every registered check against it."""
        context = self.build_context()
        findings: List[Finding] = []
        for check in get_registered_checks():
            findings.extend(check.run(context))
        return ScanResult(context=context, findings=findings)

    def _build_model_record(
        self,
        sql_path: Path | None,
        schema_entry: Dict[str, Any] | None,
        *,
        defined_in_schema: bool | None = None,
    ) -> Dict[str, Any]:
        """Normalize one model into the record shape consumed by checks.

        ``sql_path`` is None for models declared only in schema.yml; such
        records get ``missing_sql`` set to True.
        """
        if defined_in_schema is None:
            defined_in_schema = schema_entry is not None

        rel_path = relative_to(sql_path, self.project_path) if sql_path else None
        sql_text = read_text(sql_path) if sql_path else ""
        columns = []
        tests: List[Any] = []
        description = ""
        materialized = None
        meta = {}

        if schema_entry:
            description = schema_entry.get("description", "") or ""
            columns = schema_entry.get("columns", []) or []
            tests = schema_entry.get("tests", []) or []
            config = schema_entry.get("config", {}) or {}
            materialized = config.get("materialized")
            meta = schema_entry.get("meta", {}) or {}

        if sql_path:
            model_name = sql_path.stem
        elif schema_entry and schema_entry.get("name"):
            model_name = schema_entry.get("name")
        else:
            # NOTE(review): entries produced by SchemaParser.parse() do not
            # carry a "name" key, so schema-only models currently fall through
            # to "unknown" here — confirm and fix upstream.
            model_name = "unknown"

        return {
            "name": model_name,
            "path": rel_path,
            "sql": sql_text,
            "description": description,
            "columns": columns,
            "tests": tests,
            "materialized": materialized,
            "meta": meta,
            "defined_in_schema": defined_in_schema,
            "missing_sql": sql_path is None,
        }
@@ -0,0 +1,50 @@
1
+ """Schema YAML parsing utilities."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Dict, List
7
+
8
+ from ..utils.file_utils import find_files, load_yaml
9
+
10
+
11
class SchemaParser:
    """Collect metadata from schema.yml / sources.yml files."""

    def __init__(self, project_root: Path):
        self.project_root = project_root

    def parse(self) -> Dict[str, Dict[str, Dict]]:
        """Walk all YAML files under the project root and index models/sources.

        Returns a dict with two keys:
        - "models": model name -> {name, description, columns, tests, meta, config}
        - "sources": "<source>.<table>" -> {description, freshness, meta}
        """
        models: Dict[str, Dict] = {}
        sources: Dict[str, Dict] = {}

        for path in find_files(self.project_root, {".yml", ".yaml"}):
            # dbt_project.yml configures the project itself, not models.
            if path.name.startswith("dbt_project"):
                continue
            doc = load_yaml(path)
            for model in doc.get("models", []) or []:
                name = model.get("name")
                if not name:
                    continue
                models[name] = {
                    # Bug fix: store the name inside the record too — the
                    # scanner reads schema_entry.get("name") for schema-only
                    # models, which previously fell back to "unknown".
                    "name": name,
                    "description": model.get("description", ""),
                    "columns": model.get("columns", []) or [],
                    "tests": model.get("tests", []) or [],
                    "meta": model.get("meta", {}) or {},
                    "config": model.get("config", {}) or {},
                }
            for source in doc.get("sources", []) or []:
                source_name = source.get("name")
                for table in source.get("tables", []) or []:
                    table_name = table.get("name")
                    if not source_name or not table_name:
                        continue
                    key = f"{source_name}.{table_name}"
                    sources[key] = {
                        "description": table.get("description", ""),
                        # Freshness is declared at source level; tables inherit it.
                        "freshness": source.get("freshness"),
                        "meta": table.get("meta", {}) or {},
                    }

        return {"models": models, "sources": sources}
@@ -0,0 +1,23 @@
1
+ """Reporter base class."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Protocol
7
+
8
+ from ..core.project_scanner import ScanResult
9
+
10
+
11
class Reporter(Protocol):
    """Structural interface every reporter satisfies."""

    def render(self) -> None:  # pragma: no cover - interface only
        ...


class BaseReporter:
    """Shared construction and output-path handling for concrete reporters."""

    def __init__(self, result: ScanResult):
        self.result = result

    def ensure_output_path(self, path: str | Path) -> Path:
        """Return *path* as a Path, creating any missing parent directories."""
        target = Path(path)
        target.parent.mkdir(parents=True, exist_ok=True)
        return target
@@ -0,0 +1,41 @@
1
+ """Console reporter using Rich."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from rich.console import Console
6
+ from rich.table import Table
7
+
8
+ from ..checks.base_check import Finding
9
+ from .base_reporter import BaseReporter
10
+
11
+
12
class ConsoleReporter(BaseReporter):
    """Render the scan summary and findings as Rich tables on stdout."""

    def render(self) -> None:
        console = Console()

        # Severity summary is always printed, even with zero findings.
        summary = Table(title="dbt-tester summary")
        summary.add_column("Severity")
        summary.add_column("Count", justify="right")
        level_counts = self.result.count_by_level()
        for severity in ("error", "warning", "info"):
            summary.add_row(severity, str(level_counts.get(severity, 0)))
        console.print(summary)

        findings = self.result.findings
        if not findings:
            console.print("[green]No findings detected.[/green]")
            return

        detail = Table(title="Findings", show_lines=False)
        for heading in ("Level", "Model", "Rule", "Message"):
            detail.add_column(heading)
        for item in findings:
            detail.add_row(item.level, item.model, item.rule, item.message)
        console.print(detail)
@@ -0,0 +1,35 @@
1
+ """HTML reporter built with Jinja2."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ from jinja2 import Environment, FileSystemLoader, select_autoescape
8
+
9
+ from .base_reporter import BaseReporter
10
+
11
+
12
class HtmlReporter(BaseReporter):
    """Render findings into a standalone HTML page via Jinja2."""

    template_name = "report.html"

    def __init__(self, result, template_dir: str | Path | None = None):
        super().__init__(result)
        # Default to the templates directory shipped alongside this module.
        bundled = Path(__file__).parent / "templates"
        self.template_dir = Path(template_dir or bundled)

    def render(self) -> None:
        """Write the report to the default file name in the working directory."""
        self.write("dbt_tester_report.html")

    def write(self, output_path: str | Path) -> Path:
        """Render the template and write it to *output_path*; return the Path."""
        environment = Environment(
            loader=FileSystemLoader(self.template_dir),
            autoescape=select_autoescape(["html"]),
        )
        rendered = environment.get_template(self.template_name).render(
            findings=self.result.findings,
            counts=self.result.count_by_level(),
            stats=self.result.context.get("stats", {}),
        )
        destination = self.ensure_output_path(output_path)
        destination.write_text(rendered)
        return destination
@@ -0,0 +1,31 @@
1
+ """JSON reporter."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+
8
+ from .base_reporter import BaseReporter
9
+
10
+
11
class JsonReporter(BaseReporter):
    """Serialize counts and findings to a JSON file."""

    def render(self) -> None:
        """Write the report to the default file name in the working directory."""
        self.write("dbt_tester_report.json")

    def write(self, output_path: str | Path) -> Path:
        """Dump the scan result as indented JSON to *output_path*."""
        findings_payload = []
        for item in self.result.findings:
            findings_payload.append(
                {
                    "level": item.level,
                    "model": item.model,
                    "rule": item.rule,
                    "message": item.message,
                    "details": item.details,
                }
            )
        payload = {
            "counts": self.result.count_by_level(),
            "findings": findings_payload,
        }
        destination = self.ensure_output_path(output_path)
        destination.write_text(json.dumps(payload, indent=2))
        return destination
@@ -0,0 +1,62 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8" />
5
+ <title>dbt-tester report</title>
6
+ <style>
7
+ body { font-family: "Segoe UI", Tahoma, sans-serif; margin: 2rem; background: #f9fafb; color: #0f172a; }
8
+ h1 { margin-bottom: 0.5rem; }
9
+ .summary { display: flex; gap: 1rem; margin-bottom: 2rem; }
10
+ .card { padding: 1rem 1.5rem; border-radius: 0.75rem; background: white; box-shadow: 0 10px 25px rgba(15,23,42,0.08); }
11
+ table { width: 100%; border-collapse: collapse; margin-top: 1.5rem; background: white; box-shadow: 0 10px 25px rgba(15,23,42,0.08); }
12
+ th, td { padding: 0.75rem 1rem; border-bottom: 1px solid #e2e8f0; text-align: left; }
13
+ th { background: #e2e8f0; text-transform: uppercase; letter-spacing: 0.04em; font-size: 0.8rem; }
14
+ .level-error { color: #b91c1c; font-weight: 600; }
15
+ .level-warning { color: #b45309; font-weight: 600; }
16
+ .level-info { color: #0369a1; font-weight: 600; }
17
+ </style>
18
+ </head>
19
+ <body>
20
+ <h1>dbt-tester report</h1>
21
+ <p>{{ stats.model_count }} models scanned · {{ stats.source_count }} sources</p>
22
+ <section class="summary">
23
+ <div class="card">
24
+ <strong>Errors</strong>
25
+ <div>{{ counts["error"] }}</div>
26
+ </div>
27
+ <div class="card">
28
+ <strong>Warnings</strong>
29
+ <div>{{ counts["warning"] }}</div>
30
+ </div>
31
+ <div class="card">
32
+ <strong>Info</strong>
33
+ <div>{{ counts["info"] }}</div>
34
+ </div>
35
+ </section>
36
+
37
+ <table>
38
+ <thead>
39
+ <tr>
40
+ <th>Level</th>
41
+ <th>Model</th>
42
+ <th>Rule</th>
43
+ <th>Message</th>
44
+ </tr>
45
+ </thead>
46
+ <tbody>
47
+ {% for finding in findings %}
48
+ <tr>
49
+ <td class="level-{{ finding.level }}">{{ finding.level }}</td>
50
+ <td>{{ finding.model }}</td>
51
+ <td>{{ finding.rule }}</td>
52
+ <td>{{ finding.message }}</td>
53
+ </tr>
54
+ {% else %}
55
+ <tr>
56
+ <td colspan="4">No findings 🎉</td>
57
+ </tr>
58
+ {% endfor %}
59
+ </tbody>
60
+ </table>
61
+ </body>
62
+ </html>
@@ -0,0 +1,47 @@
1
+ """Utility helpers for file IO."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Dict, Iterable, Iterator, List, Optional
7
+
8
+ import yaml
9
+
10
+
11
def find_files(root: Path, suffixes: Iterable[str]) -> Iterator[Path]:
    """Yield files under *root* whose suffix (case-insensitive) is in *suffixes*."""
    wanted = {suffix.lower() for suffix in suffixes}
    for candidate in root.rglob("*"):
        if not candidate.is_file():
            continue
        if candidate.suffix.lower() in wanted:
            yield candidate
18
+
19
+
20
def load_yaml(path: Path) -> Dict:
    """Safely read a YAML file, returning an empty dict when missing.

    Non-mapping documents (lists, scalars) are wrapped as ``{"value": data}``
    so callers can always use dict access. Files are decoded as UTF-8
    explicitly (bug fix: the bare ``read_text()`` used the platform locale
    encoding, which breaks non-ASCII YAML on e.g. Windows).
    """
    if not path.exists():
        return {}
    data = yaml.safe_load(path.read_text(encoding="utf-8"))
    if data is None:
        return {}
    if not isinstance(data, dict):
        return {"value": data}
    return data
31
+
32
+
33
def read_text(path: Path) -> str:
    """Return the text contents of ``path`` or empty string when absent.

    Decodes as UTF-8 explicitly (bug fix: the bare ``read_text()`` used the
    platform locale encoding, making model SQL reads non-portable).
    """
    if not path.exists():
        return ""
    return path.read_text(encoding="utf-8")
39
+
40
+
41
def relative_to(path: Path, base: Path) -> str:
    """Return a POSIX relative path string, falling back to absolute."""
    try:
        rel = path.relative_to(base)
    except ValueError:
        # *path* is not under *base*; report it absolute instead.
        return path.as_posix()
    return rel.as_posix()
@@ -0,0 +1,72 @@
1
+ Metadata-Version: 2.4
2
+ Name: dbt-tester
3
+ Version: 0.1.0
4
+ Summary: Static analysis and health report generator for dbt projects
5
+ Author-email: Vinoth J <career.vinothj@gmail.com>
6
+ License: MIT
7
+ Requires-Python: >=3.9
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: click>=8.0
11
+ Requires-Dist: pyyaml>=6.0
12
+ Requires-Dist: rich>=13.0
13
+ Requires-Dist: jinja2>=3.0
14
+ Provides-Extra: dev
15
+ Requires-Dist: pytest>=7.0; extra == "dev"
16
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
17
+ Dynamic: license-file
18
+
19
+ # dbt-tester
20
+
21
+ `dbt-tester` is a lightweight static analysis tool that inspects dbt projects without running dbt itself. It scans your models, schema YAML files, and compiled manifest metadata to catch missing documentation, test coverage gaps, naming inconsistencies, and lineage issues. The CLI outputs results in the terminal or exports shareable HTML/JSON reports, making it easy to embed into CI pipelines.
22
+
23
+ ## Features
24
+ - Discovers all models, sources, and tests in a dbt project directory
25
+ - Plug-in style check engine grouped by models, tests, docs, lineage, and sources
26
+ - Console, HTML, and JSON reporters with severity levels
27
+ - Configurable failure threshold for CI/CD
28
+ - Simple to extend with custom checks
29
+
30
+ ## Quick start
31
+
32
+ ```bash
33
+ python -m venv .venv
34
+ source .venv/bin/activate
35
+ pip install -e .[dev]
36
+ dbt-tester run path/to/dbt_project --format console
37
+ ```
38
+
39
+ - Quotes are required when the project path contains spaces: `dbt-tester run "/home/me/dbt project" --format console`.
40
+ - Use `--format html --output report.html` to produce a shareable artifact (`.html`), or `--format json --output findings.json` for automation payloads.
41
+ - Control CI failure behavior with `--fail-on error|warning`; defaults to `error`.
42
+
43
+ ## Common commands
44
+
45
+ | Purpose | Command |
46
+ | --- | --- |
47
+ | Install for development | `pip install -e .[dev]` |
48
+ | Run unit tests | `pytest` |
49
+ | Scan current directory | `dbt-tester run . --format console` |
50
+ | Export HTML report | `dbt-tester run . --format html --output dbt_report.html` |
51
+ | Export JSON report | `dbt-tester run . --format json --output dbt_report.json` |
52
+ | Enforce zero warnings in CI | `dbt-tester run . --fail-on warning` |
53
+
54
+ ## Development
55
+
56
+ ```bash
57
+ python -m venv .venv
58
+ source .venv/bin/activate
59
+ pip install -e .[dev]
60
+ pytest
61
+ ```
62
+
63
+ To ship the package:
64
+
65
+ ```bash
66
+ python -m build
67
+ twine upload dist/*
68
+ ```
69
+
70
+ ## License
71
+
72
+ MIT
@@ -0,0 +1,30 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ dbt_tester/__init__.py
5
+ dbt_tester/__main__.py
6
+ dbt_tester/cli.py
7
+ dbt_tester/config.py
8
+ dbt_tester.egg-info/PKG-INFO
9
+ dbt_tester.egg-info/SOURCES.txt
10
+ dbt_tester.egg-info/dependency_links.txt
11
+ dbt_tester.egg-info/entry_points.txt
12
+ dbt_tester.egg-info/requires.txt
13
+ dbt_tester.egg-info/top_level.txt
14
+ dbt_tester/checks/__init__.py
15
+ dbt_tester/checks/base_check.py
16
+ dbt_tester/checks/doc_checks.py
17
+ dbt_tester/checks/lineage_checks.py
18
+ dbt_tester/checks/model_checks.py
19
+ dbt_tester/checks/source_checks.py
20
+ dbt_tester/checks/test_checks.py
21
+ dbt_tester/core/manifest_reader.py
22
+ dbt_tester/core/project_scanner.py
23
+ dbt_tester/core/schema_parser.py
24
+ dbt_tester/reporters/base_reporter.py
25
+ dbt_tester/reporters/console_reporter.py
26
+ dbt_tester/reporters/html_reporter.py
27
+ dbt_tester/reporters/json_reporter.py
28
+ dbt_tester/reporters/templates/report.html
29
+ dbt_tester/utils/file_utils.py
30
+ tests/test_package.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ dbt-tester = dbt_tester.cli:main
@@ -0,0 +1,8 @@
1
+ click>=8.0
2
+ pyyaml>=6.0
3
+ rich>=13.0
4
+ jinja2>=3.0
5
+
6
+ [dev]
7
+ pytest>=7.0
8
+ pytest-cov>=4.0
@@ -0,0 +1 @@
1
+ dbt_tester
@@ -0,0 +1,36 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "dbt-tester"
7
+ version = "0.1.0"
8
+ description = "Static analysis and health report generator for dbt projects"
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ authors = [
12
+ { name = "Vinoth J", email = "career.vinothj@gmail.com" }
13
+ ]
14
+ requires-python = ">=3.9"
15
+ dependencies = [
16
+ "click>=8.0",
17
+ "pyyaml>=6.0",
18
+ "rich>=13.0",
19
+ "jinja2>=3.0"
20
+ ]
21
+
22
+ [project.optional-dependencies]
23
+ dev = [
24
+ "pytest>=7.0",
25
+ "pytest-cov>=4.0"
26
+ ]
27
+
28
+ [project.scripts]
29
+ "dbt-tester" = "dbt_tester.cli:main"
30
+
31
+ [tool.setuptools.package-data]
32
+ "dbt_tester" = ["reporters/templates/*.html"]
33
+
34
+ [tool.pytest.ini_options]
35
+ pythonpath = ["."]
36
+ addopts = "-q"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,42 @@
1
+ from pathlib import Path
2
+
3
+ from click.testing import CliRunner
4
+
5
+ from dbt_tester.cli import main
6
+ from dbt_tester.core.project_scanner import ProjectScanner
7
+
8
+ FIXTURE = Path(__file__).parent / "fixtures" / "basic_project"
9
+
10
+
11
def test_build_context_collects_models_and_sources():
    """The scanner should discover the fixture's model and its single source."""
    context = ProjectScanner(FIXTURE).build_context()
    assert "stg_orders" in context["models"]
    assert context["stats"]["source_count"] == 1
16
+
17
+
18
def test_run_all_checks_finds_expected_issues():
    """Running every check should surface the known fixture problems."""
    findings = ProjectScanner(FIXTURE).run_all_checks().findings
    assert "column_missing_description" in {finding.rule for finding in findings}
    assert any(finding.level == "error" for finding in findings)
23
+
24
+
25
def test_cli_run_generates_output(tmp_path):
    """The CLI should write a JSON report and exit non-zero on warnings."""
    report_path = tmp_path / "report.json"
    args = [
        "run",
        str(FIXTURE),
        "--format", "json",
        "--output", str(report_path),
        "--fail-on", "warning",
    ]
    outcome = CliRunner().invoke(main, args)
    # --fail-on warning promotes warning-level findings to a failing exit code.
    assert outcome.exit_code == 1
    assert report_path.exists()