dbt-colibri 0.2.0b4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. dbt_colibri-0.2.0b4/LICENSE +22 -0
  2. dbt_colibri-0.2.0b4/PKG-INFO +74 -0
  3. dbt_colibri-0.2.0b4/README.md +61 -0
  4. dbt_colibri-0.2.0b4/pyproject.toml +46 -0
  5. dbt_colibri-0.2.0b4/setup.cfg +4 -0
  6. dbt_colibri-0.2.0b4/src/dbt_colibri/cli/__init__.py +3 -0
  7. dbt_colibri-0.2.0b4/src/dbt_colibri/cli/cli.py +72 -0
  8. dbt_colibri-0.2.0b4/src/dbt_colibri/cli/main.py +10 -0
  9. dbt_colibri-0.2.0b4/src/dbt_colibri/lineage_extractor/__init__.py +20 -0
  10. dbt_colibri-0.2.0b4/src/dbt_colibri/lineage_extractor/extractor.py +766 -0
  11. dbt_colibri-0.2.0b4/src/dbt_colibri/lineage_extractor/lineage.py +298 -0
  12. dbt_colibri-0.2.0b4/src/dbt_colibri/lineage_extractor/utils.py +81 -0
  13. dbt_colibri-0.2.0b4/src/dbt_colibri/report/__init__.py +3 -0
  14. dbt_colibri-0.2.0b4/src/dbt_colibri/report/generator.py +280 -0
  15. dbt_colibri-0.2.0b4/src/dbt_colibri/report/index.html +276 -0
  16. dbt_colibri-0.2.0b4/src/dbt_colibri.egg-info/PKG-INFO +74 -0
  17. dbt_colibri-0.2.0b4/src/dbt_colibri.egg-info/SOURCES.txt +25 -0
  18. dbt_colibri-0.2.0b4/src/dbt_colibri.egg-info/dependency_links.txt +1 -0
  19. dbt_colibri-0.2.0b4/src/dbt_colibri.egg-info/entry_points.txt +2 -0
  20. dbt_colibri-0.2.0b4/src/dbt_colibri.egg-info/requires.txt +2 -0
  21. dbt_colibri-0.2.0b4/src/dbt_colibri.egg-info/top_level.txt +1 -0
  22. dbt_colibri-0.2.0b4/tests/test_cli.py +119 -0
  23. dbt_colibri-0.2.0b4/tests/test_composition.py +218 -0
  24. dbt_colibri-0.2.0b4/tests/test_extractor.py +702 -0
  25. dbt_colibri-0.2.0b4/tests/test_integration.py +251 -0
  26. dbt_colibri-0.2.0b4/tests/test_selectors.py +490 -0
  27. dbt_colibri-0.2.0b4/tests/test_utils.py +154 -0
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Canva OpenSource
4
+ Modifications copyright (c) 2025 `b-ned`
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
@@ -0,0 +1,74 @@
1
+ Metadata-Version: 2.4
2
+ Name: dbt-colibri
3
+ Version: 0.2.0b4
4
+ Summary: A column lineage parser and dashboarding tool
5
+ Author: bned
6
+ License: MIT
7
+ Requires-Python: >=3.9
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: click>=8.1.7
11
+ Requires-Dist: sqlglot[rs]>=26.33.0
12
+ Dynamic: license-file
13
+
14
+ # dbt-colibri
15
+
16
+ **dbt-colibri** is a lightweight, developer-friendly CLI tool and self-hostable dashboard that extracts and visualizes **full column-level lineage** from your `dbt-core` project — no cloud syncs, agents, or vendor lock-in required.
17
+
18
+ It’s built for data teams who want a transparent, flexible, and open approach to lineage tracking without relying on complex enterprise tooling.
19
+
20
+ ---
21
+
22
+ ## ✨ Features
23
+
24
+ - ✅ **Column-level lineage graph**: Understand how every column is derived across models
25
+ - 🔍 **Model + column metadata**: Parse directly from `dbt`'s manifest and catalog
26
+ - 🧠 **Smart parsing**: Uses the structure of your SQL to extract relationships
27
+ - 📦 **Self-hostable**: Generate static HTML or JSON reports (no server required)
28
+ - 💡 Built for `dbt-core` users working locally or in CI pipelines
29
+
30
+ ---
31
+
32
+ ## 🚀 Quickstart
33
+
34
+ ### 📦 Installation
35
+
36
+ For local development:
37
+
38
+ ```
39
+ git clone https://github.com/b-ned/dbt-colibri.git
40
+ cd dbt-colibri
41
+ pip install -e .
42
+ ```
43
+
44
+ ### ⚙️ Usage
45
+ Generate a lineage report from your dbt project directory:
46
+
47
+ ```
48
+ colibri generate
49
+ ```
50
+
51
+ By default, this will:
52
+
53
+ Look for `target/manifest.json` and `target/catalog.json`
54
+
55
+ Output the results to the `dist/` folder:
56
+
57
+ `colibri-manifest.json`: human-readable lineage data
58
+
59
+ `index.html`: interactive visualization
60
+
61
+ ### Compatibility:
62
+ - dbt-core == 1.10.6
63
+ - python = 3.13.6
64
+ - snowflake dialect
65
+
66
+
67
+ ### 🧰 Built on Open Source
68
+
69
+ This project is based on a fork of [`dbt-column-lineage-extractor`](https://github.com/canva-public/dbt-column-lineage-extractor), originally created under the MIT license.
70
+
71
+ Some core logic is adapted and modified for enhanced usability and reporting.
72
+
73
+
74
+
@@ -0,0 +1,61 @@
1
+ # dbt-colibri
2
+
3
+ **dbt-colibri** is a lightweight, developer-friendly CLI tool and self-hostable dashboard that extracts and visualizes **full column-level lineage** from your `dbt-core` project — no cloud syncs, agents, or vendor lock-in required.
4
+
5
+ It’s built for data teams who want a transparent, flexible, and open approach to lineage tracking without relying on complex enterprise tooling.
6
+
7
+ ---
8
+
9
+ ## ✨ Features
10
+
11
+ - ✅ **Column-level lineage graph**: Understand how every column is derived across models
12
+ - 🔍 **Model + column metadata**: Parse directly from `dbt`'s manifest and catalog
13
+ - 🧠 **Smart parsing**: Uses the structure of your SQL to extract relationships
14
+ - 📦 **Self-hostable**: Generate static HTML or JSON reports (no server required)
15
+ - 💡 Built for `dbt-core` users working locally or in CI pipelines
16
+
17
+ ---
18
+
19
+ ## 🚀 Quickstart
20
+
21
+ ### 📦 Installation
22
+
23
+ For local development:
24
+
25
+ ```
26
+ git clone https://github.com/b-ned/dbt-colibri.git
27
+ cd dbt-colibri
28
+ pip install -e .
29
+ ```
30
+
31
+ ### ⚙️ Usage
32
+ Generate a lineage report from your dbt project directory:
33
+
34
+ ```
35
+ colibri generate
36
+ ```
37
+
38
+ By default, this will:
39
+
40
+ Look for `target/manifest.json` and `target/catalog.json`
41
+
42
+ Output the results to the `dist/` folder:
43
+
44
+ `colibri-manifest.json`: human-readable lineage data
45
+
46
+ `index.html`: interactive visualization
47
+
48
+ ### Compatibility:
49
+ - dbt-core == 1.10.6
50
+ - python = 3.13.6
51
+ - snowflake dialect
52
+
53
+
54
+ ### 🧰 Built on Open Source
55
+
56
+ This project is based on a fork of [`dbt-column-lineage-extractor`](https://github.com/canva-public/dbt-column-lineage-extractor), originally created under the MIT license.
57
+
58
+ Some core logic is adapted and modified for enhanced usability and reporting.
59
+
60
+
61
+
@@ -0,0 +1,46 @@
1
+ [project]
2
+ name = "dbt-colibri"
3
+ version = "0.2.0b4"
4
+ description = "A column lineage parser and dashboarding tool"
5
+ authors = [
6
+ { name="bned" }
7
+ ]
8
+ license = { text = "MIT" }
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ dependencies = [
12
+ "click>=8.1.7",
13
+ "sqlglot[rs]>=26.33.0",
14
+ ]
15
+
16
+ [project.scripts]
17
+ colibri = "dbt_colibri.cli.cli:cli"
18
+
19
+ [build-system]
20
+ requires = ["setuptools", "wheel"]
21
+ build-backend = "setuptools.build_meta"
22
+
23
+ [tool.setuptools]
24
+ package-dir = {"" = "src"}
25
+ include-package-data = true
26
+
27
+ [tool.setuptools.packages.find]
28
+ where = ["src"]
29
+
30
+ [tool.setuptools.package-data]
31
+ "*" = ["*.html"]
32
+
33
+ [dependency-groups]
34
+ dev = [
35
+ "pytest>=8.3.5",
36
+ "ruff",
37
+ "mypy",
38
+ "build",
39
+ "twine",
40
+ ]
41
+
42
+ [[tool.uv.index]]
43
+ name = "testpypi"
44
+ url = "https://test.pypi.org/simple/"
45
+ publish-url = "https://test.pypi.org/legacy/"
46
+ explicit = true
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,3 @@
1
+ from .cli import generate_report
2
+
3
+ __all__ = ["generate_report"]
@@ -0,0 +1,72 @@
1
+ # src/dbt_colibri/cli/cli.py
2
+
3
+ import click
4
+ import os
5
+ from ..lineage_extractor.extractor import DbtColumnLineageExtractor
6
+ from ..report.generator import DbtColibriReportGenerator
7
+
8
+
9
+ COLIBRI_LOGO = r"""
10
+ ______ ______ __ __ ______ ______ __
11
+ /\ ___\ /\ __ \ /\ \ /\ \ /\ == \ /\ == \ /\ \
12
+ \ \ \____ \ \ \/\ \ \ \ \____ \ \ \ \ \ __< \ \ __< \ \ \
13
+ \ \_____\ \ \_____\ \ \_____\ \ \_\ \ \_____\ \ \_\ \_\ \ \_\
14
+ \/_____/ \/_____/ \/_____/ \/_/ \/_____/ \/_/ /_/ \/_/
15
+ """
16
+
@click.group()
def cli() -> None:
    """dbt-colibri CLI tool"""
    # Root command group: it performs no work itself and exists only so that
    # subcommands can be attached to it with ``@cli.command``.
    # The docstring above is kept short on purpose because Click prints it
    # verbatim as the group's ``--help`` text.
21
+
@cli.command("generate")
@click.option(
    "--target-dir",
    type=str,
    default="dist",
    help="Directory to save both JSON and HTML files (default: dist)",
)
@click.option(
    "--manifest",
    type=str,
    default="target/manifest.json",
    help="Path to dbt manifest.json file (default: target/manifest.json)",
)
@click.option(
    "--catalog",
    type=str,
    default="target/catalog.json",
    help="Path to dbt catalog.json file (default: target/catalog.json)",
)
def generate_report(target_dir, manifest, catalog):
    """Generate a dbt-colibri lineage report with both JSON and HTML output."""
    # NOTE: the docstring above doubles as the command's ``--help`` text, so
    # the detailed contract is documented in comments instead.
    #
    # Parameters:
    #   target_dir: output directory handed to the report generator.
    #   manifest:   path to the dbt manifest.json; must exist.
    #   catalog:    path to the dbt catalog.json; must exist.
    #
    # Returns 0 on success and 1 on any failure (missing input file or an
    # exception raised while extracting/generating).
    #
    # Click discards a command's plain return value when it runs in
    # standalone mode, so returning 1 alone would still end the process with
    # exit status 0. On failure we therefore also call ``ctx.exit`` when a
    # Click context is active; direct callers without a context simply get
    # the integer back.
    exit_code = 1  # assume failure until the whole pipeline has finished
    try:
        click.echo(f"{COLIBRI_LOGO}\n")

        # Only the first missing input is reported, manifest before catalog.
        if not os.path.exists(manifest):
            click.echo(f"❌ Manifest file not found at {manifest}")
        elif not os.path.exists(catalog):
            click.echo(f"❌ Catalog file not found at {catalog}")
        else:
            click.echo("🔍 Loading dbt manifest and catalog...")
            extractor = DbtColumnLineageExtractor(manifest, catalog)

            click.echo("📊 Extracting lineage data...")
            report_generator = DbtColibriReportGenerator(extractor)

            click.echo("🚀 Generating report...")
            report_generator.generate_report(target_dir=target_dir)

            click.echo("✅ Report completed!")
            click.echo(f" 📁 JSON: {target_dir}/colibri-manifest.json")
            click.echo(f" 🌐 HTML: {target_dir}/index.html")
            exit_code = 0
    except Exception as e:
        # Top-level CLI boundary: show a short message instead of a traceback.
        click.echo(f"❌ Error: {str(e)}")

    if exit_code != 0:
        # Done outside the ``try`` on purpose: ``ctx.exit`` raises
        # ``click.exceptions.Exit``, which the broad handler above would
        # otherwise swallow and report as an error.
        ctx = click.get_current_context(silent=True)
        if ctx is not None:
            ctx.exit(exit_code)
    return exit_code
69
+
70
+
71
+ if __name__ == "__main__":
72
+ cli()
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ dbt-colibri CLI entry point
4
+ """
5
+
6
+ import sys
7
+ from .cli import generate_report
8
+
9
+ if __name__ == "__main__":
10
+ sys.exit(generate_report())
@@ -0,0 +1,20 @@
1
+ from .extractor import DbtColumnLineageExtractor, DBTNodeCatalog
2
+ from .utils import (
3
+ clear_screen,
4
+ read_json,
5
+ pretty_print_dict,
6
+ write_dict_to_file,
7
+ read_dict_from_file,
8
+ setup_logging
9
+ )
10
+
11
+ __all__ = [
12
+ "DbtColumnLineageExtractor",
13
+ "DBTNodeCatalog",
14
+ "clear_screen",
15
+ "read_json",
16
+ "pretty_print_dict",
17
+ "write_dict_to_file",
18
+ "read_dict_from_file",
19
+ "setup_logging",
20
+ ]