catalogkit-lineage 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25) hide show
  1. catalogkit_lineage-0.1.2/PKG-INFO +114 -0
  2. catalogkit_lineage-0.1.2/README.md +84 -0
  3. catalogkit_lineage-0.1.2/pyproject.toml +65 -0
  4. catalogkit_lineage-0.1.2/setup.cfg +4 -0
  5. catalogkit_lineage-0.1.2/src/catalogkit/lineage/__init__.py +36 -0
  6. catalogkit_lineage-0.1.2/src/catalogkit/lineage/__main__.py +8 -0
  7. catalogkit_lineage-0.1.2/src/catalogkit/lineage/_version.py +3 -0
  8. catalogkit_lineage-0.1.2/src/catalogkit/lineage/api.py +71 -0
  9. catalogkit_lineage-0.1.2/src/catalogkit/lineage/build.py +496 -0
  10. catalogkit_lineage-0.1.2/src/catalogkit/lineage/cli.py +119 -0
  11. catalogkit_lineage-0.1.2/src/catalogkit/lineage/errors.py +15 -0
  12. catalogkit_lineage-0.1.2/src/catalogkit/lineage/loaders.py +243 -0
  13. catalogkit_lineage-0.1.2/src/catalogkit/lineage/models.py +33 -0
  14. catalogkit_lineage-0.1.2/src/catalogkit/lineage/render/json.py +10 -0
  15. catalogkit_lineage-0.1.2/src/catalogkit/lineage/render/text.py +46 -0
  16. catalogkit_lineage-0.1.2/src/catalogkit/lineage/sql_analyzer.py +62 -0
  17. catalogkit_lineage-0.1.2/src/catalogkit_lineage.egg-info/PKG-INFO +114 -0
  18. catalogkit_lineage-0.1.2/src/catalogkit_lineage.egg-info/SOURCES.txt +23 -0
  19. catalogkit_lineage-0.1.2/src/catalogkit_lineage.egg-info/dependency_links.txt +1 -0
  20. catalogkit_lineage-0.1.2/src/catalogkit_lineage.egg-info/entry_points.txt +2 -0
  21. catalogkit_lineage-0.1.2/src/catalogkit_lineage.egg-info/requires.txt +9 -0
  22. catalogkit_lineage-0.1.2/src/catalogkit_lineage.egg-info/top_level.txt +1 -0
  23. catalogkit_lineage-0.1.2/tests/test_build.py +102 -0
  24. catalogkit_lineage-0.1.2/tests/test_cli.py +104 -0
  25. catalogkit_lineage-0.1.2/tests/test_errors.py +87 -0
@@ -0,0 +1,114 @@
1
+ Metadata-Version: 2.4
2
+ Name: catalogkit-lineage
3
+ Version: 0.1.2
4
+ Summary: Build project-level SQL lineage artifacts from dbt manifests or SQL folders.
5
+ Author: ClearMetric Labs
6
+ License-Expression: Apache-2.0
7
+ Project-URL: Homepage, https://github.com/Clearmetric-Labs/CatalogKit
8
+ Project-URL: Source, https://github.com/Clearmetric-Labs/CatalogKit
9
+ Project-URL: Issues, https://github.com/Clearmetric-Labs/CatalogKit/issues
10
+ Keywords: lineage,dbt,sql,catalog,sqlglot
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3 :: Only
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Database
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Requires-Python: >=3.10
22
+ Description-Content-Type: text/markdown
23
+ Requires-Dist: catalogkit-core>=0.1.2
24
+ Requires-Dist: sqlglot>=25.0.0
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
27
+ Provides-Extra: release
28
+ Requires-Dist: build>=1.2.2; extra == "release"
29
+ Requires-Dist: twine>=5.1.1; extra == "release"
30
+
31
+ # catalogkit-lineage
32
+
33
+ `catalogkit-lineage` builds project-level SQL lineage from either:
34
+
35
+ - a dbt `manifest.json` with compiled SQL available, or
36
+ - a folder of `.sql` files
37
+
38
+ It is a headless static-analysis tool:
39
+
40
+ - input: one dbt manifest path or one SQL folder
41
+ - output: a deterministic `LineageMap` plus the shared `CatalogArtifact`
42
+ - no warehouse credentials
43
+ - no dbt execution
44
+ - no AI key
45
+
46
+ ## Install
47
+
48
+ ```bash
49
+ python -m pip install catalogkit-lineage
50
+ ```
51
+
52
+ ## Imports
53
+
54
+ ```python
55
+ from catalogkit.lineage import (
56
+ build_catalog_artifact,
57
+ build_lineage_map,
58
+ build_openlineage_export,
59
+ render_json,
60
+ render_text,
61
+ trace_downstream,
62
+ trace_upstream,
63
+ )
64
+ ```
65
+
66
+ For local development:
67
+
68
+ ```bash
69
+ python -m pip install -e ../catalogkit-core
70
+ python -m pip install -e ".[dev,release]"
71
+ ```
72
+
73
+ ## Quickstart
74
+
75
+ Manifest input:
76
+
77
+ ```bash
78
+ catalogkit-lineage --dialect postgres ./examples/jaffle_shop/manifest.json
79
+ catalogkit-lineage --dialect postgres --format json ./examples/jaffle_shop/manifest.json
80
+ catalogkit-lineage --dialect postgres --format openlineage ./examples/jaffle_shop/manifest.json
81
+ ```
82
+
83
+ Folder input:
84
+
85
+ ```bash
86
+ catalogkit-lineage --dialect postgres ./examples/sql_folder
87
+ catalogkit-lineage --dialect postgres --upstream customers_report.customer_lifetime_value ./examples/sql_folder
88
+ catalogkit-lineage --dialect postgres --downstream orders_base.amount ./examples/sql_folder
89
+ ```
90
+
91
+ ## Output Contract
92
+
93
+ `catalogkit-lineage` exposes a module-specific `LineageMap` with:
94
+
95
+ - `version`
96
+ - `summary`
97
+ - `nodes`
98
+ - `edges`
99
+ - `warnings`
100
+
101
+ For CatalogKit composition, the package also exposes a shared
102
+ `CatalogArtifact` builder backed by `catalogkit-core`.
103
+
104
+ The shared core artifact contains:
105
+
106
+ - `version`
107
+ - `nodes`
108
+ - `edges`
109
+ - `warnings`
110
+
111
+ ## Contract Docs
112
+
113
+ - [`../catalogkit-core/docs/contract.md`](../catalogkit-core/docs/contract.md)
114
+ - [`docs/limitations.md`](docs/limitations.md)
@@ -0,0 +1,84 @@
1
+ # catalogkit-lineage
2
+
3
+ `catalogkit-lineage` builds project-level SQL lineage from either:
4
+
5
+ - a dbt `manifest.json` with compiled SQL available, or
6
+ - a folder of `.sql` files
7
+
8
+ It is a headless static-analysis tool:
9
+
10
+ - input: one dbt manifest path or one SQL folder
11
+ - output: a deterministic `LineageMap` plus the shared `CatalogArtifact`
12
+ - no warehouse credentials
13
+ - no dbt execution
14
+ - no AI key
15
+
16
+ ## Install
17
+
18
+ ```bash
19
+ python -m pip install catalogkit-lineage
20
+ ```
21
+
22
+ ## Imports
23
+
24
+ ```python
25
+ from catalogkit.lineage import (
26
+ build_catalog_artifact,
27
+ build_lineage_map,
28
+ build_openlineage_export,
29
+ render_json,
30
+ render_text,
31
+ trace_downstream,
32
+ trace_upstream,
33
+ )
34
+ ```
35
+
36
+ For local development:
37
+
38
+ ```bash
39
+ python -m pip install -e ../catalogkit-core
40
+ python -m pip install -e ".[dev,release]"
41
+ ```
42
+
43
+ ## Quickstart
44
+
45
+ Manifest input:
46
+
47
+ ```bash
48
+ catalogkit-lineage --dialect postgres ./examples/jaffle_shop/manifest.json
49
+ catalogkit-lineage --dialect postgres --format json ./examples/jaffle_shop/manifest.json
50
+ catalogkit-lineage --dialect postgres --format openlineage ./examples/jaffle_shop/manifest.json
51
+ ```
52
+
53
+ Folder input:
54
+
55
+ ```bash
56
+ catalogkit-lineage --dialect postgres ./examples/sql_folder
57
+ catalogkit-lineage --dialect postgres --upstream customers_report.customer_lifetime_value ./examples/sql_folder
58
+ catalogkit-lineage --dialect postgres --downstream orders_base.amount ./examples/sql_folder
59
+ ```
60
+
61
+ ## Output Contract
62
+
63
+ `catalogkit-lineage` exposes a module-specific `LineageMap` with:
64
+
65
+ - `version`
66
+ - `summary`
67
+ - `nodes`
68
+ - `edges`
69
+ - `warnings`
70
+
71
+ For CatalogKit composition, the package also exposes a shared
72
+ `CatalogArtifact` builder backed by `catalogkit-core`.
73
+
74
+ The shared core artifact contains:
75
+
76
+ - `version`
77
+ - `nodes`
78
+ - `edges`
79
+ - `warnings`
80
+
81
+ ## Contract Docs
82
+
83
+ - [`../catalogkit-core/docs/contract.md`](../catalogkit-core/docs/contract.md)
84
+ - [`docs/limitations.md`](docs/limitations.md)
@@ -0,0 +1,65 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "catalogkit-lineage"
7
+ dynamic = ["version"]
8
+ description = "Build project-level SQL lineage artifacts from dbt manifests or SQL folders."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = "Apache-2.0"
12
+ authors = [
13
+ {name = "ClearMetric Labs"},
14
+ ]
15
+ keywords = [
16
+ "lineage",
17
+ "dbt",
18
+ "sql",
19
+ "catalog",
20
+ "sqlglot",
21
+ ]
22
+ classifiers = [
23
+ "Development Status :: 3 - Alpha",
24
+ "Intended Audience :: Developers",
25
+ "Programming Language :: Python :: 3",
26
+ "Programming Language :: Python :: 3 :: Only",
27
+ "Programming Language :: Python :: 3.10",
28
+ "Programming Language :: Python :: 3.11",
29
+ "Programming Language :: Python :: 3.12",
30
+ "Programming Language :: Python :: 3.13",
31
+ "Topic :: Database",
32
+ "Topic :: Software Development :: Libraries :: Python Modules",
33
+ ]
34
+ dependencies = [
35
+ "catalogkit-core>=0.1.2",
36
+ "sqlglot>=25.0.0",
37
+ ]
38
+
39
+ [project.optional-dependencies]
40
+ dev = [
41
+ "pytest>=7.0.0",
42
+ ]
43
+ release = [
44
+ "build>=1.2.2",
45
+ "twine>=5.1.1",
46
+ ]
47
+
48
+ [project.scripts]
49
+ catalogkit-lineage = "catalogkit.lineage.cli:main"
50
+
51
+ [project.urls]
52
+ Homepage = "https://github.com/Clearmetric-Labs/CatalogKit"
53
+ Source = "https://github.com/Clearmetric-Labs/CatalogKit"
54
+ Issues = "https://github.com/Clearmetric-Labs/CatalogKit/issues"
55
+
56
+ [tool.setuptools.dynamic]
57
+ version = {attr = "catalogkit.lineage._version.__version__"}
58
+
59
+ [tool.setuptools.package-dir]
60
+ "" = "src"
61
+
62
+ [tool.setuptools.packages.find]
63
+ where = ["src"]
64
+ include = ["catalogkit.lineage*"]
65
+ namespaces = true
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,36 @@
1
+ """Public package surface for catalogkit-lineage."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from catalogkit.core import CatalogArtifact
6
+
7
+ from ._version import __version__
8
+ from .api import (
9
+ build_catalog_artifact,
10
+ build_lineage_map,
11
+ build_openlineage_export,
12
+ render_json,
13
+ render_text,
14
+ trace_downstream,
15
+ trace_upstream,
16
+ )
17
+ from .errors import LineageContractError, LineageError, LineageInputError
18
+ from .models import LineageMap, LineageSummary, TraversalResult
19
+
20
+ __all__ = [
21
+ "__version__",
22
+ "build_catalog_artifact",
23
+ "build_lineage_map",
24
+ "build_openlineage_export",
25
+ "CatalogArtifact",
26
+ "LineageContractError",
27
+ "LineageError",
28
+ "LineageInputError",
29
+ "LineageMap",
30
+ "LineageSummary",
31
+ "render_json",
32
+ "render_text",
33
+ "trace_downstream",
34
+ "trace_upstream",
35
+ "TraversalResult",
36
+ ]
@@ -0,0 +1,8 @@
1
+ """Module entrypoint for catalogkit-lineage."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .cli import main
6
+
7
+ if __name__ == "__main__":
8
+ raise SystemExit(main())
@@ -0,0 +1,3 @@
1
+ """Package version for catalogkit-lineage."""
2
+
3
+ __version__ = "0.1.2"
@@ -0,0 +1,71 @@
1
+ """Public API for catalogkit-lineage."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from catalogkit.core import CatalogArtifact
9
+
10
+ from .build import (
11
+ build_catalog_artifact_from_project,
12
+ build_lineage_map_from_project,
13
+ build_openlineage_export_from_project,
14
+ trace_downstream_from_project,
15
+ trace_upstream_from_project,
16
+ )
17
+ from .loaders import load_project
18
+ from .models import LineageMap, TraversalResult
19
+ from .render.json import render_json
20
+ from .render.text import render_text
21
+
22
+
23
def build_lineage_map(path: str | Path, *, dialect: str) -> LineageMap:
    """Return the ``LineageMap`` for the project found at ``path``.

    The project is loaded with ``load_project`` and then handed to
    ``build_lineage_map_from_project``; ``dialect`` is forwarded unchanged
    to both steps.

    Args:
        path: Location of the project input (string or ``Path``).
        dialect: SQL dialect name, keyword-only.

    Returns:
        The ``LineageMap`` produced by ``build_lineage_map_from_project``.
    """
    return build_lineage_map_from_project(
        load_project(path, dialect=dialect),
        dialect=dialect,
    )
27
+
28
+
29
def build_catalog_artifact(path: str | Path, *, dialect: str) -> CatalogArtifact:
    """Return the shared ``CatalogArtifact`` for the project found at ``path``.

    The project is loaded with ``load_project`` and then handed to
    ``build_catalog_artifact_from_project``; ``dialect`` is forwarded
    unchanged to both steps.

    Args:
        path: Location of the project input (string or ``Path``).
        dialect: SQL dialect name, keyword-only.

    Returns:
        The ``CatalogArtifact`` produced by
        ``build_catalog_artifact_from_project``.
    """
    return build_catalog_artifact_from_project(
        load_project(path, dialect=dialect),
        dialect=dialect,
    )
33
+
34
+
35
def trace_upstream(path: str | Path, *, dialect: str, selection: str) -> TraversalResult:
    """Return the upstream column-lineage traversal for ``selection``.

    The project at ``path`` is loaded with ``load_project`` and the traversal
    is delegated to ``trace_upstream_from_project``.

    Args:
        path: Location of the project input (string or ``Path``).
        dialect: SQL dialect name, keyword-only; forwarded to both the loader
            and the traversal.
        selection: The dataset column to trace, keyword-only; passed through
            unchanged.
            NOTE(review): the README CLI examples use a ``dataset.column``
            form -- confirm the same format is expected here.

    Returns:
        The ``TraversalResult`` produced by ``trace_upstream_from_project``.
    """
    return trace_upstream_from_project(
        load_project(path, dialect=dialect),
        dialect=dialect,
        selection=selection,
    )
44
+
45
+
46
def trace_downstream(path: str | Path, *, dialect: str, selection: str) -> TraversalResult:
    """Return the downstream column-lineage traversal for ``selection``.

    The project at ``path`` is loaded with ``load_project`` and the traversal
    is delegated to ``trace_downstream_from_project``.

    Args:
        path: Location of the project input (string or ``Path``).
        dialect: SQL dialect name, keyword-only; forwarded to both the loader
            and the traversal.
        selection: The dataset column to trace, keyword-only; passed through
            unchanged.
            NOTE(review): the README CLI examples use a ``dataset.column``
            form -- confirm the same format is expected here.

    Returns:
        The ``TraversalResult`` produced by ``trace_downstream_from_project``.
    """
    return trace_downstream_from_project(
        load_project(path, dialect=dialect),
        dialect=dialect,
        selection=selection,
    )
55
+
56
+
57
def build_openlineage_export(path: str | Path, *, dialect: str) -> dict[str, Any]:
    """Return the OpenLineage-compatible export for the project at ``path``.

    The project is loaded with ``load_project`` and then handed to
    ``build_openlineage_export_from_project``; ``dialect`` is forwarded
    unchanged to both steps.

    Args:
        path: Location of the project input (string or ``Path``).
        dialect: SQL dialect name, keyword-only.

    Returns:
        The dictionary produced by ``build_openlineage_export_from_project``.
    """
    return build_openlineage_export_from_project(
        load_project(path, dialect=dialect),
        dialect=dialect,
    )
61
+
62
+
63
+ __all__ = [
64
+ "build_catalog_artifact",
65
+ "build_lineage_map",
66
+ "build_openlineage_export",
67
+ "render_json",
68
+ "render_text",
69
+ "trace_downstream",
70
+ "trace_upstream",
71
+ ]