catalogkit-lineage 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- catalogkit_lineage-0.1.2/PKG-INFO +114 -0
- catalogkit_lineage-0.1.2/README.md +84 -0
- catalogkit_lineage-0.1.2/pyproject.toml +65 -0
- catalogkit_lineage-0.1.2/setup.cfg +4 -0
- catalogkit_lineage-0.1.2/src/catalogkit/lineage/__init__.py +36 -0
- catalogkit_lineage-0.1.2/src/catalogkit/lineage/__main__.py +8 -0
- catalogkit_lineage-0.1.2/src/catalogkit/lineage/_version.py +3 -0
- catalogkit_lineage-0.1.2/src/catalogkit/lineage/api.py +71 -0
- catalogkit_lineage-0.1.2/src/catalogkit/lineage/build.py +496 -0
- catalogkit_lineage-0.1.2/src/catalogkit/lineage/cli.py +119 -0
- catalogkit_lineage-0.1.2/src/catalogkit/lineage/errors.py +15 -0
- catalogkit_lineage-0.1.2/src/catalogkit/lineage/loaders.py +243 -0
- catalogkit_lineage-0.1.2/src/catalogkit/lineage/models.py +33 -0
- catalogkit_lineage-0.1.2/src/catalogkit/lineage/render/json.py +10 -0
- catalogkit_lineage-0.1.2/src/catalogkit/lineage/render/text.py +46 -0
- catalogkit_lineage-0.1.2/src/catalogkit/lineage/sql_analyzer.py +62 -0
- catalogkit_lineage-0.1.2/src/catalogkit_lineage.egg-info/PKG-INFO +114 -0
- catalogkit_lineage-0.1.2/src/catalogkit_lineage.egg-info/SOURCES.txt +23 -0
- catalogkit_lineage-0.1.2/src/catalogkit_lineage.egg-info/dependency_links.txt +1 -0
- catalogkit_lineage-0.1.2/src/catalogkit_lineage.egg-info/entry_points.txt +2 -0
- catalogkit_lineage-0.1.2/src/catalogkit_lineage.egg-info/requires.txt +9 -0
- catalogkit_lineage-0.1.2/src/catalogkit_lineage.egg-info/top_level.txt +1 -0
- catalogkit_lineage-0.1.2/tests/test_build.py +102 -0
- catalogkit_lineage-0.1.2/tests/test_cli.py +104 -0
- catalogkit_lineage-0.1.2/tests/test_errors.py +87 -0
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: catalogkit-lineage
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: Build project-level SQL lineage artifacts from dbt manifests or SQL folders.
|
|
5
|
+
Author: ClearMetric Labs
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/Clearmetric-Labs/CatalogKit
|
|
8
|
+
Project-URL: Source, https://github.com/Clearmetric-Labs/CatalogKit
|
|
9
|
+
Project-URL: Issues, https://github.com/Clearmetric-Labs/CatalogKit/issues
|
|
10
|
+
Keywords: lineage,dbt,sql,catalog,sqlglot
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Database
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
Requires-Dist: catalogkit-core>=0.1.2
|
|
24
|
+
Requires-Dist: sqlglot>=25.0.0
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
27
|
+
Provides-Extra: release
|
|
28
|
+
Requires-Dist: build>=1.2.2; extra == "release"
|
|
29
|
+
Requires-Dist: twine>=5.1.1; extra == "release"
|
|
30
|
+
|
|
31
|
+
# catalogkit-lineage
|
|
32
|
+
|
|
33
|
+
`catalogkit-lineage` builds project-level SQL lineage from either:
|
|
34
|
+
|
|
35
|
+
- a dbt `manifest.json` with compiled SQL available, or
|
|
36
|
+
- a folder of `.sql` files
|
|
37
|
+
|
|
38
|
+
It is a headless static-analysis tool:
|
|
39
|
+
|
|
40
|
+
- input: one dbt manifest path or one SQL folder
|
|
41
|
+
- output: a deterministic `LineageMap` plus the shared `CatalogArtifact`
|
|
42
|
+
- no warehouse credentials
|
|
43
|
+
- no dbt execution
|
|
44
|
+
- no AI key
|
|
45
|
+
|
|
46
|
+
## Install
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
python -m pip install catalogkit-lineage
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Imports
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
from catalogkit.lineage import (
|
|
56
|
+
build_catalog_artifact,
|
|
57
|
+
build_lineage_map,
|
|
58
|
+
build_openlineage_export,
|
|
59
|
+
render_json,
|
|
60
|
+
render_text,
|
|
61
|
+
trace_downstream,
|
|
62
|
+
trace_upstream,
|
|
63
|
+
)
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
For local development:
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
python -m pip install -e ../catalogkit-core
|
|
70
|
+
python -m pip install -e ".[dev,release]"
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Quickstart
|
|
74
|
+
|
|
75
|
+
Manifest input:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
catalogkit-lineage --dialect postgres ./examples/jaffle_shop/manifest.json
|
|
79
|
+
catalogkit-lineage --dialect postgres --format json ./examples/jaffle_shop/manifest.json
|
|
80
|
+
catalogkit-lineage --dialect postgres --format openlineage ./examples/jaffle_shop/manifest.json
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Folder input:
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
catalogkit-lineage --dialect postgres ./examples/sql_folder
|
|
87
|
+
catalogkit-lineage --dialect postgres --upstream customers_report.customer_lifetime_value ./examples/sql_folder
|
|
88
|
+
catalogkit-lineage --dialect postgres --downstream orders_base.amount ./examples/sql_folder
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Output Contract
|
|
92
|
+
|
|
93
|
+
`catalogkit-lineage` exposes a module-specific `LineageMap` with:
|
|
94
|
+
|
|
95
|
+
- `version`
|
|
96
|
+
- `summary`
|
|
97
|
+
- `nodes`
|
|
98
|
+
- `edges`
|
|
99
|
+
- `warnings`
|
|
100
|
+
|
|
101
|
+
For CatalogKit composition, the package also exposes a shared
|
|
102
|
+
`CatalogArtifact` builder backed by `catalogkit-core`.
|
|
103
|
+
|
|
104
|
+
The shared core artifact contains:
|
|
105
|
+
|
|
106
|
+
- `version`
|
|
107
|
+
- `nodes`
|
|
108
|
+
- `edges`
|
|
109
|
+
- `warnings`
|
|
110
|
+
|
|
111
|
+
## Contract Docs
|
|
112
|
+
|
|
113
|
+
- [`../catalogkit-core/docs/contract.md`](../catalogkit-core/docs/contract.md)
|
|
114
|
+
- [`docs/limitations.md`](docs/limitations.md)
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# catalogkit-lineage
|
|
2
|
+
|
|
3
|
+
`catalogkit-lineage` builds project-level SQL lineage from either:
|
|
4
|
+
|
|
5
|
+
- a dbt `manifest.json` with compiled SQL available, or
|
|
6
|
+
- a folder of `.sql` files
|
|
7
|
+
|
|
8
|
+
It is a headless static-analysis tool:
|
|
9
|
+
|
|
10
|
+
- input: one dbt manifest path or one SQL folder
|
|
11
|
+
- output: a deterministic `LineageMap` plus the shared `CatalogArtifact`
|
|
12
|
+
- no warehouse credentials
|
|
13
|
+
- no dbt execution
|
|
14
|
+
- no AI key
|
|
15
|
+
|
|
16
|
+
## Install
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
python -m pip install catalogkit-lineage
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Imports
|
|
23
|
+
|
|
24
|
+
```python
|
|
25
|
+
from catalogkit.lineage import (
|
|
26
|
+
build_catalog_artifact,
|
|
27
|
+
build_lineage_map,
|
|
28
|
+
build_openlineage_export,
|
|
29
|
+
render_json,
|
|
30
|
+
render_text,
|
|
31
|
+
trace_downstream,
|
|
32
|
+
trace_upstream,
|
|
33
|
+
)
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
For local development:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
python -m pip install -e ../catalogkit-core
|
|
40
|
+
python -m pip install -e ".[dev,release]"
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Quickstart
|
|
44
|
+
|
|
45
|
+
Manifest input:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
catalogkit-lineage --dialect postgres ./examples/jaffle_shop/manifest.json
|
|
49
|
+
catalogkit-lineage --dialect postgres --format json ./examples/jaffle_shop/manifest.json
|
|
50
|
+
catalogkit-lineage --dialect postgres --format openlineage ./examples/jaffle_shop/manifest.json
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Folder input:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
catalogkit-lineage --dialect postgres ./examples/sql_folder
|
|
57
|
+
catalogkit-lineage --dialect postgres --upstream customers_report.customer_lifetime_value ./examples/sql_folder
|
|
58
|
+
catalogkit-lineage --dialect postgres --downstream orders_base.amount ./examples/sql_folder
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Output Contract
|
|
62
|
+
|
|
63
|
+
`catalogkit-lineage` exposes a module-specific `LineageMap` with:
|
|
64
|
+
|
|
65
|
+
- `version`
|
|
66
|
+
- `summary`
|
|
67
|
+
- `nodes`
|
|
68
|
+
- `edges`
|
|
69
|
+
- `warnings`
|
|
70
|
+
|
|
71
|
+
For CatalogKit composition, the package also exposes a shared
|
|
72
|
+
`CatalogArtifact` builder backed by `catalogkit-core`.
|
|
73
|
+
|
|
74
|
+
The shared core artifact contains:
|
|
75
|
+
|
|
76
|
+
- `version`
|
|
77
|
+
- `nodes`
|
|
78
|
+
- `edges`
|
|
79
|
+
- `warnings`
|
|
80
|
+
|
|
81
|
+
## Contract Docs
|
|
82
|
+
|
|
83
|
+
- [`../catalogkit-core/docs/contract.md`](../catalogkit-core/docs/contract.md)
|
|
84
|
+
- [`docs/limitations.md`](docs/limitations.md)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=77.0.3", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "catalogkit-lineage"
|
|
7
|
+
dynamic = ["version"]
|
|
8
|
+
description = "Build project-level SQL lineage artifacts from dbt manifests or SQL folders."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = "Apache-2.0"
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "ClearMetric Labs"},
|
|
14
|
+
]
|
|
15
|
+
keywords = [
|
|
16
|
+
"lineage",
|
|
17
|
+
"dbt",
|
|
18
|
+
"sql",
|
|
19
|
+
"catalog",
|
|
20
|
+
"sqlglot",
|
|
21
|
+
]
|
|
22
|
+
classifiers = [
|
|
23
|
+
"Development Status :: 3 - Alpha",
|
|
24
|
+
"Intended Audience :: Developers",
|
|
25
|
+
"Programming Language :: Python :: 3",
|
|
26
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
27
|
+
"Programming Language :: Python :: 3.10",
|
|
28
|
+
"Programming Language :: Python :: 3.11",
|
|
29
|
+
"Programming Language :: Python :: 3.12",
|
|
30
|
+
"Programming Language :: Python :: 3.13",
|
|
31
|
+
"Topic :: Database",
|
|
32
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
33
|
+
]
|
|
34
|
+
dependencies = [
|
|
35
|
+
"catalogkit-core>=0.1.2",
|
|
36
|
+
"sqlglot>=25.0.0",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
[project.optional-dependencies]
|
|
40
|
+
dev = [
|
|
41
|
+
"pytest>=7.0.0",
|
|
42
|
+
]
|
|
43
|
+
release = [
|
|
44
|
+
"build>=1.2.2",
|
|
45
|
+
"twine>=5.1.1",
|
|
46
|
+
]
|
|
47
|
+
|
|
48
|
+
[project.scripts]
|
|
49
|
+
catalogkit-lineage = "catalogkit.lineage.cli:main"
|
|
50
|
+
|
|
51
|
+
[project.urls]
|
|
52
|
+
Homepage = "https://github.com/Clearmetric-Labs/CatalogKit"
|
|
53
|
+
Source = "https://github.com/Clearmetric-Labs/CatalogKit"
|
|
54
|
+
Issues = "https://github.com/Clearmetric-Labs/CatalogKit/issues"
|
|
55
|
+
|
|
56
|
+
[tool.setuptools.dynamic]
|
|
57
|
+
version = {attr = "catalogkit.lineage._version.__version__"}
|
|
58
|
+
|
|
59
|
+
[tool.setuptools.package-dir]
|
|
60
|
+
"" = "src"
|
|
61
|
+
|
|
62
|
+
[tool.setuptools.packages.find]
|
|
63
|
+
where = ["src"]
|
|
64
|
+
include = ["catalogkit.lineage*"]
|
|
65
|
+
namespaces = true
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Public package surface for catalogkit-lineage."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from catalogkit.core import CatalogArtifact
|
|
6
|
+
|
|
7
|
+
from ._version import __version__
|
|
8
|
+
from .api import (
|
|
9
|
+
build_catalog_artifact,
|
|
10
|
+
build_lineage_map,
|
|
11
|
+
build_openlineage_export,
|
|
12
|
+
render_json,
|
|
13
|
+
render_text,
|
|
14
|
+
trace_downstream,
|
|
15
|
+
trace_upstream,
|
|
16
|
+
)
|
|
17
|
+
from .errors import LineageContractError, LineageError, LineageInputError
|
|
18
|
+
from .models import LineageMap, LineageSummary, TraversalResult
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"__version__",
|
|
22
|
+
"build_catalog_artifact",
|
|
23
|
+
"build_lineage_map",
|
|
24
|
+
"build_openlineage_export",
|
|
25
|
+
"CatalogArtifact",
|
|
26
|
+
"LineageContractError",
|
|
27
|
+
"LineageError",
|
|
28
|
+
"LineageInputError",
|
|
29
|
+
"LineageMap",
|
|
30
|
+
"LineageSummary",
|
|
31
|
+
"render_json",
|
|
32
|
+
"render_text",
|
|
33
|
+
"trace_downstream",
|
|
34
|
+
"trace_upstream",
|
|
35
|
+
"TraversalResult",
|
|
36
|
+
]
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""Public API for catalogkit-lineage."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from catalogkit.core import CatalogArtifact
|
|
9
|
+
|
|
10
|
+
from .build import (
|
|
11
|
+
build_catalog_artifact_from_project,
|
|
12
|
+
build_lineage_map_from_project,
|
|
13
|
+
build_openlineage_export_from_project,
|
|
14
|
+
trace_downstream_from_project,
|
|
15
|
+
trace_upstream_from_project,
|
|
16
|
+
)
|
|
17
|
+
from .loaders import load_project
|
|
18
|
+
from .models import LineageMap, TraversalResult
|
|
19
|
+
from .render.json import render_json
|
|
20
|
+
from .render.text import render_text
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def build_lineage_map(path: str | Path, *, dialect: str) -> LineageMap:
    """Load one project input and return its lineage map.

    Args:
        path: Location of the project input; handed unchanged to
            ``load_project``.
        dialect: SQL dialect name, forwarded to both the loader and the
            lineage builder.

    Returns:
        The result of ``build_lineage_map_from_project`` for the loaded
        project.
    """
    return build_lineage_map_from_project(
        load_project(path, dialect=dialect),
        dialect=dialect,
    )
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def build_catalog_artifact(path: str | Path, *, dialect: str) -> CatalogArtifact:
    """Load one project input and return the shared catalog artifact.

    Args:
        path: Location of the project input; handed unchanged to
            ``load_project``.
        dialect: SQL dialect name, forwarded to both the loader and the
            artifact builder.

    Returns:
        The result of ``build_catalog_artifact_from_project`` for the loaded
        project.
    """
    return build_catalog_artifact_from_project(
        load_project(path, dialect=dialect),
        dialect=dialect,
    )
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def trace_upstream(
    path: str | Path,
    *,
    dialect: str,
    selection: str,
) -> TraversalResult:
    """Load one project input and trace a selected column's upstream lineage.

    Args:
        path: Location of the project input; handed unchanged to
            ``load_project``.
        dialect: SQL dialect name, forwarded to both the loader and the
            tracer.
        selection: The dataset column to start from, passed through as-is to
            ``trace_upstream_from_project``.

    Returns:
        The result of ``trace_upstream_from_project`` for the loaded project.
    """
    return trace_upstream_from_project(
        load_project(path, dialect=dialect),
        dialect=dialect,
        selection=selection,
    )
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def trace_downstream(
    path: str | Path,
    *,
    dialect: str,
    selection: str,
) -> TraversalResult:
    """Load one project input and trace a selected column's downstream lineage.

    Args:
        path: Location of the project input; handed unchanged to
            ``load_project``.
        dialect: SQL dialect name, forwarded to both the loader and the
            tracer.
        selection: The dataset column to start from, passed through as-is to
            ``trace_downstream_from_project``.

    Returns:
        The result of ``trace_downstream_from_project`` for the loaded
        project.
    """
    return trace_downstream_from_project(
        load_project(path, dialect=dialect),
        dialect=dialect,
        selection=selection,
    )
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def build_openlineage_export(path: str | Path, *, dialect: str) -> dict[str, Any]:
    """Load one project input and return its OpenLineage-compatible export.

    Args:
        path: Location of the project input; handed unchanged to
            ``load_project``.
        dialect: SQL dialect name, forwarded to both the loader and the
            export builder.

    Returns:
        The result of ``build_openlineage_export_from_project`` for the
        loaded project.
    """
    return build_openlineage_export_from_project(
        load_project(path, dialect=dialect),
        dialect=dialect,
    )
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
__all__ = [
|
|
64
|
+
"build_catalog_artifact",
|
|
65
|
+
"build_lineage_map",
|
|
66
|
+
"build_openlineage_export",
|
|
67
|
+
"render_json",
|
|
68
|
+
"render_text",
|
|
69
|
+
"trace_downstream",
|
|
70
|
+
"trace_upstream",
|
|
71
|
+
]
|