dash-ontology 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dash_ontology-0.1.1/.github/workflows/ci.yml +53 -0
- dash_ontology-0.1.1/.github/workflows/daily.yml +33 -0
- dash_ontology-0.1.1/.github/workflows/release.yml +267 -0
- dash_ontology-0.1.1/.gitignore +9 -0
- dash_ontology-0.1.1/CLAUDE.md +25 -0
- dash_ontology-0.1.1/PKG-INFO +63 -0
- dash_ontology-0.1.1/README.md +35 -0
- dash_ontology-0.1.1/dashontology/__init__.py +15 -0
- dash_ontology-0.1.1/dashontology/_classifier_bridge.py +53 -0
- dash_ontology-0.1.1/dashontology/cardinality.py +89 -0
- dash_ontology-0.1.1/dashontology/inference.py +315 -0
- dash_ontology-0.1.1/dashontology/models.py +149 -0
- dash_ontology-0.1.1/dashontology/naming.py +118 -0
- dash_ontology-0.1.1/dashontology/ui.py +173 -0
- dash_ontology-0.1.1/docs/api/dashontology.html +1566 -0
- dash_ontology-0.1.1/docs/api/index.html +7 -0
- dash_ontology-0.1.1/docs/api/search.js +46 -0
- dash_ontology-0.1.1/marketplace/listing.json +13 -0
- dash_ontology-0.1.1/marketplace/quickstart_notebook.py +29 -0
- dash_ontology-0.1.1/pyproject.toml +41 -0
- dash_ontology-0.1.1/tests/test_cardinality.py +111 -0
- dash_ontology-0.1.1/tests/test_dashontology.py +21 -0
- dash_ontology-0.1.1/tests/test_inference.py +232 -0
- dash_ontology-0.1.1/tests/test_naming.py +103 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
lint:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
- uses: actions/setup-python@v5
|
|
15
|
+
with:
|
|
16
|
+
python-version: "3.11"
|
|
17
|
+
- run: pip install ruff
|
|
18
|
+
- run: ruff check dashontology/
|
|
19
|
+
|
|
20
|
+
test:
|
|
21
|
+
runs-on: ubuntu-latest
|
|
22
|
+
needs: lint
|
|
23
|
+
strategy:
|
|
24
|
+
matrix:
|
|
25
|
+
python-version: ["3.9", "3.10", "3.11", "3.12"]
|
|
26
|
+
steps:
|
|
27
|
+
- uses: actions/checkout@v4
|
|
28
|
+
- uses: actions/setup-python@v5
|
|
29
|
+
with:
|
|
30
|
+
python-version: ${{ matrix.python-version }}
|
|
31
|
+
- name: Install
|
|
32
|
+
run: pip install -e ".[dev]" pytest pytest-cov
|
|
33
|
+
- name: Test
|
|
34
|
+
run: pytest tests/ -v --cov=dashontology --cov-report=xml
|
|
35
|
+
- name: Upload coverage
|
|
36
|
+
uses: codecov/codecov-action@v4
|
|
37
|
+
with:
|
|
38
|
+
files: coverage.xml
|
|
39
|
+
|
|
40
|
+
build:
|
|
41
|
+
runs-on: ubuntu-latest
|
|
42
|
+
needs: test
|
|
43
|
+
steps:
|
|
44
|
+
- uses: actions/checkout@v4
|
|
45
|
+
- uses: actions/setup-python@v5
|
|
46
|
+
with:
|
|
47
|
+
python-version: "3.11"
|
|
48
|
+
- run: pip install hatch
|
|
49
|
+
- run: hatch build
|
|
50
|
+
- uses: actions/upload-artifact@v4
|
|
51
|
+
with:
|
|
52
|
+
name: dist
|
|
53
|
+
path: dist/
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
name: Daily Tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
schedule:
|
|
5
|
+
- cron: "0 6 * * *" # Every day 06:00 UTC — tests only, no commit
|
|
6
|
+
workflow_dispatch:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
test:
|
|
10
|
+
name: Test (Python ${{ matrix.python-version }})
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
fail-fast: false
|
|
14
|
+
matrix:
|
|
15
|
+
python-version: ["3.9", "3.10", "3.11", "3.12"]
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- uses: actions/setup-python@v5
|
|
20
|
+
with:
|
|
21
|
+
python-version: ${{ matrix.python-version }}
|
|
22
|
+
|
|
23
|
+
- name: Install
|
|
24
|
+
run: pip install -e ".[dev]" pytest pytest-cov
|
|
25
|
+
|
|
26
|
+
- name: Run tests
|
|
27
|
+
run: pytest tests/ -v --cov=dashontology --cov-report=xml --cov-report=term-missing
|
|
28
|
+
|
|
29
|
+
- name: Upload coverage
|
|
30
|
+
uses: codecov/codecov-action@v4
|
|
31
|
+
with:
|
|
32
|
+
files: coverage.xml
|
|
33
|
+
fail_ci_if_error: false
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
name: Weekly Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
schedule:
|
|
5
|
+
- cron: "0 9 * * 1" # Every Monday 09:00 UTC
|
|
6
|
+
workflow_dispatch:
|
|
7
|
+
inputs:
|
|
8
|
+
release_note:
|
|
9
|
+
description: "Optional release note (shown in GitHub release body)"
|
|
10
|
+
required: false
|
|
11
|
+
default: ""
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
# ── Gate: tests must pass ────────────────────────────────────────────────
|
|
15
|
+
test:
|
|
16
|
+
name: Test (Python ${{ matrix.python-version }})
|
|
17
|
+
runs-on: ubuntu-latest
|
|
18
|
+
strategy:
|
|
19
|
+
fail-fast: true
|
|
20
|
+
matrix:
|
|
21
|
+
python-version: ["3.9", "3.10", "3.11", "3.12"]
|
|
22
|
+
steps:
|
|
23
|
+
- uses: actions/checkout@v4
|
|
24
|
+
|
|
25
|
+
- uses: actions/setup-python@v5
|
|
26
|
+
with:
|
|
27
|
+
python-version: ${{ matrix.python-version }}
|
|
28
|
+
|
|
29
|
+
- name: Install
|
|
30
|
+
run: pip install -e ".[dev]" pytest pytest-cov
|
|
31
|
+
|
|
32
|
+
- name: Run tests
|
|
33
|
+
run: pytest tests/ -v --cov=dashontology --cov-report=xml --cov-report=term-missing
|
|
34
|
+
|
|
35
|
+
- name: Upload coverage
|
|
36
|
+
uses: codecov/codecov-action@v4
|
|
37
|
+
with:
|
|
38
|
+
files: coverage.xml
|
|
39
|
+
fail_ci_if_error: false
|
|
40
|
+
|
|
41
|
+
# ── Generate docs ────────────────────────────────────────────────────────
|
|
42
|
+
docs:
|
|
43
|
+
name: Generate API docs
|
|
44
|
+
runs-on: ubuntu-latest
|
|
45
|
+
needs: test
|
|
46
|
+
steps:
|
|
47
|
+
- uses: actions/checkout@v4
|
|
48
|
+
|
|
49
|
+
- uses: actions/setup-python@v5
|
|
50
|
+
with:
|
|
51
|
+
python-version: "3.11"
|
|
52
|
+
|
|
53
|
+
- name: Install
|
|
54
|
+
run: pip install -e ".[dev]" pdoc
|
|
55
|
+
|
|
56
|
+
- name: Generate docs
|
|
57
|
+
run: |
|
|
58
|
+
pdoc dashontology --output-dir docs/api --docformat google
|
|
59
|
+
echo "Docs generated at $(date -u)" > docs/api/.generated
|
|
60
|
+
|
|
61
|
+
- name: Upload docs artifact
|
|
62
|
+
uses: actions/upload-artifact@v4
|
|
63
|
+
with:
|
|
64
|
+
name: api-docs
|
|
65
|
+
path: docs/api/
|
|
66
|
+
|
|
67
|
+
# ── Release: tag, GitHub release, commit docs ────────────────────────────
|
|
68
|
+
release:
|
|
69
|
+
name: Bump version & release
|
|
70
|
+
runs-on: ubuntu-latest
|
|
71
|
+
needs: [test, docs]
|
|
72
|
+
permissions:
|
|
73
|
+
contents: write
|
|
74
|
+
outputs:
|
|
75
|
+
version: ${{ steps.bump.outputs.version }}
|
|
76
|
+
steps:
|
|
77
|
+
- uses: actions/checkout@v4
|
|
78
|
+
with:
|
|
79
|
+
fetch-depth: 0
|
|
80
|
+
|
|
81
|
+
- uses: actions/setup-python@v5
|
|
82
|
+
with:
|
|
83
|
+
python-version: "3.11"
|
|
84
|
+
|
|
85
|
+
- name: Install build tools
|
|
86
|
+
run: pip install hatch pdoc
|
|
87
|
+
|
|
88
|
+
- name: Bump patch version
|
|
89
|
+
id: bump
|
|
90
|
+
run: |
|
|
91
|
+
current=$(hatch version)
|
|
92
|
+
hatch version patch
|
|
93
|
+
new=$(hatch version)
|
|
94
|
+
echo "version=$new" >> $GITHUB_OUTPUT
|
|
95
|
+
echo "prev_version=$current" >> $GITHUB_OUTPUT
|
|
96
|
+
echo "Bumped $current → $new"
|
|
97
|
+
|
|
98
|
+
- name: Regenerate docs into repo
|
|
99
|
+
run: |
|
|
100
|
+
pip install -e ".[dev]"
|
|
101
|
+
pdoc dashontology --output-dir docs/api --docformat google
|
|
102
|
+
|
|
103
|
+
- name: Build wheel + sdist
|
|
104
|
+
run: hatch build
|
|
105
|
+
|
|
106
|
+
- name: Write release notes
|
|
107
|
+
env:
|
|
108
|
+
VERSION: ${{ steps.bump.outputs.version }}
|
|
109
|
+
PREV_VERSION: ${{ steps.bump.outputs.prev_version }}
|
|
110
|
+
RELEASE_NOTE: ${{ github.event.inputs.release_note }}
|
|
111
|
+
run: |
|
|
112
|
+
cat > RELEASE_NOTES.md << EOF
|
|
113
|
+
## DashOntology — Ontology and Lineage v${VERSION}
|
|
114
|
+
|
|
115
|
+
**Released:** $(date -u '+%Y-%m-%d')
|
|
116
|
+
**Previous:** v${PREV_VERSION}
|
|
117
|
+
|
|
118
|
+
$( [ -n "${RELEASE_NOTE}" ] && echo "### Notes" && echo "${RELEASE_NOTE}" || true )
|
|
119
|
+
|
|
120
|
+
### What's included
|
|
121
|
+
- All tests passing across Python 3.9, 3.10, 3.11, 3.12
|
|
122
|
+
- API documentation regenerated (see \`docs/api/\`)
|
|
123
|
+
- Published to PyPI and Databricks Marketplace
|
|
124
|
+
|
|
125
|
+
### Install
|
|
126
|
+
\`\`\`bash
|
|
127
|
+
pip install dash-ontology==${VERSION}
|
|
128
|
+
\`\`\`
|
|
129
|
+
|
|
130
|
+
### Quick Start (Databricks notebook)
|
|
131
|
+
\`\`\`python
|
|
132
|
+
%pip install dash-ontology==${VERSION}
|
|
133
|
+
import dashontology
|
|
134
|
+
dashontology.launch()
|
|
135
|
+
\`\`\`
|
|
136
|
+
EOF
|
|
137
|
+
|
|
138
|
+
- name: Commit version bump + docs to a release branch
|
|
139
|
+
env:
|
|
140
|
+
VERSION: ${{ steps.bump.outputs.version }}
|
|
141
|
+
run: |
|
|
142
|
+
git config user.name "github-actions[bot]"
|
|
143
|
+
git config user.email "github-actions[bot]@users.noreply.github.com"
|
|
144
|
+
git push origin --delete "refs/tags/v${VERSION}" 2>/dev/null || true
|
|
145
|
+
git push origin --delete "release/v${VERSION}" 2>/dev/null || true
|
|
146
|
+
git tag -d "v${VERSION}" 2>/dev/null || true
|
|
147
|
+
git checkout -b "release/v${VERSION}"
|
|
148
|
+
git add .
|
|
149
|
+
git commit -m "release: v${VERSION} — tests passed, docs updated"
|
|
150
|
+
git tag "v${VERSION}"
|
|
151
|
+
git push origin "release/v${VERSION}"
|
|
152
|
+
git push origin "v${VERSION}"
|
|
153
|
+
|
|
154
|
+
- name: Open and auto-merge release PR
|
|
155
|
+
continue-on-error: true
|
|
156
|
+
env:
|
|
157
|
+
GH_TOKEN: ${{ secrets.RELEASE_TOKEN || github.token }}
|
|
158
|
+
VERSION: ${{ steps.bump.outputs.version }}
|
|
159
|
+
run: |
|
|
160
|
+
gh pr create --base main --head "release/v${VERSION}" \
|
|
161
|
+
--title "release: v${VERSION}" \
|
|
162
|
+
--body "Automated release PR — tests passed, docs regenerated, version bumped to v${VERSION}." \
|
|
163
|
+
2>/dev/null || true
|
|
164
|
+
gh pr merge "release/v${VERSION}" --merge --admin --delete-branch
|
|
165
|
+
|
|
166
|
+
- name: Create GitHub Release
|
|
167
|
+
uses: softprops/action-gh-release@v2
|
|
168
|
+
with:
|
|
169
|
+
tag_name: "v${{ steps.bump.outputs.version }}"
|
|
170
|
+
body_path: RELEASE_NOTES.md
|
|
171
|
+
files: dist/*
|
|
172
|
+
|
|
173
|
+
- name: Upload dist artifact for PyPI job
|
|
174
|
+
uses: actions/upload-artifact@v4
|
|
175
|
+
with:
|
|
176
|
+
name: dist
|
|
177
|
+
path: dist/
|
|
178
|
+
|
|
179
|
+
# ── Publish to PyPI (Trusted Publisher / OIDC — no token needed) ─────────
|
|
180
|
+
publish-pypi:
|
|
181
|
+
name: Publish to PyPI
|
|
182
|
+
runs-on: ubuntu-latest
|
|
183
|
+
needs: release
|
|
184
|
+
permissions:
|
|
185
|
+
id-token: write # required for OIDC trusted publisher
|
|
186
|
+
environment:
|
|
187
|
+
name: pypi
|
|
188
|
+
url: https://pypi.org/project/dash-ontology
|
|
189
|
+
steps:
|
|
190
|
+
- name: Download dist
|
|
191
|
+
uses: actions/download-artifact@v4
|
|
192
|
+
with:
|
|
193
|
+
name: dist
|
|
194
|
+
path: dist/
|
|
195
|
+
|
|
196
|
+
- name: Publish to PyPI
|
|
197
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
198
|
+
|
|
199
|
+
# ── Package for Databricks Marketplace ───────────────────────────────────
|
|
200
|
+
publish-databricks:
|
|
201
|
+
name: Package for Databricks Marketplace
|
|
202
|
+
runs-on: ubuntu-latest
|
|
203
|
+
needs: release
|
|
204
|
+
steps:
|
|
205
|
+
- uses: actions/checkout@v4
|
|
206
|
+
with:
|
|
207
|
+
ref: "v${{ needs.release.outputs.version }}"
|
|
208
|
+
|
|
209
|
+
- name: Download dist
|
|
210
|
+
uses: actions/download-artifact@v4
|
|
211
|
+
with:
|
|
212
|
+
name: dist
|
|
213
|
+
path: dist/
|
|
214
|
+
|
|
215
|
+
- name: Build Marketplace bundle
  env:
    VERSION: ${{ needs.release.outputs.version }}
  run: |
    mkdir -p marketplace-bundle/files

    # Copy wheel
    cp dist/*.whl marketplace-bundle/files/

    # Generate companion notebook.
    # The file name contains spaces and an em dash, so the redirect target
    # MUST be quoted: unquoted, the shell redirects to ".../DashOntology" and
    # passes the remaining words to cat as (non-existent) input files, which
    # fails the step. The heredoc delimiter stays unquoted on purpose so that
    # ${VERSION} is expanded inside the notebook body.
    cat > "marketplace-bundle/files/DashOntology — Ontology and Lineage_Quickstart.py" << NBEOF
    # Databricks notebook source
    # MAGIC %md
    # MAGIC # DashOntology — Ontology and Lineage v${VERSION} for Databricks
    # MAGIC Install and launch the interactive UI.

    # COMMAND ----------
    # MAGIC %pip install dash-ontology==${VERSION}

    # COMMAND ----------
    dbutils.library.restartPython()

    # COMMAND ----------
    import dashontology
    dashontology.launch()
    NBEOF

    # Generate listing metadata.
    # "categories" is a JSON array with one element per category (it was
    # previously a single comma-joined string inside the array).
    cat > marketplace-bundle/listing.json << LEOF
    {
      "listing_name": "DashOntology — Ontology and Lineage",
      "version": "${VERSION}",
      "short_description": "Define entities, relationships and lineage for AI usage",
      "long_description": "DashOntology — Ontology and Lineage provides an ipywidgets UI inside Databricks notebooks to auto-infer a data ontology (object types, links, metrics) from lineage graphs. No coding required for business users.",
      "categories": ["Data Governance", "AI/ML"],
      "tags": ["ontology", "lineage", "databricks", "unity-catalog", "pyspark"],
      "provider": "dash-libs",
      "documentation_url": "https://github.com/dash-libs/dash-ontology",
      "source_url": "https://github.com/dash-libs/dash-ontology",
      "pypi_package": "dash-ontology==${VERSION}"
    }
    LEOF

    # Zip the bundle (archive is written next to the bundle directory)
    cd marketplace-bundle && zip -r ../dashontology-marketplace-${VERSION}.zip .
    echo "Bundle created: dashontology-marketplace-${VERSION}.zip"
|
|
261
|
+
|
|
262
|
+
- name: Upload Marketplace bundle artifact
|
|
263
|
+
uses: actions/upload-artifact@v4
|
|
264
|
+
with:
|
|
265
|
+
name: marketplace-bundle
|
|
266
|
+
path: dashontology-marketplace-*.zip
|
|
267
|
+
retention-days: 90
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# CLAUDE.md — dash-ontology
|
|
2
|
+
|
|
3
|
+
Part of the **Dashlibs** suite. See ~/dashlibs for the full context.
|
|
4
|
+
|
|
5
|
+
## Purpose
|
|
6
|
+
Auto-infers a data ontology (object types, links, metrics) from lineage
|
|
7
|
+
graphs — no AI tokens required. `inference.py`=`infer_ontology()`,
|
|
8
|
+
`models.py`=`ObjectType`/`Link`/`Metric`/`Property`/`OntologyGraph`.
|
|
9
|
+
|
|
10
|
+
## Structure
|
|
11
|
+
- `dashontology/ui.py` — ipywidgets UI, `launch()` entrypoint
|
|
12
|
+
- `dashontology/inference.py` — core inference engine
|
|
13
|
+
- `dashontology/models.py` — dataclasses for the ontology graph
|
|
14
|
+
- `dashontology/cardinality.py`, `dashontology/naming.py`, `dashontology/_classifier_bridge.py` — inference helpers
|
|
15
|
+
- `tests/` — pytest, no Spark dependency for unit tests
|
|
16
|
+
|
|
17
|
+
## Key Design Rules
|
|
18
|
+
- Never import Spark at module level — always inside functions
|
|
19
|
+
- UI calls core classes; never contains business logic
|
|
20
|
+
- `launch()` is always the public entrypoint for business users
|
|
21
|
+
|
|
22
|
+
## CI
|
|
23
|
+
- `ci.yml` — PR gate: lint → test → build
|
|
24
|
+
- `daily.yml` — 06:00 UTC: tests only, no commit
|
|
25
|
+
- `release.yml` — Monday 09:00 UTC: patch bump + GitHub release
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dash-ontology
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Auto-inferred data ontology from lineage graphs — no AI tokens required
|
|
5
|
+
Project-URL: Homepage, https://github.com/dash-libs/dash-ontology
|
|
6
|
+
Author-email: Darshan Shah <darshan.innovation@gmail.com>
|
|
7
|
+
License: Apache-2.0
|
|
8
|
+
Keywords: data-catalog,databricks,knowledge-graph,lineage,ontology,unity-catalog
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Intended Audience :: Information Technology
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Requires-Python: >=3.9
|
|
20
|
+
Requires-Dist: ipywidgets>=8.0
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Requires-Dist: hatch; extra == 'dev'
|
|
23
|
+
Requires-Dist: pdoc; extra == 'dev'
|
|
24
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
25
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
26
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# DashOntology — Databricks Library
|
|
30
|
+
|
|
31
|
+
[](https://github.com/dash-libs/dash-ontology/actions)
|
|
32
|
+
[](https://pypi.org/project/dash-ontology/)
|
|
33
|
+
[](LICENSE)
|
|
34
|
+
|
|
35
|
+
Part of the **[Dashlibs](https://github.com/dash-libs)** suite — Databricks libraries built for business users.
|
|
36
|
+
|
|
37
|
+
## Installation
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
%pip install dash-ontology
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Quick Start
|
|
44
|
+
|
|
45
|
+
```python
|
|
46
|
+
import dashontology
|
|
47
|
+
dashontology.launch() # Opens interactive UI in your Databricks notebook
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Part of Dashlibs
|
|
51
|
+
|
|
52
|
+
| Library | Purpose |
|
|
53
|
+
|---|---|
|
|
54
|
+
| dash-dq | Data Quality |
|
|
55
|
+
| dash-synthetic | Synthetic Data Generation |
|
|
56
|
+
| dash-ml | ML Model Monitoring |
|
|
57
|
+
| dash-ingest | Data Ingestion |
|
|
58
|
+
| dash-gov | Data Governance |
|
|
59
|
+
| dash-ontology | Ontology & Lineage for AI |
|
|
60
|
+
|
|
61
|
+
## License
|
|
62
|
+
|
|
63
|
+
Apache 2.0
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# DashOntology — Databricks Library
|
|
2
|
+
|
|
3
|
+
[CI status](https://github.com/dash-libs/dash-ontology/actions)
|
|
4
|
+
[PyPI package](https://pypi.org/project/dash-ontology/)
|
|
5
|
+
[License: Apache 2.0](LICENSE)
|
|
6
|
+
|
|
7
|
+
Part of the **[Dashlibs](https://github.com/dash-libs)** suite — Databricks libraries built for business users.
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
%pip install dash-ontology
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Quick Start
|
|
16
|
+
|
|
17
|
+
```python
|
|
18
|
+
import dashontology
|
|
19
|
+
dashontology.launch() # Opens interactive UI in your Databricks notebook
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Part of Dashlibs
|
|
23
|
+
|
|
24
|
+
| Library | Purpose |
|
|
25
|
+
|---|---|
|
|
26
|
+
| dash-dq | Data Quality |
|
|
27
|
+
| dash-synthetic | Synthetic Data Generation |
|
|
28
|
+
| dash-ml | ML Model Monitoring |
|
|
29
|
+
| dash-ingest | Data Ingestion |
|
|
30
|
+
| dash-gov | Data Governance |
|
|
31
|
+
| dash-ontology | Ontology & Lineage for AI |
|
|
32
|
+
|
|
33
|
+
## License
|
|
34
|
+
|
|
35
|
+
Apache 2.0
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""DashOntology — Auto-inferred data ontology from lineage graphs."""
|
|
2
|
+
from dashontology.models import ObjectType, Link, Metric, Property, OntologyGraph
|
|
3
|
+
from dashontology.naming import normalize_name, singularize, to_camel_case
|
|
4
|
+
from dashontology.cardinality import infer_cardinality, infer_cardinality_from_ratio
|
|
5
|
+
from dashontology.inference import infer_ontology
|
|
6
|
+
from dashontology.ui import launch
|
|
7
|
+
|
|
8
|
+
__version__ = "0.1.1"
|
|
9
|
+
__all__ = [
|
|
10
|
+
"ObjectType", "Link", "Metric", "Property", "OntologyGraph",
|
|
11
|
+
"normalize_name", "singularize", "to_camel_case",
|
|
12
|
+
"infer_cardinality", "infer_cardinality_from_ratio",
|
|
13
|
+
"infer_ontology",
|
|
14
|
+
"launch",
|
|
15
|
+
]
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Lightweight table role classifier — mirrors dashgov.classifier logic
|
|
3
|
+
without importing from dashgov (keeping dash-ontology self-contained).
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
# Table-name prefixes that mark staging / landing-zone tables.
_STAGING_PREFIXES = {"stg_", "staging_", "tmp_", "temp_", "raw_", "src_", "landing_", "bronze_"}
# Table-name prefixes that mark dimension tables (reported as "entity").
_DIMENSION_PREFIXES = {"dim_", "d_"}
# NOTE(review): this set is not consulted by classify_table_role below, so
# fact_/fct_/f_ tables are only recognised through the lineage heuristic.
# Confirm whether a name-based "fact" rule was intended before adding one.
_FACT_PREFIXES = {"fact_", "fct_", "f_"}
# Table-name suffixes that mark aggregated / reporting tables.
_AGG_SUFFIXES = {
    "_agg", "_aggregated", "_summary", "_report",
    "_metrics", "_stats", "_kpi", "_rollup",
    "_daily", "_weekly", "_monthly", "_yearly",
}
# Table-name suffixes that mark junction (many-to-many bridge) tables.
_JUNCTION_SUFFIXES = {"_map", "_mapping", "_xref", "_bridge", "_link", "_rel", "_assoc"}
# Column-name suffixes counted as foreign-key-like. Kept as a tuple because
# str.endswith accepts a tuple of candidates.
_FK_SUFFIXES = ("_id", "_pk", "_key", "_fk", "_ref", "_uuid")


def _name(full: str) -> str:
    """Return the lower-cased last dot-separated segment of *full*."""
    return full.split(".")[-1].lower()


def classify_table_role(
    full_name: str,
    columns: list[dict],
    n_upstream: int = 0,
    n_downstream: int = 0,
) -> tuple[str, float]:
    """
    Guess the role of a table from its name, columns and lineage fan-in/out.

    Parameters
    ----------
    full_name    : table name, optionally dot-qualified; only the last
                   segment is inspected, case-insensitively
    columns      : column dicts; only the ``"name"`` key is read. A missing
                   or ``None`` name is treated as an empty string
    n_upstream   : number of upstream tables in the lineage graph
    n_downstream : number of downstream tables in the lineage graph

    Returns
    -------
    (role: str, confidence: float)
        role ∈ {"staging", "aggregation", "entity", "junction", "fact",
        "unknown"}

    Rules are evaluated in order and the first match wins: name-based
    rules (staging prefix, aggregation suffix, dimension prefix, junction
    suffix) come first, then structural rules based on the share of
    FK-like columns and on the upstream/downstream counts.
    """
    name = _name(full_name)
    n_cols = len(columns)

    # `or ""` also covers an explicit None, which dict.get's default does
    # not: {"name": None}.get("name", "") returns None and would crash on
    # .lower().
    col_names = [(c.get("name") or "").lower() for c in columns]
    # A bare "id" column is the table's own key, not a foreign key.
    n_fk = sum(
        1 for col in col_names
        if col != "id" and col.endswith(_FK_SUFFIXES)
    )

    # --- name-based rules -------------------------------------------------
    if any(name.startswith(p) for p in _STAGING_PREFIXES):
        return "staging", 0.90
    if any(name.endswith(s) for s in _AGG_SUFFIXES):
        return "aggregation", 0.90
    if any(name.startswith(p) for p in _DIMENSION_PREFIXES):
        return "entity", 0.90
    if any(name.endswith(s) for s in _JUNCTION_SUFFIXES):
        return "junction", 0.88

    # --- structural rules -------------------------------------------------
    # At least two FK-like columns making up 60% or more of the table.
    if n_cols >= 2 and n_fk >= 2 and n_fk / max(n_cols, 1) >= 0.6:
        return "junction", 0.80
    # No upstream tables and at least three columns.
    if n_upstream == 0 and n_cols >= 3:
        return "entity", 0.78
    # Has upstream and downstream tables and at least one FK-like column.
    if n_upstream >= 1 and n_fk >= 1 and n_downstream >= 1:
        return "fact", 0.70
    # Fed by two or more tables and feeding none.
    if n_upstream >= 2 and n_downstream == 0:
        return "aggregation", 0.60
    return "unknown", 0.40
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Cardinality inference for object type links.
|
|
3
|
+
|
|
4
|
+
Uses column statistics (unique counts vs total counts) from both sides
|
|
5
|
+
of a join to determine whether the relationship is 1:1, 1:N, or N:M.
|
|
6
|
+
All pure Python — no Spark required.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def infer_cardinality(
    from_unique: int,
    from_total: int,
    to_unique: int,
    to_total: int,
    one_to_one_threshold: float = 0.95,
) -> tuple[str, float]:
    """
    Classify a join as 1:1, 1:N or N:M from distinct/total row counts.

    Parameters
    ----------
    from_unique : distinct values in the FK column of the *from* table
    from_total  : total non-null rows in the FK column
    to_unique   : distinct values in the PK column of the *to* table
    to_total    : total non-null rows in the PK column
    one_to_one_threshold : minimum unique/total ratio for a column to be
                  treated as unique

    Returns
    -------
    (cardinality: str, confidence: float)
        cardinality ∈ {"1:1", "1:N", "N:M"}

    Decision order
    --------------
    1. Either total is <= 0           → ("1:N", 0.40), nothing to measure
    2. PK ratio below the threshold   → ("N:M", 0.55)
    3. FK ratio at/above threshold    → ("1:1", 0.85)
    4. Otherwise                      → "1:N", confidence 0.65–0.95,
       growing with the share of duplicated FK values
    """
    # Without rows on both sides no ratio can be computed; fall back to the
    # most common relationship with low confidence.
    if to_total <= 0 or from_total <= 0:
        return "1:N", 0.40

    # Duplicates on the PK side: likely N:M or a bad join.
    pk_rate = to_unique / to_total
    if not pk_rate >= one_to_one_threshold:
        return "N:M", 0.55

    # PK is unique; an FK that is unique as well makes the link 1:1.
    fk_rate = from_unique / from_total
    if fk_rate >= one_to_one_threshold:
        return "1:1", 0.85

    # Unique PK, repeated FK values → 1:N. The more the FK repeats
    # (0 = all distinct, 1 = a single value), the higher the confidence.
    duplication = 1.0 - fk_rate
    score = min(0.95, 0.65 + duplication * 0.3)
    return "1:N", round(score, 3)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def infer_cardinality_from_ratio(avg_fk_per_pk: float) -> tuple[str, float]:
    """
    Simpler heuristic when only the average FK-per-PK ratio is known.

    Parameters
    ----------
    avg_fk_per_pk : average number of FK rows per unique PK value

    Returns
    -------
    (cardinality: str, confidence: float)
        cardinality ∈ {"1:1", "1:N"}

    A non-positive or NaN ratio carries no information, so it yields the
    low-confidence default ("1:N", 0.40) — the same fallback
    ``infer_cardinality`` uses when a side has no rows.

    Examples:
         1.0 → 1:1
         3.5 → 1:N
        12.0 → 1:N (strong)
    """
    # `not x > 0` is true for zero, negatives and NaN alike (every
    # comparison with NaN is False). Without this guard NaN fell through
    # to the last branch and came back as ("1:N", 0.95).
    if not avg_fk_per_pk > 0:
        return "1:N", 0.40
    if avg_fk_per_pk <= 1.05:
        return "1:1", 0.80
    if avg_fk_per_pk <= 1.5:
        return "1:N", 0.60  # borderline
    # The ratio is clamped at 20, so this tops out at 0.90; the outer
    # min() is only a safety bound.
    return "1:N", min(0.95, 0.70 + min(avg_fk_per_pk, 20) / 100)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def cardinality_label(card: str) -> str:
    """Spell out a cardinality code; unrecognised codes are returned unchanged."""
    labels = {
        "1:1": "one-to-one",
        "1:N": "one-to-many",
        "N:M": "many-to-many",
    }
    if card in labels:
        return labels[card]
    return card
|