dash-ontology 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,53 @@
1
# CI — gate for pushes and pull requests that target main.
# Job order is enforced with `needs`: lint -> test -> build.
name: CI

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  # Static analysis of the package directory with ruff.
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - run: pip install ruff
      - run: ruff check dashontology/

  # Test suite with coverage, once per Python version in the matrix.
  test:
    needs: lint
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version:
          - "3.9"
          - "3.10"
          - "3.11"
          - "3.12"
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install
        run: pip install -e ".[dev]" pytest pytest-cov
      - name: Test
        run: pytest tests/ -v --cov=dashontology --cov-report=xml
      - name: Upload coverage
        uses: codecov/codecov-action@v4
        with:
          files: coverage.xml

  # Build the distributions with hatch and keep them as a workflow artifact.
  build:
    needs: test
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - run: pip install hatch
      - run: hatch build
      - uses: actions/upload-artifact@v4
        with:
          name: dist
          path: dist/
@@ -0,0 +1,33 @@
1
# Daily Tests — scheduled run of the test matrix. Nothing is committed or
# released by this workflow; it can also be started manually.
name: Daily Tests

on:
  schedule:
    # Every day 06:00 UTC — tests only, no commit
    - cron: "0 6 * * *"
  workflow_dispatch:

jobs:
  test:
    name: Test (Python ${{ matrix.python-version }})
    runs-on: ubuntu-latest
    strategy:
      # Let every Python version finish even when one of them fails.
      fail-fast: false
      matrix:
        python-version:
          - "3.9"
          - "3.10"
          - "3.11"
          - "3.12"
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install
        run: pip install -e ".[dev]" pytest pytest-cov

      - name: Run tests
        run: pytest tests/ -v --cov=dashontology --cov-report=xml --cov-report=term-missing

      - name: Upload coverage
        uses: codecov/codecov-action@v4
        with:
          files: coverage.xml
          # A failed coverage upload must not fail the run.
          fail_ci_if_error: false
@@ -0,0 +1,267 @@
1
# Weekly Release — test gate, API docs, patch version bump, GitHub release,
# PyPI publish and a Databricks Marketplace bundle.
name: Weekly Release

on:
  schedule:
    - cron: "0 9 * * 1" # Every Monday 09:00 UTC
  workflow_dispatch:
    inputs:
      release_note:
        description: "Optional release note (shown in GitHub release body)"
        required: false
        default: ""

jobs:
  # ── Gate: tests must pass ────────────────────────────────────────────────
  test:
    name: Test (Python ${{ matrix.python-version }})
    runs-on: ubuntu-latest
    strategy:
      fail-fast: true
      matrix:
        python-version: ["3.9", "3.10", "3.11", "3.12"]
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install
        run: pip install -e ".[dev]" pytest pytest-cov

      - name: Run tests
        run: pytest tests/ -v --cov=dashontology --cov-report=xml --cov-report=term-missing

      - name: Upload coverage
        uses: codecov/codecov-action@v4
        with:
          files: coverage.xml
          fail_ci_if_error: false

  # ── Generate docs ────────────────────────────────────────────────────────
  docs:
    name: Generate API docs
    runs-on: ubuntu-latest
    needs: test
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install
        run: pip install -e ".[dev]" pdoc

      - name: Generate docs
        run: |
          pdoc dashontology --output-dir docs/api --docformat google
          echo "Docs generated at $(date -u)" > docs/api/.generated

      - name: Upload docs artifact
        uses: actions/upload-artifact@v4
        with:
          name: api-docs
          path: docs/api/

  # ── Release: tag, GitHub release, commit docs ────────────────────────────
  release:
    name: Bump version & release
    runs-on: ubuntu-latest
    needs: [test, docs]
    permissions:
      contents: write
      # Needed by `gh pr create` / `gh pr merge` below when the step falls
      # back to github.token: scopes not listed here are set to "none".
      pull-requests: write
    outputs:
      version: ${{ steps.bump.outputs.version }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install build tools
        run: pip install hatch pdoc

      - name: Bump patch version
        id: bump
        run: |
          current=$(hatch version)
          hatch version patch
          new=$(hatch version)
          echo "version=$new" >> "$GITHUB_OUTPUT"
          echo "prev_version=$current" >> "$GITHUB_OUTPUT"
          echo "Bumped $current → $new"

      - name: Regenerate docs into repo
        run: |
          pip install -e ".[dev]"
          pdoc dashontology --output-dir docs/api --docformat google

      - name: Build wheel + sdist
        run: hatch build

      - name: Write release notes
        env:
          VERSION: ${{ steps.bump.outputs.version }}
          PREV_VERSION: ${{ steps.bump.outputs.prev_version }}
          RELEASE_NOTE: ${{ github.event.inputs.release_note }}
        run: |
          cat > RELEASE_NOTES.md << EOF
          ## DashOntology — Ontology and Lineage v${VERSION}

          **Released:** $(date -u '+%Y-%m-%d')
          **Previous:** v${PREV_VERSION}

          $( [ -n "${RELEASE_NOTE}" ] && echo "### Notes" && echo "${RELEASE_NOTE}" || true )

          ### What's included
          - All tests passing across Python 3.9, 3.10, 3.11, 3.12
          - API documentation regenerated (see \`docs/api/\`)
          - Published to PyPI and Databricks Marketplace

          ### Install
          \`\`\`bash
          pip install dash-ontology==${VERSION}
          \`\`\`

          ### Quick Start (Databricks notebook)
          \`\`\`python
          %pip install dash-ontology==${VERSION}
          import dashontology
          dashontology.launch()
          \`\`\`
          EOF

      - name: Commit version bump + docs to a release branch
        env:
          VERSION: ${{ steps.bump.outputs.version }}
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          # Best-effort cleanup of a tag/branch left over from an earlier
          # attempt at this same version; failures are ignored on purpose.
          git push origin --delete "refs/tags/v${VERSION}" 2>/dev/null || true
          git push origin --delete "release/v${VERSION}" 2>/dev/null || true
          git tag -d "v${VERSION}" 2>/dev/null || true
          git checkout -b "release/v${VERSION}"
          git add .
          git commit -m "release: v${VERSION} — tests passed, docs updated"
          git tag "v${VERSION}"
          git push origin "release/v${VERSION}"
          git push origin "v${VERSION}"

      - name: Open and auto-merge release PR
        continue-on-error: true
        env:
          GH_TOKEN: ${{ secrets.RELEASE_TOKEN || github.token }}
          VERSION: ${{ steps.bump.outputs.version }}
        run: |
          gh pr create --base main --head "release/v${VERSION}" \
            --title "release: v${VERSION}" \
            --body "Automated release PR — tests passed, docs regenerated, version bumped to v${VERSION}." \
            2>/dev/null || true
          gh pr merge "release/v${VERSION}" --merge --admin --delete-branch

      - name: Create GitHub Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: "v${{ steps.bump.outputs.version }}"
          body_path: RELEASE_NOTES.md
          files: dist/*

      - name: Upload dist artifact for PyPI job
        uses: actions/upload-artifact@v4
        with:
          name: dist
          path: dist/

  # ── Publish to PyPI (Trusted Publisher / OIDC — no token needed) ─────────
  publish-pypi:
    name: Publish to PyPI
    runs-on: ubuntu-latest
    needs: release
    permissions:
      id-token: write # required for OIDC trusted publisher
    environment:
      name: pypi
      url: https://pypi.org/project/dash-ontology
    steps:
      - name: Download dist
        uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist/

      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1

  # ── Package for Databricks Marketplace ───────────────────────────────────
  publish-databricks:
    name: Package for Databricks Marketplace
    runs-on: ubuntu-latest
    needs: release
    steps:
      - uses: actions/checkout@v4
        with:
          ref: "v${{ needs.release.outputs.version }}"

      - name: Download dist
        uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist/

      - name: Build Marketplace bundle
        env:
          VERSION: ${{ needs.release.outputs.version }}
        run: |
          mkdir -p marketplace-bundle/files

          # Copy wheel
          cp dist/*.whl marketplace-bundle/files/

          # Generate companion notebook.
          # The target path contains spaces, so it must be quoted: unquoted,
          # the shell redirects to ".../DashOntology" and hands the remaining
          # words to cat as (non-existent) input files, failing the step.
          cat > "marketplace-bundle/files/DashOntology — Ontology and Lineage_Quickstart.py" << NBEOF
          # Databricks notebook source
          # MAGIC %md
          # MAGIC # DashOntology — Ontology and Lineage v${VERSION} for Databricks
          # MAGIC Install and launch the interactive UI.

          # COMMAND ----------
          # MAGIC %pip install dash-ontology==${VERSION}

          # COMMAND ----------
          dbutils.library.restartPython()

          # COMMAND ----------
          import dashontology
          dashontology.launch()
          NBEOF

          # Generate listing metadata (each category is its own array element)
          cat > marketplace-bundle/listing.json << LEOF
          {
            "listing_name": "DashOntology — Ontology and Lineage",
            "version": "${VERSION}",
            "short_description": "Define entities, relationships and lineage for AI usage",
            "long_description": "DashOntology — Ontology and Lineage provides an ipywidgets UI inside Databricks notebooks to auto-infer a data ontology (object types, links, metrics) from lineage graphs. No coding required for business users.",
            "categories": ["Data Governance", "AI/ML"],
            "tags": ["ontology", "lineage", "databricks", "unity-catalog", "pyspark"],
            "provider": "dash-libs",
            "documentation_url": "https://github.com/dash-libs/dash-ontology",
            "source_url": "https://github.com/dash-libs/dash-ontology",
            "pypi_package": "dash-ontology==${VERSION}"
          }
          LEOF

          # Zip the bundle
          cd marketplace-bundle && zip -r ../dashontology-marketplace-${VERSION}.zip .
          echo "Bundle created: dashontology-marketplace-${VERSION}.zip"

      - name: Upload Marketplace bundle artifact
        uses: actions/upload-artifact@v4
        with:
          name: marketplace-bundle
          path: dashontology-marketplace-*.zip
          retention-days: 90
@@ -0,0 +1,9 @@
1
+ __pycache__/
2
+ *.pyc
3
+ *.egg-info/
4
+ dist/
5
+ build/
6
+ .coverage
7
+ coverage.xml
8
+ .pytest_cache/
9
+ .ruff_cache/
@@ -0,0 +1,25 @@
1
+ # CLAUDE.md — dash-ontology
2
+
3
+ Part of the **Dashlibs** suite. See ~/dashlibs for the full context.
4
+
5
+ ## Purpose
6
+ Auto-infers a data ontology (object types, links, metrics) from lineage
7
+ graphs — no AI tokens required. `inference.py`=`infer_ontology()`,
8
+ `models.py`=`ObjectType`/`Link`/`Metric`/`Property`/`OntologyGraph`.
9
+
10
+ ## Structure
11
+ - `dashontology/ui.py` — ipywidgets UI, `launch()` entrypoint
12
+ - `dashontology/inference.py` — core inference engine
13
+ - `dashontology/models.py` — dataclasses for the ontology graph
14
+ - `dashontology/cardinality.py`, `dashontology/naming.py`, `dashontology/_classifier_bridge.py` — inference helpers
15
+ - `tests/` — pytest, no Spark dependency for unit tests
16
+
17
+ ## Key Design Rules
18
+ - Never import Spark at module level — always inside functions
19
+ - UI calls core classes; never contains business logic
20
+ - `launch()` is always the public entrypoint for business users
21
+
22
+ ## CI
23
+ - `ci.yml` — PR gate: lint → test → build
24
+ - `daily.yml` — 06:00 UTC: tests only (no commit)
25
+ - `release.yml` — Monday 09:00 UTC: patch bump + GitHub release
@@ -0,0 +1,63 @@
1
+ Metadata-Version: 2.4
2
+ Name: dash-ontology
3
+ Version: 0.1.1
4
+ Summary: Auto-inferred data ontology from lineage graphs — no AI tokens required
5
+ Project-URL: Homepage, https://github.com/dash-libs/dash-ontology
6
+ Author-email: Darshan Shah <darshan.innovation@gmail.com>
7
+ License: Apache-2.0
8
+ Keywords: data-catalog,databricks,knowledge-graph,lineage,ontology,unity-catalog
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Intended Audience :: Information Technology
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: Apache Software License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Requires-Python: >=3.9
20
+ Requires-Dist: ipywidgets>=8.0
21
+ Provides-Extra: dev
22
+ Requires-Dist: hatch; extra == 'dev'
23
+ Requires-Dist: pdoc; extra == 'dev'
24
+ Requires-Dist: pytest; extra == 'dev'
25
+ Requires-Dist: pytest-cov; extra == 'dev'
26
+ Requires-Dist: ruff; extra == 'dev'
27
+ Description-Content-Type: text/markdown
28
+
29
+ # DashOntology — Databricks Library
30
+
31
+ [![CI](https://github.com/dash-libs/dash-ontology/actions/workflows/ci.yml/badge.svg)](https://github.com/dash-libs/dash-ontology/actions)
32
+ [![PyPI](https://img.shields.io/pypi/v/dash-ontology)](https://pypi.org/project/dash-ontology/)
33
+ [![License](https://img.shields.io/badge/license-Apache%202.0-blue)](LICENSE)
34
+
35
+ Part of the **[Dashlibs](https://github.com/dash-libs)** suite — Databricks libraries built for business users.
36
+
37
+ ## Installation
38
+
39
+ ```bash
40
+ %pip install dash-ontology
41
+ ```
42
+
43
+ ## Quick Start
44
+
45
+ ```python
46
+ import dashontology
47
+ dashontology.launch() # Opens interactive UI in your Databricks notebook
48
+ ```
49
+
50
+ ## Part of Dashlibs
51
+
52
+ | Library | Purpose |
53
+ |---|---|
54
+ | dash-dq | Data Quality |
55
+ | dash-synthetic | Synthetic Data Generation |
56
+ | dash-ml | ML Model Monitoring |
57
+ | dash-ingest | Data Ingestion |
58
+ | dash-gov | Data Governance |
59
+ | dash-ontology | Ontology & Lineage for AI |
60
+
61
+ ## License
62
+
63
+ Apache 2.0
@@ -0,0 +1,35 @@
1
+ # DashOntology — Databricks Library
2
+
3
+ [![CI](https://github.com/dash-libs/dash-ontology/actions/workflows/ci.yml/badge.svg)](https://github.com/dash-libs/dash-ontology/actions)
4
+ [![PyPI](https://img.shields.io/pypi/v/dash-ontology)](https://pypi.org/project/dash-ontology/)
5
+ [![License](https://img.shields.io/badge/license-Apache%202.0-blue)](LICENSE)
6
+
7
+ Part of the **[Dashlibs](https://github.com/dash-libs)** suite — Databricks libraries built for business users.
8
+
9
+ ## Installation
10
+
11
+ ```bash
12
+ %pip install dash-ontology
13
+ ```
14
+
15
+ ## Quick Start
16
+
17
+ ```python
18
+ import dashontology
19
+ dashontology.launch() # Opens interactive UI in your Databricks notebook
20
+ ```
21
+
22
+ ## Part of Dashlibs
23
+
24
+ | Library | Purpose |
25
+ |---|---|
26
+ | dash-dq | Data Quality |
27
+ | dash-synthetic | Synthetic Data Generation |
28
+ | dash-ml | ML Model Monitoring |
29
+ | dash-ingest | Data Ingestion |
30
+ | dash-gov | Data Governance |
31
+ | dash-ontology | Ontology & Lineage for AI |
32
+
33
+ ## License
34
+
35
+ Apache 2.0
@@ -0,0 +1,15 @@
1
+ """DashOntology — Auto-inferred data ontology from lineage graphs."""
2
+ from dashontology.models import ObjectType, Link, Metric, Property, OntologyGraph
3
+ from dashontology.naming import normalize_name, singularize, to_camel_case
4
+ from dashontology.cardinality import infer_cardinality, infer_cardinality_from_ratio
5
+ from dashontology.inference import infer_ontology
6
+ from dashontology.ui import launch
7
+
8
+ __version__ = "0.1.1"
9
+ __all__ = [
10
+ "ObjectType", "Link", "Metric", "Property", "OntologyGraph",
11
+ "normalize_name", "singularize", "to_camel_case",
12
+ "infer_cardinality", "infer_cardinality_from_ratio",
13
+ "infer_ontology",
14
+ "launch",
15
+ ]
@@ -0,0 +1,53 @@
1
+ """
2
+ Lightweight table role classifier — mirrors dashgov.classifier logic
3
+ without importing from dashgov (keeping dash-ontology self-contained).
4
+ """
5
+ from __future__ import annotations
6
+
7
# Name-based hints. Kept as tuples so they can be handed straight to
# ``str.startswith`` / ``str.endswith``, which accept a tuple of options.
_STAGING_PREFIXES = (
    "stg_", "staging_", "tmp_", "temp_", "raw_", "src_", "landing_", "bronze_",
)
_DIMENSION_PREFIXES = ("dim_", "d_")
_FACT_PREFIXES = ("fact_", "fct_", "f_")
_AGG_SUFFIXES = (
    "_agg", "_aggregated", "_summary", "_report",
    "_metrics", "_stats", "_kpi", "_rollup",
    "_daily", "_weekly", "_monthly", "_yearly",
)
_JUNCTION_SUFFIXES = ("_map", "_mapping", "_xref", "_bridge", "_link", "_rel", "_assoc")
_FK_SUFFIXES = ("_id", "_pk", "_key", "_fk", "_ref", "_uuid")


def _name(full: str) -> str:
    """Return the lower-cased last dot-separated segment of *full*."""
    return full.split(".")[-1].lower()


def classify_table_role(
    full_name: str,
    columns: list[dict],
    n_upstream: int = 0,
    n_downstream: int = 0,
) -> tuple[str, float]:
    """Classify a table's role from its name, columns and lineage counts.

    Rules are evaluated in order and the first match wins: name-based
    prefix/suffix rules first, then structural rules based on key-like
    columns and upstream/downstream counts.

    Args:
        full_name: Table name, optionally dot-qualified; only the last
            segment is inspected, case-insensitively.
        columns: Column descriptors; only each dict's ``"name"`` entry is
            read. A missing or ``None`` name is treated as an empty string.
        n_upstream: Number of upstream tables in the lineage graph.
        n_downstream: Number of downstream tables in the lineage graph.

    Returns:
        ``(role, confidence)`` where role is one of ``"staging"``,
        ``"aggregation"``, ``"entity"``, ``"fact"``, ``"junction"`` or
        ``"unknown"``.
    """
    name = _name(full_name)
    n_cols = len(columns)
    # Count key-like columns. A bare "id" column never matches because every
    # suffix starts with an underscore.
    n_fk = sum(
        1 for c in columns
        if (c.get("name") or "").lower().endswith(_FK_SUFFIXES)
    )

    # Name-based rules.
    if name.startswith(_STAGING_PREFIXES):
        return "staging", 0.90
    if name.endswith(_AGG_SUFFIXES):
        return "aggregation", 0.90
    if name.startswith(_DIMENSION_PREFIXES):
        return "entity", 0.90
    # Explicit fact prefixes: without this rule _FACT_PREFIXES was never
    # consulted and e.g. "fact_sales" fell through to the structural rules.
    if name.startswith(_FACT_PREFIXES):
        return "fact", 0.90
    if name.endswith(_JUNCTION_SUFFIXES):
        return "junction", 0.88

    # Structural rules.
    if n_cols >= 2 and n_fk >= 2 and n_fk / n_cols >= 0.6:
        return "junction", 0.80
    if n_upstream == 0 and n_cols >= 3:
        return "entity", 0.78
    if n_upstream >= 1 and n_fk >= 1 and n_downstream >= 1:
        return "fact", 0.70
    if n_upstream >= 2 and n_downstream == 0:
        return "aggregation", 0.60
    return "unknown", 0.40
@@ -0,0 +1,89 @@
1
+ """
2
+ Cardinality inference for object type links.
3
+
4
+ Uses column statistics (unique counts vs total counts) from both sides
5
+ of a join to determine whether the relationship is 1:1, 1:N, or N:M.
6
+ All pure Python — no Spark required.
7
+ """
8
+ from __future__ import annotations
9
+
10
+
11
def infer_cardinality(
    from_unique: int,
    from_total: int,
    to_unique: int,
    to_total: int,
    one_to_one_threshold: float = 0.95,
) -> tuple[str, float]:
    """
    Classify a link as 1:1, 1:N or N:M from uniqueness counts on both sides.

    Parameters
    ----------
    from_unique : distinct values in the FK column of the *from* table
    from_total  : total non-null rows in the FK column
    to_unique   : distinct values in the PK column of the *to* table
    to_total    : total non-null rows in the PK column
    one_to_one_threshold : minimum unique/total ratio for a side to be
        treated as unique

    Returns
    -------
    (cardinality: str, confidence: float)
        cardinality ∈ {"1:1", "1:N", "N:M"}

    Decision order
    --------------
    1. Either total is <= 0          → ("1:N", 0.40), nothing can be inferred
    2. PK ratio below the threshold  → ("N:M", 0.55)
    3. FK ratio at/above threshold   → ("1:1", 0.85)
    4. Otherwise                     → "1:N"; confidence rises from 0.65 as
       the FK side gets less unique, capped at 0.95 and rounded to 3 places
    """
    if from_total <= 0 or to_total <= 0:
        # No rows on one side: fall back to the most common relationship.
        return "1:N", 0.40

    fk_ratio = from_unique / from_total
    pk_ratio = to_unique / to_total

    if not pk_ratio >= one_to_one_threshold:
        # Duplicates on the PK side — likely N:M or a bad join.
        verdict = ("N:M", 0.55)
    elif fk_ratio >= one_to_one_threshold:
        # Both sides are (nearly) unique.
        verdict = ("1:1", 0.85)
    else:
        # Unique PK, repeated FK: one PK row maps to many FK rows.
        duplication = 1.0 - fk_ratio  # 0 = all unique, 1 = all same value
        verdict = ("1:N", round(min(0.95, 0.65 + duplication * 0.3), 3))
    return verdict
63
+
64
+
65
def infer_cardinality_from_ratio(avg_fk_per_pk: float) -> tuple[str, float]:
    """
    Infer cardinality when only the average FK-rows-per-PK ratio is known.

    avg_fk_per_pk — average number of FK rows per unique PK value

    Bands:
        ratio <= 1.05 → ("1:1", 0.80)
        ratio <= 1.5  → ("1:N", 0.60), borderline
        otherwise     → "1:N", confidence 0.70 + ratio/100 with the ratio
                        capped at 20
    """
    if avg_fk_per_pk <= 1.05:
        return "1:1", 0.80
    if avg_fk_per_pk <= 1.5:
        return "1:N", 0.60  # borderline

    capped_ratio = min(avg_fk_per_pk, 20)
    confidence = min(0.95, 0.70 + capped_ratio / 100)
    return "1:N", confidence
81
+
82
+
83
def cardinality_label(card: str) -> str:
    """Return the spelled-out label for a cardinality code.

    Codes other than "1:1", "1:N" and "N:M" are returned unchanged.
    """
    labels = {
        "1:1": "one-to-one",
        "1:N": "one-to-many",
        "N:M": "many-to-many",
    }
    try:
        return labels[card]
    except KeyError:
        return card