datalex-cli 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. datalex_cli/__init__.py +1 -0
  2. datalex_cli/datalex_cli.py +658 -0
  3. datalex_cli/main.py +2925 -0
  4. datalex_cli-0.1.1.dist-info/METADATA +228 -0
  5. datalex_cli-0.1.1.dist-info/RECORD +64 -0
  6. datalex_cli-0.1.1.dist-info/WHEEL +5 -0
  7. datalex_cli-0.1.1.dist-info/entry_points.txt +2 -0
  8. datalex_cli-0.1.1.dist-info/licenses/LICENSE +21 -0
  9. datalex_cli-0.1.1.dist-info/top_level.txt +2 -0
  10. datalex_core/__init__.py +94 -0
  11. datalex_core/_schemas/datalex/common.schema.json +127 -0
  12. datalex_core/_schemas/datalex/domain.schema.json +24 -0
  13. datalex_core/_schemas/datalex/entity.schema.json +158 -0
  14. datalex_core/_schemas/datalex/model.schema.json +141 -0
  15. datalex_core/_schemas/datalex/policy.schema.json +70 -0
  16. datalex_core/_schemas/datalex/project.schema.json +82 -0
  17. datalex_core/_schemas/datalex/snippet.schema.json +24 -0
  18. datalex_core/_schemas/datalex/source.schema.json +104 -0
  19. datalex_core/_schemas/datalex/term.schema.json +30 -0
  20. datalex_core/canonical.py +166 -0
  21. datalex_core/completion.py +204 -0
  22. datalex_core/connectors/__init__.py +39 -0
  23. datalex_core/connectors/base.py +417 -0
  24. datalex_core/connectors/bigquery.py +229 -0
  25. datalex_core/connectors/databricks.py +262 -0
  26. datalex_core/connectors/mysql.py +266 -0
  27. datalex_core/connectors/postgres.py +309 -0
  28. datalex_core/connectors/redshift.py +298 -0
  29. datalex_core/connectors/snowflake.py +336 -0
  30. datalex_core/connectors/sqlserver.py +425 -0
  31. datalex_core/datalex/__init__.py +26 -0
  32. datalex_core/datalex/diff.py +188 -0
  33. datalex_core/datalex/errors.py +85 -0
  34. datalex_core/datalex/loader.py +512 -0
  35. datalex_core/datalex/migrate_layout.py +382 -0
  36. datalex_core/datalex/parse_cache.py +102 -0
  37. datalex_core/datalex/project.py +214 -0
  38. datalex_core/datalex/types.py +224 -0
  39. datalex_core/dbt/__init__.py +18 -0
  40. datalex_core/dbt/emit.py +344 -0
  41. datalex_core/dbt/manifest.py +329 -0
  42. datalex_core/dbt/profiles.py +185 -0
  43. datalex_core/dbt/sync.py +279 -0
  44. datalex_core/dbt/warehouse.py +215 -0
  45. datalex_core/dialects/__init__.py +15 -0
  46. datalex_core/dialects/_common.py +48 -0
  47. datalex_core/dialects/base.py +47 -0
  48. datalex_core/dialects/postgres.py +164 -0
  49. datalex_core/dialects/registry.py +36 -0
  50. datalex_core/dialects/snowflake.py +129 -0
  51. datalex_core/diffing.py +358 -0
  52. datalex_core/docs_generator.py +797 -0
  53. datalex_core/doctor.py +181 -0
  54. datalex_core/generators.py +478 -0
  55. datalex_core/importers.py +1176 -0
  56. datalex_core/issues.py +23 -0
  57. datalex_core/loader.py +21 -0
  58. datalex_core/migrate.py +316 -0
  59. datalex_core/modeling.py +679 -0
  60. datalex_core/packages.py +430 -0
  61. datalex_core/policy.py +1037 -0
  62. datalex_core/resolver.py +456 -0
  63. datalex_core/schema.py +54 -0
  64. datalex_core/semantic.py +1561 -0
@@ -0,0 +1,797 @@
1
+ """Documentation generator for DataLex models.
2
+
3
+ Generates:
4
+ - Static HTML data dictionary site (single-page, self-contained)
5
+ - Markdown export for GitHub wiki / Confluence
6
+ - Auto-changelog from model diffs
7
+ """
8
+
9
+ import html
10
+ import json
11
+ from datetime import datetime
12
+ from pathlib import Path
13
+ from typing import Any, Dict, List, Optional
14
+
15
+ from datalex_core.canonical import compile_model
16
+ from datalex_core.loader import load_yaml_model
17
+
18
+
19
+ def _esc(text: str) -> str:
20
+ """HTML-escape a string."""
21
+ return html.escape(str(text)) if text else ""
22
+
23
+
24
+ def _field_badges_html(field: Dict[str, Any]) -> str:
25
+ """Generate HTML badge spans for field properties."""
26
+ badges = []
27
+ if field.get("primary_key"):
28
+ badges.append('<span class="badge badge-pk">PK</span>')
29
+ if field.get("unique"):
30
+ badges.append('<span class="badge badge-uq">UQ</span>')
31
+ if field.get("foreign_key"):
32
+ badges.append('<span class="badge badge-fk">FK</span>')
33
+ if field.get("nullable") is False:
34
+ badges.append('<span class="badge badge-nn">NOT NULL</span>')
35
+ if field.get("computed"):
36
+ badges.append('<span class="badge badge-comp">COMPUTED</span>')
37
+ if field.get("deprecated"):
38
+ badges.append('<span class="badge badge-dep">DEPRECATED</span>')
39
+ if field.get("sensitivity"):
40
+ badges.append(f'<span class="badge badge-sens">{_esc(field["sensitivity"]).upper()}</span>')
41
+ if field.get("default") is not None:
42
+ badges.append(f'<span class="badge badge-def">DEFAULT: {_esc(str(field["default"]))}</span>')
43
+ if field.get("check"):
44
+ badges.append(f'<span class="badge badge-chk">CHECK</span>')
45
+ return " ".join(badges)
46
+
47
+
48
+ def _entity_type_class(entity_type: str) -> str:
49
+ """CSS class for entity type."""
50
+ return {
51
+ "table": "type-table",
52
+ "view": "type-view",
53
+ "materialized_view": "type-mv",
54
+ "external_table": "type-ext",
55
+ "snapshot": "type-snap",
56
+ }.get(entity_type, "type-table")
57
+
58
+
59
+ # ---------------------------------------------------------------------------
60
+ # HTML Generation
61
+ # ---------------------------------------------------------------------------
62
+
63
# Stylesheet inlined into the <style> element of the page produced by
# generate_html_docs, keeping the generated file self-contained.
_CSS = """
:root {
--bg: #f8fafc; --surface: #ffffff; --border: #e2e8f0;
--text: #1e293b; --text-muted: #64748b; --text-light: #94a3b8;
--accent: #3b82f6; --accent-light: #dbeafe;
--green: #22c55e; --yellow: #eab308; --red: #ef4444; --purple: #8b5cf6;
--cyan: #06b6d4; --orange: #f97316; --indigo: #6366f1;
}
* { box-sizing: border-box; margin: 0; padding: 0; }
body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; background: var(--bg); color: var(--text); line-height: 1.6; }
a { color: var(--accent); text-decoration: none; }
a:hover { text-decoration: underline; }

.container { max-width: 1200px; margin: 0 auto; padding: 0 24px; }
header { background: var(--surface); border-bottom: 1px solid var(--border); padding: 20px 0; position: sticky; top: 0; z-index: 100; }
header .container { display: flex; align-items: center; justify-content: space-between; }
header h1 { font-size: 20px; font-weight: 700; }
header h1 span { color: var(--accent); }
.header-meta { font-size: 12px; color: var(--text-muted); }

.search-box { margin: 20px 0; }
.search-box input { width: 100%; padding: 10px 16px; border: 1px solid var(--border); border-radius: 8px; font-size: 14px; background: var(--surface); outline: none; }
.search-box input:focus { border-color: var(--accent); box-shadow: 0 0 0 3px var(--accent-light); }

.stats-bar { display: flex; gap: 16px; margin: 16px 0; flex-wrap: wrap; }
.stat { background: var(--surface); border: 1px solid var(--border); border-radius: 8px; padding: 12px 20px; text-align: center; min-width: 120px; }
.stat-value { font-size: 24px; font-weight: 700; color: var(--accent); }
.stat-label { font-size: 11px; color: var(--text-muted); text-transform: uppercase; letter-spacing: 0.5px; }

nav.toc { background: var(--surface); border: 1px solid var(--border); border-radius: 8px; padding: 16px 20px; margin: 20px 0; }
nav.toc h2 { font-size: 14px; margin-bottom: 8px; color: var(--text-muted); text-transform: uppercase; letter-spacing: 0.5px; }
nav.toc ul { list-style: none; display: flex; flex-wrap: wrap; gap: 6px; }
nav.toc li a { display: inline-block; padding: 4px 10px; border-radius: 6px; font-size: 13px; font-weight: 500; background: var(--bg); border: 1px solid var(--border); }
nav.toc li a:hover { background: var(--accent-light); border-color: var(--accent); text-decoration: none; }

.entity-card { background: var(--surface); border: 1px solid var(--border); border-radius: 10px; margin: 20px 0; overflow: hidden; }
.entity-header { padding: 16px 20px; border-bottom: 1px solid var(--border); display: flex; align-items: center; gap: 12px; }
.entity-header h2 { font-size: 18px; font-weight: 600; }
.entity-type { display: inline-block; padding: 2px 8px; border-radius: 4px; font-size: 11px; font-weight: 600; text-transform: uppercase; }
.type-table { background: #dbeafe; color: #1d4ed8; }
.type-view { background: #dcfce7; color: #15803d; }
.type-mv { background: #f3e8ff; color: #7c3aed; }
.type-ext { background: #ffedd5; color: #c2410c; }
.type-snap { background: #fecdd3; color: #be123c; }
.entity-meta { padding: 12px 20px; display: flex; flex-wrap: wrap; gap: 16px; font-size: 12px; color: var(--text-muted); border-bottom: 1px solid var(--border); }
.entity-meta span { display: flex; align-items: center; gap: 4px; }
.entity-desc { padding: 12px 20px; font-size: 14px; color: var(--text-muted); border-bottom: 1px solid var(--border); }

table { width: 100%; border-collapse: collapse; font-size: 13px; }
th { text-align: left; padding: 8px 12px; background: var(--bg); font-size: 11px; text-transform: uppercase; letter-spacing: 0.5px; color: var(--text-muted); font-weight: 600; border-bottom: 1px solid var(--border); }
td { padding: 8px 12px; border-bottom: 1px solid var(--border); vertical-align: top; }
tr:hover td { background: #f1f5f9; }
tr.deprecated td { opacity: 0.6; text-decoration: line-through; }
.field-name { font-family: 'SF Mono', Monaco, Consolas, monospace; font-weight: 500; font-size: 13px; }
.field-type { font-family: 'SF Mono', Monaco, Consolas, monospace; color: var(--purple); font-size: 12px; }

.badge { display: inline-block; padding: 1px 6px; border-radius: 3px; font-size: 10px; font-weight: 600; margin-right: 3px; }
.badge-pk { background: #fef3c7; color: #92400e; }
.badge-uq { background: #cffafe; color: #0e7490; }
.badge-fk { background: #dbeafe; color: #1d4ed8; }
.badge-nn { background: #fecdd3; color: #9f1239; }
.badge-comp { background: #dcfce7; color: #15803d; }
.badge-dep { background: #fecdd3; color: #be123c; }
.badge-sens { background: #fef3c7; color: #92400e; }
.badge-def { background: #e0e7ff; color: #4338ca; }
.badge-chk { background: #ffedd5; color: #c2410c; }
.badge-idx { background: #f3e8ff; color: #7c3aed; }
.badge-tag { background: var(--bg); color: var(--text-muted); border: 1px solid var(--border); }

.section { margin: 20px 0; }
.section h2 { font-size: 16px; font-weight: 600; margin-bottom: 12px; padding-bottom: 8px; border-bottom: 2px solid var(--accent); }
.section h3 { font-size: 14px; font-weight: 600; margin: 12px 0 8px; }

.rel-card { display: flex; align-items: center; gap: 8px; padding: 8px 12px; background: var(--bg); border: 1px solid var(--border); border-radius: 6px; margin: 4px 0; font-size: 13px; }
.rel-card .rel-name { font-weight: 600; }
.rel-card .rel-arrow { color: var(--text-light); }
.rel-card code { font-family: 'SF Mono', Monaco, Consolas, monospace; font-size: 12px; color: var(--purple); }
.rel-card .cardinality { font-size: 11px; font-weight: 600; padding: 1px 6px; border-radius: 3px; }
.card-1to1 { background: #dcfce7; color: #15803d; }
.card-1toN { background: #dbeafe; color: #1d4ed8; }
.card-Nto1 { background: #f3e8ff; color: #7c3aed; }
.card-NtoN { background: #ffedd5; color: #c2410c; }

.glossary-card { background: var(--surface); border: 1px solid var(--border); border-radius: 8px; padding: 12px 16px; margin: 8px 0; }
.glossary-card h4 { font-size: 14px; font-weight: 600; margin-bottom: 4px; }
.glossary-card p { font-size: 13px; color: var(--text-muted); }
.glossary-card .gl-meta { font-size: 11px; color: var(--text-light); margin-top: 4px; }

.index-row { display: flex; align-items: center; gap: 8px; padding: 6px 12px; background: var(--bg); border: 1px solid var(--border); border-radius: 6px; margin: 4px 0; font-size: 13px; }
.index-row code { font-family: 'SF Mono', Monaco, Consolas, monospace; font-size: 12px; }

footer { margin: 40px 0 20px; padding: 20px 0; border-top: 1px solid var(--border); text-align: center; font-size: 12px; color: var(--text-light); }

.hidden { display: none !important; }

@media (max-width: 768px) {
.stats-bar { flex-direction: column; }
.entity-meta { flex-direction: column; gap: 4px; }
}
"""
163
+
164
# Script inlined into the generated page. filterEntities() is wired to the
# search input's oninput handler and toggles the "hidden" class on entity
# cards, glossary cards and TOC entries whose lower-cased text does not
# contain the lower-cased query.
_JS = """
function filterEntities() {
const q = document.getElementById('search').value.toLowerCase();
document.querySelectorAll('.entity-card').forEach(card => {
const text = card.textContent.toLowerCase();
card.classList.toggle('hidden', q && !text.includes(q));
});
document.querySelectorAll('.glossary-card').forEach(card => {
const text = card.textContent.toLowerCase();
card.classList.toggle('hidden', q && !text.includes(q));
});
document.querySelectorAll('nav.toc li').forEach(li => {
const text = li.textContent.toLowerCase();
li.classList.toggle('hidden', q && !text.includes(q));
});
}
"""
181
+
182
+
183
# CSS class per known relationship cardinality; any other value is styled
# like one-to-many.
_CARDINALITY_CSS = {
    "one_to_one": "card-1to1",
    "one_to_many": "card-1toN",
    "many_to_one": "card-Nto1",
    "many_to_many": "card-NtoN",
}


def _comma_join(values: Any) -> str:
    """Join a list-like model value with ", ", tolerating None, scalars and non-string items."""
    if values is None:
        return ""
    if isinstance(values, (list, tuple, set, frozenset)):
        return ", ".join(str(v) for v in values)
    return str(values)


def _relationship_card_html(rel: Dict[str, Any]) -> str:
    """Render one relationship as a ``rel-card`` row (used by the per-entity and the global list)."""
    card = rel.get("cardinality", "one_to_many")
    card_class = _CARDINALITY_CSS.get(card, "card-1toN")
    # A null cardinality renders an empty label instead of raising on .replace().
    card_label = "" if card is None else str(card).replace("_", ":")
    return (
        f'<div class="rel-card"><span class="rel-name">{_esc(rel.get("name", ""))}</span>'
        f' <code>{_esc(rel.get("from", ""))}</code>'
        f' <span class="rel-arrow">→</span>'
        f' <code>{_esc(rel.get("to", ""))}</code>'
        f' <span class="cardinality {card_class}">{_esc(card_label)}</span></div>'
    )


def generate_html_docs(
    model: Dict[str, Any],
    title: Optional[str] = None,
) -> str:
    """Generate a self-contained HTML data dictionary from a model.

    The page contains, in order: a header, the model description, a search
    box, a statistics bar, an entity table of contents, one card per entity
    (metadata, fields, indexes, relationships), and then the global
    relationship, metric, glossary and classification sections that have
    content.

    Args:
        model: Model dictionary. The keys read are ``model``, ``entities``,
            ``relationships``, ``indexes``, ``metrics``, ``glossary`` and
            ``governance``; sections that are missing or ``None`` are treated
            as empty.
        title: Page title; defaults to ``"<model name> — Data Dictionary"``.

    Returns:
        The complete HTML document as a single string.
    """
    meta = model.get("model") or {}
    model_name = meta.get("name", "unknown")
    model_version = meta.get("version", "")
    model_domain = meta.get("domain", "")
    model_desc = meta.get("description", "")
    model_state = meta.get("state", "")

    entities = model.get("entities") or []
    relationships = model.get("relationships") or []
    indexes = model.get("indexes") or []
    metrics = model.get("metrics") or []
    glossary = model.get("glossary") or []
    classifications = (model.get("governance") or {}).get("classification") or {}

    page_title = title or f"{model_name} — Data Dictionary"
    # Taken once so the header and footer can never show different minutes.
    generated_at = datetime.now().strftime("%Y-%m-%d %H:%M")

    total_fields = sum(len(e.get("fields") or []) for e in entities)

    # Indexes, and the set of indexed field names, grouped by owning entity.
    indexes_by_entity: Dict[str, List[Dict]] = {}
    indexed_fields: Dict[str, set] = {}
    for idx in indexes:
        ent = idx.get("entity", "")
        indexes_by_entity.setdefault(ent, []).append(idx)
        indexed_fields.setdefault(ent, set()).update(idx.get("fields") or [])

    # Relationships grouped under both endpoints; the entity is the part of
    # the "entity.field" reference before the first dot.
    rels_by_entity: Dict[str, List[Dict]] = {}
    for rel in relationships:
        from_ent = (rel.get("from", "") or "").split(".")[0]
        to_ent = (rel.get("to", "") or "").split(".")[0]
        rels_by_entity.setdefault(from_ent, []).append(rel)
        if to_ent != from_ent:
            rels_by_entity.setdefault(to_ent, []).append(rel)

    parts = []
    parts.append(f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{_esc(page_title)}</title>
<style>{_CSS}</style>
</head>
<body>
<header>
<div class="container">
<h1><span>DataLex</span> Data Dictionary</h1>
<div class="header-meta">
{_esc(model_name)} v{_esc(model_version)} &middot; {_esc(model_domain)} &middot; {_esc(model_state)}
&middot; Generated {generated_at}
</div>
</div>
</header>

<div class="container">
""")

    # Model description
    if model_desc:
        parts.append(f'<p style="margin:16px 0;font-size:15px;color:var(--text-muted)">{_esc(model_desc)}</p>')

    # Search
    parts.append("""
<div class="search-box">
<input type="text" id="search" placeholder="Search entities, fields, tags, glossary..." oninput="filterEntities()">
</div>
""")

    # Stats bar
    parts.append(f"""
<div class="stats-bar">
<div class="stat"><div class="stat-value">{len(entities)}</div><div class="stat-label">Entities</div></div>
<div class="stat"><div class="stat-value">{total_fields}</div><div class="stat-label">Fields</div></div>
<div class="stat"><div class="stat-value">{len(relationships)}</div><div class="stat-label">Relationships</div></div>
<div class="stat"><div class="stat-value">{len(indexes)}</div><div class="stat-label">Indexes</div></div>
<div class="stat"><div class="stat-value">{len(metrics)}</div><div class="stat-label">Metrics</div></div>
<div class="stat"><div class="stat-value">{len(glossary)}</div><div class="stat-label">Glossary Terms</div></div>
</div>
""")

    # TOC
    parts.append('<nav class="toc"><h2>Entities</h2><ul>')
    for e in entities:
        ename = _esc(e.get("name", ""))
        etype = e.get("type", "table")
        parts.append(f'<li><a href="#entity-{ename}"><span class="entity-type {_entity_type_class(etype)}">{_esc(etype)}</span> {ename}</a></li>')
    parts.append("</ul></nav>")

    # Entity cards
    for e in entities:
        ename = e.get("name", "")
        etype = e.get("type", "table")
        edesc = e.get("description", "")
        etags = e.get("tags") or []
        eschema = e.get("schema", "")
        edb = e.get("database", "")
        esubject = e.get("subject_area", "")
        eowner = e.get("owner", "")
        esla = e.get("sla", {})
        fields = e.get("fields") or []
        ent_indexes = indexes_by_entity.get(ename, [])
        ent_rels = rels_by_entity.get(ename, [])
        ent_indexed = indexed_fields.get(ename, set())

        parts.append(f'<div class="entity-card" id="entity-{_esc(ename)}">')

        # Header
        parts.append(f"""
<div class="entity-header">
<span class="entity-type {_entity_type_class(etype)}">{_esc(etype)}</span>
<h2>{_esc(ename)}</h2>
<span style="margin-left:auto;font-size:12px;color:var(--text-light)">{len(fields)} fields</span>
</div>
""")

        # Meta row
        meta_parts = []
        if eschema:
            meta_parts.append(f"<span>Schema: <strong>{_esc(eschema)}</strong></span>")
        if edb:
            meta_parts.append(f"<span>Database: <strong>{_esc(edb)}</strong></span>")
        if esubject:
            meta_parts.append(f"<span>Subject Area: <strong>{_esc(esubject)}</strong></span>")
        if eowner:
            meta_parts.append(f"<span>Owner: <strong>{_esc(eowner)}</strong></span>")
        if esla:
            sla_parts = []
            if esla.get("freshness"):
                sla_parts.append(f"Freshness: {_esc(str(esla['freshness']))}")
            if esla.get("quality_score") is not None:
                # Escaped like every other model value; it may be a string.
                sla_parts.append(f"Quality: {_esc(str(esla['quality_score']))}%")
            if sla_parts:
                meta_parts.append(f"<span>SLA: <strong>{' · '.join(sla_parts)}</strong></span>")
        for tag in etags:
            meta_parts.append(f'<span class="badge badge-tag">{_esc(str(tag))}</span>')
        if meta_parts:
            parts.append(f'<div class="entity-meta">{"".join(meta_parts)}</div>')

        # Description
        if edesc:
            parts.append(f'<div class="entity-desc">{_esc(edesc)}</div>')

        # Fields table
        parts.append("""<table>
<thead><tr><th>Field</th><th>Type</th><th>Badges</th><th>Description</th></tr></thead>
<tbody>""")
        for field in fields:
            fname = field.get("name", "")
            ftype = field.get("type", "")
            fdesc = field.get("description", "")
            is_dep = field.get("deprecated", False)
            badges = _field_badges_html(field)
            if fname in ent_indexed:
                badges += ' <span class="badge badge-idx">IDX</span>'
            # Governance classifications are keyed as "entity.field".
            cls_key = f"{ename}.{fname}"
            if cls_key in classifications:
                badges += f' <span class="badge badge-sens">{_esc(classifications[cls_key])}</span>'
            row_class = ' class="deprecated"' if is_dep else ""
            dep_msg = ""
            if is_dep and field.get("deprecated_message"):
                dep_msg = f' <em style="color:var(--red);font-size:11px">({_esc(field["deprecated_message"])})</em>'
            parts.append(f"""<tr{row_class}>
<td class="field-name">{_esc(fname)}</td>
<td class="field-type">{_esc(ftype)}</td>
<td>{badges}</td>
<td>{_esc(fdesc)}{dep_msg}</td>
</tr>""")
        parts.append("</tbody></table>")

        # Entity indexes
        if ent_indexes:
            parts.append(f'<div style="padding:12px 20px"><h3 style="font-size:13px;color:var(--text-muted);margin-bottom:8px">Indexes ({len(ent_indexes)})</h3>')
            for idx in ent_indexes:
                unique_badge = ' <span class="badge badge-uq">UNIQUE</span>' if idx.get("unique") else ""
                idx_type = idx.get("type")
                # "btree" gets no type badge.
                type_badge = (
                    f' <span class="badge badge-tag">{_esc(idx_type)}</span>'
                    if idx_type and idx_type != "btree"
                    else ""
                )
                idx_fields = ", ".join(_esc(f) for f in (idx.get("fields") or []))
                parts.append(
                    f'<div class="index-row"><code>{_esc(idx.get("name", ""))}</code>'
                    f' <span style="color:var(--text-light)">({idx_fields})</span>'
                    f"{unique_badge}{type_badge}</div>"
                )
            parts.append("</div>")

        # Entity relationships
        if ent_rels:
            # De-duplicate on (name, from, to) rather than name alone so that
            # distinct unnamed relationships are all listed, and count what is
            # actually rendered.
            unique_rels = []
            seen = set()
            for rel in ent_rels:
                key = (rel.get("name", ""), rel.get("from", ""), rel.get("to", ""))
                if key in seen:
                    continue
                seen.add(key)
                unique_rels.append(rel)
            parts.append(f'<div style="padding:12px 20px"><h3 style="font-size:13px;color:var(--text-muted);margin-bottom:8px">Relationships ({len(unique_rels)})</h3>')
            for rel in unique_rels:
                parts.append(_relationship_card_html(rel))
                rdesc = rel.get("description", "")
                if rdesc:
                    parts.append(f'<div style="padding:0 12px 4px;font-size:12px;color:var(--text-light)">{_esc(rdesc)}</div>')
            parts.append("</div>")

        parts.append("</div>")  # entity-card

    # Relationships section
    if relationships:
        parts.append('<div class="section" id="relationships"><h2>All Relationships</h2>')
        for rel in relationships:
            parts.append(_relationship_card_html(rel))
        parts.append("</div>")

    # Metrics section
    if metrics:
        parts.append('<div class="section" id="metrics"><h2>Metric Contracts</h2><table>')
        parts.append("<thead><tr><th>Metric</th><th>Entity</th><th>Aggregation</th><th>Grain</th><th>Dimensions</th><th>Description</th></tr></thead><tbody>")
        for metric in metrics:
            mname = metric.get("name", "")
            mentity = metric.get("entity", "")
            magg = metric.get("aggregation", "")
            mgrain = _comma_join(metric.get("grain"))
            mdims = _comma_join(metric.get("dimensions"))
            mdesc = metric.get("description") or ""
            if metric.get("deprecated"):
                dep_msg = f" ({metric.get('deprecated_message', 'deprecated')})"
                mdesc = (str(mdesc) + dep_msg).strip()
            parts.append(
                "<tr>"
                f"<td><code>{_esc(mname)}</code></td>"
                f"<td><code>{_esc(mentity)}</code></td>"
                f"<td>{_esc(magg)}</td>"
                f"<td>{_esc(mgrain)}</td>"
                f"<td>{_esc(mdims)}</td>"
                f"<td>{_esc(mdesc)}</td>"
                "</tr>"
            )
        parts.append("</tbody></table></div>")

    # Glossary section
    if glossary:
        parts.append('<div class="section" id="glossary"><h2>Business Glossary</h2>')
        for term in glossary:
            tname = term.get("term", "")
            tabbr = term.get("abbreviation", "")
            tdef = term.get("definition", "")
            towner = term.get("owner", "")
            tfields = term.get("related_fields") or []
            ttags = term.get("tags") or []
            parts.append('<div class="glossary-card">')
            abbr_str = f" ({_esc(tabbr)})" if tabbr else ""
            parts.append(f'<h4>{_esc(tname)}{abbr_str}</h4>')
            if tdef:
                parts.append(f'<p>{_esc(tdef)}</p>')
            meta_bits = []
            if towner:
                meta_bits.append(f"Owner: {_esc(towner)}")
            if tfields:
                meta_bits.append(f"Fields: {', '.join(_esc(f) for f in tfields)}")
            if ttags:
                meta_bits.append(f"Tags: {', '.join(_esc(str(t)) for t in ttags)}")
            if meta_bits:
                parts.append(f'<div class="gl-meta">{" · ".join(meta_bits)}</div>')
            parts.append("</div>")
        parts.append("</div>")

    # Governance section
    if classifications:
        parts.append('<div class="section" id="governance"><h2>Data Classification</h2><table>')
        parts.append("<thead><tr><th>Target</th><th>Classification</th></tr></thead><tbody>")
        for target, cls in sorted(classifications.items()):
            parts.append(f'<tr><td><code>{_esc(target)}</code></td><td><span class="badge badge-sens">{_esc(cls)}</span></td></tr>')
        parts.append("</tbody></table></div>")

    # Footer
    parts.append(f"""
<footer>
Generated by <strong>DataLex</strong> &middot; {_esc(model_name)} v{_esc(model_version)}
&middot; {generated_at}
</footer>
</div>
<script>{_JS}</script>
</body>
</html>""")

    return "\n".join(parts)
489
+
490
+
491
+ # ---------------------------------------------------------------------------
492
+ # Markdown Generation
493
+ # ---------------------------------------------------------------------------
494
+
495
def _md_join(values: Any) -> str:
    """Join a list-like model value with ", ", tolerating None, scalars and non-string items."""
    if values is None:
        return ""
    if isinstance(values, (list, tuple, set, frozenset)):
        return ", ".join(str(v) for v in values)
    return str(values)


def _md_cell(value: Any) -> str:
    """Render a value as Markdown table-cell text: newlines flattened to spaces, pipes escaped."""
    if value is None:
        return ""
    # A raw "|" would start a new column and a newline would end the row.
    return " ".join(str(value).splitlines()).replace("|", "\\|")


def generate_markdown_docs(
    model: Dict[str, Any],
    title: Optional[str] = None,
) -> str:
    """Generate a Markdown data dictionary from a model.

    The document contains a title and model summary, a statistics table, a
    table of contents, one section per entity (metadata, field table,
    indexes), and then the relationship, metric, glossary and classification
    sections that have content.

    Args:
        model: Model dictionary. The keys read are ``model``, ``entities``,
            ``relationships``, ``indexes``, ``metrics``, ``glossary`` and
            ``governance``; sections that are missing or ``None`` are treated
            as empty.
        title: Document title; defaults to ``"<model name> — Data Dictionary"``.

    Returns:
        The Markdown document as a single newline-joined string.
    """
    meta = model.get("model") or {}
    model_name = meta.get("name", "unknown")
    model_version = meta.get("version", "")
    model_domain = meta.get("domain", "")
    model_desc = meta.get("description", "")
    owners = meta.get("owners") or []

    entities = model.get("entities") or []
    relationships = model.get("relationships") or []
    indexes = model.get("indexes") or []
    metrics = model.get("metrics") or []
    glossary = model.get("glossary") or []
    classifications = (model.get("governance") or {}).get("classification") or {}

    # Two trailing spaces form a Markdown hard line break; kept in a variable
    # so the whitespace is explicit and cannot be trimmed away unnoticed.
    br = "  "

    lines: List[str] = []
    page_title = title or f"{model_name} — Data Dictionary"
    lines.append(f"# {page_title}")
    lines.append("")
    lines.append(f"**Model:** {model_name} v{model_version}{br}")
    lines.append(f"**Domain:** {model_domain}{br}")
    if owners:
        lines.append(f"**Owners:** {_md_join(owners)}{br}")
    if model_desc:
        lines.append(f"**Description:** {model_desc}{br}")
    lines.append("")

    # Stats
    total_fields = sum(len(e.get("fields") or []) for e in entities)
    lines.append("| Entities | Fields | Relationships | Indexes | Metrics | Glossary |")
    lines.append("|----------|--------|---------------|---------|---------|----------|")
    lines.append(f"| {len(entities)} | {total_fields} | {len(relationships)} | {len(indexes)} | {len(metrics)} | {len(glossary)} |")
    lines.append("")

    # TOC
    lines.append("## Table of Contents")
    lines.append("")
    for e in entities:
        ename = e.get("name", "")
        etype = e.get("type", "table")
        # Heading anchors are lower-cased with spaces turned into hyphens.
        anchor = str(ename).strip().lower().replace(" ", "-")
        lines.append(f"- [{ename}](#{anchor}) ({etype})")
    if relationships:
        lines.append("- [Relationships](#relationships)")
    if metrics:
        lines.append("- [Metric Contracts](#metric-contracts)")
    if glossary:
        lines.append("- [Glossary](#glossary)")
    if classifications:
        lines.append("- [Data Classification](#data-classification)")
    lines.append("")

    # Entities
    lines.append("---")
    lines.append("")

    indexes_by_entity: Dict[str, List[Dict]] = {}
    for idx in indexes:
        indexes_by_entity.setdefault(idx.get("entity", ""), []).append(idx)

    for e in entities:
        ename = e.get("name", "")
        etype = e.get("type", "table")
        edesc = e.get("description", "")
        etags = e.get("tags") or []
        eschema = e.get("schema", "")
        eowner = e.get("owner", "")
        esubject = e.get("subject_area", "")
        fields = e.get("fields") or []

        lines.append(f"## {ename}")
        lines.append("")
        lines.append(f"**Type:** `{etype}`{br}")
        if edesc:
            lines.append(f"**Description:** {edesc}{br}")
        if eschema:
            lines.append(f"**Schema:** `{eschema}`{br}")
        if esubject:
            lines.append(f"**Subject Area:** {esubject}{br}")
        if eowner:
            lines.append(f"**Owner:** {eowner}{br}")
        if etags:
            tag_text = ", ".join(f"`{t}`" for t in etags)
            lines.append(f"**Tags:** {tag_text}{br}")
        lines.append("")

        # Fields table
        lines.append("| Field | Type | Nullable | PK | Description |")
        lines.append("|-------|------|----------|----|-------------|")
        for field in fields:
            fname = _md_cell(field.get("name", ""))
            ftype = _md_cell(field.get("type", ""))
            fnull = "Yes" if field.get("nullable", True) else "No"
            fpk = "Yes" if field.get("primary_key") else ""
            extras = []
            if field.get("unique"):
                extras.append("UQ")
            if field.get("foreign_key"):
                extras.append("FK")
            if field.get("deprecated"):
                extras.append("DEPRECATED")
            if field.get("sensitivity"):
                extras.append(f"sensitivity:{field['sensitivity']}")
            extra_str = f" [{', '.join(extras)}]" if extras else ""
            fdesc = _md_cell(field.get("description", "")) + _md_cell(extra_str)
            lines.append(f"| `{fname}` | `{ftype}` | {fnull} | {fpk} | {fdesc} |")
        lines.append("")

        # Entity indexes
        ent_indexes = indexes_by_entity.get(ename, [])
        if ent_indexes:
            lines.append("**Indexes:**")
            lines.append("")
            for idx in ent_indexes:
                unique = " (UNIQUE)" if idx.get("unique") else ""
                lines.append(f"- `{idx.get('name', '')}` on ({_md_join(idx.get('fields'))}){unique}")
            lines.append("")

    # Relationships
    if relationships:
        lines.append("---")
        lines.append("")
        lines.append("## Relationships")
        lines.append("")
        lines.append("| Name | From | To | Cardinality | Description |")
        lines.append("|------|------|----|-------------|-------------|")
        for rel in relationships:
            rname = _md_cell(rel.get("name", ""))
            rfrom = _md_cell(rel.get("from", ""))
            rto = _md_cell(rel.get("to", ""))
            rcard = _md_cell(rel.get("cardinality", ""))
            rdesc = _md_cell(rel.get("description", ""))
            lines.append(f"| {rname} | `{rfrom}` | `{rto}` | {rcard} | {rdesc} |")
        lines.append("")

    # Metrics
    if metrics:
        lines.append("---")
        lines.append("")
        lines.append("## Metric Contracts")
        lines.append("")
        lines.append("| Metric | Entity | Aggregation | Grain | Dimensions | Description |")
        lines.append("|--------|--------|-------------|-------|------------|-------------|")
        for metric in metrics:
            mname = _md_cell(metric.get("name", ""))
            mentity = _md_cell(metric.get("entity", ""))
            magg = _md_cell(metric.get("aggregation", ""))
            mgrain = _md_cell(_md_join(metric.get("grain")))
            mdims = _md_cell(_md_join(metric.get("dimensions")))
            mdesc = metric.get("description") or ""
            if metric.get("deprecated"):
                dep_msg = metric.get("deprecated_message", "deprecated")
                mdesc = (str(mdesc) + f" (DEPRECATED: {dep_msg})").strip()
            lines.append(f"| `{mname}` | `{mentity}` | {magg} | {mgrain} | {mdims} | {_md_cell(mdesc)} |")
        lines.append("")

    # Glossary
    if glossary:
        lines.append("---")
        lines.append("")
        lines.append("## Glossary")
        lines.append("")
        for term in glossary:
            tname = term.get("term", "")
            tdef = term.get("definition", "")
            tfields = term.get("related_fields") or []
            lines.append(f"### {tname}")
            if tdef:
                # Hard break so the related-fields line is not folded into
                # the definition paragraph.
                lines.append(f"{tdef}{br}" if tfields else f"{tdef}")
            if tfields:
                related = ", ".join(f"`{f}`" for f in tfields)
                lines.append(f"Related fields: {related}")
            lines.append("")

    # Classifications
    if classifications:
        lines.append("---")
        lines.append("")
        lines.append("## Data Classification")
        lines.append("")
        lines.append("| Target | Classification |")
        lines.append("|--------|----------------|")
        for target, cls in sorted(classifications.items()):
            lines.append(f"| `{_md_cell(target)}` | {_md_cell(cls)} |")
        lines.append("")

    return "\n".join(lines)
685
+
686
+
687
+ # ---------------------------------------------------------------------------
688
+ # Changelog Generation
689
+ # ---------------------------------------------------------------------------
690
+
691
def generate_changelog(
    diff_result: Dict[str, Any],
    new_version: str = "",
    old_version: str = "",
) -> str:
    """Generate a Markdown changelog from a semantic diff result.

    Args:
        diff_result: Mapping describing the diff. The keys read here are
            ``summary`` (per-category counts), ``has_breaking_changes``,
            ``added_entities``, ``removed_entities``, ``changed_entities``,
            ``changed_metrics`` and ``breaking_changes``. Every key is
            optional; a missing or empty section is left out of the output.
        new_version: Label of the newer version. Shown as ``?`` when empty
            but ``old_version`` is given.
        old_version: Label of the older version. Shown as ``?`` when empty
            but ``new_version`` is given.

    Returns:
        The changelog as one newline-joined Markdown string.
    """
    lines = ["# Changelog"]
    if new_version or old_version:
        lines.append(f"**{old_version or '?'}** → **{new_version or '?'}**")
    # Timestamp is the local wall-clock time at minute precision.
    lines.append(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')}")
    lines.append("")

    # Summary counts: (label shown in the changelog, key in the summary dict).
    summary = diff_result.get("summary", {})
    summary_rows = (
        ("Entities added", "added_entities"),
        ("Entities removed", "removed_entities"),
        ("Entities changed", "changed_entities"),
        ("Relationships added", "added_relationships"),
        ("Relationships removed", "removed_relationships"),
        ("Indexes added", "added_indexes"),
        ("Indexes removed", "removed_indexes"),
        ("Metrics added", "added_metrics"),
        ("Metrics removed", "removed_metrics"),
        ("Metrics changed", "changed_metrics"),
    )
    lines.append("## Summary")
    lines.extend(f"- {label}: {summary.get(key, 0)}" for label, key in summary_rows)
    has_breaking = diff_result.get("has_breaking_changes", False)
    lines.append(f"- Breaking changes: {'Yes' if has_breaking else 'None'}")
    lines.append("")

    added = diff_result.get("added_entities", [])
    if added:
        lines.append("## Added Entities")
        lines.extend(f"- `{entity}`" for entity in added)
        lines.append("")

    removed = diff_result.get("removed_entities", [])
    if removed:
        lines.append("## Removed Entities")
        lines.extend(f"- `{entity}`" for entity in removed)
        lines.append("")

    changed = diff_result.get("changed_entities", [])
    if changed:
        lines.append("## Changed Entities")
        for change in changed:
            lines.append(f"### {change.get('entity', '')}")
            lines.extend(
                f"- Added field: `{name}`" for name in change.get("added_fields", [])
            )
            lines.extend(
                f"- Removed field: `{name}`" for name in change.get("removed_fields", [])
            )
            # Use .get() so one incomplete entry cannot abort the whole
            # changelog with a KeyError; unknown values render as "?".
            for tc in change.get("type_changes", []):
                lines.append(
                    f"- Type changed: `{tc.get('field', '')}` "
                    f"({tc.get('from_type', '?')} → {tc.get('to_type', '?')})"
                )
            for nc in change.get("nullability_changes", []):
                lines.append(
                    f"- Nullability changed: `{nc.get('field', '')}` "
                    f"({nc.get('from_nullable', '?')} → {nc.get('to_nullable', '?')})"
                )
            # Blank line closes each entity subsection.
            lines.append("")

    changed_metrics = diff_result.get("changed_metrics", [])
    if changed_metrics:
        lines.append("## Changed Metrics")
        for metric_change in changed_metrics:
            mname = metric_change.get("metric", "")
            changed_fields = metric_change.get("changed_fields", [])
            lines.append(f"- `{mname}`: {', '.join(changed_fields)}")
        lines.append("")

    breaking = diff_result.get("breaking_changes", [])
    if breaking:
        lines.append("## Breaking Changes")
        lines.extend(f"- {item}" for item in breaking)
        lines.append("")

    return "\n".join(lines)
767
+
768
+
769
+ # ---------------------------------------------------------------------------
770
+ # File writers
771
+ # ---------------------------------------------------------------------------
772
+
773
def write_html_docs(model: Dict[str, Any], output_path: str, title: Optional[str] = None) -> str:
    """Render HTML documentation for ``model`` and save it to ``output_path``.

    The document is produced by ``generate_html_docs`` and written as UTF-8.
    Missing parent directories of the target file are created first.

    Returns:
        The output path as a string.
    """
    rendered = generate_html_docs(model, title=title)
    target = Path(output_path)
    # Make sure the destination directory exists before writing.
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(rendered, encoding="utf-8")
    return str(target)
780
+
781
+
782
def write_markdown_docs(model: Dict[str, Any], output_path: str, title: Optional[str] = None) -> str:
    """Render Markdown documentation for ``model`` and save it to ``output_path``.

    The document is produced by ``generate_markdown_docs`` and written as
    UTF-8. Missing parent directories of the target file are created first.

    Returns:
        The output path as a string.
    """
    rendered = generate_markdown_docs(model, title=title)
    target = Path(output_path)
    # Make sure the destination directory exists before writing.
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(rendered, encoding="utf-8")
    return str(target)
789
+
790
+
791
def write_changelog(diff_result: Dict[str, Any], output_path: str, **kwargs) -> str:
    """Render a changelog for ``diff_result`` and save it to ``output_path``.

    Extra keyword arguments are passed unchanged to ``generate_changelog``.
    The text is written as UTF-8, and missing parent directories of the
    target file are created first.

    Returns:
        The output path as a string.
    """
    rendered = generate_changelog(diff_result, **kwargs)
    target = Path(output_path)
    # Make sure the destination directory exists before writing.
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(rendered, encoding="utf-8")
    return str(target)