datalex-cli 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. datalex_cli/__init__.py +1 -0
  2. datalex_cli/datalex_cli.py +658 -0
  3. datalex_cli/main.py +2925 -0
  4. datalex_cli-0.1.1.dist-info/METADATA +228 -0
  5. datalex_cli-0.1.1.dist-info/RECORD +64 -0
  6. datalex_cli-0.1.1.dist-info/WHEEL +5 -0
  7. datalex_cli-0.1.1.dist-info/entry_points.txt +2 -0
  8. datalex_cli-0.1.1.dist-info/licenses/LICENSE +21 -0
  9. datalex_cli-0.1.1.dist-info/top_level.txt +2 -0
  10. datalex_core/__init__.py +94 -0
  11. datalex_core/_schemas/datalex/common.schema.json +127 -0
  12. datalex_core/_schemas/datalex/domain.schema.json +24 -0
  13. datalex_core/_schemas/datalex/entity.schema.json +158 -0
  14. datalex_core/_schemas/datalex/model.schema.json +141 -0
  15. datalex_core/_schemas/datalex/policy.schema.json +70 -0
  16. datalex_core/_schemas/datalex/project.schema.json +82 -0
  17. datalex_core/_schemas/datalex/snippet.schema.json +24 -0
  18. datalex_core/_schemas/datalex/source.schema.json +104 -0
  19. datalex_core/_schemas/datalex/term.schema.json +30 -0
  20. datalex_core/canonical.py +166 -0
  21. datalex_core/completion.py +204 -0
  22. datalex_core/connectors/__init__.py +39 -0
  23. datalex_core/connectors/base.py +417 -0
  24. datalex_core/connectors/bigquery.py +229 -0
  25. datalex_core/connectors/databricks.py +262 -0
  26. datalex_core/connectors/mysql.py +266 -0
  27. datalex_core/connectors/postgres.py +309 -0
  28. datalex_core/connectors/redshift.py +298 -0
  29. datalex_core/connectors/snowflake.py +336 -0
  30. datalex_core/connectors/sqlserver.py +425 -0
  31. datalex_core/datalex/__init__.py +26 -0
  32. datalex_core/datalex/diff.py +188 -0
  33. datalex_core/datalex/errors.py +85 -0
  34. datalex_core/datalex/loader.py +512 -0
  35. datalex_core/datalex/migrate_layout.py +382 -0
  36. datalex_core/datalex/parse_cache.py +102 -0
  37. datalex_core/datalex/project.py +214 -0
  38. datalex_core/datalex/types.py +224 -0
  39. datalex_core/dbt/__init__.py +18 -0
  40. datalex_core/dbt/emit.py +344 -0
  41. datalex_core/dbt/manifest.py +329 -0
  42. datalex_core/dbt/profiles.py +185 -0
  43. datalex_core/dbt/sync.py +279 -0
  44. datalex_core/dbt/warehouse.py +215 -0
  45. datalex_core/dialects/__init__.py +15 -0
  46. datalex_core/dialects/_common.py +48 -0
  47. datalex_core/dialects/base.py +47 -0
  48. datalex_core/dialects/postgres.py +164 -0
  49. datalex_core/dialects/registry.py +36 -0
  50. datalex_core/dialects/snowflake.py +129 -0
  51. datalex_core/diffing.py +358 -0
  52. datalex_core/docs_generator.py +797 -0
  53. datalex_core/doctor.py +181 -0
  54. datalex_core/generators.py +478 -0
  55. datalex_core/importers.py +1176 -0
  56. datalex_core/issues.py +23 -0
  57. datalex_core/loader.py +21 -0
  58. datalex_core/migrate.py +316 -0
  59. datalex_core/modeling.py +679 -0
  60. datalex_core/packages.py +430 -0
  61. datalex_core/policy.py +1037 -0
  62. datalex_core/resolver.py +456 -0
  63. datalex_core/schema.py +54 -0
  64. datalex_core/semantic.py +1561 -0
@@ -0,0 +1,679 @@
1
+ from __future__ import annotations
2
+
3
+ from copy import deepcopy
4
+ import re
5
+ from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
6
+
7
+ from datalex_core.issues import Issue
8
+
9
# Model kinds accepted by infer_model_kind() and transform_model().
MODEL_KINDS = {"conceptual", "logical", "physical"}

# Entity-type groups that appear in both the logical and physical sets below.
DIMENSIONAL_ENTITY_TYPES = {"fact_table", "dimension_table", "bridge_table"}
DATA_VAULT_ENTITY_TYPES = {"hub", "link", "satellite"}

LOGICAL_ENTITY_TYPES = {
    "concept",
    "logical_entity",
    *DIMENSIONAL_ENTITY_TYPES,
    *DATA_VAULT_ENTITY_TYPES,
}
PHYSICAL_ENTITY_TYPES = (
    {"table", "view", "materialized_view", "external_table", "snapshot"}
    | DIMENSIONAL_ENTITY_TYPES
    | DATA_VAULT_ENTITY_TYPES
)

# Style names understood by _matches_style() / _apply_style().
SUPPORTED_NAMING_STYLES = {"pascal_case", "snake_case", "lower_snake_case", "upper_snake_case"}
23
+
24
+
25
+ def _clone(model: Dict[str, Any]) -> Dict[str, Any]:
26
+ return deepcopy(model) if isinstance(model, dict) else {}
27
+
28
+
29
+ def _to_snake(text: str) -> str:
30
+ cleaned = re.sub(r"[^A-Za-z0-9]+", "_", str(text or "").strip())
31
+ cleaned = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", cleaned)
32
+ cleaned = re.sub(r"__+", "_", cleaned).strip("_").lower()
33
+ if not cleaned:
34
+ return ""
35
+ if cleaned[0].isdigit():
36
+ cleaned = f"f_{cleaned}"
37
+ return cleaned
38
+
39
+
40
+ def _to_pascal(text: str) -> str:
41
+ parts = re.split(r"[^A-Za-z0-9]+", str(text or "").strip())
42
+ joined = "".join(p[:1].upper() + p[1:] for p in parts if p)
43
+ return joined or "Entity"
44
+
45
+
46
def _to_upper_snake(text: str) -> str:
    """Convert ``text`` to UPPER_SNAKE_CASE by upper-casing the ``_to_snake`` result."""
    snake = _to_snake(text)
    return snake.upper()
48
+
49
+
50
+ def _merge_unique_strings(*values: Iterable[str]) -> List[str]:
51
+ seen: Set[str] = set()
52
+ merged: List[str] = []
53
+ for collection in values:
54
+ if not isinstance(collection, list):
55
+ continue
56
+ for item in collection:
57
+ value = str(item or "").strip()
58
+ if not value or value in seen:
59
+ continue
60
+ seen.add(value)
61
+ merged.append(value)
62
+ return merged
63
+
64
+
65
def infer_model_kind(model: Dict[str, Any]) -> str:
    """Return the model's kind: ``"conceptual"``, ``"logical"`` or ``"physical"``.

    An explicitly declared ``model.kind`` wins when it is one of
    ``MODEL_KINDS`` (compared case-insensitively after stripping). Otherwise
    the kind is inferred from entity types: any ``concept`` entity makes the
    model conceptual, else any ``logical_entity`` makes it logical, else it is
    physical.

    A ``model`` section that is not a mapping and an ``entities`` section that
    is not a list/tuple (for example an empty YAML key, which loads as
    ``None``) are treated as absent instead of raising.
    """
    meta = model.get("model")
    declared = str(meta.get("kind") or "").strip().lower() if isinstance(meta, dict) else ""
    if declared in MODEL_KINDS:
        return declared

    entities = model.get("entities")
    if not isinstance(entities, (list, tuple)):
        # normalize_model() calls this before it coerces the list sections,
        # so a null "entities" value must be tolerated here.
        entities = []

    entity_types = {
        str(entity.get("type") or "").strip().lower()
        for entity in entities
        if isinstance(entity, dict)
    }
    if "concept" in entity_types:
        return "conceptual"
    if "logical_entity" in entity_types:
        return "logical"
    return "physical"
81
+
82
+
83
+ def _has_v3_sections(model: Dict[str, Any]) -> bool:
84
+ return any(
85
+ model.get(key)
86
+ for key in ("domains", "enums", "templates", "naming_rules", "subject_areas")
87
+ )
88
+
89
+
90
+ def _coerce_list(value: Any) -> List[Any]:
91
+ return value if isinstance(value, list) else []
92
+
93
+
94
+ def _coerce_dict(value: Any) -> Dict[str, Any]:
95
+ return value if isinstance(value, dict) else {}
96
+
97
+
98
def _templates_map(model: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
    """Index the model's ``templates`` list by stripped template name.

    Non-dict entries and entries without a name are skipped; when a name
    repeats, the later entry replaces the earlier one.
    """
    by_name: Dict[str, Dict[str, Any]] = {}
    for entry in _coerce_list(model.get("templates")):
        if isinstance(entry, dict):
            label = str(entry.get("name") or "").strip()
            if label:
                by_name[label] = entry
    return by_name
107
+
108
+
109
def _domains_map(model: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
    """Index the model's ``domains`` list by stripped domain name.

    Non-dict entries and entries without a name are skipped; when a name
    repeats, the later entry replaces the earlier one.
    """
    by_name: Dict[str, Dict[str, Any]] = {}
    for entry in _coerce_list(model.get("domains")):
        if isinstance(entry, dict):
            label = str(entry.get("name") or "").strip()
            if label:
                by_name[label] = entry
    return by_name
118
+
119
+
120
def _template_names(entity: Dict[str, Any]) -> List[str]:
    """List the template names an entity references, in order and without repeats.

    The singular ``template`` value comes first, followed by the items of the
    ``templates`` list. Each value is stringified and stripped; blank values
    are dropped.
    """
    candidates = [entity.get("template"), *_coerce_list(entity.get("templates"))]
    unique: List[str] = []
    for candidate in candidates:
        label = str(candidate or "").strip()
        if label and label not in unique:
            unique.append(label)
    return unique
132
+
133
+
134
def _merge_template(entity: Dict[str, Any], template: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of ``entity`` with ``template`` applied underneath it.

    * ``entity_defaults`` keys are added only when the entity lacks them.
    * ``tags`` becomes the de-duplicated union, template tags first.
    * Template fields come first; an entity field with the same name is laid
      over the template field (entity values win). Remaining named entity
      fields follow. Entity fields without a truthy ``name`` are not carried
      into the rebuilt list.
    * ``fields`` is only replaced when the rebuilt list is non-empty.

    Neither argument is mutated.
    """
    result = deepcopy(entity)

    for attr, default in _coerce_dict(template.get("entity_defaults")).items():
        if attr not in result:
            result[attr] = deepcopy(default)

    result["tags"] = _merge_unique_strings(template.get("tags"), result.get("tags"))

    # Named entity-local fields, waiting to override or follow the template's.
    overrides: Dict[str, Dict[str, Any]] = {}
    for local in _coerce_list(result.get("fields")):
        if isinstance(local, dict) and local.get("name"):
            overrides[str(local.get("name") or "")] = deepcopy(local)

    combined: List[Dict[str, Any]] = []
    for inherited in _coerce_list(template.get("fields")):
        if not isinstance(inherited, dict):
            continue
        base = deepcopy(inherited)
        key = str(base.get("name") or "")
        if key and key in overrides:
            # pop() so the override is consumed once and not appended again below.
            base.update(overrides.pop(key))
        combined.append(base)

    combined += list(overrides.values())
    if combined:
        result["fields"] = combined

    return result
161
+
162
+
163
def _apply_templates(model: Dict[str, Any]) -> None:
    """Expand template references on every entity, mutating ``model`` in place.

    Does nothing when the model defines no named templates. Otherwise
    ``model["entities"]`` is replaced by a new list in which each dict entity
    has had its referenced templates merged in, in reference order; unknown
    template names are ignored and non-dict entities are left out.
    """
    library = _templates_map(model)
    if not library:
        return

    expanded: List[Dict[str, Any]] = []
    for entity in _coerce_list(model.get("entities")):
        if isinstance(entity, dict):
            current = deepcopy(entity)
            for name in _template_names(entity):
                template = library.get(name)
                if template:
                    current = _merge_template(current, template)
            expanded.append(current)
    model["entities"] = expanded
178
+
179
+
180
def _apply_domain_defaults(model: Dict[str, Any]) -> None:
    """Fill field attributes from the domain each field references, in place.

    Does nothing when the model defines no named domains. For every dict
    entity, ``fields`` is rebuilt from copies of its dict fields (non-dict
    fields are left out). When a field's ``domain`` names a known domain:

    * ``type`` is taken from the domain's ``data_type`` if the field has none;
    * ``nullable``, ``default``, ``check``, ``sensitivity``, ``description``
      and ``examples`` are copied when the field lacks the key;
    * ``tags`` becomes the de-duplicated union, domain tags first;
    * ``enum`` is taken from the domain when the field's is ``None``.
    """
    catalog = _domains_map(model)
    if not catalog:
        return

    inheritable = ("nullable", "default", "check", "sensitivity", "description", "examples")

    for entity in _coerce_list(model.get("entities")):
        if not isinstance(entity, dict):
            continue
        resolved = []
        for raw_field in _coerce_list(entity.get("fields")):
            if not isinstance(raw_field, dict):
                continue
            field = deepcopy(raw_field)
            domain = catalog.get(str(field.get("domain") or "").strip())
            if domain:
                data_type = domain.get("data_type")
                if data_type and not field.get("type"):
                    field["type"] = data_type
                for attr in inheritable:
                    if attr in domain and attr not in field:
                        field[attr] = deepcopy(domain[attr])
                field["tags"] = _merge_unique_strings(domain.get("tags"), field.get("tags"))
                domain_enum = domain.get("enum")
                if domain_enum and field.get("enum") is None:
                    field["enum"] = domain_enum
            resolved.append(field)
        entity["fields"] = resolved
206
+
207
+
208
def normalize_model(model: Dict[str, Any]) -> Dict[str, Any]:
    """Return a normalized deep copy of ``model``; the input is not mutated.

    The copy always has a dict ``model`` section with ``kind`` filled in via
    infer_model_kind(). ``spec_version`` is set to 3 when the model uses any
    v3 section, is not physical, or already declares version 3. All list
    sections are coerced to lists and ``governance``, ``display`` and
    ``naming_rules`` to dicts, then templates and domain defaults are applied.
    """
    result = _clone(model)
    meta = _coerce_dict(result.get("model"))
    result["model"] = meta
    meta["kind"] = infer_model_kind(result)

    needs_v3 = (
        _has_v3_sections(result)
        or meta.get("kind") != "physical"
        or meta.get("spec_version") == 3
    )
    if needs_v3:
        meta["spec_version"] = 3

    list_sections = (
        "entities",
        "relationships",
        "indexes",
        "glossary",
        "metrics",
        "rules",
        "domains",
        "enums",
        "templates",
        "subject_areas",
    )
    for section in list_sections:
        result[section] = _coerce_list(result.get(section))
    for section in ("governance", "display", "naming_rules"):
        result[section] = _coerce_dict(result.get(section))

    _apply_templates(result)
    _apply_domain_defaults(result)

    return result
227
+
228
+
229
+ def _style_rule(naming_rules: Dict[str, Any], key: str) -> Tuple[str, str]:
230
+ raw = naming_rules.get(key)
231
+ if isinstance(raw, str):
232
+ return raw, ""
233
+ if isinstance(raw, dict):
234
+ return str(raw.get("style") or "").strip().lower(), str(raw.get("pattern") or "").strip()
235
+ return "", ""
236
+
237
+
238
+ def _matches_style(value: str, style: str) -> bool:
239
+ if not value or not style:
240
+ return True
241
+ if style == "pascal_case":
242
+ return bool(re.fullmatch(r"[A-Z][A-Za-z0-9]*", value))
243
+ if style in {"snake_case", "lower_snake_case"}:
244
+ return bool(re.fullmatch(r"[a-z][a-z0-9_]*", value))
245
+ if style == "upper_snake_case":
246
+ return bool(re.fullmatch(r"[A-Z][A-Z0-9_]*", value))
247
+ return True
248
+
249
+
250
def _apply_style(value: str, style: str) -> str:
    """Rewrite ``value`` in the given naming ``style``.

    ``value`` is returned unchanged when either argument is empty or the
    style is not one of the known style names.
    """
    if not (style and value):
        return value
    converters = {
        "pascal_case": _to_pascal,
        "snake_case": _to_snake,
        "lower_snake_case": _to_snake,
        "upper_snake_case": _to_upper_snake,
    }
    convert = converters.get(style)
    return convert(value) if convert is not None else value
260
+
261
+
262
def _rename_entity_refs(model: Dict[str, Any], entity_map: Dict[str, str], field_maps: Dict[str, Dict[str, str]]) -> None:
    """Rewrite references to renamed entities and fields throughout ``model``.

    ``model`` is mutated in place; nothing happens when both maps are empty.

    Args:
        model: The model whose references are rewritten.
        entity_map: Old entity name -> new entity name.
        field_maps: Entity name (as it was when the map was built) ->
            {old field name -> new field name}.

    Rewritten locations: relationship ``from``/``to`` refs, the keys of
    ``governance.classification`` and ``governance.stewards``, glossary
    ``related_fields``, index ``entity``/``fields``, metric ``entity``/
    ``grain``/``dimensions``/``time_dimension``, and on entities
    ``subtype_of``, ``subtypes``, ``dimension_refs``, ``natural_key``,
    ``surrogate_key``, ``grain`` and ``candidate_keys``.

    Several of these keys are written even when previously absent
    (relationship ``from``/``to``, glossary ``related_fields``, entity
    ``subtypes``/``dimension_refs``, and the two governance maps when
    ``governance`` is a dict). Non-dict items in any section are skipped.
    """
    if not entity_map and not field_maps:
        return

    # apply_standards_fixes() renames entities before calling this function,
    # while field_maps stays keyed by the old entity name. This reverse map
    # lets an already-renamed entity find its own field renames.
    previous_names = {new: old for old, new in entity_map.items()}

    def fields_for(current_name: str) -> Dict[str, str]:
        original = previous_names.get(current_name, current_name)
        return field_maps.get(original) or field_maps.get(current_name) or {}

    def rename_all(names: Any, mapping: Dict[str, str]) -> List[str]:
        return [mapping.get(str(name), str(name)) for name in _coerce_list(names)]

    def rewrite_ref(ref: str) -> str:
        # Refs look like "Entity.field"; values without a dot are left alone.
        if "." not in ref:
            return ref
        entity_name, field_name = ref.split(".", 1)
        next_entity = entity_map.get(entity_name, entity_name)
        next_field = field_maps.get(entity_name, {}).get(field_name, field_name)
        return f"{next_entity}.{next_field}"

    for relationship in _coerce_list(model.get("relationships")):
        if not isinstance(relationship, dict):
            continue
        relationship["from"] = rewrite_ref(str(relationship.get("from") or ""))
        relationship["to"] = rewrite_ref(str(relationship.get("to") or ""))

    governance = _coerce_dict(model.get("governance"))
    for map_name in ("classification", "stewards"):
        values = _coerce_dict(governance.get(map_name))
        governance[map_name] = {rewrite_ref(str(key)): value for key, value in values.items()}

    for glossary in _coerce_list(model.get("glossary")):
        if not isinstance(glossary, dict):
            continue
        glossary["related_fields"] = [rewrite_ref(str(item)) for item in _coerce_list(glossary.get("related_fields"))]

    for index in _coerce_list(model.get("indexes")):
        if not isinstance(index, dict):
            continue
        # Indexes still carry the old entity name here, so field_maps is
        # looked up directly.
        entity_name = str(index.get("entity") or "")
        index["entity"] = entity_map.get(entity_name, entity_name)
        if entity_name in field_maps:
            index["fields"] = rename_all(index.get("fields"), field_maps[entity_name])

    for metric in _coerce_list(model.get("metrics")):
        if not isinstance(metric, dict):
            continue
        entity_name = str(metric.get("entity") or "")
        metric["entity"] = entity_map.get(entity_name, entity_name)
        if entity_name in field_maps:
            mapping = field_maps[entity_name]
            metric["grain"] = rename_all(metric.get("grain"), mapping)
            metric["dimensions"] = rename_all(metric.get("dimensions"), mapping)
            if metric.get("time_dimension"):
                time_dimension = str(metric.get("time_dimension"))
                metric["time_dimension"] = mapping.get(time_dimension, time_dimension)

    for entity in _coerce_list(model.get("entities")):
        if not isinstance(entity, dict):
            continue
        if entity.get("subtype_of"):
            parent = str(entity.get("subtype_of"))
            entity["subtype_of"] = entity_map.get(parent, parent)
        entity["subtypes"] = rename_all(entity.get("subtypes"), entity_map)
        entity["dimension_refs"] = rename_all(entity.get("dimension_refs"), entity_map)

        mapping = fields_for(str(entity.get("name") or ""))
        for attr in ("natural_key", "surrogate_key"):
            if entity.get(attr):
                current = str(entity.get(attr))
                entity[attr] = mapping.get(current, current)
        if entity.get("grain"):
            entity["grain"] = rename_all(entity.get("grain"), mapping)
        candidate_keys = [rename_all(keyset, mapping) for keyset in _coerce_list(entity.get("candidate_keys"))]
        if candidate_keys:
            entity["candidate_keys"] = candidate_keys
327
+
328
+
329
def standards_issues(model: Dict[str, Any]) -> List[Issue]:
    """Check a model against its own naming rules and shared libraries.

    The model is normalized first (the input is not mutated). All findings
    are ``Issue`` objects with severity ``"warn"``:

    * ``NAMING_PATTERN_INVALID`` - a configured pattern is not a valid regular
      expression; that pattern is then skipped instead of aborting the run.
    * ``*_NAMING_RULE`` / ``*_NAMING_PATTERN`` and ``PHYSICAL_NAME_RULE`` /
      ``PHYSICAL_NAME_PATTERN`` - a name violates the configured style or
      fails to fully match the configured pattern.
    * ``SUBJECT_AREA_NOT_DEFINED`` - an entity uses a subject area that is not
      declared, reported only when ``subject_areas`` declares any.
    * ``TEMPLATE_NOT_FOUND`` / ``DOMAIN_NOT_FOUND`` - a reference to an
      undefined template or domain.

    Non-dict entries in ``entities``, ``fields``, ``relationships`` and
    ``indexes`` are skipped.
    """
    normalized = normalize_model(model)
    issues: List[Issue] = []

    naming_rules = _coerce_dict(normalized.get("naming_rules"))
    domains = _domains_map(normalized)
    templates = _templates_map(normalized)
    subject_area_names = {
        str(item.get("name") or "").strip()
        for item in _coerce_list(normalized.get("subject_areas"))
        if isinstance(item, dict)
    }

    # Resolve and compile each rule once; the rules do not change per entity.
    rules: Dict[str, Tuple[str, str, Any]] = {}
    for rule_key in ("entity", "field", "physical_name", "relationship", "index"):
        style, pattern = _style_rule(naming_rules, rule_key)
        compiled = None
        if pattern:
            try:
                compiled = re.compile(pattern)
            except re.error as exc:
                # Patterns come from the model file; report a bad one rather than crash.
                issues.append(Issue("warn", "NAMING_PATTERN_INVALID", f"naming_rules.{rule_key} pattern '{pattern}' is not a valid regular expression: {exc}", f"/naming_rules/{rule_key}/pattern"))
        rules[rule_key] = (style, pattern, compiled)

    def check_name(rule_key: str, code_prefix: str, label: str, name: str, path: str, require_name: bool = False) -> None:
        """Append style/pattern issues for ``name`` under the ``rule_key`` rule."""
        if require_name and not name:
            return
        style, pattern, compiled = rules[rule_key]
        if style and not _matches_style(name, style):
            issues.append(Issue("warn", f"{code_prefix}_RULE", f"{label} does not match naming rule '{style}'.", path))
        if compiled is not None and not compiled.fullmatch(name):
            issues.append(Issue("warn", f"{code_prefix}_PATTERN", f"{label} does not match configured pattern '{pattern}'.", path))

    for entity in _coerce_list(normalized.get("entities")):
        if not isinstance(entity, dict):
            continue
        entity_name = str(entity.get("name") or "")
        check_name("entity", "ENTITY_NAMING", f"Entity '{entity_name}'", entity_name, f"/entities/{entity_name}/name")

        area = str(entity.get("subject_area") or "").strip()
        if area and subject_area_names and area not in subject_area_names:
            issues.append(Issue("warn", "SUBJECT_AREA_NOT_DEFINED", f"Entity '{entity_name}' references subject_area '{area}' which is not declared in subject_areas.", f"/entities/{entity_name}/subject_area"))

        for template_name in _template_names(entity):
            if template_name not in templates:
                issues.append(Issue("warn", "TEMPLATE_NOT_FOUND", f"Entity '{entity_name}' references missing template '{template_name}'.", f"/entities/{entity_name}/templates"))

        for field in _coerce_list(entity.get("fields")):
            if not isinstance(field, dict):
                continue
            field_name = str(field.get("name") or "")
            check_name("field", "FIELD_NAMING", f"Field '{entity_name}.{field_name}'", field_name, f"/entities/{entity_name}/fields/{field_name}/name")
            domain_name = str(field.get("domain") or "").strip()
            if domain_name and domain_name not in domains:
                issues.append(Issue("warn", "DOMAIN_NOT_FOUND", f"Field '{entity_name}.{field_name}' references missing domain '{domain_name}'.", f"/entities/{entity_name}/fields/{field_name}/domain"))

        physical_name = str(entity.get("physical_name") or "")
        check_name("physical_name", "PHYSICAL_NAME", f"physical_name '{physical_name}'", physical_name, f"/entities/{entity_name}/physical_name", require_name=True)

    for relationship in _coerce_list(normalized.get("relationships")):
        if not isinstance(relationship, dict):
            continue
        name = str(relationship.get("name") or "")
        check_name("relationship", "RELATIONSHIP_NAMING", f"Relationship '{name}'", name, "/relationships", require_name=True)

    for index in _coerce_list(normalized.get("indexes")):
        if not isinstance(index, dict):
            continue
        name = str(index.get("name") or "")
        check_name("index", "INDEX_NAMING", f"Index '{name}'", name, "/indexes", require_name=True)

    return issues
398
+
399
+
400
def apply_standards_fixes(model: Dict[str, Any]) -> Tuple[Dict[str, Any], List[str]]:
    """Return a fixed copy of ``model`` plus a log of the changes made.

    The input is normalized (and so not mutated). The following fixes are
    then applied to the copy:

    * entity, field, relationship and index names are rewritten in the style
      configured in ``naming_rules``, and references are updated through
      ``_rename_entity_refs``;
    * in physical models, a missing ``physical_name`` is generated from the
      entity name (``physical_name`` style, default ``upper_snake_case``);
    * when ``subject_areas`` is empty, it is built from the subject areas the
      entities use.

    Non-dict entries in ``entities``, ``fields``, ``relationships`` and
    ``indexes`` are skipped.

    Returns:
        ``(fixed_model, changes)`` where ``changes`` holds one human-readable
        line per modification, in the order applied.
    """
    fixed = normalize_model(model)
    changes: List[str] = []
    naming_rules = _coerce_dict(fixed.get("naming_rules"))

    entity_style, _ = _style_rule(naming_rules, "entity")
    field_style, _ = _style_rule(naming_rules, "field")
    relationship_style, _ = _style_rule(naming_rules, "relationship")
    index_style, _ = _style_rule(naming_rules, "index")
    physical_style, _ = _style_rule(naming_rules, "physical_name")

    # The model kind does not change while entities are processed.
    is_physical = fixed.get("model", {}).get("kind") == "physical"

    entity_map: Dict[str, str] = {}
    field_maps: Dict[str, Dict[str, str]] = {}
    for entity in _coerce_list(fixed.get("entities")):
        if not isinstance(entity, dict):
            continue
        old_entity_name = str(entity.get("name") or "")
        new_entity_name = _apply_style(old_entity_name, entity_style)
        if new_entity_name and new_entity_name != old_entity_name:
            entity_map[old_entity_name] = new_entity_name
            entity["name"] = new_entity_name
            changes.append(f"Renamed entity {old_entity_name} -> {new_entity_name}")

        local_field_map: Dict[str, str] = {}
        for field in _coerce_list(entity.get("fields")):
            if not isinstance(field, dict):
                continue
            old_field_name = str(field.get("name") or "")
            new_field_name = _apply_style(old_field_name, field_style)
            if new_field_name and new_field_name != old_field_name:
                local_field_map[old_field_name] = new_field_name
                field["name"] = new_field_name
                changes.append(f"Renamed field {old_entity_name}.{old_field_name} -> {new_field_name}")
        if local_field_map:
            # Keyed by the old entity name: references elsewhere in the model
            # still use it until _rename_entity_refs runs.
            field_maps[old_entity_name] = local_field_map

        if is_physical and not entity.get("physical_name"):
            style = physical_style or "upper_snake_case"
            entity["physical_name"] = _apply_style(str(entity.get("name") or ""), style)
            changes.append(f"Generated physical_name for {entity.get('name')}")

    _rename_entity_refs(fixed, entity_map, field_maps)

    for relationship in _coerce_list(fixed.get("relationships")):
        if not isinstance(relationship, dict):
            continue
        name = str(relationship.get("name") or "")
        next_name = _apply_style(name, relationship_style)
        if next_name and next_name != name:
            relationship["name"] = next_name
            changes.append(f"Renamed relationship {name} -> {next_name}")

    for index in _coerce_list(fixed.get("indexes")):
        if not isinstance(index, dict):
            continue
        name = str(index.get("name") or "")
        next_name = _apply_style(name, index_style)
        if next_name and next_name != name:
            index["name"] = next_name
            changes.append(f"Renamed index {name} -> {next_name}")

    if not fixed.get("subject_areas"):
        used_areas: Set[str] = set()
        for entity in _coerce_list(fixed.get("entities")):
            if not isinstance(entity, dict):
                continue
            area = str(entity.get("subject_area") or "").strip()
            if area:
                used_areas.add(area)
        if used_areas:
            fixed["subject_areas"] = [{"name": area} for area in sorted(used_areas)]
            changes.append("Created subject_areas library from entity subject_area usage")

    return fixed, changes
471
+
472
+
473
def _copy_entity(entity: Dict[str, Any]) -> Dict[str, Any]:
    """Deep-copy ``entity`` and make ``fields`` a list of dict fields only.

    The returned entity always has a ``fields`` list, empty when the source
    had none or it was not a list.
    """
    duplicate = deepcopy(entity)
    kept_fields = []
    for item in _coerce_list(duplicate.get("fields")):
        if isinstance(item, dict):
            kept_fields.append(deepcopy(item))
    duplicate["fields"] = kept_fields
    return duplicate
477
+
478
+
479
def _logical_fields(entity: Dict[str, Any], naming_rules: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Build logical-model copies of an entity's dict fields.

    Each copy records its origin in ``mapped_from`` as ``"<entity>.<field>"``,
    has its name rewritten in the configured field style (``snake_case`` when
    none is configured) and loses any ``physical_name``.
    """
    style = _style_rule(naming_rules, "field")[0] or "snake_case"
    logical: List[Dict[str, Any]] = []
    for source in _coerce_list(entity.get("fields")):
        if isinstance(source, dict):
            derived = deepcopy(source)
            derived["mapped_from"] = f"{entity.get('name')}.{source.get('name')}"
            derived["name"] = _apply_style(str(source.get("name") or ""), style)
            derived.pop("physical_name", None)
            logical.append(derived)
    return logical
491
+
492
+
493
def _field_type_for_physical(field: Dict[str, Any], domains: Dict[str, Dict[str, Any]], dialect: str) -> str:
    """Pick the physical column type for ``field``.

    Precedence: the referenced domain's ``physical_types[dialect]``, then the
    domain's ``data_type``, then the field's own ``type``, then ``"string"``.
    """
    domain_key = str(field.get("domain") or "").strip()
    domain = domains.get(domain_key, {})
    dialect_type = _coerce_dict(domain.get("physical_types")).get(dialect)
    if dialect_type:
        return str(dialect_type)
    return str(domain.get("data_type") or field.get("type") or "string")
501
+
502
+
503
def _keyset_to_primary_keys(entity: Dict[str, Any]) -> None:
    """Promote the first candidate key to the primary key, in place.

    Nothing changes when some field already has a truthy ``primary_key`` or
    when there are no candidate keys. Otherwise every field named in the
    first candidate key gets ``primary_key = True`` and ``nullable = False``;
    names that match no field are ignored.
    """
    by_name: Dict[str, Dict[str, Any]] = {}
    for item in _coerce_list(entity.get("fields")):
        if isinstance(item, dict):
            by_name[str(item.get("name") or "")] = item

    for item in by_name.values():
        if item.get("primary_key"):
            return

    keysets = _coerce_list(entity.get("candidate_keys"))
    if not keysets:
        return
    for key_name in _coerce_list(keysets[0]):
        target = by_name.get(str(key_name))
        if target:
            target["primary_key"] = True
            target["nullable"] = False
515
+
516
+
517
def _build_relationship_field_maps(model: Dict[str, Any]) -> Dict[str, Dict[str, str]]:
    """Map each entity name to ``{field origin -> current field name}``.

    A field's origin is its ``mapped_from`` value when set, otherwise its own
    name. Non-dict entities and fields are skipped.
    """
    lookup: Dict[str, Dict[str, str]] = {}
    for entity in _coerce_list(model.get("entities")):
        if not isinstance(entity, dict):
            continue
        origins: Dict[str, str] = {}
        for field in _coerce_list(entity.get("fields")):
            if isinstance(field, dict):
                origin = str(field.get("mapped_from") or field.get("name") or "")
                origins[origin] = str(field.get("name") or "")
        lookup[str(entity.get("name") or "")] = origins
    return lookup
527
+
528
+
529
def _remap_relationships(source_model: Dict[str, Any], target_model: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Copy the source model's relationships, re-pointed at the target model.

    Each ``from``/``to`` ref of the form ``"Entity.field"`` is rewritten using
    the target entities' ``mapped_from``/``derived_from`` values and the
    target fields' ``mapped_from`` values. Refs without a dot are kept as
    they are, and entities or fields with no mapping keep their source name.

    Returns:
        New relationship dicts; the source relationships are not mutated and
        non-dict entries are dropped.
    """
    entity_map: Dict[str, str] = {}
    for entity in _coerce_list(target_model.get("entities")):
        if isinstance(entity, dict):
            origin = str(entity.get("mapped_from") or entity.get("derived_from") or entity.get("name") or "")
            entity_map[origin] = str(entity.get("name") or "")
    field_map = _build_relationship_field_maps(target_model)

    relationships = []
    for relationship in _coerce_list(source_model.get("relationships")):
        if not isinstance(relationship, dict):
            continue
        new_rel = deepcopy(relationship)
        for key in ("from", "to"):
            ref = str(relationship.get(key) or "")
            if "." not in ref:
                continue
            source_entity, source_field = ref.split(".", 1)
            next_entity = entity_map.get(source_entity, source_entity)
            renames = field_map.get(next_entity, {})
            # Field origins are stored either as the bare field name or, by
            # _logical_fields, as "Entity.field"; try both forms so renamed
            # logical fields are found.
            qualified = f"{source_entity}.{source_field}"
            if source_field in renames:
                next_field = renames[source_field]
            elif qualified in renames:
                next_field = renames[qualified]
            else:
                next_field = source_field
            new_rel[key] = f"{next_entity}.{next_field}"
        relationships.append(new_rel)
    return relationships
552
+
553
+
554
def transform_model(model: Dict[str, Any], target_kind: str, dialect: str = "postgres") -> Dict[str, Any]:
    """Derive a model of ``target_kind`` from ``model``.

    Supported paths are conceptual -> logical, logical -> physical, and
    conceptual -> physical (performed as the two steps in sequence). When the
    model already has the target kind, its normalized copy is returned.

    Args:
        model: Source model; it is normalized first and never mutated.
        target_kind: ``"conceptual"``, ``"logical"`` or ``"physical"``
            (case-insensitive, surrounding whitespace ignored).
        dialect: Key into a domain's ``physical_types`` used when resolving
            physical column types.

    Returns:
        The normalized transformed model, with ``spec_version`` 3.

    Raises:
        ValueError: If ``target_kind`` is unknown or the source -> target path
            is unsupported. The path is checked before any entity is
            processed, so a model without entities is rejected as well.
    """
    normalized = normalize_model(model)
    source_kind = infer_model_kind(normalized)
    target = str(target_kind or "").strip().lower()
    if target not in MODEL_KINDS:
        raise ValueError(f"Unsupported target kind '{target_kind}'. Use one of: conceptual, logical, physical.")
    if source_kind == target:
        return normalized

    if source_kind == "conceptual" and target == "physical":
        logical = transform_model(normalized, "logical", dialect=dialect)
        return transform_model(logical, "physical", dialect=dialect)

    to_logical = source_kind == "conceptual" and target == "logical"
    to_physical = source_kind == "logical" and target == "physical"
    if not (to_logical or to_physical):
        raise ValueError(f"Unsupported transform path: {source_kind} -> {target}")

    naming_rules = _coerce_dict(normalized.get("naming_rules"))
    domains = _domains_map(normalized)
    transformed = deepcopy(normalized)
    transformed["model"]["kind"] = target
    transformed["model"]["spec_version"] = 3

    # Attributes removed from entities when producing a logical model.
    physical_only_keys = (
        "physical_name",
        "schema",
        "database",
        "partition_by",
        "cluster_by",
        "distribution",
        "storage",
        "identity",
        "sequence",
    )

    entities: List[Dict[str, Any]] = []
    for entity in _coerce_list(normalized.get("entities")):
        if not isinstance(entity, dict):
            continue
        next_entity = _copy_entity(entity)
        source_entity_name = str(entity.get("name") or "")

        if to_logical:
            next_entity["type"] = "logical_entity"
            next_entity["derived_from"] = source_entity_name
            next_entity["mapped_from"] = source_entity_name
            next_entity["name"] = _apply_style(source_entity_name, _style_rule(naming_rules, "entity")[0] or "pascal_case")
            next_entity["fields"] = _logical_fields(entity, naming_rules)
            for key in physical_only_keys:
                next_entity.pop(key, None)
            next_entity.setdefault("candidate_keys", [])
            if any(field.get("primary_key") for field in next_entity.get("fields", [])):
                # Primary-key flags become the candidate key and are then removed.
                next_entity["candidate_keys"] = [[field["name"] for field in next_entity["fields"] if field.get("primary_key")]]
                for field in next_entity["fields"]:
                    field.pop("primary_key", None)
        else:
            source_entity_type = str(entity.get("type") or "")
            next_entity["type"] = "table" if source_entity_type in {"concept", "logical_entity"} else (source_entity_type or "table")
            next_entity["derived_from"] = source_entity_name
            next_entity["mapped_from"] = source_entity_name
            next_entity["physical_name"] = str(entity.get("physical_name") or _apply_style(source_entity_name, _style_rule(naming_rules, "physical_name")[0] or "upper_snake_case"))
            resolved_fields: List[Dict[str, Any]] = []
            for field in _coerce_list(entity.get("fields")):
                if not isinstance(field, dict):
                    continue
                next_field = deepcopy(field)
                next_field["mapped_from"] = str(field.get("mapped_from") or field.get("name") or "")
                next_field["type"] = _field_type_for_physical(next_field, domains, dialect)
                resolved_fields.append(next_field)
            next_entity["fields"] = resolved_fields
            _keyset_to_primary_keys(next_entity)

        entities.append(next_entity)

    transformed["entities"] = entities
    transformed["relationships"] = _remap_relationships(normalized, transformed)
    return normalize_model(transformed)
626
+
627
+
628
def merge_models_preserving_docs(current: Dict[str, Any], candidate: Dict[str, Any]) -> Dict[str, Any]:
    """Take ``candidate`` as the new model while keeping ``current``'s documentation.

    Both models are normalized first. Entities and fields are matched by
    name. For a matched entity, the truthy ``description``, ``owner``,
    ``subject_area``, ``tags``, ``grain`` and ``sla`` values of the current
    entity replace the candidate's. For a matched field the same applies to
    ``description``, ``tags``, ``sensitivity``, ``examples``, ``deprecated``,
    ``deprecated_message`` and ``domain``. Unmatched candidate entities and
    fields are kept as they are. A non-empty ``glossary`` or
    ``subject_areas`` on the current model replaces the candidate's.

    Returns:
        The normalized merged model; neither input is mutated.
    """
    entity_doc_keys = ("description", "owner", "subject_area", "tags", "grain", "sla")
    field_doc_keys = ("description", "tags", "sensitivity", "examples", "deprecated", "deprecated_message", "domain")

    existing = normalize_model(current)
    incoming = normalize_model(candidate)

    existing_by_name: Dict[str, Dict[str, Any]] = {}
    for item in _coerce_list(existing.get("entities")):
        if isinstance(item, dict):
            existing_by_name[str(item.get("name") or "")] = item

    result = deepcopy(incoming)
    combined: List[Dict[str, Any]] = []
    for entity in _coerce_list(incoming.get("entities")):
        if not isinstance(entity, dict):
            continue
        prior = existing_by_name.get(str(entity.get("name") or ""))
        if not prior:
            combined.append(entity)
            continue

        updated = deepcopy(entity)
        for key in entity_doc_keys:
            if prior.get(key):
                updated[key] = deepcopy(prior[key])

        prior_fields: Dict[str, Dict[str, Any]] = {}
        for item in _coerce_list(prior.get("fields")):
            if isinstance(item, dict):
                prior_fields[str(item.get("name") or "")] = item

        fields_out = []
        for field in _coerce_list(entity.get("fields")):
            if not isinstance(field, dict):
                continue
            prior_field = prior_fields.get(str(field.get("name") or ""))
            if not prior_field:
                fields_out.append(field)
                continue
            documented = deepcopy(field)
            for key in field_doc_keys:
                if prior_field.get(key):
                    documented[key] = deepcopy(prior_field[key])
            fields_out.append(documented)
        updated["fields"] = fields_out
        combined.append(updated)

    result["entities"] = combined
    for section in ("glossary", "subject_areas"):
        if existing.get(section):
            result[section] = deepcopy(existing[section])
    return normalize_model(result)