datalex-cli 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datalex_cli/__init__.py +1 -0
- datalex_cli/datalex_cli.py +658 -0
- datalex_cli/main.py +2925 -0
- datalex_cli-0.1.1.dist-info/METADATA +228 -0
- datalex_cli-0.1.1.dist-info/RECORD +64 -0
- datalex_cli-0.1.1.dist-info/WHEEL +5 -0
- datalex_cli-0.1.1.dist-info/entry_points.txt +2 -0
- datalex_cli-0.1.1.dist-info/licenses/LICENSE +21 -0
- datalex_cli-0.1.1.dist-info/top_level.txt +2 -0
- datalex_core/__init__.py +94 -0
- datalex_core/_schemas/datalex/common.schema.json +127 -0
- datalex_core/_schemas/datalex/domain.schema.json +24 -0
- datalex_core/_schemas/datalex/entity.schema.json +158 -0
- datalex_core/_schemas/datalex/model.schema.json +141 -0
- datalex_core/_schemas/datalex/policy.schema.json +70 -0
- datalex_core/_schemas/datalex/project.schema.json +82 -0
- datalex_core/_schemas/datalex/snippet.schema.json +24 -0
- datalex_core/_schemas/datalex/source.schema.json +104 -0
- datalex_core/_schemas/datalex/term.schema.json +30 -0
- datalex_core/canonical.py +166 -0
- datalex_core/completion.py +204 -0
- datalex_core/connectors/__init__.py +39 -0
- datalex_core/connectors/base.py +417 -0
- datalex_core/connectors/bigquery.py +229 -0
- datalex_core/connectors/databricks.py +262 -0
- datalex_core/connectors/mysql.py +266 -0
- datalex_core/connectors/postgres.py +309 -0
- datalex_core/connectors/redshift.py +298 -0
- datalex_core/connectors/snowflake.py +336 -0
- datalex_core/connectors/sqlserver.py +425 -0
- datalex_core/datalex/__init__.py +26 -0
- datalex_core/datalex/diff.py +188 -0
- datalex_core/datalex/errors.py +85 -0
- datalex_core/datalex/loader.py +512 -0
- datalex_core/datalex/migrate_layout.py +382 -0
- datalex_core/datalex/parse_cache.py +102 -0
- datalex_core/datalex/project.py +214 -0
- datalex_core/datalex/types.py +224 -0
- datalex_core/dbt/__init__.py +18 -0
- datalex_core/dbt/emit.py +344 -0
- datalex_core/dbt/manifest.py +329 -0
- datalex_core/dbt/profiles.py +185 -0
- datalex_core/dbt/sync.py +279 -0
- datalex_core/dbt/warehouse.py +215 -0
- datalex_core/dialects/__init__.py +15 -0
- datalex_core/dialects/_common.py +48 -0
- datalex_core/dialects/base.py +47 -0
- datalex_core/dialects/postgres.py +164 -0
- datalex_core/dialects/registry.py +36 -0
- datalex_core/dialects/snowflake.py +129 -0
- datalex_core/diffing.py +358 -0
- datalex_core/docs_generator.py +797 -0
- datalex_core/doctor.py +181 -0
- datalex_core/generators.py +478 -0
- datalex_core/importers.py +1176 -0
- datalex_core/issues.py +23 -0
- datalex_core/loader.py +21 -0
- datalex_core/migrate.py +316 -0
- datalex_core/modeling.py +679 -0
- datalex_core/packages.py +430 -0
- datalex_core/policy.py +1037 -0
- datalex_core/resolver.py +456 -0
- datalex_core/schema.py +54 -0
- datalex_core/semantic.py +1561 -0
datalex_core/modeling.py
ADDED
|
@@ -0,0 +1,679 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from copy import deepcopy
|
|
4
|
+
import re
|
|
5
|
+
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
|
|
6
|
+
|
|
7
|
+
from datalex_core.issues import Issue
|
|
8
|
+
|
|
9
|
+
# Modelling layers a model document can declare.
MODEL_KINDS = {"conceptual", "logical", "physical"}

# Entity types shared by the logical and physical layers.
DIMENSIONAL_ENTITY_TYPES = {"fact_table", "dimension_table", "bridge_table"}
DATA_VAULT_ENTITY_TYPES = {"hub", "link", "satellite"}

# Entity types accepted per layer: layer-specific types plus the shared ones.
LOGICAL_ENTITY_TYPES = (
    {"concept", "logical_entity"}
    | DIMENSIONAL_ENTITY_TYPES
    | DATA_VAULT_ENTITY_TYPES
)
PHYSICAL_ENTITY_TYPES = (
    {"table", "view", "materialized_view", "external_table", "snapshot"}
    | DIMENSIONAL_ENTITY_TYPES
    | DATA_VAULT_ENTITY_TYPES
)

# Style names understood by the naming-rule helpers.
SUPPORTED_NAMING_STYLES = {
    "pascal_case",
    "snake_case",
    "lower_snake_case",
    "upper_snake_case",
}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _clone(model: Dict[str, Any]) -> Dict[str, Any]:
|
|
26
|
+
return deepcopy(model) if isinstance(model, dict) else {}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _to_snake(text: str) -> str:
|
|
30
|
+
cleaned = re.sub(r"[^A-Za-z0-9]+", "_", str(text or "").strip())
|
|
31
|
+
cleaned = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", cleaned)
|
|
32
|
+
cleaned = re.sub(r"__+", "_", cleaned).strip("_").lower()
|
|
33
|
+
if not cleaned:
|
|
34
|
+
return ""
|
|
35
|
+
if cleaned[0].isdigit():
|
|
36
|
+
cleaned = f"f_{cleaned}"
|
|
37
|
+
return cleaned
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _to_pascal(text: str) -> str:
|
|
41
|
+
parts = re.split(r"[^A-Za-z0-9]+", str(text or "").strip())
|
|
42
|
+
joined = "".join(p[:1].upper() + p[1:] for p in parts if p)
|
|
43
|
+
return joined or "Entity"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _to_upper_snake(text: str) -> str:
    """Convert ``text`` to UPPER_SNAKE_CASE by upper-casing its snake-case form."""
    lower_snake = _to_snake(text)
    return lower_snake.upper()
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _merge_unique_strings(*values: Iterable[str]) -> List[str]:
|
|
51
|
+
seen: Set[str] = set()
|
|
52
|
+
merged: List[str] = []
|
|
53
|
+
for collection in values:
|
|
54
|
+
if not isinstance(collection, list):
|
|
55
|
+
continue
|
|
56
|
+
for item in collection:
|
|
57
|
+
value = str(item or "").strip()
|
|
58
|
+
if not value or value in seen:
|
|
59
|
+
continue
|
|
60
|
+
seen.add(value)
|
|
61
|
+
merged.append(value)
|
|
62
|
+
return merged
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def infer_model_kind(model: Dict[str, Any]) -> str:
    """Return the modelling layer of ``model``: conceptual, logical or physical.

    A ``model.kind`` value that is one of ``MODEL_KINDS`` (case-insensitive,
    surrounding whitespace ignored) wins. Otherwise the kind is inferred from
    the entity types: any ``concept`` entity means conceptual, any
    ``logical_entity`` means logical, and everything else is physical.

    A ``model`` or ``entities`` section that is present but not of the
    expected type (for example ``None``) is treated as empty instead of
    raising.
    """
    meta = _coerce_dict(model.get("model"))
    declared = str(meta.get("kind") or "").strip().lower()
    if declared in MODEL_KINDS:
        return declared

    entity_types = {
        str(entity.get("type") or "").strip().lower()
        for entity in _coerce_list(model.get("entities"))
        if isinstance(entity, dict)
    }
    if "concept" in entity_types:
        return "conceptual"
    if "logical_entity" in entity_types:
        return "logical"
    return "physical"
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _has_v3_sections(model: Dict[str, Any]) -> bool:
|
|
84
|
+
return any(
|
|
85
|
+
model.get(key)
|
|
86
|
+
for key in ("domains", "enums", "templates", "naming_rules", "subject_areas")
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _coerce_list(value: Any) -> List[Any]:
|
|
91
|
+
return value if isinstance(value, list) else []
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _coerce_dict(value: Any) -> Dict[str, Any]:
|
|
95
|
+
return value if isinstance(value, dict) else {}
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _templates_map(model: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
    """Index the model's ``templates`` entries by their stripped ``name``.

    Non-dict entries and entries without a name are skipped; when a name is
    repeated the last definition wins.
    """
    named = (
        (str(entry.get("name") or "").strip(), entry)
        for entry in _coerce_list(model.get("templates"))
        if isinstance(entry, dict)
    )
    return {label: entry for label, entry in named if label}
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _domains_map(model: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
    """Index the model's ``domains`` entries by their stripped ``name``.

    Non-dict entries and entries without a name are skipped; when a name is
    repeated the last definition wins.
    """
    by_name: Dict[str, Dict[str, Any]] = {}
    for entry in _coerce_list(model.get("domains")):
        if isinstance(entry, dict):
            label = str(entry.get("name") or "").strip()
            if label:
                by_name[label] = entry
    return by_name
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _template_names(entity: Dict[str, Any]) -> List[str]:
    """List the template names an entity references, in order and without duplicates.

    The singular ``template`` value comes first, followed by the entries of
    the ``templates`` list; blank names are dropped.
    """
    candidates = [entity.get("template"), *_coerce_list(entity.get("templates"))]
    # dict keys give order-preserving de-duplication
    ordered: Dict[str, None] = {}
    for candidate in candidates:
        label = str(candidate or "").strip()
        if label:
            ordered.setdefault(label, None)
    return list(ordered)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _merge_template(entity: Dict[str, Any], template: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of ``entity`` with ``template`` applied; inputs are not mutated.

    * ``entity_defaults`` only fill keys the entity does not already have.
    * ``tags`` becomes the template tags followed by the entity tags, de-duplicated.
    * Template fields come first; an entity field with the same name overrides
      the template field key by key. Remaining named entity fields follow.
      Entity fields without a name are not carried over.
    """
    result = deepcopy(entity)

    defaults = _coerce_dict(template.get("entity_defaults"))
    for attr in defaults:
        if attr not in result:
            result[attr] = deepcopy(defaults[attr])

    result["tags"] = _merge_unique_strings(template.get("tags"), result.get("tags"))

    # Named entity-level fields, copied so the merge never aliases the input.
    overrides: Dict[str, Dict[str, Any]] = {}
    for own in _coerce_list(result.get("fields")):
        if isinstance(own, dict) and own.get("name"):
            overrides[str(own.get("name") or "")] = deepcopy(own)

    combined: List[Dict[str, Any]] = []
    for inherited in _coerce_list(template.get("fields")):
        if not isinstance(inherited, dict):
            continue
        base = deepcopy(inherited)
        key = str(base.get("name") or "")
        if key and key in overrides:
            # Entity values win over the template's for the same field.
            base.update(overrides.pop(key))
        combined.append(base)
    combined.extend(overrides.values())

    if combined:
        result["fields"] = combined
    return result
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _apply_templates(model: Dict[str, Any]) -> None:
    """Expand template references on every entity of ``model`` in place.

    Does nothing when the model defines no named templates. Otherwise the
    ``entities`` list is rebuilt from copies of the dict entries (non-dict
    entries are dropped), each merged with its referenced templates in order.
    Unknown template names are ignored here.
    """
    library = _templates_map(model)
    if not library:
        return

    expanded: List[Dict[str, Any]] = []
    for entity in _coerce_list(model.get("entities")):
        if isinstance(entity, dict):
            current = deepcopy(entity)
            for label in _template_names(entity):
                definition = library.get(label)
                if definition:
                    current = _merge_template(current, definition)
            expanded.append(current)
    model["entities"] = expanded
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _apply_domain_defaults(model: Dict[str, Any]) -> None:
    """Fill field attributes from the domain each field references, in place.

    Does nothing when the model defines no named domains. For every dict
    entity, ``fields`` is rebuilt from copies of its dict fields. When a
    field names a known domain:

    * ``type`` is taken from the domain's ``data_type`` if the field has none;
    * ``nullable``, ``default``, ``check``, ``sensitivity``, ``description``
      and ``examples`` are copied when the field does not define them;
    * ``tags`` becomes domain tags followed by field tags, de-duplicated;
    * ``enum`` is copied from the domain when the field's is missing/None.

    All inherited values are deep-copied so that later edits to one field
    cannot leak into the domain definition or into sibling fields.
    """
    domains = _domains_map(model)
    if not domains:
        return

    inherited_keys = ("nullable", "default", "check", "sensitivity", "description", "examples")
    for entity in _coerce_list(model.get("entities")):
        if not isinstance(entity, dict):
            continue
        fields = []
        for field in _coerce_list(entity.get("fields")):
            if not isinstance(field, dict):
                continue
            merged = deepcopy(field)
            domain_name = str(merged.get("domain") or "").strip()
            domain = domains.get(domain_name)
            if domain:
                if not merged.get("type") and domain.get("data_type"):
                    merged["type"] = domain.get("data_type")
                for key in inherited_keys:
                    if key not in merged and key in domain:
                        merged[key] = deepcopy(domain[key])
                merged["tags"] = _merge_unique_strings(domain.get("tags"), merged.get("tags"))
                if merged.get("enum") is None and domain.get("enum"):
                    # Copy: the enum list must not be shared with the domain.
                    merged["enum"] = deepcopy(domain.get("enum"))
            fields.append(merged)
        entity["fields"] = fields
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def normalize_model(model: Dict[str, Any]) -> Dict[str, Any]:
    """Return a normalised deep copy of ``model``; the input is left untouched.

    The copy always has a ``model`` dict with an inferred ``kind``, list-typed
    collection sections, and dict-typed ``governance``, ``display`` and
    ``naming_rules``. ``spec_version`` is set to 3 when v3 sections are
    present, the kind is not physical, or version 3 was already declared.
    Templates and domain defaults are then expanded onto the entities.
    """
    result = _clone(model)
    header = _coerce_dict(result.get("model"))
    result["model"] = header
    header["kind"] = infer_model_kind(result)

    needs_v3 = (
        _has_v3_sections(result)
        or header.get("kind") != "physical"
        or header.get("spec_version") == 3
    )
    if needs_v3:
        header["spec_version"] = 3

    list_sections = (
        "entities",
        "relationships",
        "indexes",
        "glossary",
        "metrics",
        "rules",
        "domains",
        "enums",
        "templates",
        "subject_areas",
    )
    for section in list_sections:
        result[section] = _coerce_list(result.get(section))
    for section in ("governance", "display", "naming_rules"):
        result[section] = _coerce_dict(result.get(section))

    _apply_templates(result)
    _apply_domain_defaults(result)
    return result
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def _style_rule(naming_rules: Dict[str, Any], key: str) -> Tuple[str, str]:
|
|
230
|
+
raw = naming_rules.get(key)
|
|
231
|
+
if isinstance(raw, str):
|
|
232
|
+
return raw, ""
|
|
233
|
+
if isinstance(raw, dict):
|
|
234
|
+
return str(raw.get("style") or "").strip().lower(), str(raw.get("pattern") or "").strip()
|
|
235
|
+
return "", ""
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _matches_style(value: str, style: str) -> bool:
|
|
239
|
+
if not value or not style:
|
|
240
|
+
return True
|
|
241
|
+
if style == "pascal_case":
|
|
242
|
+
return bool(re.fullmatch(r"[A-Z][A-Za-z0-9]*", value))
|
|
243
|
+
if style in {"snake_case", "lower_snake_case"}:
|
|
244
|
+
return bool(re.fullmatch(r"[a-z][a-z0-9_]*", value))
|
|
245
|
+
if style == "upper_snake_case":
|
|
246
|
+
return bool(re.fullmatch(r"[A-Z][A-Z0-9_]*", value))
|
|
247
|
+
return True
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def _apply_style(value: str, style: str) -> str:
    """Convert ``value`` to the named style; unknown styles and empty inputs return it unchanged."""
    if not (style and value):
        return value
    converters = {
        "pascal_case": _to_pascal,
        "snake_case": _to_snake,
        "lower_snake_case": _to_snake,
        "upper_snake_case": _to_upper_snake,
    }
    convert = converters.get(style)
    if convert is None:
        return value
    return convert(value)
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def _rename_entity_refs(model: Dict[str, Any], entity_map: Dict[str, str], field_maps: Dict[str, Dict[str, str]]) -> None:
    """Propagate entity and field renames to everything in ``model`` that refers to them.

    Args:
        model: Model document, updated in place.
        entity_map: old entity name -> new entity name.
        field_maps: old entity name -> {old field name -> new field name}.

    Rewrites ``Entity.field`` references in relationships, governance
    ``classification``/``stewards`` keys and glossary ``related_fields``;
    entity and field names in indexes and metrics; and the entity-level
    ``subtype_of``, ``subtypes``, ``dimension_refs``, ``natural_key``,
    ``surrogate_key``, ``grain`` and ``candidate_keys`` attributes.
    Entries that are not dicts are skipped.
    """
    if not entity_map and not field_maps:
        return

    # field_maps is keyed by the *old* entity name, but an entity dict may
    # already carry its new name when this runs. Keep a reverse lookup so the
    # entity's own key/grain attributes still find their field mapping.
    original_names = {new_name: old_name for old_name, new_name in entity_map.items()}

    def rewrite_ref(ref: str) -> str:
        if "." not in ref:
            return ref
        entity_name, field_name = ref.split(".", 1)
        next_entity = entity_map.get(entity_name, entity_name)
        next_field = field_maps.get(entity_name, {}).get(field_name, field_name)
        return f"{next_entity}.{next_field}"

    def rename_all(mapping: Dict[str, str], names: Any) -> List[str]:
        return [mapping.get(str(name), str(name)) for name in _coerce_list(names)]

    for relationship in _coerce_list(model.get("relationships")):
        if not isinstance(relationship, dict):
            continue
        relationship["from"] = rewrite_ref(str(relationship.get("from") or ""))
        relationship["to"] = rewrite_ref(str(relationship.get("to") or ""))

    governance = _coerce_dict(model.get("governance"))
    for map_name in ("classification", "stewards"):
        values = _coerce_dict(governance.get(map_name))
        governance[map_name] = {rewrite_ref(str(key)): value for key, value in values.items()}

    for glossary in _coerce_list(model.get("glossary")):
        if not isinstance(glossary, dict):
            continue
        glossary["related_fields"] = [rewrite_ref(str(item)) for item in _coerce_list(glossary.get("related_fields"))]

    for index in _coerce_list(model.get("indexes")):
        if not isinstance(index, dict):
            continue
        entity_name = str(index.get("entity") or "")
        index["entity"] = entity_map.get(entity_name, entity_name)
        if entity_name in field_maps:
            index["fields"] = rename_all(field_maps[entity_name], index.get("fields"))

    for metric in _coerce_list(model.get("metrics")):
        if not isinstance(metric, dict):
            continue
        entity_name = str(metric.get("entity") or "")
        metric["entity"] = entity_map.get(entity_name, entity_name)
        if entity_name in field_maps:
            mapping = field_maps[entity_name]
            metric["grain"] = rename_all(mapping, metric.get("grain"))
            metric["dimensions"] = rename_all(mapping, metric.get("dimensions"))
            if metric.get("time_dimension"):
                time_dimension = str(metric.get("time_dimension"))
                metric["time_dimension"] = mapping.get(time_dimension, time_dimension)

    for entity in _coerce_list(model.get("entities")):
        if not isinstance(entity, dict):
            continue
        if entity.get("subtype_of"):
            parent = str(entity.get("subtype_of"))
            entity["subtype_of"] = entity_map.get(parent, parent)
        entity["subtypes"] = rename_all(entity_map, entity.get("subtypes"))
        entity["dimension_refs"] = rename_all(entity_map, entity.get("dimension_refs"))

        current_name = str(entity.get("name") or "")
        mapping = field_maps.get(original_names.get(current_name, current_name), {})
        for key_attr in ("natural_key", "surrogate_key"):
            if entity.get(key_attr):
                key_field = str(entity.get(key_attr))
                entity[key_attr] = mapping.get(key_field, key_field)
        if entity.get("grain"):
            entity["grain"] = rename_all(mapping, entity.get("grain"))
        candidate_keys = [rename_all(mapping, keyset) for keyset in _coerce_list(entity.get("candidate_keys"))]
        if candidate_keys:
            entity["candidate_keys"] = candidate_keys
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def standards_issues(model: Dict[str, Any]) -> List[Issue]:
    """Lint ``model`` against its own standards and return the warnings found.

    The model is normalised first (the input is not modified). Checks:

    * entity, field, ``physical_name``, relationship and index names against
      the style and/or regex pattern configured in ``naming_rules``;
    * entity ``subject_area`` values against the declared ``subject_areas``
      (only when at least one subject area is declared);
    * template and domain references against the declared libraries.

    Entries that are not dicts are skipped. All findings have severity
    ``"warn"``. A configured pattern is used with ``re.fullmatch`` as given;
    an invalid regular expression raises ``re.error``.
    """
    normalized = normalize_model(model)
    issues: List[Issue] = []

    naming_rules = _coerce_dict(normalized.get("naming_rules"))
    domains = _domains_map(normalized)
    templates = _templates_map(normalized)
    subject_area_names = {
        str(item.get("name") or "").strip()
        for item in _coerce_list(normalized.get("subject_areas"))
        if isinstance(item, dict)
    }

    # The rules are the same for every element, so resolve them once up front.
    entity_style, entity_pattern = _style_rule(naming_rules, "entity")
    field_style, field_pattern = _style_rule(naming_rules, "field")
    physical_style, physical_pattern = _style_rule(naming_rules, "physical_name")
    relationship_style, relationship_pattern = _style_rule(naming_rules, "relationship")
    index_style, index_pattern = _style_rule(naming_rules, "index")

    for entity in _coerce_list(normalized.get("entities")):
        if not isinstance(entity, dict):
            continue
        entity_name = str(entity.get("name") or "")
        if entity_style and not _matches_style(entity_name, entity_style):
            issues.append(Issue("warn", "ENTITY_NAMING_RULE", f"Entity '{entity_name}' does not match naming rule '{entity_style}'.", f"/entities/{entity_name}/name"))
        if entity_pattern and not re.fullmatch(entity_pattern, entity_name):
            issues.append(Issue("warn", "ENTITY_NAMING_PATTERN", f"Entity '{entity_name}' does not match configured pattern '{entity_pattern}'.", f"/entities/{entity_name}/name"))

        area = str(entity.get("subject_area") or "").strip()
        if area and subject_area_names and area not in subject_area_names:
            issues.append(Issue("warn", "SUBJECT_AREA_NOT_DEFINED", f"Entity '{entity_name}' references subject_area '{area}' which is not declared in subject_areas.", f"/entities/{entity_name}/subject_area"))

        for template_name in _template_names(entity):
            if template_name not in templates:
                issues.append(Issue("warn", "TEMPLATE_NOT_FOUND", f"Entity '{entity_name}' references missing template '{template_name}'.", f"/entities/{entity_name}/templates"))

        for field in _coerce_list(entity.get("fields")):
            if not isinstance(field, dict):
                continue
            field_name = str(field.get("name") or "")
            if field_style and not _matches_style(field_name, field_style):
                issues.append(Issue("warn", "FIELD_NAMING_RULE", f"Field '{entity_name}.{field_name}' does not match naming rule '{field_style}'.", f"/entities/{entity_name}/fields/{field_name}/name"))
            if field_pattern and not re.fullmatch(field_pattern, field_name):
                issues.append(Issue("warn", "FIELD_NAMING_PATTERN", f"Field '{entity_name}.{field_name}' does not match configured pattern '{field_pattern}'.", f"/entities/{entity_name}/fields/{field_name}/name"))
            domain_name = str(field.get("domain") or "").strip()
            if domain_name and domain_name not in domains:
                issues.append(Issue("warn", "DOMAIN_NOT_FOUND", f"Field '{entity_name}.{field_name}' references missing domain '{domain_name}'.", f"/entities/{entity_name}/fields/{field_name}/domain"))

        physical_name = str(entity.get("physical_name") or "")
        if physical_name:
            if physical_style and not _matches_style(physical_name, physical_style):
                issues.append(Issue("warn", "PHYSICAL_NAME_RULE", f"physical_name '{physical_name}' does not match naming rule '{physical_style}'.", f"/entities/{entity_name}/physical_name"))
            if physical_pattern and not re.fullmatch(physical_pattern, physical_name):
                issues.append(Issue("warn", "PHYSICAL_NAME_PATTERN", f"physical_name '{physical_name}' does not match configured pattern '{physical_pattern}'.", f"/entities/{entity_name}/physical_name"))

    for relationship in _coerce_list(normalized.get("relationships")):
        if not isinstance(relationship, dict):
            continue
        name = str(relationship.get("name") or "")
        if relationship_style and name and not _matches_style(name, relationship_style):
            issues.append(Issue("warn", "RELATIONSHIP_NAMING_RULE", f"Relationship '{name}' does not match naming rule '{relationship_style}'.", "/relationships"))
        if relationship_pattern and name and not re.fullmatch(relationship_pattern, name):
            issues.append(Issue("warn", "RELATIONSHIP_NAMING_PATTERN", f"Relationship '{name}' does not match configured pattern '{relationship_pattern}'.", "/relationships"))

    for index in _coerce_list(normalized.get("indexes")):
        if not isinstance(index, dict):
            continue
        name = str(index.get("name") or "")
        if index_style and name and not _matches_style(name, index_style):
            issues.append(Issue("warn", "INDEX_NAMING_RULE", f"Index '{name}' does not match naming rule '{index_style}'.", "/indexes"))
        if index_pattern and name and not re.fullmatch(index_pattern, name):
            issues.append(Issue("warn", "INDEX_NAMING_PATTERN", f"Index '{name}' does not match configured pattern '{index_pattern}'.", "/indexes"))

    return issues
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
def apply_standards_fixes(model: Dict[str, Any]) -> Tuple[Dict[str, Any], List[str]]:
    """Return a normalised copy of ``model`` with naming standards applied, plus a change log.

    The input model is not modified. Fixes performed:

    * entities and fields are renamed to the configured ``entity`` / ``field``
      style, and references to them are rewritten;
    * in physical models, entities without ``physical_name`` get one generated
      from their name (configured ``physical_name`` style, default
      ``upper_snake_case``);
    * relationships and indexes are renamed to their configured style;
    * when no ``subject_areas`` are declared, the list is derived from the
      ``subject_area`` values used by entities.

    Entries that are not dicts are left untouched.

    Returns:
        ``(fixed_model, changes)`` where ``changes`` is a list of
        human-readable descriptions in the order the fixes were applied.
    """
    fixed = normalize_model(model)
    changes: List[str] = []
    naming_rules = _coerce_dict(fixed.get("naming_rules"))

    entity_style, _ = _style_rule(naming_rules, "entity")
    field_style, _ = _style_rule(naming_rules, "field")
    relationship_style, _ = _style_rule(naming_rules, "relationship")
    index_style, _ = _style_rule(naming_rules, "index")
    physical_style, _ = _style_rule(naming_rules, "physical_name")

    # The model kind cannot change while entities are being processed.
    is_physical = fixed.get("model", {}).get("kind") == "physical"

    entity_map: Dict[str, str] = {}
    field_maps: Dict[str, Dict[str, str]] = {}
    for entity in _coerce_list(fixed.get("entities")):
        if not isinstance(entity, dict):
            continue
        old_entity_name = str(entity.get("name") or "")
        new_entity_name = _apply_style(old_entity_name, entity_style)
        if new_entity_name and new_entity_name != old_entity_name:
            entity_map[old_entity_name] = new_entity_name
            entity["name"] = new_entity_name
            changes.append(f"Renamed entity {old_entity_name} -> {new_entity_name}")

        local_field_map: Dict[str, str] = {}
        for field in _coerce_list(entity.get("fields")):
            if not isinstance(field, dict):
                continue
            old_field_name = str(field.get("name") or "")
            new_field_name = _apply_style(old_field_name, field_style)
            if new_field_name and new_field_name != old_field_name:
                local_field_map[old_field_name] = new_field_name
                field["name"] = new_field_name
                changes.append(f"Renamed field {old_entity_name}.{old_field_name} -> {new_field_name}")
        if local_field_map:
            # Keyed by the old entity name: references still use it at this point.
            field_maps[old_entity_name] = local_field_map

        if is_physical and not entity.get("physical_name"):
            style = physical_style or "upper_snake_case"
            entity["physical_name"] = _apply_style(str(entity.get("name") or ""), style)
            changes.append(f"Generated physical_name for {entity.get('name')}")

    _rename_entity_refs(fixed, entity_map, field_maps)

    for relationship in _coerce_list(fixed.get("relationships")):
        if not isinstance(relationship, dict):
            continue
        name = str(relationship.get("name") or "")
        next_name = _apply_style(name, relationship_style)
        if next_name and next_name != name:
            relationship["name"] = next_name
            changes.append(f"Renamed relationship {name} -> {next_name}")

    for index in _coerce_list(fixed.get("indexes")):
        if not isinstance(index, dict):
            continue
        name = str(index.get("name") or "")
        next_name = _apply_style(name, index_style)
        if next_name and next_name != name:
            index["name"] = next_name
            changes.append(f"Renamed index {name} -> {next_name}")

    if not fixed.get("subject_areas"):
        derived_areas = sorted(
            {
                str(entity.get("subject_area") or "").strip()
                for entity in _coerce_list(fixed.get("entities"))
                if isinstance(entity, dict) and str(entity.get("subject_area") or "").strip()
            }
        )
        if derived_areas:
            fixed["subject_areas"] = [{"name": area} for area in derived_areas]
            changes.append("Created subject_areas library from entity subject_area usage")

    return fixed, changes
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
def _copy_entity(entity: Dict[str, Any]) -> Dict[str, Any]:
    """Deep-copy ``entity`` and make sure its ``fields`` is a list of dict fields only."""
    duplicate = deepcopy(entity)
    kept_fields = []
    for item in _coerce_list(duplicate.get("fields")):
        if isinstance(item, dict):
            kept_fields.append(deepcopy(item))
    duplicate["fields"] = kept_fields
    return duplicate
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
def _logical_fields(entity: Dict[str, Any], naming_rules: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Build logical-layer copies of an entity's dict fields.

    Each copy records its origin in ``mapped_from`` as ``"<entity>.<field>"``,
    is renamed to the configured ``field`` style (``snake_case`` when none is
    configured), and has any ``physical_name`` removed.
    """
    configured_style, _ = _style_rule(naming_rules, "field")
    target_style = configured_style or "snake_case"

    logical: List[Dict[str, Any]] = []
    for source in _coerce_list(entity.get("fields")):
        if isinstance(source, dict):
            clone = deepcopy(source)
            clone["mapped_from"] = f"{entity.get('name')}.{source.get('name')}"
            clone["name"] = _apply_style(str(source.get("name") or ""), target_style)
            clone.pop("physical_name", None)
            logical.append(clone)
    return logical
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
def _field_type_for_physical(field: Dict[str, Any], domains: Dict[str, Dict[str, Any]], dialect: str) -> str:
    """Pick the physical column type for ``field``.

    Precedence: the field's domain ``physical_types[dialect]``, then the
    domain's ``data_type``, then the field's own ``type``, then ``"string"``.
    """
    domain = domains.get(str(field.get("domain") or "").strip(), {})

    dialect_type = _coerce_dict(domain.get("physical_types")).get(dialect)
    if dialect_type:
        return str(dialect_type)

    generic_type = domain.get("data_type")
    if generic_type:
        return str(generic_type)

    return str(field.get("type") or "string")
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
def _keyset_to_primary_keys(entity: Dict[str, Any]) -> None:
    """Promote the first candidate key to the primary key, in place.

    Nothing happens when a field is already flagged ``primary_key`` or when
    the entity has no candidate keys. Otherwise every field named in the
    first candidate key is marked ``primary_key=True`` and ``nullable=False``;
    names that match no field are ignored.
    """
    by_name: Dict[str, Dict[str, Any]] = {}
    for field in _coerce_list(entity.get("fields")):
        if isinstance(field, dict):
            by_name[str(field.get("name") or "")] = field

    for field in by_name.values():
        if field.get("primary_key"):
            return

    keysets = _coerce_list(entity.get("candidate_keys"))
    if not keysets:
        return

    for member in _coerce_list(keysets[0]):
        target = by_name.get(str(member))
        if target:
            target["primary_key"] = True
            target["nullable"] = False
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
def _build_relationship_field_maps(model: Dict[str, Any]) -> Dict[str, Dict[str, str]]:
    """Map, per entity name, each field's source name to its current name.

    The source name is the field's ``mapped_from`` when set, otherwise its
    ``name``. ``mapped_from`` may be qualified as ``"<entity>.<field>"`` (the
    form ``_logical_fields`` writes), whereas relationship references are
    looked up by bare field name. For qualified values the part after the
    first dot is therefore registered as an additional key, unless that key
    is already taken by an exact source name.
    """
    maps: Dict[str, Dict[str, str]] = {}
    for entity in _coerce_list(model.get("entities")):
        if not isinstance(entity, dict):
            continue
        renames: Dict[str, str] = {}
        aliases: Dict[str, str] = {}
        for field in _coerce_list(entity.get("fields")):
            if not isinstance(field, dict):
                continue
            target = str(field.get("name") or "")
            source = str(field.get("mapped_from") or field.get("name") or "")
            renames[source] = target
            if "." in source:
                aliases[source.split(".", 1)[1]] = target
        # Exact source names always win over derived unqualified aliases.
        for alias, target in aliases.items():
            renames.setdefault(alias, target)
        maps[str(entity.get("name") or "")] = renames
    return maps
|
|
527
|
+
|
|
528
|
+
|
|
529
|
+
def _remap_relationships(source_model: Dict[str, Any], target_model: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Copy the source model's relationships, retargeted at the target model's names.

    Entities are matched through the target entity's ``mapped_from``,
    ``derived_from`` or ``name``; fields through the per-entity field maps of
    the target model. ``from``/``to`` values without a dot are left as they
    are, and non-dict relationship entries are dropped.
    """
    entity_lookup: Dict[str, str] = {}
    for entity in _coerce_list(target_model.get("entities")):
        if isinstance(entity, dict):
            origin = str(entity.get("mapped_from") or entity.get("derived_from") or entity.get("name") or "")
            entity_lookup[origin] = str(entity.get("name") or "")
    field_lookup = _build_relationship_field_maps(target_model)

    remapped: List[Dict[str, Any]] = []
    for relationship in _coerce_list(source_model.get("relationships")):
        if not isinstance(relationship, dict):
            continue
        clone = deepcopy(relationship)
        for end in ("from", "to"):
            ref = str(relationship.get(end) or "")
            if "." in ref:
                old_entity, old_field = ref.split(".", 1)
                new_entity = entity_lookup.get(old_entity, old_entity)
                new_field = field_lookup.get(new_entity, {}).get(old_field, old_field)
                clone[end] = f"{new_entity}.{new_field}"
        remapped.append(clone)
    return remapped
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
def _entity_to_logical(entity: Dict[str, Any], naming_rules: Dict[str, Any]) -> Dict[str, Any]:
    """Build the logical counterpart of one conceptual entity."""
    source_name = str(entity.get("name") or "")
    logical = _copy_entity(entity)
    logical["type"] = "logical_entity"
    logical["derived_from"] = source_name
    logical["mapped_from"] = source_name
    logical["name"] = _apply_style(source_name, _style_rule(naming_rules, "entity")[0] or "pascal_case")
    logical["fields"] = _logical_fields(entity, naming_rules)

    # These attributes are removed from logical entities.
    for key in (
        "physical_name",
        "schema",
        "database",
        "partition_by",
        "cluster_by",
        "distribution",
        "storage",
        "identity",
        "sequence",
    ):
        logical.pop(key, None)

    # Express primary-key flags as a single candidate key, then drop the flags.
    logical.setdefault("candidate_keys", [])
    key_fields = [field["name"] for field in logical["fields"] if field.get("primary_key")]
    if key_fields:
        logical["candidate_keys"] = [key_fields]
    for field in logical["fields"]:
        field.pop("primary_key", None)
    return logical


def _entity_to_physical(
    entity: Dict[str, Any],
    naming_rules: Dict[str, Any],
    domains: Any,
    dialect: str,
) -> Dict[str, Any]:
    """Build the physical counterpart of one logical entity."""
    source_name = str(entity.get("name") or "")
    source_type = str(entity.get("type") or "")
    physical = _copy_entity(entity)
    # Concepts and logical entities become tables; any other explicit type is kept.
    physical["type"] = "table" if source_type in {"concept", "logical_entity"} else (source_type or "table")
    physical["derived_from"] = source_name
    physical["mapped_from"] = source_name
    physical["physical_name"] = str(
        entity.get("physical_name")
        or _apply_style(source_name, _style_rule(naming_rules, "physical_name")[0] or "upper_snake_case")
    )

    resolved_fields: List[Dict[str, Any]] = []
    for field in _coerce_list(entity.get("fields")):
        if not isinstance(field, dict):
            continue
        next_field = deepcopy(field)
        # Keep an existing lineage pointer; otherwise point at the field's own name.
        next_field["mapped_from"] = str(field.get("mapped_from") or field.get("name") or "")
        next_field["type"] = _field_type_for_physical(next_field, domains, dialect)
        resolved_fields.append(next_field)
    physical["fields"] = resolved_fields
    _keyset_to_primary_keys(physical)
    return physical


def transform_model(model: Dict[str, Any], target_kind: str, dialect: str = "postgres") -> Dict[str, Any]:
    """Transform a model into ``target_kind``.

    Supported paths are conceptual -> logical, logical -> physical, and
    conceptual -> physical (performed as the two previous steps in sequence).
    When the model is already of the requested kind, the normalized model is
    returned without further changes.

    Args:
        model: The model to transform; it is normalized first.
        target_kind: Requested kind. Compared case-insensitively after
            stripping whitespace.
        dialect: Passed through to physical field type resolution.

    Returns:
        The normalized, transformed model, with ``model.kind`` set to the
        target, ``model.spec_version`` set to 3, and relationships remapped
        to the transformed entity and field names.

    Raises:
        ValueError: If ``target_kind`` is not a known model kind, or if the
            source -> target path is not supported. The path is validated up
            front, so the error no longer depends on whether the model
            happens to contain any entities.
    """
    normalized = normalize_model(model)
    source_kind = infer_model_kind(normalized)
    target = str(target_kind or "").strip().lower()
    if target not in MODEL_KINDS:
        raise ValueError(f"Unsupported target kind '{target_kind}'. Use one of: conceptual, logical, physical.")
    if source_kind == target:
        return normalized

    if source_kind == "conceptual" and target == "physical":
        # Two-step path: go through the logical layer.
        logical = transform_model(normalized, "logical", dialect=dialect)
        return transform_model(logical, "physical", dialect=dialect)

    to_logical = source_kind == "conceptual" and target == "logical"
    to_physical = source_kind == "logical" and target == "physical"
    if not (to_logical or to_physical):
        # Reject unsupported paths before doing any work, even for empty models.
        raise ValueError(f"Unsupported transform path: {source_kind} -> {target}")

    naming_rules = _coerce_dict(normalized.get("naming_rules"))
    domains = _domains_map(normalized)
    transformed = deepcopy(normalized)
    transformed["model"]["kind"] = target
    transformed["model"]["spec_version"] = 3

    entities: List[Dict[str, Any]] = []
    for entity in _coerce_list(normalized.get("entities")):
        if not isinstance(entity, dict):
            continue
        if to_logical:
            entities.append(_entity_to_logical(entity, naming_rules))
        else:
            entities.append(_entity_to_physical(entity, naming_rules, domains, dialect))

    transformed["entities"] = entities
    transformed["relationships"] = _remap_relationships(normalized, transformed)
    return normalize_model(transformed)
|
|
626
|
+
|
|
627
|
+
|
|
628
|
+
def merge_models_preserving_docs(current: Dict[str, Any], candidate: Dict[str, Any]) -> Dict[str, Any]:
    """Merge ``candidate`` over ``current`` while keeping existing documentation.

    The result is based on the normalized ``candidate``. For every candidate
    entity (and field) whose name also exists in ``current``, the truthy
    documentation attributes of the current entity (field) replace the
    candidate's values. Entities and fields without a match are taken from
    the candidate as-is. A truthy ``glossary`` / ``subject_areas`` section in
    ``current`` replaces the candidate's section. The merged model is
    normalized before being returned.
    """
    entity_doc_keys = ("description", "owner", "subject_area", "tags", "grain", "sla")
    field_doc_keys = ("description", "tags", "sensitivity", "examples", "deprecated", "deprecated_message", "domain")

    def _index_by_name(items: Any) -> Dict[str, Dict[str, Any]]:
        # Dict items keyed by their (possibly empty) name; later duplicates win.
        return {str(item.get("name") or ""): item for item in _coerce_list(items) if isinstance(item, dict)}

    def _overlay(fresh: Dict[str, Any], existing: Dict[str, Any], keys: Any) -> Dict[str, Any]:
        # Copy of ``fresh`` with the truthy ``keys`` of ``existing`` laid over it.
        result = deepcopy(fresh)
        for key in keys:
            if existing.get(key):
                result[key] = deepcopy(existing[key])
        return result

    current_model = normalize_model(current)
    candidate_model = normalize_model(candidate)
    known_entities = _index_by_name(current_model.get("entities"))

    merged = deepcopy(candidate_model)
    merged_entities: List[Dict[str, Any]] = []
    for entity in _coerce_list(candidate_model.get("entities")):
        if not isinstance(entity, dict):
            continue
        known_entity = known_entities.get(str(entity.get("name") or ""))
        if not known_entity:
            # New entity: nothing to preserve.
            merged_entities.append(entity)
            continue

        combined = _overlay(entity, known_entity, entity_doc_keys)
        known_fields = _index_by_name(known_entity.get("fields"))
        combined_fields = []
        for field in _coerce_list(entity.get("fields")):
            if not isinstance(field, dict):
                continue
            known_field = known_fields.get(str(field.get("name") or ""))
            if known_field:
                combined_fields.append(_overlay(field, known_field, field_doc_keys))
            else:
                combined_fields.append(field)
        combined["fields"] = combined_fields
        merged_entities.append(combined)

    merged["entities"] = merged_entities
    for section in ("glossary", "subject_areas"):
        if current_model.get(section):
            merged[section] = deepcopy(current_model[section])
    return normalize_model(merged)
|