datalex-cli 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. datalex_cli/__init__.py +1 -0
  2. datalex_cli/datalex_cli.py +658 -0
  3. datalex_cli/main.py +2925 -0
  4. datalex_cli-0.1.1.dist-info/METADATA +228 -0
  5. datalex_cli-0.1.1.dist-info/RECORD +64 -0
  6. datalex_cli-0.1.1.dist-info/WHEEL +5 -0
  7. datalex_cli-0.1.1.dist-info/entry_points.txt +2 -0
  8. datalex_cli-0.1.1.dist-info/licenses/LICENSE +21 -0
  9. datalex_cli-0.1.1.dist-info/top_level.txt +2 -0
  10. datalex_core/__init__.py +94 -0
  11. datalex_core/_schemas/datalex/common.schema.json +127 -0
  12. datalex_core/_schemas/datalex/domain.schema.json +24 -0
  13. datalex_core/_schemas/datalex/entity.schema.json +158 -0
  14. datalex_core/_schemas/datalex/model.schema.json +141 -0
  15. datalex_core/_schemas/datalex/policy.schema.json +70 -0
  16. datalex_core/_schemas/datalex/project.schema.json +82 -0
  17. datalex_core/_schemas/datalex/snippet.schema.json +24 -0
  18. datalex_core/_schemas/datalex/source.schema.json +104 -0
  19. datalex_core/_schemas/datalex/term.schema.json +30 -0
  20. datalex_core/canonical.py +166 -0
  21. datalex_core/completion.py +204 -0
  22. datalex_core/connectors/__init__.py +39 -0
  23. datalex_core/connectors/base.py +417 -0
  24. datalex_core/connectors/bigquery.py +229 -0
  25. datalex_core/connectors/databricks.py +262 -0
  26. datalex_core/connectors/mysql.py +266 -0
  27. datalex_core/connectors/postgres.py +309 -0
  28. datalex_core/connectors/redshift.py +298 -0
  29. datalex_core/connectors/snowflake.py +336 -0
  30. datalex_core/connectors/sqlserver.py +425 -0
  31. datalex_core/datalex/__init__.py +26 -0
  32. datalex_core/datalex/diff.py +188 -0
  33. datalex_core/datalex/errors.py +85 -0
  34. datalex_core/datalex/loader.py +512 -0
  35. datalex_core/datalex/migrate_layout.py +382 -0
  36. datalex_core/datalex/parse_cache.py +102 -0
  37. datalex_core/datalex/project.py +214 -0
  38. datalex_core/datalex/types.py +224 -0
  39. datalex_core/dbt/__init__.py +18 -0
  40. datalex_core/dbt/emit.py +344 -0
  41. datalex_core/dbt/manifest.py +329 -0
  42. datalex_core/dbt/profiles.py +185 -0
  43. datalex_core/dbt/sync.py +279 -0
  44. datalex_core/dbt/warehouse.py +215 -0
  45. datalex_core/dialects/__init__.py +15 -0
  46. datalex_core/dialects/_common.py +48 -0
  47. datalex_core/dialects/base.py +47 -0
  48. datalex_core/dialects/postgres.py +164 -0
  49. datalex_core/dialects/registry.py +36 -0
  50. datalex_core/dialects/snowflake.py +129 -0
  51. datalex_core/diffing.py +358 -0
  52. datalex_core/docs_generator.py +797 -0
  53. datalex_core/doctor.py +181 -0
  54. datalex_core/generators.py +478 -0
  55. datalex_core/importers.py +1176 -0
  56. datalex_core/issues.py +23 -0
  57. datalex_core/loader.py +21 -0
  58. datalex_core/migrate.py +316 -0
  59. datalex_core/modeling.py +679 -0
  60. datalex_core/packages.py +430 -0
  61. datalex_core/policy.py +1037 -0
  62. datalex_core/resolver.py +456 -0
  63. datalex_core/schema.py +54 -0
  64. datalex_core/semantic.py +1561 -0
@@ -0,0 +1,358 @@
1
+ import glob
2
+ from pathlib import Path
3
+ from typing import Any, Dict, List, Optional, Set, Tuple
4
+
5
+ from datalex_core.canonical import compile_model
6
+ from datalex_core.loader import load_yaml_model
7
+ from datalex_core.modeling import normalize_model
8
+
9
+
10
def _index_entities(model: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
    """Map each entity's ``name`` to its definition.

    Entities without a ``name`` are keyed under the empty string. When several
    entities share a name, the one listed last is kept.
    """
    by_name: Dict[str, Dict[str, Any]] = {}
    for item in model.get("entities", []):
        by_name[item.get("name", "")] = item
    return by_name
12
+
13
+
14
def _index_fields(entity: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
    """Map each field's ``name`` to its definition within one entity.

    Fields without a ``name`` are keyed under the empty string. When several
    fields share a name, the one listed last is kept.
    """
    by_name: Dict[str, Dict[str, Any]] = {}
    for column in entity.get("fields", []):
        by_name[column.get("name", "")] = column
    return by_name
16
+
17
+
18
def _relationship_key(relationship: Dict[str, Any]) -> Tuple[str, str, str, str]:
    """Build the tuple ``(name, from, to, cardinality)`` for a relationship.

    Any attribute missing from the relationship is represented by the empty
    string, so the result is always a 4-tuple.
    """
    name, source, target, cardinality = (
        relationship.get(attr, "") for attr in ("name", "from", "to", "cardinality")
    )
    return (name, source, target, cardinality)
25
+
26
+
27
def _index_key(idx: Dict[str, Any]) -> str:
    """Render an index as the string ``name|entity|field1,field2|unique``.

    Missing ``name``/``entity`` become empty strings, missing ``fields`` an
    empty list, and missing ``unique`` is rendered as ``False``.
    """
    joined_fields = ",".join(idx.get("fields", []))
    name = idx.get("name", "")
    entity = idx.get("entity", "")
    unique = idx.get("unique", False)
    return "{}|{}|{}|{}".format(name, entity, joined_fields, unique)
30
+
31
+
32
def _normalized_meta_value(value: Any) -> Any:
    """Return an order-insensitive form of a metadata value for comparison.

    Lists are normalised element by element and then sorted; if the elements
    cannot be ordered (``TypeError``), the normalised list keeps its original
    order. Dicts are rebuilt with sorted keys and normalised values. Any other
    value is returned unchanged.
    """
    if isinstance(value, list):
        items = list(map(_normalized_meta_value, value))
        try:
            ordered = sorted(items)
        except TypeError:
            # Elements are not mutually comparable; fall back to input order.
            return items
        return ordered
    if isinstance(value, dict):
        rebuilt = {}
        for key in sorted(value):
            rebuilt[key] = _normalized_meta_value(value[key])
        return rebuilt
    return value
42
+
43
+
44
def _diff_indexes(
    old_canonical: Dict[str, Any], new_canonical: Dict[str, Any]
) -> Tuple[List[str], List[str], List[str]]:
    """Compare the index names declared by two canonical models.

    Indexes are matched by ``name`` only: an index that keeps its name but
    changes its entity, fields or uniqueness is not reported here.

    Args:
        old_canonical: Canonical model before the change.
        new_canonical: Canonical model after the change.

    Returns:
        A tuple ``(added, removed, breaking)`` where ``added`` and ``removed``
        are sorted lists of index names, and ``breaking`` holds one
        ``"Index removed: <name>"`` message per removed index that has a
        non-empty name. Indexes without a name are tracked under the empty
        string in ``added``/``removed``.
    """
    # Only names take part in the comparison. Building full index signatures
    # here was dead work and could raise on indexes whose ``fields`` value is
    # not a list of strings, so it is intentionally not done.
    old_names = {idx.get("name", "") for idx in old_canonical.get("indexes", [])}
    new_names = {idx.get("name", "") for idx in new_canonical.get("indexes", [])}

    added = sorted(new_names - old_names)
    removed = sorted(old_names - new_names)
    breaking = [f"Index removed: {name}" for name in removed if name]

    return added, removed, breaking
61
+
62
+
63
def _diff_metrics(
    old_canonical: Dict[str, Any], new_canonical: Dict[str, Any]
) -> Tuple[List[str], List[str], List[Dict[str, Any]], List[str]]:
    """Compare the named metrics of two canonical models.

    Metrics with no (or an empty) ``name`` are ignored.

    Returns:
        A tuple ``(added, removed, changed, breaking)``. ``added`` and
        ``removed`` are sorted metric names. ``changed`` has one entry per
        metric whose definition differs, listing which tracked attributes
        changed (the list may be empty when only untracked attributes differ).
        ``breaking`` has a message for every removed metric and for every
        metric whose entity, expression, aggregation, grain or time dimension
        changed.
    """
    tracked_attrs = (
        "entity",
        "expression",
        "aggregation",
        "grain",
        "dimensions",
        "time_dimension",
        "owner",
        "deprecated",
    )
    contract_attrs = {"entity", "expression", "aggregation", "grain", "time_dimension"}

    before: Dict[str, Dict[str, Any]] = {}
    for metric in old_canonical.get("metrics", []):
        if metric.get("name"):
            before[metric.get("name", "")] = metric
    after: Dict[str, Dict[str, Any]] = {}
    for metric in new_canonical.get("metrics", []):
        if metric.get("name"):
            after[metric.get("name", "")] = metric

    added = sorted(after.keys() - before.keys())
    removed = sorted(before.keys() - after.keys())

    breaking: List[str] = [f"Metric removed: {name}" for name in removed]
    changed: List[Dict[str, Any]] = []

    for name in sorted(before.keys() & after.keys()):
        previous, current = before[name], after[name]
        if previous == current:
            continue

        differing = sorted(
            attr for attr in tracked_attrs if previous.get(attr) != current.get(attr)
        )
        changed.append({"metric": name, "changed_fields": differing})

        # A change to any contract attribute alters what the metric measures.
        if not contract_attrs.isdisjoint(differing):
            breaking.append(f"Metric contract changed: {name}")

    return added, removed, changed, breaking
106
+
107
+
108
def _diff_entity(
    name: str, old_entity: Dict[str, Any], new_entity: Dict[str, Any]
) -> Tuple[Optional[Dict[str, Any]], List[str]]:
    """Diff one entity that exists in both models.

    Returns a pair ``(record, breaking)``. ``record`` is the per-entity change
    entry, or ``None`` when no field or metadata difference was found.
    ``breaking`` lists the breaking-change messages raised by this entity.
    """
    modeling_attrs = (
        "type",
        "grain",
        "candidate_keys",
        "business_keys",
        "subtype_of",
        "subtypes",
        "natural_key",
        "surrogate_key",
        "scd_type",
        "conformed",
        "dimension_refs",
        "hash_key",
        "link_refs",
        "parent_entity",
        "hash_diff_fields",
        "load_timestamp_field",
        "record_source_field",
        "partition_by",
        "cluster_by",
        "physical_name",
    )

    fields_before = _index_fields(old_entity)
    fields_after = _index_fields(new_entity)
    names_before = set(fields_before)
    names_after = set(fields_after)

    added_fields = sorted(item for item in names_after - names_before if item)
    removed_fields = sorted(item for item in names_before - names_after if item)

    breaking: List[str] = []
    type_changes: List[Dict[str, Any]] = []
    nullability_changes: List[Dict[str, Any]] = []

    for field in sorted(names_before & names_after):
        previous = fields_before[field]
        current = fields_after[field]

        from_type, to_type = previous.get("type"), current.get("type")
        if from_type != to_type:
            type_changes.append({"field": field, "from_type": from_type, "to_type": to_type})
            breaking.append(f"Field type changed: {name}.{field}")

        # A field with no explicit ``nullable`` is treated as nullable.
        from_nullable = previous.get("nullable", True)
        to_nullable = current.get("nullable", True)
        if from_nullable != to_nullable:
            nullability_changes.append(
                {"field": field, "from_nullable": from_nullable, "to_nullable": to_nullable}
            )
            # Only tightening (nullable -> non-nullable) counts as breaking.
            if from_nullable and not to_nullable:
                breaking.append(f"Field became non-nullable: {name}.{field}")

    metadata_changes: List[Dict[str, Any]] = []
    for attr in modeling_attrs:
        raw_before = old_entity.get(attr)
        raw_after = new_entity.get(attr)
        # Compare order-insensitively, but report the raw values.
        if _normalized_meta_value(raw_before) != _normalized_meta_value(raw_after):
            metadata_changes.append({"field": attr, "from": raw_before, "to": raw_after})
            if attr == "type":
                breaking.append(f"Entity type changed: {name} ({raw_before} -> {raw_after})")
            else:
                breaking.append(f"Entity modeling metadata changed: {name}.{attr}")

    breaking.extend(f"Field removed: {name}.{field}" for field in removed_fields)

    if not (added_fields or removed_fields or type_changes or nullability_changes or metadata_changes):
        return None, breaking

    record = {
        "entity": name,
        "added_fields": added_fields,
        "removed_fields": removed_fields,
        "type_changes": type_changes,
        "nullability_changes": nullability_changes,
        "metadata_changes": metadata_changes,
    }
    return record, breaking


def semantic_diff(old_model: Dict[str, Any], new_model: Dict[str, Any]) -> Dict[str, Any]:
    """Produce a structural diff between two versions of a model.

    Both inputs are passed through ``normalize_model`` and then
    ``compile_model`` before comparison. The result reports added, removed and
    changed entities, added and removed relationships and indexes, metric
    changes, a ``summary`` of counts, the sorted de-duplicated list of
    ``breaking_changes`` and a ``has_breaking_changes`` flag.
    """
    old_canonical = compile_model(normalize_model(old_model))
    new_canonical = compile_model(normalize_model(new_model))

    breaking: List[str] = []

    # Model-level attributes: any change of kind or layer is breaking.
    meta_before = old_canonical.get("model", {})
    meta_after = new_canonical.get("model", {})
    if meta_before.get("kind") != meta_after.get("kind"):
        breaking.append(
            f"Model kind changed: {meta_before.get('kind', 'physical')} -> {meta_after.get('kind', 'physical')}"
        )
    if meta_before.get("layer") != meta_after.get("layer"):
        breaking.append(
            f"Model layer changed: {meta_before.get('layer', '(none)')} -> {meta_after.get('layer', '(none)')}"
        )

    # Entities: names present on one side only, then per-entity comparison.
    entities_before = _index_entities(old_canonical)
    entities_after = _index_entities(new_canonical)
    entity_names_before = set(entities_before)
    entity_names_after = set(entities_after)

    added_entities = sorted(item for item in entity_names_after - entity_names_before if item)
    removed_entities = sorted(item for item in entity_names_before - entity_names_after if item)

    changed_entities: List[Dict[str, Any]] = []
    for name in sorted(entity_names_before & entity_names_after):
        record, entity_breaking = _diff_entity(name, entities_before[name], entities_after[name])
        breaking.extend(entity_breaking)
        if record is not None:
            changed_entities.append(record)

    # Relationships are compared as whole (name, from, to, cardinality) keys.
    relationship_attrs = ("name", "from", "to", "cardinality")
    relationships_before = set(map(_relationship_key, old_canonical.get("relationships", [])))
    relationships_after = set(map(_relationship_key, new_canonical.get("relationships", [])))
    added_relationships = [
        dict(zip(relationship_attrs, key))
        for key in sorted(relationships_after - relationships_before)
    ]
    removed_relationships = [
        dict(zip(relationship_attrs, key))
        for key in sorted(relationships_before - relationships_after)
    ]

    breaking.extend(f"Entity removed: {entity}" for entity in removed_entities)

    added_indexes, removed_indexes, index_breaking = _diff_indexes(old_canonical, new_canonical)
    breaking.extend(index_breaking)
    added_metrics, removed_metrics, changed_metrics, metric_breaking = _diff_metrics(
        old_canonical, new_canonical
    )
    breaking.extend(metric_breaking)

    unique_breaking = sorted(set(breaking))

    # Insertion order here defines the key order of both the summary and the
    # top-level result.
    sections: Dict[str, Any] = {
        "added_entities": added_entities,
        "removed_entities": removed_entities,
        "changed_entities": changed_entities,
        "added_relationships": added_relationships,
        "removed_relationships": removed_relationships,
        "added_indexes": added_indexes,
        "removed_indexes": removed_indexes,
        "added_metrics": added_metrics,
        "removed_metrics": removed_metrics,
        "changed_metrics": changed_metrics,
    }
    summary: Dict[str, int] = {key: len(items) for key, items in sections.items()}
    summary["breaking_change_count"] = len(unique_breaking)

    return {
        "summary": summary,
        **sections,
        "breaking_changes": unique_breaking,
        "has_breaking_changes": bool(breaking),
    }
279
+
280
+
281
def _find_model_files(directory: str) -> Dict[str, str]:
    """Find all model YAML files in a directory, keyed by model name.

    The directory is searched recursively for ``*.model.yaml`` and
    ``*.model.yml`` files. Each file is loaded with ``load_yaml_model`` and
    indexed by its ``model.name`` value; files without a model name are
    ignored.

    Discovery is best effort: a file that cannot be loaded or has an
    unexpected shape is skipped rather than aborting the scan, and a warning
    is logged so the omission is visible. If two files declare the same model
    name, the later one (in scan order) wins and a warning is logged.

    Args:
        directory: Root directory to scan.

    Returns:
        Mapping of model name to the absolute path of its file.
    """
    import logging

    log = logging.getLogger(__name__)
    dir_path = Path(directory).resolve()
    models: Dict[str, str] = {}
    for pattern in ("**/*.model.yaml", "**/*.model.yml"):
        for path in sorted(dir_path.glob(pattern)):
            try:
                data = load_yaml_model(str(path))
                name = data.get("model", {}).get("name", "")
            except Exception as exc:
                # Deliberately broad: one unreadable file must not stop the
                # scan, but it should not disappear without a trace either.
                log.warning("Skipping model file %s: %s", path, exc)
                continue
            if not name:
                continue
            if name in models:
                log.warning(
                    "Model name %r is declared by both %s and %s; using the latter",
                    name,
                    models[name],
                    path,
                )
            models[name] = str(path)
    return models
295
+
296
+
297
def project_diff(
    old_dir: str,
    new_dir: str,
) -> Dict[str, Any]:
    """Compare two directories of model files and produce a project-level diff.

    Models are discovered with ``_find_model_files`` and matched by model
    name. Models present in both directories are compared with
    ``semantic_diff``.

    Args:
        old_dir: Directory holding the previous version of the models.
        new_dir: Directory holding the new version of the models.

    Returns:
        A dict with a ``summary`` of counts, the sorted ``added_models`` and
        ``removed_models`` names, the ``changed_models`` names, the per-model
        ``model_diffs``, the sorted ``breaking_changes`` messages (per-model
        messages are prefixed with ``[<model>]``; every removed model adds a
        ``"Model removed: <name>"`` message) and ``has_breaking_changes``.
    """
    old_models = _find_model_files(old_dir)
    new_models = _find_model_files(new_dir)

    old_names = set(old_models)
    new_names = set(new_models)

    added_models = sorted(new_names - old_names)
    removed_models = sorted(old_names - new_names)
    common_models = sorted(old_names & new_names)

    # Summary counters that mark a model as changed when non-zero.
    change_counters = (
        "added_entities",
        "removed_entities",
        "changed_entities",
        "added_relationships",
        "removed_relationships",
        "added_indexes",
        "removed_indexes",
        "added_metrics",
        "removed_metrics",
        "changed_metrics",
    )

    model_diffs: Dict[str, Dict[str, Any]] = {}
    all_breaking: List[str] = []

    for name in common_models:
        old_model = load_yaml_model(old_models[name])
        new_model = load_yaml_model(new_models[name])
        diff = semantic_diff(old_model, new_model)

        summary = diff["summary"]
        # A model-level kind/layer change is breaking but moves none of the
        # counters above, so breaking changes must count as a change on their
        # own; otherwise they would be dropped from the project report.
        has_changes = diff["has_breaking_changes"] or any(
            summary[counter] > 0 for counter in change_counters
        )
        if not has_changes:
            continue

        model_diffs[name] = diff
        all_breaking.extend(f"[{name}] {bc}" for bc in diff["breaking_changes"])

    all_breaking.extend(f"Model removed: {name}" for name in removed_models)

    return {
        "summary": {
            "added_models": len(added_models),
            "removed_models": len(removed_models),
            "changed_models": len(model_diffs),
            "unchanged_models": len(common_models) - len(model_diffs),
            "breaking_change_count": len(all_breaking),
        },
        "added_models": added_models,
        "removed_models": removed_models,
        "changed_models": list(model_diffs.keys()),
        "model_diffs": model_diffs,
        "breaking_changes": sorted(all_breaking),
        "has_breaking_changes": bool(all_breaking),
    }