datalex-cli 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. datalex_cli/__init__.py +1 -0
  2. datalex_cli/datalex_cli.py +658 -0
  3. datalex_cli/main.py +2925 -0
  4. datalex_cli-0.1.1.dist-info/METADATA +228 -0
  5. datalex_cli-0.1.1.dist-info/RECORD +64 -0
  6. datalex_cli-0.1.1.dist-info/WHEEL +5 -0
  7. datalex_cli-0.1.1.dist-info/entry_points.txt +2 -0
  8. datalex_cli-0.1.1.dist-info/licenses/LICENSE +21 -0
  9. datalex_cli-0.1.1.dist-info/top_level.txt +2 -0
  10. datalex_core/__init__.py +94 -0
  11. datalex_core/_schemas/datalex/common.schema.json +127 -0
  12. datalex_core/_schemas/datalex/domain.schema.json +24 -0
  13. datalex_core/_schemas/datalex/entity.schema.json +158 -0
  14. datalex_core/_schemas/datalex/model.schema.json +141 -0
  15. datalex_core/_schemas/datalex/policy.schema.json +70 -0
  16. datalex_core/_schemas/datalex/project.schema.json +82 -0
  17. datalex_core/_schemas/datalex/snippet.schema.json +24 -0
  18. datalex_core/_schemas/datalex/source.schema.json +104 -0
  19. datalex_core/_schemas/datalex/term.schema.json +30 -0
  20. datalex_core/canonical.py +166 -0
  21. datalex_core/completion.py +204 -0
  22. datalex_core/connectors/__init__.py +39 -0
  23. datalex_core/connectors/base.py +417 -0
  24. datalex_core/connectors/bigquery.py +229 -0
  25. datalex_core/connectors/databricks.py +262 -0
  26. datalex_core/connectors/mysql.py +266 -0
  27. datalex_core/connectors/postgres.py +309 -0
  28. datalex_core/connectors/redshift.py +298 -0
  29. datalex_core/connectors/snowflake.py +336 -0
  30. datalex_core/connectors/sqlserver.py +425 -0
  31. datalex_core/datalex/__init__.py +26 -0
  32. datalex_core/datalex/diff.py +188 -0
  33. datalex_core/datalex/errors.py +85 -0
  34. datalex_core/datalex/loader.py +512 -0
  35. datalex_core/datalex/migrate_layout.py +382 -0
  36. datalex_core/datalex/parse_cache.py +102 -0
  37. datalex_core/datalex/project.py +214 -0
  38. datalex_core/datalex/types.py +224 -0
  39. datalex_core/dbt/__init__.py +18 -0
  40. datalex_core/dbt/emit.py +344 -0
  41. datalex_core/dbt/manifest.py +329 -0
  42. datalex_core/dbt/profiles.py +185 -0
  43. datalex_core/dbt/sync.py +279 -0
  44. datalex_core/dbt/warehouse.py +215 -0
  45. datalex_core/dialects/__init__.py +15 -0
  46. datalex_core/dialects/_common.py +48 -0
  47. datalex_core/dialects/base.py +47 -0
  48. datalex_core/dialects/postgres.py +164 -0
  49. datalex_core/dialects/registry.py +36 -0
  50. datalex_core/dialects/snowflake.py +129 -0
  51. datalex_core/diffing.py +358 -0
  52. datalex_core/docs_generator.py +797 -0
  53. datalex_core/doctor.py +181 -0
  54. datalex_core/generators.py +478 -0
  55. datalex_core/importers.py +1176 -0
  56. datalex_core/issues.py +23 -0
  57. datalex_core/loader.py +21 -0
  58. datalex_core/migrate.py +316 -0
  59. datalex_core/modeling.py +679 -0
  60. datalex_core/packages.py +430 -0
  61. datalex_core/policy.py +1037 -0
  62. datalex_core/resolver.py +456 -0
  63. datalex_core/schema.py +54 -0
  64. datalex_core/semantic.py +1561 -0
@@ -0,0 +1,456 @@
1
+ """Multi-model resolver: resolves cross-file imports into a unified model graph.
2
+
3
+ Resolution strategy:
4
+ 1. Load the root model file.
5
+ 2. For each entry in model.imports, locate the target model file:
6
+ a. If import.path is given, resolve relative to the root model's directory.
7
+ b. Otherwise, scan search_dirs for <model_name>.model.yaml or <model_name>.model.yml.
8
+ 3. Load each imported model (recursively resolving its own imports).
9
+ 4. Build a unified graph containing all entities, relationships, indexes, glossary,
10
+ and rules — with imported entities prefixed by their alias when referenced.
11
+ 5. Validate: no circular imports, no duplicate entity names across models.
12
+ """
13
+
14
+ import copy
15
+ from pathlib import Path
16
+ from typing import Any, Dict, List, Optional, Set, Tuple
17
+
18
+ from datalex_core.issues import Issue
19
+ from datalex_core.loader import load_yaml_model
20
+
21
+
22
def _find_model_file(
    model_name: str,
    search_dirs: List[Path],
) -> Optional[Path]:
    """Locate the model file for ``model_name`` inside ``search_dirs``.

    Directories are tried in the order given. For each directory the file
    names ``<model_name>.model.yaml`` and ``<model_name>.model.yml`` are
    checked directly in that directory first, then in each of its immediate,
    non-hidden subdirectories (visited in sorted order).

    Args:
        model_name: Name of the model whose file is wanted.
        search_dirs: Directories to search, highest priority first.

    Returns:
        The first matching regular file, or ``None`` when nothing matches.
    """
    candidates = (
        f"{model_name}.model.yaml",
        f"{model_name}.model.yml",
    )

    def first_file_in(folder: Path) -> Optional[Path]:
        for candidate in candidates:
            path = folder / candidate
            # is_file() rather than exists(): a directory that happens to be
            # named like a model file must not shadow a real file.
            if path.is_file():
                return path
        return None

    for search_dir in search_dirs:
        found = first_file_in(search_dir)
        if found is not None:
            return found

        # Also search subdirectories one level deep
        if not search_dir.is_dir():
            continue
        for sub in sorted(search_dir.iterdir()):
            if sub.is_dir() and not sub.name.startswith("."):
                found = first_file_in(sub)
                if found is not None:
                    return found
    return None
45
+
46
+
47
def _resolve_import_path(
    imp: Dict[str, Any],
    root_dir: Path,
    search_dirs: List[Path],
) -> Optional[Path]:
    """Work out which file an import entry points at.

    An entry with a truthy ``path`` is resolved relative to ``root_dir`` and
    nothing else is tried: the resolved path is returned if it exists,
    otherwise ``None``. Without a ``path``, the entry's ``model`` name is
    looked up via ``_find_model_file`` in ``root_dir`` followed by
    ``search_dirs``. An entry with neither yields ``None``.
    """
    explicit = imp.get("path")
    if explicit:
        target = root_dir / explicit
        return target.resolve() if target.exists() else None

    name = imp.get("model", "")
    if name:
        return _find_model_file(name, [root_dir] + search_dirs)
    return None
64
+
65
+
66
class ResolvedModel:
    """Holds a root model together with everything pulled in by its imports."""

    def __init__(self):
        # Parsed root model document.
        self.root_model: Dict[str, Any] = {}
        # Import alias -> (possibly entity-filtered) copy of the imported model.
        self.imported_models: Dict[str, Dict[str, Any]] = {}
        # Model name -> names of the models it imports.
        self.import_graph: Dict[str, List[str]] = {}
        # Model name -> path of the file it was loaded from.
        self.file_map: Dict[str, str] = {}
        # Problems collected while resolving.
        self.issues: List[Issue] = []

    def _root_name(self, fallback: str) -> str:
        """Return the root model's declared name, or ``fallback`` if absent."""
        return self.root_model.get("model", {}).get("name", fallback)

    @staticmethod
    def _import_labels(model: Dict[str, Any]) -> List[Any]:
        """List each import of ``model`` by alias, falling back to model name."""
        return [
            entry.get("alias", entry.get("model", ""))
            for entry in model.get("model", {}).get("imports", [])
        ]

    def _collect(self, section: str, tag_alias: bool = False) -> List[Dict[str, Any]]:
        """Deep-copy every item of ``section`` from root and imported models.

        Each copy is tagged with ``_source_model``; copies that come from an
        imported model also get ``_import_alias`` when ``tag_alias`` is set.
        Root items come first, then imported models in sorted alias order.
        """
        collected = []
        root_label = self._root_name("root")
        for item in self.root_model.get(section, []):
            clone = copy.deepcopy(item)
            clone["_source_model"] = root_label
            collected.append(clone)

        for alias in sorted(self.imported_models):
            imported = self.imported_models[alias]
            label = imported.get("model", {}).get("name", alias)
            for item in imported.get(section, []):
                clone = copy.deepcopy(item)
                clone["_source_model"] = label
                if tag_alias:
                    clone["_import_alias"] = alias
                collected.append(clone)

        return collected

    @property
    def all_model_names(self) -> List[str]:
        """Root model name followed by the sorted import aliases; blanks dropped."""
        ordered = [self._root_name("")]
        ordered += sorted(self.imported_models.keys())
        return [name for name in ordered if name]

    def unified_entities(self) -> List[Dict[str, Any]]:
        """Return copies of all entities, annotated with source model and alias."""
        return self._collect("entities", tag_alias=True)

    def unified_relationships(self) -> List[Dict[str, Any]]:
        """Return copies of all relationships, annotated with their source model."""
        return self._collect("relationships")

    def unified_indexes(self) -> List[Dict[str, Any]]:
        """Return copies of all indexes, annotated with their source model."""
        return self._collect("indexes")

    def to_graph_summary(self) -> Dict[str, Any]:
        """Build a plain-dict overview of the resolved graph.

        The summary lists one record per model (root first), the relationships
        whose two endpoint entities belong to different models, and the
        collected issues.
        """
        root_name = self._root_name("unknown")

        # One record for the root model ...
        root_entities = self.root_model.get("entities", [])
        summaries = [{
            "name": root_name,
            "file": self.file_map.get(root_name, ""),
            "entity_count": len(root_entities),
            "entities": [entity.get("name", "") for entity in root_entities],
            "imports": self._import_labels(self.root_model),
            "is_root": True,
        }]

        # ... and one per imported model, in sorted alias order.
        for alias in sorted(self.imported_models):
            imported = self.imported_models[alias]
            imported_name = imported.get("model", {}).get("name", alias)
            imported_entities = imported.get("entities", [])
            summaries.append({
                "name": imported_name,
                "alias": alias,
                "file": self.file_map.get(imported_name, ""),
                "entity_count": len(imported_entities),
                "entities": [entity.get("name", "") for entity in imported_entities],
                "imports": self._import_labels(imported),
                "is_root": False,
            })

        # Entity name -> owning model; a later entity with the same name wins.
        owner_of = {
            entity.get("name", ""): entity.get("_source_model", "")
            for entity in self.unified_entities()
        }

        crossing = []
        for rel in self.unified_relationships():
            # Endpoints look like "Entity.field"; only the entity part matters.
            source_entity = (rel.get("from", "") or "").split(".")[0]
            target_entity = (rel.get("to", "") or "").split(".")[0]
            from_model = owner_of.get(source_entity, "")
            to_model = owner_of.get(target_entity, "")
            if not from_model or not to_model or from_model == to_model:
                continue
            crossing.append({
                "name": rel.get("name", ""),
                "from_model": from_model,
                "to_model": to_model,
                "from": rel.get("from", ""),
                "to": rel.get("to", ""),
                "cardinality": rel.get("cardinality", ""),
            })

        issue_records = [
            {
                "severity": issue.severity,
                "code": issue.code,
                "message": issue.message,
                "path": issue.path,
            }
            for issue in self.issues
        ]

        return {
            "root_model": root_name,
            "model_count": len(summaries),
            "total_entities": sum(record["entity_count"] for record in summaries),
            "cross_model_relationships": crossing,
            "models": summaries,
            "issues": issue_records,
        }
206
+
207
+
208
def _detect_cycle(
    model_name: str,
    import_graph: Dict[str, List[str]],
    visiting: Set[str],
    visited: Set[str],
) -> bool:
    """Depth-first search for a cycle reachable from ``model_name``.

    ``visiting`` holds the nodes on the current DFS path and ``visited`` the
    nodes already fully explored; both sets are mutated in place. When a cycle
    is found the function returns ``True`` immediately and leaves the nodes of
    the current path in ``visiting``.
    """
    if model_name in visiting:
        # Reached a node that is still on the current path: back-edge.
        return True
    if model_name in visited:
        return False

    visiting.add(model_name)
    cycle_found = any(
        _detect_cycle(child, import_graph, visiting, visited)
        for child in import_graph.get(model_name, [])
    )
    if cycle_found:
        return True

    visiting.remove(model_name)
    visited.add(model_name)
    return False
226
+
227
+
228
def resolve_model(
    root_path: str,
    search_dirs: Optional[List[str]] = None,
) -> ResolvedModel:
    """Load a root model file and resolve every model it imports.

    Problems never raise from here; they are recorded on the returned
    object's ``issues`` list instead.

    Args:
        root_path: Path to the root model YAML file.
        search_dirs: Extra directories to search for imported models. The
            root model's own directory is always searched first.

    Returns:
        A ResolvedModel holding the root model, the imported models and all
        collected issues. If the root file cannot be loaded, only a
        ``ROOT_LOAD_FAILED`` issue is set.
    """
    resolved = ResolvedModel()
    root_file = Path(root_path).resolve()
    lookup_dirs = [Path(entry).resolve() for entry in (search_dirs or [])]

    # Load the root document; any loader failure becomes an issue.
    try:
        root_doc = load_yaml_model(str(root_file))
    except Exception as exc:
        resolved.issues.append(Issue(
            severity="error",
            code="ROOT_LOAD_FAILED",
            message=f"Failed to load root model: {exc}",
            path="/",
        ))
        return resolved

    resolved.root_model = root_doc
    header = root_doc.get("model", {})
    root_name = header.get("name", "unknown")
    resolved.file_map[root_name] = str(root_file)

    root_imports = header.get("imports", [])
    if not root_imports:
        # Nothing to resolve: the graph is just the root model.
        return resolved

    # Seed the import graph, then walk the imports depth-first.
    resolved.import_graph[root_name] = []
    cache: Dict[str, Dict[str, Any]] = {}  # model_name -> model data
    _resolve_imports_recursive(
        model_name=root_name,
        imports=root_imports,
        root_dir=root_file.parent,
        search_dirs=lookup_dirs,
        loaded_models=cache,
        result=resolved,
        depth=0,
        max_depth=10,
    )

    # Cycle detection over the graph gathered above.
    on_path: Set[str] = set()
    finished: Set[str] = set()
    if _detect_cycle(root_name, resolved.import_graph, on_path, finished):
        resolved.issues.append(Issue(
            severity="error",
            code="CIRCULAR_IMPORT",
            message="Circular import detected in model dependency graph.",
            path="/model/imports",
        ))

    # Entity names must be unique across models; the first owner is kept.
    owners: Dict[str, str] = {}  # entity_name -> source_model
    for entity in resolved.root_model.get("entities", []):
        entity_name = entity.get("name", "")
        if entity_name:
            owners[entity_name] = root_name

    for alias, imported in resolved.imported_models.items():
        imported_name = imported.get("model", {}).get("name", alias)
        for entity in imported.get("entities", []):
            entity_name = entity.get("name", "")
            if not entity_name:
                continue
            if entity_name not in owners:
                owners[entity_name] = imported_name
                continue
            resolved.issues.append(Issue(
                severity="warn",
                code="DUPLICATE_CROSS_MODEL_ENTITY",
                message=f"Entity '{entity_name}' exists in both '{owners[entity_name]}' and '{imported_name}'. "
                        f"Use alias '{alias}' to disambiguate.",
                path="/model/imports",
            ))

    return resolved
318
+
319
+
320
def _resolve_imports_recursive(
    model_name: str,
    imports: List[Dict[str, Any]],
    root_dir: Path,
    search_dirs: List[Path],
    loaded_models: Dict[str, Dict[str, Any]],
    result: ResolvedModel,
    depth: int,
    max_depth: int,
) -> None:
    """Resolve ``imports`` of ``model_name`` and, recursively, their imports.

    Each import is recorded in ``result.import_graph``, loaded (or taken from
    ``loaded_models`` if already seen), and stored under its alias in
    ``result.imported_models``. Failures are appended to ``result.issues``
    rather than raised.

    Args:
        model_name: Name of the model whose imports are being resolved.
        imports: Import entries of that model.
        root_dir: Directory used to resolve relative import paths.
        search_dirs: Additional directories searched for model files.
        loaded_models: Cache of already loaded models, keyed by the model
            name used in the import entry; updated in place.
        result: Accumulator for imported models, the import graph, the file
            map and issues; updated in place.
        depth: Current recursion depth (0 for the root model's imports).
        max_depth: Depth beyond which resolution stops with an issue.
    """
    if depth > max_depth:
        result.issues.append(Issue(
            severity="error",
            code="IMPORT_DEPTH_EXCEEDED",
            message=f"Import depth exceeded {max_depth}. Possible circular dependency.",
            path="/model/imports",
        ))
        return

    for imp in imports:
        imp_model_name = imp.get("model", "")
        # An explicit but empty/null alias falls back to the model name, so
        # imported_models never gets a None/"" key (which would later break
        # sorting of the aliases).
        alias = imp.get("alias") or imp_model_name
        entity_filter = imp.get("entities")  # None means all

        if not imp_model_name:
            result.issues.append(Issue(
                severity="error",
                code="INVALID_IMPORT",
                message="Import entry missing 'model' field.",
                path="/model/imports",
            ))
            continue

        # Track in import graph
        result.import_graph.setdefault(model_name, []).append(imp_model_name)

        if imp_model_name in loaded_models:
            # Already loaded (and its own imports already walked): reuse it.
            model_data = loaded_models[imp_model_name]
        else:
            file_path = _resolve_import_path(imp, root_dir, search_dirs)
            if not file_path:
                result.issues.append(Issue(
                    severity="error",
                    code="IMPORT_NOT_FOUND",
                    message=f"Cannot find model file for import '{imp_model_name}'.",
                    path="/model/imports",
                ))
                continue

            # Broad catch is deliberate: any loader failure is reported as an
            # issue so the remaining imports can still be resolved.
            try:
                model_data = load_yaml_model(str(file_path))
            except Exception as exc:
                result.issues.append(Issue(
                    severity="error",
                    code="IMPORT_LOAD_FAILED",
                    message=f"Failed to load imported model '{imp_model_name}': {exc}",
                    path="/model/imports",
                ))
                continue

            actual_name = model_data.get("model", {}).get("name", "")
            if actual_name and actual_name != imp_model_name:
                result.issues.append(Issue(
                    severity="warn",
                    code="IMPORT_NAME_MISMATCH",
                    message=f"Import references '{imp_model_name}' but file declares model.name='{actual_name}'.",
                    path="/model/imports",
                ))

            # Cache before recursing so a cyclic import finds the entry and
            # does not load the file again.
            loaded_models[imp_model_name] = model_data
            result.file_map[imp_model_name] = str(file_path)

            # Recursively resolve this model's imports, relative to its file.
            sub_imports = model_data.get("model", {}).get("imports", [])
            if sub_imports:
                _resolve_imports_recursive(
                    model_name=imp_model_name,
                    imports=sub_imports,
                    root_dir=file_path.parent,
                    search_dirs=search_dirs,
                    loaded_models=loaded_models,
                    result=result,
                    depth=depth + 1,
                    max_depth=max_depth,
                )

        # The stored model is always a private deep copy of the cached data.
        imported = copy.deepcopy(model_data)
        if entity_filter:
            wanted = set(entity_filter)  # built once, not per entity
            available = {e.get("name", "") for e in model_data.get("entities", [])}
            for requested in entity_filter:
                if requested not in available:
                    result.issues.append(Issue(
                        severity="error",
                        code="IMPORT_ENTITY_NOT_FOUND",
                        message=f"Import '{alias}' requests entity '{requested}' which does not exist in '{imp_model_name}'.",
                        path="/model/imports",
                    ))
            # Filter the copied entities, not the cached originals, so the
            # result shares no dicts with loaded_models.
            imported["entities"] = [
                e for e in imported.get("entities", [])
                if e.get("name", "") in wanted
            ]
        result.imported_models[alias] = imported
430
+
431
+
432
def resolve_project(
    project_dir: str,
    search_dirs: Optional[List[str]] = None,
) -> Dict[str, ResolvedModel]:
    """Resolve every model file found under a project directory.

    All ``*.model.yaml`` and ``*.model.yml`` files below ``project_dir``
    (searched recursively) are resolved in sorted path order. The project
    directory itself, followed by ``search_dirs``, is offered to each
    resolution as a place to look for imports.

    Returns:
        A dict mapping each model file path (as a string) to its
        ResolvedModel.
    """
    project_root = Path(project_dir).resolve()

    # Collect both spellings of the extension, then order them by path.
    model_files = sorted([
        *project_root.rglob("*.model.yaml"),
        *project_root.rglob("*.model.yml"),
    ])

    lookup_dirs = [project_root]
    lookup_dirs += [Path(entry).resolve() for entry in (search_dirs or [])]
    lookup_names = [str(folder) for folder in lookup_dirs]

    return {
        str(model_file): resolve_model(str(model_file), search_dirs=lookup_names)
        for model_file in model_files
    }
datalex_core/schema.py ADDED
@@ -0,0 +1,54 @@
1
+ import json
2
+ from pathlib import Path
3
+ from typing import Any, Dict, List
4
+
5
+ from jsonschema import Draft202012Validator
6
+
7
+ from datalex_core.issues import Issue
8
+ from datalex_core.modeling import normalize_model
9
+
10
+
11
def load_schema(schema_path: str) -> Dict[str, Any]:
    """Read the JSON schema stored at ``schema_path``.

    Returns:
        The parsed JSON document.

    Raises:
        FileNotFoundError: If ``schema_path`` does not exist.
    """
    schema_file = Path(schema_path)
    if schema_file.exists():
        return json.loads(schema_file.read_text(encoding="utf-8"))
    raise FileNotFoundError(f"Schema file not found: {schema_path}")
17
+
18
+
19
def _to_json_path(parts: List[Any]) -> str:
    """Join path components into a ``/``-separated string with a leading slash.

    Every component is converted with ``str``; an empty list gives ``"/"``.
    """
    if not parts:
        return "/"
    return "/" + "/".join(map(str, parts))
26
+
27
+
28
def _looks_like_model_schema(schema: Dict[str, Any]) -> bool:
    """Heuristically decide whether ``schema`` is the model schema.

    True when the schema's ``$id`` ends in ``/model.schema.json``, or when its
    ``properties`` mapping declares both ``model`` and ``entities``.
    """
    if str(schema.get("$id") or "").endswith("/model.schema.json"):
        return True
    props = schema.get("properties")
    return isinstance(props, dict) and "model" in props and "entities" in props
36
+
37
+
38
def schema_issues(model: Dict[str, Any], schema: Dict[str, Any]) -> List[Issue]:
    """Validate ``model`` against ``schema`` and report each violation.

    When ``schema`` looks like the model schema, ``model`` is first passed
    through ``normalize_model``. Validation uses the Draft 2020-12 validator.

    Returns:
        One ``SCHEMA_VALIDATION_FAILED`` error Issue per validation error,
        ordered by the error's absolute path in the document.
    """
    document = normalize_model(model) if _looks_like_model_schema(schema) else model
    validator = Draft202012Validator(schema)

    ordered_errors = sorted(
        validator.iter_errors(document),
        key=lambda err: list(err.absolute_path),
    )
    return [
        Issue(
            severity="error",
            code="SCHEMA_VALIDATION_FAILED",
            message=err.message,
            path=_to_json_path(list(err.absolute_path)),
        )
        for err in ordered_errors
    ]