datalex-cli 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. datalex_cli/__init__.py +1 -0
  2. datalex_cli/datalex_cli.py +658 -0
  3. datalex_cli/main.py +2925 -0
  4. datalex_cli-0.1.1.dist-info/METADATA +228 -0
  5. datalex_cli-0.1.1.dist-info/RECORD +64 -0
  6. datalex_cli-0.1.1.dist-info/WHEEL +5 -0
  7. datalex_cli-0.1.1.dist-info/entry_points.txt +2 -0
  8. datalex_cli-0.1.1.dist-info/licenses/LICENSE +21 -0
  9. datalex_cli-0.1.1.dist-info/top_level.txt +2 -0
  10. datalex_core/__init__.py +94 -0
  11. datalex_core/_schemas/datalex/common.schema.json +127 -0
  12. datalex_core/_schemas/datalex/domain.schema.json +24 -0
  13. datalex_core/_schemas/datalex/entity.schema.json +158 -0
  14. datalex_core/_schemas/datalex/model.schema.json +141 -0
  15. datalex_core/_schemas/datalex/policy.schema.json +70 -0
  16. datalex_core/_schemas/datalex/project.schema.json +82 -0
  17. datalex_core/_schemas/datalex/snippet.schema.json +24 -0
  18. datalex_core/_schemas/datalex/source.schema.json +104 -0
  19. datalex_core/_schemas/datalex/term.schema.json +30 -0
  20. datalex_core/canonical.py +166 -0
  21. datalex_core/completion.py +204 -0
  22. datalex_core/connectors/__init__.py +39 -0
  23. datalex_core/connectors/base.py +417 -0
  24. datalex_core/connectors/bigquery.py +229 -0
  25. datalex_core/connectors/databricks.py +262 -0
  26. datalex_core/connectors/mysql.py +266 -0
  27. datalex_core/connectors/postgres.py +309 -0
  28. datalex_core/connectors/redshift.py +298 -0
  29. datalex_core/connectors/snowflake.py +336 -0
  30. datalex_core/connectors/sqlserver.py +425 -0
  31. datalex_core/datalex/__init__.py +26 -0
  32. datalex_core/datalex/diff.py +188 -0
  33. datalex_core/datalex/errors.py +85 -0
  34. datalex_core/datalex/loader.py +512 -0
  35. datalex_core/datalex/migrate_layout.py +382 -0
  36. datalex_core/datalex/parse_cache.py +102 -0
  37. datalex_core/datalex/project.py +214 -0
  38. datalex_core/datalex/types.py +224 -0
  39. datalex_core/dbt/__init__.py +18 -0
  40. datalex_core/dbt/emit.py +344 -0
  41. datalex_core/dbt/manifest.py +329 -0
  42. datalex_core/dbt/profiles.py +185 -0
  43. datalex_core/dbt/sync.py +279 -0
  44. datalex_core/dbt/warehouse.py +215 -0
  45. datalex_core/dialects/__init__.py +15 -0
  46. datalex_core/dialects/_common.py +48 -0
  47. datalex_core/dialects/base.py +47 -0
  48. datalex_core/dialects/postgres.py +164 -0
  49. datalex_core/dialects/registry.py +36 -0
  50. datalex_core/dialects/snowflake.py +129 -0
  51. datalex_core/diffing.py +358 -0
  52. datalex_core/docs_generator.py +797 -0
  53. datalex_core/doctor.py +181 -0
  54. datalex_core/generators.py +478 -0
  55. datalex_core/importers.py +1176 -0
  56. datalex_core/issues.py +23 -0
  57. datalex_core/loader.py +21 -0
  58. datalex_core/migrate.py +316 -0
  59. datalex_core/modeling.py +679 -0
  60. datalex_core/packages.py +430 -0
  61. datalex_core/policy.py +1037 -0
  62. datalex_core/resolver.py +456 -0
  63. datalex_core/schema.py +54 -0
  64. datalex_core/semantic.py +1561 -0
@@ -0,0 +1,85 @@
1
+ """Source-located error model for DataLex.
2
+
3
+ Every error carries file, line, column, and a suggested fix where possible. This is
4
+ what the DataLex spec calls out as a parser guarantee and what makes both humans and
5
+ LLMs faster at repair.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass, field
11
+ from typing import List, Optional
12
+
13
+
14
@dataclass(frozen=True)
class SourceLocation:
    """Immutable pointer to a position in a source file.

    ``line`` and ``column`` are optional. ``column`` is only rendered when
    ``line`` is known as well.
    """

    file: str
    line: Optional[int] = None
    column: Optional[int] = None

    def format(self) -> str:
        """Render as ``file``, ``file:line`` or ``file:line:column``."""
        if self.line is None:
            # Without a line there is nothing to append, even if a column is set.
            return self.file
        column_suffix = "" if self.column is None else f":{self.column}"
        return f"{self.file}:{self.line}{column_suffix}"
26
+
27
+
28
@dataclass
class DataLexError(Exception):
    """A single diagnostic produced while loading or validating a project.

    Fields:
        code: Stable machine-readable identifier (e.g. ``"YAML_PARSE"``).
        message: Human-readable description of the problem.
        location: Where the problem was found, if known.
        suggested_fix: Optional hint rendered on its own line after the message.
        path: Optional path inside the document, rendered in square brackets.
        severity: Label printed before the code; defaults to ``"error"``.
    """

    code: str
    message: str
    location: Optional[SourceLocation] = None
    suggested_fix: Optional[str] = None
    path: Optional[str] = None
    severity: str = "error"

    def __reduce__(self):
        """Support ``copy`` and ``pickle`` for keyword-constructed instances.

        The dataclass ``__init__`` does not forward keyword arguments to
        ``Exception``, so ``self.args`` is empty and the default exception
        reduction would try to rebuild the object as ``DataLexError()``, which
        fails on the two required fields. Rebuild from the field values instead
        and carry the instance ``__dict__`` along as state.
        """
        init_args = (
            self.code,
            self.message,
            self.location,
            self.suggested_fix,
            self.path,
            self.severity,
        )
        return (type(self), init_args, dict(vars(self)))

    def __str__(self) -> str:
        """Render as ``<location>: <severity>[<code>]: <message> [<path>]`` plus an optional hint line."""
        loc = self.location.format() if self.location else ""
        prefix = f"{loc}: " if loc else ""
        fix = f"\n hint: {self.suggested_fix}" if self.suggested_fix else ""
        path = f" [{self.path}]" if self.path else ""
        return f"{prefix}{self.severity}[{self.code}]: {self.message}{path}{fix}"

    def to_dict(self) -> dict:
        """Return a flat dict with the location split into file/line/column keys."""
        return {
            "code": self.code,
            "severity": self.severity,
            "message": self.message,
            "path": self.path,
            "file": self.location.file if self.location else None,
            "line": self.location.line if self.location else None,
            "column": self.location.column if self.location else None,
            "suggested_fix": self.suggested_fix,
        }
55
+
56
+
57
@dataclass
class DataLexErrorBag:
    """Accumulates diagnostics across a whole project load so they can be fixed in one pass."""

    errors: List[DataLexError] = field(default_factory=list)

    def add(self, err: DataLexError) -> None:
        """Append a single diagnostic."""
        self.errors.append(err)

    def extend(self, errs: List[DataLexError]) -> None:
        """Append every diagnostic from ``errs``, preserving order."""
        self.errors.extend(errs)

    def has_errors(self) -> bool:
        """Return True when at least one entry has severity ``"error"``."""
        for entry in self.errors:
            if entry.severity == "error":
                return True
        return False

    def raise_if_errors(self) -> None:
        """Raise ``DataLexLoadError`` with every collected entry if any is an error."""
        if not self.has_errors():
            return
        raise DataLexLoadError(self.errors)

    def to_list(self) -> List[dict]:
        """Return the ``to_dict()`` form of every entry, in insertion order."""
        serialized: List[dict] = []
        for entry in self.errors:
            serialized.append(entry.to_dict())
        return serialized
78
+
79
+
80
class DataLexLoadError(Exception):
    """Raised when load_project finishes with one or more errors.

    The full list is kept on ``errors``; the exception message shows the total
    count and the codes of at most the first five entries.
    """

    def __init__(self, errors: List[DataLexError]):
        self.errors = errors
        leading_codes = [entry.code for entry in errors[:5]]
        summary = "; ".join(leading_codes)
        super().__init__(f"{len(errors)} DataLex error(s) — {summary}")
@@ -0,0 +1,512 @@
1
+ """Streaming, kind-dispatched DataLex project loader.
2
+
3
+ Design goals (from the DataLex spec):
4
+ * Streaming-safe: load a 10,000-entity project without holding all YAML in memory.
5
+ * Kind-dispatched: every file declares `kind:` at top; unrecognized is a parse error.
6
+ * Source-locating: every error carries file/line/column.
7
+ * Deterministic: iteration order is sorted by (kind, name) for stable emission.
8
+
9
+ This loader is intentionally self-contained — it does not go through the legacy
10
+ `loader.py` path, which is v3-model-shaped and not kind-aware.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import glob
16
+ import json
17
+ from pathlib import Path
18
+ from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
19
+
20
+ import yaml
21
+ from jsonschema import Draft202012Validator
22
+
23
+ from datalex_core.datalex.errors import DataLexError, DataLexErrorBag, SourceLocation
24
+ from datalex_core.datalex.parse_cache import (
25
+ ParseCache,
26
+ cache_enabled_from_env,
27
+ default_cache_dir,
28
+ )
29
+
30
+
31
+ KINDS = ("project", "entity", "source", "model", "term", "domain", "policy", "snippet")
32
+
33
+
34
class _MarkedSafeLoader(yaml.SafeLoader):
    """SafeLoader variant whose mappings remember where they started.

    Each constructed mapping receives an extra `__mark__` entry holding its
    1-based (line, column). Validation can then report file:line:column
    without parsing the file a second time. The entry is removed again before
    documents are handed back to user code.
    """
41
+
42
+
43
def _construct_mapping(loader, node, deep=False):
    """Build a dict from a YAML mapping node and tag it with its source position.

    Args:
        loader: The active PyYAML loader instance.
        node: The mapping node being constructed.
        deep: Forwarded to ``construct_object`` for keys and values.

    Returns:
        The constructed dict with an extra ``"__mark__"`` entry holding the
        1-based ``(line, column)`` of the mapping's start mark. A user key that
        is literally ``"__mark__"`` is overwritten by this entry.

    Raises:
        yaml.constructor.ConstructorError: If a key is unhashable (for example
            a sequence or mapping used as a key). Raising a YAML error instead
            of letting ``TypeError`` escape lets callers that catch
            ``yaml.YAMLError`` report it with a source location.
    """
    loader.flatten_mapping(node)
    mapping: Dict[Any, Any] = {}
    for key_node, value_node in node.value:
        key = loader.construct_object(key_node, deep=deep)
        try:
            hash(key)
        except TypeError:
            raise yaml.constructor.ConstructorError(
                "while constructing a mapping",
                node.start_mark,
                "found unhashable key",
                key_node.start_mark,
            ) from None
        mapping[key] = loader.construct_object(value_node, deep=deep)
    # attach source mark — use start_mark of the mapping node itself
    mapping["__mark__"] = (node.start_mark.line + 1, node.start_mark.column + 1)
    return mapping
53
+
54
+
55
+ _MarkedSafeLoader.add_constructor(
56
+ yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, _construct_mapping
57
+ )
58
+
59
+
60
+ def _strip_marks(value: Any) -> Any:
61
+ """Return a deep copy of value with all __mark__ keys removed.
62
+
63
+ Marks are attached during parsing for error reporting; the user-facing document
64
+ should never include them.
65
+ """
66
+ if isinstance(value, dict):
67
+ return {k: _strip_marks(v) for k, v in value.items() if k != "__mark__"}
68
+ if isinstance(value, list):
69
+ return [_strip_marks(v) for v in value]
70
+ return value
71
+
72
+
73
+ def _mark_of(value: Any) -> Optional[Tuple[int, int]]:
74
+ if isinstance(value, dict):
75
+ m = value.get("__mark__")
76
+ if isinstance(m, tuple) and len(m) == 2:
77
+ return m
78
+ return None
79
+
80
+
81
def _load_yaml_marked(path: Path, bag: DataLexErrorBag) -> Optional[Any]:
    """Parse ``path`` with the mark-attaching loader, recording failures on ``bag``.

    Args:
        path: File to read; it is decoded as UTF-8.
        bag: Receives a ``YAML_PARSE`` entry on a YAML error, or a ``YAML_IO``
            entry when the file cannot be read or decoded.

    Returns:
        The parsed document (mappings carry ``__mark__`` entries), or None when
        an error was recorded.
    """
    try:
        with path.open("r", encoding="utf-8") as f:
            return yaml.load(f, Loader=_MarkedSafeLoader)
    except yaml.YAMLError as e:
        # Only marked YAML errors expose problem_mark; fall back to file-only.
        mark = getattr(e, "problem_mark", None)
        loc = SourceLocation(
            file=str(path),
            line=(mark.line + 1) if mark else None,
            column=(mark.column + 1) if mark else None,
        )
        bag.add(
            DataLexError(
                code="YAML_PARSE",
                message=f"YAML parse error: {e}",
                location=loc,
                suggested_fix="Check indentation and quoting near the reported line.",
            )
        )
        return None
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is a ValueError, not an OSError: a file that is not
        # valid UTF-8 would otherwise abort the whole load instead of being
        # reported alongside the other diagnostics.
        bag.add(
            DataLexError(
                code="YAML_IO",
                message=f"Cannot read file: {e}",
                location=SourceLocation(file=str(path)),
            )
        )
        return None
110
+
111
+
112
+ _SCHEMA_CACHE: Dict[str, Dict[str, Any]] = {}
113
+
114
+
115
+ def _load_kind_schema(schemas_root: Path, kind: str) -> Optional[Dict[str, Any]]:
116
+ if kind in _SCHEMA_CACHE:
117
+ return _SCHEMA_CACHE[kind]
118
+ path = schemas_root / f"{kind}.schema.json"
119
+ if not path.exists():
120
+ return None
121
+ with path.open("r", encoding="utf-8") as f:
122
+ schema = json.load(f)
123
+ _SCHEMA_CACHE[kind] = schema
124
+ return schema
125
+
126
+
127
def _validate_against_kind_schema(
    doc: Dict[str, Any], kind: str, schemas_root: Path, path: Path, bag: DataLexErrorBag
) -> None:
    """Validate ``doc`` against the JSON Schema for ``kind`` and record violations.

    Adds one ``SCHEMA_MISSING`` entry when no schema file is available, otherwise
    one ``SCHEMA_VALIDATION`` entry per violation, ordered by document path. The
    validator sees a mark-stripped copy; source positions are looked up on the
    original marked ``doc``.
    """
    file_name = str(path)
    schema = _load_kind_schema(schemas_root, kind)
    if schema is None:
        missing = DataLexError(
            code="SCHEMA_MISSING",
            message=f"No schema file found for kind '{kind}' under {schemas_root}",
            location=SourceLocation(file=file_name),
        )
        bag.add(missing)
        return

    unmarked = _strip_marks(doc)
    violations = list(Draft202012Validator(schema).iter_errors(unmarked))
    violations.sort(key=lambda violation: list(violation.absolute_path))
    for violation in violations:
        pointer = list(violation.absolute_path)
        line, column = _lookup_mark(doc, pointer)
        json_path = "/" + "/".join(str(part) for part in pointer)
        bag.add(
            DataLexError(
                code="SCHEMA_VALIDATION",
                message=violation.message,
                location=SourceLocation(file=file_name, line=line, column=column),
                path=json_path,
                suggested_fix=_suggest_fix(violation),
            )
        )
154
+
155
+
156
def _lookup_mark(doc: Any, abs_path: List[Any]) -> Tuple[Optional[int], Optional[int]]:
    """Follow ``abs_path`` through ``doc`` and return the deepest source mark found.

    Starts from the mark on ``doc`` itself and replaces it each time a node
    along the path carries its own mark. Walking stops at the first step that
    cannot be followed. Returns ``(None, None)`` when no mark was seen at all.
    """
    nearest: Optional[Tuple[int, int]] = _mark_of(doc)
    node = doc
    for step in abs_path:
        descend_list = isinstance(node, list) and isinstance(step, int)
        if not descend_list and not isinstance(node, dict):
            # Scalars (and lists addressed by a non-integer) cannot be descended.
            break
        try:
            node = node[step] if descend_list else node.get(step)
        except (IndexError, KeyError, TypeError):
            break
        found = _mark_of(node)
        if found:
            nearest = found
    return nearest if nearest else (None, None)
176
+
177
+
178
+ def _suggest_fix(err) -> Optional[str]:
179
+ validator = err.validator
180
+ if validator == "required":
181
+ missing = err.message.split("'")[1] if "'" in err.message else "required key"
182
+ return f"Add the missing key '{missing}' to this object."
183
+ if validator == "enum":
184
+ return f"Use one of: {err.validator_value}"
185
+ if validator == "pattern":
186
+ return f"Value must match pattern {err.validator_value}"
187
+ if validator == "const":
188
+ return f"Expected constant value: {err.validator_value}"
189
+ if validator == "additionalProperties":
190
+ return "Remove the unknown property, or check for a typo."
191
+ return None
192
+
193
+
194
def iter_yaml_files(root: Path, glob_pattern: str) -> Iterator[Path]:
    """Yield regular files under ``root`` matching ``glob_pattern``, in sorted order.

    Only ``glob_pattern`` is interpreted as a glob (``**`` recurses). ``root``
    is escaped first, so a project directory whose name contains ``[``, ``]``,
    ``*`` or ``?`` is matched literally instead of being read as a pattern.

    The matches are collected and sorted before anything is yielded, so that
    the order is deterministic; only the yielding itself is lazy.
    """
    full = str(Path(glob.escape(str(root))) / glob_pattern)
    for p in sorted(glob.iglob(full, recursive=True)):
        path = Path(p)
        if path.is_file():
            yield path
202
+
203
+
204
def load_file(
    path: Path,
    schemas_root: Path,
    bag: DataLexErrorBag,
    cache: Optional[ParseCache] = None,
) -> Optional[Dict[str, Any]]:
    """Load and validate a single DataLex YAML file.

    Args:
        path: YAML file to load.
        schemas_root: Directory holding the per-kind JSON Schemas.
        bag: Receives every problem found (parse, shape, kind, schema).
        cache: Optional parse cache. A hit skips YAML parsing and schema
            validation entirely.

    Returns:
        On a cache hit, the cached mark-stripped document. Otherwise the freshly
        parsed document, still carrying ``__mark__`` entries, or None when the
        file could not be parsed, is not a mapping, or has an unknown ``kind``.
        A document with schema violations is still returned; the violations
        are on ``bag``.

    Only documents that validated without adding anything to ``bag`` are written
    to the cache. Because a hit bypasses validation, caching an invalid
    document would make its errors disappear on the next load.
    """
    if cache is not None:
        # The real kind is unknown before parsing, so the probe tries each one.
        cached = _try_cache_get(path, cache)
        if cached is not None:
            return cached

    doc = _load_yaml_marked(path, bag)
    if doc is None:
        return None
    if not isinstance(doc, dict):
        bag.add(
            DataLexError(
                code="SHAPE",
                message="Top-level YAML must be a mapping.",
                location=SourceLocation(file=str(path)),
            )
        )
        return None

    kind = doc.get("kind")
    if kind not in KINDS:
        mark = _mark_of(doc)
        bag.add(
            DataLexError(
                code="KIND_UNKNOWN",
                message=f"Unknown kind '{kind}'",
                location=SourceLocation(
                    file=str(path),
                    line=mark[0] if mark else None,
                    column=mark[1] if mark else None,
                ),
                suggested_fix=f"Set 'kind:' to one of: {', '.join(KINDS)}",
            )
        )
        return None

    issues_before = len(bag.errors)
    _validate_against_kind_schema(doc, kind, schemas_root, path, bag)
    validated_cleanly = len(bag.errors) == issues_before
    if cache is not None and validated_cleanly:
        # Cache the mark-stripped doc — downstream callers strip marks anyway.
        cache.put(path, kind, _strip_marks(doc))
    return doc
258
+
259
+
260
def _try_cache_get(path: Path, cache: ParseCache) -> Optional[Dict[str, Any]]:
    """Look for a cached parse of ``path`` without knowing its kind up front.

    Cache lookups take a kind, so every known kind is tried in turn. An entry
    is accepted only when its own ``kind`` field equals the kind it was found
    under. Returns None when no kind produces such an entry.
    """
    for candidate in KINDS:
        entry = cache.get(path, candidate)
        if entry is None:
            continue
        if entry.get("kind") == candidate:
            return entry
    return None
272
+
273
+
274
def load_project(
    project_root: Union[str, Path],
    schemas_root: Optional[Union[str, Path]] = None,
    strict: bool = True,
    cache_dir: Optional[Union[str, Path]] = None,
) -> "DataLexProject":
    """Entry point: discover, parse, validate, and aggregate a DataLex project.

    Args:
        project_root: Directory containing `datalex.yaml`.
        schemas_root: Directory containing per-kind JSON Schemas. When omitted
            it is located by `_infer_schemas_root`.
        strict: When True, raise DataLexLoadError if any errors are collected.
            When False, return the project with errors on the bag.
        cache_dir: Optional parse cache directory. If None, a cache is only used
            when `cache_enabled_from_env()` says so, at `default_cache_dir(root)`.

    Returns:
        The aggregated, resolved `DataLexProject`.

    Raises:
        DataLexLoadError: In strict mode, when the bag holds at least one error.

    Each discovered file is loaded at most once, even when several globs match
    it or a glob matches the manifest itself. Otherwise a file would be reported
    as a duplicate of itself and its validation errors would be repeated.
    """
    from datalex_core.datalex.project import DataLexProject  # local import to avoid cycle

    root = Path(project_root).resolve()
    if schemas_root is None:
        schemas_root = _infer_schemas_root(root)
    schemas_root = Path(schemas_root)

    cache: Optional[ParseCache] = None
    if cache_dir is not None:
        cache = ParseCache(Path(cache_dir), schemas_root)
    elif cache_enabled_from_env():
        cache = ParseCache(default_cache_dir(root), schemas_root)

    bag = DataLexErrorBag()

    manifest_path = root / "datalex.yaml"
    manifest: Optional[Dict[str, Any]] = None
    if manifest_path.exists():
        manifest = load_file(manifest_path, schemas_root, bag, cache=cache)

    if manifest is None:
        # Missing manifest is not fatal — we can still load discovered files for migration
        # tooling, but we warn.
        bag.add(
            DataLexError(
                code="PROJECT_MANIFEST_MISSING",
                severity="warn",
                message="No datalex.yaml manifest found; discovery will use default globs.",
                location=SourceLocation(file=str(root)),
                suggested_fix="Create datalex.yaml at the project root. See schemas/datalex/project.schema.json.",
            )
        )

    settings = manifest or {}
    globs = {
        "models": settings.get("models", "models/**/*.yaml"),
        "sources": settings.get("sources", "sources/**/*.yaml"),
        "glossary": settings.get("glossary", "glossary/**/*.yaml"),
        "snippets": settings.get("snippets", ".datalex/snippets/**/*.yaml"),
        "policies": settings.get("policies", "policies/**/*.yaml"),
    }

    entities: Dict[str, Dict[str, Any]] = {}
    sources: Dict[str, Dict[str, Any]] = {}
    models_dict: Dict[str, Dict[str, Any]] = {}
    terms: Dict[str, Dict[str, Any]] = {}
    domains: Dict[str, Dict[str, Any]] = {}
    policies: Dict[str, Dict[str, Any]] = {}
    snippets: Dict[str, Dict[str, Any]] = {}
    file_of: Dict[Tuple[str, str], str] = {}

    # Kinds without a bucket (e.g. "project") are parsed and validated but not registered.
    buckets = {
        "entity": entities,
        "source": sources,
        "model": models_dict,
        "term": terms,
        "domain": domains,
        "policy": policies,
        "snippet": snippets,
    }

    def _register(doc: Dict[str, Any], path: Path) -> None:
        kind = doc.get("kind")
        name = doc.get("name")
        if not name:
            return
        bucket = buckets.get(kind)
        if bucket is None:
            return
        # layer uniqueness for entities — name is unique *per layer*
        key = name if kind != "entity" else f"{doc.get('layer', 'physical')}:{name}"
        if key in bucket:
            mark = _mark_of(doc)
            bag.add(
                DataLexError(
                    code="DUPLICATE_NAME",
                    message=f"Duplicate {kind} '{name}' — first defined in {file_of.get((kind, key))}",
                    location=SourceLocation(
                        file=str(path),
                        line=mark[0] if mark else None,
                        column=mark[1] if mark else None,
                    ),
                    suggested_fix="Rename one of the duplicates or merge them.",
                )
            )
            return
        bucket[key] = doc
        file_of[(kind, key)] = str(path)

    # The manifest has already been handled above; never load it a second time.
    seen_files = {str(manifest_path)}

    # Walk the trees in a stable order
    for _group, pattern in sorted(globs.items()):
        for p in iter_yaml_files(root, pattern):
            file_key = str(p)
            if file_key in seen_files:
                continue
            seen_files.add(file_key)
            doc = load_file(p, schemas_root, bag, cache=cache)
            if doc is not None:
                _register(doc, p)

    project = DataLexProject(
        root=root,
        manifest=_strip_marks(manifest) if manifest else None,
        entities={k: _strip_marks(v) for k, v in entities.items()},
        sources={k: _strip_marks(v) for k, v in sources.items()},
        models={k: _strip_marks(v) for k, v in models_dict.items()},
        terms={k: _strip_marks(v) for k, v in terms.items()},
        domains={k: _strip_marks(v) for k, v in domains.items()},
        policies={k: _strip_marks(v) for k, v in policies.items()},
        snippets={k: _strip_marks(v) for k, v in snippets.items()},
        file_of=file_of,
        errors=bag,
    )

    _load_imports(project, schemas_root, bag)

    project.resolve()  # resolves term references, snippet `use:`, logical back-refs

    if strict:
        bag.raise_if_errors()

    return project
404
+
405
+
406
def _load_imports(
    project: "DataLexProject",
    schemas_root: Path,
    bag: DataLexErrorBag,
) -> None:
    """Resolve `imports:` in the manifest and attach each as a sub-project.

    Returns immediately when the manifest declares no imports. Each resolved
    package is loaded with ``strict=False`` so that a broken sub-project does
    not abort the whole load. Its diagnostics are copied onto ``bag`` with
    their original severity, location, document path and hint, and the message
    is prefixed with ``[import:<alias>]`` to show which package they came from.

    Args:
        project: The already-built main project; ``project.imports`` is filled in.
        schemas_root: Schemas directory passed on to each sub-project load.
        bag: Receives resolution failures, alias collisions, load failures and
            the propagated sub-project diagnostics.
    """
    manifest = project.manifest or {}
    imports = manifest.get("imports") or []
    if not imports:
        return

    try:
        from datalex_core.packages import load_imports_for, PackageResolveError
    except ImportError:
        bag.add(
            DataLexError(
                code="PACKAGES_MODULE_MISSING",
                message="datalex_core.packages is unavailable; cannot resolve imports.",
                location=SourceLocation(file=str(project.root)),
            )
        )
        return

    manifest_file = str(project.root / "datalex.yaml")

    try:
        resolved = load_imports_for(project.root)
    except PackageResolveError as e:
        bag.add(
            DataLexError(
                code="PACKAGE_RESOLVE",
                message=str(e),
                location=SourceLocation(file=manifest_file),
                suggested_fix="Run `datalex datalex packages resolve` and re-validate.",
            )
        )
        return

    for pkg in resolved:
        alias = pkg.spec.default_alias()
        if alias in project.imports:
            bag.add(
                DataLexError(
                    code="IMPORT_ALIAS_COLLISION",
                    message=f"Two imports share alias '{alias}'. Add an `alias:` to one of them.",
                    location=SourceLocation(file=manifest_file),
                )
            )
            continue
        try:
            sub = load_project(pkg.root, schemas_root=schemas_root, strict=False)
        except Exception as e:  # noqa: BLE001 — surface any loader failure as an error
            bag.add(
                DataLexError(
                    code="IMPORT_LOAD_FAILED",
                    message=f"Failed to load imported package '{pkg.spec.package}': {e}",
                    location=SourceLocation(file=str(pkg.root)),
                )
            )
            continue

        # Propagate sub-project diagnostics prefixed by the alias so it's clear
        # which package they came from. Severity and source position are kept.
        for err in sub.errors.to_list():
            bag.add(
                DataLexError(
                    code=err.get("code", "IMPORT_CHILD"),
                    severity=err.get("severity", "warn"),
                    message=f"[import:{alias}] {err.get('message', '')}",
                    location=SourceLocation(
                        file=err.get("file") or str(pkg.root),
                        line=err.get("line"),
                        column=err.get("column"),
                    ),
                    suggested_fix=err.get("suggested_fix"),
                    path=err.get("path"),
                )
            )
        project.imports[alias] = sub
483
+
484
+
485
+ def _infer_schemas_root(project_root: Path) -> Path:
486
+ """Locate the DataLex JSON Schemas.
487
+
488
+ Order of precedence:
489
+ 1. Bundled schemas shipped with the installed `datalex_core` package
490
+ (so `pip install datalex-cli` works from any working directory).
491
+ 2. A repo-relative `schemas/datalex/` when running from a clone —
492
+ walk up from the project root.
493
+ 3. Repo-root fallback based on this file's location.
494
+ """
495
+ # 1. Bundled package resource — the canonical location for an installed package.
496
+ bundled = Path(__file__).resolve().parent.parent / "_schemas" / "datalex"
497
+ if bundled.exists():
498
+ return bundled
499
+
500
+ # 2. Walk up from the project root (legacy clone layout with top-level schemas/).
501
+ here = project_root
502
+ for _ in range(6):
503
+ candidate = here / "schemas" / "datalex"
504
+ if candidate.exists():
505
+ return candidate
506
+ if here.parent == here:
507
+ break
508
+ here = here.parent
509
+
510
+ # 3. Final fallback — repo-root-relative path from this file's location.
511
+ repo_root = Path(__file__).resolve().parents[4]
512
+ return repo_root / "schemas" / "datalex"