datalex-cli 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. datalex_cli/__init__.py +1 -0
  2. datalex_cli/datalex_cli.py +658 -0
  3. datalex_cli/main.py +2925 -0
  4. datalex_cli-0.1.1.dist-info/METADATA +228 -0
  5. datalex_cli-0.1.1.dist-info/RECORD +64 -0
  6. datalex_cli-0.1.1.dist-info/WHEEL +5 -0
  7. datalex_cli-0.1.1.dist-info/entry_points.txt +2 -0
  8. datalex_cli-0.1.1.dist-info/licenses/LICENSE +21 -0
  9. datalex_cli-0.1.1.dist-info/top_level.txt +2 -0
  10. datalex_core/__init__.py +94 -0
  11. datalex_core/_schemas/datalex/common.schema.json +127 -0
  12. datalex_core/_schemas/datalex/domain.schema.json +24 -0
  13. datalex_core/_schemas/datalex/entity.schema.json +158 -0
  14. datalex_core/_schemas/datalex/model.schema.json +141 -0
  15. datalex_core/_schemas/datalex/policy.schema.json +70 -0
  16. datalex_core/_schemas/datalex/project.schema.json +82 -0
  17. datalex_core/_schemas/datalex/snippet.schema.json +24 -0
  18. datalex_core/_schemas/datalex/source.schema.json +104 -0
  19. datalex_core/_schemas/datalex/term.schema.json +30 -0
  20. datalex_core/canonical.py +166 -0
  21. datalex_core/completion.py +204 -0
  22. datalex_core/connectors/__init__.py +39 -0
  23. datalex_core/connectors/base.py +417 -0
  24. datalex_core/connectors/bigquery.py +229 -0
  25. datalex_core/connectors/databricks.py +262 -0
  26. datalex_core/connectors/mysql.py +266 -0
  27. datalex_core/connectors/postgres.py +309 -0
  28. datalex_core/connectors/redshift.py +298 -0
  29. datalex_core/connectors/snowflake.py +336 -0
  30. datalex_core/connectors/sqlserver.py +425 -0
  31. datalex_core/datalex/__init__.py +26 -0
  32. datalex_core/datalex/diff.py +188 -0
  33. datalex_core/datalex/errors.py +85 -0
  34. datalex_core/datalex/loader.py +512 -0
  35. datalex_core/datalex/migrate_layout.py +382 -0
  36. datalex_core/datalex/parse_cache.py +102 -0
  37. datalex_core/datalex/project.py +214 -0
  38. datalex_core/datalex/types.py +224 -0
  39. datalex_core/dbt/__init__.py +18 -0
  40. datalex_core/dbt/emit.py +344 -0
  41. datalex_core/dbt/manifest.py +329 -0
  42. datalex_core/dbt/profiles.py +185 -0
  43. datalex_core/dbt/sync.py +279 -0
  44. datalex_core/dbt/warehouse.py +215 -0
  45. datalex_core/dialects/__init__.py +15 -0
  46. datalex_core/dialects/_common.py +48 -0
  47. datalex_core/dialects/base.py +47 -0
  48. datalex_core/dialects/postgres.py +164 -0
  49. datalex_core/dialects/registry.py +36 -0
  50. datalex_core/dialects/snowflake.py +129 -0
  51. datalex_core/diffing.py +358 -0
  52. datalex_core/docs_generator.py +797 -0
  53. datalex_core/doctor.py +181 -0
  54. datalex_core/generators.py +478 -0
  55. datalex_core/importers.py +1176 -0
  56. datalex_core/issues.py +23 -0
  57. datalex_core/loader.py +21 -0
  58. datalex_core/migrate.py +316 -0
  59. datalex_core/modeling.py +679 -0
  60. datalex_core/packages.py +430 -0
  61. datalex_core/policy.py +1037 -0
  62. datalex_core/resolver.py +456 -0
  63. datalex_core/schema.py +54 -0
  64. datalex_core/semantic.py +1561 -0
@@ -0,0 +1,430 @@
1
+ """Cross-repo DataLex package resolver.
2
+
3
+ Given `imports:` entries in a DataLex project manifest, resolve each into a
4
+ local on-disk directory suitable for loading via `load_project`. Supports:
5
+
6
+ * Local path imports:
7
+ - package: local/warehouse-core
8
+ path: ../warehouse-core
9
+
10
+ * Git-backed imports (tag, branch, or commit):
11
+ - package: acme/warehouse-core
12
+ git: https://github.com/acme/warehouse-core.git
13
+ ref: v1.4.0
14
+
15
+ * Shorthand `package: org/name@version` — resolves to a default registry
16
+ URL (currently github.com/<org>/<name> tag <version>).
17
+
18
+ Cache layout:
19
+ ~/.datalex/packages/<org>_<name>/<ref>/ # single shared cache per host ("/" in the package name becomes "_")
20
+
21
+ Lockfile layout (`.datalex/lock.yaml`):
22
+ packages:
23
+ acme/warehouse-core:
24
+ version: 1.4.0
25
+ git: https://github.com/acme/warehouse-core.git
26
+ ref: v1.4.0
27
+ resolved_sha: <40-char-sha>
28
+ content_hash: sha256:<hash-of-yaml-files-in-tree>
29
+
30
+ Security notes:
31
+ * When a lockfile exists, we refuse to use any resolution whose resolved_sha
32
+ disagrees with the locked entry. Run `datalex datalex packages resolve --update`
33
+ to regenerate.
34
+ * `path:` imports are not sandboxed — a local import can be anywhere on the
35
+ filesystem. That is the user's choice.
36
+ """
37
+
38
+ from __future__ import annotations
39
+
40
+ import hashlib
41
+ import os
42
+ import re
43
+ import shutil
44
+ import subprocess
45
+ from dataclasses import dataclass, field
46
+ from pathlib import Path
47
+ from typing import Any, Dict, List, Optional, Tuple, Union
48
+
49
+ import yaml
50
+
51
+
52
# Matches `org/name` with an optional `@version` suffix. `org` and `name` must
# start with a lowercase letter or digit and may continue with lowercase
# letters, digits, `_` or `-`; `version` accepts word characters, `.`, `+`, `-`.
PACKAGE_SPEC_RE = re.compile(r"^(?P<org>[a-z0-9][a-z0-9_-]*)/(?P<name>[a-z0-9][a-z0-9_-]*)(@(?P<version>[\w.+-]+))?$")
# Git URL template used when an import has no explicit `git:` URL; `{org}` and
# `{name}` are substituted with str.format.
DEFAULT_REGISTRY_URL_TEMPLATE = "https://github.com/{org}/{name}.git"
54
+
55
+
56
+ # ---------- dataclasses ----------
57
+
58
+
59
@dataclass
class ImportSpec:
    """A single `imports:` entry as declared in `datalex.yaml`.

    An entry is either a local `path:` import or a git-backed import
    (explicit `git:` URL and/or a `version`, optionally with a `ref`).
    """

    package: str
    path: Optional[str] = None  # local path import
    git: Optional[str] = None  # explicit git URL
    ref: Optional[str] = None  # tag / branch / sha
    alias: Optional[str] = None  # namespace prefix (default: package basename)
    version: Optional[str] = None  # parsed from `org/name@version` shorthand

    @classmethod
    def from_dict(cls, raw: Dict[str, Any]) -> "ImportSpec":
        """Build a spec from one raw `imports:` mapping.

        Supports the `package: org/name@version` shorthand: the `@version`
        suffix is always removed from `package`, and it becomes `version`
        unless the entry already carries an explicit `version:` key.
        """
        pkg = raw.get("package") or ""
        spec = cls(
            package=pkg,
            path=raw.get("path"),
            git=raw.get("git"),
            ref=raw.get("ref"),
            alias=raw.get("alias"),
            version=raw.get("version"),
        )
        m = PACKAGE_SPEC_RE.match(pkg)
        if m and m.group("version"):
            # Normalise the name even when an explicit `version:` is present;
            # otherwise the `@version` suffix would leak into the lockfile
            # key and the cache directory name.
            spec.package = f"{m.group('org')}/{m.group('name')}"
            if not spec.version:
                spec.version = m.group("version")
        return spec

    def default_alias(self) -> str:
        """Alias for namespacing imported names. Defaults to the last path segment."""
        if self.alias:
            return self.alias
        base = self.package.split("/")[-1]
        return _slug(base)

    def kind(self) -> str:
        """Return "path" or "git"; `path` wins when both are declared.

        Raises:
            ValueError: the entry has none of `path`, `git` or `version`.
        """
        if self.path:
            return "path"
        if self.git or self.version:
            return "git"
        raise ValueError(f"Import '{self.package}' has neither path: nor git/version.")
101
+
102
+
103
@dataclass
class ResolvedPackage:
    """An import together with the on-disk location it was resolved to."""

    spec: ImportSpec
    root: Path  # local disk path the project was resolved into
    resolved_sha: Optional[str]  # git SHA (None for path imports)
    content_hash: str  # sha256 of the tree at `root` (stable)

    def to_lock_entry(self) -> Dict[str, Any]:
        """Return the lockfile mapping for this package.

        `content_hash` is always present; every other key is emitted only
        when its value is truthy.
        """
        optional_fields = (
            ("version", self.spec.version),
            ("git", self.spec.git),
            ("ref", self.spec.ref),
            ("path", self.spec.path),
            ("resolved_sha", self.resolved_sha),
        )
        lock_entry: Dict[str, Any] = {"content_hash": self.content_hash}
        lock_entry.update((key, value) for key, value in optional_fields if value)
        return lock_entry
123
+
124
+
125
@dataclass
class ResolveReport:
    """Outcome of a resolve run: packages, lockfile status, and warnings."""

    resolved: List[ResolvedPackage] = field(default_factory=list)
    lockfile_path: Optional[Path] = None
    lockfile_written: bool = False
    warnings: List[str] = field(default_factory=list)

    def summary(self) -> str:
        """Render the report as newline-joined, human-readable text."""
        out = [f"Resolved {len(self.resolved)} package(s):"]
        for pkg in self.resolved:
            label = pkg.spec.package
            if pkg.spec.version:
                label = f"{label}@{pkg.spec.version}"
            out.append(f" - {label} → {pkg.root}")
        # The lockfile line is shown only when a file was actually written.
        if self.lockfile_written and self.lockfile_path:
            out.append(f"Wrote lockfile: {self.lockfile_path}")
        out.extend(f" warning: {message}" for message in self.warnings)
        return "\n".join(out)
142
+
143
+
144
+ # ---------- resolver ----------
145
+
146
+
147
def resolve_imports(
    project_root: Union[str, Path],
    cache_root: Optional[Union[str, Path]] = None,
    update: bool = False,
) -> ResolveReport:
    """Resolve every `imports:` entry in `<project_root>/datalex.yaml`.

    When `update` is True, re-fetch git-backed packages even if the lockfile
    pins them. Otherwise, lockfile entries are authoritative.

    Args:
        project_root: Directory containing `datalex.yaml`.
        cache_root: Directory for fetched git packages; defaults to the
            value of `_default_cache_root()`. Created if missing.
        update: Re-fetch and skip lockfile verification.

    Returns:
        A ResolveReport listing resolved packages and warnings.
        `.datalex/lock.yaml` is rewritten only when the computed entries
        differ from the ones on disk.

    Raises:
        PackageResolveError: an import cannot be resolved or fails
            lockfile verification.
        ValueError: an entry declares none of `path`, `git`, `version`.
    """
    project_root = Path(project_root).resolve()
    manifest = _load_manifest(project_root)
    cache_root = Path(cache_root) if cache_root else _default_cache_root()
    cache_root.mkdir(parents=True, exist_ok=True)

    lockfile_path = project_root / ".datalex" / "lock.yaml"
    existing_lock = _load_lockfile(lockfile_path)

    report = ResolveReport(lockfile_path=lockfile_path)

    for raw in manifest.get("imports", []) or []:
        # A bare string or list item would otherwise blow up inside
        # ImportSpec.from_dict with an unhelpful AttributeError.
        if not isinstance(raw, dict):
            report.warnings.append(
                f"Skipping malformed imports entry (expected a mapping): {raw!r}"
            )
            continue

        spec = ImportSpec.from_dict(raw)
        if not spec.package:
            report.warnings.append("Skipping imports entry with empty package field.")
            continue

        resolved = _resolve_one(
            spec=spec,
            project_root=project_root,
            cache_root=cache_root,
            lock_entry=existing_lock.get(spec.package),
            update=update,
        )
        report.resolved.append(resolved)

    new_lock = {r.spec.package: r.to_lock_entry() for r in report.resolved}
    if new_lock != existing_lock:
        _write_lockfile(lockfile_path, new_lock)
        report.lockfile_written = True

    return report
188
+
189
+
190
def _resolve_one(
    spec: ImportSpec,
    project_root: Path,
    cache_root: Path,
    lock_entry: Optional[Dict[str, Any]],
    update: bool,
) -> ResolvedPackage:
    """Resolve one import to a directory and verify it against the lock.

    Path imports are used in place. Git-backed imports are fetched into
    `<cache_root>/<package key>/<ref key>` when `update` is set or the
    cache directory (or its `.git_sha` marker) is missing.

    Raises:
        PackageResolveError: the path does not exist, no ref/version is
            available, the fetch fails, or lockfile verification fails.
        ValueError: propagated from `spec.kind()`.
    """
    kind = spec.kind()

    if kind == "path":
        # `/` keeps only the right operand when it is absolute, so this
        # covers relative and absolute imports alike. Resolving both
        # keeps `root` identical to what `_probe_resolved` returns.
        root = (project_root / spec.path).resolve()
        if not root.exists():
            raise PackageResolveError(
                f"Local path import '{spec.package}' points to nonexistent directory: {root}"
            )
        ch = _hash_tree(root)
        _verify_against_lock(spec, ch, None, lock_entry, update)
        return ResolvedPackage(spec=spec, root=root, resolved_sha=None, content_hash=ch)

    # git-backed
    git_url = spec.git or _registry_url(spec)
    ref = spec.ref or spec.version
    if not ref:
        raise PackageResolveError(
            f"Git-backed import '{spec.package}' needs a ref or version."
        )

    pkg_dir = cache_root / _safe_cache_key(spec.package) / _safe_cache_key(ref)
    sha_file = pkg_dir / ".git_sha"
    # A directory without the marker is treated as never fetched.
    needs_fetch = update or not pkg_dir.exists() or not sha_file.exists()
    if needs_fetch:
        _fetch_git(git_url, ref, pkg_dir)

    sha = sha_file.read_text().strip() if sha_file.exists() else ""
    ch = _hash_tree(pkg_dir)
    _verify_against_lock(spec, ch, sha, lock_entry, update)
    return ResolvedPackage(spec=spec, root=pkg_dir, resolved_sha=sha, content_hash=ch)
226
+
227
+
228
+ def _verify_against_lock(
229
+ spec: ImportSpec,
230
+ content_hash: str,
231
+ resolved_sha: Optional[str],
232
+ lock_entry: Optional[Dict[str, Any]],
233
+ update: bool,
234
+ ) -> None:
235
+ if not lock_entry or update:
236
+ return
237
+ locked_ch = lock_entry.get("content_hash")
238
+ if locked_ch and locked_ch != content_hash:
239
+ raise PackageResolveError(
240
+ f"Package '{spec.package}' content_hash {content_hash} does not match "
241
+ f"lockfile {locked_ch}. Run `datalex datalex packages resolve --update` to regenerate."
242
+ )
243
+ locked_sha = lock_entry.get("resolved_sha")
244
+ if locked_sha and resolved_sha and locked_sha != resolved_sha:
245
+ raise PackageResolveError(
246
+ f"Package '{spec.package}' resolved_sha {resolved_sha} does not match "
247
+ f"lockfile {locked_sha}. Run `datalex datalex packages resolve --update` to regenerate."
248
+ )
249
+
250
+
251
+ # ---------- git backend ----------
252
+
253
+
254
def _fetch_git(url: str, ref: str, target: Path) -> None:
    """Shallow-clone `url@ref` into `target`. Writes the resolved SHA to .git_sha.

    Any existing `target` is removed first. A depth-1 fetch of `ref` is
    tried first; if it fails, a full fetch is done and `ref` is checked
    out by name. On failure the partial checkout is removed so a later
    cache probe cannot mistake it for a fetched package.

    Raises:
        PackageResolveError: `url` or `ref` is empty or starts with "-",
            git is not runnable, or any git command fails.
    """
    # A value starting with "-" would be parsed by git as an option
    # (e.g. `--upload-pack=...`), so reject it before running anything.
    for label, value in (("url", url), ("ref", ref)):
        if not value or value.startswith("-"):
            raise PackageResolveError(
                f"git fetch failed for {url}@{ref}: invalid {label} {value!r}"
            )

    if target.exists():
        shutil.rmtree(target)
    target.mkdir(parents=True, exist_ok=True)

    def git(*args: str, check: bool = True) -> "subprocess.CompletedProcess":
        # Decode uniformly as text so the error handler below never has to
        # guess whether stderr is bytes or str.
        return subprocess.run(
            ["git", *args],
            check=check,
            capture_output=True,
            encoding="utf-8",
            errors="replace",
        )

    repo = str(target)
    try:
        git("init", "--quiet", repo)
        git("-C", repo, "remote", "add", "origin", url)
        # Try fetching the ref directly (works for tags, branches, and SHAs on many servers)
        shallow = git("-C", repo, "fetch", "--depth=1", "origin", ref, check=False)
        if shallow.returncode == 0:
            checkout_target = "FETCH_HEAD"
        else:
            # fallback: full fetch then checkout by name
            git("-C", repo, "fetch", "origin")
            checkout_target = ref
        git("-C", repo, "checkout", "--quiet", checkout_target)
        sha = git("-C", repo, "rev-parse", "HEAD").stdout.strip()
        (target / ".git_sha").write_text(sha + "\n", encoding="utf-8")
    except subprocess.CalledProcessError as e:
        shutil.rmtree(target, ignore_errors=True)
        err = e.stderr or ""
        if isinstance(err, bytes):
            err = err.decode("utf-8", errors="replace")
        raise PackageResolveError(
            f"git fetch failed for {url}@{ref}: {err.strip() or e}"
        ) from e
    except OSError as e:
        # Covers a missing `git` executable and filesystem errors.
        shutil.rmtree(target, ignore_errors=True)
        raise PackageResolveError(
            f"git fetch failed for {url}@{ref}: {e}"
        ) from e
296
+
297
+
298
def _registry_url(spec: ImportSpec) -> str:
    """Build the default registry git URL for an `org/name` package.

    Raises:
        PackageResolveError: `spec.package` does not match PACKAGE_SPEC_RE.
    """
    parsed = PACKAGE_SPEC_RE.match(spec.package)
    if parsed is None:
        raise PackageResolveError(
            f"Package '{spec.package}' is not in org/name form; provide `git:` explicitly."
        )
    org, name = parsed.group("org"), parsed.group("name")
    return DEFAULT_REGISTRY_URL_TEMPLATE.format(org=org, name=name)
305
+
306
+
307
+ # ---------- helpers ----------
308
+
309
+
310
class PackageResolveError(RuntimeError):
    """Signals that a package could not be resolved or did not pass verification."""
312
+
313
+
314
+ def _default_cache_root() -> Path:
315
+ override = os.environ.get("DATALEX_CACHE_ROOT")
316
+ if override:
317
+ return Path(override) / "packages"
318
+ return Path.home() / ".datalex" / "packages"
319
+
320
+
321
+ def _safe_cache_key(value: str) -> str:
322
+ return re.sub(r"[^A-Za-z0-9_.-]", "_", value).strip("._")
323
+
324
+
325
+ def _slug(value: str) -> str:
326
+ out = re.sub(r"[^a-z0-9_]+", "_", value.lower()).strip("_")
327
+ return out or "pkg"
328
+
329
+
330
+ def _hash_tree(root: Path) -> str:
331
+ """Stable sha256 over all .yaml / .yml files in a tree."""
332
+ h = hashlib.sha256()
333
+ for p in sorted(root.rglob("*")):
334
+ if not p.is_file():
335
+ continue
336
+ if p.suffix.lower() not in (".yaml", ".yml"):
337
+ continue
338
+ rel = p.relative_to(root).as_posix()
339
+ h.update(rel.encode("utf-8"))
340
+ h.update(b"\0")
341
+ h.update(p.read_bytes())
342
+ h.update(b"\0")
343
+ return "sha256:" + h.hexdigest()
344
+
345
+
346
+ def _load_manifest(project_root: Path) -> Dict[str, Any]:
347
+ manifest_path = project_root / "datalex.yaml"
348
+ if not manifest_path.exists():
349
+ return {}
350
+ with manifest_path.open("r", encoding="utf-8") as f:
351
+ return yaml.safe_load(f) or {}
352
+
353
+
354
+ def _load_lockfile(path: Path) -> Dict[str, Dict[str, Any]]:
355
+ if not path.exists():
356
+ return {}
357
+ with path.open("r", encoding="utf-8") as f:
358
+ data = yaml.safe_load(f) or {}
359
+ return dict(data.get("packages") or {})
360
+
361
+
362
def _write_lockfile(path: Path, packages: Dict[str, Dict[str, Any]]) -> None:
    """Write `packages` to `path` as a version-1 lockfile document.

    The parent directory is created if needed. Output is block-style YAML
    with sorted keys.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    document = {"version": 1, "packages": packages}
    dump_options = dict(sort_keys=True, default_flow_style=False, allow_unicode=True)
    with path.open("w", encoding="utf-8") as stream:
        yaml.safe_dump(document, stream, **dump_options)
370
+
371
+
372
+ # ---------- helpers consumed by the loader ----------
373
+
374
+
375
def load_imports_for(
    project_root: Union[str, Path],
    cache_root: Optional[Union[str, Path]] = None,
) -> List[ResolvedPackage]:
    """Resolve (using cached state) and return ResolvedPackage entries.

    Does not refetch; assumes `resolve_imports` has been run at least once.
    Entries that are not mappings or have an empty `package` are skipped.

    Raises:
        PackageResolveError: a git-backed import has never been fetched,
            a path import does not exist, or an import's content_hash or
            resolved_sha has drifted from the lockfile.
    """
    project_root = Path(project_root).resolve()
    manifest = _load_manifest(project_root)
    cache_root = Path(cache_root) if cache_root else _default_cache_root()
    lock = _load_lockfile(project_root / ".datalex" / "lock.yaml")
    out: List[ResolvedPackage] = []
    for raw in manifest.get("imports", []) or []:
        # Non-mapping entries cannot be parsed into an ImportSpec.
        if not isinstance(raw, dict):
            continue
        spec = ImportSpec.from_dict(raw)
        if not spec.package:
            continue
        resolved = _probe_resolved(spec, project_root, cache_root)
        lock_entry = lock.get(spec.package)
        if lock_entry:
            locked_ch = lock_entry.get("content_hash")
            if locked_ch and locked_ch != resolved.content_hash:
                raise PackageResolveError(
                    f"Package '{spec.package}' content_hash drifted from lockfile; "
                    f"run `datalex datalex packages resolve --update`."
                )
            # The cache is shared between projects, so the checkout may
            # have been re-fetched at another commit since this lock was
            # written. Apply the same SHA check the resolve path does.
            locked_sha = lock_entry.get("resolved_sha")
            if locked_sha and resolved.resolved_sha and locked_sha != resolved.resolved_sha:
                raise PackageResolveError(
                    f"Package '{spec.package}' resolved_sha drifted from lockfile; "
                    f"run `datalex datalex packages resolve --update`."
                )
        out.append(resolved)
    return out
405
+
406
+
407
def _probe_resolved(
    spec: ImportSpec,
    project_root: Path,
    cache_root: Path,
) -> ResolvedPackage:
    """Return a ResolvedPackage pointing at the on-disk location without fetching.

    Path imports resolve relative to `project_root`. Git-backed imports
    must already be in the cache with a `.git_sha` marker; a directory
    without the marker counts as not cached.

    Raises:
        PackageResolveError: the local path does not exist, the import
            has no ref/version, or the package is not in the cache.
    """
    if spec.path:
        # `/` keeps only the right operand when it is absolute, so this
        # handles both relative and absolute imports.
        root = (project_root / spec.path).resolve()
        if not root.exists():
            raise PackageResolveError(
                f"Local path import '{spec.package}' points to nonexistent directory: {root}"
            )
        return ResolvedPackage(spec=spec, root=root, resolved_sha=None, content_hash=_hash_tree(root))

    ref = spec.ref or spec.version
    if not ref:
        raise PackageResolveError(f"Git-backed import '{spec.package}' missing ref/version.")

    pkg_dir = cache_root / _safe_cache_key(spec.package) / _safe_cache_key(ref)
    sha_file = pkg_dir / ".git_sha"
    # Without the marker the directory may be a partial or failed fetch,
    # so it is reported as missing rather than returned.
    if not sha_file.exists():
        raise PackageResolveError(
            f"Package '{spec.package}@{ref}' is not in the cache. "
            f"Run `datalex datalex packages resolve` first."
        )
    sha = sha_file.read_text(encoding="utf-8").strip()
    return ResolvedPackage(spec=spec, root=pkg_dir, resolved_sha=sha, content_hash=_hash_tree(pkg_dir))