datalex-cli 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datalex_cli/__init__.py +1 -0
- datalex_cli/datalex_cli.py +658 -0
- datalex_cli/main.py +2925 -0
- datalex_cli-0.1.1.dist-info/METADATA +228 -0
- datalex_cli-0.1.1.dist-info/RECORD +64 -0
- datalex_cli-0.1.1.dist-info/WHEEL +5 -0
- datalex_cli-0.1.1.dist-info/entry_points.txt +2 -0
- datalex_cli-0.1.1.dist-info/licenses/LICENSE +21 -0
- datalex_cli-0.1.1.dist-info/top_level.txt +2 -0
- datalex_core/__init__.py +94 -0
- datalex_core/_schemas/datalex/common.schema.json +127 -0
- datalex_core/_schemas/datalex/domain.schema.json +24 -0
- datalex_core/_schemas/datalex/entity.schema.json +158 -0
- datalex_core/_schemas/datalex/model.schema.json +141 -0
- datalex_core/_schemas/datalex/policy.schema.json +70 -0
- datalex_core/_schemas/datalex/project.schema.json +82 -0
- datalex_core/_schemas/datalex/snippet.schema.json +24 -0
- datalex_core/_schemas/datalex/source.schema.json +104 -0
- datalex_core/_schemas/datalex/term.schema.json +30 -0
- datalex_core/canonical.py +166 -0
- datalex_core/completion.py +204 -0
- datalex_core/connectors/__init__.py +39 -0
- datalex_core/connectors/base.py +417 -0
- datalex_core/connectors/bigquery.py +229 -0
- datalex_core/connectors/databricks.py +262 -0
- datalex_core/connectors/mysql.py +266 -0
- datalex_core/connectors/postgres.py +309 -0
- datalex_core/connectors/redshift.py +298 -0
- datalex_core/connectors/snowflake.py +336 -0
- datalex_core/connectors/sqlserver.py +425 -0
- datalex_core/datalex/__init__.py +26 -0
- datalex_core/datalex/diff.py +188 -0
- datalex_core/datalex/errors.py +85 -0
- datalex_core/datalex/loader.py +512 -0
- datalex_core/datalex/migrate_layout.py +382 -0
- datalex_core/datalex/parse_cache.py +102 -0
- datalex_core/datalex/project.py +214 -0
- datalex_core/datalex/types.py +224 -0
- datalex_core/dbt/__init__.py +18 -0
- datalex_core/dbt/emit.py +344 -0
- datalex_core/dbt/manifest.py +329 -0
- datalex_core/dbt/profiles.py +185 -0
- datalex_core/dbt/sync.py +279 -0
- datalex_core/dbt/warehouse.py +215 -0
- datalex_core/dialects/__init__.py +15 -0
- datalex_core/dialects/_common.py +48 -0
- datalex_core/dialects/base.py +47 -0
- datalex_core/dialects/postgres.py +164 -0
- datalex_core/dialects/registry.py +36 -0
- datalex_core/dialects/snowflake.py +129 -0
- datalex_core/diffing.py +358 -0
- datalex_core/docs_generator.py +797 -0
- datalex_core/doctor.py +181 -0
- datalex_core/generators.py +478 -0
- datalex_core/importers.py +1176 -0
- datalex_core/issues.py +23 -0
- datalex_core/loader.py +21 -0
- datalex_core/migrate.py +316 -0
- datalex_core/modeling.py +679 -0
- datalex_core/packages.py +430 -0
- datalex_core/policy.py +1037 -0
- datalex_core/resolver.py +456 -0
- datalex_core/schema.py +54 -0
- datalex_core/semantic.py +1561 -0
datalex_core/packages.py
ADDED
|
@@ -0,0 +1,430 @@
|
|
|
1
|
+
"""Cross-repo DataLex package resolver.
|
|
2
|
+
|
|
3
|
+
Given `imports:` entries in a DataLex project manifest, resolve each into a
|
|
4
|
+
local on-disk directory suitable for loading via `load_project`. Supports:
|
|
5
|
+
|
|
6
|
+
* Local path imports:
|
|
7
|
+
- package: local/warehouse-core
|
|
8
|
+
path: ../warehouse-core
|
|
9
|
+
|
|
10
|
+
* Git-backed imports (tag, branch, or commit):
|
|
11
|
+
- package: acme/warehouse-core
|
|
12
|
+
git: https://github.com/acme/warehouse-core.git
|
|
13
|
+
ref: v1.4.0
|
|
14
|
+
|
|
15
|
+
* Shorthand `package: org/name@version` — resolves to a default registry
|
|
16
|
+
URL (currently github.com/<org>/<name> tag <version>).
|
|
17
|
+
|
|
18
|
+
Cache layout:
|
|
19
|
+
~/.datalex/packages/<org>_<name>/<ref>/ # single shared cache per host
|
|
20
|
+
|
|
21
|
+
Lockfile layout (`.datalex/lock.yaml`):
|
|
22
|
+
packages:
|
|
23
|
+
acme/warehouse-core:
|
|
24
|
+
version: 1.4.0
|
|
25
|
+
git: https://github.com/acme/warehouse-core.git
|
|
26
|
+
ref: v1.4.0
|
|
27
|
+
resolved_sha: <40-char-sha>
|
|
28
|
+
content_hash: sha256:<hash-of-packaged-tree>
|
|
29
|
+
|
|
30
|
+
Security notes:
|
|
31
|
+
* When a lockfile exists, we refuse to use any resolution whose resolved_sha
|
|
32
|
+
disagrees with the locked entry. Run `datalex datalex packages resolve --update`
|
|
33
|
+
to regenerate.
|
|
34
|
+
* `path:` imports are not sandboxed — a local import can be anywhere on the
|
|
35
|
+
filesystem. That is the user's choice.
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
from __future__ import annotations
|
|
39
|
+
|
|
40
|
+
import hashlib
|
|
41
|
+
import os
|
|
42
|
+
import re
|
|
43
|
+
import shutil
|
|
44
|
+
import subprocess
|
|
45
|
+
from dataclasses import dataclass, field
|
|
46
|
+
from pathlib import Path
|
|
47
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
48
|
+
|
|
49
|
+
import yaml
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
PACKAGE_SPEC_RE = re.compile(r"^(?P<org>[a-z0-9][a-z0-9_-]*)/(?P<name>[a-z0-9][a-z0-9_-]*)(@(?P<version>[\w.+-]+))?$")
|
|
53
|
+
DEFAULT_REGISTRY_URL_TEMPLATE = "https://github.com/{org}/{name}.git"
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# ---------- dataclasses ----------
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@dataclass
class ImportSpec:
    """A single `imports:` entry as declared in `datalex.yaml`.

    An entry is either a local import (`path` set) or a git-backed import
    (`git` and/or `version` set); see `kind()`.
    """

    package: str
    path: Optional[str] = None     # local path import
    git: Optional[str] = None      # explicit git URL
    ref: Optional[str] = None      # tag / branch / sha
    alias: Optional[str] = None    # namespace prefix (default: package basename)
    version: Optional[str] = None  # parsed from `org/name@version` shorthand

    @classmethod
    def from_dict(cls, raw: Dict[str, Any]) -> "ImportSpec":
        """Build a spec from one raw `imports:` mapping.

        Supports the `package: org/name@version` shorthand: the `@version`
        suffix is always stripped from `package`, and it populates `version`
        unless an explicit `version:` key was given (the explicit key wins).
        """
        pkg = raw.get("package") or ""
        spec = cls(
            package=pkg,
            path=raw.get("path"),
            git=raw.get("git"),
            ref=raw.get("ref"),
            alias=raw.get("alias"),
            version=raw.get("version"),
        )
        m = PACKAGE_SPEC_RE.match(pkg)
        if m and m.group("version"):
            # Normalise the name even when `version:` is also present;
            # otherwise the lock key, cache key and default alias would all
            # carry the "@version" suffix.
            spec.package = f"{m.group('org')}/{m.group('name')}"
            if not spec.version:
                spec.version = m.group("version")
        return spec

    def default_alias(self) -> str:
        """Alias for namespacing imported names. Defaults to the last path segment."""
        if self.alias:
            return self.alias
        base = self.package.split("/")[-1]
        return _slug(base)

    def kind(self) -> str:
        """Return "path" or "git"; `path` takes precedence when both are set.

        Raises:
            ValueError: if the entry has none of `path`, `git` or `version`.
        """
        if self.path:
            return "path"
        if self.git or self.version:
            return "git"
        raise ValueError(f"Import '{self.package}' has neither path: nor git/version.")
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
@dataclass
class ResolvedPackage:
    """An import paired with the local directory it was resolved into."""

    spec: ImportSpec
    root: Path                   # local disk path the project was resolved into
    resolved_sha: Optional[str]  # git SHA (None for path imports)
    content_hash: str            # sha256 of the tree at `root` (stable)

    def to_lock_entry(self) -> Dict[str, Any]:
        """Return the lockfile mapping for this package.

        `content_hash` is always present; `version`, `git`, `ref`, `path`
        and `resolved_sha` are included only when they hold a truthy value.
        """
        declared = self.spec
        optional_fields = (
            ("version", declared.version),
            ("git", declared.git),
            ("ref", declared.ref),
            ("path", declared.path),
            ("resolved_sha", self.resolved_sha),
        )
        lock_entry: Dict[str, Any] = {"content_hash": self.content_hash}
        lock_entry.update({key: value for key, value in optional_fields if value})
        return lock_entry
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
@dataclass
class ResolveReport:
    """Outcome of a `resolve_imports` run: packages, lockfile status, warnings."""

    resolved: List[ResolvedPackage] = field(default_factory=list)
    lockfile_path: Optional[Path] = None
    lockfile_written: bool = False
    warnings: List[str] = field(default_factory=list)

    def summary(self) -> str:
        """Render a multi-line, human-readable description of this report."""

        def describe(pkg: ResolvedPackage) -> str:
            # Only append "@version" when the spec actually carries one.
            version = pkg.spec.version
            tag = f"@{version}" if version else ""
            return f" - {pkg.spec.package}{tag} → {pkg.root}"

        report_lines = [f"Resolved {len(self.resolved)} package(s):"]
        report_lines.extend(describe(pkg) for pkg in self.resolved)
        if self.lockfile_written and self.lockfile_path:
            report_lines.append(f"Wrote lockfile: {self.lockfile_path}")
        report_lines.extend(f" warning: {message}" for message in self.warnings)
        return "\n".join(report_lines)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# ---------- resolver ----------
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def resolve_imports(
    project_root: Union[str, Path],
    cache_root: Optional[Union[str, Path]] = None,
    update: bool = False,
) -> ResolveReport:
    """Resolve every `imports:` entry in `<project_root>/datalex.yaml`.

    With `update=True`, git-backed packages are re-fetched and lockfile
    verification is skipped. Otherwise existing lockfile entries are
    authoritative. The lockfile at `<project_root>/.datalex/lock.yaml` is
    rewritten only when the freshly computed entries differ from it.
    """
    root_dir = Path(project_root).resolve()
    manifest = _load_manifest(root_dir)

    packages_dir = _default_cache_root() if not cache_root else Path(cache_root)
    packages_dir.mkdir(parents=True, exist_ok=True)

    lock_path = root_dir / ".datalex" / "lock.yaml"
    previous_lock = _load_lockfile(lock_path)
    report = ResolveReport(lockfile_path=lock_path)

    declared_imports = manifest.get("imports", []) or []
    for entry in declared_imports:
        spec = ImportSpec.from_dict(entry)
        if spec.package:
            report.resolved.append(
                _resolve_one(
                    spec=spec,
                    project_root=root_dir,
                    cache_root=packages_dir,
                    lock_entry=previous_lock.get(spec.package),
                    update=update,
                )
            )
        else:
            report.warnings.append("Skipping imports entry with empty package field.")

    # Rebuild the lock from scratch so entries for removed imports disappear.
    fresh_lock: Dict[str, Dict[str, Any]] = {}
    for pkg in report.resolved:
        fresh_lock[pkg.spec.package] = pkg.to_lock_entry()

    if fresh_lock != previous_lock:
        _write_lockfile(lock_path, fresh_lock)
        report.lockfile_written = True

    return report
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def _resolve_one(
    spec: ImportSpec,
    project_root: Path,
    cache_root: Path,
    lock_entry: Optional[Dict[str, Any]],
    update: bool,
) -> ResolvedPackage:
    """Resolve a single import to a local directory, fetching if required.

    Path imports are resolved against `project_root`. Git-backed imports
    live at `<cache_root>/<package-key>/<ref-key>` and are fetched when
    `update` is set, or when the directory or its `.git_sha` marker is
    missing. The result is checked against `lock_entry` before returning.

    Raises:
        ValueError: from `spec.kind()` when the entry has no path/git/version.
        PackageResolveError: for a missing local path, a git import without
            ref/version, a failed fetch, or a lockfile mismatch.
    """
    kind = spec.kind()

    if kind == "path":
        declared = Path(spec.path)
        # Normalise absolute paths too, so `root` matches what
        # `_probe_resolved` reports for the same spec.
        root = (declared if declared.is_absolute() else project_root / declared).resolve()
        if not root.exists():
            raise PackageResolveError(
                f"Local path import '{spec.package}' points to nonexistent directory: {root}"
            )
        ch = _hash_tree(root)
        _verify_against_lock(spec, ch, None, lock_entry, update)
        return ResolvedPackage(spec=spec, root=root, resolved_sha=None, content_hash=ch)

    # git-backed
    git_url = spec.git or _registry_url(spec)
    ref = spec.ref or spec.version
    if not ref:
        raise PackageResolveError(
            f"Git-backed import '{spec.package}' needs a ref or version."
        )

    pkg_dir = cache_root / _safe_cache_key(spec.package) / _safe_cache_key(ref)
    sha_file = pkg_dir / ".git_sha"
    # A directory without `.git_sha` is treated as never (or only partially) fetched.
    needs_fetch = update or not pkg_dir.exists() or not sha_file.exists()
    if needs_fetch:
        _fetch_git(git_url, ref, pkg_dir)

    sha = sha_file.read_text().strip() if sha_file.exists() else ""
    ch = _hash_tree(pkg_dir)
    _verify_against_lock(spec, ch, sha, lock_entry, update)
    return ResolvedPackage(spec=spec, root=pkg_dir, resolved_sha=sha, content_hash=ch)
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def _verify_against_lock(
    spec: ImportSpec,
    content_hash: str,
    resolved_sha: Optional[str],
    lock_entry: Optional[Dict[str, Any]],
    update: bool,
) -> None:
    """Raise `PackageResolveError` if the resolution disagrees with the lock.

    Nothing is checked when `update` is set or there is no lock entry.
    `content_hash` is compared whenever the lock pins one; `resolved_sha`
    is compared only when both the lock and the resolution provide one.
    """
    if update or not lock_entry:
        return

    # (lock key, resolved value, skip the check when the resolved value is empty)
    checks = (
        ("content_hash", content_hash, False),
        ("resolved_sha", resolved_sha, True),
    )
    for field_name, actual, skip_when_missing in checks:
        pinned = lock_entry.get(field_name)
        if not pinned or pinned == actual:
            continue
        if skip_when_missing and not actual:
            continue
        raise PackageResolveError(
            f"Package '{spec.package}' {field_name} {actual} does not match "
            f"lockfile {pinned}. Run `datalex datalex packages resolve --update` to regenerate."
        )
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
# ---------- git backend ----------
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def _fetch_git(url: str, ref: str, target: Path) -> None:
    """Shallow-clone `url@ref` into `target`. Writes the resolved SHA to .git_sha.

    Any existing `target` directory is removed first. A depth-1 fetch of
    `ref` is tried first; if that fails, a full fetch is done and `ref` is
    checked out by name. On failure the partially populated `target` is
    removed, so it cannot be mistaken for a fetched package.

    Raises:
        PackageResolveError: if `url`/`ref` is empty or starts with "-",
            if the `git` executable is missing, or if a git command fails.
    """
    # A leading "-" would make git parse the value as a command-line option.
    for label, value in (("url", url), ("ref", ref)):
        if not value or value.startswith("-"):
            raise PackageResolveError(
                f"Refusing to run git with {label} {value!r} for {url}@{ref}: "
                f"it is empty or would be parsed as a command-line option."
            )

    if target.exists():
        shutil.rmtree(target)
    target.mkdir(parents=True, exist_ok=True)

    repo = str(target)

    def git(*args: str, check: bool = True, text: bool = False) -> "subprocess.CompletedProcess":
        return subprocess.run(["git", *args], check=check, capture_output=True, text=text)

    try:
        git("init", "--quiet", repo)
        git("-C", repo, "remote", "add", "origin", url)
        # Try fetching the ref directly (works for tags, branches, and SHAs on many servers)
        shallow = git("-C", repo, "fetch", "--depth=1", "origin", ref, check=False)
        if shallow.returncode == 0:
            checkout_target = "FETCH_HEAD"
        else:
            # fallback: full fetch then checkout
            git("-C", repo, "fetch", "origin")
            checkout_target = ref
        git("-C", repo, "checkout", "--quiet", checkout_target)
        sha = git("-C", repo, "rev-parse", "HEAD", text=True).stdout.strip()
    except FileNotFoundError as e:
        shutil.rmtree(target, ignore_errors=True)
        raise PackageResolveError(
            f"git fetch failed for {url}@{ref}: git executable not found ({e})"
        ) from e
    except subprocess.CalledProcessError as e:
        shutil.rmtree(target, ignore_errors=True)
        # stderr is bytes for most calls but str for the text=True rev-parse call.
        raw = e.stderr or ""
        err = raw.decode("utf-8", errors="replace") if isinstance(raw, bytes) else raw
        raise PackageResolveError(
            f"git fetch failed for {url}@{ref}: {err.strip() or e}"
        ) from e

    # Written last: its presence marks a complete, successful fetch.
    (target / ".git_sha").write_text(sha + "\n", encoding="utf-8")
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def _registry_url(spec: ImportSpec) -> str:
    """Build the default registry git URL for an `org/name` package.

    Raises:
        PackageResolveError: if `spec.package` does not match `PACKAGE_SPEC_RE`.
    """
    parsed = PACKAGE_SPEC_RE.match(spec.package)
    if parsed is None:
        raise PackageResolveError(
            f"Package '{spec.package}' is not in org/name form; provide `git:` explicitly."
        )
    org, name = parsed.group("org", "name")
    return DEFAULT_REGISTRY_URL_TEMPLATE.format(org=org, name=name)
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
# ---------- helpers ----------
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
class PackageResolveError(RuntimeError):
    """Signals that a package could not be resolved or did not pass verification."""
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def _default_cache_root() -> Path:
    """Return the package cache directory.

    Uses `$DATALEX_CACHE_ROOT/packages` when that variable is set and
    non-empty, otherwise `~/.datalex/packages`.
    """
    configured = os.environ.get("DATALEX_CACHE_ROOT")
    base = Path(configured) if configured else Path.home() / ".datalex"
    return base / "packages"
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
def _safe_cache_key(value: str) -> str:
    """Turn a package name or ref into a single safe directory name.

    Characters outside `[A-Za-z0-9_.-]` become "_", and leading/trailing
    "." and "_" are stripped. If nothing is left (e.g. for ".." or "///"),
    a key derived from a hash of the input is returned instead of "".
    An empty key would make `cache_root / key / ""` collapse onto the
    parent directory.
    """
    key = re.sub(r"[^A-Za-z0-9_.-]", "_", value).strip("._")
    if key:
        return key
    digest = hashlib.sha256(value.encode("utf-8")).hexdigest()[:12]
    return f"key-{digest}"
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def _slug(value: str) -> str:
    """Lower-case `value` and collapse non `[a-z0-9_]` runs to "_".

    Leading and trailing underscores are trimmed; an empty result becomes "pkg".
    """
    collapsed = re.sub(r"[^a-z0-9_]+", "_", value.lower())
    trimmed = collapsed.strip("_")
    return trimmed if trimmed else "pkg"
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def _hash_tree(root: Path) -> str:
    """Return a "sha256:<hex>" digest over the .yaml / .yml files under `root`.

    Files are visited in sorted path order. Each contributes its
    root-relative POSIX path and its raw bytes, each followed by a NUL byte.
    """
    yaml_suffixes = (".yaml", ".yml")
    digest = hashlib.sha256()
    for entry in sorted(root.rglob("*")):
        if entry.is_file() and entry.suffix.lower() in yaml_suffixes:
            relative_name = entry.relative_to(root).as_posix().encode("utf-8")
            for chunk in (relative_name, entry.read_bytes()):
                digest.update(chunk)
                digest.update(b"\0")
    return f"sha256:{digest.hexdigest()}"
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
def _load_manifest(project_root: Path) -> Dict[str, Any]:
    """Load `<project_root>/datalex.yaml` as a mapping.

    Returns an empty dict when the file is missing or empty.

    Raises:
        PackageResolveError: if the document's top level is not a mapping.
            Callers use `.get(...)` on the result, so any other shape would
            otherwise fail later with an opaque AttributeError.
    """
    manifest_path = project_root / "datalex.yaml"
    if not manifest_path.exists():
        return {}
    with manifest_path.open("r", encoding="utf-8") as f:
        data = yaml.safe_load(f) or {}
    if not isinstance(data, dict):
        raise PackageResolveError(
            f"Manifest {manifest_path} is malformed: expected a mapping at the top level."
        )
    return data
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def _load_lockfile(path: Path) -> Dict[str, Dict[str, Any]]:
    """Load the `packages` mapping from the lockfile at `path`.

    Returns an empty dict when the file is missing, empty, or has no
    `packages` section.

    Raises:
        PackageResolveError: if the top level, the `packages` section, or
            any individual entry is not a mapping. A malformed lockfile is
            rejected with a clear message instead of crashing later with an
            opaque AttributeError.
    """
    if not path.exists():
        return {}
    with path.open("r", encoding="utf-8") as f:
        data = yaml.safe_load(f) or {}
    if not isinstance(data, dict):
        raise PackageResolveError(
            f"Lockfile {path} is malformed: expected a mapping at the top level."
        )
    packages = data.get("packages") or {}
    if not isinstance(packages, dict):
        raise PackageResolveError(
            f"Lockfile {path} is malformed: `packages` must be a mapping."
        )
    for name, entry in packages.items():
        if not isinstance(entry, dict):
            raise PackageResolveError(
                f"Lockfile {path} is malformed: entry for '{name}' must be a mapping."
            )
    return dict(packages)
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def _write_lockfile(path: Path, packages: Dict[str, Dict[str, Any]]) -> None:
    """Write `packages` to `path` as a version-1 YAML lockfile.

    The parent directory is created if needed; keys are emitted sorted.
    """
    lock_dir = path.parent
    lock_dir.mkdir(parents=True, exist_ok=True)
    document = {"version": 1, "packages": packages}
    with path.open("w", encoding="utf-8") as handle:
        yaml.safe_dump(
            document,
            handle,
            sort_keys=True,
            default_flow_style=False,
            allow_unicode=True,
        )
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
# ---------- helpers consumed by the loader ----------
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def load_imports_for(
    project_root: Union[str, Path],
    cache_root: Optional[Union[str, Path]] = None,
) -> List[ResolvedPackage]:
    """Resolve (using cached state) and return ResolvedPackage entries.

    Does not refetch; assumes `resolve_imports` has been run at least once.
    Entries with an empty `package` field are skipped.

    Raises:
        PackageResolveError: if a git-backed import has never been fetched,
            or if an import's content_hash or resolved_sha has drifted from
            the lockfile.
    """
    project_root = Path(project_root).resolve()
    manifest = _load_manifest(project_root)
    cache_root = Path(cache_root) if cache_root else _default_cache_root()
    lock = _load_lockfile(project_root / ".datalex" / "lock.yaml")
    out: List[ResolvedPackage] = []
    for raw in manifest.get("imports", []) or []:
        spec = ImportSpec.from_dict(raw)
        if not spec.package:
            continue
        resolved = _probe_resolved(spec, project_root, cache_root)
        lock_entry = lock.get(spec.package)
        if lock_entry:
            locked_ch = lock_entry.get("content_hash")
            if locked_ch and locked_ch != resolved.content_hash:
                raise PackageResolveError(
                    f"Package '{spec.package}' content_hash drifted from lockfile; "
                    f"run `datalex datalex packages resolve --update`."
                )
            # Enforce the pinned commit too, as the resolve path does:
            # content_hash only covers YAML files, so a moved ref can leave
            # it unchanged. Skipped when either side has no SHA (path imports).
            locked_sha = lock_entry.get("resolved_sha")
            if locked_sha and resolved.resolved_sha and locked_sha != resolved.resolved_sha:
                raise PackageResolveError(
                    f"Package '{spec.package}' resolved_sha drifted from lockfile; "
                    f"run `datalex datalex packages resolve --update`."
                )
        out.append(resolved)
    return out
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def _probe_resolved(
    spec: ImportSpec,
    project_root: Path,
    cache_root: Path,
) -> ResolvedPackage:
    """Return a ResolvedPackage pointing at the on-disk location without fetching.

    Path imports are resolved against `project_root`. Git-backed imports
    are looked up at `<cache_root>/<package-key>/<ref-key>` and count as
    cached only when the `.git_sha` marker is present.

    Raises:
        PackageResolveError: if a local path does not exist, a git-backed
            import has no ref/version, or the package is not (fully) cached.
    """
    if spec.path:
        declared = Path(spec.path)
        root = (declared if declared.is_absolute() else project_root / declared).resolve()
        if not root.exists():
            raise PackageResolveError(
                f"Local path import '{spec.package}' points to nonexistent directory: {root}"
            )
        return ResolvedPackage(spec=spec, root=root, resolved_sha=None, content_hash=_hash_tree(root))

    ref = spec.ref or spec.version
    if not ref:
        raise PackageResolveError(f"Git-backed import '{spec.package}' missing ref/version.")

    pkg_dir = cache_root / _safe_cache_key(spec.package) / _safe_cache_key(ref)
    sha_file = pkg_dir / ".git_sha"
    # `.git_sha` is the last thing a fetch writes; a directory without it is
    # an incomplete clone and must not be served as a resolved package.
    if not sha_file.exists():
        raise PackageResolveError(
            f"Package '{spec.package}@{ref}' is not in the cache. "
            f"Run `datalex datalex packages resolve` first."
        )
    sha = sha_file.read_text().strip()
    return ResolvedPackage(spec=spec, root=pkg_dir, resolved_sha=sha, content_hash=_hash_tree(pkg_dir))
|