ossllms 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. ossllms/__init__.py +33 -0
  2. ossllms/api.py +194 -0
  3. ossllms/cache.py +102 -0
  4. ossllms/catalog.py +517 -0
  5. ossllms/cli.py +1502 -0
  6. ossllms/compat.py +31 -0
  7. ossllms/config.py +75 -0
  8. ossllms/contrib_plan.py +489 -0
  9. ossllms/contrib_runtime.py +302 -0
  10. ossllms/contrib_scan.py +557 -0
  11. ossllms/contrib_worker.py +526 -0
  12. ossllms/data/default-catalog/catalog.json +11 -0
  13. ossllms/data/default-catalog/hash-index.json +117 -0
  14. ossllms/data/default-catalog/manifests/ossllms/smoke-tiny/0.0.1/files/LICENSE +1 -0
  15. ossllms/data/default-catalog/manifests/ossllms/smoke-tiny/0.0.1/files/README.md +3 -0
  16. ossllms/data/default-catalog/manifests/ossllms/smoke-tiny/0.0.1/files/config.json +1 -0
  17. ossllms/data/default-catalog/manifests/ossllms/smoke-tiny/0.0.1/files/model.safetensors +1 -0
  18. ossllms/data/default-catalog/manifests/ossllms/smoke-tiny/0.0.1/release.json +71 -0
  19. ossllms/data/default-catalog/manifests/sentence-transformers/all-MiniLM-L6-v2/0.0.1+1110a243fdf4/release.json +161 -0
  20. ossllms/data/default-catalog/manifests/sentence-transformers/all-MiniLM-L6-v2/0.0.1+1110a243fdf4/release.torrent +0 -0
  21. ossllms/data/default-catalog/search-index.json +25 -0
  22. ossllms/data/manifest.schema.json +122 -0
  23. ossllms/data/trusted-keys/ossllms.pub +2 -0
  24. ossllms/denylist.py +251 -0
  25. ossllms/engine.py +428 -0
  26. ossllms/hf.py +326 -0
  27. ossllms/keys.py +228 -0
  28. ossllms/manifest.py +109 -0
  29. ossllms/provenance.py +178 -0
  30. ossllms/publish.py +706 -0
  31. ossllms/reachability.py +78 -0
  32. ossllms/refs.py +172 -0
  33. ossllms/resolve.py +213 -0
  34. ossllms/search.py +367 -0
  35. ossllms/seed.py +859 -0
  36. ossllms/seed_engine.py +444 -0
  37. ossllms/signing.py +64 -0
  38. ossllms/torrent.py +407 -0
  39. ossllms/verify.py +269 -0
  40. ossllms-0.1.0.dist-info/METADATA +120 -0
  41. ossllms-0.1.0.dist-info/RECORD +43 -0
  42. ossllms-0.1.0.dist-info/WHEEL +4 -0
  43. ossllms-0.1.0.dist-info/entry_points.txt +2 -0
ossllms/__init__.py ADDED
@@ -0,0 +1,33 @@
1
+ """ossllms — pull open-weight AI models from the ossllms preservation network.
2
+
3
+ A drop-in for huggingface_hub that resolves from the torrent layer, verifies
4
+ integrity + publisher signature, and lands files in the HF-compatible cache.
5
+
6
+ from ossllms import snapshot_download
7
+ path = snapshot_download("Qwen/Qwen2.5-7B")
8
+ """
9
+ from __future__ import annotations
10
+
11
+ from .api import (
12
+ IntegrityError,
13
+ PullResult,
14
+ UnverifiedOriginError,
15
+ hf_hub_download,
16
+ pull,
17
+ snapshot_download,
18
+ )
19
+ from .verify import TrustStore, Verdict
20
+
21
+ __version__ = "0.1.0"
22
+
23
+ __all__ = [
24
+ "pull",
25
+ "snapshot_download",
26
+ "hf_hub_download",
27
+ "PullResult",
28
+ "Verdict",
29
+ "TrustStore",
30
+ "IntegrityError",
31
+ "UnverifiedOriginError",
32
+ "__version__",
33
+ ]
ossllms/api.py ADDED
@@ -0,0 +1,194 @@
1
+ """High-level API. `pull()` is the core; `snapshot_download` / `hf_hub_download`
2
+ mirror the huggingface_hub names for drop-in use.
3
+ """
4
+ from __future__ import annotations
5
+
6
+ import shutil
7
+ import tempfile
8
+ from dataclasses import dataclass
9
+ from pathlib import Path, PurePosixPath
10
+ from typing import Callable, List, Optional
11
+
12
+ from . import config
13
+ from .cache import place_into_cache, reuse_from_blobs
14
+ from .denylist import load_denylist
15
+ from .engine import EngineError, get_engine
16
+ from .manifest import model_id, version
17
+ from .resolve import resolve_manifest_with_source
18
+ from .verify import TrustStore, Verdict, verify_manifest
19
+
20
+
21
class IntegrityError(RuntimeError):
    """Raised when staged files fail verification or an artifact path is unsafe."""
23
+
24
+
25
class UnverifiedOriginError(RuntimeError):
    """Raised when a publisher signature is required but origin is not verified."""
27
+
28
+
29
@dataclass
class PullResult:
    """Value returned by ``pull`` once files are verified and cached."""

    # Snapshot directory returned by ``place_into_cache``.
    path: Path
    # Model id taken from the resolved manifest.
    repo_id: str
    # Caller-supplied revision, or the manifest version when none was given.
    revision: str
    # The resolved manifest the files were verified against.
    manifest: dict
    # Result of ``verify_manifest`` for the staged files.
    verdict: Verdict
36
+
37
+
38
def pull(
    ref: str,
    *,
    catalog: Optional[str] = None,
    cache_dir=None,
    revision: Optional[str] = None,
    include: Optional[List[str]] = None,
    engine: str = "auto",
    engine_impl=None,
    trust: Optional[TrustStore] = None,
    denylist: Optional[str] = None,
    require_signature: bool = False,
    progress: Optional[Callable[[str], None]] = None,
) -> PullResult:
    """Resolve, download, verify, and cache a model version.

    Integrity (per-file SHA-256) is ALWAYS enforced. Signature/origin failure
    raises only when require_signature=True; otherwise it returns an Unverified
    verdict (community upload) and proceeds, matching the trust UX.

    The selected artifacts are staged in a temporary directory. Each file is
    taken from the first source that has it: the cache's blob store, then the
    resolution's local source directory (if any), then the download engine.
    The staged files are verified and only then placed into the cache.

    Args:
        ref: Model reference handed to ``resolve_manifest_with_source``.
        catalog: Catalog location; falls back to ``config.default_catalog()``.
        cache_dir: Cache root; falls back to ``config.hf_hub_cache()``.
        revision: Snapshot revision name; falls back to the manifest version.
        include: fnmatch patterns selecting artifacts; falsy selects all.
        engine: Engine name passed to ``get_engine`` when files must be fetched.
        engine_impl: Pre-built engine used instead of ``get_engine``.
        trust: Trust store; falls back to ``TrustStore.from_dir(config.trust_dir())``.
        denylist: Denylist location; falls back to ``config.default_denylist()``.
        require_signature: Raise when the verdict's origin is not OK.
        progress: Optional callback receiving human-readable status lines.

    Returns:
        A ``PullResult`` describing the cached snapshot.

    Raises:
        IntegrityError: A selected path is unsafe or a staged file fails
            verification.
        UnverifiedOriginError: ``require_signature`` is set and origin is not OK.
        EngineError: The engine failed and no staged file failed verification
            for a reason other than being missing.
    """
    catalog = catalog or config.default_catalog()
    cache_dir = Path(cache_dir) if cache_dir else config.hf_hub_cache()
    if trust is None:
        trust = TrustStore.from_dir(config.trust_dir())

    resolution = resolve_manifest_with_source(ref, catalog)
    manifest = resolution.manifest
    repo_id = model_id(manifest)
    manifest_version = version(manifest)
    rev = revision or manifest_version
    denylist = denylist or config.default_denylist()
    if denylist is not None:
        load_denylist(denylist, trust).check_manifest(manifest)

    with tempfile.TemporaryDirectory(prefix="ossllms-") as staged:
        wanted = include_to_paths(manifest, include)
        # Validate every selected path before anything is written to staging.
        for artifact_path in wanted:
            _safe_artifact_relpath(artifact_path)
        wanted_set = set(wanted)
        wanted_artifacts = [
            entry for entry in manifest["artifacts"] if entry["path"] in wanted_set
        ]

        # Reuse files already in the blob store; only the rest needs a source.
        from_blobs = set(reuse_from_blobs(staged, cache_dir, repo_id, wanted_artifacts))
        to_fetch = [p for p in wanted if p not in from_blobs]

        from_local = set()
        if resolution.local_source_dir is not None and to_fetch:
            from_local = set(
                _copy_from_local_source(resolution.local_source_dir, staged, to_fetch)
            )
            to_fetch = [p for p in to_fetch if p not in from_local]

        # An engine is only instantiated when something is left to download.
        eng = None
        if to_fetch:
            eng = get_engine(engine, manifest) if engine_impl is None else engine_impl
        if progress:
            engine_name = "none" if eng is None else eng.name
            progress(
                f"engine: {engine_name}; reuse {len(from_blobs)} file(s), "
                f"local {len(from_local)} file(s), fetch {len(to_fetch)}"
            )
        if to_fetch:
            try:
                eng.fetch(manifest, staged, include=to_fetch, progress=progress)
            except EngineError as exc:
                # Prefer reporting a corrupted staged file over the engine error.
                _raise_integrity_error_if_staged_file_failed(
                    manifest, staged, trust, wanted_set, exc
                )
                raise

        verdict = verify_manifest(manifest, staged, trust, only=wanted_set)

        if not verdict.integrity_ok:
            failures = [f"{f.path} ({f.reason})" for f in verdict.files if not f.ok]
            raise IntegrityError("integrity check failed: " + "; ".join(failures))
        if require_signature and not verdict.origin_ok:
            raise UnverifiedOriginError(
                "publisher signature could not be verified: " + "; ".join(verdict.messages)
            )

        target = place_into_cache(staged, cache_dir, repo_id, rev, manifest=manifest)

    return PullResult(target, repo_id, rev, manifest, verdict)
121
+
122
+
123
def include_to_paths(manifest: dict, include: Optional[List[str]]) -> List[str]:
    """Return the manifest artifact paths selected by ``include``.

    A falsy ``include`` (``None`` or empty) selects every artifact. Otherwise a
    path is kept when it matches at least one fnmatch pattern. Manifest order
    is preserved.
    """
    import fnmatch

    if not include:
        return [artifact["path"] for artifact in manifest["artifacts"]]

    chosen = []
    for artifact in manifest["artifacts"]:
        for pattern in include:
            if fnmatch.fnmatch(artifact["path"], pattern):
                chosen.append(artifact["path"])
                break
    return chosen
133
+
134
+
135
def _copy_from_local_source(source_dir: Path, staged_dir, paths: List[str]) -> List[str]:
    """Copy the requested artifacts found under ``source_dir`` into staging.

    Artifacts that are not regular files in the source are skipped silently.

    Returns:
        The artifact paths that were actually copied, in input order.

    Raises:
        IntegrityError: An artifact path is unsafe, or resolves (e.g. through a
            symlink) to a location outside ``source_dir``.
    """
    root = Path(source_dir).resolve()
    staging_root = Path(staged_dir)
    done: List[str] = []
    for artifact_path in paths:
        relative = _safe_artifact_relpath(artifact_path)
        candidate = (root / relative).resolve()
        # After resolving, the file must still live under the source root.
        try:
            candidate.relative_to(root)
        except ValueError as exc:
            raise IntegrityError(f"artifact path escapes local source: {artifact_path!r}") from exc
        if candidate.is_file():
            destination = staging_root / relative
            destination.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(candidate, destination)
            done.append(artifact_path)
    return done
153
+
154
+
155
def _safe_artifact_relpath(path: str) -> Path:
    """Validate a manifest artifact path and return it as a native relative path.

    Manifest paths are POSIX-style. The path is rejected when it is not a
    non-empty string, is absolute, or contains an empty, ``.`` or ``..``
    component. Backslashes, NUL bytes and Windows drive prefixes are rejected
    as well. They form a single harmless-looking POSIX component, but once
    converted to a native ``Path`` on Windows they act as separators or
    anchors and could escape the directory the result is joined to.

    Raises:
        IntegrityError: The path is unsafe.
    """
    from pathlib import PureWindowsPath

    if not isinstance(path, str) or not path:
        raise IntegrityError(f"unsafe artifact path: {path!r}")
    # Platform-independent guard against Windows separators and drive anchors.
    if "\\" in path or "\x00" in path or PureWindowsPath(path).drive:
        raise IntegrityError(f"unsafe artifact path: {path!r}")
    rel = PurePosixPath(path)
    if rel.is_absolute() or not rel.parts or any(part in ("", ".", "..") for part in rel.parts):
        raise IntegrityError(f"unsafe artifact path: {path!r}")
    return Path(*rel.parts)
162
+
163
+
164
def _raise_integrity_error_if_staged_file_failed(
    manifest: dict,
    staged_dir,
    trust: TrustStore,
    only: set,
    cause: EngineError,
) -> None:
    """Turn an engine failure into ``IntegrityError`` when a staged file is bad.

    Re-verifies the staging directory. Files whose failure reason is
    ``"missing"`` are ignored; any other failing file raises ``IntegrityError``
    chained to ``cause``. Returns normally when there is no such file.
    """
    verdict = verify_manifest(manifest, staged_dir, trust, only=only)
    failures = []
    for entry in verdict.files:
        if entry.ok or entry.reason == "missing":
            continue
        failures.append(f"{entry.path} ({entry.reason})")
    if failures:
        raise IntegrityError("integrity check failed: " + "; ".join(failures)) from cause
175
+
176
+
177
+ # --- huggingface_hub-compatible surface -------------------------------------
178
+
179
def snapshot_download(
    repo_id: str,
    *,
    revision: Optional[str] = None,
    allow_patterns: Optional[List[str]] = None,
    cache_dir=None,
    catalog: Optional[str] = None,
    **_ignored,
) -> str:
    """Drop-in for huggingface_hub.snapshot_download. Returns the local path.

    A truthy ``revision`` is appended to the reference as ``repo_id@revision``.
    ``allow_patterns`` is forwarded to ``pull`` as its ``include`` filter.
    Any other keyword argument is accepted and ignored.
    """
    ref = f"{repo_id}@{revision}" if revision else repo_id
    result = pull(ref, catalog=catalog, cache_dir=cache_dir, include=allow_patterns)
    return str(result.path)
187
+
188
+
189
def hf_hub_download(repo_id: str, filename: str, *, revision: Optional[str] = None,
                    cache_dir=None, catalog: Optional[str] = None, **_ignored) -> str:
    """Drop-in for huggingface_hub.hf_hub_download. Returns the local file path.

    ``filename`` is a literal artifact path, not a pattern. ``pull`` treats its
    ``include`` entries as fnmatch patterns, so the glob metacharacters
    (``*``, ``?``, ``[``) are escaped first. Without that, a name such as
    ``model[1].bin`` would not match itself and ``*`` would select extra files.

    A truthy ``revision`` is appended to the reference as ``repo_id@revision``.
    Any other keyword argument is accepted and ignored.
    """
    import glob

    ref = repo_id if not revision else f"{repo_id}@{revision}"
    res = pull(ref, catalog=catalog, cache_dir=cache_dir, include=[glob.escape(filename)])
    return str(res.path / filename)
ossllms/cache.py ADDED
@@ -0,0 +1,102 @@
1
+ """HF-compatible cache with a blob store + hardlink dedup.
2
+
3
+ Layout (identical to huggingface_hub, so transformers/vllm/llama.cpp load unchanged
4
+ AND versions/quants/repos share bytes):
5
+
6
+ <cache>/models--<org>--<name>/
7
+ blobs/<sha256> one physical copy per unique file
8
+ snapshots/<revision>/<path> hardlink into blobs/
9
+ refs/<channel> e.g. refs/main -> a revision
10
+
11
+ Identical files across versions point at the same blob inode, so disk cost is the
12
+ size of the UNIQUE bytes, not N x per version. `reuse_from_blobs` lets a pull skip
13
+ downloading any file already present as a blob (the "only changed bytes move"
14
+ update path in docs/VERSIONING.md).
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import os
19
+ import shutil
20
+ from pathlib import Path
21
+ from typing import List
22
+
23
+
24
def repo_folder_name(repo_id: str) -> str:
    """Return the cache folder name for ``repo_id``: ``models--<org>--<name>``."""
    return "--".join(["models", *repo_id.split("/")])
26
+
27
+
28
def repo_dir(cache_root, repo_id: str) -> Path:
    """Return the per-repository directory under ``cache_root``."""
    folder = repo_folder_name(repo_id)
    return Path(cache_root).joinpath(folder)
30
+
31
+
32
def blobs_dir(cache_root, repo_id: str) -> Path:
    """Return the ``blobs`` directory of the repository's cache folder."""
    return repo_dir(cache_root, repo_id).joinpath("blobs")
34
+
35
+
36
def blob_path(cache_root, repo_id: str, sha256: str) -> Path:
    """Return where the blob named by ``sha256`` lives for this repository."""
    return blobs_dir(cache_root, repo_id).joinpath(sha256)
38
+
39
+
40
def snapshot_dir(cache_root, repo_id: str, revision: str) -> Path:
    """Return the ``snapshots/<revision>`` directory for this repository."""
    return repo_dir(cache_root, repo_id).joinpath("snapshots", revision)
42
+
43
+
44
def has_blob(cache_root, repo_id: str, sha256: str) -> bool:
    """Report whether a blob named ``sha256`` exists for this repository."""
    candidate = blob_path(cache_root, repo_id, sha256)
    return candidate.exists()
46
+
47
+
48
def _link_or_copy(src: Path, dst: Path) -> None:
    """Make ``dst`` available from ``src``, preferring a hardlink over a copy.

    Creates the parent directory of ``dst`` if needed. Does nothing further
    when ``dst`` already exists. Any ``OSError`` from hardlinking (for example
    across filesystems) falls back to ``shutil.copy2``.
    """
    dst.parent.mkdir(parents=True, exist_ok=True)
    if not dst.exists():
        try:
            os.link(src, dst)  # hardlink (same filesystem)
        except OSError:
            shutil.copy2(src, dst)  # cross-filesystem fallback
56
+
57
+
58
def reuse_from_blobs(staged_dir, cache_root, repo_id: str, artifacts) -> List[str]:
    """Hardlink already-present blobs into the staging dir; return reused paths.

    Call before downloading so files already on disk (from a prior version, a quant,
    or another pull) are not fetched again. An artifact counts as present when a
    blob named by its ``sha256`` exists for ``repo_id``. The blob's content is not
    checked here.
    """
    staging_root = Path(staged_dir)
    hits: List[str] = []
    for artifact in artifacts:
        blob = blob_path(cache_root, repo_id, artifact["sha256"])
        if not blob.exists():
            continue
        _link_or_copy(blob, staging_root / artifact["path"])
        hits.append(artifact["path"])
    return hits
72
+
73
+
74
def place_into_cache(staged_dir, cache_root, repo_id: str, revision: str, manifest=None) -> Path:
    """Store staged files as blobs (keyed by sha256) and hardlink them into the
    snapshot dir. Files in the manifest are deduped via the blob store; any extras
    are copied as-is.

    Also writes ``revision`` to ``refs/main`` in the repository's cache folder.

    Args:
        staged_dir: Directory holding the files to place.
        cache_root: Root of the cache.
        repo_id: Repository id used to locate the repository's cache folder.
        revision: Snapshot name; also the content written to ``refs/main``.
        manifest: Optional manifest whose ``artifacts`` map paths to sha256.

    Returns:
        The snapshot directory for ``revision``.
    """
    target = snapshot_dir(cache_root, repo_id, revision)
    target.mkdir(parents=True, exist_ok=True)
    staged = Path(staged_dir)
    sha_by_path = {a["path"]: a["sha256"] for a in (manifest or {}).get("artifacts", [])}

    for src in staged.rglob("*"):
        if not src.is_file():
            continue
        rel = src.relative_to(staged)
        # Manifest paths use "/" separators. str(rel) would use "\\" on Windows,
        # so nested files would miss the lookup and bypass the blob store.
        sha = sha_by_path.get(rel.as_posix())
        snap_dst = target / rel
        snap_dst.parent.mkdir(parents=True, exist_ok=True)
        if sha:
            bp = blob_path(cache_root, repo_id, sha)
            _link_or_copy(src, bp)  # store blob (idempotent)
            # Replace any existing snapshot entry so it points at the blob.
            if snap_dst.exists():
                snap_dst.unlink()
            _link_or_copy(bp, snap_dst)  # snapshot hardlinks to blob
        elif not snap_dst.exists():
            shutil.copy2(src, snap_dst)

    refs = repo_dir(cache_root, repo_id) / "refs"
    refs.mkdir(parents=True, exist_ok=True)
    (refs / "main").write_text(revision, encoding="utf-8")
    return target