opencode-llmstack 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llmstack/tiers.py ADDED
@@ -0,0 +1,394 @@
1
+ """Tier inventory: parse ``models.ini`` into Python objects.
2
+
3
+ This is the **data layer** for the stack -- the single source of truth for
4
+ "what tiers exist and where their weights live". A tier has a *backend*:
5
+
6
+ ``gguf`` local llama-server (managed by llama-swap), driven by
7
+ ``hf_repo`` + ``hf_file`` (and optional ``_next`` upgrade
8
+ target). This is the only backend the original stack
9
+ supported.
10
+ ``bedrock`` hosted AWS Bedrock model, driven by ``aws_model_id``
11
+ (and per-tier ``aws_region`` / ``aws_profile`` /
12
+ ``aws_endpoint_url``). Credentials live in the standard
13
+ AWS config (``~/.aws/config`` and ``~/.aws/credentials``),
14
+ selected by ``aws_profile`` -- never in ``models.ini``,
15
+ which is meant to be committable. Anything boto3 can do
16
+ via a named profile (long-term keys, SSO, role chaining
17
+ via ``role_arn`` + ``source_profile`` in
18
+ ``~/.aws/config``, MFA, IMDS) is supported transparently.
19
+
20
+ Used by:
21
+
22
+ - :mod:`llmstack.app` request dispatch (gguf -> proxy
23
+ to llama-swap; bedrock -> AWS).
24
+ - :mod:`llmstack.check_models` snapshot table + HF metadata lookup
25
+ - :mod:`llmstack.download.ggufs` drives the GGUF downloader
26
+ - :mod:`llmstack.generators.llama_swap` only emits gguf tiers
27
+ - :mod:`llmstack.generators.opencode` exposes every tier to opencode
28
+
29
+ Stdlib only -- safe to import before any extra dependency is present.
30
+
31
+ CLI (kept for backwards-compatible scripting):
32
+
33
+ python -m llmstack.tiers # human-readable summary
34
+ python -m llmstack.tiers --downloads # TSV: tag<TAB>repo<TAB>file<TAB>label
35
+ """
36
+
37
+ from __future__ import annotations
38
+
39
+ import configparser
40
+ import os
41
+ import re
42
+ import sys
43
+ from collections.abc import Iterator
44
+ from dataclasses import dataclass, field
45
+ from pathlib import Path
46
+
47
+ from llmstack.paths import models_ini_path, require_models_ini
48
+
49
# First run of decimal digits in a string; ``_int`` uses it to pull an
# integer out of a raw ini value.
DIGITS = re.compile(r"\d+")
# One ``name = number`` pair inside a ``sampler = ...`` line. The value group
# accepts digits and dots only.
SAMPLER_KV = re.compile(r"(\w+)\s*=\s*([0-9.]+)")

# Backend identifiers a tier section can resolve to.
BACKEND_GGUF = "gguf"
BACKEND_BEDROCK = "bedrock"
# Every value accepted for an explicit ``backend = ...`` key.
KNOWN_BACKENDS = {BACKEND_GGUF, BACKEND_BEDROCK}
55
+
56
+
57
def _int(value: str, default: int = 0) -> int:
    """Return the first run of digits found in ``value`` as an int.

    ``default`` is returned when ``value`` is empty/``None`` or holds no
    digits at all.
    """
    found = DIGITS.search(value or "")
    if found is None:
        return default
    return int(found.group())
60
+
61
+
62
def parse_sampler(raw: str) -> dict[str, float]:
    """Turn a ``sampler = temp=0.5, top_p=0.85, top_k=20, ...`` value into a dict.

    Keys are kept exactly as written in models.ini (``temp``, ``top_p``,
    ``top_k``, ``min_p``, ``rep_pen``); mapping them onto the
    OpenAI-compatible request-body field names is left to the router.

    An empty or missing line produces ``{}``. That is the canonical
    "no sampler tuning" signal: the router then forwards requests
    untouched, which is what Bedrock Claude Opus 4.7 et al. require.
    """
    settings: dict[str, float] = {}
    # Later duplicates of a key overwrite earlier ones.
    for key, number in SAMPLER_KV.findall(raw or ""):
        settings[key] = float(number)
    return settings
74
+
75
+
76
def _strip(value: str | None) -> str:
    """Return ``value`` without surrounding whitespace; ``None`` becomes ``""``."""
    if not value:
        return ""
    return value.strip()
78
+
79
+
80
def _opt(value: str | None) -> str | None:
    """Normalise an optional ini value to a non-empty string or ``None``.

    Surrounding whitespace is dropped first. A value written as ``$NAME``
    or ``${NAME}`` is looked up in the process environment instead of
    being used literally, so operators can keep secrets out of
    ``models.ini`` if they prefer. An unset or empty variable yields
    ``None``.
    """
    text = (value or "").strip()
    if not text:
        return None
    if not text.startswith("$"):
        return text
    # ``${NAME}`` drops the braces; any other ``$...`` form drops only the ``$``.
    if text.startswith("${") and text.endswith("}"):
        env_name = text[2:-1]
    else:
        env_name = text[1:]
    return os.environ.get(env_name) or None
94
+
95
+
96
@dataclass(frozen=True)
class TierFile:
    """A single GGUF download target for a tier.

    ``label`` tells the file in use apart from a queued upgrade.
    """

    # Tier section name, e.g. "code-smart".
    tier: str
    # Role from the ini, e.g. "agent".
    role: str
    # Either "current" or "next".
    label: str
    # HuggingFace repo in ``owner/name`` form.
    repo: str
    # GGUF filename inside that repo.
    file: str

    @property
    def tag(self) -> str:
        """Stable ``<tier>-<label>`` slug used for log filenames."""
        return "{}-{}".format(self.tier, self.label)
110
+
111
+
112
@dataclass(frozen=True)
class BedrockConfig:
    """Per-tier settings for the AWS Bedrock backend.

    This object carries identity only and never holds credentials. A
    tier points at a profile through :attr:`profile`, and whatever boto3
    needs (long-term access keys, SSO, role chaining via ``role_arn`` +
    ``source_profile`` in ``~/.aws/config``, MFA, IMDS) comes from the
    standard AWS config files rather than from ``models.ini``. With
    :attr:`profile` left as ``None``, boto3's default credential chain
    applies (env vars, default profile, instance role, ...).

    Upgrade pre-staging (mirrors gguf ``hf_file_next``)
    ----------------------------------------------------
    ``model_id_next``, together with the optional ``region_next``, names
    the queued upgrade target -- e.g. moving ``code-smart`` from Sonnet
    4.5 to a newer Sonnet revision once it ships in your region. The
    router consults it only while ``--next`` is in effect (env var
    ``LLMSTACK_USE_NEXT=1``); otherwise the active ``model_id`` /
    ``region`` are used. Promoting permanently works as for gguf: edit
    ``aws_model_id`` in models.ini and re-run ``llmstack install``.
    """

    model_id: str
    region: str | None = None
    profile: str | None = None
    endpoint_url: str | None = None
    model_id_next: str | None = None
    region_next: str | None = None

    @property
    def has_next(self) -> bool:
        """Whether a queued upgrade model id is configured."""
        return True if self.model_id_next else False

    def resolved(self, use_next: bool = False) -> BedrockConfig:
        """Return the config the dispatcher should hand to boto3.

        With ``use_next`` true and a queued upgrade present, the result
        is a copy whose ``model_id`` is the queued model and whose
        ``region`` is ``region_next`` (falling back to the current
        region). In every other case ``self`` is returned unchanged.
        """
        from dataclasses import replace

        if use_next and self.model_id_next:
            target_region = self.region_next or self.region
            return replace(self, model_id=self.model_id_next, region=target_region)
        return self
161
+
162
+
163
@dataclass(frozen=True)
class Tier:
    """One tier section of models.ini.

    ``backend`` tells local GGUF tiers (the historical default) apart
    from hosted AWS Bedrock tiers, and decides which group of fields is
    populated:

    - ``backend == "gguf"``    -> ``repo`` + ``file`` (optionally also
      ``repo_next`` + ``file_next``).
    - ``backend == "bedrock"`` -> ``bedrock`` is non-None.
    """

    name: str
    role: str
    backend: str
    description: str
    ctx_size: int
    repo: str = ""
    file: str = ""
    repo_next: str | None = None
    file_next: str | None = None
    bedrock: BedrockConfig | None = None
    aliases: tuple[str, ...] = field(default_factory=tuple)
    # Sampling defaults for this tier, parsed from `sampler = ...` in
    # models.ini. The router injects them into outbound request bodies, so:
    #   1. opencode.json stays sampler-free (clients choose a model and the
    #      stack decides how to sample it).
    #   2. A Bedrock-hosted tier whose model rejects sampler params (e.g.
    #      Claude Opus 4.7) just omits `sampler =` and the router passes
    #      requests through untouched.
    # Keys keep the short models.ini spelling (`temp`, `top_p`, `top_k`,
    # `min_p`, `rep_pen`); the router maps them to OpenAI-compat request
    # fields.
    sampler: dict[str, float] = field(default_factory=dict)

    @property
    def is_gguf(self) -> bool:
        """True for a local GGUF tier."""
        return self.backend == BACKEND_GGUF

    @property
    def is_bedrock(self) -> bool:
        """True for a hosted AWS Bedrock tier."""
        return self.backend == BACKEND_BEDROCK

    @property
    def has_next(self) -> bool:
        """Whether this tier declares a queued upgrade target.

        The check depends on the backend: gguf looks at ``hf_file_next``,
        bedrock at ``aws_model_id_next``. ``start --next`` uses it to
        decide whether the channel switch has anything to do, and
        ``check`` uses it to print an extra row.
        """
        if self.is_gguf:
            return bool(self.file_next)
        if not self.is_bedrock:
            return False
        cfg = self.bedrock
        return bool(cfg and cfg.has_next)

    def files(self) -> list[TierFile]:
        """List the GGUF download targets of this tier (empty for non-gguf)."""
        if not self.is_gguf:
            return []
        if not self.repo or not self.file:
            return []
        targets = [
            TierFile(
                tier=self.name,
                role=self.role,
                label="current",
                repo=self.repo,
                file=self.file,
            )
        ]
        if self.file_next:
            # The upgrade stays in the current repo unless one is named.
            targets.append(
                TierFile(
                    tier=self.name,
                    role=self.role,
                    label="next",
                    repo=self.repo_next or self.repo,
                    file=self.file_next,
                )
            )
        return targets
233
+
234
+
235
def _detect_backend(section) -> str:
    """Work out which backend a models.ini section describes.

    An explicit ``backend`` key wins and must be one of
    ``KNOWN_BACKENDS``. Otherwise the backend is inferred: a non-empty
    ``aws_model_id`` means bedrock, a non-empty ``hf_repo`` + ``hf_file``
    pair means gguf. An empty string is returned when nothing matches.

    Raises:
        SystemExit: the section sets ``backend`` to an unsupported value.
    """
    declared = _strip(section.get("backend"))
    if declared in KNOWN_BACKENDS:
        return declared
    if declared:
        raise SystemExit(
            f"[!] models.ini [{section.name}] has unknown backend={declared!r} "
            f"(supported: {', '.join(sorted(KNOWN_BACKENDS))})"
        )

    # No explicit backend: infer it from the keys that are present.
    if _strip(section.get("aws_model_id")):
        return BACKEND_BEDROCK
    has_repo = bool(_strip(section.get("hf_repo")))
    has_file = bool(_strip(section.get("hf_file")))
    return BACKEND_GGUF if has_repo and has_file else ""
250
+
251
+
252
# Section keys that are rejected in models.ini, mapped to the short
# human-readable description that ``_check_no_secrets`` prints next to each.
BANNED_BEDROCK_KEYS = {
    # Hard-secret material -- belongs in ~/.aws/credentials, never here.
    "aws_access_key_id": "long-term access key",
    "aws_secret_access_key": "long-term secret key",
    "aws_session_token": "STS session token",
    # Things boto3 already handles natively in ~/.aws/config under a
    # named profile -- pointing aws_profile at that profile is the
    # correct way to opt into them, not duplicating them here.
    "aws_role_arn": "role to assume",
    "aws_role_session_name": "role-session name",
}
263
+
264
+
265
def _check_no_secrets(section) -> None:
    """Abort if the section carries credential or role-chaining keys.

    Any key from ``BANNED_BEDROCK_KEYS`` with a non-empty value triggers
    the failure. The message lists the offending keys in sorted order
    and shows how to reference a named AWS profile instead.

    Raises:
        SystemExit: at least one banned key is set in ``section``.
    """
    offending = sorted(filter(section.get, BANNED_BEDROCK_KEYS))
    if offending:
        # Reuse the section's own profile name in the hint when it has one.
        profile_hint = _strip(section.get("aws_profile")) or "<my-profile>"
        lines = [f" - {key} ({BANNED_BEDROCK_KEYS[key]})" for key in offending]
        bullets = "\n".join(lines)
        message = (
            f"[!] models.ini [{section.name}] contains AWS credential keys -- "
            "these must NOT live in models.ini (it is meant to be committable):\n"
            f"{bullets}\n"
            " Move them into a named profile in ~/.aws/credentials and/or\n"
            " ~/.aws/config, then reference it from this section:\n\n"
            f" aws_profile = {profile_hint}\n\n"
            " boto3 picks up the profile's keys, role_arn + source_profile,\n"
            " SSO, MFA, etc. transparently. See `aws configure --profile\n"
            f" {profile_hint}` and the AWS shared-config docs."
        )
        raise SystemExit(message)
285
+
286
+
287
def _build_bedrock(section) -> BedrockConfig:
    """Build the :class:`BedrockConfig` for a bedrock section.

    The section is first checked for banned credential keys.
    ``aws_model_id`` is required and used literally; the remaining
    ``aws_*`` keys are optional and go through ``_opt``.

    Raises:
        SystemExit: banned keys are present, or ``aws_model_id`` is
            missing or blank.
    """
    _check_no_secrets(section)

    model_id = _strip(section.get("aws_model_id"))
    if model_id:
        # BedrockConfig field name -> ini key it is read from.
        optional_keys = {
            "region": "aws_region",
            "profile": "aws_profile",
            "endpoint_url": "aws_endpoint_url",
            "model_id_next": "aws_model_id_next",
            "region_next": "aws_region_next",
        }
        extras = {
            attr: _opt(section.get(ini_key))
            for attr, ini_key in optional_keys.items()
        }
        return BedrockConfig(model_id=model_id, **extras)

    raise SystemExit(
        f"[!] models.ini [{section.name}] backend=bedrock but aws_model_id is missing"
    )
302
+
303
+
304
def _aliases(section) -> tuple[str, ...]:
    """Return the section's comma-separated ``aliases`` as a tuple.

    Each entry is whitespace-trimmed and blank entries are dropped. A
    missing or empty ``aliases`` key gives ``()``.
    """
    raw = _strip(section.get("aliases"))
    trimmed = (part.strip() for part in raw.split(","))
    return tuple(alias for alias in trimmed if alias)
309
+
310
+
311
def load_tiers(ini_path: Path | None = None) -> dict[str, Tier]:
    """Parse ``models.ini`` into a dict of tier-name -> Tier.

    Sections without a recognisable backend (no ``hf_repo``/``hf_file``
    pair *and* no ``aws_model_id``) are silently skipped, and the
    ``[ROUTING]`` section is always skipped, so only real tiers end up
    in the inventory.

    Args:
        ini_path: File to parse. When omitted, the path returned by
            ``require_models_ini()`` is used.

    Returns:
        Tiers keyed by section name, in the order the sections appear.

    Raises:
        SystemExit: a section names an unknown ``backend``, or a bedrock
            section lacks ``aws_model_id`` or carries banned credential
            keys.
    """
    path = ini_path or require_models_ini()

    # ``;`` starts an inline comment; interpolation is off so ``%`` in
    # values is taken literally.
    cfg = configparser.ConfigParser(
        inline_comment_prefixes=(";",),
        interpolation=None,
    )
    # Decode as UTF-8 explicitly. Without ``encoding`` the platform locale
    # is used, so the same committed file could parse differently (or fail
    # to decode) on machines whose locale is not UTF-8.
    cfg.read(path, encoding="utf-8")

    tiers: dict[str, Tier] = {}
    for sec in cfg.sections():
        if sec == "ROUTING":
            continue
        s = cfg[sec]
        backend = _detect_backend(s)
        if not backend:
            continue

        # Fields shared by every backend.
        common = {
            "name": sec,
            "role": _strip(s.get("role")),
            "backend": backend,
            # Fall back to the section name when no description is given.
            "description": _strip(s.get("description")) or sec,
            "ctx_size": _int(s.get("ctx_size", "")),
            "aliases": _aliases(s),
            "sampler": parse_sampler(s.get("sampler", "")),
        }

        if backend == BACKEND_GGUF:
            tiers[sec] = Tier(
                **common,
                repo=_strip(s.get("hf_repo")),
                file=_strip(s.get("hf_file")),
                repo_next=_strip(s.get("hf_repo_next")) or None,
                file_next=_strip(s.get("hf_file_next")) or None,
            )
        elif backend == BACKEND_BEDROCK:
            tiers[sec] = Tier(**common, bedrock=_build_bedrock(s))
    return tiers
356
+
357
+
358
def iter_download_targets(ini_path: Path | None = None) -> Iterator[TierFile]:
    """Yield each :class:`TierFile` worth caching, tier by tier.

    Tiers backed by Bedrock yield nothing, since they have no GGUFs to
    fetch.
    """
    inventory = load_tiers(ini_path)
    for tier in inventory.values():
        for target in tier.files():
            yield target
365
+
366
+
367
def main(argv: list[str]) -> int:
    """Command-line entry point; always returns exit code 0.

    With ``--downloads`` as the first argument, one TSV row
    (``tag<TAB>repo<TAB>file<TAB>label``) is printed per download
    target. Otherwise a human-readable summary of every tier is printed.
    """
    wants_downloads = len(argv) > 1 and argv[1] == "--downloads"
    if wants_downloads:
        for target in iter_download_targets():
            print(f"{target.tag}\t{target.repo}\t{target.file}\t{target.label}")
        return 0

    path = models_ini_path()
    print(f"# tiers loaded from {path}\n")
    for tier in load_tiers(path).values():
        print(f"[{tier.name}] role={tier.role} backend={tier.backend} ctx={tier.ctx_size}")

        if tier.is_gguf:
            print(f" current : {tier.repo} / {tier.file}")
            if tier.file_next:
                print(f" next : {tier.repo_next or tier.repo} / {tier.file_next}")
            continue

        if not tier.is_bedrock:
            continue

        cfg = tier.bedrock
        assert cfg is not None
        scope = cfg.region or "(default region)"
        print(f" current : {cfg.model_id} @ {scope}")
        if cfg.has_next:
            # The upgrade stays in the current scope unless it names a region.
            next_scope = cfg.region_next or scope
            print(f" next : {cfg.model_id_next} @ {next_scope}")
        print(f" profile : {cfg.profile or '(default chain)'}")
    return 0
391
+
392
+
393
# Script entry: run main() on the raw argv and exit with its return code.
if __name__ == "__main__":
    sys.exit(main(sys.argv))