npm - trantor - Versions diffs - 0.15.0 → 0.17.0 - Mend

trantor 0.15.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

package/.claude-plugin/marketplace.json +7 -7
package/.claude-plugin/plugin.json +3 -3
package/README.md +30 -6
package/bin/cli.mjs +1 -1
package/bin/connect.mjs +6 -6
package/bin/crew-runner.mjs +3 -3
package/bin/crew.sh +2 -2
package/bin/doctor.mjs +1 -1
package/bin/handoff-prompt.sh +3 -3
package/bin/profile.mjs +1 -1
package/bin/statusline.mjs +5 -5
package/configs/gemini-settings.json +2 -2
package/deploy/setup.sh +5 -4
package/engine/LICENSE +21 -0
package/engine/README.md +5 -0
package/engine/bin/scrooge +1276 -0
package/engine/bin/scrooge-capabilities +209 -0
package/engine/bin/scrooge-diverge +263 -0
package/engine/bin/scrooge-drift +126 -0
package/engine/bin/scrooge-verify +190 -0
package/engine/capabilities.seed.json +112 -0
package/engine/install.sh +138 -0
package/engine/lessons.seed.json +17 -0
package/engine/registry.template.json +329 -0
package/hooks/hooks.json +1 -1
package/hooks/precompact.mjs +5 -5
package/hooks/sessionstart.mjs +9 -9
package/hub.mjs +3 -3
package/mcp.mjs +3 -3
package/package.json +5 -4
package/skills/crew/SKILL.md +8 -7
package/skills/{relay-handoff → handoff}/SKILL.md +2 -2
package/ui.html +1 -1

package/engine/bin/scrooge ADDED Viewed

@@ -0,0 +1,1276 @@
+#!/usr/bin/env python3
+"""
+scrooge — make the cheap models do the grunt work.
+Routes a single task to a CHEAP external model (DeepSeek/Kimi/ZAI-GLM/Gemini/
+OpenAI/xAI/OpenRouter, …) via the OpenAI-compatible chat API. Your expensive
+orchestrator (Claude Opus, etc.) stays in charge; this is ONLY for disconnected,
+individually-scoped execution work — drafts, summaries, extraction, judgment.
+TRANSPARENCY (the whole point): every call prints a loud banner to STDERR and
+appends to a cost ledger. The model's text goes to STDOUT only.
+Usage:
+  scrooge "prompt"                          # default = cheapest live model (nothing hardcoded)
+  scrooge --latest "prompt"                 # re-check the provider's live model list now
+  scrooge --model kimi "prompt"             # force a model (alias or full id)
+  scrooge --task code "prompt"              # weigh capability×cost for the task (best value)
+  scrooge --task code -d hard "prompt"      # difficulty floor → escalate hard work off the cheap model
+  scrooge --task code --spread 3 "prompt"   # fan a batch across the top-3 capable models
+  scrooge --json --system "..." "prompt"    # JSON-object output + system prompt
+  echo "long input" | scrooge --task summarize   # prompt from stdin (or - )
+  scrooge models <provider>                 # list live model ids from a provider
+  scrooge list                              # show registry (providers/models/tasks)
+  scrooge ledger [--since 24h|7d|all]       # usage + cost totals, savings vs Opus
+  scrooge watch  [--tail N|--all]           # LIVE feed of cheap-model calls as they happen
+Live training (per-model lessons): short corrective guardrails, learned from
+observed failures, are auto-injected into the model's system prompt at routing
+time (the banner shows "+N lessons"; --no-lessons bypasses):
+  scrooge learn  -m <model> [-t <task>] "lesson"   # capture (dedup; -t omitted => "*")
+  scrooge learn  --seed                              # merge the shipped seed set
+  scrooge lessons [-m <model>] [-t <task>]          # show the store
+  scrooge forget -m <model> [-t <task>] <index>|--all   # remove
+Config:  $SCROOGE_HOME (default ~/.token-scrooge) holds registry.json + calls.jsonl
+         + lessons.json (user-local; seeded from the committed lessons.seed.json)
+Keys:    read from the environment; optionally also from $SCROOGE_ENV_FILE (KEY=VALUE)
+Exit: 0 ok, non-zero on error (message to stderr).
+"""
+import sys, os, json, time, argparse, urllib.request, urllib.error, re, shutil, hashlib
+# Filesystem layout: all user-local state lives under SCROOGE_DIR ($SCROOGE_HOME, else ~/.token-scrooge).
+HOME = os.path.expanduser("~")
+SCROOGE_DIR = os.environ.get("SCROOGE_HOME", os.path.join(HOME, ".token-scrooge"))
+REGISTRY = os.path.join(SCROOGE_DIR, "registry.json")   # JSON registry opened by load_registry()
+LEDGER = os.path.join(SCROOGE_DIR, "calls.jsonl")       # append-only call log, one JSON object per line
+MODELS_CACHE = os.path.join(SCROOGE_DIR, "models-cache.json")   # short-lived /models snapshots per provider
+CAPS = os.path.join(SCROOGE_DIR, "capabilities.json")           # per-model quality scores for the weighted router
+ENV_FALLBACK = os.environ.get("SCROOGE_ENV_FILE", "")   # optional extra KEY=VALUE file (e.g. a proxy .env)
+OPUS_IN, OPUS_OUT = 15.0, 75.0   # $/1M reference for savings math (Opus 4.x)
+# ---- ANSI (stderr only) -------------------------------------------------
+def _c(code, s):
+    return s if not sys.stderr.isatty() else "\033[%sm%s\033[0m" % (code, s)
+ORANGE = lambda s: _c("38;5;208", s)
+DIM = lambda s: _c("2", s)
+RED = lambda s: _c("31", s)
+def err(*a):
+    sys.stderr.write(" ".join(str(x) for x in a) + "\n")
+# ---- key loading --------------------------------------------------------
+def _load_env_file(path):
+    """Merge KEY=VALUE lines from a file into os.environ (no override of existing)."""
+    if not path or not os.path.exists(path):
+        return
+    try:
+        with open(path) as fh:
+            for line in fh:
+                line = line.strip()
+                if not line or line.startswith("#") or "=" not in line:
+                    continue
+                k, v = line.split("=", 1)
+                k = k.strip()
+                if k.startswith("export "):
+                    k = k[len("export "):].strip()
+                v = v.strip().strip('"').strip("'")
+                if k and k not in os.environ:
+                    os.environ[k] = v
+    except Exception as e:
+        err(DIM("[scrooge] could not read %s: %s" % (path, e)))
+def load_env_fallback():
+    """Keys come from the environment, plus (no override): ~/.token-scrooge/.env
+    (written by `scrooge setup`) and an optional $SCROOGE_ENV_FILE."""
+    _load_env_file(os.path.join(SCROOGE_DIR, ".env"))
+    _load_env_file(ENV_FALLBACK)
+def load_registry():
+    with open(REGISTRY) as fh:
+        return json.load(fh)
+def provider_key(reg, provider):
+    p = reg["providers"].get(provider) or {}
+    for name in p.get("env", []):
+        if os.environ.get(name):
+            return os.environ[name]
+    return None
+# ---- model resolution ---------------------------------------------------
+def resolve_model(reg, model=None, task=None, latest=False):
+    """Return (model_id, provider) or raise."""
+    if model:
+        model = reg.get("aliases", {}).get(model, model)
+        if model in reg["models"]:
+            return model, reg["models"][model]["provider"]
+        if "/" in model:   # provider/model form, e.g. openrouter explicit
+            prov, _ = model.split("/", 1)
+            if prov in reg["providers"]:
+                return model, prov
+        # unknown but maybe valid for openrouter
+        raise SystemExit("Unknown model '%s'. Try: scrooge list" % model)
+    if task:
+        cands = reg.get("tasks", {}).get(task)
+        if not cands:
+            raise SystemExit("Unknown task '%s'. Tasks: %s" % (task, ", ".join(reg.get("tasks", {}))))
+        for mid in cands:
+            prov = reg["models"][mid]["provider"]
+            if provider_key(reg, prov):
+                return mid, prov
+        raise SystemExit("No live API key for any model serving task '%s'." % task)
+    # No model, no task: derive the default dynamically from what's actually live.
+    return default_model(reg, latest=latest)
+# ---- capability-weighted routing ----------------------------------------
+# Pick the best *value* model for a task: weight each candidate's quality-for-the-task
+# against its price, but first gate by a DIFFICULTY floor so hard work escalates off the
+# cheapest model while easy work stays cheap. Quality data lives in capabilities.json
+# (seeded from the committed capabilities.seed.json; refreshed by `scrooge-capabilities`).
+# Task name -> capability metric key that task_metric() hands to model_quality().
+TASK_METRIC = {"code": "coding", "code-review": "coding", "reason": "reasoning",
+               "verify": "reasoning", "math": "math"}   # else -> "intelligence"
+DIFF_PCTL = {"easy": 0.0, "medium": 0.5, "hard": 0.8}    # capability floor = this percentile of candidates
+HARD_TASKS = ("code", "code-review", "reason", "verify")  # default to 'medium' difficulty when unspecified
+def _caps_seed_path():
+    here = os.path.dirname(os.path.realpath(__file__))
+    for cand in (os.path.join(SCROOGE_DIR, "capabilities.seed.json"),
+                 os.path.join(here, "..", "capabilities.seed.json")):
+        if os.path.exists(cand):
+            return cand
+    return None
+def load_caps():
+    """Per-model quality scores (user store → seed). Missing/malformed → {}."""
+    for path in (CAPS, _caps_seed_path()):
+        if not path:
+            continue
+        try:
+            with open(path) as fh:
+                d = json.load(fh)
+            if isinstance(d, dict):
+                return {k: v for k, v in d.items() if not k.startswith("_")}
+        except Exception:
+            continue
+    return {}
+def task_metric(task):
+    return TASK_METRIC.get(task or "", "intelligence")
+def infer_difficulty(task, prompt):
+    """Fallback when --difficulty is omitted: harder default for code/reasoning tasks,
+    bumped a notch for very long prompts."""
+    base = "medium" if (task in HARD_TASKS) else "easy"
+    if prompt and len(prompt) > 8000:
+        base = {"easy": "medium", "medium": "hard", "hard": "hard"}[base]
+    return base
+def model_quality(caps, mid, metric):
+    c = caps.get(mid) or {}
+    v = c.get(metric)
+    if not isinstance(v, (int, float)):
+        v = c.get("intelligence")          # fall back to the general index
+    return float(v) if isinstance(v, (int, float)) else 0.0
+def blended_cost(reg, mid):
+    m = reg["models"].get(mid, {})
+    return max(1e-6, 0.3 * m.get("cost_in", 0) + 0.7 * m.get("cost_out", 0))
+def weigh_candidates(reg, caps, cand_ids, task, difficulty):
+    """Apply the difficulty floor, then rank survivors by quality^qw / cost^cw.
+    Returns [(model_id, score)] best-first."""
+    metric = task_metric(task)
+    quals = [(mid, model_quality(caps, mid, metric)) for mid in cand_ids]
+    scored_q = sorted(q for _, q in quals if q > 0)
+    pct = DIFF_PCTL.get(difficulty, 0.5)
+    floor = 0.0
+    if scored_q and pct > 0:
+        floor = scored_q[min(len(scored_q) - 1, int(round(pct * (len(scored_q) - 1))))]
+    survivors = [(mid, q) for mid, q in quals if q >= floor] or quals
+    rw = reg.get("routing") or {}
+    qw, cw = rw.get("q_weight", 1.5), rw.get("c_weight", 0.5)
+    out = [(mid, (max(q, 1e-6) ** qw) / (blended_cost(reg, mid) ** cw)) for mid, q in survivors]
+    out.sort(key=lambda x: -x[1])
+    return out
+def _spread_index(prompt, k):
+    """Deterministic, process-independent bucket so a parallel batch fans across the
+    top-k models instead of hammering one (built-in hash() is salted — use sha1)."""
+    h = int(hashlib.sha1((prompt or "").encode("utf-8", "replace")).hexdigest()[:8], 16)
+    return h % k
+def route_task(reg, caps, task, difficulty, prompt, spread=0):
+    """Capability-weighted pick for a --task. Returns (model_id, provider, info).
+    Falls back to registry cheapest-first order when no capability data is available."""
+    cands = reg.get("tasks", {}).get(task)
+    if not cands:
+        raise SystemExit("Unknown task '%s'. Tasks: %s" % (task, ", ".join(reg.get("tasks", {}))))
+    live = [m for m in cands if provider_key(reg, reg["models"][m]["provider"])]
+    if not live:
+        raise SystemExit("No live API key for any model serving task '%s'." % task)
+    if not caps:                                   # no quality data → legacy cheapest-first
+        return live[0], reg["models"][live[0]]["provider"], {"difficulty": None, "weighed": False}
+    diff = difficulty or infer_difficulty(task, prompt)
+    scored = weigh_candidates(reg, caps, live, task, diff)
+    ranked = [m for m, _ in scored]
+    if spread and len(ranked) > 1:
+        k = min(spread, len(ranked))
+        mid = ranked[_spread_index(prompt, k)]
+    else:
+        mid = ranked[0]
+    return mid, reg["models"][mid]["provider"], {
+        "difficulty": diff, "weighed": True, "metric": task_metric(task),
+        "considered": len(live), "pool": ranked[:max(spread, 1)] if spread else ranked[:1]}
+# ---- HTTP ---------------------------------------------------------------
+def http_post(url, headers, payload, timeout=120):
+    data = json.dumps(payload).encode()
+    req = urllib.request.Request(url, data=data, headers=headers, method="POST")
+    with urllib.request.urlopen(req, timeout=timeout) as r:
+        return json.loads(r.read().decode())
+def http_get(url, headers, timeout=30):
+    req = urllib.request.Request(url, headers=headers, method="GET")
+    with urllib.request.urlopen(req, timeout=timeout) as r:
+        return json.loads(r.read().decode())
+def list_live_models(reg, provider, ttl=600):
+    """The model ids a provider actually serves *right now*, via its OpenAI-compatible
+    /models endpoint. Cached per provider in MODELS_CACHE for `ttl` seconds so routing
+    never hardcodes a model id yet stays fast. Returns [] (best-effort) on any failure
+    — callers must tolerate an empty list (offline, no key, rate-limited)."""
+    cache = {}
+    try:
+        with open(MODELS_CACHE) as fh:
+            cache = json.load(fh)
+        if not isinstance(cache, dict):
+            cache = {}
+    except Exception:
+        cache = {}
+    ent = cache.get(provider)
+    if ttl and isinstance(ent, dict) and isinstance(ent.get("ids"), list) \
+            and (time.time() - ent.get("ts", 0)) < ttl:
+        return ent["ids"]
+    key = provider_key(reg, provider)
+    if not key:
+        return []
+    base = reg["providers"][provider]["base_url"].rstrip("/")
+    headers = {"Authorization": "Bearer " + key}
+    headers.update(reg["providers"][provider].get("extra_headers", {}))
+    try:
+        data = http_get(base + "/models", headers, timeout=15)
+        ids = sorted(str(m.get("id")) for m in (data.get("data") or []) if m.get("id"))
+    except Exception:
+        return []
+    cache[provider] = {"ts": int(time.time()), "ids": ids}
+    try:
+        os.makedirs(SCROOGE_DIR, exist_ok=True)
+        with open(MODELS_CACHE, "w") as fh:
+            json.dump(cache, fh)
+    except Exception:
+        pass
+    return ids
+def default_model(reg, latest=False):
+    """Pick the default model dynamically — NOTHING is hardcoded. Among the registry's
+    models whose provider currently has a key, choose the cheapest one the provider is
+    actually serving live (verified against /models). If every priced candidate has
+    drifted out of the live list, fall back to a live-discovered id (pricing unknown).
+    `latest=True` bypasses the cache to force a fresh liveness check.
+    Returns (model_id, provider). Raises SystemExit when no provider has a key."""
+    # (model_id, provider, cost_in + cost_out) for every keyed model, cheapest first.
+    priced = sorted(
+        ((mid, cfg["provider"], cfg.get("cost_in", 0) + cfg.get("cost_out", 0))
+         for mid, cfg in reg["models"].items() if provider_key(reg, cfg["provider"])),
+        key=lambda x: x[2])
+    if not priced:
+        raise SystemExit("No default model available — no API key is set for any provider. "
+                         "Run `scrooge setup` (or set a provider key), then `scrooge list`.")
+    ttl = 0 if latest else 600   # ttl=0 makes list_live_models skip its cache read
+    live_by_provider = {}        # provider -> set of live ids, fetched at most once each
+    for mid, prov, _ in priced:
+        if prov not in live_by_provider:
+            live_by_provider[prov] = set(list_live_models(reg, prov, ttl=ttl))
+        live = live_by_provider[prov]
+        # Empty set ⇒ couldn't reach the API (offline/etc.): trust the registry rather than block.
+        if not live or mid in live:
+            return mid, prov
+    # Every priced candidate has drifted vs the live list — route to a real live id.
+    # The cheapest candidate's provider is used; its alphabetically first live id is taken.
+    prov = priced[0][1]
+    live = sorted(live_by_provider.get(prov) or [])
+    if live:
+        err(DIM("[scrooge] registry models for %s look stale; routing to live '%s' "
+                "(pricing unknown — run scrooge-drift)" % (prov, live[0])))
+        return live[0], prov
+    return priced[0][0], priced[0][1]
+# ---- ledger -------------------------------------------------------------
+def project_label(start=None):
+    """A stable per-project name so a single shared ledger can be filtered by project.
+    Priority: $SCROOGE_PROJECT (explicit, set it per terminal for full control) →
+    nearest enclosing git repo's dir name → the cwd's base name."""
+    env = os.environ.get("SCROOGE_PROJECT")
+    if env:
+        return env
+    base = start or os.getcwd()
+    cur = base
+    for _ in range(40):
+        if os.path.isdir(os.path.join(cur, ".git")):
+            return os.path.basename(cur) or cur
+        parent = os.path.dirname(cur)
+        if parent == cur:
+            break
+        cur = parent
+    return os.path.basename(base.rstrip("/")) or base
+def append_ledger(entry):
+    try:
+        os.makedirs(SCROOGE_DIR, exist_ok=True)
+        with open(LEDGER, "a") as fh:
+            fh.write(json.dumps(entry) + "\n")
+        # return 1-based line count cheaply
+        with open(LEDGER) as fh:
+            return sum(1 for _ in fh)
+    except Exception:
+        return None
+def cost_usd(reg, model, tin, tout):
+    m = reg["models"].get(model.split("/")[-1]) or reg["models"].get(model)
+    if not m:
+        return 0.0
+    return (tin / 1e6) * m.get("cost_in", 0) + (tout / 1e6) * m.get("cost_out", 0)
+# ---- live training: per-model lessons -----------------------------------
+# A user-local, mutable store of short corrective guardrails learned from
+# observed cheap-model failures. The relevant lessons are auto-injected into a
+# model's system prompt at routing time so recurring bugs are preempted. The
+# store is SEPARATE from the capability/pricing registry. Shape:
+#   { "<model-id-or-alias>": { "<task>"|"*": ["one-liner", ...] }, "*": {...} }
+# Keys starting with "_" are metadata (comments) and ignored.
+LESSONS = os.path.join(SCROOGE_DIR, "lessons.json")   # user-local lessons store (read by load_lessons)
+LESSON_CAP = 8          # max lessons taken per (model, task) bucket
+LESSON_CHAR_CAP = 1200  # total injected-char ceiling for the guardrail block
+def _is_meta_key(k):
+    return isinstance(k, str) and k.startswith("_")
+def load_lessons():
+    """Read the user-local lessons store. Missing/malformed -> empty dict."""
+    try:
+        with open(LESSONS) as fh:
+            d = json.load(fh)
+        return d if isinstance(d, dict) else {}
+    except Exception:
+        return {}
+def save_lessons(store):
+    os.makedirs(SCROOGE_DIR, exist_ok=True)
+    with open(LESSONS, "w") as fh:
+        json.dump(store, fh, indent=2)
+        fh.write("\n")
+def _seed_path():
+    """Locate the committed seed set (SCROOGE_HOME first, then alongside the repo)."""
+    here = os.path.dirname(os.path.realpath(__file__))   # resolves symlinks → repo/bin
+    for cand in (os.path.join(SCROOGE_DIR, "lessons.seed.json"),
+                 os.path.join(here, "..", "lessons.seed.json")):
+        if os.path.exists(cand):
+            return cand
+    return None
+def load_seed():
+    p = _seed_path()
+    if not p:
+        return {}
+    try:
+        with open(p) as fh:
+            d = json.load(fh)
+        return d if isinstance(d, dict) else {}
+    except Exception:
+        return {}
+def merge_seed(store):
+    """Add seed lessons not already present (dedup on exact text). Returns count added."""
+    added = 0
+    for model, tasks in load_seed().items():
+        if _is_meta_key(model) or not isinstance(tasks, dict):
+            continue
+        for task, lessons in tasks.items():
+            if _is_meta_key(task) or not isinstance(lessons, list):
+                continue
+            bucket = store.setdefault(model, {}).setdefault(task, [])
+            for L in lessons:
+                if L not in bucket:
+                    bucket.append(L); added += 1
+    return added
+def seed_lessons_if_absent():
+    """First-use bootstrap: if no user store exists yet, copy the shipped seed in."""
+    if os.path.exists(LESSONS):
+        return
+    seed = load_seed()
+    if seed:
+        save_lessons(seed)
+def aliases_for(reg, model):
+    """All alias names that resolve to this full model id."""
+    return [a for a, full in reg.get("aliases", {}).items() if full == model]
+def gather_lessons(reg, model, task):
+    """Ordered, de-duplicated lessons for (full id + aliases) × (task + '*'),
+    then the top-level universal '*' model bucket. <=LESSON_CAP per bucket."""
+    store = load_lessons()
+    if not store:
+        return []
+    model_keys = [model] + [a for a in aliases_for(reg, model) if a != model]
+    task_keys = ([task] if task else []) + ["*"]
+    out, seen = [], set()
+    def drain(mkey):
+        mbucket = store.get(mkey)
+        if not isinstance(mbucket, dict):
+            return
+        for tkey in task_keys:
+            lst = mbucket.get(tkey)
+            if not isinstance(lst, list):
+                continue
+            taken = 0
+            for L in lst:
+                if taken >= LESSON_CAP:
+                    break
+                if L not in seen:
+                    seen.add(L); out.append(L)
+                taken += 1
+    for mkey in model_keys:
+        drain(mkey)
+    drain("*")   # universal lessons (apply to every routed cheap model)
+    return out
+def build_lessons_block(reg, model, task):
+    """Compose the terse guardrail block and its lesson count, within the char cap."""
+    lessons = gather_lessons(reg, model, task)
+    if not lessons:
+        return "", 0
+    header = "Known pitfalls to avoid:"
+    lines, used, n = [header], len(header), 0
+    for L in lessons:
+        line = "- " + L
+        if used + 1 + len(line) > LESSON_CHAR_CAP:
+            break
+        lines.append(line); used += 1 + len(line); n += 1
+    if not n:
+        return "", 0
+    return "\n".join(lines), n
+# ---- commands -----------------------------------------------------------
+def cmd_call(reg, args):
+    # Read the prompt first — the capability weigher uses it to infer difficulty and to
+    # fan a --spread batch deterministically.
+    prompt = args.prompt
+    if prompt in (None, "-"):
+        prompt = sys.stdin.read()
+    if not prompt or not prompt.strip():
+        raise SystemExit("Empty prompt.")
+    # Route: explicit --model wins; a --task is weighed by capability×cost (gated by
+    # difficulty); otherwise the dynamic cheapest-live default.
+    route_info = {}
+    if args.model:
+        model, provider = resolve_model(reg, model=args.model)
+    elif args.task and not getattr(args, "no_weigh", False):
+        model, provider, route_info = route_task(
+            reg, load_caps(), args.task, getattr(args, "difficulty", None),
+            prompt, spread=getattr(args, "spread", 0) or 0)
+    else:
+        model, provider = resolve_model(reg, args.model, args.task,
+                                        latest=getattr(args, "latest", False))
+    key = provider_key(reg, provider)
+    if not key:
+        raise SystemExit("No API key set for provider '%s' (env: %s)" %
+                         (provider, ", ".join(reg["providers"][provider].get("env", []))))
+    cwd = os.getcwd()
+    proj = project_label(cwd)   # stamped on the ledger so `scrooge watch --here` can filter
+    base = reg["providers"][provider]["base_url"].rstrip("/")
+    url = base + "/chat/completions"
+    headers = {"Authorization": "Bearer " + key, "Content-Type": "application/json"}
+    headers.update(reg["providers"][provider].get("extra_headers", {}))
+    # Live training: gather per-model lessons and fold them into the system prompt.
+    lessons_block, n_lessons = ("", 0)
+    if not args.no_lessons:
+        seed_lessons_if_absent()
+        lessons_block, n_lessons = build_lessons_block(reg, model, args.task)
+    # Compose the system message: user's --system (or the JSON-mode instruction)
+    # leads; injected guardrails follow.
+    sys_parts = []
+    if args.system:
+        sys_parts.append(args.system)
+    elif args.json:
+        sys_parts.append("Respond ONLY with a single valid JSON object. No prose, no code fences.")
+    if lessons_block:
+        sys_parts.append(lessons_block)
+    msgs = []
+    if sys_parts:
+        msgs.append({"role": "system", "content": "\n\n".join(sys_parts)})
+    msgs.append({"role": "user", "content": prompt})
+    # Per-model constraint: some models (e.g. kimi-k2.6) require a fixed temperature.
+    mcfg = reg["models"].get(model, {})
+    temp = mcfg["force_temperature"] if "force_temperature" in mcfg else args.temperature
+    payload = {"model": model.split("/", 1)[1] if (provider == "openrouter" and "/" in model) else model,
+               "messages": msgs, "temperature": temp}
+    if provider == "openrouter":
+        payload["model"] = model if "/" in model else model
+    if args.max_tokens:
+        # Some models (OpenAI GPT-5 / reasoning class) reject "max_tokens" and
+        # require "max_completion_tokens" — overridable per-model in the registry.
+        payload[mcfg.get("token_param", "max_tokens")] = args.max_tokens
+    if args.json:
+        payload["response_format"] = {"type": "json_object"}
+        # (the "JSON object only" system instruction is composed above)
+    if args.task and route_info.get("difficulty"):
+        spread_n = len(route_info.get("pool") or [])
+        sp = " · spread/%d" % spread_n if spread_n > 1 else ""
+        label = "[task: %s · %s%s]" % (args.task, route_info["difficulty"], sp)
+    else:
+        label = ("[task: %s]" % args.task) if args.task else ""
+    extra = (" +%d lessons" % n_lessons) if n_lessons else ""
+    err(ORANGE("🪙 scrooge ▸ %s/%s %s%s" % (provider, model, label, extra)))
+    t0 = time.time()
+    try:
+        resp = http_post(url, headers, payload)
+    except urllib.error.HTTPError as e:
+        body = e.read().decode(errors="replace")[:500]
+        err(RED("🪙 scrooge ✗ %s/%s HTTP %s: %s" % (provider, model, e.code, body)))
+        append_ledger({"ts": int(t0), "provider": provider, "model": model, "task": args.task,
+                       "project": proj, "cwd": cwd,
+                       "ok": False, "error": "HTTP %s" % e.code, "duration_ms": int((time.time()-t0)*1000)})
+        raise SystemExit(2)
+    except Exception as e:
+        err(RED("🪙 scrooge ✗ %s/%s: %s" % (provider, model, e)))
+        append_ledger({"ts": int(t0), "provider": provider, "model": model, "task": args.task,
+                       "project": proj, "cwd": cwd,
+                       "ok": False, "error": str(e), "duration_ms": int((time.time()-t0)*1000)})
+        raise SystemExit(2)
+    dt = time.time() - t0
+    choice = (resp.get("choices") or [{}])[0]
+    msg = choice.get("message", {}) or {}
+    text = msg.get("content") or msg.get("reasoning_content") or ""
+    usage = resp.get("usage", {}) or {}
+    tin = usage.get("prompt_tokens") or 0
+    tout = usage.get("completion_tokens") or 0
+    c = cost_usd(reg, model, tin, tout)
+    # A short, whitespace-collapsed preview so `scrooge watch` can show WHAT each cheap
+    # model is doing in real time. Local-only (the ledger is gitignored); opt out with
+    # SCROOGE_NO_PREVIEW=1 if you'd rather not write any prompt text to disk.
+    preview = "" if os.environ.get("SCROOGE_NO_PREVIEW", "").lower() in ("1", "true", "yes") \
+              else " ".join(prompt.split())[:100]
+    line_no = append_ledger({"ts": int(t0), "provider": provider, "model": model, "task": args.task,
+                             "project": proj, "cwd": cwd,
+                             "tokens_in": tin, "tokens_out": tout, "cost_usd": round(c, 6),
+                             "duration_ms": int(dt*1000), "ok": True, "prompt_chars": len(prompt),
+                             "prompt_preview": preview})
+    err(ORANGE("🪙 scrooge ✓ %s/%s · %d→%d tok · ~$%.5f · %.1fs%s" %
+               (provider, model, tin, tout, c, dt, (" · ledger#%d" % line_no) if line_no else "")))
+    sys.stdout.write(text)
+    if not text.endswith("\n"):
+        sys.stdout.write("\n")
+def cmd_models(reg, args):
+    provider = args.provider
+    if provider not in reg["providers"]:
+        raise SystemExit("Unknown provider. Known: %s" % ", ".join(reg["providers"]))
+    if not provider_key(reg, provider):
+        raise SystemExit("No key for %s" % provider)
+    ids = list_live_models(reg, provider, ttl=0)   # always show a fresh list here
+    err(DIM("[scrooge] %d models from %s:" % (len(ids), provider)))
+    for i in ids:
+        print(i)
+def cmd_list(reg, args):
+    """Print the registry to stdout in four sections: providers (✓/✗ = key present),
+    models with prices (plus capability scores when capability data is loaded), tasks
+    with their candidate models, and aliases. `args` is not used."""
+    print("PROVIDERS (live = key present):")
+    for p, cfg in reg["providers"].items():
+        live = "✓" if provider_key(reg, p) else "✗"
+        print("  %s %-11s %s" % (live, p, cfg["base_url"]))
+    caps = load_caps()
+    # The capability column (and its header) only appears when scores are available.
+    cap_hdr = "intel/code/reason · " if caps else ""
+    print("\nMODELS ($/1M in/out · %strust · good_for):" % cap_hdr)
+    for m, c in reg["models"].items():
+        live = "✓" if provider_key(reg, c["provider"]) else "✗"
+        cap = ""
+        if caps:
+            q = caps.get(m) or {}
+            # Numeric scores print as whole numbers; anything else as a " -" placeholder.
+            fmt = lambda v: ("%2.0f" % v) if isinstance(v, (int, float)) else " -"
+            cap = "%s/%s/%s " % (fmt(q.get("intelligence")), fmt(q.get("coding")), fmt(q.get("reasoning")))
+        print("  %s %-24s %5.2f/%-5.2f %s%-9s %s" % (live, m, c.get("cost_in",0), c.get("cost_out",0),
+              cap, c.get("trust",""), ",".join(c.get("good_for", []))))
+    print("\nTASKS (weighed by capability×cost, gated by difficulty):")
+    for t, ms in reg.get("tasks", {}).items():
+        print("  %-13s → %s" % (t, ", ".join(ms)))
+    print("\nALIASES:", ", ".join("%s=%s" % (k, v) for k, v in reg.get("aliases", {}).items()))
+def parse_since(s):
+    if s == "all":
+        return 0
+    m = re.match(r"(\d+)([hd])", s or "24h")
+    if not m:
+        return 0
+    n, u = int(m.group(1)), m.group(2)
+    return time.time() - n * (3600 if u == "h" else 86400)
+def cmd_ledger(reg, args):
+    """Print usage and cost totals for successful calls in the --since window.
+    Optionally restricted to one project (--here = the current directory's project
+    label, otherwise args.project). Shows total tokens, the amount spent, what the same
+    tokens would have cost on the reference orchestrator (reg["orchestrator"], falling
+    back to the OPUS_IN / OPUS_OUT rates), and a per-model breakdown."""
+    since = parse_since(args.since)
+    want_proj = project_label(os.getcwd()) if getattr(args, "here", False) else getattr(args, "project", None)
+    if not os.path.exists(LEDGER):
+        print("No calls logged yet."); return
+    rows = []
+    with open(LEDGER) as fh:
+        for line in fh:
+            try:
+                o = json.loads(line)
+            except Exception:
+                continue   # skip lines that are not valid JSON
+            if want_proj and (o.get("project") or "") != want_proj:
+                continue
+            # Only successful calls inside the window count toward the totals.
+            if o.get("ts", 0) >= since and o.get("ok"):
+                rows.append(o)
+    scope = (" · project=%s" % want_proj) if want_proj else ""
+    if not rows:
+        print("No successful calls in window '%s'%s." % (args.since, scope)); return
+    orch = reg.get("orchestrator") or {}
+    base_in = orch.get("cost_in", OPUS_IN)
+    base_out = orch.get("cost_out", OPUS_OUT)
+    orch_name = orch.get("name", "Opus")
+    total_cost = sum(r.get("cost_usd", 0) for r in rows)
+    tin = sum(r.get("tokens_in", 0) for r in rows)
+    tout = sum(r.get("tokens_out", 0) for r in rows)
+    # What the same token volume would cost at the orchestrator's $/1M rates.
+    orch_equiv = (tin/1e6)*base_in + (tout/1e6)*base_out
+    by = {}   # "provider/model" -> running totals
+    for r in rows:
+        k = "%s/%s" % (r.get("provider"), r.get("model"))
+        d = by.setdefault(k, {"n": 0, "cost": 0, "tin": 0, "tout": 0})
+        d["n"] += 1; d["cost"] += r.get("cost_usd", 0)
+        d["tin"] += r.get("tokens_in", 0); d["tout"] += r.get("tokens_out", 0)
+    print("SCROOGE LEDGER — window: %s%s   (%d calls)" % (args.since, scope, len(rows)))
+    print("  tokens: %s in / %s out" % (f"{tin:,}", f"{tout:,}"))
+    print("  spent on cheap models: $%.4f" % total_cost)
+    print("  same tokens on %s: ~$%.2f" % (orch_name, orch_equiv))
+    if orch_equiv > 0:   # guard the percentage against division by zero
+        print("  → saved ~$%.2f (%.0f%% cheaper)" % (orch_equiv - total_cost, 100*(1 - total_cost/orch_equiv)))
+    print("\n  by model:")
+    # Most expensive model first.
+    for k, d in sorted(by.items(), key=lambda x: -x[1]["cost"]):
+        print("    %-28s %3d calls  $%.4f  (%s→%s tok)" % (k, d["n"], d["cost"], f"{d['tin']:,}", f"{d['tout']:,}"))
+def cmd_watch(reg, args):
+    """Live feed of every cheap-model call as it hits the ledger — a real-time view of
+    the orchestrator delegating grunt work. Catches foreground, background, and subagent
+    calls alike (they all append to the ledger). Keep it open in a side pane."""
+    try:
+        sys.stdout.reconfigure(line_buffering=True)   # flush each line even when piped/backgrounded
+    except Exception:
+        pass
+    orch = reg.get("orchestrator") or {}
+    base_in, base_out = orch.get("cost_in", OPUS_IN), orch.get("cost_out", OPUS_OUT)
+    orch_name = orch.get("name", "Opus")
+    tot = {"n": 0, "cost": 0.0, "tin": 0, "tout": 0}
+    # ---- per-project filter (many projects share one ledger) ----------------
+    want_proj = project_label(os.getcwd()) if getattr(args, "here", False) else getattr(args, "project", None)
+    cwd_prefix = os.path.abspath(os.path.expanduser(args.cwd)) if getattr(args, "cwd", None) else None
+    single = bool(want_proj or cwd_prefix)   # single-project view → no per-line project tag
+    def matches(o):
+        if want_proj and (o.get("project") or "") != want_proj:
+            return False
+        if cwd_prefix:
+            c = o.get("cwd") or ""
+            if not (c == cwd_prefix or c.startswith(cwd_prefix.rstrip("/") + "/")):
+                return False
+        return True
+    def render(o):
+        ts = time.strftime("%H:%M:%S", time.localtime(o.get("ts", 0)))
+        pm = "%s/%s" % (o.get("provider"), o.get("model"))
+        tag = AMBER("[%s]" % o["task"]) if o.get("task") else GREYc("[·]")
+        pfx = "" if single else GREYc("%-16s " % (o.get("project") or "?")[:16])
+        if not o.get("ok", False):
+            return "%s%s %s %s %s  %s" % (pfx, GREYc(ts), ERRc("✗"), GOLD(pm), tag,
+                                          ERRc(str(o.get("error", "error"))))
+        tin, tout = o.get("tokens_in", 0), o.get("tokens_out", 0)
+        dur = o.get("duration_ms", 0) / 1000.0
+        prev = (o.get("prompt_preview") or "").strip()
+        return "%s%s %s %s %s %s %s %s%s" % (
+            pfx, GREYc(ts), OKc("✓"), GOLD(pm), tag,
+            GREYc("%d→%d tok" % (tin, tout)), AMBER("$%.5f" % o.get("cost_usd", 0)),
+            GREYc("%.1fs" % dur), ("  " + GREYc("· " + prev)) if prev else "")
+    def summary(final=False):
+        if not tot["n"]:
+            if final:
+                print(GREYc("  (no calls observed while watching)"))
+            return
+        equiv = (tot["tin"] / 1e6) * base_in + (tot["tout"] / 1e6) * base_out
+        saved = equiv - tot["cost"]
+        pct = (100 * (1 - tot["cost"] / equiv)) if equiv > 0 else 0
+        print(AMBER("  ── %d calls · $%.4f cheap · ~$%.2f on %s · saved ~$%.2f (%.0f%%) ──"
+                    % (tot["n"], tot["cost"], equiv, orch_name, saved, pct)))
+    def show(o):
+        if not matches(o):
+            return
+        print(render(o))
+        if o.get("ok"):
+            tot["n"] += 1; tot["cost"] += o.get("cost_usd", 0)
+            tot["tin"] += o.get("tokens_in", 0); tot["tout"] += o.get("tokens_out", 0)
+            if tot["n"] % 10 == 0:
+                summary()
+    path = LEDGER
+    scope = ("project=%s" % want_proj) if want_proj else \
+            ("cwd=%s" % cwd_prefix.replace(HOME, "~")) if cwd_prefix else "all projects"
+    print(GOLD(BOLD("🪙 scrooge watch")) +
+          GREYc("  %s  ·  following %s  ·  Ctrl-C to stop" % (scope, path.replace(HOME, "~"))))
+    # Where to start: --all replays the whole ledger; otherwise follow only NEW calls
+    # (so you literally watch them happen), with --tail N backfilling recent context.
+    last = 0
+    if os.path.exists(path):
+        last = 0 if args.all else os.path.getsize(path)
+    if args.tail and not args.all and os.path.exists(path):
+        with open(path, "rb") as fh:
+            recent = [l for l in fh.read().split(b"\n") if l.strip()][-args.tail:]
+        for bl in recent:
+            try: show(json.loads(bl.decode("utf-8", "replace")))
+            except Exception: pass
+    try:
+        while True:
+            if not os.path.exists(path):
+                time.sleep(0.5); continue
+            size = os.path.getsize(path)
+            if size < last:      # truncated / rotated
+                last = 0
+            if size > last:
+                with open(path, "rb") as fh:
+                    fh.seek(last)
+                    raw = fh.read()
+                cut = raw.rfind(b"\n")
+                if cut != -1:
+                    chunk = raw[:cut + 1]
+                    last += len(chunk)
+                    for bl in chunk.split(b"\n"):
+                        if not bl.strip():
+                            continue
+                        try: show(json.loads(bl.decode("utf-8", "replace")))
+                        except Exception: pass
+            if not args.follow:
+                break
+            time.sleep(0.3)
+    except KeyboardInterrupt:
+        pass
+    print()
+    summary(final=True)
+# ---- live-training subcommands (learn / lessons / forget) ---------------
+def cmd_learn(reg, args):
+    """Append a lesson (dedup exact). --seed merges the shipped seed set."""
+    if args.seed:
+        store = load_lessons()
+        added = merge_seed(store)
+        save_lessons(store)
+        print("%s merged %d seed lesson(s) into %s" %
+              (OKc("✓"), added, LESSONS.replace(HOME, "~")))
+        return
+    if not args.model:
+        raise SystemExit("learn: -m/--model is required (or use --seed to load the shipped seed set).")
+    if not args.text or not args.text.strip():
+        raise SystemExit('learn: provide the lesson text, e.g. scrooge learn -m deepseek -t code "Sort explicitly; never assume API ordering."')
+    seed_lessons_if_absent()   # first-use bootstrap, regardless of entry point
+    model = reg.get("aliases", {}).get(args.model, args.model)   # store under canonical full id
+    task = args.task or "*"
+    store = load_lessons()
+    bucket = store.setdefault(model, {}).setdefault(task, [])
+    text = args.text.strip()
+    scope = "%s/%s" % (GOLD(model), task)
+    if text in bucket:
+        print("%s already known for %s" % (GREYc("•"), scope)); return
+    bucket.append(text)
+    save_lessons(store)
+    note = "" if (model in reg["models"] or args.model in reg.get("aliases", {})) \
+           else GREYc("  (note: '%s' isn't a known model/alias — stored anyway)" % model)
+    print("%s learned for %s: %s%s" % (OKc("✓"), scope, text, note))
+def _print_model_lessons(model, tasks, tfilter):
+    """Print one model's buckets; returns how many lessons were shown."""
+    shown = 0
+    header_done = False
+    for task in sorted(tasks.keys()):
+        if _is_meta_key(task):
+            continue
+        if tfilter and task != tfilter:
+            continue
+        lessons = tasks.get(task) or []
+        if not isinstance(lessons, list) or not lessons:
+            continue
+        if not header_done:
+            print("\n" + GOLD("● " + model)); header_done = True
+        print("  " + AMBER(task if task != "*" else "* (all tasks)"))
+        for i, L in enumerate(lessons):
+            print("    %s %s" % (GREYc("%d." % i), L)); shown += 1
+    return shown
+def cmd_lessons(reg, args):
+    """Pretty-print the lessons store (optionally filtered by -m / -t)."""
+    seed_lessons_if_absent()
+    store = load_lessons()
+    real = {k: v for k, v in store.items() if not _is_meta_key(k)}
+    print(GOLD(BOLD("LESSONS")) + GREYc("  (%s)" % LESSONS.replace(HOME, "~")))
+    if not real:
+        print(GREYc('  none yet — add one:  scrooge learn -m <model> -t <task> "…"')); return
+    mfilter = reg.get("aliases", {}).get(args.model, args.model) if args.model else None
+    shown = 0
+    if mfilter:
+        # the model's own buckets, plus the universal "*" bucket that also applies
+        shown += _print_model_lessons(mfilter, real.get(mfilter, {}), args.task)
+        if "*" in real and mfilter != "*":
+            shown += _print_model_lessons("* (every cheap model)", real.get("*", {}), args.task)
+    else:
+        for model in sorted(real.keys()):
+            shown += _print_model_lessons(model, real.get(model, {}), args.task)
+    if not shown:
+        print(GREYc("  (nothing matches that filter)"))
+def cmd_forget(reg, args):
+    """Remove a lesson by 0-based index, or --all for a whole (model[/task]) scope."""
+    if not args.model:
+        raise SystemExit("forget: -m/--model is required.")
+    seed_lessons_if_absent()   # so a fresh store reflects the shipped seed before removal
+    model = reg.get("aliases", {}).get(args.model, args.model)
+    store = load_lessons()
+    tasks = store.get(model)
+    if not isinstance(tasks, dict) or not tasks:
+        raise SystemExit("No lessons stored for model '%s'." % model)
+    if args.all:
+        if args.task:
+            if args.task not in tasks:
+                raise SystemExit("No lessons for %s/%s." % (model, args.task))
+            tasks.pop(args.task, None)
+            scope = "%s/%s" % (model, args.task)
+        else:
+            store.pop(model, None)
+            scope = model
+        if model in store and not store[model]:
+            store.pop(model, None)
+        save_lessons(store)
+        print("%s forgot all lessons for %s" % (OKc("✓"), GOLD(scope)))
+        return
+    task = args.task or "*"
+    bucket = tasks.get(task)
+    if not isinstance(bucket, list) or not bucket:
+        raise SystemExit("No lessons for %s/%s." % (model, task))
+    try:
+        idx = int(args.index)
+    except (TypeError, ValueError):
+        raise SystemExit("forget: give a 0-based <index> (see `scrooge lessons`) or --all.")
+    if idx < 0 or idx >= len(bucket):
+        raise SystemExit("Index %s out of range (0..%d) for %s/%s." % (args.index, len(bucket) - 1, model, task))
+    removed = bucket.pop(idx)
+    if not bucket:
+        tasks.pop(task, None)
+    if not tasks:
+        store.pop(model, None)
+    save_lessons(store)
+    print("%s forgot %s/%s[%d]: %s" % (OKc("✓"), GOLD(model), task, idx, removed))
+# ---- pretty UI (stdlib only: truecolor + box-drawing + arrow menu) ------
+def _uitty(): return sys.stdout.isatty() and os.environ.get("TERM", "") not in ("", "dumb") and not os.environ.get("NO_COLOR")
+def _rgb(r, g, b, s): return ("\033[38;2;%d;%d;%dm%s\033[0m" % (r, g, b, s)) if _uitty() else s
+GOLD  = lambda s: _rgb(240, 196, 80, s)
+AMBER = lambda s: _rgb(190, 145, 45, s)
+GREYc = lambda s: _rgb(128, 128, 138, s)
+OKc   = lambda s: _rgb(90, 200, 130, s)
+ERRc  = lambda s: _rgb(230, 100, 100, s)
+def BOLD(s):  return ("\033[1m%s\033[0m" % s) if _uitty() else s
+def GREEN(s): return OKc(s)
+_ANSI = re.compile(r"\033\[[0-9;]*m")
+def _vlen(s): return len(_ANSI.sub("", s))
+UW = 60  # panel inner width
+def _box(lines, color=AMBER, pad=2):
+    """Draw a rounded box. Content lines must be ASCII-display-width (ANSI ok)."""
+    top = color("╭" + "─" * UW + "╮"); bot = color("╰" + "─" * UW + "╯")
+    body = []
+    for ln in lines:
+        space = UW - pad - _vlen(ln)
+        body.append(color("│") + " " * pad + ln + " " * max(0, space) + color("│"))
+    return "\n".join([top] + body + [bot])
+def _banner():
+    if not _uitty():
+        print("Token Scrooge — setup"); return
+    print()
+    print(_box([GOLD(BOLD("TOKEN  SCROOGE")) + GREYc("   $ make the cheap models do the grunt work"),
+                GREYc("one orchestrator in charge · cheap labor with receipts")], color=GOLD))
+def _step(n, total, title, sub=""):
+    print()
+    print(" " + GOLD(BOLD("%d/%d" % (n, total))) + "  " + BOLD(title) + (("   " + GREYc(sub)) if sub else ""))
+    print(" " + AMBER("─" * UW))
+def _menu(options, default=0):
+    """options: list of (label, hint). Arrow-key select with ❯; numbered fallback when not a TTY."""
+    if not (sys.stdin.isatty() and _uitty()):
+        for i, (lab, hint) in enumerate(options, 1):
+            print("   %d) %s  %s" % (i, lab, GREYc(hint)))
+        c = _ask("   Select [1-%d] (default %d): " % (len(options), default + 1), str(default + 1))
+        try: return max(1, min(len(options), int(c))) - 1
+        except ValueError: return default
+    import termios, tty
+    idx = default; fd = sys.stdin.fileno(); old = termios.tcgetattr(fd)
+    def draw(first=False):
+        if not first: sys.stdout.write("\033[%dA" % len(options))
+        for i, (lab, hint) in enumerate(options):
+            sel = i == idx
+            ptr = GOLD("❯") if sel else " "
+            txt = GOLD(BOLD(lab)) if sel else lab
+            sys.stdout.write("\r\033[K   %s %s  %s\n" % (ptr, txt, GREYc(hint)))
+        sys.stdout.flush()
+    try:
+        sys.stdout.write(GREYc("   (↑/↓ to move, Enter to select)\n"))
+        draw(first=True)
+        tty.setcbreak(fd)
+        while True:
+            ch = sys.stdin.read(1)
+            if ch in ("\r", "\n"): break
+            elif ch == "\x1b":
+                seq = sys.stdin.read(2)
+                if seq == "[A": idx = (idx - 1) % len(options); draw()
+                elif seq == "[B": idx = (idx + 1) % len(options); draw()
+            elif ch == "k": idx = (idx - 1) % len(options); draw()
+            elif ch == "j": idx = (idx + 1) % len(options); draw()
+            elif ch.isdigit() and 1 <= int(ch) <= len(options): idx = int(ch) - 1; draw()
+            elif ch == "\x03": raise KeyboardInterrupt
+    except Exception:
+        return default
+    finally:
+        termios.tcsetattr(fd, termios.TCSADRAIN, old)
+    return idx
+def _spin(label, fn):
+    """Run fn() while animating a spinner on `label`; return fn()'s result. Plain when not a TTY."""
+    if not _uitty():
+        return fn()
+    import threading, itertools, time as _t
+    box = {}
+    th = threading.Thread(target=lambda: box.__setitem__("r", fn())); th.start()
+    for fr in itertools.cycle("⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏"):
+        if not th.is_alive(): break
+        sys.stdout.write("\r   %s %s" % (GOLD(fr), label)); sys.stdout.flush()
+        _t.sleep(0.08)
+    th.join()
+    sys.stdout.write("\r\033[K")
+    return box.get("r")
+# Orchestrator presets: (key, label, $/1M in, $/1M out). The orchestrator is the
+# model YOU drive your agent with; it only sets the ledger's savings baseline
+# (Scrooge never calls it). Prices are approximate and editable in registry.json.
+# Ordered premium → budget: a cheap flagship can orchestrate for the truly thrifty.
+ORCHESTRATORS = [
+    # frontier / premium
+    ("claude-opus",    "Claude Opus",            15.0, 75.0),
+    ("claude-sonnet",  "Claude Sonnet",           3.0, 15.0),
+    ("gpt-flagship",   "OpenAI GPT (flagship)",  10.0, 30.0),
+    ("gemini-pro",     "Gemini Pro",              1.25, 10.0),
+    ("grok",           "xAI Grok",                3.0, 15.0),
+    ("mistral-large",  "Mistral Large",           2.0,  6.0),
+    # budget flagships (cheap enough to orchestrate on a tight budget)
+    ("deepseek",       "DeepSeek V3 / R1  · budget", 0.27, 1.10),
+    ("kimi",           "Kimi K2 (Moonshot) · budget", 0.60, 2.50),
+    ("qwen",           "Qwen Max · budget",       1.60, 6.40),
+    ("glm",            "Zhipu GLM-4.6 · budget",  0.60, 2.20),
+    ("custom",         "Other flagship / custom", 0.0,  0.0),
+]
+def _ask(prompt, default=""):
+    try:
+        v = input(prompt).strip()
+        return v or default
+    except EOFError:
+        return default
+def _ask_secret(prompt):
+    if sys.stdin.isatty():
+        import getpass
+        try:
+            return getpass.getpass(prompt).strip()
+        except Exception:
+            return _ask(prompt)
+    return _ask(prompt)  # piped (non-interactive / tests)
+def _ensure_registry():
+    os.makedirs(SCROOGE_DIR, exist_ok=True)
+    if os.path.exists(REGISTRY):
+        return
+    here = os.path.dirname(os.path.realpath(__file__))
+    for cand in (os.path.join(SCROOGE_DIR, "registry.template.json"),
+                 os.path.join(here, "..", "registry.template.json")):
+        if os.path.exists(cand):
+            shutil.copy(cand, REGISTRY)
+            return
+    raise SystemExit("No registry template found near %s — reinstall Token Scrooge." % here)
+def _write_env_file(new_keys):
+    """Merge new KEY=VALUE pairs into ~/.token-scrooge/.env, preserving existing, chmod 600."""
+    path = os.path.join(SCROOGE_DIR, ".env")
+    existing = {}
+    if os.path.exists(path):
+        for line in open(path):
+            line = line.strip()
+            if line and not line.startswith("#") and "=" in line:
+                k, v = line.split("=", 1); existing[k.strip()] = v.strip()
+    existing.update({k: v for k, v in new_keys.items() if v})
+    with open(path, "w") as fh:
+        fh.write("# Token Scrooge — provider API keys. Loaded automatically by `scrooge`.\n")
+        fh.write("# Created by `scrooge setup`. Keep private (this file is chmod 600).\n")
+        for k, v in existing.items():
+            fh.write("%s=%s\n" % (k, v))
+    os.chmod(path, 0o600)
+    return path
+def _ping(reg, provider):
+    """Live-test a provider key by listing its models. Returns (ok, detail)."""
+    key = provider_key(reg, provider)
+    if not key:
+        return False, "no key"
+    base = reg["providers"][provider]["base_url"].rstrip("/")
+    headers = {"Authorization": "Bearer " + key}
+    headers.update(reg["providers"][provider].get("extra_headers", {}))
+    try:
+        data = http_get(base + "/models", headers, timeout=15)
+        n = len(data.get("data", []) or [])
+        return True, "%d models" % n
+    except urllib.error.HTTPError as e:
+        return False, "HTTP %s" % e.code
+    except Exception as e:
+        return False, str(e)[:40]
+def _install_claude_gate():
+    """Opt-in: copy the verification gate (hook + agent + skill) into ~/.claude and
+    wire the Stop/SubagentStop hook idempotently. Non-destructive."""
+    repo = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+    claude = os.path.join(HOME, ".claude")
+    copies = [
+        (os.path.join(repo, "hooks", "verify-done-gate.py"), os.path.join(claude, "hooks", "verify-done-gate.py")),
+        (os.path.join(repo, "hooks", "scrooge-announce.py"), os.path.join(claude, "hooks", "scrooge-announce.py")),
+        (os.path.join(repo, "agents", "adversarial-verifier.md"), os.path.join(claude, "agents", "adversarial-verifier.md")),
+        (os.path.join(repo, "skills", "diverge", "SKILL.md"), os.path.join(claude, "skills", "diverge", "SKILL.md")),
+    ]
+    for src, dst in copies:
+        if not os.path.exists(src):
+            err(RED("  skipped (missing in repo): %s" % src)); continue
+        os.makedirs(os.path.dirname(dst), exist_ok=True)
+        shutil.copy(src, dst)
+        print("  ✓ %s" % dst.replace(HOME, "~"))
+    # merge hooks into settings.json
+    sp = os.path.join(claude, "settings.json")
+    settings = {}
+    if os.path.exists(sp):
+        try: settings = json.load(open(sp))
+        except Exception: settings = {}
+    hooks = settings.setdefault("hooks", {})
+    gate = {"type": "command", "command": "python3 %s" % os.path.join(claude, "hooks", "verify-done-gate.py")}
+    def _has(arr, needle): return any(any(needle in h.get("command","") for h in e.get("hooks",[])) for e in arr)
+    for ev in ("Stop", "SubagentStop"):
+        arr = hooks.setdefault(ev, [])
+        if not _has(arr, "verify-done-gate.py"):
+            arr.append({"hooks": [gate]})
+    # surface scrooge delegations live (PreToolUse on Bash)
+    announce = {"type": "command", "command": "python3 %s" % os.path.join(claude, "hooks", "scrooge-announce.py")}
+    pre = hooks.setdefault("PreToolUse", [])
+    if not _has(pre, "scrooge-announce.py"):
+        pre.append({"matcher": "Bash", "hooks": [announce]})
+    json.dump(settings, open(sp, "w"), indent=2); open(sp, "a").write("\n")
+    print("  ✓ wired verify-done-gate.py into Stop + SubagentStop (%s)" % sp.replace(HOME, "~"))
+    print("  ✓ wired scrooge-announce.py into PreToolUse(Bash) — marks cheap-model delegations")
+    print(DIM("  (disable a block any time with VERIFY_DONE_GATE_OFF=1)"))
+def cmd_setup(args):
+    _banner()
+    _ensure_registry()
+    reg = load_registry()
+    TOTAL = 4
+    # 1) Orchestrator — arrow-key menu
+    _step(1, TOTAL, "Your orchestrator", "the expensive model you drive with — sets the savings baseline")
+    opts = [(label, ("$%.0f/$%.0f per 1M" % (ci, co)) if k != "custom" else "enter your own pricing")
+            for (k, label, ci, co) in ORCHESTRATORS]
+    idx = _menu(opts, default=0)
+    okey, olabel, oci, oco = ORCHESTRATORS[idx]
+    if okey == "custom":
+        olabel = _ask("   Orchestrator name: ", "Custom")
+        oci = float(_ask("   Its input  $/1M tokens: ", "15") or 15)
+        oco = float(_ask("   Its output $/1M tokens: ", "75") or 75)
+    reg["orchestrator"] = {"name": olabel, "cost_in": oci, "cost_out": oco}
+    print("   " + OKc("✓") + " orchestrator: " + GOLD(BOLD(olabel)))
+    # 2) Keys — show status dots, prompt only for missing
+    _step(2, TOTAL, "Provider API keys", "paste to enable · Enter to skip · detected env keys kept")
+    new_keys = {}
+    for provider, cfg in reg["providers"].items():
+        env_names = cfg.get("env", [])
+        already = next((n for n in env_names if os.environ.get(n)), None)
+        if already:
+            print("   %s %-11s %s" % (GOLD("●"), provider, GREYc("detected · " + already)))
+            continue
+        print("   %s %-11s %s" % (GREYc("○"), provider, GREYc("needs " + (env_names[0] if env_names else "key"))))
+        val = _ask_secret("       ↳ paste key (or Enter to skip): ")
+        if val:
+            new_keys[(env_names[0] if env_names else provider.upper() + "_API_KEY")] = val
+            print("       " + OKc("✓ added"))
+    # Optional Artificial Analysis key — powers capability-aware routing's WEEKLY score refresh.
+    # Routing already works from the shipped capabilities.seed.json; this just keeps the numbers
+    # current as models change. Free key: artificialanalysis.ai (create account → API).
+    if os.environ.get("AA_API_KEY") or os.environ.get("ARTIFICIAL_ANALYSIS_API_KEY"):
+        print("   %s %-11s %s" % (GOLD("●"), "capability", GREYc("AA key detected · model quality scores will auto-refresh weekly")))
+    else:
+        print("   %s %-11s %s" % (GREYc("○"), "capability", GREYc("optional · keeps model quality scores fresh for smart routing")))
+        print("       " + GREYc("free key at artificialanalysis.ai (account → API). Skip and scrooge still"))
+        print("       " + GREYc("routes from the shipped scores — this only enables the weekly refresh."))
+        aav = _ask_secret("       ↳ paste Artificial Analysis key (or Enter to skip): ")
+        if aav:
+            new_keys["AA_API_KEY"] = aav
+            print("       " + OKc("✓ added — weekly capability refresh enabled"))
+    env_path = _write_env_file(new_keys)
+    json.dump(reg, open(REGISTRY, "w"), indent=2); open(REGISTRY, "a").write("\n")
+    print("   " + OKc("✓") + " keys saved to " + GOLD(env_path.replace(HOME, "~")) + GREYc(" (chmod 600)"))
+    # 3) Live test — spinner per provider, with inline retry for failures
+    _step(3, TOTAL, "Testing live providers", "")
+    def run_tests():
+        load_env_fallback()
+        r = load_registry()
+        livec, failed = 0, []
+        for provider in r["providers"]:
+            if not provider_key(r, provider):
+                continue
+            ok, detail = _spin("testing %s …" % provider, lambda p=provider: _ping(r, p))
+            print("   %s %-11s %s" % (OKc("✓") if ok else ERRc("✗"), provider, (OKc(detail) if ok else ERRc(detail))))
+            if ok: livec += 1
+            else: failed.append(provider)
+        return r, livec, failed
+    reg, live, failed = run_tests()
+    # A failure is almost always a wrong paste or a stale/auto-detected key.
+    # Offer to re-enter the right key and re-test, in place, until resolved.
+    while failed and sys.stdin.isatty():
+        print("   " + AMBER("%d provider(s) failed — usually a wrong or stale key." % len(failed)))
+        if not _ask("   Re-enter keys for the failed ones now? " + GREYc("[Y/n]") + " ", "y").lower().startswith("y"):
+            break
+        fixes = {}
+        for provider in failed:
+            envn = reg["providers"][provider].get("env", [])
+            primary = envn[0] if envn else provider.upper() + "_API_KEY"
+            val = _ask_secret("   %s → paste %s (Enter to skip): " % (provider, primary))
+            if val:
+                fixes[primary] = val
+                os.environ[primary] = val  # picked up on the next test pass
+        if not fixes:
+            break
+        _write_env_file(fixes)
+        print("   " + GREYc("re-testing…"))
+        reg, live, failed = run_tests()
+    print("   " + (OKc("● %d provider(s) live" % live) if live else ERRc("no working providers yet — add a key and re-run `scrooge setup`")))
+    # 4) Optional Claude Code gate
+    _step(4, TOTAL, "Claude Code verification gate", "optional")
+    print("   " + GREYc("diverge skill + adversarial-verifier agent + a Stop hook that blocks"))
+    print("   " + GREYc("'done' claims with no build/test evidence."))
+    if _ask("   Install into ~/.claude? " + GREYc("[y/N]") + " ", "n").lower().startswith("y"):
+        _install_claude_gate()
+    else:
+        print("   " + GREYc("skipped — run `scrooge setup` again any time to add it."))
+    # Done panel
+    print()
+    print(_box([OKc(BOLD("✓ Ready.")) + GREYc("  saving baseline: ") + GOLD(olabel),
+                "",
+                GREYc("try  ") + GOLD("scrooge list"),
+                GREYc("     ") + GOLD("scrooge --task summarize < file.md"),
+                GREYc("     ") + GOLD("scrooge ledger") + GREYc("   # spend + savings")], color=OKc))
+    print()
+def main():
+    load_env_fallback()
+    argv = sys.argv[1:]
+    if argv and argv[0] == "setup":
+        return cmd_setup(None)
+    reg = load_registry()
+    # Manual subcommand dispatch (avoids argparse subparser vs positional-prompt clash).
+    if argv and argv[0] in ("models", "list", "ledger", "watch", "learn", "lessons", "forget"):
+        cmd, rest = argv[0], argv[1:]
+        if cmd == "models":
+            ap = argparse.ArgumentParser(prog="scrooge models"); ap.add_argument("provider")
+            return cmd_models(reg, ap.parse_args(rest))
+        if cmd == "list":
+            return cmd_list(reg, None)
+        if cmd == "ledger":
+            ap = argparse.ArgumentParser(prog="scrooge ledger")
+            ap.add_argument("--since", default="24h")
+            ap.add_argument("--here", action="store_true", help="only this project (cwd's git repo / dir)")
+            ap.add_argument("--project", help="only this project name (see SCROOGE_PROJECT)")
+            return cmd_ledger(reg, ap.parse_args(rest))
+        if cmd == "watch":
+            ap = argparse.ArgumentParser(prog="scrooge watch")
+            ap.add_argument("--tail", type=int, default=3, help="show the last N calls before following")
+            ap.add_argument("--all", action="store_true", help="replay the entire ledger, then follow")
+            ap.add_argument("--no-follow", action="store_false", dest="follow",
+                            help="print matching calls and exit (don't stream)")
+            ap.add_argument("--here", action="store_true",
+                            help="only THIS project (cwd's git repo / dir) — run it in the project's terminal")
+            ap.add_argument("--project", help="only this project name (the SCROOGE_PROJECT / git-dir label)")
+            ap.add_argument("--cwd", help="only calls whose working dir is under this path")
+            return cmd_watch(reg, ap.parse_args(rest))
+        if cmd == "learn":
+            ap = argparse.ArgumentParser(prog="scrooge learn")
+            ap.add_argument("--model", "-m")
+            ap.add_argument("--task", "-t")
+            ap.add_argument("--seed", action="store_true", help="merge the shipped seed set into your store")
+            ap.add_argument("text", nargs="?")
+            return cmd_learn(reg, ap.parse_args(rest))
+        if cmd == "lessons":
+            ap = argparse.ArgumentParser(prog="scrooge lessons")
+            ap.add_argument("--model", "-m")
+            ap.add_argument("--task", "-t")
+            return cmd_lessons(reg, ap.parse_args(rest))
+        if cmd == "forget":
+            ap = argparse.ArgumentParser(prog="scrooge forget")
+            ap.add_argument("--model", "-m")
+            ap.add_argument("--task", "-t")
+            ap.add_argument("--all", action="store_true")
+            ap.add_argument("index", nargs="?")
+            return cmd_forget(reg, ap.parse_args(rest))
+    # Default: a model call.
+    ap = argparse.ArgumentParser(prog="scrooge")
+    ap.add_argument("prompt", nargs="?")
+    ap.add_argument("--model", "-m")
+    ap.add_argument("--task", "-t")
+    ap.add_argument("--system", "-s")
+    ap.add_argument("--json", action="store_true")
+    ap.add_argument("--max-tokens", type=int, dest="max_tokens")
+    ap.add_argument("--temperature", type=float, default=0.3)
+    ap.add_argument("--no-lessons", action="store_true", dest="no_lessons",
+                    help="skip injecting learned per-model lessons (A/B comparison)")
+    ap.add_argument("--latest", action="store_true",
+                    help="for the default model, re-check the provider's live model list now (bypass cache)")
+    ap.add_argument("--difficulty", "-d", choices=["easy", "medium", "hard"],
+                    help="task difficulty → sets the capability floor (else inferred)")
+    ap.add_argument("--spread", type=int, default=0, metavar="N",
+                    help="fan a batch across the top-N capable models (rate-limit/throughput)")
+    ap.add_argument("--no-weigh", action="store_true", dest="no_weigh",
+                    help="skip capability weighing; use the registry's cheapest-first task order")
+    return cmd_call(reg, ap.parse_args(argv))
+if __name__ == "__main__":
+    main()