npm - trantor - Versions diffs - 0.17.45 → 0.17.46 - Mend

trantor 0.17.45 → 0.17.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +1 -1
package/bin/advise.mjs +4 -4
package/bin/doctor.mjs +1 -1
package/engine/bin/scrooge +15 -4
package/engine/bin/scrooge-capabilities +71 -5
package/engine/test-routing.py +55 -0
package/package.json +3 -3

package/.claude-plugin/marketplace.json CHANGED

@@ -6,14 +6,14 @@
   },
   "metadata": {
     "description": "Trantor — the hub-world for AI agent crews: live message bus, presence, project Kanban/flow board + context-handoff for independent AI coding agents (Claude, Codex, Gemini, …)",
-    "version": "0.17.45"
+    "version": "0.17.46"
   },
   "plugins": [
     {
       "name": "trantor",
       "source": "./",
       "description": "The hub-world for AI agent crews. Say \"fire up the crew\" and Claude becomes the architect: a plan-aware Advisor routes the work (solo / cheap inline calls / live crew of Codex, GLM, Kimi & DeepSeek in their own terminal windows), a Kanban/flow command center with a testing gate tracks it, and an economics brain (Scrooge) keeps the receipts. Includes the relay MCP, a SessionStart auto-discovery hook, and a PreCompact context-handoff so a fresh session can take over a full window instead of compacting.",
-      "version": "0.17.45",
+      "version": "0.17.46",
       "author": {
         "name": "Sasha Bogojevic"
       },

package/.claude-plugin/plugin.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "trantor",
-  "version": "0.17.45",
+  "version": "0.17.46",
   "description": "Trantor — the hub-world for AI agent crews: live message bus, presence, project Kanban/flow board + crew orchestration for independent AI coding agents (Claude, Codex, Gemini, Kimi, DeepSeek)",
   "mcpServers": {
     "relay": {

package/bin/advise.mjs CHANGED

@@ -135,10 +135,10 @@ export function advise(input, world = loadWorld()) {
       : p.difficulty === "medium"
         ? `medium → solid mid-tier (${agent}) keeps frontier seats free for hard work; ${pool === "api" ? "metered" : "quota"} pool`
         : `easy → cheapest seat (${agent})`;
-    // OpenRouter live-select ranks by COST only (the 335-model catalog has no capability scores
-    // yet — that's the capability-ingestion follow-up), so for HARD work it can land a cheap model.
-    // Flag it: pin a strong model explicitly (openrouter:openrouter/<vendor>/<model>) for hard work.
-    if (agent === "openrouter" && p.difficulty === "hard") why_r += ` — ⚠️ live-select ranks by cost; PIN a strong model (e.g. openrouter:openrouter/anthropic/claude-opus-latest) for hard work until capability data lands`;
+    // OpenRouter live-select ranks capability×cost ACROSS the catalog once `scrooge-capabilities`
+    // has scored it (AA scores + price proxy + per-difficulty cost weighting → hard escalates to a
+    // strong model, easy stays cheap). If it hasn't been run, routing falls back to cost-only.
+    if (agent === "openrouter" && p.difficulty === "hard") why_r += ` — OpenRouter ranks capability×cost; run \`scrooge-capabilities\` to keep the catalog scored (or pin openrouter:openrouter/<vendor>/<model>)`;
     return { ...p, executor: agent, pool, est_cost_usd: est, reason: why_r };
   });
   // crew-size rationale: seats are EMERGENT from the work, and we say so

package/bin/doctor.mjs CHANGED

@@ -64,7 +64,7 @@ const CLIS = [
   // OpenRouter — the BYOM on-ramp: ONE key fronts hundreds of models. Rides opencode; the same
   // OPENROUTER_API_KEY Scrooge already uses authenticates the crew seat (the runner sources the
   // .env files). Available the moment the key exists in env/opencode + declared `openrouter=api`.
-  { name: "openrouter (via opencode · BYOM, hundreds of models)", bin: "opencode", wired: () => !!read(join(H, ".config", "opencode", "opencode.json"))?.mcp?.relay, auth: () => !!process.env.OPENROUTER_API_KEY || !!read(join(H, ".config", "opencode", "opencode.json"))?.provider?.openrouter?.options?.apiKey || [join(H, ".token-scrooge", ".env"), join(H, ".agent-bus", ".env")].some(f => { try { return readFileSync(f, "utf8").includes("OPENROUTER_API_KEY"); } catch { return false; } }), login: `get a key at openrouter.ai/keys, then: echo 'OPENROUTER_API_KEY=sk-or-…' >> ~/.agent-bus/.env && trantor profile set openrouter=api. Seat: trantor up openrouter (live-selects) or pin trantor up openrouter:openrouter/<vendor>/<model>` },
+  { name: "openrouter (via opencode · BYOM, hundreds of models)", bin: "opencode", wired: () => !!read(join(H, ".config", "opencode", "opencode.json"))?.mcp?.relay, auth: () => !!process.env.OPENROUTER_API_KEY || !!read(join(H, ".config", "opencode", "opencode.json"))?.provider?.openrouter?.options?.apiKey || [join(H, ".token-scrooge", ".env"), join(H, ".agent-bus", ".env")].some(f => { try { return readFileSync(f, "utf8").includes("OPENROUTER_API_KEY"); } catch { return false; } }), login: `get a key at openrouter.ai/keys, then: echo 'OPENROUTER_API_KEY=sk-or-…' >> ~/.agent-bus/.env && trantor profile set openrouter=api && scrooge-capabilities (scores the catalog so the crew routes it by difficulty). Seat: trantor up openrouter (live-selects) or pin trantor up openrouter:openrouter/<vendor>/<model>` },
 ];
 let installed = 0;
 for (const c of CLIS) {

package/engine/bin/scrooge CHANGED

@@ -171,8 +171,11 @@ def model_quality(caps, mid, metric):
         v = c.get("intelligence")          # fall back to the general index
     return float(v) if isinstance(v, (int, float)) else 0.0
-def blended_cost(reg, mid):
-    m = reg["models"].get(mid, {})
+def blended_cost(reg, caps, mid):
+    # The curated registry carries cost for its own models; for CATALOG models (OpenRouter's
+    # hundreds, brought via --candidates) the per-model price rides on the capability entry
+    # instead — so a brought model is ranked on its real price without bloating the registry.
+    m = reg["models"].get(mid) or (caps.get(mid) if isinstance(caps, dict) else None) or {}
     return max(1e-6, 0.3 * m.get("cost_in", 0) + 0.7 * m.get("cost_out", 0))
 def weigh_candidates(reg, caps, cand_ids, task, difficulty):
@@ -187,8 +190,16 @@ def weigh_candidates(reg, caps, cand_ids, task, difficulty):
         floor = scored_q[min(len(scored_q) - 1, int(round(pct * (len(scored_q) - 1))))]
     survivors = [(mid, q) for mid, q in quals if q >= floor] or quals
     rw = reg.get("routing") or {}
-    qw, cw = rw.get("q_weight", 1.5), rw.get("c_weight", 0.5)
-    out = [(mid, (max(q, 1e-6) ** qw) / (blended_cost(reg, mid) ** cw)) for mid, q in survivors]
+    qw = rw.get("q_weight", 1.5)
+    # cost-weight scales DOWN as difficulty rises: EASY work optimizes price (cheap wins), HARD
+    # work prioritizes capability so it can escalate to a genuinely strong model instead of the
+    # cheapest-that-clears-the-floor (the "deepseek-flash wins everything" trap on a huge catalog).
+    # It still weighs cost (hard picks a strong *value* model, not a blind frontier overpay).
+    # Per-difficulty override via registry.routing.c_weight_<difficulty>; legacy c_weight still honored.
+    CW_BY_DIFF = {"easy": 0.65, "medium": 0.5, "hard": 0.1}
+    cw_key = "c_weight_" + str(difficulty)
+    cw = rw[cw_key] if cw_key in rw else (CW_BY_DIFF[difficulty] if difficulty in CW_BY_DIFF else rw.get("c_weight", 0.5))
+    out = [(mid, (max(q, 1e-6) ** qw) / (blended_cost(reg, caps, mid) ** cw)) for mid, q in survivors]
     out.sort(key=lambda x: -x[1])
     return out

package/engine/bin/scrooge-capabilities CHANGED

@@ -22,7 +22,7 @@ Usage:  scrooge-capabilities            # refresh from AA (+OpenRouter), show a
         scrooge-capabilities --dry-run  # fetch + match, print, but don't write
 Exit: 0 updated · 1 nothing fetched (no key / network) · 2 error.
 """
-import sys, os, json, argparse, urllib.request, urllib.error
+import sys, os, json, argparse, math, urllib.request, urllib.error
 HOME = os.path.expanduser("~")
 SCROOGE_DIR = os.environ.get("SCROOGE_HOME", os.path.join(HOME, ".token-scrooge"))
@@ -122,13 +122,35 @@ def fetch_openrouter():
         if not mid:
             continue
         arch = m.get("architecture") or {}
-        out[norm(mid.split("/")[-1])] = {
+        pr = m.get("pricing") or {}
+        def _per_m(v):                       # OpenRouter prices are USD PER TOKEN → $/1M tokens
+            try:
+                return round(float(v) * 1e6, 4)
+            except Exception:
+                return None
+        # KEY BY THE RAW LAST SEGMENT (dots intact) — this is exactly what `scrooge route` uses to
+        # look a candidate up (by_bare = id.split("/")[-1], no normalisation), so the keys must match.
+        out[mid.split("/")[-1]] = {
+            "full": mid,
             "context": m.get("context_length"),
             "modalities": arch.get("input_modalities"),
+            "cost_in": _per_m(pr.get("prompt")),
+            "cost_out": _per_m(pr.get("completion")),
         }
-    sys.stderr.write(DIM("  OpenRouter: %d models fetched (context/modality).\n" % len(out)))
+    sys.stderr.write(DIM("  OpenRouter: %d models fetched (pricing/context/modality).\n" % len(out)))
     return out
+def price_proxy_capability(cost_out):
+    """A transparent fallback capability when AA has no score for a brought model: price is a
+    decent proxy for tier (frontier models cost more). Monotonic in cost_out ($/1M), with an
+    HONEST ceiling — never claim frontier capability from price alone. Gives every catalog model
+    a non-zero, rank-able score so the difficulty floor can separate hard from easy work; a later
+    AA refresh upgrades it to a real score."""
+    if not isinstance(cost_out, (int, float)) or cost_out <= 0:
+        return 8.0                            # free / unknown price → low floor
+    cap = 30.0 + 20.0 * math.log10(cost_out + 0.1)
+    return round(max(5.0, min(72.0, cap)), 1)
 def aa_scores(m):
     ev = m.get("evaluations") or {}
     g = ev.get("gpqa")
@@ -162,6 +184,7 @@ def main():
         return 1
     today = __import__("time").strftime("%Y-%m-%d")
+    by_or_norm = {norm(k): v for k, v in by_or.items()}   # by_or is raw-keyed; registry ids are dash-form
     matched, unmatched = [], []
     for mid in reg["models"]:
         existing = caps.get(mid) if isinstance(caps.get(mid), dict) else {}
@@ -175,7 +198,7 @@ def main():
             matched.append(mid)
         elif by_slug:
             unmatched.append(mid)
-        orx = by_or.get(norm(mid))
+        orx = by_or_norm.get(norm(mid))
         if orx:
             if orx.get("context"):
                 rec["context"] = orx["context"]
@@ -184,10 +207,53 @@ def main():
         if rec:
             caps[mid] = rec
+    # ---- OpenRouter CATALOG ingestion: make every brought model routable by difficulty -------
+    # The crew passes OpenRouter's hundreds of models as candidates; the router ranks by
+    # capability (gated by a difficulty floor) ÷ price. Write each catalog model as a first-class
+    # entry keyed by its RAW bare name (matching `scrooge route`'s lookup): a REAL AA score when
+    # the model matches a slug, else a transparent price-tier PROXY — plus its real price so cost
+    # ranking works. Registry-curated models are never shadowed. Marked `_src` for honesty; a
+    # later AA refresh upgrades proxies in place.
+    or_real = or_proxy = 0
+    for bare, info in by_or.items():
+        if bare in reg["models"]:                     # curated registry model — leave it authoritative
+            continue
+        existing = caps.get(bare) if isinstance(caps.get(bare), dict) else {}
+        rec = dict(existing)
+        if info.get("cost_in") is not None:
+            rec["cost_in"] = info["cost_in"]
+        if info.get("cost_out") is not None:
+            rec["cost_out"] = info["cost_out"]
+        if info.get("context"):
+            rec.setdefault("context", info["context"])
+        has_real = isinstance(rec.get("coding"), (int, float)) or isinstance(rec.get("intelligence"), (int, float))
+        am = None
+        if by_slug:
+            full = info.get("full", "")
+            am = by_slug.get(norm(full)) or by_slug.get(norm(full.split("/")[-1])) or by_slug.get(bare)
+        if am:
+            rec.update({k: v for k, v in aa_scores(am).items() if v is not None})
+            rec["updated"] = today
+            rec["source"] = "artificialanalysis"
+            rec.pop("_src", None)
+            or_real += 1
+        elif not has_real or rec.get("_src") == "openrouter-price-proxy":
+            p = price_proxy_capability(rec.get("cost_out"))
+            if p is not None:
+                rec["coding"] = rec["intelligence"] = rec["reasoning"] = p
+                rec["_src"] = "openrouter-price-proxy"
+                rec["updated"] = today
+                or_proxy += 1
+        if rec:
+            caps[bare] = rec
     caps["_meta"].update({"source": "artificialanalysis.ai + openrouter", "refreshed": today,
-                          "attribution": "https://artificialanalysis.ai/"})
+                          "attribution": "https://artificialanalysis.ai/",
+                          "openrouter_catalog": {"aa_scored": or_real, "price_proxy": or_proxy}})
     sys.stderr.write(GOLD("🪙 scrooge-capabilities — %d matched, %d unmatched\n" % (len(matched), len(unmatched))))
+    if by_or:
+        sys.stderr.write(GOLD("   OpenRouter catalog — %d AA-scored, %d price-proxy (now routable by difficulty)\n" % (or_real, or_proxy)))
     for mid in matched:
         c = caps[mid]
         sys.stderr.write("  %s %-24s intel=%-5s code=%-5s reason=%-5s %st/s\n" % (

package/engine/test-routing.py ADDED

@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+"""Regression test for capability×cost routing — difficulty-aware escalation + catalog cost.
+Runs the real weigh_candidates/blended_cost out of bin/scrooge against SYNTHETIC capability
+data (no network, no key), asserting:
+  1. catalog cost rides on the capability entry (blended_cost falls back to caps when the
+     registry doesn't have the model) — the OpenRouter-routing fix,
+  2. cost-weight is difficulty-aware: a cheap-but-decent model wins EASY, while HARD escalates
+     to a genuinely stronger model instead of the cheapest-that-clears-the-floor.
+Exit 0 = all pass. Used to verify the T2 OpenRouter-routes-by-difficulty change.
+"""
+import os, sys
+HERE = os.path.dirname(os.path.realpath(__file__))
+SCROOGE = os.path.join(HERE, "bin", "scrooge")
+g = {"__name__": "scr", "__file__": SCROOGE}
+exec(compile(open(SCROOGE).read(), "scrooge", "exec"), g)
+# Synthetic registry has NO catalog models — cost must come from caps (the OpenRouter case).
+reg = {"models": {}, "routing": {}}
+caps = {
+    "cheap-weak":   {"coding": 20, "cost_in": 0.05, "cost_out": 0.10},   # junk-tier
+    "cheap-strong": {"coding": 56, "cost_in": 0.14, "cost_out": 0.28},   # deepseek-flash-like
+    "mid-strong":   {"coding": 69, "cost_in": 0.60, "cost_out": 2.40},   # glm-5.2-like
+    "frontier":     {"coding": 75, "cost_in": 5.00, "cost_out": 22.50},  # gpt-5.5-like
+}
+cands = list(caps.keys())
+fails = []
+def ok(name, cond):
+    print(("  ✓ " if cond else "  ✗ ") + name)
+    if not cond:
+        fails.append(name)
+# 1. catalog cost via caps fallback (model absent from registry)
+ok("blended_cost falls back to the capability entry for catalog models",
+   abs(g["blended_cost"](reg, caps, "frontier") - (0.3 * 5.0 + 0.7 * 22.5)) < 1e-6)
+ok("blended_cost is 1e-6 for an entirely unknown model (no crash)",
+   g["blended_cost"](reg, caps, "does-not-exist") == 1e-6)
+def winner(diff):
+    return g["weigh_candidates"](reg, caps, cands, "code", diff)[0][0]
+easy, medium, hard = winner("easy"), winner("medium"), winner("hard")
+print("  picks → easy=%s medium=%s hard=%s" % (easy, medium, hard))
+# 2. difficulty-aware escalation
+ok("easy prefers a cheap model (cost-optimized)", caps[easy]["coding"] <= caps["mid-strong"]["coding"])
+ok("hard escalates to a stronger model than easy", caps[hard]["coding"] > caps[easy]["coding"])
+ok("hard reaches genuine strength (>= mid-strong tier)", caps[hard]["coding"] >= caps["mid-strong"]["coding"])
+ok("the junk-tier model never wins any difficulty", "cheap-weak" not in (easy, medium, hard))
+print(("\nALL PASS" if not fails else "\nFAILED: %d" % len(fails)))
+sys.exit(1 if fails else 0)

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "trantor",
-  "version": "0.17.45",
+  "version": "0.17.46",
   "type": "module",
   "bin": {
     "trantor": "bin/cli.mjs"
@@ -10,9 +10,9 @@
     "zod": "^4.4.3"
   },
   "scripts": {
-    "test": "node test.mjs && node test-scenarios.mjs && node test-failure.mjs && node test-handoff.mjs && node test-agents.mjs && node test-update.mjs && node test-handoff-guard.mjs && node test-balances.mjs && node test-subagent-cost.mjs && node test-inbox.mjs && node test-inflight.mjs && node test-focus.mjs"
+    "test": "node test.mjs && node test-scenarios.mjs && node test-failure.mjs && node test-handoff.mjs && node test-agents.mjs && node test-update.mjs && node test-handoff-guard.mjs && node test-balances.mjs && node test-subagent-cost.mjs && node test-inbox.mjs && node test-inflight.mjs && node test-focus.mjs && python3 engine/test-routing.py"
   },
-  "description": "The hub-world for AI agent crews — orchestrate Claude Code, Codex, Gemini, Kimi & DeepSeek as live crews with a plan-aware Advisor, a Kanban/flow command center, a testing gate, and an economics brain (Scrooge).",
+  "description": "The hub-world for AI agent crews — orchestrate Claude Code, Codex, GLM, Kimi, DeepSeek & any OpenRouter model as live crews with a plan-aware Advisor, a Kanban/flow command center, a testing gate, and an economics brain (Scrooge).",
   "files": [
     "hub.mjs",
     "mcp.mjs",