npm - copilot-reverse - Versions diffs - 0.5.3 → 0.5.5 - Mend

copilot-reverse 0.5.3 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

package/README.md +7 -3
package/dist/cli/index.js +9 -1
package/dist/core/stream-guard.js +38 -0
package/dist/core/tool-xml.js +9 -2
package/dist/providers/copilot/responses-upstream.js +7 -4
package/dist/shared/config.js +3 -0
package/dist/shared/prefs.js +24 -16
package/dist/supervisor/index.js +3 -1
package/dist/tui/app.js +16 -8
package/dist/tui/assistant/runtime.js +17 -11
package/dist/tui/assistant/tools.js +13 -0
package/dist/tui/panels/metrics-agg.js +7 -4
package/dist/tui/report.js +19 -6
package/dist/tui/setup/apply.js +10 -1
package/dist/tui/setup/clients.js +4 -0
package/dist/tui/setup/status.js +18 -11
package/dist/version.js +1 -1
package/dist/worker/anthropic-server.js +35 -2
package/dist/worker/openai-server.js +37 -4
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -56,9 +56,9 @@ Here's the app itself — a prompt, a live status bar, and slash-command autocom
 Just **talk to it** — it understands plain English and will do the work for you:
 > *"list models"* → shows every model + its context window
-> *"set up claude"* → configures Claude Code
+> *"set up claude"* → asks scope (global/project) + model, then configures Claude Code
 > *"is the worker healthy?"* → runs a health check
-> *"why did my last request fail?"* → shows the error
+> *"why did my last request fail?"* → shows the error (incl. cut-short stream runaways)
 Prefer commands? Type `/` to see them all. The essentials:
@@ -66,7 +66,7 @@ Prefer commands? Type `/` to see them all. The essentials:
 |---|---|
 | `/setup-claude` · `/setup-codex` | Point Claude Code / Codex at copilot-reverse |
 | `/model` | Switch the chat model (1M-context models marked) |
-| `/status` · `/doctor` | Is everything healthy? |
+| `/status` · `/doctor` | Is everything healthy? (`/status` shows each client's scope + model) |
 | `/logs` · `/metrics` | What ran, what failed, and why |
 | `/dashboard` | Open a live web dashboard in your browser |
 | `/report` | File a pre-filled bug report (diagnostics only — no prompts) |
@@ -147,6 +147,10 @@ of your config untouched.
 - **Your data stays local.** The app proxies between your editor and Copilot on `127.0.0.1`. Your
   GitHub token lives only in `~/.copilot-reverse/creds.json` on your own disk.
 - **It heals itself.** If the proxy crashes, the supervisor restarts it with backoff and records why.
+- **It never freezes.** If a model degenerates (loops on one token, never stops), the proxy cuts the
+  stream cleanly as `max_tokens` and tags it — `/report` then files a prefilled issue so it's easy to flag.
+- **Tunable.** `~/.copilot-reverse` config covers ports, restart backoff, and the GitHub-token
+  heartbeat interval; defaults are sensible, override only if needed.
 - **Unofficial endpoints.** This uses community-documented Copilot endpoints with *your own*
   subscription. It may break if GitHub changes them — that's the trade-off for not needing extra keys.

package/dist/cli/index.js CHANGED Viewed

@@ -13,7 +13,7 @@ import { makeOnChat } from "../tui/assistant/on-chat.js";
 import { readGhToken, clearGhToken, hasGhTokenFile } from "../shared/creds.js";
 import { writeWebIqKey, readWebIqKey, clearWebIqKey, readWebSearchMode, writeWebSearchMode, resolveWebSearchBackend } from "../shared/webiq-key.js";
 import { readClientSetup, writeClientSetup } from "../shared/client-setup.js";
-import { readChatModel, writeChatModel } from "../shared/prefs.js";
+import { readChatModel, writeChatModel, shouldShowChange, markChangeShown } from "../shared/prefs.js";
 import { CopilotTokenStore, isCopilotTokenValid } from "../providers/copilot/token.js";
 import { fetchCopilotModels, fetchModelLimits } from "../providers/copilot/models.js";
 import { applyClaude, applyCodex, resetClaude, resetCodex, CLAUDE_ENV_KEYS, CODEX_ENV_KEYS } from "../tui/setup/apply.js";
@@ -181,6 +181,12 @@ async function launchTui() {
         }
     });
     const persistedModel = readChatModel(dataDir());
+    // "What's new" banner: MAJOR changes only — keyed by version so each release re-announces, shown
+    // ~3 launches then quiet. Minor fixes/polish do NOT go here; reserve it for things worth noticing.
+    const CHANGE_ID = `v${APP_VERSION}`;
+    const changeBanner = shouldShowChange(dataDir(), CHANGE_ID)
+        ? { lines: ["• runaway streams now cut cleanly — no more frozen 'code code code' sessions"] }
+        : undefined;
     // Startup overview. The token was already validated above (re-auth happens before we get here), so
     // GitHub is connected; web search readiness and configured clients are read from disk.
     const clientStatus = readClientStatus();
@@ -215,6 +221,8 @@ async function launchTui() {
         disableWebiq: () => { clearWebIqKey(dataDir()); },
         webSearchBackend: () => resolveWebSearchBackend(readWebSearchMode(dataDir()), Boolean(readWebIqKey(dataDir()))),
         startupStatus,
+        changeBanner,
+        onChangeSeen: () => markChangeShown(dataDir(), CHANGE_ID),
         githubStatus: async () => {
             const token = readGhToken(dataDir());
             if (!token)

package/dist/core/stream-guard.js ADDED Viewed

@@ -0,0 +1,38 @@
+// Defends a streaming turn against upstream model degeneration: the model collapses into emitting
+// the same short token forever ("code\ncode\ncode…") and never sends a stop, so a faithful proxy
+// would relay deltas until the socket dies — the session appears frozen. This watchdog converts
+// that into a clean, bounded stop. It is pure (no I/O, no timers) so it is trivially testable; the
+// idle/wall-clock timeout lives at the SSE loop where the timers are. Defaults are generous: real
+// answers don't hit them, only runaways do.
+export class RunawayGuard {
+    maxRepeats; // repetition limit: a short delta must repeat MORE than this many times in a row to trip (default 200)
+    maxOutputChars; // output cap: trips once the summed delta lengths exceed this value (default 2_000_000)
+    last = ""; // the delta that started the current run of identical consecutive deltas
+    repeats = 0; // length of the current run of identical consecutive deltas
+    chars = 0; // running total of delta.length across all push() calls (string length, i.e. UTF-16 code units)
+    reason; // undefined until a limit trips; then "repetition" or "max_output" (never cleared afterwards)
+    constructor(limits = {}) { // limits: optional { maxRepeats, maxOutputChars }; a nullish/missing field takes its default
+        this.maxRepeats = limits.maxRepeats ?? 200;
+        this.maxOutputChars = limits.maxOutputChars ?? 2_000_000;
+    }
+    // Feeds one streamed delta (a string) to the guard. Returns true when this call exceeds a limit and sets `reason`; false otherwise.
+    // Short repeated deltas are the degenerate signal — long varied text just accumulates against the char cap. The result is recomputed per call (no latch), so stop on the first true.
+    push(delta) {
+        this.chars += delta.length; // every delta counts toward the output cap, repeated or not
+        if (delta === this.last) // exact string equality with the previous delta extends the run
+            this.repeats++;
+        else {
+            this.repeats = 1; // a different delta starts a new run of length 1
+            this.last = delta;
+        }
+        if (delta.length <= 16 && this.repeats > this.maxRepeats) { // only deltas of at most 16 chars count as repetition; empty deltas qualify too
+            this.reason = "repetition";
+            return true;
+        }
+        if (this.chars > this.maxOutputChars) { // checked second, so "repetition" wins when both limits trip on the same call
+            this.reason = "max_output";
+            return true;
+        }
+        return false;
+    }
+}

package/dist/core/tool-xml.js CHANGED Viewed

@@ -83,8 +83,15 @@ export class ToolCallExtractor {
             if (end < 0)
                 return events; // incomplete block — wait for more data
             const block = this.buf.slice(0, end);
-            for (const tool of parseInvokes(block))
-                events.push({ kind: "tool", tool });
+            const tools = parseInvokes(block);
+            // Parse-faithful: a block that recovers no tools (empty/missing name="" or malformed body)
+            // must NOT be swallowed — pass it through verbatim so the client sees exactly what the model
+            // emitted. Swallowing yields a turn with neither text nor a tool, which loops the model.
+            if (tools.length)
+                for (const tool of tools)
+                    events.push({ kind: "tool", tool });
+            else
+                events.push({ kind: "text", text: block });
             this.buf = this.buf.slice(end);
             this.capturing = false; // a following <invoke> re-triggers via the passthrough branch
         }

package/dist/providers/copilot/responses-upstream.js CHANGED Viewed

@@ -84,9 +84,9 @@ export function parseResponsesResult(data) {
                 }
             }
         }
-        else if (item.type === "function_call") {
+        else if (item.type === "function_call" && item.name) {
             sawTool = true;
-            content.push({ type: "tool_use", id: item.call_id ?? item.id, name: item.name ?? "", input: safeJson(item.arguments) });
+            content.push({ type: "tool_use", id: item.call_id ?? item.id, name: item.name, input: safeJson(item.arguments) });
         }
     }
     const finishReason = data.status === "incomplete" ? mapIncomplete(data.incomplete_details?.reason) : sawTool ? "tool_use" : "stop";
@@ -156,10 +156,13 @@ export async function* streamResponses(res) {
             switch (ev.type) {
                 case "response.output_item.added": {
                     const item = ev.item ?? {};
-                    if (item.type === "function_call") {
+                    // Gate on a present name, mirroring the chat adapter's `tc.function?.name` guard: a
+                    // nameless function_call would surface as a bare "call:" the client can't run. No name,
+                    // no start — its later arg deltas find no mapping and are dropped, not rendered.
+                    if (item.type === "function_call" && item.name) {
                         const idx = nextToolIndex++;
                         toolByOutputIndex.set(ev.output_index, idx);
-                        yield { kind: "tool_use_start", index: idx, id: item.call_id ?? item.id ?? `call_${idx}`, name: item.name ?? "", done: false };
+                        yield { kind: "tool_use_start", index: idx, id: item.call_id ?? item.id ?? `call_${idx}`, name: item.name, done: false };
                     }
                     break;
                 }

package/dist/shared/config.js CHANGED Viewed

@@ -4,6 +4,8 @@ export function defaultConfig() {
         supervisorPort: 7890,
         workerPort: 7891,
         restart: { maxCrashes: 5, windowMs: 60_000, baseBackoffMs: 500, maxBackoffMs: 8_000, unhealthyCooldownMs: 30_000 },
+        // Token failure is rare and GitHub rate-limits, so a slow cadence is plenty; overridable for tests/tuning.
+        heartbeat: { intervalMs: 60_000, initialDelayMs: 2_000 },
         // Empty = pass the requested model straight through to Copilot. Add entries (or "*") to remap.
         modelMap: {},
         // Set MAESTRO_REPORT_REPO=owner/repo to override where /report files diagnostics issues.
@@ -15,6 +17,7 @@ export function mergeConfig(base, o) {
         ...base,
         ...o,
         restart: { ...base.restart, ...(o.restart ?? {}) },
+        heartbeat: { ...base.heartbeat, ...(o.heartbeat ?? {}) },
         modelMap: { ...base.modelMap, ...(o.modelMap ?? {}) },
     };
 }

package/dist/shared/prefs.js CHANGED Viewed

@@ -1,28 +1,36 @@
 import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
 import { join } from "node:path";
-// Small user-preferences store (e.g. the chosen chat model), persisted across launches.
+// Small user-preferences store (e.g. the chosen chat model, change-banner view counts), persisted
+// across launches.
 const file = (dir) => join(dir, "prefs.json");
-export function readChatModel(dir) {
+function read(dir) {
     if (!existsSync(file(dir)))
-        return null;
+        return {};
     try {
-        return JSON.parse(readFileSync(file(dir), "utf8")).chatModel ?? null;
+        return JSON.parse(readFileSync(file(dir), "utf8"));
     }
     catch {
-        return null;
+        return {};
     }
 }
-export function writeChatModel(dir, model) {
+function write(dir, next) {
     if (!existsSync(dir))
         mkdirSync(dir, { recursive: true });
-    let cur = {};
-    if (existsSync(file(dir))) {
-        try {
-            cur = JSON.parse(readFileSync(file(dir), "utf8"));
-        }
-        catch {
-            cur = {};
-        }
-    }
-    writeFileSync(file(dir), JSON.stringify({ ...cur, chatModel: model }));
+    writeFileSync(file(dir), JSON.stringify(next));
+}
+export function readChatModel(dir) {
+    return read(dir).chatModel ?? null;
+}
+export function writeChatModel(dir, model) {
+    write(dir, { ...read(dir), chatModel: model });
+}
+// "What's new" banner: show a change a few times then stop. Counts are keyed by an id (e.g. version),
+// so a new release re-shows; bumping the count is what decides whether the banner appears again.
+const seenKey = (id) => `seen:${id}`;
+export function shouldShowChange(dir, id, maxShows = 3) {
+    return (read(dir)[seenKey(id)] ?? 0) < maxShows;
+}
+export function markChangeShown(dir, id) {
+    const cur = read(dir);
+    write(dir, { ...cur, [seenKey(id)]: (cur[seenKey(id)] ?? 0) + 1 });
 }

package/dist/supervisor/index.js CHANGED Viewed

@@ -51,7 +51,9 @@ export function startSupervisor() {
     };
     // Periodically re-check the GitHub token so the UI reflects an expired/revoked login within ~60s,
     // instead of only on the next failed request or a manual /status.
-    const heartbeat = new GithubHeartbeat(() => readGhToken(dataDir()));
+    const heartbeat = new GithubHeartbeat(() => readGhToken(dataDir()), probeGithubAuth, undefined, {
+        intervalMs: config.heartbeat.intervalMs, initialDelayMs: config.heartbeat.initialDelayMs,
+    });
     const app = createControlApp({
         db, getState: () => state,
         restart: () => monitor.restartManually(),

package/dist/tui/app.js CHANGED Viewed

@@ -17,16 +17,20 @@ const SPINNER = ["✶", "✸", "✹", "✺", "✹", "✷"];
 // Startup overview card. GitHub shows a login STATE (no real token expiry exists). Web search shows
 // the resolved backend: "via WebIQ", "via Copilot (native)", or "unavailable — run /webiq".
 // `extra` appends detail lines (e.g. worker restart history for /status).
-function statusCard(s, extra = []) {
+function statusCard(s, extra = [], clients) {
     const gh = s.github === "connected" ? "✓ connected" : s.github === "expired" ? "✗ expired — run /login" : "✗ signed out — run /login";
     const web = s.webSearch === "webiq" ? "✓ via WebIQ" : s.webSearch === "copilot" ? "✓ via Copilot (native)" : "✗ unavailable — run /webiq";
-    const clients = `claude ${s.clients.claude ? "✓" : "○"}  codex ${s.clients.codex ? "✓" : "○"}`;
+    // Per-scope + model when we have the file-derived detail; else fall back to the simple flag.
+    const scope = (sc) => sc?.on ? `✓ ${sc.model ? sc.model.replace(/\[1m\]$/, "") : "on"}` : "○";
+    const clientsLine = clients
+        ? `claude u:${scope({ on: clients.claude.user, model: clients.claude.userModel })} p:${scope({ on: clients.claude.project, model: clients.claude.projectModel })} · codex u:${scope({ on: clients.codex.user, model: clients.codex.userModel })} p:${scope({ on: clients.codex.project, model: clients.codex.projectModel })}`
+        : `claude ${s.clients.claude ? "✓" : "○"}  codex ${s.clients.codex ? "✓" : "○"}`;
     const tone = s.github === "connected" ? "ok" : "error";
     return { type: "card", title: "status", tone, lines: [
             `GitHub login   ${gh}`,
             `web search     ${web}`,
             `worker         ${s.worker}`,
-            `clients        ${clients}`,
+            `clients        ${clientsLine}`,
             ...extra,
         ] };
 }
@@ -49,17 +53,21 @@ function OutputCard({ title, lines, tone }) {
 function HelpCard({ commands }) {
     return (_jsxs(Box, { flexDirection: "column", borderStyle: "round", borderColor: theme.border, paddingX: 1, marginBottom: 1, children: [_jsx(Text, { color: theme.accent, bold: true, children: "Commands" }), commands.map((c) => (_jsxs(Text, { children: [_jsx(Text, { color: theme.prompt, children: c.name.padEnd(16) }), _jsx(Text, { color: theme.muted, children: c.describe })] }, c.name))), _jsx(Text, { color: theme.muted, children: "tip: type / to autocomplete \u00B7 plain text talks to the assistant" })] }));
 }
-// HUD client cell: shows configured scopes read from the real config files.
+// HUD client cell: shows configured scopes read from the real config files, with the pinned model.
 function ClientBadge({ name, status }) {
-    const cell = (label, on) => (_jsxs(Text, { color: on ? theme.ready : theme.muted, children: [label, ":", on ? "✓" : "○"] }));
-    return (_jsxs(Text, { color: theme.muted, children: [name, " ", cell("u", status.user), " ", cell("p", status.project)] }));
+    const short = (m) => (m ? m.replace(/\[1m\]$/, "").replace(/^claude-/, "").slice(0, 14) : "");
+    const cell = (label, on, model) => (_jsxs(Text, { color: on ? theme.ready : theme.muted, children: [label, ":", on ? `✓ ${short(model)}`.trimEnd() : "○"] }));
+    return (_jsxs(Text, { color: theme.muted, children: [name, " ", cell("u", status.user, status.userModel), " ", cell("p", status.project, status.projectModel)] }));
 }
-export function App({ registry, title, workerState = "starting", initialModel = "—", statusSource, readStatus, modelLimits, onChat, loadModels, setup, info, onModelChange, pickModelOnStart, login, enableWebiq, disableWebiq, webSearchBackend, startupStatus, githubStatus, }) {
+export function App({ registry, title, workerState = "starting", initialModel = "—", statusSource, readStatus, modelLimits, onChat, loadModels, setup, info, onModelChange, pickModelOnStart, login, enableWebiq, disableWebiq, webSearchBackend, startupStatus, githubStatus, changeBanner, onChangeSeen, }) {
     const cmds = registry.list().map((c) => ({ name: c.name, describe: c.describe }));
     const [entries, setEntries] = useState(() => [
         ...(startupStatus ? [statusCard(startupStatus)] : []),
+        ...(changeBanner ? [{ type: "card", title: "what's new", tone: "info", lines: changeBanner.lines }] : []),
         { type: "system", text: "Type a message to chat with the assistant, or /help for commands." },
     ]);
+    useEffect(() => { if (changeBanner)
+        onChangeSeen?.(); }, []);
     const [state, setState] = useState(workerState);
     const [status, setStatus] = useState(() => readStatus?.() ?? EMPTY_STATUS);
     const [webBackend, setWebBackend] = useState(() => webSearchBackend?.() ?? "unavailable");
@@ -149,7 +157,7 @@ export function App({ registry, title, workerState = "starting", initialModel =
                 webSearch: webSearchBackend?.() ?? webBackend, worker,
                 clients: { claude: status.claude.user || status.claude.project, codex: status.codex.user || status.codex.project },
             });
-            add(statusCard(summary, restarts.length ? ["", "recent restarts:", ...restarts] : []));
+            add(statusCard(summary, restarts.length ? ["", "recent restarts:", ...restarts] : [], status));
             return;
         }
         if (t === "/config" && info) {

package/dist/tui/assistant/runtime.js CHANGED Viewed

@@ -3,9 +3,10 @@ import { z } from "zod";
 import { buildActions } from "./tools.js";
 import { formatModelList } from "../../shared/format.js";
 const empty = z.object({});
-const setupShape = z.object({
-    scope: z.enum(["global", "project"]).optional(),
-    model: z.string().optional(),
+// Setup is a config write — require both so the assistant must confirm scope+model, never assume.
+const requiredSetupShape = z.object({
+    scope: z.enum(["global", "project"]),
+    model: z.string(),
 }).shape;
 function sdkTools(actions, cfg) {
     const tools = [
@@ -13,6 +14,8 @@ function sdkTools(actions, cfg) {
         tool("restart_worker", "Restart the proxy worker", empty.shape, async () => ({ content: [{ type: "text", text: await actions.restart_worker({}) }] })),
         tool("run_doctor", "Run copilot-reverse health checks", empty.shape, async () => ({ content: [{ type: "text", text: await actions.run_doctor({}) }] })),
         tool("recent_requests", "List recent proxied requests", empty.shape, async () => ({ content: [{ type: "text", text: await actions.recent_requests({}) }] })),
+        tool("recent_errors", "List recent failed/cut requests with their messages (incl. stream runaways)", empty.shape, async () => ({ content: [{ type: "text", text: await actions.recent_errors({}) }] })),
+        tool("metrics", "Show request totals, error count, and per-model average latency", empty.shape, async () => ({ content: [{ type: "text", text: await actions.metrics({}) }] })),
     ];
     const listModels = cfg.listModels;
     if (listModels) {
@@ -24,11 +27,12 @@ function sdkTools(actions, cfg) {
     if (setupClient) {
         for (const client of ["claude", "codex"]) {
             const label = client === "claude" ? "Claude Code" : "Codex";
-            tools.push(tool(`setup_${client}`, `Configure ${label} to use the copilot-reverse proxy (writes its config). scope defaults to "global" (all projects); model defaults to the current chat model.`, setupShape, async (args) => {
-                const scope = args.scope ?? "global";
-                const model = args.model ?? cfg.model;
-                const r = await setupClient(client, scope, model);
-                return { content: [{ type: "text", text: `configured ${label} (${scope}) with model ${model} — wrote ${r.path}; keys: ${r.changed.join(", ") || "(no change)"}` }] };
+            // scope+model are REQUIRED (not defaulted): config writes are not reversible-by-undo, so the
+            // assistant must confirm both with the user first rather than silently writing the global scope
+            // with the current model. The prompt tells it to ask; making the args required enforces it.
+            tools.push(tool(`setup_${client}`, `Configure ${label} to use the proxy. REQUIRES scope ("global"=all projects / "project"=here) AND model — ask the user for both before calling; do not assume.`, requiredSetupShape, async (args) => {
+                const r = await setupClient(client, args.scope, args.model);
+                return { content: [{ type: "text", text: `configured ${label} (${args.scope}) with model ${args.model} — wrote ${r.path}; keys: ${r.changed.join(", ") || "(no change)"}` }] };
             }));
         }
     }
@@ -70,9 +74,11 @@ export async function runAssistantTurn(cfg, prompt, print, queryFn = query, abor
             settingSources: [],
             systemPrompt: "You are copilot-reverse's built-in assistant for the local Copilot proxy. Be concise. " +
                 "When the user expresses an intent you have a tool for, CALL THE TOOL instead of explaining. " +
-                "Tools: get_status, restart_worker, run_doctor, recent_requests, list_models (show available " +
-                "models + context windows), setup_claude / setup_codex (configure those clients to use the proxy). " +
-                "E.g. 'list models' -> call list_models; 'set up claude' -> call setup_claude.",
+                "Tools: get_status, restart_worker, run_doctor, recent_requests, recent_errors, metrics, list_models " +
+                "(models + context windows), setup_claude / setup_codex (configure those clients). " +
+                "SETUP RULE: setup_claude/setup_codex WRITE config and need scope (global=all projects / project=here) " +
+                "AND model. Before calling, confirm BOTH with the user — if unstated, ask (offer list_models). Never assume. " +
+                "E.g. 'list models' -> list_models; 'set up claude' -> ask scope+model, then setup_claude.",
             permissionMode: "bypassPermissions",
             includePartialMessages: true,
             ...(abortController ? { abortController } : {}),

package/dist/tui/assistant/tools.js CHANGED Viewed

@@ -1,3 +1,4 @@
+import { aggregate, recentErrors } from "../panels/metrics-agg.js";
 // Plain action handlers — wrapped as SDK tools in runtime.ts.
 // Each takes a parsed-args object and returns a short text result for the model.
 export function buildActions(client) {
@@ -20,5 +21,17 @@ export function buildActions(client) {
                 return "no requests logged yet";
             return reqs.slice(0, 10).map((r) => `${r.endpoint} ${r.model} ${r.status} ${r.latencyMs}ms`).join("; ");
         },
+        async recent_errors(_args) {
+            const errs = recentErrors(await client.requests(), 10);
+            if (!errs.length)
+                return "no request errors logged — everything's green";
+            return errs.map((e) => `${e.status} ${e.endpoint} ${e.model} — ${e.error ?? "(no message)"}`).join("; ");
+        },
+        async metrics(_args) {
+            const a = aggregate(await client.requests());
+            if (!a.total)
+                return "no requests yet";
+            return `requests: ${a.total}, errors: ${a.errors}; ` + a.byModel.map((r) => `${r.model} n=${r.count} avg=${r.avgMs}ms`).join("; ");
+        },
     };
 }

package/dist/tui/panels/metrics-agg.js CHANGED Viewed

@@ -1,8 +1,11 @@
+// A request "failed" if it returned a 4xx/5xx OR carried an error message — runaway streams finish
+// 200 but tag an error (model degenerated, cut early), and those are exactly what we want to surface.
+const isError = (s) => s.status >= 400 || s.error != null;
 export function aggregate(samples) {
     const map = new Map();
     let errors = 0;
     for (const s of samples) {
-        if (s.status >= 400)
+        if (isError(s))
             errors++;
         const m = map.get(s.model) ?? { count: 0, sum: 0 };
         m.count++;
@@ -15,8 +18,8 @@ export function aggregate(samples) {
         byModel: [...map.entries()].map(([model, v]) => ({ model, count: v.count, avgMs: Math.round(v.sum / v.count) })),
     };
 }
-// The failed requests (status >= 400), in the order given (callers pass newest-first), capped at `limit`.
-// This is the actually-useful "log" — what failed and why — as opposed to worker restart events.
+// The failed requests (status >= 400 or any tagged error), newest-first, capped at `limit`. This is
+// the actually-useful "log" — what failed and why — as opposed to worker restart events.
 export function recentErrors(samples, limit) {
-    return samples.filter((s) => s.status >= 400).slice(0, limit);
+    return samples.filter(isError).slice(0, limit);
 }

package/dist/tui/report.js CHANGED Viewed

@@ -1,12 +1,19 @@
 // Sentinel for an unconfigured report target. /report refuses to open until this is changed.
 export const PLACEHOLDER_REPO = "OWNER/REPO";
-// A diagnostics-only report. It contains metrics, doctor output, and worker restart reasons —
-// never request/response bodies — so there is no user prompt content to leak.
+// A diagnostics-only report, structured like a good hand-written issue (e.g. #5): Summary →
+// Environment → Diagnostics → Steps to reproduce. It contains metrics, doctor output, and worker
+// restart reasons — never request/response bodies — so there is no user prompt content to leak.
 export function buildIssueBody(i) {
+    const runaways = i.errors.filter((e) => e.status < 400 && /runaway/.test(e.error ?? ""));
+    const summary = runaways.length
+        ? `Stream(s) degenerated and were cut early (model looped, no stop) on ${i.status.workerState} worker — ${runaways.length} runaway(s).`
+        : i.errors.length
+            ? `${i.errors.length} recent request error(s) on a ${i.status.workerState} worker; details below.`
+            : `Worker state: ${i.status.workerState}. (Describe the problem below.)`;
     const lines = [
-        "## copilot-reverse diagnostics",
-        "",
-        `- version: ${i.version}`,
+        "### Summary", "", summary, "",
+        "### Environment", "",
+        `- copilot-reverse **${i.version}**`,
         `- platform: ${i.platform}`,
         `- worker state: ${i.status.workerState}`,
         "",
@@ -18,14 +25,20 @@ export function buildIssueBody(i) {
             ? i.errors.map((e) => `- \`${e.status}\` ${e.endpoint} ${e.model} — ${e.error ?? "(no message)"}`)
             : ["- (none)"]),
     ];
+    if (runaways.length) {
+        lines.push("", "### Stream runaways (model degenerated, cut early)", ...runaways.map((e) => `- ${e.endpoint} ${e.model} after ${e.latencyMs}ms — ${e.error}`));
+    }
     if (i.status.restarts.length) {
         lines.push("", "### Recent worker restarts", ...i.status.restarts.slice(0, 5).map((r) => `- ${new Date(r.ts).toISOString()} ${r.reason} exit=${r.exitCode ?? "-"} ${r.stderrTail.slice(0, 120)}`));
     }
-    lines.push("", "### What happened", "<!-- describe what you were doing when this occurred -->", "");
+    lines.push("", "### Steps to reproduce", "<!-- what were you doing? which client/model? -->", "");
     // Keep well under GitHub's ~8KB URL cap once encoded.
     return lines.join("\n").slice(0, 5500);
 }
 export function buildIssueTitle(i) {
+    const runaway = i.errors.find((e) => e.status < 400 && /runaway/.test(e.error ?? ""));
+    if (runaway)
+        return `copilot-reverse: stream runaway (${runaway.model})`;
     const first = i.errors[0]?.error;
     return `copilot-reverse report: ${first ? first.slice(0, 70) : i.status.workerState}`;
 }

package/dist/tui/setup/apply.js CHANGED Viewed

@@ -2,10 +2,13 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
 import { homedir } from "node:os";
 import { join, dirname } from "node:path";
 // The env keys copilot-reverse writes for each client — so reset knows exactly what to remove.
+// ANTHROPIC_AUTH_TOKEN isn't one we write, but reset strips it too: if it lingers alongside our
+// API key, Claude Code warns "both set", so a clean reset should clear the conflict.
 export const CLAUDE_ENV_KEYS = [
-    "ANTHROPIC_BASE_URL", "ANTHROPIC_API_KEY", "ANTHROPIC_MODEL",
+    "ANTHROPIC_BASE_URL", "ANTHROPIC_API_KEY", "ANTHROPIC_MODEL", "ANTHROPIC_AUTH_TOKEN",
     "CLAUDE_CODE_AUTO_COMPACT_WINDOW", "CLAUDE_AUTOCOMPACT_PCT_OVERRIDE",
     "CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC", "CLAUDE_CODE_ATTRIBUTION_HEADER",
+    "CLAUDE_CODE_ENABLE_GATEWAY_MODEL_DISCOVERY",
 ];
 export const CODEX_ENV_KEYS = ["OPENAI_BASE_URL", "OPENAI_API_KEY", "OPENAI_MODEL"];
 // --- Claude Code: merge into settings.json `env` (non-destructive) ---
@@ -29,6 +32,12 @@ export function applyClaude(scope, env, o = {}) {
     }
     const envObj = (settings.env && typeof settings.env === "object" ? settings.env : {});
     const changed = [];
+    // We authenticate with ANTHROPIC_API_KEY; a leftover ANTHROPIC_AUTH_TOKEN here makes Claude Code
+    // warn "both set · auth may not work" — strip it so our setup leaves a clean, single-credential env.
+    if ("ANTHROPIC_AUTH_TOKEN" in envObj) {
+        delete envObj.ANTHROPIC_AUTH_TOKEN;
+        changed.push("ANTHROPIC_AUTH_TOKEN(removed)");
+    }
     for (const [k, v] of Object.entries(env)) {
         if (envObj[k] !== v) {
             envObj[k] = v;

package/dist/tui/setup/clients.js CHANGED Viewed

@@ -27,6 +27,10 @@ export function claudeCopilotReverseEnv(base, apiKey, model, contextWindow) {
         CLAUDE_AUTOCOMPACT_PCT_OVERRIDE: "80",
         CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: "1",
         CLAUDE_CODE_ATTRIBUTION_HEADER: "0", // keep prompt caching working on a non-Anthropic gateway
+        // Populate Claude Code's /model picker from our /anthropic/v1/models so the user can switch
+        // models natively. Coexists with ANTHROPIC_MODEL (which stays the 1M default — it does NOT lock
+        // the picker). Claude Code >=2.1.129 only; older builds ignore it. Picker lists claude* ids.
+        CLAUDE_CODE_ENABLE_GATEWAY_MODEL_DISCOVERY: "1",
     };
 }
 export function codexConfig(e) {

package/dist/tui/setup/status.js CHANGED Viewed

@@ -3,33 +3,40 @@ import { claudePath, codexPath } from "./apply.js";
 // A copilot-reverse-written endpoint always points at the local loopback proxy — this lets us tell our
 // own config apart from a user's pre-existing ANTHROPIC_BASE_URL / OPENAI_BASE_URL.
 const isCopilotReverse = (v) => typeof v === "string" && /127\.0\.0\.1|localhost/.test(v);
-function claudeConfigured(scope, o) {
+function claudeScope(scope, o) {
     const p = claudePath(scope, o);
     if (!existsSync(p))
-        return false;
+        return { on: false };
     try {
         const s = JSON.parse(readFileSync(p, "utf8"));
-        return isCopilotReverse(s.env?.ANTHROPIC_BASE_URL);
+        if (!isCopilotReverse(s.env?.ANTHROPIC_BASE_URL))
+            return { on: false };
+        return { on: true, model: typeof s.env?.ANTHROPIC_MODEL === "string" ? s.env.ANTHROPIC_MODEL : undefined };
     }
     catch {
-        return false;
+        return { on: false };
     }
 }
-function codexConfigured(scope, o) {
+function codexScope(scope, o) {
     const p = codexPath(scope, o);
     if (!existsSync(p))
-        return false;
+        return { on: false };
     try {
-        const m = /^OPENAI_BASE_URL=(.*)$/m.exec(readFileSync(p, "utf8"));
-        return !!m && isCopilotReverse(m[1]);
+        const txt = readFileSync(p, "utf8");
+        const base = /^OPENAI_BASE_URL=(.*)$/m.exec(txt);
+        if (!base || !isCopilotReverse(base[1]))
+            return { on: false };
+        return { on: true, model: /^OPENAI_MODEL=(.*)$/m.exec(txt)?.[1] };
     }
     catch {
-        return false;
+        return { on: false };
     }
 }
 export function readClientStatus(o = {}) {
+    const cu = claudeScope("global", o), cp = claudeScope("project", o);
+    const xu = codexScope("global", o), xp = codexScope("project", o);
     return {
-        claude: { user: claudeConfigured("global", o), project: claudeConfigured("project", o) },
-        codex: { user: codexConfigured("global", o), project: codexConfigured("project", o) },
+        claude: { user: cu.on, project: cp.on, userModel: cu.model, projectModel: cp.model },
+        codex: { user: xu.on, project: xp.on, userModel: xu.model, projectModel: xp.model },
     };
 }

package/dist/version.js CHANGED Viewed

@@ -1,2 +1,2 @@
 // AUTO-GENERATED by scripts/gen-version.mjs from package.json — do not edit.
-export const APP_VERSION = "0.5.3";
+export const APP_VERSION = "0.5.5";

package/dist/worker/anthropic-server.js CHANGED Viewed

@@ -4,6 +4,7 @@ import { estimateTokens } from "../core/tokens.js";
 import { errorHint } from "./errors.js";
 import { CopilotAuthError } from "../providers/copilot/token.js";
 import { isGatewayTool } from "../core/server-tools.js";
+import { RunawayGuard } from "../core/stream-guard.js";
 const frame = (event, data) => `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`;
 const safeJson = (s) => { try {
     return JSON.parse(s);
@@ -14,6 +15,11 @@ catch {
 // Bounds the gateway tool loop so a model that calls web_search every turn (or a runner that always
 // returns "search more") can never spin forever inside one request.
 const MAX_TOOL_ITERS = 5;
+// Wall-clock cap on a single streaming turn. The model occasionally degenerates into emitting the
+// same short token forever ("code\ncode\ncode…") and never sends a stop, which would otherwise relay
+// for minutes and freeze the client. The RunawayGuard catches the repetition fast; this is the
+// backstop for any slow-but-endless stream. On either trip we end the turn cleanly as max_tokens.
+const STREAM_DEADLINE_MS = 120_000;
 export function mountAnthropic(app, router, onMetric, runner) {
     // Model discovery — Anthropic list shape. Claude Desktop / Anthropic-protocol clients GET this
     // before chatting; without it they 404 on the connection test.
@@ -52,7 +58,13 @@ export function mountAnthropic(app, router, onMetric, runner) {
                 let next = 0;
                 let lastPrompt = estInput, lastCached = 0, sumCompletion = 0;
                 let finalStop = "stop";
-                for (let iter = 0; iter < MAX_TOOL_ITERS; iter++) {
+                // Runaway protection spans the whole request: repeated-token degeneration + a wall-clock
+                // deadline. Tripping ends the stream as a clean max_tokens turn instead of hanging.
+                const guard = new RunawayGuard();
+                const deadline = start + STREAM_DEADLINE_MS;
+                let runaway = false;
+                let runawayReason = "";
+                for (let iter = 0; iter < MAX_TOOL_ITERS && !runaway; iter++) {
                     let textIndex; // Anthropic index of this turn's text block
                     const byCopilotIdx = new Map();
                     const buffered = []; // tool calls seen this turn, in order
@@ -73,6 +85,13 @@ export function mountAnthropic(app, router, onMetric, runner) {
                                 res.write(frame("content_block_start", { type: "content_block_start", index: textIndex, content_block: { type: "text", text: "" } }));
                             }
                             res.write(frame("content_block_delta", { type: "content_block_delta", index: textIndex, delta: { type: "text_delta", text: chunk.delta } }));
+                            // Degenerate-stream kill-switch: a model looping on a short token is cut here.
+                            if (guard.push(chunk.delta)) {
+                                runaway = true;
+                                runawayReason = guard.reason ?? "repetition";
+                                turnStop = "length";
+                                break;
+                            }
                         }
                         else if (chunk.kind === "tool_use_start") {
                             if (!byCopilotIdx.has(chunk.index)) {
@@ -86,9 +105,23 @@ export function mountAnthropic(app, router, onMetric, runner) {
                             if (t)
                                 t.args += chunk.argsDelta;
                         }
+                        // Wall-clock backstop on EVERY chunk kind: a tool-call-only runaway never feeds the text
+                        // guard, so without this a model spamming calls would relay until the socket died.
+                        if (Date.now() > deadline) {
+                            runaway = true;
+                            runawayReason = "deadline";
+                            turnStop = "length";
+                            break;
+                        }
                     }
                     if (textIndex !== undefined)
                         res.write(frame("content_block_stop", { type: "content_block_stop", index: textIndex }));
+                    // Runaway tripped (repetition guard or wall-clock deadline): stop now as max_tokens. Don't
+                    // forward partial tool calls or loop into gateway tools — the turn was abandoned, not
+                    // legitimately completed.
+                    if (runaway) {
+                        finalStop = "length";
+                        break;
+                    }
                     const gatewayCalls = buffered.filter((t) => isGatewayTool(t.name));
                     // Invariant: a gateway tool (web_search/web_fetch) must NEVER reach the client — the client
                     // has no handler for it and would stall. So whenever the model calls gateway tools (and a
@@ -124,7 +157,7 @@ export function mountAnthropic(app, router, onMetric, runner) {
                 res.write(frame("message_delta", { type: "message_delta", delta: { stop_reason: finalStop === "tool_use" ? "tool_use" : finalStop === "length" ? "max_tokens" : "end_turn" }, usage: deltaUsage }));
                 res.write(frame("message_stop", { type: "message_stop" }));
                 res.end();
-                metric(200);
+                metric(200, runaway ? `runaway stream cut (${runawayReason}) — model degenerated, ended early as max_tokens` : undefined);
             }
             else {
                 // Non-stream: same gateway loop without SSE — run gateway tools and re-complete until the

package/dist/worker/openai-server.js CHANGED Viewed

@@ -3,6 +3,10 @@ import { openaiRequestToCanonical, canonicalToOpenAIResponse, canonicalChunkToOp
 import { responsesRequestToCanonical, canonicalToResponsesResponse, ResponsesSSE } from "../core/responses-inbound.js";
 import { errorHint } from "./errors.js";
 import { CopilotAuthError } from "../providers/copilot/token.js";
+import { RunawayGuard } from "../core/stream-guard.js";
+// Cut a single streaming turn that degenerates (model repeats one short token forever, never stops)
+// so the client gets a bounded answer instead of a frozen session. Mirrors the Anthropic backend.
+const STREAM_DEADLINE_MS = 120_000;
 export function mountOpenAI(app, router, onMetric) {
     // Model discovery — OpenAI list shape. Clients (LiteLLM-style gateways, "test connection" probes)
     // GET this before chatting; without it they 404 and refuse to connect.
@@ -20,10 +24,24 @@ export function mountOpenAI(app, router, onMetric) {
                 res.setHeader("content-type", "text/event-stream");
                 res.setHeader("cache-control", "no-cache");
                 const id = `chatcmpl-${randomUUID().replace(/-/g, "")}`; // unique per response, not constant
-                for await (const chunk of provider.stream(canon))
+                const guard = new RunawayGuard();
+                const deadline = start + STREAM_DEADLINE_MS;
+                let runawayReason = "";
+                for await (const chunk of provider.stream(canon)) {
                     res.write(canonicalChunkToOpenAISSE(chunk, id, canon.model));
+                    // Two kill-switches: the text guard cuts repeated-token degeneration, and the wall-clock
+                    // backstop below covers tool-call streams too — a model can loop on tool calls forever,
+                    // which never feeds the text guard, so the deadline cuts those cleanly instead of freezing.
+                    if (chunk.kind === "text" && guard.push(chunk.delta)) {
+                        runawayReason = guard.reason ?? "repetition";
+                        break;
+                    }
+                    if (Date.now() > deadline) {
+                        runawayReason = "deadline";
+                        break;
+                    }
+                }
                 res.end();
-                metric(200);
+                metric(200, runawayReason ? `runaway stream cut (${runawayReason}) — model degenerated, ended early` : undefined);
             }
             else {
                 res.json(canonicalToOpenAIResponse(await provider.complete(canon)));
@@ -65,15 +83,24 @@ export function mountOpenAI(app, router, onMetric) {
                 const argsByIdx = new Map();
                 let usage;
                 let finish = "stop";
+                const guard = new RunawayGuard();
+                const deadline = start + STREAM_DEADLINE_MS;
+                let runawayReason = "";
                 for await (const chunk of provider.stream(canon)) {
                     if (chunk.done) {
                         finish = chunk.finishReason ?? "stop";
                         usage = chunk.usage;
                         break;
                     }
-                    if (chunk.kind === "text")
+                    if (chunk.kind === "text") {
                         for (const f of sse.text(chunk.delta))
                             res.write(f);
+                        if (guard.push(chunk.delta)) {
+                            finish = "length";
+                            runawayReason = guard.reason ?? "repetition";
+                            break;
+                        }
+                    }
                     else if (chunk.kind === "tool_use_start")
                         for (const f of sse.toolStart(chunk.index, chunk.id, chunk.name))
                             res.write(f);
@@ -82,11 +109,17 @@ export function mountOpenAI(app, router, onMetric) {
                         for (const f of sse.toolArgs(chunk.index, chunk.argsDelta))
                             res.write(f);
                     }
+                    // Deadline applies to every chunk kind: a tool-call-only runaway never hits the text guard.
+                    if (Date.now() > deadline) {
+                        finish = "length";
+                        runawayReason = "deadline";
+                        break;
+                    }
                 }
                 for (const f of sse.finish(usage, finish, argsByIdx))
                     res.write(f);
                 res.end();
-                metric(200);
+                metric(200, runawayReason ? `runaway stream cut (${runawayReason}) — model degenerated, ended early` : undefined);
             }
             else {
                 res.json(canonicalToResponsesResponse(await provider.complete(canon)));

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "copilot-reverse",
-  "version": "0.5.3",
+  "version": "0.5.5",
   "description": "Interactive terminal app that exposes your GitHub Copilot subscription as local OpenAI- and Anthropic-compatible endpoints, with a self-healing daemon and a built-in assistant.",
   "type": "module",
   "license": "MIT",