copilot-reverse 0.5.3 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -56,9 +56,9 @@ Here's the app itself — a prompt, a live status bar, and slash-command autocom
56
56
  Just **talk to it** — it understands plain English and will do the work for you:
57
57
 
58
58
  > *"list models"* → shows every model + its context window
59
- > *"set up claude"* → configures Claude Code
59
+ > *"set up claude"* → asks scope (global/project) + model, then configures Claude Code
60
60
  > *"is the worker healthy?"* → runs a health check
61
- > *"why did my last request fail?"* → shows the error
61
+ > *"why did my last request fail?"* → shows the error (incl. cut-short stream runaways)
62
62
 
63
63
  Prefer commands? Type `/` to see them all. The essentials:
64
64
 
@@ -66,7 +66,7 @@ Prefer commands? Type `/` to see them all. The essentials:
66
66
  |---|---|
67
67
  | `/setup-claude` · `/setup-codex` | Point Claude Code / Codex at copilot-reverse |
68
68
  | `/model` | Switch the chat model (1M-context models marked) |
69
- | `/status` · `/doctor` | Is everything healthy? |
69
+ | `/status` · `/doctor` | Is everything healthy? (`/status` shows each client's scope + model) |
70
70
  | `/logs` · `/metrics` | What ran, what failed, and why |
71
71
  | `/dashboard` | Open a live web dashboard in your browser |
72
72
  | `/report` | File a pre-filled bug report (diagnostics only — no prompts) |
@@ -147,6 +147,10 @@ of your config untouched.
147
147
  - **Your data stays local.** The app proxies between your editor and Copilot on `127.0.0.1`. Your
148
148
  GitHub token lives only in `~/.copilot-reverse/creds.json` on your own disk.
149
149
  - **It heals itself.** If the proxy crashes, the supervisor restarts it with backoff and records why.
150
+ - **It never freezes.** If a model degenerates (loops on one token, never stops), the proxy cuts the
151
+ stream cleanly as `max_tokens` and tags it — `/report` then files a pre-filled issue so it's easy to flag.
152
+ - **Tunable.** `~/.copilot-reverse` config covers ports, restart backoff, and the GitHub-token
153
+ heartbeat interval; defaults are sensible, override only if needed.
150
154
  - **Unofficial endpoints.** This uses community-documented Copilot endpoints with *your own*
151
155
  subscription. It may break if GitHub changes them — that's the trade-off for not needing extra keys.
152
156
 
package/dist/cli/index.js CHANGED
@@ -13,7 +13,7 @@ import { makeOnChat } from "../tui/assistant/on-chat.js";
13
13
  import { readGhToken, clearGhToken, hasGhTokenFile } from "../shared/creds.js";
14
14
  import { writeWebIqKey, readWebIqKey, clearWebIqKey, readWebSearchMode, writeWebSearchMode, resolveWebSearchBackend } from "../shared/webiq-key.js";
15
15
  import { readClientSetup, writeClientSetup } from "../shared/client-setup.js";
16
- import { readChatModel, writeChatModel } from "../shared/prefs.js";
16
+ import { readChatModel, writeChatModel, shouldShowChange, markChangeShown } from "../shared/prefs.js";
17
17
  import { CopilotTokenStore, isCopilotTokenValid } from "../providers/copilot/token.js";
18
18
  import { fetchCopilotModels, fetchModelLimits } from "../providers/copilot/models.js";
19
19
  import { applyClaude, applyCodex, resetClaude, resetCodex, CLAUDE_ENV_KEYS, CODEX_ENV_KEYS } from "../tui/setup/apply.js";
@@ -181,6 +181,12 @@ async function launchTui() {
181
181
  }
182
182
  });
183
183
  const persistedModel = readChatModel(dataDir());
184
+ // "What's new" banner: MAJOR changes only — keyed by version so each release re-announces, shown
185
+ // ~3 launches then quiet. Minor fixes/polish do NOT go here; reserve it for things worth noticing.
186
+ const CHANGE_ID = `v${APP_VERSION}`;
187
+ const changeBanner = shouldShowChange(dataDir(), CHANGE_ID)
188
+ ? { lines: ["• runaway streams now cut cleanly — no more frozen 'code code code' sessions"] }
189
+ : undefined;
184
190
  // Startup overview. The token was already validated above (re-auth happens before we get here), so
185
191
  // GitHub is connected; web search readiness and configured clients are read from disk.
186
192
  const clientStatus = readClientStatus();
@@ -215,6 +221,8 @@ async function launchTui() {
215
221
  disableWebiq: () => { clearWebIqKey(dataDir()); },
216
222
  webSearchBackend: () => resolveWebSearchBackend(readWebSearchMode(dataDir()), Boolean(readWebIqKey(dataDir()))),
217
223
  startupStatus,
224
+ changeBanner,
225
+ onChangeSeen: () => markChangeShown(dataDir(), CHANGE_ID),
218
226
  githubStatus: async () => {
219
227
  const token = readGhToken(dataDir());
220
228
  if (!token)
@@ -0,0 +1,38 @@
1
+ // Defends a streaming turn against upstream model degeneration: the model collapses into emitting
2
+ // the same short token forever ("code\ncode\ncode…") and never sends a stop, so a faithful proxy
3
+ // would relay deltas until the socket dies — the session appears frozen. This watchdog converts
4
+ // that into a clean, bounded stop. It is pure (no I/O, no timers) so it is trivially testable; the
5
+ // idle/wall-clock timeout lives at the SSE loop where the timers are. Defaults are generous: real
6
+ // answers don't hit them, only runaways do.
7
+ export class RunawayGuard {
8
+ maxRepeats;
9
+ maxOutputChars;
10
+ last = "";
11
+ repeats = 0;
12
+ chars = 0;
13
+ reason;
14
+ constructor(limits = {}) {
15
+ this.maxRepeats = limits.maxRepeats ?? 200;
16
+ this.maxOutputChars = limits.maxOutputChars ?? 2_000_000;
17
+ }
18
+ // Returns true the moment a limit is exceeded; thereafter `reason` is set. Short repeated deltas
19
+ // are the degenerate signal — long varied text just accumulates against the char cap.
20
+ push(delta) {
21
+ this.chars += delta.length;
22
+ if (delta === this.last)
23
+ this.repeats++;
24
+ else {
25
+ this.repeats = 1;
26
+ this.last = delta;
27
+ }
28
+ if (delta.length <= 16 && this.repeats > this.maxRepeats) {
29
+ this.reason = "repetition";
30
+ return true;
31
+ }
32
+ if (this.chars > this.maxOutputChars) {
33
+ this.reason = "max_output";
34
+ return true;
35
+ }
36
+ return false;
37
+ }
38
+ }
@@ -83,8 +83,15 @@ export class ToolCallExtractor {
83
83
  if (end < 0)
84
84
  return events; // incomplete block — wait for more data
85
85
  const block = this.buf.slice(0, end);
86
- for (const tool of parseInvokes(block))
87
- events.push({ kind: "tool", tool });
86
+ const tools = parseInvokes(block);
87
+ // Parse-faithful: a block that recovers no tools (empty/missing name="" or malformed body)
88
+ // must NOT be swallowed — pass it through verbatim so the client sees exactly what the model
89
+ // emitted. Swallowing yields a turn with neither text nor a tool, which loops the model.
90
+ if (tools.length)
91
+ for (const tool of tools)
92
+ events.push({ kind: "tool", tool });
93
+ else
94
+ events.push({ kind: "text", text: block });
88
95
  this.buf = this.buf.slice(end);
89
96
  this.capturing = false; // a following <invoke> re-triggers via the passthrough branch
90
97
  }
@@ -84,9 +84,9 @@ export function parseResponsesResult(data) {
84
84
  }
85
85
  }
86
86
  }
87
- else if (item.type === "function_call") {
87
+ else if (item.type === "function_call" && item.name) {
88
88
  sawTool = true;
89
- content.push({ type: "tool_use", id: item.call_id ?? item.id, name: item.name ?? "", input: safeJson(item.arguments) });
89
+ content.push({ type: "tool_use", id: item.call_id ?? item.id, name: item.name, input: safeJson(item.arguments) });
90
90
  }
91
91
  }
92
92
  const finishReason = data.status === "incomplete" ? mapIncomplete(data.incomplete_details?.reason) : sawTool ? "tool_use" : "stop";
@@ -156,10 +156,13 @@ export async function* streamResponses(res) {
156
156
  switch (ev.type) {
157
157
  case "response.output_item.added": {
158
158
  const item = ev.item ?? {};
159
- if (item.type === "function_call") {
159
+ // Gate on a present name, mirroring the chat adapter's `tc.function?.name` guard: a
160
+ // nameless function_call would surface as a bare "call:" the client can't run. No name,
161
+ // no start — its later arg deltas find no mapping and are dropped, not rendered.
162
+ if (item.type === "function_call" && item.name) {
160
163
  const idx = nextToolIndex++;
161
164
  toolByOutputIndex.set(ev.output_index, idx);
162
- yield { kind: "tool_use_start", index: idx, id: item.call_id ?? item.id ?? `call_${idx}`, name: item.name ?? "", done: false };
165
+ yield { kind: "tool_use_start", index: idx, id: item.call_id ?? item.id ?? `call_${idx}`, name: item.name, done: false };
163
166
  }
164
167
  break;
165
168
  }
@@ -4,6 +4,8 @@ export function defaultConfig() {
4
4
  supervisorPort: 7890,
5
5
  workerPort: 7891,
6
6
  restart: { maxCrashes: 5, windowMs: 60_000, baseBackoffMs: 500, maxBackoffMs: 8_000, unhealthyCooldownMs: 30_000 },
7
+ // Token failure is rare and GitHub rate-limits, so a slow cadence is plenty; overridable for tests/tuning.
8
+ heartbeat: { intervalMs: 60_000, initialDelayMs: 2_000 },
7
9
  // Empty = pass the requested model straight through to Copilot. Add entries (or "*") to remap.
8
10
  modelMap: {},
9
11
  // Set MAESTRO_REPORT_REPO=owner/repo to override where /report files diagnostics issues.
@@ -15,6 +17,7 @@ export function mergeConfig(base, o) {
15
17
  ...base,
16
18
  ...o,
17
19
  restart: { ...base.restart, ...(o.restart ?? {}) },
20
+ heartbeat: { ...base.heartbeat, ...(o.heartbeat ?? {}) },
18
21
  modelMap: { ...base.modelMap, ...(o.modelMap ?? {}) },
19
22
  };
20
23
  }
@@ -1,28 +1,36 @@
1
1
  import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
2
2
  import { join } from "node:path";
3
- // Small user-preferences store (e.g. the chosen chat model), persisted across launches.
3
+ // Small user-preferences store (e.g. the chosen chat model, change-banner view counts), persisted
4
+ // across launches.
4
5
  const file = (dir) => join(dir, "prefs.json");
5
- export function readChatModel(dir) {
6
+ function read(dir) {
6
7
  if (!existsSync(file(dir)))
7
- return null;
8
+ return {};
8
9
  try {
9
- return JSON.parse(readFileSync(file(dir), "utf8")).chatModel ?? null;
10
+ return JSON.parse(readFileSync(file(dir), "utf8"));
10
11
  }
11
12
  catch {
12
- return null;
13
+ return {};
13
14
  }
14
15
  }
15
- export function writeChatModel(dir, model) {
16
+ function write(dir, next) {
16
17
  if (!existsSync(dir))
17
18
  mkdirSync(dir, { recursive: true });
18
- let cur = {};
19
- if (existsSync(file(dir))) {
20
- try {
21
- cur = JSON.parse(readFileSync(file(dir), "utf8"));
22
- }
23
- catch {
24
- cur = {};
25
- }
26
- }
27
- writeFileSync(file(dir), JSON.stringify({ ...cur, chatModel: model }));
19
+ writeFileSync(file(dir), JSON.stringify(next));
20
+ }
21
+ export function readChatModel(dir) {
22
+ return read(dir).chatModel ?? null;
23
+ }
24
+ export function writeChatModel(dir, model) {
25
+ write(dir, { ...read(dir), chatModel: model });
26
+ }
27
+ // "What's new" banner: show a change a few times then stop. Counts are keyed by an id (e.g. version),
28
+ // so a new release re-shows; bumping the count is what decides whether the banner appears again.
29
+ const seenKey = (id) => `seen:${id}`;
30
+ export function shouldShowChange(dir, id, maxShows = 3) {
31
+ return (read(dir)[seenKey(id)] ?? 0) < maxShows;
32
+ }
33
+ export function markChangeShown(dir, id) {
34
+ const cur = read(dir);
35
+ write(dir, { ...cur, [seenKey(id)]: (cur[seenKey(id)] ?? 0) + 1 });
28
36
  }
@@ -51,7 +51,9 @@ export function startSupervisor() {
51
51
  };
52
52
  // Periodically re-check the GitHub token so the UI reflects an expired/revoked login within ~60s,
53
53
  // instead of only on the next failed request or a manual /status.
54
- const heartbeat = new GithubHeartbeat(() => readGhToken(dataDir()));
54
+ const heartbeat = new GithubHeartbeat(() => readGhToken(dataDir()), probeGithubAuth, undefined, {
55
+ intervalMs: config.heartbeat.intervalMs, initialDelayMs: config.heartbeat.initialDelayMs,
56
+ });
55
57
  const app = createControlApp({
56
58
  db, getState: () => state,
57
59
  restart: () => monitor.restartManually(),
package/dist/tui/app.js CHANGED
@@ -17,16 +17,20 @@ const SPINNER = ["✶", "✸", "✹", "✺", "✹", "✷"];
17
17
  // Startup overview card. GitHub shows a login STATE (no real token expiry exists). Web search shows
18
18
  // the resolved backend: "via WebIQ", "via Copilot (native)", or "unavailable — run /webiq".
19
19
  // `extra` appends detail lines (e.g. worker restart history for /status).
20
- function statusCard(s, extra = []) {
20
+ function statusCard(s, extra = [], clients) {
21
21
  const gh = s.github === "connected" ? "✓ connected" : s.github === "expired" ? "✗ expired — run /login" : "✗ signed out — run /login";
22
22
  const web = s.webSearch === "webiq" ? "✓ via WebIQ" : s.webSearch === "copilot" ? "✓ via Copilot (native)" : "✗ unavailable — run /webiq";
23
- const clients = `claude ${s.clients.claude ? "✓" : "○"} codex ${s.clients.codex ? "✓" : "○"}`;
23
+ // Per-scope + model when we have the file-derived detail; else fall back to the simple flag.
24
+ const scope = (sc) => sc?.on ? `✓ ${sc.model ? sc.model.replace(/\[1m\]$/, "") : "on"}` : "○";
25
+ const clientsLine = clients
26
+ ? `claude u:${scope({ on: clients.claude.user, model: clients.claude.userModel })} p:${scope({ on: clients.claude.project, model: clients.claude.projectModel })} · codex u:${scope({ on: clients.codex.user, model: clients.codex.userModel })} p:${scope({ on: clients.codex.project, model: clients.codex.projectModel })}`
27
+ : `claude ${s.clients.claude ? "✓" : "○"} codex ${s.clients.codex ? "✓" : "○"}`;
24
28
  const tone = s.github === "connected" ? "ok" : "error";
25
29
  return { type: "card", title: "status", tone, lines: [
26
30
  `GitHub login ${gh}`,
27
31
  `web search ${web}`,
28
32
  `worker ${s.worker}`,
29
- `clients ${clients}`,
33
+ `clients ${clientsLine}`,
30
34
  ...extra,
31
35
  ] };
32
36
  }
@@ -49,17 +53,21 @@ function OutputCard({ title, lines, tone }) {
49
53
  function HelpCard({ commands }) {
50
54
  return (_jsxs(Box, { flexDirection: "column", borderStyle: "round", borderColor: theme.border, paddingX: 1, marginBottom: 1, children: [_jsx(Text, { color: theme.accent, bold: true, children: "Commands" }), commands.map((c) => (_jsxs(Text, { children: [_jsx(Text, { color: theme.prompt, children: c.name.padEnd(16) }), _jsx(Text, { color: theme.muted, children: c.describe })] }, c.name))), _jsx(Text, { color: theme.muted, children: "tip: type / to autocomplete \u00B7 plain text talks to the assistant" })] }));
51
55
  }
52
- // HUD client cell: shows configured scopes read from the real config files.
56
+ // HUD client cell: shows configured scopes read from the real config files, with the pinned model.
53
57
  function ClientBadge({ name, status }) {
54
- const cell = (label, on) => (_jsxs(Text, { color: on ? theme.ready : theme.muted, children: [label, ":", on ? "✓" : ""] }));
55
- return (_jsxs(Text, { color: theme.muted, children: [name, " ", cell("u", status.user), " ", cell("p", status.project)] }));
58
+ const short = (m) => (m ? m.replace(/\[1m\]$/, "").replace(/^claude-/, "").slice(0, 14) : "");
59
+ const cell = (label, on, model) => (_jsxs(Text, { color: on ? theme.ready : theme.muted, children: [label, ":", on ? `✓ ${short(model)}`.trimEnd() : ""] }));
60
+ return (_jsxs(Text, { color: theme.muted, children: [name, " ", cell("u", status.user, status.userModel), " ", cell("p", status.project, status.projectModel)] }));
56
61
  }
57
- export function App({ registry, title, workerState = "starting", initialModel = "—", statusSource, readStatus, modelLimits, onChat, loadModels, setup, info, onModelChange, pickModelOnStart, login, enableWebiq, disableWebiq, webSearchBackend, startupStatus, githubStatus, }) {
62
+ export function App({ registry, title, workerState = "starting", initialModel = "—", statusSource, readStatus, modelLimits, onChat, loadModels, setup, info, onModelChange, pickModelOnStart, login, enableWebiq, disableWebiq, webSearchBackend, startupStatus, githubStatus, changeBanner, onChangeSeen, }) {
58
63
  const cmds = registry.list().map((c) => ({ name: c.name, describe: c.describe }));
59
64
  const [entries, setEntries] = useState(() => [
60
65
  ...(startupStatus ? [statusCard(startupStatus)] : []),
66
+ ...(changeBanner ? [{ type: "card", title: "what's new", tone: "info", lines: changeBanner.lines }] : []),
61
67
  { type: "system", text: "Type a message to chat with the assistant, or /help for commands." },
62
68
  ]);
69
+ useEffect(() => { if (changeBanner)
70
+ onChangeSeen?.(); }, []);
63
71
  const [state, setState] = useState(workerState);
64
72
  const [status, setStatus] = useState(() => readStatus?.() ?? EMPTY_STATUS);
65
73
  const [webBackend, setWebBackend] = useState(() => webSearchBackend?.() ?? "unavailable");
@@ -149,7 +157,7 @@ export function App({ registry, title, workerState = "starting", initialModel =
149
157
  webSearch: webSearchBackend?.() ?? webBackend, worker,
150
158
  clients: { claude: status.claude.user || status.claude.project, codex: status.codex.user || status.codex.project },
151
159
  });
152
- add(statusCard(summary, restarts.length ? ["", "recent restarts:", ...restarts] : []));
160
+ add(statusCard(summary, restarts.length ? ["", "recent restarts:", ...restarts] : [], status));
153
161
  return;
154
162
  }
155
163
  if (t === "/config" && info) {
@@ -3,9 +3,10 @@ import { z } from "zod";
3
3
  import { buildActions } from "./tools.js";
4
4
  import { formatModelList } from "../../shared/format.js";
5
5
  const empty = z.object({});
6
- const setupShape = z.object({
7
- scope: z.enum(["global", "project"]).optional(),
8
- model: z.string().optional(),
6
+ // Setup is a config write — require both so the assistant must confirm scope+model, never assume.
7
+ const requiredSetupShape = z.object({
8
+ scope: z.enum(["global", "project"]),
9
+ model: z.string(),
9
10
  }).shape;
10
11
  function sdkTools(actions, cfg) {
11
12
  const tools = [
@@ -13,6 +14,8 @@ function sdkTools(actions, cfg) {
13
14
  tool("restart_worker", "Restart the proxy worker", empty.shape, async () => ({ content: [{ type: "text", text: await actions.restart_worker({}) }] })),
14
15
  tool("run_doctor", "Run copilot-reverse health checks", empty.shape, async () => ({ content: [{ type: "text", text: await actions.run_doctor({}) }] })),
15
16
  tool("recent_requests", "List recent proxied requests", empty.shape, async () => ({ content: [{ type: "text", text: await actions.recent_requests({}) }] })),
17
+ tool("recent_errors", "List recent failed/cut requests with their messages (incl. stream runaways)", empty.shape, async () => ({ content: [{ type: "text", text: await actions.recent_errors({}) }] })),
18
+ tool("metrics", "Show request totals, error count, and per-model average latency", empty.shape, async () => ({ content: [{ type: "text", text: await actions.metrics({}) }] })),
16
19
  ];
17
20
  const listModels = cfg.listModels;
18
21
  if (listModels) {
@@ -24,11 +27,12 @@ function sdkTools(actions, cfg) {
24
27
  if (setupClient) {
25
28
  for (const client of ["claude", "codex"]) {
26
29
  const label = client === "claude" ? "Claude Code" : "Codex";
27
- tools.push(tool(`setup_${client}`, `Configure ${label} to use the copilot-reverse proxy (writes its config). scope defaults to "global" (all projects); model defaults to the current chat model.`, setupShape, async (args) => {
28
- const scope = args.scope ?? "global";
29
- const model = args.model ?? cfg.model;
30
- const r = await setupClient(client, scope, model);
31
- return { content: [{ type: "text", text: `configured ${label} (${scope}) with model ${model} — wrote ${r.path}; keys: ${r.changed.join(", ") || "(no change)"}` }] };
30
+ // scope+model are REQUIRED (not defaulted): config writes are not reversible-by-undo, so the
31
+ // assistant must confirm both with the user first rather than silently writing the global scope
32
+ // with the current model. The prompt tells it to ask; making the args required enforces it.
33
+ tools.push(tool(`setup_${client}`, `Configure ${label} to use the proxy. REQUIRES scope ("global"=all projects / "project"=here) AND model — ask the user for both before calling; do not assume.`, requiredSetupShape, async (args) => {
34
+ const r = await setupClient(client, args.scope, args.model);
35
+ return { content: [{ type: "text", text: `configured ${label} (${args.scope}) with model ${args.model} — wrote ${r.path}; keys: ${r.changed.join(", ") || "(no change)"}` }] };
32
36
  }));
33
37
  }
34
38
  }
@@ -70,9 +74,11 @@ export async function runAssistantTurn(cfg, prompt, print, queryFn = query, abor
70
74
  settingSources: [],
71
75
  systemPrompt: "You are copilot-reverse's built-in assistant for the local Copilot proxy. Be concise. " +
72
76
  "When the user expresses an intent you have a tool for, CALL THE TOOL instead of explaining. " +
73
- "Tools: get_status, restart_worker, run_doctor, recent_requests, list_models (show available " +
74
- "models + context windows), setup_claude / setup_codex (configure those clients to use the proxy). " +
75
- "E.g. 'list models' -> call list_models; 'set up claude' -> call setup_claude.",
77
+ "Tools: get_status, restart_worker, run_doctor, recent_requests, recent_errors, metrics, list_models " +
78
+ "(models + context windows), setup_claude / setup_codex (configure those clients). " +
79
+ "SETUP RULE: setup_claude/setup_codex WRITE config and need scope (global=all projects / project=here) " +
80
+ "AND model. Before calling, confirm BOTH with the user — if unstated, ask (offer list_models). Never assume. " +
81
+ "E.g. 'list models' -> list_models; 'set up claude' -> ask scope+model, then setup_claude.",
76
82
  permissionMode: "bypassPermissions",
77
83
  includePartialMessages: true,
78
84
  ...(abortController ? { abortController } : {}),
@@ -1,3 +1,4 @@
1
+ import { aggregate, recentErrors } from "../panels/metrics-agg.js";
1
2
  // Plain action handlers — wrapped as SDK tools in runtime.ts.
2
3
  // Each takes a parsed-args object and returns a short text result for the model.
3
4
  export function buildActions(client) {
@@ -20,5 +21,17 @@ export function buildActions(client) {
20
21
  return "no requests logged yet";
21
22
  return reqs.slice(0, 10).map((r) => `${r.endpoint} ${r.model} ${r.status} ${r.latencyMs}ms`).join("; ");
22
23
  },
24
+ async recent_errors(_args) {
25
+ const errs = recentErrors(await client.requests(), 10);
26
+ if (!errs.length)
27
+ return "no request errors logged — everything's green";
28
+ return errs.map((e) => `${e.status} ${e.endpoint} ${e.model} — ${e.error ?? "(no message)"}`).join("; ");
29
+ },
30
+ async metrics(_args) {
31
+ const a = aggregate(await client.requests());
32
+ if (!a.total)
33
+ return "no requests yet";
34
+ return `requests: ${a.total}, errors: ${a.errors}; ` + a.byModel.map((r) => `${r.model} n=${r.count} avg=${r.avgMs}ms`).join("; ");
35
+ },
23
36
  };
24
37
  }
@@ -1,8 +1,11 @@
1
+ // A request "failed" if it returned a 4xx/5xx OR carried an error message — runaway streams finish
2
+ // 200 but tag an error (model degenerated, cut early), and those are exactly what we want to surface.
3
+ const isError = (s) => s.status >= 400 || s.error != null;
1
4
  export function aggregate(samples) {
2
5
  const map = new Map();
3
6
  let errors = 0;
4
7
  for (const s of samples) {
5
- if (s.status >= 400)
8
+ if (isError(s))
6
9
  errors++;
7
10
  const m = map.get(s.model) ?? { count: 0, sum: 0 };
8
11
  m.count++;
@@ -15,8 +18,8 @@ export function aggregate(samples) {
15
18
  byModel: [...map.entries()].map(([model, v]) => ({ model, count: v.count, avgMs: Math.round(v.sum / v.count) })),
16
19
  };
17
20
  }
18
- // The failed requests (status >= 400), in the order given (callers pass newest-first), capped at `limit`.
19
- // This is the actually-useful "log" — what failed and why — as opposed to worker restart events.
21
+ // The failed requests (status >= 400 or any tagged error), in the order given (callers pass
22
+ // newest-first), capped at `limit`. This is the actually-useful "log" — what failed and why — as opposed to worker restart events.
20
23
  export function recentErrors(samples, limit) {
21
- return samples.filter((s) => s.status >= 400).slice(0, limit);
24
+ return samples.filter(isError).slice(0, limit);
22
25
  }
@@ -1,12 +1,19 @@
1
1
  // Sentinel for an unconfigured report target. /report refuses to open until this is changed.
2
2
  export const PLACEHOLDER_REPO = "OWNER/REPO";
3
- // A diagnostics-only report. It contains metrics, doctor output, and worker restart reasons
4
- // never request/response bodies so there is no user prompt content to leak.
3
+ // A diagnostics-only report, structured like a good hand-written issue (e.g. #5): Summary,
4
+ // Environment, Diagnostics, Steps to reproduce. It contains metrics, doctor output, and worker
5
+ // restart reasons — never request/response bodies — so there is no user prompt content to leak.
5
6
  export function buildIssueBody(i) {
7
+ const runaways = i.errors.filter((e) => e.status < 400 && /runaway/.test(e.error ?? ""));
8
+ const summary = runaways.length
9
+ ? `Stream(s) degenerated and were cut early (model looped, no stop) on ${i.status.workerState} worker — ${runaways.length} runaway(s).`
10
+ : i.errors.length
11
+ ? `${i.errors.length} recent request error(s) on a ${i.status.workerState} worker; details below.`
12
+ : `Worker state: ${i.status.workerState}. (Describe the problem below.)`;
6
13
  const lines = [
7
- "## copilot-reverse diagnostics",
8
- "",
9
- `- version: ${i.version}`,
14
+ "### Summary", "", summary, "",
15
+ "### Environment", "",
16
+ `- copilot-reverse **${i.version}**`,
10
17
  `- platform: ${i.platform}`,
11
18
  `- worker state: ${i.status.workerState}`,
12
19
  "",
@@ -18,14 +25,20 @@ export function buildIssueBody(i) {
18
25
  ? i.errors.map((e) => `- \`${e.status}\` ${e.endpoint} ${e.model} — ${e.error ?? "(no message)"}`)
19
26
  : ["- (none)"]),
20
27
  ];
28
+ if (runaways.length) {
29
+ lines.push("", "### Stream runaways (model degenerated, cut early)", ...runaways.map((e) => `- ${e.endpoint} ${e.model} after ${e.latencyMs}ms — ${e.error}`));
30
+ }
21
31
  if (i.status.restarts.length) {
22
32
  lines.push("", "### Recent worker restarts", ...i.status.restarts.slice(0, 5).map((r) => `- ${new Date(r.ts).toISOString()} ${r.reason} exit=${r.exitCode ?? "-"} ${r.stderrTail.slice(0, 120)}`));
23
33
  }
24
- lines.push("", "### What happened", "<!-- describe what you were doing when this occurred -->", "");
34
+ lines.push("", "### Steps to reproduce", "<!-- what were you doing? which client/model? -->", "");
25
35
  // Keep well under GitHub's ~8KB URL cap once encoded.
26
36
  return lines.join("\n").slice(0, 5500);
27
37
  }
28
38
  export function buildIssueTitle(i) {
39
+ const runaway = i.errors.find((e) => e.status < 400 && /runaway/.test(e.error ?? ""));
40
+ if (runaway)
41
+ return `copilot-reverse: stream runaway (${runaway.model})`;
29
42
  const first = i.errors[0]?.error;
30
43
  return `copilot-reverse report: ${first ? first.slice(0, 70) : i.status.workerState}`;
31
44
  }
@@ -2,10 +2,13 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
2
2
  import { homedir } from "node:os";
3
3
  import { join, dirname } from "node:path";
4
4
  // The env keys copilot-reverse writes for each client — so reset knows exactly what to remove.
5
+ // ANTHROPIC_AUTH_TOKEN isn't one we write, but reset strips it too: if it lingers alongside our
6
+ // API key, Claude Code warns "both set", so a clean reset should clear the conflict.
5
7
  export const CLAUDE_ENV_KEYS = [
6
- "ANTHROPIC_BASE_URL", "ANTHROPIC_API_KEY", "ANTHROPIC_MODEL",
8
+ "ANTHROPIC_BASE_URL", "ANTHROPIC_API_KEY", "ANTHROPIC_MODEL", "ANTHROPIC_AUTH_TOKEN",
7
9
  "CLAUDE_CODE_AUTO_COMPACT_WINDOW", "CLAUDE_AUTOCOMPACT_PCT_OVERRIDE",
8
10
  "CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC", "CLAUDE_CODE_ATTRIBUTION_HEADER",
11
+ "CLAUDE_CODE_ENABLE_GATEWAY_MODEL_DISCOVERY",
9
12
  ];
10
13
  export const CODEX_ENV_KEYS = ["OPENAI_BASE_URL", "OPENAI_API_KEY", "OPENAI_MODEL"];
11
14
  // --- Claude Code: merge into settings.json `env` (non-destructive) ---
@@ -29,6 +32,12 @@ export function applyClaude(scope, env, o = {}) {
29
32
  }
30
33
  const envObj = (settings.env && typeof settings.env === "object" ? settings.env : {});
31
34
  const changed = [];
35
+ // We authenticate with ANTHROPIC_API_KEY; a leftover ANTHROPIC_AUTH_TOKEN here makes Claude Code
36
+ // warn "both set · auth may not work" — strip it so our setup leaves a clean, single-credential env.
37
+ if ("ANTHROPIC_AUTH_TOKEN" in envObj) {
38
+ delete envObj.ANTHROPIC_AUTH_TOKEN;
39
+ changed.push("ANTHROPIC_AUTH_TOKEN(removed)");
40
+ }
32
41
  for (const [k, v] of Object.entries(env)) {
33
42
  if (envObj[k] !== v) {
34
43
  envObj[k] = v;
@@ -27,6 +27,10 @@ export function claudeCopilotReverseEnv(base, apiKey, model, contextWindow) {
27
27
  CLAUDE_AUTOCOMPACT_PCT_OVERRIDE: "80",
28
28
  CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: "1",
29
29
  CLAUDE_CODE_ATTRIBUTION_HEADER: "0", // keep prompt caching working on a non-Anthropic gateway
30
+ // Populate Claude Code's /model picker from our /anthropic/v1/models so the user can switch
31
+ // models natively. Coexists with ANTHROPIC_MODEL (which stays the 1M default — it does NOT lock
32
+ // the picker). Claude Code >=2.1.129 only; older builds ignore it. Picker lists claude* ids.
33
+ CLAUDE_CODE_ENABLE_GATEWAY_MODEL_DISCOVERY: "1",
30
34
  };
31
35
  }
32
36
  export function codexConfig(e) {
@@ -3,33 +3,40 @@ import { claudePath, codexPath } from "./apply.js";
3
3
  // A copilot-reverse-written endpoint always points at the local loopback proxy — this lets us tell our
4
4
  // own config apart from a user's pre-existing ANTHROPIC_BASE_URL / OPENAI_BASE_URL.
5
5
  const isCopilotReverse = (v) => typeof v === "string" && /127\.0\.0\.1|localhost/.test(v);
6
- function claudeConfigured(scope, o) {
6
+ function claudeScope(scope, o) {
7
7
  const p = claudePath(scope, o);
8
8
  if (!existsSync(p))
9
- return false;
9
+ return { on: false };
10
10
  try {
11
11
  const s = JSON.parse(readFileSync(p, "utf8"));
12
- return isCopilotReverse(s.env?.ANTHROPIC_BASE_URL);
12
+ if (!isCopilotReverse(s.env?.ANTHROPIC_BASE_URL))
13
+ return { on: false };
14
+ return { on: true, model: typeof s.env?.ANTHROPIC_MODEL === "string" ? s.env.ANTHROPIC_MODEL : undefined };
13
15
  }
14
16
  catch {
15
- return false;
17
+ return { on: false };
16
18
  }
17
19
  }
18
- function codexConfigured(scope, o) {
20
+ function codexScope(scope, o) {
19
21
  const p = codexPath(scope, o);
20
22
  if (!existsSync(p))
21
- return false;
23
+ return { on: false };
22
24
  try {
23
- const m = /^OPENAI_BASE_URL=(.*)$/m.exec(readFileSync(p, "utf8"));
24
- return !!m && isCopilotReverse(m[1]);
25
+ const txt = readFileSync(p, "utf8");
26
+ const base = /^OPENAI_BASE_URL=(.*)$/m.exec(txt);
27
+ if (!base || !isCopilotReverse(base[1]))
28
+ return { on: false };
29
+ return { on: true, model: /^OPENAI_MODEL=(.*)$/m.exec(txt)?.[1] };
25
30
  }
26
31
  catch {
27
- return false;
32
+ return { on: false };
28
33
  }
29
34
  }
30
35
  export function readClientStatus(o = {}) {
36
+ const cu = claudeScope("global", o), cp = claudeScope("project", o);
37
+ const xu = codexScope("global", o), xp = codexScope("project", o);
31
38
  return {
32
- claude: { user: claudeConfigured("global", o), project: claudeConfigured("project", o) },
33
- codex: { user: codexConfigured("global", o), project: codexConfigured("project", o) },
39
+ claude: { user: cu.on, project: cp.on, userModel: cu.model, projectModel: cp.model },
40
+ codex: { user: xu.on, project: xp.on, userModel: xu.model, projectModel: xp.model },
34
41
  };
35
42
  }
package/dist/version.js CHANGED
@@ -1,2 +1,2 @@
1
1
  // AUTO-GENERATED by scripts/gen-version.mjs from package.json — do not edit.
2
- export const APP_VERSION = "0.5.3";
2
+ export const APP_VERSION = "0.5.5";
@@ -4,6 +4,7 @@ import { estimateTokens } from "../core/tokens.js";
4
4
  import { errorHint } from "./errors.js";
5
5
  import { CopilotAuthError } from "../providers/copilot/token.js";
6
6
  import { isGatewayTool } from "../core/server-tools.js";
7
+ import { RunawayGuard } from "../core/stream-guard.js";
7
8
  const frame = (event, data) => `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`;
8
9
  const safeJson = (s) => { try {
9
10
  return JSON.parse(s);
@@ -14,6 +15,11 @@ catch {
14
15
  // Bounds the gateway tool loop so a model that calls web_search every turn (or a runner that always
15
16
  // returns "search more") can never spin forever inside one request.
16
17
  const MAX_TOOL_ITERS = 5;
18
+ // Wall-clock cap on a single streaming turn. The model occasionally degenerates into emitting the
19
+ // same short token forever ("code\ncode\ncode…") and never sends a stop, which would otherwise relay
20
+ // for minutes and freeze the client. The RunawayGuard catches the repetition fast; this is the
21
+ // backstop for any slow-but-endless stream. On either trip we end the turn cleanly as max_tokens.
22
+ const STREAM_DEADLINE_MS = 120_000;
17
23
  export function mountAnthropic(app, router, onMetric, runner) {
18
24
  // Model discovery — Anthropic list shape. Claude Desktop / Anthropic-protocol clients GET this
19
25
  // before chatting; without it they 404 on the connection test.
@@ -52,7 +58,13 @@ export function mountAnthropic(app, router, onMetric, runner) {
52
58
  let next = 0;
53
59
  let lastPrompt = estInput, lastCached = 0, sumCompletion = 0;
54
60
  let finalStop = "stop";
55
- for (let iter = 0; iter < MAX_TOOL_ITERS; iter++) {
61
+ // Runaway protection spans the whole request: repeated-token degeneration + a wall-clock
62
+ // deadline. Tripping ends the stream as a clean max_tokens turn instead of hanging.
63
+ const guard = new RunawayGuard();
64
+ const deadline = start + STREAM_DEADLINE_MS;
65
+ let runaway = false;
66
+ let runawayReason = "";
67
+ for (let iter = 0; iter < MAX_TOOL_ITERS && !runaway; iter++) {
56
68
  let textIndex; // Anthropic index of this turn's text block
57
69
  const byCopilotIdx = new Map();
58
70
  const buffered = []; // tool calls seen this turn, in order
@@ -73,6 +85,13 @@ export function mountAnthropic(app, router, onMetric, runner) {
73
85
  res.write(frame("content_block_start", { type: "content_block_start", index: textIndex, content_block: { type: "text", text: "" } }));
74
86
  }
75
87
  res.write(frame("content_block_delta", { type: "content_block_delta", index: textIndex, delta: { type: "text_delta", text: chunk.delta } }));
88
+ // Degenerate-stream kill-switch: a model looping on a short token is cut here.
89
+ if (guard.push(chunk.delta)) {
90
+ runaway = true;
91
+ runawayReason = guard.reason ?? "repetition";
92
+ turnStop = "length";
93
+ break;
94
+ }
76
95
  }
77
96
  else if (chunk.kind === "tool_use_start") {
78
97
  if (!byCopilotIdx.has(chunk.index)) {
@@ -86,9 +105,23 @@ export function mountAnthropic(app, router, onMetric, runner) {
86
105
  if (t)
87
106
  t.args += chunk.argsDelta;
88
107
  }
108
+ // Wall-clock backstop on EVERY chunk kind: a tool-call-only runaway never feeds the text
109
+ // guard, so without this a model spamming calls would relay until the socket died.
110
+ if (Date.now() > deadline) {
111
+ runaway = true;
112
+ runawayReason = "deadline";
113
+ turnStop = "length";
114
+ break;
115
+ }
89
116
  }
90
117
  if (textIndex !== undefined)
91
118
  res.write(frame("content_block_stop", { type: "content_block_stop", index: textIndex }));
119
+ // Runaway tripped mid-stream (repetition guard or deadline): end as max_tokens. Don't forward partial tool calls or
120
+ // loop into gateway tools — the turn was abandoned, not legitimately completed.
121
+ if (runaway) {
122
+ finalStop = "length";
123
+ break;
124
+ }
92
125
  const gatewayCalls = buffered.filter((t) => isGatewayTool(t.name));
93
126
  // Invariant: a gateway tool (web_search/web_fetch) must NEVER reach the client — the client
94
127
  // has no handler for it and would stall. So whenever the model calls gateway tools (and a
@@ -124,7 +157,7 @@ export function mountAnthropic(app, router, onMetric, runner) {
124
157
  res.write(frame("message_delta", { type: "message_delta", delta: { stop_reason: finalStop === "tool_use" ? "tool_use" : finalStop === "length" ? "max_tokens" : "end_turn" }, usage: deltaUsage }));
125
158
  res.write(frame("message_stop", { type: "message_stop" }));
126
159
  res.end();
127
- metric(200);
160
+ metric(200, runaway ? `runaway stream cut (${runawayReason}) — model degenerated, ended early as max_tokens` : undefined);
128
161
  }
129
162
  else {
130
163
  // Non-stream: same gateway loop without SSE — run gateway tools and re-complete until the
@@ -3,6 +3,10 @@ import { openaiRequestToCanonical, canonicalToOpenAIResponse, canonicalChunkToOp
3
3
  import { responsesRequestToCanonical, canonicalToResponsesResponse, ResponsesSSE } from "../core/responses-inbound.js";
4
4
  import { errorHint } from "./errors.js";
5
5
  import { CopilotAuthError } from "../providers/copilot/token.js";
6
+ import { RunawayGuard } from "../core/stream-guard.js";
7
+ // Cut a single streaming turn that degenerates (model repeats one short token forever, never stops)
8
+ // so the client gets a bounded answer instead of a frozen session. Mirrors the Anthropic backend.
9
+ const STREAM_DEADLINE_MS = 120_000;
6
10
  export function mountOpenAI(app, router, onMetric) {
7
11
  // Model discovery — OpenAI list shape. Clients (LiteLLM-style gateways, "test connection" probes)
8
12
  // GET this before chatting; without it they 404 and refuse to connect.
@@ -20,10 +24,24 @@ export function mountOpenAI(app, router, onMetric) {
20
24
  res.setHeader("content-type", "text/event-stream");
21
25
  res.setHeader("cache-control", "no-cache");
22
26
  const id = `chatcmpl-${randomUUID().replace(/-/g, "")}`; // unique per response, not constant
23
- for await (const chunk of provider.stream(canon))
27
+ const guard = new RunawayGuard();
28
+ const deadline = start + STREAM_DEADLINE_MS;
29
+ let runawayReason = "";
30
+ for await (const chunk of provider.stream(canon)) {
24
31
  res.write(canonicalChunkToOpenAISSE(chunk, id, canon.model));
32
+ // Text guard first, then the wall-clock backstop: a model can loop on tool calls forever, which
33
+ // never feeds the text guard — the wall clock cuts those cleanly instead of freezing.
34
+ if (chunk.kind === "text" && guard.push(chunk.delta)) {
35
+ runawayReason = guard.reason ?? "repetition";
36
+ break;
37
+ }
38
+ if (Date.now() > deadline) {
39
+ runawayReason = "deadline";
40
+ break;
41
+ }
42
+ }
25
43
  res.end();
26
- metric(200);
44
+ metric(200, runawayReason ? `runaway stream cut (${runawayReason}) — model degenerated, ended early` : undefined);
27
45
  }
28
46
  else {
29
47
  res.json(canonicalToOpenAIResponse(await provider.complete(canon)));
@@ -65,15 +83,24 @@ export function mountOpenAI(app, router, onMetric) {
65
83
  const argsByIdx = new Map();
66
84
  let usage;
67
85
  let finish = "stop";
86
+ const guard = new RunawayGuard();
87
+ const deadline = start + STREAM_DEADLINE_MS;
88
+ let runawayReason = "";
68
89
  for await (const chunk of provider.stream(canon)) {
69
90
  if (chunk.done) {
70
91
  finish = chunk.finishReason ?? "stop";
71
92
  usage = chunk.usage;
72
93
  break;
73
94
  }
74
- if (chunk.kind === "text")
95
+ if (chunk.kind === "text") {
75
96
  for (const f of sse.text(chunk.delta))
76
97
  res.write(f);
98
+ if (guard.push(chunk.delta)) {
99
+ finish = "length";
100
+ runawayReason = guard.reason ?? "repetition";
101
+ break;
102
+ }
103
+ }
77
104
  else if (chunk.kind === "tool_use_start")
78
105
  for (const f of sse.toolStart(chunk.index, chunk.id, chunk.name))
79
106
  res.write(f);
@@ -82,11 +109,17 @@ export function mountOpenAI(app, router, onMetric) {
82
109
  for (const f of sse.toolArgs(chunk.index, chunk.argsDelta))
83
110
  res.write(f);
84
111
  }
112
+ // Deadline applies to every chunk kind: a tool-call-only runaway never hits the text guard.
113
+ if (Date.now() > deadline) {
114
+ finish = "length";
115
+ runawayReason = "deadline";
116
+ break;
117
+ }
85
118
  }
86
119
  for (const f of sse.finish(usage, finish, argsByIdx))
87
120
  res.write(f);
88
121
  res.end();
89
- metric(200);
122
+ metric(200, runawayReason ? `runaway stream cut (${runawayReason}) — model degenerated, ended early` : undefined);
90
123
  }
91
124
  else {
92
125
  res.json(canonicalToResponsesResponse(await provider.complete(canon)));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "copilot-reverse",
3
- "version": "0.5.3",
3
+ "version": "0.5.5",
4
4
  "description": "Interactive terminal app that exposes your GitHub Copilot subscription as local OpenAI- and Anthropic-compatible endpoints, with a self-healing daemon and a built-in assistant.",
5
5
  "type": "module",
6
6
  "license": "MIT",