copilot-reverse 0.5.3 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -3
- package/dist/cli/index.js +9 -1
- package/dist/core/stream-guard.js +38 -0
- package/dist/core/tool-xml.js +9 -2
- package/dist/providers/copilot/responses-upstream.js +7 -4
- package/dist/shared/config.js +3 -0
- package/dist/shared/prefs.js +24 -16
- package/dist/supervisor/index.js +3 -1
- package/dist/tui/app.js +16 -8
- package/dist/tui/assistant/runtime.js +17 -11
- package/dist/tui/assistant/tools.js +13 -0
- package/dist/tui/panels/metrics-agg.js +7 -4
- package/dist/tui/report.js +19 -6
- package/dist/tui/setup/apply.js +10 -1
- package/dist/tui/setup/clients.js +4 -0
- package/dist/tui/setup/status.js +18 -11
- package/dist/version.js +1 -1
- package/dist/worker/anthropic-server.js +35 -2
- package/dist/worker/openai-server.js +37 -4
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -56,9 +56,9 @@ Here's the app itself — a prompt, a live status bar, and slash-command autocom
|
|
|
56
56
|
Just **talk to it** — it understands plain English and will do the work for you:
|
|
57
57
|
|
|
58
58
|
> *"list models"* → shows every model + its context window
|
|
59
|
-
> *"set up claude"* → configures Claude Code
|
|
59
|
+
> *"set up claude"* → asks scope (global/project) + model, then configures Claude Code
|
|
60
60
|
> *"is the worker healthy?"* → runs a health check
|
|
61
|
-
> *"why did my last request fail?"* → shows the error
|
|
61
|
+
> *"why did my last request fail?"* → shows the error (incl. cut-short stream runaways)
|
|
62
62
|
|
|
63
63
|
Prefer commands? Type `/` to see them all. The essentials:
|
|
64
64
|
|
|
@@ -66,7 +66,7 @@ Prefer commands? Type `/` to see them all. The essentials:
|
|
|
66
66
|
|---|---|
|
|
67
67
|
| `/setup-claude` · `/setup-codex` | Point Claude Code / Codex at copilot-reverse |
|
|
68
68
|
| `/model` | Switch the chat model (1M-context models marked) |
|
|
69
|
-
| `/status` · `/doctor` | Is everything healthy? |
|
|
69
|
+
| `/status` · `/doctor` | Is everything healthy? (`/status` shows each client's scope + model) |
|
|
70
70
|
| `/logs` · `/metrics` | What ran, what failed, and why |
|
|
71
71
|
| `/dashboard` | Open a live web dashboard in your browser |
|
|
72
72
|
| `/report` | File a pre-filled bug report (diagnostics only — no prompts) |
|
|
@@ -147,6 +147,10 @@ of your config untouched.
|
|
|
147
147
|
- **Your data stays local.** The app proxies between your editor and Copilot on `127.0.0.1`. Your
|
|
148
148
|
GitHub token lives only in `~/.copilot-reverse/creds.json` on your own disk.
|
|
149
149
|
- **It heals itself.** If the proxy crashes, the supervisor restarts it with backoff and records why.
|
|
150
|
+
- **It never freezes.** If a model degenerates (loops on one token, never stops), the proxy cuts the
|
|
151
|
+
stream cleanly as `max_tokens` and tags it — `/report` then files a prefilled issue so it's easy to flag.
|
|
152
|
+
- **Tunable.** `~/.copilot-reverse` config covers ports, restart backoff, and the GitHub-token
|
|
153
|
+
heartbeat interval; defaults are sensible, override only if needed.
|
|
150
154
|
- **Unofficial endpoints.** This uses community-documented Copilot endpoints with *your own*
|
|
151
155
|
subscription. It may break if GitHub changes them — that's the trade-off for not needing extra keys.
|
|
152
156
|
|
package/dist/cli/index.js
CHANGED
|
@@ -13,7 +13,7 @@ import { makeOnChat } from "../tui/assistant/on-chat.js";
|
|
|
13
13
|
import { readGhToken, clearGhToken, hasGhTokenFile } from "../shared/creds.js";
|
|
14
14
|
import { writeWebIqKey, readWebIqKey, clearWebIqKey, readWebSearchMode, writeWebSearchMode, resolveWebSearchBackend } from "../shared/webiq-key.js";
|
|
15
15
|
import { readClientSetup, writeClientSetup } from "../shared/client-setup.js";
|
|
16
|
-
import { readChatModel, writeChatModel } from "../shared/prefs.js";
|
|
16
|
+
import { readChatModel, writeChatModel, shouldShowChange, markChangeShown } from "../shared/prefs.js";
|
|
17
17
|
import { CopilotTokenStore, isCopilotTokenValid } from "../providers/copilot/token.js";
|
|
18
18
|
import { fetchCopilotModels, fetchModelLimits } from "../providers/copilot/models.js";
|
|
19
19
|
import { applyClaude, applyCodex, resetClaude, resetCodex, CLAUDE_ENV_KEYS, CODEX_ENV_KEYS } from "../tui/setup/apply.js";
|
|
@@ -181,6 +181,12 @@ async function launchTui() {
|
|
|
181
181
|
}
|
|
182
182
|
});
|
|
183
183
|
const persistedModel = readChatModel(dataDir());
|
|
184
|
+
// "What's new" banner: MAJOR changes only — keyed by version so each release re-announces, shown
|
|
185
|
+
// ~3 launches then quiet. Minor fixes/polish do NOT go here; reserve it for things worth noticing.
|
|
186
|
+
const CHANGE_ID = `v${APP_VERSION}`;
|
|
187
|
+
const changeBanner = shouldShowChange(dataDir(), CHANGE_ID)
|
|
188
|
+
? { lines: ["• runaway streams now cut cleanly — no more frozen 'code code code' sessions"] }
|
|
189
|
+
: undefined;
|
|
184
190
|
// Startup overview. The token was already validated above (re-auth happens before we get here), so
|
|
185
191
|
// GitHub is connected; web search readiness and configured clients are read from disk.
|
|
186
192
|
const clientStatus = readClientStatus();
|
|
@@ -215,6 +221,8 @@ async function launchTui() {
|
|
|
215
221
|
disableWebiq: () => { clearWebIqKey(dataDir()); },
|
|
216
222
|
webSearchBackend: () => resolveWebSearchBackend(readWebSearchMode(dataDir()), Boolean(readWebIqKey(dataDir()))),
|
|
217
223
|
startupStatus,
|
|
224
|
+
changeBanner,
|
|
225
|
+
onChangeSeen: () => markChangeShown(dataDir(), CHANGE_ID),
|
|
218
226
|
githubStatus: async () => {
|
|
219
227
|
const token = readGhToken(dataDir());
|
|
220
228
|
if (!token)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
// Defends a streaming turn against upstream model degeneration: the model collapses into emitting
|
|
2
|
+
// the same short token forever ("code\ncode\ncode…") and never sends a stop, so a faithful proxy
|
|
3
|
+
// would relay deltas until the socket dies — the session appears frozen. This watchdog converts
|
|
4
|
+
// that into a clean, bounded stop. It is pure (no I/O, no timers) so it is trivially testable; the
|
|
5
|
+
// idle/wall-clock timeout lives at the SSE loop where the timers are. Defaults are generous: real
|
|
6
|
+
// answers don't hit them, only runaways do.
|
|
7
|
+
/**
 * Watchdog for a single streaming turn. Feed it every text delta via `push`;
 * it reports when the stream should be cut because the output has either
 * collapsed into one short delta repeated back-to-back or grown past a hard
 * character cap. It performs no I/O and owns no timers.
 */
export class RunawayGuard {
    /** Consecutive identical short deltas tolerated before tripping. */
    maxRepeats;
    /** Total characters tolerated across the whole turn before tripping. */
    maxOutputChars;
    /** The most recent non-empty delta seen. */
    last = "";
    /** How many times in a row `last` has been seen. */
    repeats = 0;
    /** Running total of characters pushed so far. */
    chars = 0;
    /** `undefined` until a limit trips, then "repetition" or "max_output". */
    reason;
    /**
     * @param {{ maxRepeats?: number, maxOutputChars?: number }} [limits]
     *   Optional overrides; defaults are 200 repeats and 2,000,000 characters.
     */
    constructor(limits = {}) {
        this.maxRepeats = limits?.maxRepeats ?? 200;
        this.maxOutputChars = limits?.maxOutputChars ?? 2_000_000;
    }
    /**
     * Records one streamed delta and reports whether the stream should be cut.
     *
     * @param {string} delta Text fragment just received from upstream.
     * @returns {boolean} `true` once a limit has been exceeded (and on every
     *   call after that); `false` while the stream is still within limits.
     */
    push(delta) {
        // Latch: once tripped, the verdict and its reason never change, so a
        // caller that keeps pushing cannot "un-trip" the guard or have the
        // original reason overwritten by a later limit.
        if (this.reason !== undefined) {
            return true;
        }
        // Empty or non-text deltas carry no output; they must not throw and
        // must not be counted as a repeated token.
        if (typeof delta !== "string" || delta.length === 0) {
            return false;
        }
        // Only short deltas are treated as the degenerate-loop signal; long
        // repeated text just accumulates against the character cap.
        const shortDeltaChars = 16;
        this.chars += delta.length;
        if (delta === this.last) {
            this.repeats++;
        }
        else {
            this.repeats = 1;
            this.last = delta;
        }
        if (delta.length <= shortDeltaChars && this.repeats > this.maxRepeats) {
            this.reason = "repetition";
            return true;
        }
        if (this.chars > this.maxOutputChars) {
            this.reason = "max_output";
            return true;
        }
        return false;
    }
}
|
package/dist/core/tool-xml.js
CHANGED
|
@@ -83,8 +83,15 @@ export class ToolCallExtractor {
|
|
|
83
83
|
if (end < 0)
|
|
84
84
|
return events; // incomplete block — wait for more data
|
|
85
85
|
const block = this.buf.slice(0, end);
|
|
86
|
-
|
|
87
|
-
|
|
86
|
+
const tools = parseInvokes(block);
|
|
87
|
+
// Parse-faithful: a block that recovers no tools (empty/missing name="" or malformed body)
|
|
88
|
+
// must NOT be swallowed — pass it through verbatim so the client sees exactly what the model
|
|
89
|
+
// emitted. Swallowing yields a turn with neither text nor a tool, which loops the model.
|
|
90
|
+
if (tools.length)
|
|
91
|
+
for (const tool of tools)
|
|
92
|
+
events.push({ kind: "tool", tool });
|
|
93
|
+
else
|
|
94
|
+
events.push({ kind: "text", text: block });
|
|
88
95
|
this.buf = this.buf.slice(end);
|
|
89
96
|
this.capturing = false; // a following <invoke> re-triggers via the passthrough branch
|
|
90
97
|
}
|
|
@@ -84,9 +84,9 @@ export function parseResponsesResult(data) {
|
|
|
84
84
|
}
|
|
85
85
|
}
|
|
86
86
|
}
|
|
87
|
-
else if (item.type === "function_call") {
|
|
87
|
+
else if (item.type === "function_call" && item.name) {
|
|
88
88
|
sawTool = true;
|
|
89
|
-
content.push({ type: "tool_use", id: item.call_id ?? item.id, name: item.name
|
|
89
|
+
content.push({ type: "tool_use", id: item.call_id ?? item.id, name: item.name, input: safeJson(item.arguments) });
|
|
90
90
|
}
|
|
91
91
|
}
|
|
92
92
|
const finishReason = data.status === "incomplete" ? mapIncomplete(data.incomplete_details?.reason) : sawTool ? "tool_use" : "stop";
|
|
@@ -156,10 +156,13 @@ export async function* streamResponses(res) {
|
|
|
156
156
|
switch (ev.type) {
|
|
157
157
|
case "response.output_item.added": {
|
|
158
158
|
const item = ev.item ?? {};
|
|
159
|
-
|
|
159
|
+
// Gate on a present name, mirroring the chat adapter's `tc.function?.name` guard: a
|
|
160
|
+
// nameless function_call would surface as a bare "call:" the client can't run. No name,
|
|
161
|
+
// no start — its later arg deltas find no mapping and are dropped, not rendered.
|
|
162
|
+
if (item.type === "function_call" && item.name) {
|
|
160
163
|
const idx = nextToolIndex++;
|
|
161
164
|
toolByOutputIndex.set(ev.output_index, idx);
|
|
162
|
-
yield { kind: "tool_use_start", index: idx, id: item.call_id ?? item.id ?? `call_${idx}`, name: item.name
|
|
165
|
+
yield { kind: "tool_use_start", index: idx, id: item.call_id ?? item.id ?? `call_${idx}`, name: item.name, done: false };
|
|
163
166
|
}
|
|
164
167
|
break;
|
|
165
168
|
}
|
package/dist/shared/config.js
CHANGED
|
@@ -4,6 +4,8 @@ export function defaultConfig() {
|
|
|
4
4
|
supervisorPort: 7890,
|
|
5
5
|
workerPort: 7891,
|
|
6
6
|
restart: { maxCrashes: 5, windowMs: 60_000, baseBackoffMs: 500, maxBackoffMs: 8_000, unhealthyCooldownMs: 30_000 },
|
|
7
|
+
// Token failure is rare and GitHub rate-limits, so a slow cadence is plenty; overridable for tests/tuning.
|
|
8
|
+
heartbeat: { intervalMs: 60_000, initialDelayMs: 2_000 },
|
|
7
9
|
// Empty = pass the requested model straight through to Copilot. Add entries (or "*") to remap.
|
|
8
10
|
modelMap: {},
|
|
9
11
|
// Set MAESTRO_REPORT_REPO=owner/repo to override where /report files diagnostics issues.
|
|
@@ -15,6 +17,7 @@ export function mergeConfig(base, o) {
|
|
|
15
17
|
...base,
|
|
16
18
|
...o,
|
|
17
19
|
restart: { ...base.restart, ...(o.restart ?? {}) },
|
|
20
|
+
heartbeat: { ...base.heartbeat, ...(o.heartbeat ?? {}) },
|
|
18
21
|
modelMap: { ...base.modelMap, ...(o.modelMap ?? {}) },
|
|
19
22
|
};
|
|
20
23
|
}
|
package/dist/shared/prefs.js
CHANGED
|
@@ -1,28 +1,36 @@
|
|
|
1
1
|
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
2
2
|
import { join } from "node:path";
|
|
3
|
-
// Small user-preferences store (e.g. the chosen chat model), persisted
|
|
3
|
+
// Small user-preferences store (e.g. the chosen chat model, change-banner view counts), persisted
|
|
4
|
+
// across launches.
|
|
4
5
|
// Path of the preferences file inside the data directory.
const file = (dir) => join(dir, "prefs.json");
/**
 * Loads the stored preferences.
 *
 * @param {string} dir Data directory that holds prefs.json.
 * @returns {Record<string, unknown>} The stored preferences, or `{}` when the
 *   file is missing, unreadable, not valid JSON, or not a JSON object.
 */
function read(dir) {
    if (!existsSync(file(dir)))
        return {};
    try {
        const parsed = JSON.parse(readFileSync(file(dir), "utf8"));
        // Valid JSON is not necessarily an object: `null` would make every
        // property read below throw, and an array or primitive would be
        // spread into the next write. Treat anything else as "no prefs".
        const isPlainObject = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
        return isPlainObject ? parsed : {};
    }
    catch {
        return {};
    }
}
/**
 * Replaces the stored preferences, creating the data directory if needed.
 *
 * @param {string} dir Data directory that holds prefs.json.
 * @param {Record<string, unknown>} next Complete preferences object to store.
 */
function write(dir, next) {
    if (!existsSync(dir))
        mkdirSync(dir, { recursive: true });
    writeFileSync(file(dir), JSON.stringify(next));
}
/**
 * @param {string} dir Data directory that holds prefs.json.
 * @returns {string | null} The persisted chat model, or `null` when none is stored.
 */
export function readChatModel(dir) {
    return read(dir).chatModel ?? null;
}
/**
 * Persists the chosen chat model, keeping every other stored preference.
 *
 * @param {string} dir Data directory that holds prefs.json.
 * @param {string} model Model identifier to remember.
 */
export function writeChatModel(dir, model) {
    write(dir, { ...read(dir), chatModel: model });
}
// "What's new" banner: show a change a few times then stop. Counts are keyed by an id (e.g. version),
// so a new release re-shows; bumping the count is what decides whether the banner appears again.
const seenKey = (id) => `seen:${id}`;
// Stored view count for a change id. Anything that is not a finite number
// (missing, hand-edited, corrupted) counts as "never shown", so `+ 1` can
// never turn into string concatenation.
const seenCount = (prefs, id) => {
    const stored = prefs[seenKey(id)];
    return typeof stored === "number" && Number.isFinite(stored) ? stored : 0;
};
/**
 * @param {string} dir Data directory that holds prefs.json.
 * @param {string} id Change identifier the view count is keyed by.
 * @param {number} [maxShows=3] Number of views after which the banner stays hidden.
 * @returns {boolean} `true` while the change has been shown fewer than `maxShows` times.
 */
export function shouldShowChange(dir, id, maxShows = 3) {
    return seenCount(read(dir), id) < maxShows;
}
/**
 * Records one more view of the change banner for `id`.
 *
 * @param {string} dir Data directory that holds prefs.json.
 * @param {string} id Change identifier the view count is keyed by.
 */
export function markChangeShown(dir, id) {
    const cur = read(dir);
    write(dir, { ...cur, [seenKey(id)]: seenCount(cur, id) + 1 });
}
|
package/dist/supervisor/index.js
CHANGED
|
@@ -51,7 +51,9 @@ export function startSupervisor() {
|
|
|
51
51
|
};
|
|
52
52
|
// Periodically re-check the GitHub token so the UI reflects an expired/revoked login within ~60s,
|
|
53
53
|
// instead of only on the next failed request or a manual /status.
|
|
54
|
-
const heartbeat = new GithubHeartbeat(() => readGhToken(dataDir())
|
|
54
|
+
const heartbeat = new GithubHeartbeat(() => readGhToken(dataDir()), probeGithubAuth, undefined, {
|
|
55
|
+
intervalMs: config.heartbeat.intervalMs, initialDelayMs: config.heartbeat.initialDelayMs,
|
|
56
|
+
});
|
|
55
57
|
const app = createControlApp({
|
|
56
58
|
db, getState: () => state,
|
|
57
59
|
restart: () => monitor.restartManually(),
|
package/dist/tui/app.js
CHANGED
|
@@ -17,16 +17,20 @@ const SPINNER = ["✶", "✸", "✹", "✺", "✹", "✷"];
|
|
|
17
17
|
// Startup overview card. GitHub shows a login STATE (no real token expiry exists). Web search shows
|
|
18
18
|
// the resolved backend: "via WebIQ", "via Copilot (native)", or "unavailable — run /webiq".
|
|
19
19
|
// `extra` appends detail lines (e.g. worker restart history for /status).
|
|
20
|
-
function statusCard(s, extra = []) {
|
|
20
|
+
function statusCard(s, extra = [], clients) {
|
|
21
21
|
const gh = s.github === "connected" ? "✓ connected" : s.github === "expired" ? "✗ expired — run /login" : "✗ signed out — run /login";
|
|
22
22
|
const web = s.webSearch === "webiq" ? "✓ via WebIQ" : s.webSearch === "copilot" ? "✓ via Copilot (native)" : "✗ unavailable — run /webiq";
|
|
23
|
-
|
|
23
|
+
// Per-scope + model when we have the file-derived detail; else fall back to the simple flag.
|
|
24
|
+
const scope = (sc) => sc?.on ? `✓ ${sc.model ? sc.model.replace(/\[1m\]$/, "") : "on"}` : "○";
|
|
25
|
+
const clientsLine = clients
|
|
26
|
+
? `claude u:${scope({ on: clients.claude.user, model: clients.claude.userModel })} p:${scope({ on: clients.claude.project, model: clients.claude.projectModel })} · codex u:${scope({ on: clients.codex.user, model: clients.codex.userModel })} p:${scope({ on: clients.codex.project, model: clients.codex.projectModel })}`
|
|
27
|
+
: `claude ${s.clients.claude ? "✓" : "○"} codex ${s.clients.codex ? "✓" : "○"}`;
|
|
24
28
|
const tone = s.github === "connected" ? "ok" : "error";
|
|
25
29
|
return { type: "card", title: "status", tone, lines: [
|
|
26
30
|
`GitHub login ${gh}`,
|
|
27
31
|
`web search ${web}`,
|
|
28
32
|
`worker ${s.worker}`,
|
|
29
|
-
`clients ${
|
|
33
|
+
`clients ${clientsLine}`,
|
|
30
34
|
...extra,
|
|
31
35
|
] };
|
|
32
36
|
}
|
|
@@ -49,17 +53,21 @@ function OutputCard({ title, lines, tone }) {
|
|
|
49
53
|
function HelpCard({ commands }) {
|
|
50
54
|
return (_jsxs(Box, { flexDirection: "column", borderStyle: "round", borderColor: theme.border, paddingX: 1, marginBottom: 1, children: [_jsx(Text, { color: theme.accent, bold: true, children: "Commands" }), commands.map((c) => (_jsxs(Text, { children: [_jsx(Text, { color: theme.prompt, children: c.name.padEnd(16) }), _jsx(Text, { color: theme.muted, children: c.describe })] }, c.name))), _jsx(Text, { color: theme.muted, children: "tip: type / to autocomplete \u00B7 plain text talks to the assistant" })] }));
|
|
51
55
|
}
|
|
52
|
-
// HUD client cell: shows configured scopes read from the real config files.
|
|
56
|
+
// HUD client cell: shows configured scopes read from the real config files, with the pinned model.
|
|
53
57
|
function ClientBadge({ name, status }) {
|
|
54
|
-
const
|
|
55
|
-
|
|
58
|
+
const short = (m) => (m ? m.replace(/\[1m\]$/, "").replace(/^claude-/, "").slice(0, 14) : "");
|
|
59
|
+
const cell = (label, on, model) => (_jsxs(Text, { color: on ? theme.ready : theme.muted, children: [label, ":", on ? `✓ ${short(model)}`.trimEnd() : "○"] }));
|
|
60
|
+
return (_jsxs(Text, { color: theme.muted, children: [name, " ", cell("u", status.user, status.userModel), " ", cell("p", status.project, status.projectModel)] }));
|
|
56
61
|
}
|
|
57
|
-
export function App({ registry, title, workerState = "starting", initialModel = "—", statusSource, readStatus, modelLimits, onChat, loadModels, setup, info, onModelChange, pickModelOnStart, login, enableWebiq, disableWebiq, webSearchBackend, startupStatus, githubStatus, }) {
|
|
62
|
+
export function App({ registry, title, workerState = "starting", initialModel = "—", statusSource, readStatus, modelLimits, onChat, loadModels, setup, info, onModelChange, pickModelOnStart, login, enableWebiq, disableWebiq, webSearchBackend, startupStatus, githubStatus, changeBanner, onChangeSeen, }) {
|
|
58
63
|
const cmds = registry.list().map((c) => ({ name: c.name, describe: c.describe }));
|
|
59
64
|
const [entries, setEntries] = useState(() => [
|
|
60
65
|
...(startupStatus ? [statusCard(startupStatus)] : []),
|
|
66
|
+
...(changeBanner ? [{ type: "card", title: "what's new", tone: "info", lines: changeBanner.lines }] : []),
|
|
61
67
|
{ type: "system", text: "Type a message to chat with the assistant, or /help for commands." },
|
|
62
68
|
]);
|
|
69
|
+
useEffect(() => { if (changeBanner)
|
|
70
|
+
onChangeSeen?.(); }, []);
|
|
63
71
|
const [state, setState] = useState(workerState);
|
|
64
72
|
const [status, setStatus] = useState(() => readStatus?.() ?? EMPTY_STATUS);
|
|
65
73
|
const [webBackend, setWebBackend] = useState(() => webSearchBackend?.() ?? "unavailable");
|
|
@@ -149,7 +157,7 @@ export function App({ registry, title, workerState = "starting", initialModel =
|
|
|
149
157
|
webSearch: webSearchBackend?.() ?? webBackend, worker,
|
|
150
158
|
clients: { claude: status.claude.user || status.claude.project, codex: status.codex.user || status.codex.project },
|
|
151
159
|
});
|
|
152
|
-
add(statusCard(summary, restarts.length ? ["", "recent restarts:", ...restarts] : []));
|
|
160
|
+
add(statusCard(summary, restarts.length ? ["", "recent restarts:", ...restarts] : [], status));
|
|
153
161
|
return;
|
|
154
162
|
}
|
|
155
163
|
if (t === "/config" && info) {
|
|
@@ -3,9 +3,10 @@ import { z } from "zod";
|
|
|
3
3
|
import { buildActions } from "./tools.js";
|
|
4
4
|
import { formatModelList } from "../../shared/format.js";
|
|
5
5
|
const empty = z.object({});
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
6
|
+
// Setup is a config write — require both so the assistant must confirm scope+model, never assume.
|
|
7
|
+
const requiredSetupShape = z.object({
|
|
8
|
+
scope: z.enum(["global", "project"]),
|
|
9
|
+
model: z.string(),
|
|
9
10
|
}).shape;
|
|
10
11
|
function sdkTools(actions, cfg) {
|
|
11
12
|
const tools = [
|
|
@@ -13,6 +14,8 @@ function sdkTools(actions, cfg) {
|
|
|
13
14
|
tool("restart_worker", "Restart the proxy worker", empty.shape, async () => ({ content: [{ type: "text", text: await actions.restart_worker({}) }] })),
|
|
14
15
|
tool("run_doctor", "Run copilot-reverse health checks", empty.shape, async () => ({ content: [{ type: "text", text: await actions.run_doctor({}) }] })),
|
|
15
16
|
tool("recent_requests", "List recent proxied requests", empty.shape, async () => ({ content: [{ type: "text", text: await actions.recent_requests({}) }] })),
|
|
17
|
+
tool("recent_errors", "List recent failed/cut requests with their messages (incl. stream runaways)", empty.shape, async () => ({ content: [{ type: "text", text: await actions.recent_errors({}) }] })),
|
|
18
|
+
tool("metrics", "Show request totals, error count, and per-model average latency", empty.shape, async () => ({ content: [{ type: "text", text: await actions.metrics({}) }] })),
|
|
16
19
|
];
|
|
17
20
|
const listModels = cfg.listModels;
|
|
18
21
|
if (listModels) {
|
|
@@ -24,11 +27,12 @@ function sdkTools(actions, cfg) {
|
|
|
24
27
|
if (setupClient) {
|
|
25
28
|
for (const client of ["claude", "codex"]) {
|
|
26
29
|
const label = client === "claude" ? "Claude Code" : "Codex";
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
30
|
+
// scope+model are REQUIRED (not defaulted): config writes are not reversible-by-undo, so the
|
|
31
|
+
// assistant must confirm both with the user first rather than silently writing the global scope
|
|
32
|
+
// with the current model. The prompt tells it to ask; making the args required enforces it.
|
|
33
|
+
tools.push(tool(`setup_${client}`, `Configure ${label} to use the proxy. REQUIRES scope ("global"=all projects / "project"=here) AND model — ask the user for both before calling; do not assume.`, requiredSetupShape, async (args) => {
|
|
34
|
+
const r = await setupClient(client, args.scope, args.model);
|
|
35
|
+
return { content: [{ type: "text", text: `configured ${label} (${args.scope}) with model ${args.model} — wrote ${r.path}; keys: ${r.changed.join(", ") || "(no change)"}` }] };
|
|
32
36
|
}));
|
|
33
37
|
}
|
|
34
38
|
}
|
|
@@ -70,9 +74,11 @@ export async function runAssistantTurn(cfg, prompt, print, queryFn = query, abor
|
|
|
70
74
|
settingSources: [],
|
|
71
75
|
systemPrompt: "You are copilot-reverse's built-in assistant for the local Copilot proxy. Be concise. " +
|
|
72
76
|
"When the user expresses an intent you have a tool for, CALL THE TOOL instead of explaining. " +
|
|
73
|
-
"Tools: get_status, restart_worker, run_doctor, recent_requests,
|
|
74
|
-
"models + context windows), setup_claude / setup_codex (configure those clients
|
|
75
|
-
"
|
|
77
|
+
"Tools: get_status, restart_worker, run_doctor, recent_requests, recent_errors, metrics, list_models " +
|
|
78
|
+
"(models + context windows), setup_claude / setup_codex (configure those clients). " +
|
|
79
|
+
"SETUP RULE: setup_claude/setup_codex WRITE config and need scope (global=all projects / project=here) " +
|
|
80
|
+
"AND model. Before calling, confirm BOTH with the user — if unstated, ask (offer list_models). Never assume. " +
|
|
81
|
+
"E.g. 'list models' -> list_models; 'set up claude' -> ask scope+model, then setup_claude.",
|
|
76
82
|
permissionMode: "bypassPermissions",
|
|
77
83
|
includePartialMessages: true,
|
|
78
84
|
...(abortController ? { abortController } : {}),
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { aggregate, recentErrors } from "../panels/metrics-agg.js";
|
|
1
2
|
// Plain action handlers — wrapped as SDK tools in runtime.ts.
|
|
2
3
|
// Each takes a parsed-args object and returns a short text result for the model.
|
|
3
4
|
export function buildActions(client) {
|
|
@@ -20,5 +21,17 @@ export function buildActions(client) {
|
|
|
20
21
|
return "no requests logged yet";
|
|
21
22
|
return reqs.slice(0, 10).map((r) => `${r.endpoint} ${r.model} ${r.status} ${r.latencyMs}ms`).join("; ");
|
|
22
23
|
},
|
|
24
|
+
async recent_errors(_args) {
|
|
25
|
+
const errs = recentErrors(await client.requests(), 10);
|
|
26
|
+
if (!errs.length)
|
|
27
|
+
return "no request errors logged — everything's green";
|
|
28
|
+
return errs.map((e) => `${e.status} ${e.endpoint} ${e.model} — ${e.error ?? "(no message)"}`).join("; ");
|
|
29
|
+
},
|
|
30
|
+
async metrics(_args) {
|
|
31
|
+
const a = aggregate(await client.requests());
|
|
32
|
+
if (!a.total)
|
|
33
|
+
return "no requests yet";
|
|
34
|
+
return `requests: ${a.total}, errors: ${a.errors}; ` + a.byModel.map((r) => `${r.model} n=${r.count} avg=${r.avgMs}ms`).join("; ");
|
|
35
|
+
},
|
|
23
36
|
};
|
|
24
37
|
}
|
|
@@ -1,8 +1,11 @@
|
|
|
1
|
+
// A request "failed" if it returned a 4xx/5xx OR carried an error message — runaway streams finish
|
|
2
|
+
// 200 but tag an error (model degenerated, cut early), and those are exactly what we want to surface.
|
|
3
|
+
const isError = (s) => s.status >= 400 || s.error != null;
|
|
1
4
|
export function aggregate(samples) {
|
|
2
5
|
const map = new Map();
|
|
3
6
|
let errors = 0;
|
|
4
7
|
for (const s of samples) {
|
|
5
|
-
if (s
|
|
8
|
+
if (isError(s))
|
|
6
9
|
errors++;
|
|
7
10
|
const m = map.get(s.model) ?? { count: 0, sum: 0 };
|
|
8
11
|
m.count++;
|
|
@@ -15,8 +18,8 @@ export function aggregate(samples) {
|
|
|
15
18
|
byModel: [...map.entries()].map(([model, v]) => ({ model, count: v.count, avgMs: Math.round(v.sum / v.count) })),
|
|
16
19
|
};
|
|
17
20
|
}
|
|
18
|
-
// The failed requests (status >= 400
|
|
19
|
-
//
|
|
21
|
+
// The failed requests (status >= 400 or any tagged error), newest-first, capped at `limit`. This is
|
|
22
|
+
// the actually-useful "log" — what failed and why — as opposed to worker restart events.
|
|
20
23
|
export function recentErrors(samples, limit) {
|
|
21
|
-
return samples.filter(
|
|
24
|
+
return samples.filter(isError).slice(0, limit);
|
|
22
25
|
}
|
package/dist/tui/report.js
CHANGED
|
@@ -1,12 +1,19 @@
|
|
|
1
1
|
// Sentinel for an unconfigured report target. /report refuses to open until this is changed.
|
|
2
2
|
export const PLACEHOLDER_REPO = "OWNER/REPO";
|
|
3
|
-
// A diagnostics-only report
|
|
4
|
-
//
|
|
3
|
+
// A diagnostics-only report, structured like a good hand-written issue (e.g. #5): Summary →
|
|
4
|
+
// Environment → Diagnostics → Steps to reproduce. It contains metrics, doctor output, and worker
|
|
5
|
+
// restart reasons — never request/response bodies — so there is no user prompt content to leak.
|
|
5
6
|
export function buildIssueBody(i) {
|
|
7
|
+
const runaways = i.errors.filter((e) => e.status < 400 && /runaway/.test(e.error ?? ""));
|
|
8
|
+
const summary = runaways.length
|
|
9
|
+
? `Stream(s) degenerated and were cut early (model looped, no stop) on ${i.status.workerState} worker — ${runaways.length} runaway(s).`
|
|
10
|
+
: i.errors.length
|
|
11
|
+
? `${i.errors.length} recent request error(s) on a ${i.status.workerState} worker; details below.`
|
|
12
|
+
: `Worker state: ${i.status.workerState}. (Describe the problem below.)`;
|
|
6
13
|
const lines = [
|
|
7
|
-
"
|
|
8
|
-
"",
|
|
9
|
-
`-
|
|
14
|
+
"### Summary", "", summary, "",
|
|
15
|
+
"### Environment", "",
|
|
16
|
+
`- copilot-reverse **${i.version}**`,
|
|
10
17
|
`- platform: ${i.platform}`,
|
|
11
18
|
`- worker state: ${i.status.workerState}`,
|
|
12
19
|
"",
|
|
@@ -18,14 +25,20 @@ export function buildIssueBody(i) {
|
|
|
18
25
|
? i.errors.map((e) => `- \`${e.status}\` ${e.endpoint} ${e.model} — ${e.error ?? "(no message)"}`)
|
|
19
26
|
: ["- (none)"]),
|
|
20
27
|
];
|
|
28
|
+
if (runaways.length) {
|
|
29
|
+
lines.push("", "### Stream runaways (model degenerated, cut early)", ...runaways.map((e) => `- ${e.endpoint} ${e.model} after ${e.latencyMs}ms — ${e.error}`));
|
|
30
|
+
}
|
|
21
31
|
if (i.status.restarts.length) {
|
|
22
32
|
lines.push("", "### Recent worker restarts", ...i.status.restarts.slice(0, 5).map((r) => `- ${new Date(r.ts).toISOString()} ${r.reason} exit=${r.exitCode ?? "-"} ${r.stderrTail.slice(0, 120)}`));
|
|
23
33
|
}
|
|
24
|
-
lines.push("", "###
|
|
34
|
+
lines.push("", "### Steps to reproduce", "<!-- what were you doing? which client/model? -->", "");
|
|
25
35
|
// Keep well under GitHub's ~8KB URL cap once encoded.
|
|
26
36
|
return lines.join("\n").slice(0, 5500);
|
|
27
37
|
}
|
|
28
38
|
export function buildIssueTitle(i) {
|
|
39
|
+
const runaway = i.errors.find((e) => e.status < 400 && /runaway/.test(e.error ?? ""));
|
|
40
|
+
if (runaway)
|
|
41
|
+
return `copilot-reverse: stream runaway (${runaway.model})`;
|
|
29
42
|
const first = i.errors[0]?.error;
|
|
30
43
|
return `copilot-reverse report: ${first ? first.slice(0, 70) : i.status.workerState}`;
|
|
31
44
|
}
|
package/dist/tui/setup/apply.js
CHANGED
|
@@ -2,10 +2,13 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
|
2
2
|
import { homedir } from "node:os";
|
|
3
3
|
import { join, dirname } from "node:path";
|
|
4
4
|
// The env keys copilot-reverse writes for each client — so reset knows exactly what to remove.
|
|
5
|
+
// ANTHROPIC_AUTH_TOKEN isn't one we write, but reset strips it too: if it lingers alongside our
|
|
6
|
+
// API key, Claude Code warns "both set", so a clean reset should clear the conflict.
|
|
5
7
|
export const CLAUDE_ENV_KEYS = [
|
|
6
|
-
"ANTHROPIC_BASE_URL", "ANTHROPIC_API_KEY", "ANTHROPIC_MODEL",
|
|
8
|
+
"ANTHROPIC_BASE_URL", "ANTHROPIC_API_KEY", "ANTHROPIC_MODEL", "ANTHROPIC_AUTH_TOKEN",
|
|
7
9
|
"CLAUDE_CODE_AUTO_COMPACT_WINDOW", "CLAUDE_AUTOCOMPACT_PCT_OVERRIDE",
|
|
8
10
|
"CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC", "CLAUDE_CODE_ATTRIBUTION_HEADER",
|
|
11
|
+
"CLAUDE_CODE_ENABLE_GATEWAY_MODEL_DISCOVERY",
|
|
9
12
|
];
|
|
10
13
|
export const CODEX_ENV_KEYS = ["OPENAI_BASE_URL", "OPENAI_API_KEY", "OPENAI_MODEL"];
|
|
11
14
|
// --- Claude Code: merge into settings.json `env` (non-destructive) ---
|
|
@@ -29,6 +32,12 @@ export function applyClaude(scope, env, o = {}) {
|
|
|
29
32
|
}
|
|
30
33
|
const envObj = (settings.env && typeof settings.env === "object" ? settings.env : {});
|
|
31
34
|
const changed = [];
|
|
35
|
+
// We authenticate with ANTHROPIC_API_KEY; a leftover ANTHROPIC_AUTH_TOKEN here makes Claude Code
|
|
36
|
+
// warn "both set · auth may not work" — strip it so our setup leaves a clean, single-credential env.
|
|
37
|
+
if ("ANTHROPIC_AUTH_TOKEN" in envObj) {
|
|
38
|
+
delete envObj.ANTHROPIC_AUTH_TOKEN;
|
|
39
|
+
changed.push("ANTHROPIC_AUTH_TOKEN(removed)");
|
|
40
|
+
}
|
|
32
41
|
for (const [k, v] of Object.entries(env)) {
|
|
33
42
|
if (envObj[k] !== v) {
|
|
34
43
|
envObj[k] = v;
|
|
@@ -27,6 +27,10 @@ export function claudeCopilotReverseEnv(base, apiKey, model, contextWindow) {
|
|
|
27
27
|
CLAUDE_AUTOCOMPACT_PCT_OVERRIDE: "80",
|
|
28
28
|
CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: "1",
|
|
29
29
|
CLAUDE_CODE_ATTRIBUTION_HEADER: "0", // keep prompt caching working on a non-Anthropic gateway
|
|
30
|
+
// Populate Claude Code's /model picker from our /anthropic/v1/models so the user can switch
|
|
31
|
+
// models natively. Coexists with ANTHROPIC_MODEL (which stays the 1M default — it does NOT lock
|
|
32
|
+
// the picker). Claude Code >=2.1.129 only; older builds ignore it. Picker lists claude* ids.
|
|
33
|
+
CLAUDE_CODE_ENABLE_GATEWAY_MODEL_DISCOVERY: "1",
|
|
30
34
|
};
|
|
31
35
|
}
|
|
32
36
|
export function codexConfig(e) {
|
package/dist/tui/setup/status.js
CHANGED
|
@@ -3,33 +3,40 @@ import { claudePath, codexPath } from "./apply.js";
|
|
|
3
3
|
// A copilot-reverse-written endpoint always points at the local loopback proxy — this lets us tell our
|
|
4
4
|
// own config apart from a user's pre-existing ANTHROPIC_BASE_URL / OPENAI_BASE_URL.
|
|
5
5
|
const isCopilotReverse = (v) => typeof v === "string" && /127\.0\.0\.1|localhost/.test(v);
|
|
6
|
-
function
|
|
6
|
+
function claudeScope(scope, o) {
|
|
7
7
|
const p = claudePath(scope, o);
|
|
8
8
|
if (!existsSync(p))
|
|
9
|
-
return false;
|
|
9
|
+
return { on: false };
|
|
10
10
|
try {
|
|
11
11
|
const s = JSON.parse(readFileSync(p, "utf8"));
|
|
12
|
-
|
|
12
|
+
if (!isCopilotReverse(s.env?.ANTHROPIC_BASE_URL))
|
|
13
|
+
return { on: false };
|
|
14
|
+
return { on: true, model: typeof s.env?.ANTHROPIC_MODEL === "string" ? s.env.ANTHROPIC_MODEL : undefined };
|
|
13
15
|
}
|
|
14
16
|
catch {
|
|
15
|
-
return false;
|
|
17
|
+
return { on: false };
|
|
16
18
|
}
|
|
17
19
|
}
|
|
18
|
-
function
|
|
20
|
+
function codexScope(scope, o) {
|
|
19
21
|
const p = codexPath(scope, o);
|
|
20
22
|
if (!existsSync(p))
|
|
21
|
-
return false;
|
|
23
|
+
return { on: false };
|
|
22
24
|
try {
|
|
23
|
-
const
|
|
24
|
-
|
|
25
|
+
const txt = readFileSync(p, "utf8");
|
|
26
|
+
const base = /^OPENAI_BASE_URL=(.*)$/m.exec(txt);
|
|
27
|
+
if (!base || !isCopilotReverse(base[1]))
|
|
28
|
+
return { on: false };
|
|
29
|
+
return { on: true, model: /^OPENAI_MODEL=(.*)$/m.exec(txt)?.[1] };
|
|
25
30
|
}
|
|
26
31
|
catch {
|
|
27
|
-
return false;
|
|
32
|
+
return { on: false };
|
|
28
33
|
}
|
|
29
34
|
}
|
|
30
35
|
export function readClientStatus(o = {}) {
|
|
36
|
+
const cu = claudeScope("global", o), cp = claudeScope("project", o);
|
|
37
|
+
const xu = codexScope("global", o), xp = codexScope("project", o);
|
|
31
38
|
return {
|
|
32
|
-
claude: { user:
|
|
33
|
-
codex: { user:
|
|
39
|
+
claude: { user: cu.on, project: cp.on, userModel: cu.model, projectModel: cp.model },
|
|
40
|
+
codex: { user: xu.on, project: xp.on, userModel: xu.model, projectModel: xp.model },
|
|
34
41
|
};
|
|
35
42
|
}
|
package/dist/version.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
// AUTO-GENERATED by scripts/gen-version.mjs from package.json — do not edit.
|
|
2
|
-
export const APP_VERSION = "0.5.3";
|
|
2
|
+
export const APP_VERSION = "0.5.5";
|
|
@@ -4,6 +4,7 @@ import { estimateTokens } from "../core/tokens.js";
|
|
|
4
4
|
import { errorHint } from "./errors.js";
|
|
5
5
|
import { CopilotAuthError } from "../providers/copilot/token.js";
|
|
6
6
|
import { isGatewayTool } from "../core/server-tools.js";
|
|
7
|
+
import { RunawayGuard } from "../core/stream-guard.js";
|
|
7
8
|
const frame = (event, data) => `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`;
|
|
8
9
|
const safeJson = (s) => { try {
|
|
9
10
|
return JSON.parse(s);
|
|
@@ -14,6 +15,11 @@ catch {
|
|
|
14
15
|
// Bounds the gateway tool loop so a model that calls web_search every turn (or a runner that always
|
|
15
16
|
// returns "search more") can never spin forever inside one request.
|
|
16
17
|
const MAX_TOOL_ITERS = 5;
|
|
18
|
+
// Wall-clock cap on a single streaming turn. The model occasionally degenerates into emitting the
|
|
19
|
+
// same short token forever ("code\ncode\ncode…") and never sends a stop, which would otherwise relay
|
|
20
|
+
// for minutes and freeze the client. The RunawayGuard catches the repetition fast; this is the
|
|
21
|
+
// backstop for any slow-but-endless stream. On either trip we end the turn cleanly as max_tokens.
|
|
22
|
+
const STREAM_DEADLINE_MS = 120_000;
|
|
17
23
|
export function mountAnthropic(app, router, onMetric, runner) {
|
|
18
24
|
// Model discovery — Anthropic list shape. Claude Desktop / Anthropic-protocol clients GET this
|
|
19
25
|
// before chatting; without it they 404 on the connection test.
|
|
@@ -52,7 +58,13 @@ export function mountAnthropic(app, router, onMetric, runner) {
|
|
|
52
58
|
let next = 0;
|
|
53
59
|
let lastPrompt = estInput, lastCached = 0, sumCompletion = 0;
|
|
54
60
|
let finalStop = "stop";
|
|
55
|
-
|
|
61
|
+
// Runaway protection spans the whole request: repeated-token degeneration + a wall-clock
|
|
62
|
+
// deadline. Tripping ends the stream as a clean max_tokens turn instead of hanging.
|
|
63
|
+
const guard = new RunawayGuard();
|
|
64
|
+
const deadline = start + STREAM_DEADLINE_MS;
|
|
65
|
+
let runaway = false;
|
|
66
|
+
let runawayReason = "";
|
|
67
|
+
for (let iter = 0; iter < MAX_TOOL_ITERS && !runaway; iter++) {
|
|
56
68
|
let textIndex; // Anthropic index of this turn's text block
|
|
57
69
|
const byCopilotIdx = new Map();
|
|
58
70
|
const buffered = []; // tool calls seen this turn, in order
|
|
@@ -73,6 +85,13 @@ export function mountAnthropic(app, router, onMetric, runner) {
|
|
|
73
85
|
res.write(frame("content_block_start", { type: "content_block_start", index: textIndex, content_block: { type: "text", text: "" } }));
|
|
74
86
|
}
|
|
75
87
|
res.write(frame("content_block_delta", { type: "content_block_delta", index: textIndex, delta: { type: "text_delta", text: chunk.delta } }));
|
|
88
|
+
// Degenerate-stream kill-switch: a model looping on a short token is cut here.
|
|
89
|
+
if (guard.push(chunk.delta)) {
|
|
90
|
+
runaway = true;
|
|
91
|
+
runawayReason = guard.reason ?? "repetition";
|
|
92
|
+
turnStop = "length";
|
|
93
|
+
break;
|
|
94
|
+
}
|
|
76
95
|
}
|
|
77
96
|
else if (chunk.kind === "tool_use_start") {
|
|
78
97
|
if (!byCopilotIdx.has(chunk.index)) {
|
|
@@ -86,9 +105,23 @@ export function mountAnthropic(app, router, onMetric, runner) {
|
|
|
86
105
|
if (t)
|
|
87
106
|
t.args += chunk.argsDelta;
|
|
88
107
|
}
|
|
108
|
+
// Wall-clock backstop on EVERY chunk kind: a tool-call-only runaway never feeds the text
|
|
109
|
+
// guard, so without this a model spamming calls would relay until the socket died.
|
|
110
|
+
if (Date.now() > deadline) {
|
|
111
|
+
runaway = true;
|
|
112
|
+
runawayReason = "deadline";
|
|
113
|
+
turnStop = "length";
|
|
114
|
+
break;
|
|
115
|
+
}
|
|
89
116
|
}
|
|
90
117
|
if (textIndex !== undefined)
|
|
91
118
|
res.write(frame("content_block_stop", { type: "content_block_stop", index: textIndex }));
|
|
119
|
+
// Runaway tripped mid-text: stop now as max_tokens. Don't forward partial tool calls or
|
|
120
|
+
// loop into gateway tools — the turn was abandoned, not legitimately completed.
|
|
121
|
+
if (runaway) {
|
|
122
|
+
finalStop = "length";
|
|
123
|
+
break;
|
|
124
|
+
}
|
|
92
125
|
const gatewayCalls = buffered.filter((t) => isGatewayTool(t.name));
|
|
93
126
|
// Invariant: a gateway tool (web_search/web_fetch) must NEVER reach the client — the client
|
|
94
127
|
// has no handler for it and would stall. So whenever the model calls gateway tools (and a
|
|
@@ -124,7 +157,7 @@ export function mountAnthropic(app, router, onMetric, runner) {
|
|
|
124
157
|
res.write(frame("message_delta", { type: "message_delta", delta: { stop_reason: finalStop === "tool_use" ? "tool_use" : finalStop === "length" ? "max_tokens" : "end_turn" }, usage: deltaUsage }));
|
|
125
158
|
res.write(frame("message_stop", { type: "message_stop" }));
|
|
126
159
|
res.end();
|
|
127
|
-
metric(200);
|
|
160
|
+
metric(200, runaway ? `runaway stream cut (${runawayReason}) — model degenerated, ended early as max_tokens` : undefined);
|
|
128
161
|
}
|
|
129
162
|
else {
|
|
130
163
|
// Non-stream: same gateway loop without SSE — run gateway tools and re-complete until the
|
|
@@ -3,6 +3,10 @@ import { openaiRequestToCanonical, canonicalToOpenAIResponse, canonicalChunkToOp
|
|
|
3
3
|
import { responsesRequestToCanonical, canonicalToResponsesResponse, ResponsesSSE } from "../core/responses-inbound.js";
|
|
4
4
|
import { errorHint } from "./errors.js";
|
|
5
5
|
import { CopilotAuthError } from "../providers/copilot/token.js";
|
|
6
|
+
import { RunawayGuard } from "../core/stream-guard.js";
|
|
7
|
+
// Cut a single streaming turn that degenerates (model repeats one short token forever, never stops)
|
|
8
|
+
// so the client gets a bounded answer instead of a frozen session. Mirrors the Anthropic backend.
|
|
9
|
+
const STREAM_DEADLINE_MS = 120_000;
|
|
6
10
|
export function mountOpenAI(app, router, onMetric) {
|
|
7
11
|
// Model discovery — OpenAI list shape. Clients (LiteLLM-style gateways, "test connection" probes)
|
|
8
12
|
// GET this before chatting; without it they 404 and refuse to connect.
|
|
@@ -20,10 +24,24 @@ export function mountOpenAI(app, router, onMetric) {
|
|
|
20
24
|
res.setHeader("content-type", "text/event-stream");
|
|
21
25
|
res.setHeader("cache-control", "no-cache");
|
|
22
26
|
const id = `chatcmpl-${randomUUID().replace(/-/g, "")}`; // unique per response, not constant
|
|
23
|
-
|
|
27
|
+
const guard = new RunawayGuard();
|
|
28
|
+
const deadline = start + STREAM_DEADLINE_MS;
|
|
29
|
+
let runawayReason = "";
|
|
30
|
+
for await (const chunk of provider.stream(canon)) {
|
|
24
31
|
res.write(canonicalChunkToOpenAISSE(chunk, id, canon.model));
|
|
32
|
+
// Backstop covers tool-call streams too: a model can loop on tool calls forever, which
|
|
33
|
+
// never feeds the text guard — the wall clock cuts those cleanly instead of freezing.
|
|
34
|
+
if (chunk.kind === "text" && guard.push(chunk.delta)) {
|
|
35
|
+
runawayReason = guard.reason ?? "repetition";
|
|
36
|
+
break;
|
|
37
|
+
}
|
|
38
|
+
if (Date.now() > deadline) {
|
|
39
|
+
runawayReason = "deadline";
|
|
40
|
+
break;
|
|
41
|
+
}
|
|
42
|
+
}
|
|
25
43
|
res.end();
|
|
26
|
-
metric(200);
|
|
44
|
+
metric(200, runawayReason ? `runaway stream cut (${runawayReason}) — model degenerated, ended early` : undefined);
|
|
27
45
|
}
|
|
28
46
|
else {
|
|
29
47
|
res.json(canonicalToOpenAIResponse(await provider.complete(canon)));
|
|
@@ -65,15 +83,24 @@ export function mountOpenAI(app, router, onMetric) {
|
|
|
65
83
|
const argsByIdx = new Map();
|
|
66
84
|
let usage;
|
|
67
85
|
let finish = "stop";
|
|
86
|
+
const guard = new RunawayGuard();
|
|
87
|
+
const deadline = start + STREAM_DEADLINE_MS;
|
|
88
|
+
let runawayReason = "";
|
|
68
89
|
for await (const chunk of provider.stream(canon)) {
|
|
69
90
|
if (chunk.done) {
|
|
70
91
|
finish = chunk.finishReason ?? "stop";
|
|
71
92
|
usage = chunk.usage;
|
|
72
93
|
break;
|
|
73
94
|
}
|
|
74
|
-
if (chunk.kind === "text")
|
|
95
|
+
if (chunk.kind === "text") {
|
|
75
96
|
for (const f of sse.text(chunk.delta))
|
|
76
97
|
res.write(f);
|
|
98
|
+
if (guard.push(chunk.delta)) {
|
|
99
|
+
finish = "length";
|
|
100
|
+
runawayReason = guard.reason ?? "repetition";
|
|
101
|
+
break;
|
|
102
|
+
}
|
|
103
|
+
}
|
|
77
104
|
else if (chunk.kind === "tool_use_start")
|
|
78
105
|
for (const f of sse.toolStart(chunk.index, chunk.id, chunk.name))
|
|
79
106
|
res.write(f);
|
|
@@ -82,11 +109,17 @@ export function mountOpenAI(app, router, onMetric) {
|
|
|
82
109
|
for (const f of sse.toolArgs(chunk.index, chunk.argsDelta))
|
|
83
110
|
res.write(f);
|
|
84
111
|
}
|
|
112
|
+
// Deadline applies to every chunk kind: a tool-call-only runaway never hits the text guard.
|
|
113
|
+
if (Date.now() > deadline) {
|
|
114
|
+
finish = "length";
|
|
115
|
+
runawayReason = "deadline";
|
|
116
|
+
break;
|
|
117
|
+
}
|
|
85
118
|
}
|
|
86
119
|
for (const f of sse.finish(usage, finish, argsByIdx))
|
|
87
120
|
res.write(f);
|
|
88
121
|
res.end();
|
|
89
|
-
metric(200);
|
|
122
|
+
metric(200, runawayReason ? `runaway stream cut (${runawayReason}) — model degenerated, ended early` : undefined);
|
|
90
123
|
}
|
|
91
124
|
else {
|
|
92
125
|
res.json(canonicalToResponsesResponse(await provider.complete(canon)));
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "copilot-reverse",
|
|
3
|
-
"version": "0.5.3",
|
|
3
|
+
"version": "0.5.5",
|
|
4
4
|
"description": "Interactive terminal app that exposes your GitHub Copilot subscription as local OpenAI- and Anthropic-compatible endpoints, with a self-healing daemon and a built-in assistant.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|