copilot-reverse 0.6.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/changes.js +10 -10
- package/dist/core/model-canonical.js +35 -0
- package/dist/supervisor/db.js +10 -6
- package/dist/supervisor/index.js +1 -1
- package/dist/tui/app.js +4 -0
- package/dist/tui/assistant/tools.js +1 -1
- package/dist/tui/panels/metrics-agg.js +30 -4
- package/dist/tui/setup/clients.js +8 -4
- package/dist/tui/slash/commands.js +8 -2
- package/dist/version.js +1 -1
- package/dist/worker/anthropic-server.js +9 -6
- package/dist/worker/openai-server.js +15 -10
- package/dist/worker/router.js +4 -3
- package/package.json +1 -1
package/dist/changes.js
CHANGED
|
@@ -1,4 +1,14 @@
|
|
|
1
1
|
export const APP_CHANGES = [
|
|
2
|
+
{
|
|
3
|
+
"version": "0.7.0",
|
|
4
|
+
"date": "2026-06-29",
|
|
5
|
+
"summary": "feat(tui): `/metrics` now reports token usage (in/out) and an estimated cost per model and overall — the worker records prompt/completion tokens for every request (persisted in SQLite), and cost is a list-price estimate (Copilot is flat-fee). User messages also get a highlighted bar in the transcript so they stand out from muted system notes and assistant output."
|
|
6
|
+
},
|
|
7
|
+
{
|
|
8
|
+
"version": "0.6.0",
|
|
9
|
+
"date": "2026-06-29",
|
|
10
|
+
"summary": "feat(tui): add a `/changes` command listing the 10 most recent releases (version, date, summary) with a link to the full CHANGELOG, and refocus the startup \"what's new\" banner on important messages — it now points to `/changes` instead of advertising a bug fix, and still self-suppresses after 3 launches."
|
|
11
|
+
},
|
|
2
12
|
{
|
|
3
13
|
"version": "0.5.5",
|
|
4
14
|
"date": "2026-06-29",
|
|
@@ -38,15 +48,5 @@ export const APP_CHANGES = [
|
|
|
38
48
|
"version": "0.3.0",
|
|
39
49
|
"date": "2026-06-26",
|
|
40
50
|
"summary": "Restore `web_search` and `web_fetch` for Claude Code through the gateway: the worker now runs these tools internally against Microsoft Web IQ in a transparent agentic loop, and a new `/web-search-support` command stores the WebIQ API key."
|
|
41
|
-
},
|
|
42
|
-
{
|
|
43
|
-
"version": "0.2.1",
|
|
44
|
-
"date": "2026-06-25",
|
|
45
|
-
"summary": "Fix `/login` hanging with no output: the device-code prompt is now shown immediately while authorization is pending, instead of being buffered behind the blocking token poll."
|
|
46
|
-
},
|
|
47
|
-
{
|
|
48
|
-
"version": "0.2.0",
|
|
49
|
-
"date": "2026-06-23",
|
|
50
|
-
"summary": "Recover tool calls that some models emit as inline XML text into structured tool calls, and add changeset-driven automatic versioning + npm publish on merge to master."
|
|
51
51
|
}
|
|
52
52
|
];
|
package/dist/core/model-canonical.js
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
// Maps Copilot's model ids to the canonical ids Claude Code recognises, so its native /model picker
// lights up with friendly names, tier grouping, and the 1M-context badge instead of bare ids.
//
// Copilot advertises DOTTED ids (claude-opus-4.8); Claude Code's built-in table keys on DASHED ids
// (claude-opus-4-8) and shows the 1M badge only when the id ends with the [1m] suffix. So OUTBOUND
// (/v1/models, ANTHROPIC_MODEL) we dash every claude id + add [1m] for the families Claude Code knows
// to be 1M; INBOUND the proxy strips [1m] and fuzzy-maps the dashed id back to Copilot's dotted id
// (see bestModelMatch). Non-claude ids (gpt*, o3*) have no canonical form → pass through untouched.
export const ONE_M_SUFFIX = "[1m]";
// Dashed canonical ids whose Claude Code table carries a 1M window — only these get the [1m] badge.
// Everything else stays at its default window. Anchored on the probed v2.1.195 binary table.
const ONE_M_MODELS = new Set(["claude-opus-4-6", "claude-opus-4-7", "claude-opus-4-8", "claude-sonnet-4-6"]);
/**
 * Build the picker display name for a dashed canonical id.
 * claude-<family>-<major>-<minor> -> "Family Major.Minor" (e.g. claude-opus-4-8 -> "Opus 4.8").
 * Any id that does not have exactly that shape is returned unchanged.
 *
 * @param {string} dashed - Dashed canonical id, without the [1m] suffix.
 * @returns {string} Friendly name, or `dashed` itself when the shape is not recognised.
 */
function displayName(dashed) {
    const m = /^claude-(opus|sonnet|haiku)-(\d+)-(\d+)$/.exec(dashed);
    if (!m)
        return dashed;
    const [, fam, maj, min] = m;
    return `${fam[0].toUpperCase()}${fam.slice(1)} ${maj}.${min}`;
}
/**
 * Outbound: Copilot id -> the id + display name Claude Code's picker understands.
 * Claude ids are dashed to the canonical form; the 1M families get the [1m] suffix so the picker
 * shows the badge and sizes context to 1M. Non-claude ids echo back as-is so they still appear,
 * just without native metadata.
 *
 * An id that already ends with [1m] keeps the suffix on `id` (same result as before), but the
 * suffix is now removed before the display name is derived, so "claude-opus-4-8[1m]" yields
 * "Opus 4.8" instead of echoing the raw suffixed id.
 *
 * @param {string} copilotId - Model id as advertised by Copilot (dotted) or already canonical.
 * @returns {{ id: string, display_name: string }}
 */
export function toCanonical(copilotId) {
    if (!copilotId.startsWith("claude-"))
        return { id: copilotId, display_name: copilotId };
    const alreadySuffixed = copilotId.endsWith(ONE_M_SUFFIX);
    const dashed = stripOneM(copilotId).replace(/\./g, "-");
    const id = alreadySuffixed || ONE_M_MODELS.has(dashed) ? `${dashed}${ONE_M_SUFFIX}` : dashed;
    return { id, display_name: displayName(dashed) };
}
/**
 * Inbound: drop the [1m] picker suffix. The dashed canonical id then resolves back to Copilot's
 * dotted id via the router's exact map + fuzzy fallback; nothing else to do here.
 *
 * Non-string input (e.g. a request body with no `model` field) is returned untouched rather than
 * throwing a TypeError here, so the caller's own validation decides what to do with it.
 *
 * @param {string} model - Requested model id, possibly ending in [1m].
 * @returns {string} The id without a trailing [1m].
 */
export function stripOneM(model) {
    if (typeof model !== "string")
        return model;
    return model.endsWith(ONE_M_SUFFIX) ? model.slice(0, -ONE_M_SUFFIX.length) : model;
}
|
package/dist/supervisor/db.js
CHANGED
|
@@ -9,12 +9,16 @@ export function openDb(file) {
|
|
|
9
9
|
exit_code INTEGER, stderr_tail TEXT NOT NULL, backoff_ms INTEGER NOT NULL, marked_unhealthy INTEGER NOT NULL DEFAULT 0);
|
|
10
10
|
CREATE TABLE IF NOT EXISTS request_log (
|
|
11
11
|
id INTEGER PRIMARY KEY AUTOINCREMENT, ts INTEGER NOT NULL, endpoint TEXT NOT NULL,
|
|
12
|
-
model TEXT NOT NULL, status INTEGER NOT NULL, latency_ms INTEGER NOT NULL, error TEXT);
|
|
12
|
+
model TEXT NOT NULL, status INTEGER NOT NULL, latency_ms INTEGER NOT NULL, tokens_in INTEGER, tokens_out INTEGER, error TEXT);
|
|
13
13
|
`);
|
|
14
|
-
// Migrate request_log tables created before
|
|
14
|
+
// Migrate request_log tables created before later columns existed.
|
|
15
15
|
const cols = db.prepare(`PRAGMA table_info(request_log)`).all();
|
|
16
16
|
if (!cols.some((c) => c.name === "error"))
|
|
17
17
|
db.exec(`ALTER TABLE request_log ADD COLUMN error TEXT`);
|
|
18
|
+
if (!cols.some((c) => c.name === "tokens_in"))
|
|
19
|
+
db.exec(`ALTER TABLE request_log ADD COLUMN tokens_in INTEGER`);
|
|
20
|
+
if (!cols.some((c) => c.name === "tokens_out"))
|
|
21
|
+
db.exec(`ALTER TABLE request_log ADD COLUMN tokens_out INTEGER`);
|
|
18
22
|
return db;
|
|
19
23
|
}
|
|
20
24
|
export function recordRestart(db, e) {
|
|
@@ -26,10 +30,10 @@ export function listRestarts(db, limit) {
|
|
|
26
30
|
FROM restart_events ORDER BY ts DESC LIMIT ?`).all(limit);
|
|
27
31
|
}
|
|
28
32
|
/**
 * Insert one request sample into `request_log`.
 *
 * Optional fields (`tokensIn`, `tokensOut`, `error`) are bound as SQL NULL when absent. They are
 * normalised with `??` rather than spread-before defaults: a caller that builds the sample with the
 * keys present but `undefined` would otherwise overwrite the null defaults and hand `undefined` to
 * the driver.
 *
 * @param db - Database handle exposing `prepare(sql).run(params)`.
 * @param m - Sample with `ts`, `endpoint`, `model`, `status`, `latencyMs` and optional
 *            `tokensIn`, `tokensOut`, `error`.
 */
export function recordRequest(db, m) {
    db.prepare(`INSERT INTO request_log (ts, endpoint, model, status, latency_ms, tokens_in, tokens_out, error) VALUES (@ts, @endpoint, @model, @status, @latencyMs, @tokensIn, @tokensOut, @error)`)
        .run({ ...m, tokensIn: m.tokensIn ?? null, tokensOut: m.tokensOut ?? null, error: m.error ?? null });
}
/**
 * Read the newest `limit` request samples, newest first.
 *
 * NULL `tokens_in` / `tokens_out` / `error` columns are omitted from the returned objects instead of
 * being surfaced as `null`, so consumers must treat those keys as optional.
 *
 * @param db - Database handle exposing `prepare(sql).all(...args)`.
 * @param limit - Maximum number of rows to return.
 * @returns Array of samples shaped like the argument of `recordRequest`.
 */
export function recentRequests(db, limit) {
    return db.prepare(`SELECT ts, endpoint, model, status, latency_ms as latencyMs, tokens_in as tokensIn, tokens_out as tokensOut, error FROM request_log ORDER BY ts DESC LIMIT ?`).all(limit)
        .map(({ tokensIn, tokensOut, error, ...r }) => ({ ...r, ...(tokensIn != null ? { tokensIn } : {}), ...(tokensOut != null ? { tokensOut } : {}), ...(error != null ? { error } : {}) }));
}
|
package/dist/supervisor/index.js
CHANGED
|
@@ -29,7 +29,7 @@ export function startSupervisor() {
|
|
|
29
29
|
},
|
|
30
30
|
onWorkerMessage: (m) => {
|
|
31
31
|
if (m.type === "request-metric") {
|
|
32
|
-
const sample = { ts: Date.now(), endpoint: m.endpoint, model: m.model, status: m.status, latencyMs: m.latencyMs, error: m.error };
|
|
32
|
+
const sample = { ts: Date.now(), endpoint: m.endpoint, model: m.model, status: m.status, latencyMs: m.latencyMs, tokensIn: m.tokensIn, tokensOut: m.tokensOut, error: m.error };
|
|
33
33
|
recordRequest(db, sample);
|
|
34
34
|
bus.emit("metric", sample);
|
|
35
35
|
}
|
package/dist/tui/app.js
CHANGED
|
@@ -267,6 +267,10 @@ export function App({ registry, title, workerState = "starting", initialModel =
|
|
|
267
267
|
const tokens = Math.ceil(e.text.length / 4);
|
|
268
268
|
return (_jsxs(Box, { flexDirection: "column", children: [_jsxs(Text, { color: theme.accent, children: ["\u273D ", _jsxs(Text, { color: theme.muted, children: [frame, " ", loadingVerb(elapsed), "\u2026 (esc to interrupt \u00B7 ", fmtElapsed(elapsed), " \u00B7 \u2193 ", fmtTokens(tokens), " tokens \u00B7 thinking)"] })] }), e.text ? _jsx(Text, { color: color, children: e.text }) : null] }, i));
|
|
269
269
|
}
|
|
270
|
+
// User turns get a clay-on-dark highlight bar so they stand out from muted system notes and
|
|
271
|
+
// gray assistant output — a clear visual anchor for "this is what I said".
|
|
272
|
+
if (e.type === "user")
|
|
273
|
+
return _jsx(Box, { marginTop: 1, children: _jsx(Text, { backgroundColor: theme.accent, color: "black", bold: true, children: ` ${e.text.replace(/^›\s*/, "")} ` }) }, i);
|
|
270
274
|
return _jsx(Text, { color: color, children: e.text }, i);
|
|
271
275
|
}) }), body, _jsxs(Box, { flexDirection: "column", paddingX: 1, children: [_jsxs(Box, { children: [github && _jsxs(_Fragment, { children: [_jsx(Text, { color: theme.muted, children: "github " }), _jsx(Text, { color: github === "connected" ? theme.ready : theme.error, children: github === "connected" ? "✓" : "✗ /login" })] }), _jsxs(Text, { color: theme.muted, children: [github ? " · " : "", "daemon "] }), _jsx(Text, { color: stateColor[state], children: state })] }), _jsxs(Box, { children: [_jsx(Text, { color: theme.muted, children: "web " }), _jsx(Text, { color: webBackend === "unavailable" ? theme.muted : theme.ready, children: webBackend === "webiq" ? "✓ webiq" : webBackend === "copilot" ? "✓ copilot" : "✗ /webiq" }), _jsx(Text, { color: theme.muted, children: " \u00B7 " }), _jsx(ClientBadge, { name: "claude", status: status.claude }), _jsx(Text, { color: theme.muted, children: " " }), _jsx(ClientBadge, { name: "codex", status: status.codex }), _jsx(Text, { color: theme.muted, children: " \u00B7 /help" })] })] })] }));
|
|
272
276
|
}
|
package/dist/tui/assistant/tools.js
CHANGED
|
@@ -31,7 +31,7 @@ export function buildActions(client) {
|
|
|
31
31
|
const a = aggregate(await client.requests());
|
|
32
32
|
if (!a.total)
|
|
33
33
|
return "no requests yet";
|
|
34
|
-
return `requests: ${a.total}, errors: ${a.errors}; ` + a.byModel.map((r) => `${r.model} n=${r.count} avg=${r.avgMs}ms`).join("; ");
|
|
34
|
+
return `requests: ${a.total}, errors: ${a.errors}, tokens: ${a.tokensIn}↑/${a.tokensOut}↓, est. cost: $${a.costUsd.toFixed(3)}; ` + a.byModel.map((r) => `${r.model} n=${r.count} avg=${r.avgMs}ms`).join("; ");
|
|
35
35
|
},
|
|
36
36
|
};
|
|
37
37
|
}
|
package/dist/tui/panels/metrics-agg.js
CHANGED
|
@@ -1,21 +1,47 @@
|
|
|
1
1
|
// A request "failed" if it returned a 4xx/5xx OR carried an error message — runaway streams finish
// 200 but tag an error (model degenerated, cut early), and those are exactly what we want to surface.
const isError = (s) => s.status >= 400 || s.error != null;
// Indicative $/1M-token list prices (in, out) used ONLY to estimate spend — Copilot is flat-fee, so
// this is "what these tokens would cost at provider list price", not a real bill. Matched by substring;
// unknown models fall back to a mid GPT-4o-class rate. Update as needed; precision isn't the point.
// Order matters: the first matching entry wins, so "gpt-4o-mini" must stay ahead of "gpt-4o".
const PRICING = [
    { match: "opus", in: 15, out: 75 },
    { match: "sonnet", in: 3, out: 15 },
    { match: "haiku", in: 0.8, out: 4 },
    { match: "gpt-5", in: 1.25, out: 10 },
    { match: "gpt-4o-mini", in: 0.15, out: 0.6 },
    { match: "gpt-4o", in: 2.5, out: 10 },
    { match: "o1", in: 15, out: 60 },
];
const RATE_FALLBACK = { in: 2.5, out: 10 };
// First PRICING entry whose `match` is a substring of the lower-cased model id, else the fallback.
const rate = (model) => PRICING.find((p) => model.toLowerCase().includes(p.match)) ?? RATE_FALLBACK;
/**
 * Estimate the list-price cost in USD of a number of prompt/completion tokens for a model.
 *
 * Missing token counts (null/undefined) are treated as 0, matching how `aggregate` sums them, so a
 * sample without recorded usage costs 0 instead of producing NaN.
 *
 * @param {string} model - Model id; matched by substring against the pricing table.
 * @param {number} [tokensIn] - Prompt tokens.
 * @param {number} [tokensOut] - Completion tokens.
 * @returns {number} Estimated cost in USD.
 */
export function estimateCost(model, tokensIn, tokensOut) {
    const r = rate(model);
    return ((tokensIn ?? 0) * r.in + (tokensOut ?? 0) * r.out) / 1_000_000;
}
/**
 * Roll request samples up into overall and per-model totals.
 *
 * @param samples - Array of samples with `model`, `status`, `latencyMs` and optional `tokensIn`,
 *                  `tokensOut`, `error`.
 * @returns `{ total, errors, tokensIn, tokensOut, costUsd, byModel }`, where `byModel` holds one
 *          `{ model, count, avgMs, tokensIn, tokensOut, costUsd }` row per distinct model, in
 *          first-seen order.
 */
export function aggregate(samples) {
    const map = new Map();
    let errors = 0;
    for (const s of samples) {
        if (isError(s))
            errors++;
        const m = map.get(s.model) ?? { count: 0, sum: 0, tin: 0, tout: 0 };
        m.count++;
        m.sum += s.latencyMs;
        // Samples without recorded usage simply omit the fields.
        m.tin += s.tokensIn ?? 0;
        m.tout += s.tokensOut ?? 0;
        map.set(s.model, m);
    }
    const byModel = [...map.entries()].map(([model, v]) => ({
        model, count: v.count, avgMs: Math.round(v.sum / v.count),
        tokensIn: v.tin, tokensOut: v.tout, costUsd: estimateCost(model, v.tin, v.tout),
    }));
    return {
        total: samples.length, errors,
        tokensIn: byModel.reduce((n, r) => n + r.tokensIn, 0),
        tokensOut: byModel.reduce((n, r) => n + r.tokensOut, 0),
        costUsd: byModel.reduce((n, r) => n + r.costUsd, 0),
        byModel,
    };
}
|
|
21
47
|
// The failed requests (status >= 400 or any tagged error), newest-first, capped at `limit`. This is
|
package/dist/tui/setup/clients.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { toCanonical } from "../../core/model-canonical.js";
|
|
1
2
|
export function claudeCodeConfig(e) {
|
|
2
3
|
const base = `http://${e.host}:${e.port}/anthropic`;
|
|
3
4
|
return {
|
|
@@ -6,11 +7,14 @@ export function claudeCodeConfig(e) {
|
|
|
6
7
|
};
|
|
7
8
|
}
|
|
8
9
|
export const ONE_M_SUFFIX = "[1m]";
|
|
9
|
-
// Claude Code switches to its 1M
|
|
10
|
-
//
|
|
11
|
-
//
|
|
12
|
-
//
|
|
10
|
+
// Claude Code switches to its 1M window only when ANTHROPIC_MODEL ends with `[1m]`, and only matches
|
|
11
|
+
// the model to its native picker entry when the id is the DASHED canonical form it knows
|
|
12
|
+
// (claude-opus-4-8, not Copilot's dotted claude-opus-4.8). Route the default model through toCanonical
|
|
13
|
+
// so it's both dashed and 1M-badged for the known families; for non-claude ids keep the legacy
|
|
14
|
+
// context-window suffix. The proxy strips [1m] + fuzzy-maps back to Copilot before forwarding.
|
|
13
15
|
export function withClaude1mSuffix(model, contextWindow) {
|
|
16
|
+
if (model.startsWith("claude-"))
|
|
17
|
+
return toCanonical(model).id;
|
|
14
18
|
return contextWindow && contextWindow > 800_000 && contextWindow < 1_500_000 && !model.endsWith(ONE_M_SUFFIX)
|
|
15
19
|
? `${model}${ONE_M_SUFFIX}`
|
|
16
20
|
: model;
|
package/dist/tui/slash/commands.js
CHANGED
|
@@ -24,12 +24,18 @@ export function buildRegistry(ctx, endpoint, opts = {}) {
|
|
|
24
24
|
return ["no request errors logged — everything's green ✓"];
|
|
25
25
|
return errs.map((e) => `${new Date(e.ts).toISOString()} ${e.status} ${e.endpoint} ${e.model} — ${e.error ?? "(no message)"}`);
|
|
26
26
|
} });
|
|
27
|
-
reg.add({ name: "/metrics", describe: "request metrics + recent errors", run: async (_a, c) => {
|
|
27
|
+
reg.add({ name: "/metrics", describe: "request metrics, tokens, cost + recent errors", run: async (_a, c) => {
|
|
28
28
|
const reqs = await c.client.requests();
|
|
29
29
|
const a = aggregate(reqs);
|
|
30
30
|
if (!a.total)
|
|
31
31
|
return ["no requests yet"];
|
|
32
|
-
const
|
|
32
|
+
const k = (n) => (n >= 1000 ? `${(n / 1000).toFixed(1)}k` : `${n}`);
|
|
33
|
+
const usd = (n) => `$${n < 1 ? n.toFixed(3) : n.toFixed(2)}`;
|
|
34
|
+
const lines = [
|
|
35
|
+
`requests: ${a.total} errors: ${a.errors} tokens: ${k(a.tokensIn)}↑ ${k(a.tokensOut)}↓ est. cost: ${usd(a.costUsd)}`,
|
|
36
|
+
...a.byModel.map((r) => ` ${r.model.padEnd(20)} n=${r.count} avg=${r.avgMs}ms ${k(r.tokensIn)}↑ ${k(r.tokensOut)}↓ ~${usd(r.costUsd)}`),
|
|
37
|
+
" cost is a list-price estimate (Copilot is flat-fee)",
|
|
38
|
+
];
|
|
33
39
|
const errs = recentErrors(reqs, 5);
|
|
34
40
|
if (errs.length) {
|
|
35
41
|
lines.push("recent errors:");
|
package/dist/version.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
// AUTO-GENERATED by scripts/gen-version.mjs from package.json — do not edit.
|
|
2
|
-
export const APP_VERSION = "0.6.0";
|
|
2
|
+
export const APP_VERSION = "0.8.0";
|
package/dist/worker/anthropic-server.js
CHANGED
|
@@ -5,6 +5,7 @@ import { errorHint } from "./errors.js";
|
|
|
5
5
|
import { CopilotAuthError } from "../providers/copilot/token.js";
|
|
6
6
|
import { isGatewayTool } from "../core/server-tools.js";
|
|
7
7
|
import { RunawayGuard } from "../core/stream-guard.js";
|
|
8
|
+
import { toCanonical } from "../core/model-canonical.js";
|
|
8
9
|
const frame = (event, data) => `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`;
|
|
9
10
|
const safeJson = (s) => { try {
|
|
10
11
|
return JSON.parse(s);
|
|
@@ -22,9 +23,11 @@ const MAX_TOOL_ITERS = 5;
|
|
|
22
23
|
const STREAM_DEADLINE_MS = 120_000;
|
|
23
24
|
export function mountAnthropic(app, router, onMetric, runner) {
|
|
24
25
|
// Model discovery — Anthropic list shape. Claude Desktop / Anthropic-protocol clients GET this
|
|
25
|
-
// before chatting; without it they 404 on the connection test.
|
|
26
|
+
// before chatting; without it they 404 on the connection test. Claude families are mapped to the
|
|
27
|
+
// canonical id + display Claude Code recognises (with [1m] for 1M models) so its native picker shows
|
|
28
|
+
// friendly names + the 1M badge; non-claude ids pass through. resolveModel maps them back inbound.
|
|
26
29
|
app.get("/anthropic/v1/models", (_req, res) => {
|
|
27
|
-
res.json({ data: router.listModels().map((id) => ({ type: "model", id
|
|
30
|
+
res.json({ data: router.listModels().map((id) => ({ type: "model", ...toCanonical(id) })), has_more: false });
|
|
28
31
|
});
|
|
29
32
|
// Anthropic clients (Claude Code) call this to size the prompt and decide when to auto-compact.
|
|
30
33
|
app.post("/anthropic/v1/messages/count_tokens", (req, res) => {
|
|
@@ -35,7 +38,7 @@ export function mountAnthropic(app, router, onMetric, runner) {
|
|
|
35
38
|
const canon = anthropicRequestToCanonical(req.body);
|
|
36
39
|
canon.model = router.resolveModel(canon.model);
|
|
37
40
|
const provider = router.pick(canon.model);
|
|
38
|
-
const metric = (status,
|
|
41
|
+
const metric = (status, opts = {}) => onMetric({ endpoint: "/anthropic/v1/messages", model: canon.model, status, latencyMs: Date.now() - start, tokensIn: opts.tokensIn, tokensOut: opts.tokensOut, error: opts.error });
|
|
39
42
|
try {
|
|
40
43
|
if (canon.stream) {
|
|
41
44
|
res.setHeader("content-type", "text/event-stream");
|
|
@@ -157,7 +160,7 @@ export function mountAnthropic(app, router, onMetric, runner) {
|
|
|
157
160
|
res.write(frame("message_delta", { type: "message_delta", delta: { stop_reason: finalStop === "tool_use" ? "tool_use" : finalStop === "length" ? "max_tokens" : "end_turn" }, usage: deltaUsage }));
|
|
158
161
|
res.write(frame("message_stop", { type: "message_stop" }));
|
|
159
162
|
res.end();
|
|
160
|
-
metric(200, runaway ? `runaway stream cut (${runawayReason}) — model degenerated, ended early as max_tokens` : undefined);
|
|
163
|
+
metric(200, { tokensIn: inputTokens, tokensOut: sumCompletion, error: runaway ? `runaway stream cut (${runawayReason}) — model degenerated, ended early as max_tokens` : undefined });
|
|
161
164
|
}
|
|
162
165
|
else {
|
|
163
166
|
// Non-stream: same gateway loop without SSE — run gateway tools and re-complete until the
|
|
@@ -182,7 +185,7 @@ export function mountAnthropic(app, router, onMetric, runner) {
|
|
|
182
185
|
if (runner)
|
|
183
186
|
resp = { ...resp, content: resp.content.filter((b) => b.type !== "tool_use" || !isGatewayTool(b.name)) };
|
|
184
187
|
res.json(canonicalToAnthropicResponse(resp));
|
|
185
|
-
metric(200);
|
|
188
|
+
metric(200, { tokensIn: resp.usage?.promptTokens, tokensOut: resp.usage?.completionTokens });
|
|
186
189
|
}
|
|
187
190
|
}
|
|
188
191
|
catch (err) {
|
|
@@ -201,7 +204,7 @@ export function mountAnthropic(app, router, onMetric, runner) {
|
|
|
201
204
|
res.write(frame("error", { type: "error", error: { type: errorType, message } }));
|
|
202
205
|
res.end();
|
|
203
206
|
}
|
|
204
|
-
metric(status, message);
|
|
207
|
+
metric(status, { error: message });
|
|
205
208
|
}
|
|
206
209
|
});
|
|
207
210
|
}
|
package/dist/worker/openai-server.js
CHANGED
|
@@ -18,7 +18,7 @@ export function mountOpenAI(app, router, onMetric) {
|
|
|
18
18
|
const canon = openaiRequestToCanonical(req.body);
|
|
19
19
|
canon.model = router.resolveModel(canon.model);
|
|
20
20
|
const provider = router.pick(canon.model);
|
|
21
|
-
const metric = (status,
|
|
21
|
+
const metric = (status, opts = {}) => onMetric({ endpoint: "/openai/chat/completions", model: canon.model, status, latencyMs: Date.now() - start, tokensIn: opts.tokensIn, tokensOut: opts.tokensOut, error: opts.error });
|
|
22
22
|
try {
|
|
23
23
|
if (canon.stream) {
|
|
24
24
|
res.setHeader("content-type", "text/event-stream");
|
|
@@ -27,8 +27,11 @@ export function mountOpenAI(app, router, onMetric) {
|
|
|
27
27
|
const guard = new RunawayGuard();
|
|
28
28
|
const deadline = start + STREAM_DEADLINE_MS;
|
|
29
29
|
let runawayReason = "";
|
|
30
|
+
let usage;
|
|
30
31
|
for await (const chunk of provider.stream(canon)) {
|
|
31
32
|
res.write(canonicalChunkToOpenAISSE(chunk, id, canon.model));
|
|
33
|
+
if (chunk.done)
|
|
34
|
+
usage = chunk.usage;
|
|
32
35
|
// Backstop covers tool-call streams too: a model can loop on tool calls forever, which
|
|
33
36
|
// never feeds the text guard — the wall clock cuts those cleanly instead of freezing.
|
|
34
37
|
if (chunk.kind === "text" && guard.push(chunk.delta)) {
|
|
@@ -41,11 +44,12 @@ export function mountOpenAI(app, router, onMetric) {
|
|
|
41
44
|
}
|
|
42
45
|
}
|
|
43
46
|
res.end();
|
|
44
|
-
metric(200, runawayReason ? `runaway stream cut (${runawayReason}) — model degenerated, ended early` : undefined);
|
|
47
|
+
metric(200, { tokensIn: usage?.promptTokens, tokensOut: usage?.completionTokens, error: runawayReason ? `runaway stream cut (${runawayReason}) — model degenerated, ended early` : undefined });
|
|
45
48
|
}
|
|
46
49
|
else {
|
|
47
|
-
|
|
48
|
-
|
|
50
|
+
const resp = await provider.complete(canon);
|
|
51
|
+
res.json(canonicalToOpenAIResponse(resp));
|
|
52
|
+
metric(200, { tokensIn: resp.usage?.promptTokens, tokensOut: resp.usage?.completionTokens });
|
|
49
53
|
}
|
|
50
54
|
}
|
|
51
55
|
catch (err) {
|
|
@@ -62,7 +66,7 @@ export function mountOpenAI(app, router, onMetric) {
|
|
|
62
66
|
res.write(`data: ${JSON.stringify({ error: { message } })}\n\n`);
|
|
63
67
|
res.end();
|
|
64
68
|
}
|
|
65
|
-
metric(status, message);
|
|
69
|
+
metric(status, { error: message });
|
|
66
70
|
}
|
|
67
71
|
});
|
|
68
72
|
// OpenAI Responses API — Codex speaks ONLY this after codex#7782 removed wire_api="chat". Codex
|
|
@@ -73,7 +77,7 @@ export function mountOpenAI(app, router, onMetric) {
|
|
|
73
77
|
const canon = responsesRequestToCanonical(req.body);
|
|
74
78
|
canon.model = router.resolveModel(canon.model);
|
|
75
79
|
const provider = router.pick(canon.model);
|
|
76
|
-
const metric = (status,
|
|
80
|
+
const metric = (status, opts = {}) => onMetric({ endpoint: "/openai/responses", model: canon.model, status, latencyMs: Date.now() - start, tokensIn: opts.tokensIn, tokensOut: opts.tokensOut, error: opts.error });
|
|
77
81
|
try {
|
|
78
82
|
if (canon.stream) {
|
|
79
83
|
res.setHeader("content-type", "text/event-stream");
|
|
@@ -119,11 +123,12 @@ export function mountOpenAI(app, router, onMetric) {
|
|
|
119
123
|
for (const f of sse.finish(usage, finish, argsByIdx))
|
|
120
124
|
res.write(f);
|
|
121
125
|
res.end();
|
|
122
|
-
metric(200, runawayReason ? `runaway stream cut (${runawayReason}) — model degenerated, ended early` : undefined);
|
|
126
|
+
metric(200, { tokensIn: usage?.promptTokens, tokensOut: usage?.completionTokens, error: runawayReason ? `runaway stream cut (${runawayReason}) — model degenerated, ended early` : undefined });
|
|
123
127
|
}
|
|
124
128
|
else {
|
|
125
|
-
|
|
126
|
-
|
|
129
|
+
const resp = await provider.complete(canon);
|
|
130
|
+
res.json(canonicalToResponsesResponse(resp));
|
|
131
|
+
metric(200, { tokensIn: resp.usage?.promptTokens, tokensOut: resp.usage?.completionTokens });
|
|
127
132
|
}
|
|
128
133
|
}
|
|
129
134
|
catch (err) {
|
|
@@ -138,7 +143,7 @@ export function mountOpenAI(app, router, onMetric) {
|
|
|
138
143
|
res.write(`data: ${JSON.stringify({ type: "error", message })}\n\n`);
|
|
139
144
|
res.end();
|
|
140
145
|
}
|
|
141
|
-
metric(status, message);
|
|
146
|
+
metric(status, { error: message });
|
|
142
147
|
}
|
|
143
148
|
});
|
|
144
149
|
}
|
package/dist/worker/router.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { bestModelMatch } from "../core/fuzzy.js";
|
|
2
2
|
import { FALLBACK_MODELS } from "../providers/copilot/models.js";
|
|
3
|
+
import { stripOneM } from "../core/model-canonical.js";
|
|
3
4
|
// M1: single provider. Model name is remapped to the provider's actual id.
|
|
4
5
|
export class Router {
|
|
5
6
|
providers;
|
|
@@ -16,12 +17,12 @@ export class Router {
|
|
|
16
17
|
listModels() { return this.available.length ? this.available : FALLBACK_MODELS; }
|
|
17
18
|
resolveModel(requested) {
|
|
18
19
|
// Claude Code appends [1m] to signal its 1M context window; Copilot doesn't know that id, so
|
|
19
|
-
// strip it back to the
|
|
20
|
-
requested =
|
|
20
|
+
// strip it back to the canonical model before mapping/forwarding.
|
|
21
|
+
requested = stripOneM(requested);
|
|
21
22
|
const mapped = this.modelMap[requested];
|
|
22
23
|
if (mapped)
|
|
23
24
|
return mapped;
|
|
24
|
-
// Fuzzy-match a near-miss id (e.g. claude-opus-4-8
|
|
25
|
+
// Fuzzy-match a near-miss id (e.g. canonical claude-opus-4-8 -> Copilot claude-opus-4.8) to a real model.
|
|
25
26
|
if (this.available.length) {
|
|
26
27
|
const match = bestModelMatch(requested, this.available);
|
|
27
28
|
if (match)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "copilot-reverse",
|
|
3
|
-
"version": "0.6.0",
|
|
3
|
+
"version": "0.8.0",
|
|
4
4
|
"description": "Interactive terminal app that exposes your GitHub Copilot subscription as local OpenAI- and Anthropic-compatible endpoints, with a self-healing daemon and a built-in assistant.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|