copilot-reverse 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/changes.js CHANGED
@@ -1,4 +1,14 @@
1
1
  export const APP_CHANGES = [
2
+ {
3
+ "version": "0.7.0",
4
+ "date": "2026-06-29",
5
+ "summary": "feat(tui): `/metrics` now reports token usage (in/out) and an estimated cost per model and overall — the worker records prompt/completion tokens for every request (persisted in SQLite), and cost is a list-price estimate (Copilot is flat-fee). User messages also get a highlighted bar in the transcript so they stand out from muted system notes and assistant output."
6
+ },
7
+ {
8
+ "version": "0.6.0",
9
+ "date": "2026-06-29",
10
+ "summary": "feat(tui): add a `/changes` command listing the 10 most recent releases (version, date, summary) with a link to the full CHANGELOG, and refocus the startup \"what's new\" banner on important messages — it now points to `/changes` instead of advertising a bug fix, and still self-suppresses after 3 launches."
11
+ },
2
12
  {
3
13
  "version": "0.5.5",
4
14
  "date": "2026-06-29",
@@ -38,15 +48,5 @@ export const APP_CHANGES = [
38
48
  "version": "0.3.0",
39
49
  "date": "2026-06-26",
40
50
  "summary": "Restore `web_search` and `web_fetch` for Claude Code through the gateway: the worker now runs these tools internally against Microsoft Web IQ in a transparent agentic loop, and a new `/web-search-support` command stores the WebIQ API key."
41
- },
42
- {
43
- "version": "0.2.1",
44
- "date": "2026-06-25",
45
- "summary": "Fix `/login` hanging with no output: the device-code prompt is now shown immediately while authorization is pending, instead of being buffered behind the blocking token poll."
46
- },
47
- {
48
- "version": "0.2.0",
49
- "date": "2026-06-23",
50
- "summary": "Recover tool calls that some models emit as inline XML text into structured tool calls, and add changeset-driven automatic versioning + npm publish on merge to master."
51
51
  }
52
52
  ];
@@ -0,0 +1,35 @@
1
+ // Maps Copilot's model ids to the canonical ids Claude Code recognises, so its native /model picker
2
+ // lights up with friendly names, tier grouping, and the 1M-context badge instead of bare ids.
3
+ //
4
+ // Copilot advertises DOTTED ids (claude-opus-4.8); Claude Code's built-in table keys on DASHED ids
5
+ // (claude-opus-4-8) and shows the 1M badge only when the id ends with the [1m] suffix. So OUTBOUND
6
+ // (/v1/models, ANTHROPIC_MODEL) we dash every claude id + add [1m] for the families Claude Code knows
7
+ // to be 1M; INBOUND the proxy strips [1m] and fuzzy-maps the dashed id back to Copilot's dotted id
8
+ // (see bestModelMatch). Non-claude ids (gpt*, o3*) have no canonical form → pass through untouched.
9
/** Suffix that marks an id as a 1M-context model for Claude Code's picker. */
export const ONE_M_SUFFIX = "[1m]";

// Dashed canonical ids whose Claude Code table carries a 1M window — only these get the [1m] badge.
// Everything else stays at its default window. Anchored on the probed v2.1.195 binary table.
const ONE_M_MODELS = new Set(["claude-opus-4-6", "claude-opus-4-7", "claude-opus-4-8", "claude-sonnet-4-6"]);

// claude-<family>-<major>[-<minor>][-<yyyymmdd>]. Major and minor are capped at two digits so an
// 8-digit snapshot date can never be captured as a version component.
const CLAUDE_ID = /^claude-(opus|sonnet|haiku)-(\d{1,2})(?:-(\d{1,2}))?(?:-\d{8})?$/;

/**
 * Builds the friendly label for a dashed Claude id.
 *
 * claude-opus-4-8 -> "Opus 4.8", claude-sonnet-4 -> "Sonnet 4"; a trailing 8-digit snapshot date
 * is ignored. Ids that do not follow the family-first pattern are returned unchanged.
 *
 * @param {string} dashed - Dashed model id without the [1m] suffix.
 * @returns {string} The friendly label, or `dashed` itself when the pattern does not match.
 */
function displayName(dashed) {
  const m = CLAUDE_ID.exec(dashed);
  if (!m) {
    return dashed;
  }
  const [, fam, maj, min] = m;
  const version = min === undefined ? maj : `${maj}.${min}`;
  return `${fam[0].toUpperCase()}${fam.slice(1)} ${version}`;
}

/**
 * Outbound mapping: Copilot id -> the id + display name Claude Code's picker understands.
 *
 * Ids starting with "claude-" have their dots replaced by dashes; members of ONE_M_MODELS get the
 * [1m] suffix. An id that already ends with [1m] keeps it, and its label is derived from the
 * un-suffixed id so it still reads as a friendly name. Any other id is echoed back untouched.
 *
 * @param {string} copilotId - Model id as advertised by Copilot (e.g. "claude-opus-4.8").
 * @returns {{ id: string, display_name: string }} Canonical id and picker label.
 */
export function toCanonical(copilotId) {
  if (!copilotId.startsWith("claude-")) {
    return { id: copilotId, display_name: copilotId };
  }
  const alreadyBadged = copilotId.endsWith(ONE_M_SUFFIX);
  const dashed = stripOneM(copilotId).replace(/\./g, "-");
  const id = alreadyBadged || ONE_M_MODELS.has(dashed) ? `${dashed}${ONE_M_SUFFIX}` : dashed;
  return { id, display_name: displayName(dashed) };
}

/**
 * Inbound mapping: drops a trailing [1m] picker suffix, leaving every other id unchanged.
 *
 * @param {string} model - Model id as sent by the client.
 * @returns {string} The id without the [1m] suffix.
 */
export function stripOneM(model) {
  return model.endsWith(ONE_M_SUFFIX) ? model.slice(0, -ONE_M_SUFFIX.length) : model;
}
@@ -9,12 +9,16 @@ export function openDb(file) {
9
9
  exit_code INTEGER, stderr_tail TEXT NOT NULL, backoff_ms INTEGER NOT NULL, marked_unhealthy INTEGER NOT NULL DEFAULT 0);
10
10
  CREATE TABLE IF NOT EXISTS request_log (
11
11
  id INTEGER PRIMARY KEY AUTOINCREMENT, ts INTEGER NOT NULL, endpoint TEXT NOT NULL,
12
- model TEXT NOT NULL, status INTEGER NOT NULL, latency_ms INTEGER NOT NULL, error TEXT);
12
+ model TEXT NOT NULL, status INTEGER NOT NULL, latency_ms INTEGER NOT NULL, tokens_in INTEGER, tokens_out INTEGER, error TEXT);
13
13
  `);
14
- // Migrate request_log tables created before the error column existed.
14
+ // Migrate request_log tables created before later columns existed.
15
15
  const cols = db.prepare(`PRAGMA table_info(request_log)`).all();
16
16
  if (!cols.some((c) => c.name === "error"))
17
17
  db.exec(`ALTER TABLE request_log ADD COLUMN error TEXT`);
18
+ if (!cols.some((c) => c.name === "tokens_in"))
19
+ db.exec(`ALTER TABLE request_log ADD COLUMN tokens_in INTEGER`);
20
+ if (!cols.some((c) => c.name === "tokens_out"))
21
+ db.exec(`ALTER TABLE request_log ADD COLUMN tokens_out INTEGER`);
18
22
  return db;
19
23
  }
20
24
  export function recordRestart(db, e) {
@@ -26,10 +30,10 @@ export function listRestarts(db, limit) {
26
30
  FROM restart_events ORDER BY ts DESC LIMIT ?`).all(limit);
27
31
  }
28
32
  export function recordRequest(db, m) {
29
- db.prepare(`INSERT INTO request_log (ts, endpoint, model, status, latency_ms, error) VALUES (@ts, @endpoint, @model, @status, @latencyMs, @error)`)
30
- .run({ error: null, ...m });
33
+ db.prepare(`INSERT INTO request_log (ts, endpoint, model, status, latency_ms, tokens_in, tokens_out, error) VALUES (@ts, @endpoint, @model, @status, @latencyMs, @tokensIn, @tokensOut, @error)`)
34
+ .run({ tokensIn: null, tokensOut: null, error: null, ...m });
31
35
  }
32
36
  export function recentRequests(db, limit) {
33
- return db.prepare(`SELECT ts, endpoint, model, status, latency_ms as latencyMs, error FROM request_log ORDER BY ts DESC LIMIT ?`).all(limit)
34
- .map(({ error, ...r }) => (error == null ? r : { ...r, error }));
37
+ return db.prepare(`SELECT ts, endpoint, model, status, latency_ms as latencyMs, tokens_in as tokensIn, tokens_out as tokensOut, error FROM request_log ORDER BY ts DESC LIMIT ?`).all(limit)
38
+ .map(({ tokensIn, tokensOut, error, ...r }) => ({ ...r, ...(tokensIn != null ? { tokensIn } : {}), ...(tokensOut != null ? { tokensOut } : {}), ...(error != null ? { error } : {}) }));
35
39
  }
@@ -29,7 +29,7 @@ export function startSupervisor() {
29
29
  },
30
30
  onWorkerMessage: (m) => {
31
31
  if (m.type === "request-metric") {
32
- const sample = { ts: Date.now(), endpoint: m.endpoint, model: m.model, status: m.status, latencyMs: m.latencyMs, error: m.error };
32
+ const sample = { ts: Date.now(), endpoint: m.endpoint, model: m.model, status: m.status, latencyMs: m.latencyMs, tokensIn: m.tokensIn, tokensOut: m.tokensOut, error: m.error };
33
33
  recordRequest(db, sample);
34
34
  bus.emit("metric", sample);
35
35
  }
package/dist/tui/app.js CHANGED
@@ -267,6 +267,10 @@ export function App({ registry, title, workerState = "starting", initialModel =
267
267
  const tokens = Math.ceil(e.text.length / 4);
268
268
  return (_jsxs(Box, { flexDirection: "column", children: [_jsxs(Text, { color: theme.accent, children: ["\u273D ", _jsxs(Text, { color: theme.muted, children: [frame, " ", loadingVerb(elapsed), "\u2026 (esc to interrupt \u00B7 ", fmtElapsed(elapsed), " \u00B7 \u2193 ", fmtTokens(tokens), " tokens \u00B7 thinking)"] })] }), e.text ? _jsx(Text, { color: color, children: e.text }) : null] }, i));
269
269
  }
270
+ // User turns get a clay-on-dark highlight bar so they stand out from muted system notes and
271
+ // gray assistant output — a clear visual anchor for "this is what I said".
272
+ if (e.type === "user")
273
+ return _jsx(Box, { marginTop: 1, children: _jsx(Text, { backgroundColor: theme.accent, color: "black", bold: true, children: ` ${e.text.replace(/^›\s*/, "")} ` }) }, i);
270
274
  return _jsx(Text, { color: color, children: e.text }, i);
271
275
  }) }), body, _jsxs(Box, { flexDirection: "column", paddingX: 1, children: [_jsxs(Box, { children: [github && _jsxs(_Fragment, { children: [_jsx(Text, { color: theme.muted, children: "github " }), _jsx(Text, { color: github === "connected" ? theme.ready : theme.error, children: github === "connected" ? "✓" : "✗ /login" })] }), _jsxs(Text, { color: theme.muted, children: [github ? " · " : "", "daemon "] }), _jsx(Text, { color: stateColor[state], children: state })] }), _jsxs(Box, { children: [_jsx(Text, { color: theme.muted, children: "web " }), _jsx(Text, { color: webBackend === "unavailable" ? theme.muted : theme.ready, children: webBackend === "webiq" ? "✓ webiq" : webBackend === "copilot" ? "✓ copilot" : "✗ /webiq" }), _jsx(Text, { color: theme.muted, children: " \u00B7 " }), _jsx(ClientBadge, { name: "claude", status: status.claude }), _jsx(Text, { color: theme.muted, children: " " }), _jsx(ClientBadge, { name: "codex", status: status.codex }), _jsx(Text, { color: theme.muted, children: " \u00B7 /help" })] })] })] }));
272
276
  }
@@ -31,7 +31,7 @@ export function buildActions(client) {
31
31
  const a = aggregate(await client.requests());
32
32
  if (!a.total)
33
33
  return "no requests yet";
34
- return `requests: ${a.total}, errors: ${a.errors}; ` + a.byModel.map((r) => `${r.model} n=${r.count} avg=${r.avgMs}ms`).join("; ");
34
+ return `requests: ${a.total}, errors: ${a.errors}, tokens: ${a.tokensIn}↑/${a.tokensOut}↓, est. cost: $${a.costUsd.toFixed(3)}; ` + a.byModel.map((r) => `${r.model} n=${r.count} avg=${r.avgMs}ms`).join("; ");
35
35
  },
36
36
  };
37
37
  }
@@ -1,21 +1,47 @@
1
1
  // A request "failed" if it returned a 4xx/5xx OR carried an error message — runaway streams finish
2
2
  // 200 but tag an error (model degenerated, cut early), and those are exactly what we want to surface.
3
3
  const isError = (s) => s.status >= 400 || s.error != null;
4
+ // Indicative $/1M-token list prices (in, out) used ONLY to estimate spend — Copilot is flat-fee, so
5
+ // this is "what these tokens would cost at provider list price", not a real bill. Matched by substring;
6
+ // unknown models fall back to a mid GPT-4o-class rate. Update as needed; precision isn't the point.
7
// Ordered list: the first entry whose `match` is a substring of the lower-cased model id wins, so
// more specific ids must precede their prefixes (gpt-4o-mini before gpt-4o).
const PRICING = [
  { match: "opus", in: 15, out: 75 },
  { match: "sonnet", in: 3, out: 15 },
  { match: "haiku", in: 0.8, out: 4 },
  { match: "gpt-5", in: 1.25, out: 10 },
  { match: "gpt-4o-mini", in: 0.15, out: 0.6 },
  { match: "gpt-4o", in: 2.5, out: 10 },
  { match: "o1", in: 15, out: 60 },
];

// Rate applied when no PRICING entry matches the model id.
const RATE_FALLBACK = { in: 2.5, out: 10 };

// Looks up the $/1M-token rate for a model id; a nullish id falls through to RATE_FALLBACK.
const rate = (model) => {
  const id = String(model ?? "").toLowerCase();
  return PRICING.find((p) => id.includes(p.match)) ?? RATE_FALLBACK;
};

/**
 * Estimates what the given token counts would cost at the PRICING list rates.
 *
 * @param {string} model - Model id; matched case-insensitively by substring against PRICING.
 * @param {number} [tokensIn] - Prompt tokens; a missing count is treated as 0.
 * @param {number} [tokensOut] - Completion tokens; a missing count is treated as 0.
 * @returns {number} Estimated cost in USD.
 */
export function estimateCost(model, tokensIn, tokensOut) {
  const r = rate(model);
  // Samples without usage data carry no token counts; coalescing keeps the result a number
  // instead of NaN.
  return ((tokensIn ?? 0) * r.in + (tokensOut ?? 0) * r.out) / 1_000_000;
}
4
22
  export function aggregate(samples) {
5
23
  const map = new Map();
6
24
  let errors = 0;
7
25
  for (const s of samples) {
8
26
  if (isError(s))
9
27
  errors++;
10
- const m = map.get(s.model) ?? { count: 0, sum: 0 };
28
+ const m = map.get(s.model) ?? { count: 0, sum: 0, tin: 0, tout: 0 };
11
29
  m.count++;
12
30
  m.sum += s.latencyMs;
31
+ m.tin += s.tokensIn ?? 0;
32
+ m.tout += s.tokensOut ?? 0;
13
33
  map.set(s.model, m);
14
34
  }
35
+ const byModel = [...map.entries()].map(([model, v]) => ({
36
+ model, count: v.count, avgMs: Math.round(v.sum / v.count),
37
+ tokensIn: v.tin, tokensOut: v.tout, costUsd: estimateCost(model, v.tin, v.tout),
38
+ }));
15
39
  return {
16
- total: samples.length,
17
- errors,
18
- byModel: [...map.entries()].map(([model, v]) => ({ model, count: v.count, avgMs: Math.round(v.sum / v.count) })),
40
+ total: samples.length, errors,
41
+ tokensIn: byModel.reduce((n, r) => n + r.tokensIn, 0),
42
+ tokensOut: byModel.reduce((n, r) => n + r.tokensOut, 0),
43
+ costUsd: byModel.reduce((n, r) => n + r.costUsd, 0),
44
+ byModel,
19
45
  };
20
46
  }
21
47
  // The failed requests (status >= 400 or any tagged error), newest-first, capped at `limit`. This is
@@ -1,3 +1,4 @@
1
+ import { toCanonical } from "../../core/model-canonical.js";
1
2
  export function claudeCodeConfig(e) {
2
3
  const base = `http://${e.host}:${e.port}/anthropic`;
3
4
  return {
@@ -6,11 +7,14 @@ export function claudeCodeConfig(e) {
6
7
  };
7
8
  }
8
9
  export const ONE_M_SUFFIX = "[1m]";
9
- // Claude Code switches to its 1M context window only when ANTHROPIC_MODEL ends with `[1m]` that
10
- // suffix is its built-in signal for a 1M model. Mirror agent-maestro: append it for models whose
11
- // window is in the ~1M band (800K..1.5M). Without it Claude Code assumes 200K -> "context 100%"
12
- // and /compact fails. The proxy strips the suffix again before forwarding to Copilot.
10
+ // Claude Code switches to its 1M window only when ANTHROPIC_MODEL ends with `[1m]`, and only matches
11
+ // the model to its native picker entry when the id is the DASHED canonical form it knows
12
+ // (claude-opus-4-8, not Copilot's dotted claude-opus-4.8). Route the default model through toCanonical
13
+ // so it's both dashed and 1M-badged for the known families; for non-claude ids keep the legacy
14
+ // context-window suffix. The proxy strips [1m] + fuzzy-maps back to Copilot before forwarding.
13
15
  export function withClaude1mSuffix(model, contextWindow) {
16
+ if (model.startsWith("claude-"))
17
+ return toCanonical(model).id;
14
18
  return contextWindow && contextWindow > 800_000 && contextWindow < 1_500_000 && !model.endsWith(ONE_M_SUFFIX)
15
19
  ? `${model}${ONE_M_SUFFIX}`
16
20
  : model;
@@ -24,12 +24,18 @@ export function buildRegistry(ctx, endpoint, opts = {}) {
24
24
  return ["no request errors logged — everything's green ✓"];
25
25
  return errs.map((e) => `${new Date(e.ts).toISOString()} ${e.status} ${e.endpoint} ${e.model} — ${e.error ?? "(no message)"}`);
26
26
  } });
27
- reg.add({ name: "/metrics", describe: "request metrics + recent errors", run: async (_a, c) => {
27
+ reg.add({ name: "/metrics", describe: "request metrics, tokens, cost + recent errors", run: async (_a, c) => {
28
28
  const reqs = await c.client.requests();
29
29
  const a = aggregate(reqs);
30
30
  if (!a.total)
31
31
  return ["no requests yet"];
32
- const lines = [`requests: ${a.total} errors: ${a.errors}`, ...a.byModel.map((r) => ` ${r.model.padEnd(20)} n=${r.count} avg=${r.avgMs}ms`)];
32
+ const k = (n) => (n >= 1000 ? `${(n / 1000).toFixed(1)}k` : `${n}`);
33
+ const usd = (n) => `$${n < 1 ? n.toFixed(3) : n.toFixed(2)}`;
34
+ const lines = [
35
+ `requests: ${a.total} errors: ${a.errors} tokens: ${k(a.tokensIn)}↑ ${k(a.tokensOut)}↓ est. cost: ${usd(a.costUsd)}`,
36
+ ...a.byModel.map((r) => ` ${r.model.padEnd(20)} n=${r.count} avg=${r.avgMs}ms ${k(r.tokensIn)}↑ ${k(r.tokensOut)}↓ ~${usd(r.costUsd)}`),
37
+ " cost is a list-price estimate (Copilot is flat-fee)",
38
+ ];
33
39
  const errs = recentErrors(reqs, 5);
34
40
  if (errs.length) {
35
41
  lines.push("recent errors:");
package/dist/version.js CHANGED
@@ -1,2 +1,2 @@
1
1
  // AUTO-GENERATED by scripts/gen-version.mjs from package.json — do not edit.
2
- export const APP_VERSION = "0.6.0";
2
+ export const APP_VERSION = "0.8.0";
@@ -5,6 +5,7 @@ import { errorHint } from "./errors.js";
5
5
  import { CopilotAuthError } from "../providers/copilot/token.js";
6
6
  import { isGatewayTool } from "../core/server-tools.js";
7
7
  import { RunawayGuard } from "../core/stream-guard.js";
8
+ import { toCanonical } from "../core/model-canonical.js";
8
9
  const frame = (event, data) => `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`;
9
10
  const safeJson = (s) => { try {
10
11
  return JSON.parse(s);
@@ -22,9 +23,11 @@ const MAX_TOOL_ITERS = 5;
22
23
  const STREAM_DEADLINE_MS = 120_000;
23
24
  export function mountAnthropic(app, router, onMetric, runner) {
24
25
  // Model discovery — Anthropic list shape. Claude Desktop / Anthropic-protocol clients GET this
25
- // before chatting; without it they 404 on the connection test.
26
+ // before chatting; without it they 404 on the connection test. Claude families are mapped to the
27
+ // canonical id + display Claude Code recognises (with [1m] for 1M models) so its native picker shows
28
+ // friendly names + the 1M badge; non-claude ids pass through. resolveModel maps them back inbound.
26
29
  app.get("/anthropic/v1/models", (_req, res) => {
27
- res.json({ data: router.listModels().map((id) => ({ type: "model", id, display_name: id })), has_more: false });
30
+ res.json({ data: router.listModels().map((id) => ({ type: "model", ...toCanonical(id) })), has_more: false });
28
31
  });
29
32
  // Anthropic clients (Claude Code) call this to size the prompt and decide when to auto-compact.
30
33
  app.post("/anthropic/v1/messages/count_tokens", (req, res) => {
@@ -35,7 +38,7 @@ export function mountAnthropic(app, router, onMetric, runner) {
35
38
  const canon = anthropicRequestToCanonical(req.body);
36
39
  canon.model = router.resolveModel(canon.model);
37
40
  const provider = router.pick(canon.model);
38
- const metric = (status, error) => onMetric({ endpoint: "/anthropic/v1/messages", model: canon.model, status, latencyMs: Date.now() - start, error });
41
+ const metric = (status, opts = {}) => onMetric({ endpoint: "/anthropic/v1/messages", model: canon.model, status, latencyMs: Date.now() - start, tokensIn: opts.tokensIn, tokensOut: opts.tokensOut, error: opts.error });
39
42
  try {
40
43
  if (canon.stream) {
41
44
  res.setHeader("content-type", "text/event-stream");
@@ -157,7 +160,7 @@ export function mountAnthropic(app, router, onMetric, runner) {
157
160
  res.write(frame("message_delta", { type: "message_delta", delta: { stop_reason: finalStop === "tool_use" ? "tool_use" : finalStop === "length" ? "max_tokens" : "end_turn" }, usage: deltaUsage }));
158
161
  res.write(frame("message_stop", { type: "message_stop" }));
159
162
  res.end();
160
- metric(200, runaway ? `runaway stream cut (${runawayReason}) — model degenerated, ended early as max_tokens` : undefined);
163
+ metric(200, { tokensIn: inputTokens, tokensOut: sumCompletion, error: runaway ? `runaway stream cut (${runawayReason}) — model degenerated, ended early as max_tokens` : undefined });
161
164
  }
162
165
  else {
163
166
  // Non-stream: same gateway loop without SSE — run gateway tools and re-complete until the
@@ -182,7 +185,7 @@ export function mountAnthropic(app, router, onMetric, runner) {
182
185
  if (runner)
183
186
  resp = { ...resp, content: resp.content.filter((b) => b.type !== "tool_use" || !isGatewayTool(b.name)) };
184
187
  res.json(canonicalToAnthropicResponse(resp));
185
- metric(200);
188
+ metric(200, { tokensIn: resp.usage?.promptTokens, tokensOut: resp.usage?.completionTokens });
186
189
  }
187
190
  }
188
191
  catch (err) {
@@ -201,7 +204,7 @@ export function mountAnthropic(app, router, onMetric, runner) {
201
204
  res.write(frame("error", { type: "error", error: { type: errorType, message } }));
202
205
  res.end();
203
206
  }
204
- metric(status, message);
207
+ metric(status, { error: message });
205
208
  }
206
209
  });
207
210
  }
@@ -18,7 +18,7 @@ export function mountOpenAI(app, router, onMetric) {
18
18
  const canon = openaiRequestToCanonical(req.body);
19
19
  canon.model = router.resolveModel(canon.model);
20
20
  const provider = router.pick(canon.model);
21
- const metric = (status, error) => onMetric({ endpoint: "/openai/chat/completions", model: canon.model, status, latencyMs: Date.now() - start, error });
21
+ const metric = (status, opts = {}) => onMetric({ endpoint: "/openai/chat/completions", model: canon.model, status, latencyMs: Date.now() - start, tokensIn: opts.tokensIn, tokensOut: opts.tokensOut, error: opts.error });
22
22
  try {
23
23
  if (canon.stream) {
24
24
  res.setHeader("content-type", "text/event-stream");
@@ -27,8 +27,11 @@ export function mountOpenAI(app, router, onMetric) {
27
27
  const guard = new RunawayGuard();
28
28
  const deadline = start + STREAM_DEADLINE_MS;
29
29
  let runawayReason = "";
30
+ let usage;
30
31
  for await (const chunk of provider.stream(canon)) {
31
32
  res.write(canonicalChunkToOpenAISSE(chunk, id, canon.model));
33
+ if (chunk.done)
34
+ usage = chunk.usage;
32
35
  // Backstop covers tool-call streams too: a model can loop on tool calls forever, which
33
36
  // never feeds the text guard — the wall clock cuts those cleanly instead of freezing.
34
37
  if (chunk.kind === "text" && guard.push(chunk.delta)) {
@@ -41,11 +44,12 @@ export function mountOpenAI(app, router, onMetric) {
41
44
  }
42
45
  }
43
46
  res.end();
44
- metric(200, runawayReason ? `runaway stream cut (${runawayReason}) — model degenerated, ended early` : undefined);
47
+ metric(200, { tokensIn: usage?.promptTokens, tokensOut: usage?.completionTokens, error: runawayReason ? `runaway stream cut (${runawayReason}) — model degenerated, ended early` : undefined });
45
48
  }
46
49
  else {
47
- res.json(canonicalToOpenAIResponse(await provider.complete(canon)));
48
- metric(200);
50
+ const resp = await provider.complete(canon);
51
+ res.json(canonicalToOpenAIResponse(resp));
52
+ metric(200, { tokensIn: resp.usage?.promptTokens, tokensOut: resp.usage?.completionTokens });
49
53
  }
50
54
  }
51
55
  catch (err) {
@@ -62,7 +66,7 @@ export function mountOpenAI(app, router, onMetric) {
62
66
  res.write(`data: ${JSON.stringify({ error: { message } })}\n\n`);
63
67
  res.end();
64
68
  }
65
- metric(status, message);
69
+ metric(status, { error: message });
66
70
  }
67
71
  });
68
72
  // OpenAI Responses API — Codex speaks ONLY this after codex#7782 removed wire_api="chat". Codex
@@ -73,7 +77,7 @@ export function mountOpenAI(app, router, onMetric) {
73
77
  const canon = responsesRequestToCanonical(req.body);
74
78
  canon.model = router.resolveModel(canon.model);
75
79
  const provider = router.pick(canon.model);
76
- const metric = (status, error) => onMetric({ endpoint: "/openai/responses", model: canon.model, status, latencyMs: Date.now() - start, error });
80
+ const metric = (status, opts = {}) => onMetric({ endpoint: "/openai/responses", model: canon.model, status, latencyMs: Date.now() - start, tokensIn: opts.tokensIn, tokensOut: opts.tokensOut, error: opts.error });
77
81
  try {
78
82
  if (canon.stream) {
79
83
  res.setHeader("content-type", "text/event-stream");
@@ -119,11 +123,12 @@ export function mountOpenAI(app, router, onMetric) {
119
123
  for (const f of sse.finish(usage, finish, argsByIdx))
120
124
  res.write(f);
121
125
  res.end();
122
- metric(200, runawayReason ? `runaway stream cut (${runawayReason}) — model degenerated, ended early` : undefined);
126
+ metric(200, { tokensIn: usage?.promptTokens, tokensOut: usage?.completionTokens, error: runawayReason ? `runaway stream cut (${runawayReason}) — model degenerated, ended early` : undefined });
123
127
  }
124
128
  else {
125
- res.json(canonicalToResponsesResponse(await provider.complete(canon)));
126
- metric(200);
129
+ const resp = await provider.complete(canon);
130
+ res.json(canonicalToResponsesResponse(resp));
131
+ metric(200, { tokensIn: resp.usage?.promptTokens, tokensOut: resp.usage?.completionTokens });
127
132
  }
128
133
  }
129
134
  catch (err) {
@@ -138,7 +143,7 @@ export function mountOpenAI(app, router, onMetric) {
138
143
  res.write(`data: ${JSON.stringify({ type: "error", message })}\n\n`);
139
144
  res.end();
140
145
  }
141
- metric(status, message);
146
+ metric(status, { error: message });
142
147
  }
143
148
  });
144
149
  }
@@ -1,5 +1,6 @@
1
1
  import { bestModelMatch } from "../core/fuzzy.js";
2
2
  import { FALLBACK_MODELS } from "../providers/copilot/models.js";
3
+ import { stripOneM } from "../core/model-canonical.js";
3
4
  // M1: single provider. Model name is remapped to the provider's actual id.
4
5
  export class Router {
5
6
  providers;
@@ -16,12 +17,12 @@ export class Router {
16
17
  listModels() { return this.available.length ? this.available : FALLBACK_MODELS; }
17
18
  resolveModel(requested) {
18
19
  // Claude Code appends [1m] to signal its 1M context window; Copilot doesn't know that id, so
19
- // strip it back to the real model before mapping/forwarding.
20
- requested = requested.endsWith("[1m]") ? requested.slice(0, -4) : requested;
20
+ // strip it back to the canonical model before mapping/forwarding.
21
+ requested = stripOneM(requested);
21
22
  const mapped = this.modelMap[requested];
22
23
  if (mapped)
23
24
  return mapped;
24
- // Fuzzy-match a near-miss id (e.g. claude-opus-4-8-20251101 -> claude-opus-4.8) to a real model.
25
+ // Fuzzy-match a near-miss id (e.g. canonical claude-opus-4-8 -> Copilot claude-opus-4.8) to a real model.
25
26
  if (this.available.length) {
26
27
  const match = bestModelMatch(requested, this.available);
27
28
  if (match)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "copilot-reverse",
3
- "version": "0.6.0",
3
+ "version": "0.8.0",
4
4
  "description": "Interactive terminal app that exposes your GitHub Copilot subscription as local OpenAI- and Anthropic-compatible endpoints, with a self-healing daemon and a built-in assistant.",
5
5
  "type": "module",
6
6
  "license": "MIT",