copilot-reverse 0.2.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,8 +3,18 @@ import { anthropicRequestToCanonical, canonicalToAnthropicResponse } from "../co
3
3
  import { estimateTokens } from "../core/tokens.js";
4
4
  import { errorHint } from "./errors.js";
5
5
  import { CopilotAuthError } from "../providers/copilot/token.js";
6
+ import { isGatewayTool } from "../core/server-tools.js";
6
7
  const frame = (event, data) => `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`;
7
- export function mountAnthropic(app, router, onMetric) {
8
+ const safeJson = (s) => { try {
9
+ return JSON.parse(s);
10
+ }
11
+ catch {
12
+ return {};
13
+ } };
14
+ // Bounds the gateway tool loop so a model that calls web_search every turn (or a runner that always
15
+ // returns "search more") can never spin forever inside one request.
16
+ const MAX_TOOL_ITERS = 5;
17
+ export function mountAnthropic(app, router, onMetric, runner) {
8
18
  // Model discovery — Anthropic list shape. Claude Desktop / Anthropic-protocol clients GET this
9
19
  // before chatting; without it they 404 on the connection test.
10
20
  app.get("/anthropic/v1/models", (_req, res) => {
@@ -33,61 +43,112 @@ export function mountAnthropic(app, router, onMetric) {
33
43
  // isn't stuck at 0%; the terminal message_delta then reports the exact count.
34
44
  const estInput = estimateTokens(canon);
35
45
  res.write(frame("message_start", { type: "message_start", message: { id, type: "message", role: "assistant", model: canon.model, content: [], stop_reason: null, usage: { input_tokens: estInput, output_tokens: 0, cache_read_input_tokens: 0 } } }));
36
- // D3 (interface-freeze §5.4) + mixed text+tool fix (architect, 2026-06-17): the endpoint owns
37
- // open/stop bookkeeping with DYNAMIC SEQUENTIAL allocation. We do NOT pre-open an index-0 text block,
38
- // and we do NOT map the Copilot tool index straight to the Anthropic block index (that collides with a
39
- // text preamble on a mixed turn). Instead, whichever block opens FIRST claims Anthropic index 0, the
40
- // next claims 1, etc. This keeps indices contiguous-from-0 in all three cases: pure-text (text@0),
41
- // pure-tool (tool@0), and mixed preamble+tool (text@0, tool@1).
46
+ // D3 (interface-freeze §5.4) + mixed text+tool fix (architect, 2026-06-17) + gateway tool loop
47
+ // (2026-06): the endpoint owns block open/stop bookkeeping with DYNAMIC SEQUENTIAL allocation,
48
+ // and `next` spans ALL loop iterations so block indices stay contiguous-from-0 across turns.
49
+ // Within a turn, text streams live (transparent progress) but tool calls are BUFFERED: only
50
+ // after the turn ends do we know whether they're gateway tools (run here, then loop) or client
51
+ // tools (forwarded to the client, exactly as before). Whichever block opens first claims index 0.
42
52
  let next = 0;
43
- let textIndex; // Anthropic index of the (single) text block, once opened
44
- const toolIndex = new Map(); // Copilot tool index -> Anthropic block index
45
- const openedOrder = []; // Anthropic indices in allocation order
46
- let stopReason = "stop";
47
- let usage;
48
- for await (const chunk of provider.stream(canon)) {
49
- if (chunk.done) {
50
- stopReason = chunk.finishReason ?? "stop";
51
- usage = chunk.usage;
52
- break;
53
- }
54
- if (chunk.kind === "text") {
55
- if (textIndex === undefined) {
56
- textIndex = next++;
57
- openedOrder.push(textIndex);
58
- res.write(frame("content_block_start", { type: "content_block_start", index: textIndex, content_block: { type: "text", text: "" } }));
53
+ let lastPrompt = estInput, lastCached = 0, sumCompletion = 0;
54
+ let finalStop = "stop";
55
+ for (let iter = 0; iter < MAX_TOOL_ITERS; iter++) {
56
+ let textIndex; // Anthropic index of this turn's text block
57
+ const byCopilotIdx = new Map();
58
+ const buffered = []; // tool calls seen this turn, in order
59
+ let turnStop = "stop";
60
+ for await (const chunk of provider.stream(canon)) {
61
+ if (chunk.done) {
62
+ turnStop = chunk.finishReason ?? "stop";
63
+ if (chunk.usage) {
64
+ lastPrompt = chunk.usage.promptTokens ?? lastPrompt;
65
+ lastCached = chunk.usage.cachedTokens ?? 0;
66
+ sumCompletion += chunk.usage.completionTokens ?? 0;
67
+ }
68
+ break;
59
69
  }
60
- res.write(frame("content_block_delta", { type: "content_block_delta", index: textIndex, delta: { type: "text_delta", text: chunk.delta } }));
61
- }
62
- else if (chunk.kind === "tool_use_start") {
63
- if (!toolIndex.has(chunk.index)) {
64
- const index = next++;
65
- toolIndex.set(chunk.index, index);
66
- openedOrder.push(index);
67
- res.write(frame("content_block_start", { type: "content_block_start", index, content_block: { type: "tool_use", id: chunk.id, name: chunk.name, input: {} } }));
70
+ if (chunk.kind === "text") {
71
+ if (textIndex === undefined) {
72
+ textIndex = next++;
73
+ res.write(frame("content_block_start", { type: "content_block_start", index: textIndex, content_block: { type: "text", text: "" } }));
74
+ }
75
+ res.write(frame("content_block_delta", { type: "content_block_delta", index: textIndex, delta: { type: "text_delta", text: chunk.delta } }));
76
+ }
77
+ else if (chunk.kind === "tool_use_start") {
78
+ if (!byCopilotIdx.has(chunk.index)) {
79
+ const t = { id: chunk.id, name: chunk.name, args: "" };
80
+ byCopilotIdx.set(chunk.index, t);
81
+ buffered.push(t);
82
+ }
68
83
  }
84
+ else if (chunk.kind === "tool_use_delta") {
85
+ const t = byCopilotIdx.get(chunk.index);
86
+ if (t)
87
+ t.args += chunk.argsDelta;
88
+ }
89
+ }
90
+ if (textIndex !== undefined)
91
+ res.write(frame("content_block_stop", { type: "content_block_stop", index: textIndex }));
92
+ const gatewayCalls = buffered.filter((t) => isGatewayTool(t.name));
93
+ // Invariant: a gateway tool (web_search/web_fetch) must NEVER reach the client — the client
94
+ // has no handler for it and would stall. So whenever the model calls gateway tools (and a
95
+ // runner is wired), run them here and loop, feeding results back. Any client tools called in
96
+ // the SAME turn are deliberately NOT forwarded yet: we drop them this turn and let the model
97
+ // re-issue them on the next turn, now informed by the search result. (Forwarding them now
98
+ // would end the turn as tool_use and strand the gateway result with nowhere to go.)
99
+ if (runner && gatewayCalls.length) {
100
+ canon.messages.push({ role: "assistant", content: gatewayCalls.map((t) => ({ type: "tool_use", id: t.id, name: t.name, input: safeJson(t.args) })) });
101
+ const results = [];
102
+ for (const t of gatewayCalls)
103
+ results.push({ type: "tool_result", toolUseId: t.id, content: await runner(t.name, safeJson(t.args)) });
104
+ canon.messages.push({ role: "tool", content: results });
105
+ continue;
69
106
  }
70
- else if (chunk.kind === "tool_use_delta") {
71
- const index = toolIndex.get(chunk.index);
72
- if (index !== undefined)
73
- res.write(frame("content_block_delta", { type: "content_block_delta", index, delta: { type: "input_json_delta", partial_json: chunk.argsDelta } }));
107
+ // Terminal turn (no gateway tools, or no runner): forward any buffered tool calls to the
108
+ // client (open/delta/close each at its own freshly-allocated index), then finish.
109
+ for (const t of buffered) {
110
+ const index = next++;
111
+ res.write(frame("content_block_start", { type: "content_block_start", index, content_block: { type: "tool_use", id: t.id, name: t.name, input: {} } }));
112
+ if (t.args)
113
+ res.write(frame("content_block_delta", { type: "content_block_delta", index, delta: { type: "input_json_delta", partial_json: t.args } }));
114
+ res.write(frame("content_block_stop", { type: "content_block_stop", index }));
74
115
  }
116
+ finalStop = buffered.length ? "tool_use" : turnStop;
117
+ break;
75
118
  }
76
- // Close every opened block (ascending Anthropic index) before the terminal frames.
77
- for (const index of [...openedOrder].sort((a, b) => a - b))
78
- res.write(frame("content_block_stop", { type: "content_block_stop", index }));
79
119
  // Report real usage (agent-maestro shape): split cached tokens out of input so Claude Code's
80
- // context bar is accurate. Falls back to zeros if Copilot didn't return usage.
81
- const cached = usage?.cachedTokens ?? 0;
82
- const inputTokens = Math.max(0, (usage?.promptTokens ?? estInput) - cached); // fall back to the estimate
83
- const deltaUsage = { input_tokens: inputTokens, output_tokens: usage?.completionTokens ?? 0, cache_read_input_tokens: cached };
84
- res.write(frame("message_delta", { type: "message_delta", delta: { stop_reason: stopReason === "tool_use" ? "tool_use" : stopReason === "length" ? "max_tokens" : "end_turn" }, usage: deltaUsage }));
120
+ // context bar is accurate. promptTokens is the last turn's (largest, includes tool results);
121
+ // output is summed across turns.
122
+ const inputTokens = Math.max(0, lastPrompt - lastCached);
123
+ const deltaUsage = { input_tokens: inputTokens, output_tokens: sumCompletion, cache_read_input_tokens: lastCached };
124
+ res.write(frame("message_delta", { type: "message_delta", delta: { stop_reason: finalStop === "tool_use" ? "tool_use" : finalStop === "length" ? "max_tokens" : "end_turn" }, usage: deltaUsage }));
85
125
  res.write(frame("message_stop", { type: "message_stop" }));
86
126
  res.end();
87
127
  metric(200);
88
128
  }
89
129
  else {
90
- res.json(canonicalToAnthropicResponse(await provider.complete(canon)));
130
+ // Non-stream: same gateway loop without SSE — run gateway tools and re-complete until the
131
+ // model answers with text (or a client tool), capped identically.
132
+ let resp = await provider.complete(canon);
133
+ for (let iter = 0; runner && iter < MAX_TOOL_ITERS; iter++) {
134
+ const toolUses = resp.content.filter((b) => b.type === "tool_use");
135
+ const gatewayUses = toolUses.filter((b) => isGatewayTool(b.name));
136
+ if (!gatewayUses.length)
137
+ break; // no gateway work left — client tools / text are terminal
138
+ // Run the gateway tools, feed results back, and continue. Any client tools in the SAME turn
139
+ // ride along in the assistant message and remain in the final resp for the client to handle.
140
+ canon.messages.push({ role: "assistant", content: resp.content });
141
+ const results = [];
142
+ for (const u of gatewayUses)
143
+ results.push({ type: "tool_result", toolUseId: u.id, content: await runner(u.name, u.input) });
144
+ canon.messages.push({ role: "tool", content: results });
145
+ resp = await provider.complete(canon);
146
+ }
147
+ // Invariant: never forward a gateway tool_use to the client (it can't handle it). If the cap
148
+ // was hit with gateway calls still pending, strip them — better a partial answer than a stall.
149
+ if (runner)
150
+ resp = { ...resp, content: resp.content.filter((b) => b.type !== "tool_use" || !isGatewayTool(b.name)) };
151
+ res.json(canonicalToAnthropicResponse(resp));
91
152
  metric(200);
92
153
  }
93
154
  }
@@ -2,8 +2,11 @@ import { createWorkerApp } from "./server.js";
2
2
  import { Router } from "./router.js";
3
3
  import { CopilotAdapter } from "../providers/copilot/adapter.js";
4
4
  import { CopilotTokenStore } from "../providers/copilot/token.js";
5
- import { fetchCopilotModels } from "../providers/copilot/models.js";
5
+ import { fetchCopilotModels, fetchModelEndpoints } from "../providers/copilot/models.js";
6
6
  import { readGhToken } from "../shared/creds.js";
7
+ import { readWebIqKey, readWebSearchMode, resolveWebSearchBackend } from "../shared/webiq-key.js";
8
+ import { makeGatewayRunner } from "../core/server-tools.js";
9
+ import { borrowSearch } from "../providers/copilot/borrow-search.js";
7
10
  import { dataDir } from "../shared/paths.js";
8
11
  import { defaultConfig } from "../shared/config.js";
9
12
  function send(msg) { if (process.send)
@@ -17,10 +20,27 @@ if (!gh) {
17
20
  process.exit(1);
18
21
  }
19
22
  const tokenStore = new CopilotTokenStore(gh);
20
- const router = new Router([new CopilotAdapter(tokenStore)], cfg.modelMap);
21
- // Load the live model list so the router can fuzzy-match near-miss ids (e.g. dated Anthropic ids).
22
- void tokenStore.get().then((t) => fetchCopilotModels(t)).then((ids) => router.setAvailableModels(ids)).catch(() => { });
23
- const app = createWorkerApp(router, (m) => send({ type: "request-metric", ...m }));
23
+ // Per-model supported_endpoints, populated lazily from the live model list (same source as the model
24
+ // ids). The adapter reads through this map so responses-only models (e.g. gpt-5.5) route to /responses
25
+ // as soon as discovery resolves; until then the map is empty and the /chat 400 safety net covers it.
26
+ let modelEndpoints = {};
27
+ const router = new Router([new CopilotAdapter(tokenStore, fetch, (m) => modelEndpoints[m] ?? [])], cfg.modelMap);
28
+ // Load the live model list so the router can fuzzy-match near-miss ids (e.g. dated Anthropic ids),
29
+ // and the endpoint map so the adapter can route per model. One token fetch feeds both.
30
+ void tokenStore.get().then(async (t) => {
31
+ const [ids, endpoints] = await Promise.all([fetchCopilotModels(t), fetchModelEndpoints(t)]);
32
+ router.setAvailableModels(ids);
33
+ modelEndpoints = endpoints;
34
+ }).catch(() => { });
35
+ // Gateway-run web_search / web_fetch. The backend is resolved per call (lazy → /webiq toggles need no
36
+ // restart): currently WebIQ when a key is set, else unavailable (Copilot borrow is disabled — see
37
+ // COPILOT_WEB_SEARCH_ENABLED). resolveWebSearchBackend centralises that policy.
38
+ const gatewayRunner = makeGatewayRunner({
39
+ backend: () => resolveWebSearchBackend(readWebSearchMode(dataDir()), Boolean(readWebIqKey(dataDir()))),
40
+ webiqKey: () => readWebIqKey(dataDir()),
41
+ borrow: { run: (input) => borrowSearch(tokenStore, input) },
42
+ });
43
+ const app = createWorkerApp(router, (m) => send({ type: "request-metric", ...m }), gatewayRunner);
24
44
  const server = app.listen(port, host, () => send({ type: "ready", port }));
25
45
  const hb = setInterval(() => send({ type: "heartbeat", ts: Date.now() }), 5_000);
26
46
  process.on("message", (m) => { if (m?.type === "shutdown") {
@@ -1,5 +1,6 @@
1
1
  import { randomUUID } from "node:crypto";
2
2
  import { openaiRequestToCanonical, canonicalToOpenAIResponse, canonicalChunkToOpenAISSE } from "../core/openai-inbound.js";
3
+ import { responsesRequestToCanonical, canonicalToResponsesResponse, ResponsesSSE } from "../core/responses-inbound.js";
3
4
  import { errorHint } from "./errors.js";
4
5
  import { CopilotAuthError } from "../providers/copilot/token.js";
5
6
  export function mountOpenAI(app, router, onMetric) {
@@ -46,4 +47,65 @@ export function mountOpenAI(app, router, onMetric) {
46
47
  metric(status, message);
47
48
  }
48
49
  });
50
+ // OpenAI Responses API — Codex speaks ONLY this after codex#7782 removed wire_api="chat". Codex
51
+ // POSTs {base_url}/responses, so with base_url …/openai the route is /openai/responses. Same
52
+ // canonical pipeline as chat/completions; the Responses translator handles the item-centric shape.
53
+ app.post("/openai/responses", async (req, res) => {
54
+ const start = Date.now();
55
+ const canon = responsesRequestToCanonical(req.body);
56
+ canon.model = router.resolveModel(canon.model);
57
+ const provider = router.pick(canon.model);
58
+ const metric = (status, error) => onMetric({ endpoint: "/openai/responses", model: canon.model, status, latencyMs: Date.now() - start, error });
59
+ try {
60
+ if (canon.stream) {
61
+ res.setHeader("content-type", "text/event-stream");
62
+ res.setHeader("cache-control", "no-cache");
63
+ const sse = new ResponsesSSE(`resp_${randomUUID().replace(/-/g, "")}`, canon.model);
64
+ res.write(sse.start());
65
+ const argsByIdx = new Map();
66
+ let usage;
67
+ let finish = "stop";
68
+ for await (const chunk of provider.stream(canon)) {
69
+ if (chunk.done) {
70
+ finish = chunk.finishReason ?? "stop";
71
+ usage = chunk.usage;
72
+ break;
73
+ }
74
+ if (chunk.kind === "text")
75
+ for (const f of sse.text(chunk.delta))
76
+ res.write(f);
77
+ else if (chunk.kind === "tool_use_start")
78
+ for (const f of sse.toolStart(chunk.index, chunk.id, chunk.name))
79
+ res.write(f);
80
+ else if (chunk.kind === "tool_use_delta") {
81
+ argsByIdx.set(chunk.index, (argsByIdx.get(chunk.index) ?? "") + chunk.argsDelta);
82
+ for (const f of sse.toolArgs(chunk.index, chunk.argsDelta))
83
+ res.write(f);
84
+ }
85
+ }
86
+ for (const f of sse.finish(usage, finish, argsByIdx))
87
+ res.write(f);
88
+ res.end();
89
+ metric(200);
90
+ }
91
+ else {
92
+ res.json(canonicalToResponsesResponse(await provider.complete(canon)));
93
+ metric(200);
94
+ }
95
+ }
96
+ catch (err) {
97
+ const raw = err instanceof Error ? err.message : String(err);
98
+ const hint = errorHint(raw);
99
+ const message = hint ? `${raw}\n${hint}` : raw;
100
+ const status = err instanceof CopilotAuthError ? 401 : 502;
101
+ if (!res.headersSent) {
102
+ res.status(status).json({ error: { type: "error", message } });
103
+ }
104
+ else {
105
+ res.write(`data: ${JSON.stringify({ type: "error", message })}\n\n`);
106
+ res.end();
107
+ }
108
+ metric(status, message);
109
+ }
110
+ });
49
111
  }
@@ -1,11 +1,11 @@
1
1
  import express from "express";
2
2
  import { mountOpenAI } from "./openai-server.js";
3
3
  import { mountAnthropic } from "./anthropic-server.js";
4
- export function createWorkerApp(router, onMetric) {
4
+ export function createWorkerApp(router, onMetric, gatewayRunner) {
5
5
  const app = express();
6
6
  app.use(express.json({ limit: "20mb" }));
7
7
  app.get("/healthz", (_req, res) => res.json({ ok: true }));
8
8
  mountOpenAI(app, router, onMetric);
9
- mountAnthropic(app, router, onMetric);
9
+ mountAnthropic(app, router, onMetric, gatewayRunner);
10
10
  return app;
11
11
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "copilot-reverse",
3
- "version": "0.2.1",
3
+ "version": "0.4.0",
4
4
  "description": "Interactive terminal app that exposes your GitHub Copilot subscription as local OpenAI- and Anthropic-compatible endpoints, with a self-healing daemon and a built-in assistant.",
5
5
  "type": "module",
6
6
  "license": "MIT",