npm - copilot-reverse - Versions diffs - 0.2.1 → 0.4.0 - Mend

copilot-reverse 0.2.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

package/dist/cli/index.js +29 -5
package/dist/core/anthropic-inbound.js +24 -6
package/dist/core/responses-inbound.js +143 -0
package/dist/core/server-tools.js +60 -0
package/dist/core/tool-xml.js +9 -1
package/dist/providers/copilot/adapter.js +49 -5
package/dist/providers/copilot/borrow-search.js +86 -0
package/dist/providers/copilot/models.js +14 -0
package/dist/providers/copilot/responses-upstream.js +161 -0
package/dist/providers/webiq/client.js +66 -0
package/dist/shared/webiq-key.js +59 -0
package/dist/tui/app.js +60 -4
package/dist/tui/screens/webiq-key.js +30 -0
package/dist/tui/setup/codex-toml.js +41 -16
package/dist/tui/slash/commands.js +4 -0
package/dist/tui/status-summary.js +13 -0
package/dist/version.js +1 -1
package/dist/worker/anthropic-server.js +105 -44
package/dist/worker/index.js +25 -5
package/dist/worker/openai-server.js +62 -0
package/dist/worker/server.js +2 -2
package/package.json +1 -1

package/dist/worker/anthropic-server.js CHANGED

@@ -3,8 +3,18 @@ import { anthropicRequestToCanonical, canonicalToAnthropicResponse } from "../co
 import { estimateTokens } from "../core/tokens.js";
 import { errorHint } from "./errors.js";
 import { CopilotAuthError } from "../providers/copilot/token.js";
+import { isGatewayTool } from "../core/server-tools.js";
 const frame = (event, data) => `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`;
-export function mountAnthropic(app, router, onMetric) {
+const safeJson = (s) => { try {
+    return JSON.parse(s);
+}
+catch {
+    return {};
+} };
+// Bounds the gateway tool loop so a model that calls web_search every turn (or a runner that always
+// returns "search more") can never spin forever inside one request.
+const MAX_TOOL_ITERS = 5;
+export function mountAnthropic(app, router, onMetric, runner) {
     // Model discovery — Anthropic list shape. Claude Desktop / Anthropic-protocol clients GET this
     // before chatting; without it they 404 on the connection test.
     app.get("/anthropic/v1/models", (_req, res) => {
@@ -33,61 +43,112 @@ export function mountAnthropic(app, router, onMetric) {
                 // isn't stuck at 0%; the terminal message_delta then reports the exact count.
                 const estInput = estimateTokens(canon);
                 res.write(frame("message_start", { type: "message_start", message: { id, type: "message", role: "assistant", model: canon.model, content: [], stop_reason: null, usage: { input_tokens: estInput, output_tokens: 0, cache_read_input_tokens: 0 } } }));
-                // D3 (interface-freeze §5.4) + mixed text+tool fix (architect, 2026-06-17): the endpoint owns
-                // open/stop bookkeeping with DYNAMIC SEQUENTIAL allocation. We do NOT pre-open an index-0 text block,
-                // and we do NOT map the Copilot tool index straight to the Anthropic block index (that collides with a
-                // text preamble on a mixed turn). Instead, whichever block opens FIRST claims Anthropic index 0, the
-                // next claims 1, etc. This keeps indices contiguous-from-0 in all three cases: pure-text (text@0),
-                // pure-tool (tool@0), and mixed preamble+tool (text@0, tool@1).
+                // D3 (interface-freeze §5.4) + mixed text+tool fix (architect, 2026-06-17) + gateway tool loop
+                // (2026-06): the endpoint owns block open/stop bookkeeping with DYNAMIC SEQUENTIAL allocation,
+                // and `next` spans ALL loop iterations so block indices stay contiguous-from-0 across turns.
+                // Within a turn, text streams live (transparent progress) but tool calls are BUFFERED: only
+                // after the turn ends do we know whether they're gateway tools (run here, then loop) or client
+                // tools (forwarded to the client, exactly as before). Whichever block opens first claims index 0.
                 let next = 0;
-                let textIndex; // Anthropic index of the (single) text block, once opened
-                const toolIndex = new Map(); // Copilot tool index -> Anthropic block index
-                const openedOrder = []; // Anthropic indices in allocation order
-                let stopReason = "stop";
-                let usage;
-                for await (const chunk of provider.stream(canon)) {
-                    if (chunk.done) {
-                        stopReason = chunk.finishReason ?? "stop";
-                        usage = chunk.usage;
-                        break;
-                    }
-                    if (chunk.kind === "text") {
-                        if (textIndex === undefined) {
-                            textIndex = next++;
-                            openedOrder.push(textIndex);
-                            res.write(frame("content_block_start", { type: "content_block_start", index: textIndex, content_block: { type: "text", text: "" } }));
+                let lastPrompt = estInput, lastCached = 0, sumCompletion = 0;
+                let finalStop = "stop";
+                for (let iter = 0; iter < MAX_TOOL_ITERS; iter++) {
+                    let textIndex; // Anthropic index of this turn's text block
+                    const byCopilotIdx = new Map();
+                    const buffered = []; // tool calls seen this turn, in order
+                    let turnStop = "stop";
+                    for await (const chunk of provider.stream(canon)) {
+                        if (chunk.done) {
+                            turnStop = chunk.finishReason ?? "stop";
+                            if (chunk.usage) {
+                                lastPrompt = chunk.usage.promptTokens ?? lastPrompt;
+                                lastCached = chunk.usage.cachedTokens ?? 0;
+                                sumCompletion += chunk.usage.completionTokens ?? 0;
+                            }
+                            break;
                         }
-                        res.write(frame("content_block_delta", { type: "content_block_delta", index: textIndex, delta: { type: "text_delta", text: chunk.delta } }));
-                    }
-                    else if (chunk.kind === "tool_use_start") {
-                        if (!toolIndex.has(chunk.index)) {
-                            const index = next++;
-                            toolIndex.set(chunk.index, index);
-                            openedOrder.push(index);
-                            res.write(frame("content_block_start", { type: "content_block_start", index, content_block: { type: "tool_use", id: chunk.id, name: chunk.name, input: {} } }));
+                        if (chunk.kind === "text") {
+                            if (textIndex === undefined) {
+                                textIndex = next++;
+                                res.write(frame("content_block_start", { type: "content_block_start", index: textIndex, content_block: { type: "text", text: "" } }));
+                            }
+                            res.write(frame("content_block_delta", { type: "content_block_delta", index: textIndex, delta: { type: "text_delta", text: chunk.delta } }));
+                        }
+                        else if (chunk.kind === "tool_use_start") {
+                            if (!byCopilotIdx.has(chunk.index)) {
+                                const t = { id: chunk.id, name: chunk.name, args: "" };
+                                byCopilotIdx.set(chunk.index, t);
+                                buffered.push(t);
+                            }
                         }
+                        else if (chunk.kind === "tool_use_delta") {
+                            const t = byCopilotIdx.get(chunk.index);
+                            if (t)
+                                t.args += chunk.argsDelta;
+                        }
+                    }
+                    if (textIndex !== undefined)
+                        res.write(frame("content_block_stop", { type: "content_block_stop", index: textIndex }));
+                    const gatewayCalls = buffered.filter((t) => isGatewayTool(t.name));
+                    // Invariant: a gateway tool (web_search/web_fetch) must NEVER reach the client — the client
+                    // has no handler for it and would stall. So whenever the model calls gateway tools (and a
+                    // runner is wired), run them here and loop, feeding results back. Any client tools called in
+                    // the SAME turn are deliberately NOT forwarded yet: we drop them this turn and let the model
+                    // re-issue them on the next turn, now informed by the search result. (Forwarding them now
+                    // would end the turn as tool_use and strand the gateway result with nowhere to go.)
+                    if (runner && gatewayCalls.length) {
+                        canon.messages.push({ role: "assistant", content: gatewayCalls.map((t) => ({ type: "tool_use", id: t.id, name: t.name, input: safeJson(t.args) })) });
+                        const results = [];
+                        for (const t of gatewayCalls)
+                            results.push({ type: "tool_result", toolUseId: t.id, content: await runner(t.name, safeJson(t.args)) });
+                        canon.messages.push({ role: "tool", content: results });
+                        continue;
                     }
-                    else if (chunk.kind === "tool_use_delta") {
-                        const index = toolIndex.get(chunk.index);
-                        if (index !== undefined)
-                            res.write(frame("content_block_delta", { type: "content_block_delta", index, delta: { type: "input_json_delta", partial_json: chunk.argsDelta } }));
+                    // Terminal turn (no gateway tools, or no runner): forward any buffered tool calls to the
+                    // client (open/delta/close each at its own freshly-allocated index), then finish.
+                    for (const t of buffered) {
+                        const index = next++;
+                        res.write(frame("content_block_start", { type: "content_block_start", index, content_block: { type: "tool_use", id: t.id, name: t.name, input: {} } }));
+                        if (t.args)
+                            res.write(frame("content_block_delta", { type: "content_block_delta", index, delta: { type: "input_json_delta", partial_json: t.args } }));
+                        res.write(frame("content_block_stop", { type: "content_block_stop", index }));
                     }
+                    finalStop = buffered.length ? "tool_use" : turnStop;
+                    break;
                 }
-                // Close every opened block (ascending Anthropic index) before the terminal frames.
-                for (const index of [...openedOrder].sort((a, b) => a - b))
-                    res.write(frame("content_block_stop", { type: "content_block_stop", index }));
                 // Report real usage (agent-maestro shape): split cached tokens out of input so Claude Code's
-                // context bar is accurate. Falls back to zeros if Copilot didn't return usage.
-                const cached = usage?.cachedTokens ?? 0;
-                const inputTokens = Math.max(0, (usage?.promptTokens ?? estInput) - cached); // fall back to the estimate
-                const deltaUsage = { input_tokens: inputTokens, output_tokens: usage?.completionTokens ?? 0, cache_read_input_tokens: cached };
-                res.write(frame("message_delta", { type: "message_delta", delta: { stop_reason: stopReason === "tool_use" ? "tool_use" : stopReason === "length" ? "max_tokens" : "end_turn" }, usage: deltaUsage }));
+                // context bar is accurate. promptTokens is the last turn's (largest, includes tool results);
+                // output is summed across turns.
+                const inputTokens = Math.max(0, lastPrompt - lastCached);
+                const deltaUsage = { input_tokens: inputTokens, output_tokens: sumCompletion, cache_read_input_tokens: lastCached };
+                res.write(frame("message_delta", { type: "message_delta", delta: { stop_reason: finalStop === "tool_use" ? "tool_use" : finalStop === "length" ? "max_tokens" : "end_turn" }, usage: deltaUsage }));
                 res.write(frame("message_stop", { type: "message_stop" }));
                 res.end();
                 metric(200);
             }
             else {
-                res.json(canonicalToAnthropicResponse(await provider.complete(canon)));
+                // Non-stream: same gateway loop without SSE — run gateway tools and re-complete until the
+                // model answers with text (or a client tool), capped identically.
+                let resp = await provider.complete(canon);
+                for (let iter = 0; runner && iter < MAX_TOOL_ITERS; iter++) {
+                    const toolUses = resp.content.filter((b) => b.type === "tool_use");
+                    const gatewayUses = toolUses.filter((b) => isGatewayTool(b.name));
+                    if (!gatewayUses.length)
+                        break; // no gateway work left — client tools / text are terminal
+                    // Run the gateway tools, feed results back, and continue. Any client tools in the SAME turn
+                    // ride along in the assistant message and remain in the final resp for the client to handle.
+                    canon.messages.push({ role: "assistant", content: resp.content });
+                    const results = [];
+                    for (const u of gatewayUses)
+                        results.push({ type: "tool_result", toolUseId: u.id, content: await runner(u.name, u.input) });
+                    canon.messages.push({ role: "tool", content: results });
+                    resp = await provider.complete(canon);
+                }
+                // Invariant: never forward a gateway tool_use to the client (it can't handle it). If the cap
+                // was hit with gateway calls still pending, strip them — better a partial answer than a stall.
+                if (runner)
+                    resp = { ...resp, content: resp.content.filter((b) => b.type !== "tool_use" || !isGatewayTool(b.name)) };
+                res.json(canonicalToAnthropicResponse(resp));
                 metric(200);
             }
         }

package/dist/worker/index.js CHANGED

@@ -2,8 +2,11 @@ import { createWorkerApp } from "./server.js";
 import { Router } from "./router.js";
 import { CopilotAdapter } from "../providers/copilot/adapter.js";
 import { CopilotTokenStore } from "../providers/copilot/token.js";
-import { fetchCopilotModels } from "../providers/copilot/models.js";
+import { fetchCopilotModels, fetchModelEndpoints } from "../providers/copilot/models.js";
 import { readGhToken } from "../shared/creds.js";
+import { readWebIqKey, readWebSearchMode, resolveWebSearchBackend } from "../shared/webiq-key.js";
+import { makeGatewayRunner } from "../core/server-tools.js";
+import { borrowSearch } from "../providers/copilot/borrow-search.js";
 import { dataDir } from "../shared/paths.js";
 import { defaultConfig } from "../shared/config.js";
 function send(msg) { if (process.send)
@@ -17,10 +20,27 @@ if (!gh) {
     process.exit(1);
 }
 const tokenStore = new CopilotTokenStore(gh);
-const router = new Router([new CopilotAdapter(tokenStore)], cfg.modelMap);
-// Load the live model list so the router can fuzzy-match near-miss ids (e.g. dated Anthropic ids).
-void tokenStore.get().then((t) => fetchCopilotModels(t)).then((ids) => router.setAvailableModels(ids)).catch(() => { });
-const app = createWorkerApp(router, (m) => send({ type: "request-metric", ...m }));
+// Per-model supported_endpoints, populated lazily from the live model list (same source as the model
+// ids). The adapter reads through this map so responses-only models (e.g. gpt-5.5) route to /responses
+// as soon as discovery resolves; until then the map is empty and the /chat 400 safety net covers it.
+let modelEndpoints = {};
+const router = new Router([new CopilotAdapter(tokenStore, fetch, (m) => modelEndpoints[m] ?? [])], cfg.modelMap);
+// Load the live model list so the router can fuzzy-match near-miss ids (e.g. dated Anthropic ids),
+// and the endpoint map so the adapter can route per model. One token fetch feeds both.
+void tokenStore.get().then(async (t) => {
+    const [ids, endpoints] = await Promise.all([fetchCopilotModels(t), fetchModelEndpoints(t)]);
+    router.setAvailableModels(ids);
+    modelEndpoints = endpoints;
+}).catch(() => { });
+// Gateway-run web_search / web_fetch. The backend is resolved per call (lazy → /webiq toggles need no
+// restart): currently WebIQ when a key is set, else unavailable (Copilot borrow is disabled — see
+// COPILOT_WEB_SEARCH_ENABLED). resolveWebSearchBackend centralises that policy.
+const gatewayRunner = makeGatewayRunner({
+    backend: () => resolveWebSearchBackend(readWebSearchMode(dataDir()), Boolean(readWebIqKey(dataDir()))),
+    webiqKey: () => readWebIqKey(dataDir()),
+    borrow: { run: (input) => borrowSearch(tokenStore, input) },
+});
+const app = createWorkerApp(router, (m) => send({ type: "request-metric", ...m }), gatewayRunner);
 const server = app.listen(port, host, () => send({ type: "ready", port }));
 const hb = setInterval(() => send({ type: "heartbeat", ts: Date.now() }), 5_000);
 process.on("message", (m) => { if (m?.type === "shutdown") {

package/dist/worker/openai-server.js CHANGED

@@ -1,5 +1,6 @@
 import { randomUUID } from "node:crypto";
 import { openaiRequestToCanonical, canonicalToOpenAIResponse, canonicalChunkToOpenAISSE } from "../core/openai-inbound.js";
+import { responsesRequestToCanonical, canonicalToResponsesResponse, ResponsesSSE } from "../core/responses-inbound.js";
 import { errorHint } from "./errors.js";
 import { CopilotAuthError } from "../providers/copilot/token.js";
 export function mountOpenAI(app, router, onMetric) {
@@ -46,4 +47,65 @@ export function mountOpenAI(app, router, onMetric) {
             metric(status, message);
         }
     });
+    // OpenAI Responses API — Codex speaks ONLY this after codex#7782 removed wire_api="chat". Codex
+    // POSTs {base_url}/responses, so with base_url …/openai the route is /openai/responses. Same
+    // canonical pipeline as chat/completions; the Responses translator handles the item-centric shape.
+    app.post("/openai/responses", async (req, res) => {
+        const start = Date.now();
+        const canon = responsesRequestToCanonical(req.body);
+        canon.model = router.resolveModel(canon.model);
+        const provider = router.pick(canon.model);
+        const metric = (status, error) => onMetric({ endpoint: "/openai/responses", model: canon.model, status, latencyMs: Date.now() - start, error });
+        try {
+            if (canon.stream) {
+                res.setHeader("content-type", "text/event-stream");
+                res.setHeader("cache-control", "no-cache");
+                const sse = new ResponsesSSE(`resp_${randomUUID().replace(/-/g, "")}`, canon.model);
+                res.write(sse.start());
+                const argsByIdx = new Map();
+                let usage;
+                let finish = "stop";
+                for await (const chunk of provider.stream(canon)) {
+                    if (chunk.done) {
+                        finish = chunk.finishReason ?? "stop";
+                        usage = chunk.usage;
+                        break;
+                    }
+                    if (chunk.kind === "text")
+                        for (const f of sse.text(chunk.delta))
+                            res.write(f);
+                    else if (chunk.kind === "tool_use_start")
+                        for (const f of sse.toolStart(chunk.index, chunk.id, chunk.name))
+                            res.write(f);
+                    else if (chunk.kind === "tool_use_delta") {
+                        argsByIdx.set(chunk.index, (argsByIdx.get(chunk.index) ?? "") + chunk.argsDelta);
+                        for (const f of sse.toolArgs(chunk.index, chunk.argsDelta))
+                            res.write(f);
+                    }
+                }
+                for (const f of sse.finish(usage, finish, argsByIdx))
+                    res.write(f);
+                res.end();
+                metric(200);
+            }
+            else {
+                res.json(canonicalToResponsesResponse(await provider.complete(canon)));
+                metric(200);
+            }
+        }
+        catch (err) {
+            const raw = err instanceof Error ? err.message : String(err);
+            const hint = errorHint(raw);
+            const message = hint ? `${raw}\n${hint}` : raw;
+            const status = err instanceof CopilotAuthError ? 401 : 502;
+            if (!res.headersSent) {
+                res.status(status).json({ error: { type: "error", message } });
+            }
+            else {
+                res.write(`data: ${JSON.stringify({ type: "error", message })}\n\n`);
+                res.end();
+            }
+            metric(status, message);
+        }
+    });
 }

package/dist/worker/server.js CHANGED

@@ -1,11 +1,11 @@
 import express from "express";
 import { mountOpenAI } from "./openai-server.js";
 import { mountAnthropic } from "./anthropic-server.js";
-export function createWorkerApp(router, onMetric) {
+export function createWorkerApp(router, onMetric, gatewayRunner) {
     const app = express();
     app.use(express.json({ limit: "20mb" }));
     app.get("/healthz", (_req, res) => res.json({ ok: true }));
     mountOpenAI(app, router, onMetric);
-    mountAnthropic(app, router, onMetric);
+    mountAnthropic(app, router, onMetric, gatewayRunner);
     return app;
 }

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "copilot-reverse",
-  "version": "0.2.1",
+  "version": "0.4.0",
   "description": "Interactive terminal app that exposes your GitHub Copilot subscription as local OpenAI- and Anthropic-compatible endpoints, with a self-healing daemon and a built-in assistant.",
   "type": "module",
   "license": "MIT",