npm - copilot-reverse - Versions diffs - 0.2.0 → 0.3.0 - Mend

copilot-reverse 0.2.0 → 0.3.0

This diff shows the changes between two publicly released versions of a package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (20)

package/dist/cli/auth.js +17 -3
package/dist/cli/index.js +60 -16
package/dist/core/anthropic-inbound.js +24 -6
package/dist/core/responses-inbound.js +140 -0
package/dist/core/server-tools.js +43 -0
package/dist/providers/copilot/models.js +14 -0
package/dist/providers/webiq/client.js +66 -0
package/dist/shared/webiq-key.js +21 -0
package/dist/tui/app.js +69 -4
package/dist/tui/assistant/on-chat.js +10 -1
package/dist/tui/screens/webiq-key.js +30 -0
package/dist/tui/setup/codex-toml.js +41 -16
package/dist/tui/slash/commands.js +1 -0
package/dist/tui/status-summary.js +13 -0
package/dist/version.js +1 -1
package/dist/worker/anthropic-server.js +105 -44
package/dist/worker/index.js +6 -1
package/dist/worker/openai-server.js +62 -0
package/dist/worker/server.js +2 -2
package/package.json +1 -1

package/dist/cli/auth.js CHANGED Viewed

@@ -1,9 +1,23 @@
 import { requestDeviceCode, pollForToken } from "../providers/copilot/auth.js";
 import { writeGhToken } from "../shared/creds.js";
-export async function runDeviceLogin(dir, fetchFn = fetch, log = console.log) {
+// Two-phase device login. `beginDeviceLogin` returns the verification code right away so a caller
+// can surface it to the user; `complete()` then blocks on authorization and writes the token.
+// Splitting these is what lets the TUI render the code while the poll is still pending — folding
+// both into one call buffers the code behind the blocking poll, and the user can't authorize a
+// code they can't see.
+export async function beginDeviceLogin(dir, fetchFn = fetch) {
     const code = await requestDeviceCode(fetchFn);
+    return {
+        code,
+        complete: async () => {
+            const token = await pollForToken(code.device_code, code.interval * 1000, fetchFn);
+            writeGhToken(token, dir);
+        },
+    };
+}
+export async function runDeviceLogin(dir, fetchFn = fetch, log = console.log) {
+    const { code, complete } = await beginDeviceLogin(dir, fetchFn);
     log(`\nOpen ${code.verification_uri} and enter code: ${code.user_code}\n`);
-    const token = await pollForToken(code.device_code, code.interval * 1000, fetchFn);
-    writeGhToken(token, dir);
+    await complete();
     log("GitHub authorization complete.");
 }

package/dist/cli/index.js CHANGED Viewed

@@ -5,18 +5,20 @@ import { Command } from "commander";
 import { App } from "../tui/app.js";
 import { buildRegistry } from "../tui/slash/commands.js";
 import { DaemonClient } from "../tui/daemon-client.js";
-import { runDeviceLogin } from "./auth.js";
+import { runDeviceLogin, beginDeviceLogin } from "./auth.js";
 import { probeSupervisor } from "../daemon/lifecycle.js";
 import { startSupervisor } from "../supervisor/index.js";
 import { runAssistantTurn } from "../tui/assistant/runtime.js";
 import { makeOnChat } from "../tui/assistant/on-chat.js";
 import { readGhToken, clearGhToken } from "../shared/creds.js";
+import { writeWebIqKey, readWebIqKey } from "../shared/webiq-key.js";
 import { readClientSetup, writeClientSetup } from "../shared/client-setup.js";
 import { readChatModel, writeChatModel } from "../shared/prefs.js";
 import { CopilotTokenStore, isCopilotTokenValid } from "../providers/copilot/token.js";
 import { fetchCopilotModels, fetchModelLimits } from "../providers/copilot/models.js";
 import { applyClaude, applyCodex, resetClaude, resetCodex, CLAUDE_ENV_KEYS, CODEX_ENV_KEYS } from "../tui/setup/apply.js";
 import { readClientStatus } from "../tui/setup/status.js";
+import { summarizeStatus } from "../tui/status-summary.js";
 import { applyCodexToml } from "../tui/setup/codex-toml.js";
 import { claudeCopilotReverseEnv } from "../tui/setup/clients.js";
 import { dataDir } from "../shared/paths.js";
@@ -74,14 +76,6 @@ async function launchTui() {
         appVersion: APP_VERSION,
         platform: `${process.platform} node-${process.version}`,
         resetClient,
-        // Re-run device-code login, then restart the worker so it picks up the new token.
-        login: async () => {
-            const lines = [];
-            await runDeviceLogin(dataDir(), fetch, (m) => lines.push(m));
-            await client.restart().catch(() => { });
-            lines.push("worker restarting with the new token");
-            return lines;
-        },
         // Clear the stored token and restart the worker (it will report unauthenticated until re-login).
         logout: async () => {
             clearGhToken(dataDir());
@@ -89,9 +83,22 @@ async function launchTui() {
             return ["signed out — GitHub token removed", "run /login to sign in again"];
         },
     });
+    // Two-phase /login for the TUI: surface the device code immediately, poll in the background, then
+    // restart the worker so it picks up the new token. The blocking single-call form deadlocked the
+    // Repl (the code stayed hidden behind the poll, so the user could never authorize it).
+    const doLogin = async (show) => {
+        const { code, complete } = await beginDeviceLogin(dataDir());
+        show([`Open ${code.verification_uri} and enter code: ${code.user_code}`, "waiting for authorization…"]);
+        await complete();
+        // Re-point the token store at the freshly written GitHub token; the old store still holds the
+        // expired one and would 401 once its cached Copilot token rotates, breaking the model picker.
+        tokenStore = new CopilotTokenStore(readGhToken(dataDir()));
+        await client.restart().catch(() => { });
+        return ["GitHub authorization complete — worker restarting with the new token"];
+    };
     // Filled in below once we have a token; the assistant prefers a model's real window over the default.
     const modelLimits = {};
-    const tokenStore = new CopilotTokenStore(readGhToken(dataDir()));
+    let tokenStore = new CopilotTokenStore(readGhToken(dataDir()));
     const loadModels = async () => {
         const token = await tokenStore.get();
         const [ids, limits] = await Promise.all([fetchCopilotModels(token), fetchModelLimits(token)]);
@@ -102,19 +109,21 @@ async function launchTui() {
     void tokenStore.get().then((t) => fetchModelLimits(t)).then((m) => Object.assign(modelLimits, m)).catch(() => { });
     // Apply a client's config (shared by the /setup wizard and the assistant's setup_* tools).
     // For Claude Code we also write the selected model's real context window so the client doesn't
+    // For Claude Code we also write the selected model's real context window so the client doesn't
     // assume the default 200K (which makes a 1M model read "context 100%" far too early). For Codex
-    // we write BOTH a .env (legacy) and ~/.codex/config.toml (the native Codex config, with the
-    // model's context window) so either Codex setup style works.
+    // the native config is ~/.codex/config.toml (what the standalone CLI actually reads); we also keep
+    // a legacy .env for older OpenAI-style tooling, but report the config.toml path since that's the
+    // one that matters.
     const applyClient = (clientKind, scope, model) => {
         if (clientKind === "claude") {
             const r = applyClaude(scope, claudeCopilotReverseEnv(anthropicBase, "copilot-reverse-local", model, modelLimits[model]));
             writeClientSetup(dataDir(), { ...readClientSetup(dataDir()), claude: true });
             return r;
         }
-        const r = applyCodex(scope, { OPENAI_BASE_URL: openaiBase, OPENAI_API_KEY: "copilot-reverse-local", OPENAI_MODEL: model });
-        applyCodexToml({ baseUrl: openaiBase, model, contextWindow: modelLimits[model] });
+        applyCodex(scope, { OPENAI_BASE_URL: openaiBase, OPENAI_API_KEY: "copilot-reverse-local", OPENAI_MODEL: model }); // legacy .env
+        const toml = applyCodexToml({ baseUrl: openaiBase, model, contextWindow: modelLimits[model], apiKey: "copilot-reverse-local" });
         writeClientSetup(dataDir(), { ...readClientSetup(dataDir()), codex: true });
-        return r;
+        return toml; // the native config Codex reads — surface this path in the setup card
     };
     const setup = { apply: async (clientKind, scope, model) => applyClient(clientKind, scope, model) };
     const onChat = makeOnChat({
@@ -122,8 +131,33 @@ async function launchTui() {
         maxInputTokens: DEFAULT_MAX_INPUT_TOKENS, modelLimits,
         listModels: loadModels,
         setupClient: async (c, s, m) => applyClient(c, s, m),
-    }, (c, p, print, abort) => runAssistantTurn(c, p, print, undefined, abort));
+    }, (c, p, print, abort) => runAssistantTurn(c, p, print, undefined, abort), undefined,
+    // Pre-flight auth gate: block a turn (with an actionable hint) when there's no GitHub token, or
+    // the stored one no longer exchanges for a Copilot token — instead of firing a request that just
+    // hangs until the turn timeout. Reuses the long-lived tokenStore so a valid login is a cached,
+    // round-trip-free check between message bursts (its get() caches with a 60s skew).
+    async () => {
+        if (!readGhToken(dataDir()))
+            return "you're signed out — run /login to sign in before chatting";
+        try {
+            await tokenStore.get();
+            return null;
+        }
+        catch {
+            return "your GitHub login has expired — run /login to sign in again";
+        }
+    });
     const persistedModel = readChatModel(dataDir());
+    // Startup overview. The token was already validated above (re-auth happens before we get here), so
+    // GitHub is connected; web search readiness and configured clients are read from disk.
+    const clientStatus = readClientStatus();
+    const startupStatus = summarizeStatus({
+        hasToken: Boolean(readGhToken(dataDir())),
+        tokenValid: true,
+        webSearchReady: Boolean(readWebIqKey(dataDir())),
+        worker: "ready",
+        clients: { claude: clientStatus.claude.user || clientStatus.claude.project, codex: clientStatus.codex.user || clientStatus.codex.project },
+    });
     app = render(React.createElement(App, {
         registry,
         title: "copilot-reverse",
@@ -143,6 +177,16 @@ async function launchTui() {
         },
         onModelChange: (m) => writeChatModel(dataDir(), m),
         pickModelOnStart: !persistedModel,
+        login: doLogin,
+        saveWebIqKey: (k) => writeWebIqKey(k, dataDir()),
+        webSearchReady: () => Boolean(readWebIqKey(dataDir())),
+        startupStatus,
+        githubStatus: async () => {
+            const token = readGhToken(dataDir());
+            if (!token)
+                return "signed-out";
+            return (await isCopilotTokenValid(token)) ? "connected" : "expired";
+        },
     }));
 }
 const program = new Command();

package/dist/core/anthropic-inbound.js CHANGED Viewed

@@ -1,3 +1,4 @@
+import { GATEWAY_TOOL_DEFS, isGatewayTool } from "./server-tools.js";
 // The Anthropic `system` field may be a plain string or an array of text blocks (the Claude Code
 // SDK sends blocks with cache_control). Flatten either shape to a string — otherwise it stringifies
 // to "[object Object]" and the model gets garbage instructions.
@@ -41,15 +42,32 @@ export function anthropicRequestToCanonical(req) {
     }
     return {
         model: req.model, stream: Boolean(req.stream), temperature: req.temperature, maxTokens: req.max_tokens,
-        // Keep only custom tools with a real JSON-Schema. Anthropic server-side tools (web_search,
-        // bash, computer, …) arrive with a `type` and no `input_schema`; forwarding them produces an
-        // invalid tool the model can't fulfil, and the client hangs forever waiting for a tool_result.
-        tools: req.tools
-            ?.filter((t) => t.input_schema != null && typeof t.input_schema === "object")
-            .map((t) => ({ name: t.name, description: t.description, parameters: t.input_schema })),
+        tools: mapTools(req.tools),
         messages,
     };
 }
+// Custom tools (with a real JSON-Schema) pass through. Anthropic server-side tools arrive with a
+// dated `type` and no input_schema: web_search / web_fetch are converted to gateway function tools
+// (the gateway runs them itself against WebIQ), and every OTHER server tool (bash, computer, …) is
+// dropped — forwarding an unfulfillable tool makes the client hang forever waiting for a result.
+function mapTools(tools) {
+    if (!tools)
+        return undefined;
+    const out = [];
+    let injectedGateway = false;
+    for (const t of tools) {
+        if (t.input_schema != null && typeof t.input_schema === "object") {
+            out.push({ name: t.name, description: t.description, parameters: t.input_schema });
+        }
+        else if (isGatewayTool(t.name) && !injectedGateway) {
+            // Replace the schema-less server tool with our gateway defs. Inject the whole set once so the
+            // model can use both web_search and web_fetch whenever it asks for either.
+            out.push(...GATEWAY_TOOL_DEFS);
+            injectedGateway = true;
+        }
+    }
+    return out;
+}
 export function canonicalToAnthropicResponse(r) {
     const content = r.content.map((b) => b.type === "text" ? { type: "text", text: b.text } :
         b.type === "tool_use" ? { type: "tool_use", id: b.id, name: b.name, input: b.input } :

package/dist/core/responses-inbound.js ADDED Viewed

@@ -0,0 +1,140 @@
+import { joinText } from "./canonical.js";
+function partsText(content) {
+    if (content == null)
+        return "";
+    if (typeof content === "string")
+        return content;
+    return content.map((p) => (typeof p === "string" ? p : p?.text ?? "")).join("");
+}
+function partsImages(content) {
+    if (!Array.isArray(content))
+        return [];
+    const urlOf = (p) => typeof p.image_url === "string" ? p.image_url : p.image_url?.url;
+    return content.filter((p) => p?.type === "input_image" && urlOf(p)).map((p) => ({ type: "image", dataUrl: urlOf(p) }));
+}
+function safeJson(s) { try {
+    return s ? JSON.parse(s) : {};
+}
+catch {
+    return {};
+} }
+function itemToMessage(it) {
+    if (it.type === "function_call" && it.call_id) {
+        return { role: "assistant", content: [{ type: "tool_use", id: it.call_id, name: it.name ?? "", input: safeJson(it.arguments) }] };
+    }
+    if (it.type === "function_call_output" && it.call_id) {
+        return { role: "tool", content: [{ type: "tool_result", toolUseId: it.call_id, content: it.output ?? "" }] };
+    }
+    // default: a message item
+    const role = (["system", "user", "assistant"].includes(it.role ?? "") ? it.role : "user");
+    const content = [];
+    const text = partsText(it.content);
+    if (text)
+        content.push({ type: "text", text });
+    content.push(...partsImages(it.content));
+    return content.length ? { role, content } : null;
+}
+export function responsesRequestToCanonical(req) {
+    const messages = [];
+    if (req.instructions)
+        messages.push({ role: "system", content: [{ type: "text", text: req.instructions }] });
+    if (typeof req.input === "string") {
+        messages.push({ role: "user", content: [{ type: "text", text: req.input }] });
+    }
+    else {
+        for (const it of req.input) {
+            const m = itemToMessage(it);
+            if (m)
+                messages.push(m);
+        }
+    }
+    return {
+        model: req.model, stream: Boolean(req.stream), temperature: req.temperature, maxTokens: req.max_output_tokens,
+        tools: req.tools?.filter((t) => t.type === "function" && t.name).map((t) => ({ name: t.name, description: t.description, parameters: t.parameters ?? {} })),
+        messages,
+    };
+}
+// Build the non-stream Responses object: text -> an output_text message item, tool_use -> function_call items.
+export function canonicalToResponsesResponse(r) {
+    const output = [];
+    const text = joinText(r.content);
+    if (text)
+        output.push({ type: "message", id: `msg_${r.id}`, role: "assistant", status: "completed", content: [{ type: "output_text", text, annotations: [] }] });
+    for (const b of r.content) {
+        if (b.type === "tool_use")
+            output.push({ type: "function_call", id: `fc_${b.id}`, call_id: b.id, name: b.name, arguments: JSON.stringify(b.input ?? {}), status: "completed" });
+    }
+    return {
+        id: r.id, object: "response", status: "completed", model: r.model,
+        output, output_text: text,
+        usage: { input_tokens: r.usage.promptTokens, output_tokens: r.usage.completionTokens, total_tokens: r.usage.promptTokens + r.usage.completionTokens },
+    };
+}
+// Stateful SSE emitter for the Responses stream. Each event carries a monotonically increasing
+// sequence_number (Codex/agent-maestro require it). Text streams as one output_text message item;
+// each tool call is its own function_call output item. Indices are allocated sequentially.
+const frame = (event) => `data: ${JSON.stringify(event)}\n\n`;
+export class ResponsesSSE {
+    responseId;
+    model;
+    seq = 0;
+    nextIndex = 0;
+    textIndex;
+    textItemId;
+    toolIndex = new Map();
+    constructor(responseId, model) {
+        this.responseId = responseId;
+        this.model = model;
+    }
+    ev(type, extra) {
+        return frame({ type, sequence_number: this.seq++, ...extra });
+    }
+    envelope(status) {
+        return { id: this.responseId, object: "response", status, model: this.model };
+    }
+    start() {
+        return this.ev("response.created", { response: { ...this.envelope("in_progress"), output: [] } });
+    }
+    text(delta) {
+        const out = [];
+        if (this.textIndex === undefined) {
+            this.textIndex = this.nextIndex++;
+            this.textItemId = `msg_${this.responseId}`;
+            out.push(this.ev("response.output_item.added", { output_index: this.textIndex, item: { type: "message", id: this.textItemId, role: "assistant", status: "in_progress", content: [] } }));
+            out.push(this.ev("response.content_part.added", { item_id: this.textItemId, output_index: this.textIndex, content_index: 0, part: { type: "output_text", text: "", annotations: [] } }));
+        }
+        out.push(this.ev("response.output_text.delta", { item_id: this.textItemId, output_index: this.textIndex, content_index: 0, delta }));
+        return out;
+    }
+    toolStart(copilotIdx, callId, name) {
+        if (this.toolIndex.has(copilotIdx))
+            return [];
+        const outputIndex = this.nextIndex++;
+        const itemId = `fc_${callId}`;
+        this.toolIndex.set(copilotIdx, { outputIndex, itemId });
+        return [this.ev("response.output_item.added", { output_index: outputIndex, item: { type: "function_call", id: itemId, call_id: callId, name, arguments: "", status: "in_progress" } })];
+    }
+    toolArgs(copilotIdx, deltaArgs) {
+        const t = this.toolIndex.get(copilotIdx);
+        if (!t)
+            return [];
+        return [this.ev("response.function_call_arguments.delta", { item_id: t.itemId, output_index: t.outputIndex, delta: deltaArgs })];
+    }
+    // Close all open items and complete the response. `argsByIdx` supplies final accumulated tool args.
+    finish(usage, _finishReason, argsByIdx) {
+        const out = [];
+        if (this.textIndex !== undefined) {
+            out.push(this.ev("response.output_text.done", { item_id: this.textItemId, output_index: this.textIndex, content_index: 0, text: "" }));
+            out.push(this.ev("response.content_part.done", { item_id: this.textItemId, output_index: this.textIndex, content_index: 0, part: { type: "output_text", text: "", annotations: [] } }));
+            out.push(this.ev("response.output_item.done", { output_index: this.textIndex, item: { type: "message", id: this.textItemId, role: "assistant", status: "completed", content: [] } }));
+        }
+        for (const [copilotIdx, t] of this.toolIndex) {
+            const args = argsByIdx?.get(copilotIdx) ?? "";
+            out.push(this.ev("response.function_call_arguments.done", { item_id: t.itemId, output_index: t.outputIndex, arguments: args }));
+            out.push(this.ev("response.output_item.done", { output_index: t.outputIndex, item: { type: "function_call", id: t.itemId, status: "completed" } }));
+        }
+        const u = usage ? { input_tokens: usage.promptTokens, output_tokens: usage.completionTokens, total_tokens: usage.promptTokens + usage.completionTokens } : undefined;
+        out.push(this.ev("response.completed", { response: { ...this.envelope("completed"), ...(u ? { usage: u } : {}) } }));
+        return out;
+    }
+}

package/dist/core/server-tools.js ADDED Viewed

@@ -0,0 +1,43 @@
+import { webSearch, webFetch, formatSearchResults, formatFetchResult } from "../providers/webiq/client.js";
+// Tools the GATEWAY executes itself (against WebIQ), rather than forwarding to the model's client.
+// These mirror Claude Code's server-side web_search / web_fetch, which a Copilot-backed gateway must
+// fulfil internally — the model calls them like normal function tools and we run them in-process.
+export const GATEWAY_TOOL_DEFS = [
+    {
+        name: "web_search",
+        description: "Search the web for current information. Returns ranked results with titles, URLs, and content snippets.",
+        parameters: { type: "object", properties: { query: { type: "string", description: "The search query." } }, required: ["query"] },
+    },
+    {
+        name: "web_fetch",
+        description: "Fetch and read the content of a specific web page by URL.",
+        parameters: { type: "object", properties: { url: { type: "string", description: "The URL of the page to fetch." } }, required: ["url"] },
+    },
+];
+const GATEWAY_TOOL_NAMES = new Set(GATEWAY_TOOL_DEFS.map((t) => t.name));
+export function isGatewayTool(name) { return GATEWAY_TOOL_NAMES.has(name); }
+const DEFAULT_CLIENT = { search: webSearch, fetchPage: webFetch };
+const NO_KEY = "web search is not configured — run /web-search-support to add a WebIQ API key";
+export function makeGatewayRunner(getKey, client = DEFAULT_CLIENT) {
+    return async (name, input) => {
+        const key = getKey();
+        if (!key)
+            return NO_KEY;
+        const arg = (input ?? {});
+        if (name === "web_search") {
+            const query = typeof arg.query === "string" ? arg.query : "";
+            if (!query)
+                return "web_search error: missing 'query'";
+            const out = await client.search(key, { query });
+            return out.ok ? formatSearchResults(out.results) : out.error;
+        }
+        if (name === "web_fetch") {
+            const url = typeof arg.url === "string" ? arg.url : "";
+            if (!url)
+                return "web_fetch error: missing 'url'";
+            const out = await client.fetchPage(key, { url });
+            return out.ok ? formatFetchResult(out) : out.error;
+        }
+        return `unknown gateway tool: ${name}`;
+    };
+}

package/dist/providers/copilot/models.js CHANGED Viewed

@@ -32,6 +32,20 @@ export async function fetchCopilotModels(token, fetchFn = fetch, timeoutMs = DEF
     const ids = [...new Set(data.map((m) => m.id).filter((x) => Boolean(x)))];
     return ids.length ? ids : FALLBACK_MODELS;
 }
+// Map of model id -> the Copilot API endpoints it supports (e.g. ["/responses","ws:/responses"]).
+// Used to route each request to the right upstream: newer gpt-5.x models are /responses-only and
+// reject /chat/completions. Returns {} on failure so the adapter falls back to chat/completions.
+export async function fetchModelEndpoints(token, fetchFn = fetch, timeoutMs = DEFAULT_TIMEOUT_MS) {
+    const data = await getModels(token, fetchFn, timeoutMs);
+    if (!data)
+        return {};
+    const out = {};
+    for (const m of data) {
+        if (m.id && Array.isArray(m.supported_endpoints) && m.supported_endpoints.length)
+            out[m.id] = m.supported_endpoints;
+    }
+    return out;
+}
 // Map of model id -> its real input/context window, used to size auto-compaction per model and
 // to show the window in the picker. Returns {} on failure/timeout so callers fall back gracefully.
 export async function fetchModelLimits(token, fetchFn = fetch, timeoutMs = DEFAULT_TIMEOUT_MS) {

package/dist/providers/webiq/client.js ADDED Viewed

@@ -0,0 +1,66 @@
+// Microsoft Web IQ REST client. Two grounding endpoints used to back Claude Code's server-side
+// web_search / web_fetch tools, which our gateway executes itself (Copilot can't). Every call
+// returns a discriminated result instead of throwing: a failed search must degrade to a message the
+// model can read and answer around, never abort the in-flight turn.
+const SEARCH_URL = "https://api.microsoft.ai/v3/search/web";
+const BROWSE_URL = "https://api.microsoft.ai/v3/browse";
+const DEFAULT_TIMEOUT_MS = 15_000;
+const headers = (key) => ({ host: "api.microsoft.ai", "x-apikey": key, "content-type": "application/json" });
+// Status -> readable, model-facing reason. Kept identical across both endpoints so the model gets a
+// consistent, actionable string it can reason about (e.g. fall back to its own knowledge).
+function statusError(status, kind) {
+    if (status === 401 || status === 403)
+        return "web search unavailable: WebIQ API key missing or invalid — run /web-search-support to set it";
+    if (status === 429)
+        return "web search unavailable: WebIQ rate limit exceeded — try again shortly";
+    if (status === 404 && kind === "fetch")
+        return "web fetch failed: the page was not found or is not indexed";
+    return `web ${kind} failed: WebIQ returned ${status}`;
+}
+async function post(url, key, body, fetchFn, timeoutMs) {
+    const ctrl = new AbortController();
+    const timer = setTimeout(() => ctrl.abort(), timeoutMs);
+    try {
+        return await fetchFn(url, { method: "POST", headers: headers(key), body: JSON.stringify(body), signal: ctrl.signal });
+    }
+    finally {
+        clearTimeout(timer);
+    }
+}
+export async function webSearch(key, params, fetchFn = fetch, timeoutMs = DEFAULT_TIMEOUT_MS) {
+    if (!key)
+        return { ok: false, error: statusError(401, "search") };
+    try {
+        const res = await post(SEARCH_URL, key, { maxResults: 10, contentFormat: "passage", ...params }, fetchFn, timeoutMs);
+        if (!res.ok)
+            return { ok: false, error: statusError(res.status, "search") };
+        const data = (await res.json());
+        return { ok: true, results: data.webResults ?? [] };
+    }
+    catch {
+        return { ok: false, error: "web search failed: could not reach WebIQ" };
+    }
+}
+export async function webFetch(key, params, fetchFn = fetch, timeoutMs = DEFAULT_TIMEOUT_MS) {
+    if (!key)
+        return { ok: false, error: statusError(401, "fetch") };
+    try {
+        const res = await post(BROWSE_URL, key, { maxLength: 10_000, contentFormat: "markdown", ...params }, fetchFn, timeoutMs);
+        if (!res.ok)
+            return { ok: false, error: statusError(res.status, "fetch") };
+        const data = (await res.json());
+        return { ok: true, title: data.title ?? "", url: data.url ?? params.url, content: data.content ?? "" };
+    }
+    catch {
+        return { ok: false, error: "web fetch failed: could not reach WebIQ" };
+    }
+}
+// Render results as the tool_result text fed back to the model — compact, citation-friendly.
+export function formatSearchResults(results) {
+    if (!results.length)
+        return "no results found";
+    return results.map((r, i) => `[${i + 1}] ${r.title}\n${r.url}\n${r.content}`.trim()).join("\n\n");
+}
+export function formatFetchResult(r) {
+    return `${r.title}\n${r.url}\n\n${r.content}`.trim();
+}

package/dist/shared/webiq-key.js ADDED Viewed

@@ -0,0 +1,21 @@
+import { existsSync, mkdirSync, readFileSync, writeFileSync, rmSync } from "node:fs";
+import { join } from "node:path";
+// WebIQ API key for the gateway-run web_search / web_fetch tools. Stored like the GitHub token
+// (plaintext, 0600, in the data dir). The WEBIQ_API_KEY env var takes precedence so CI / headless
+// runs can inject it without writing a file. Read lazily per request → no worker restart on change.
+const file = (dir) => join(dir, "webiq.json");
+export function writeWebIqKey(key, dir) {
+    if (!existsSync(dir))
+        mkdirSync(dir, { recursive: true });
+    writeFileSync(file(dir), JSON.stringify({ apiKey: key }), { mode: 0o600 });
+}
+export function readWebIqKey(dir) {
+    if (process.env.WEBIQ_API_KEY)
+        return process.env.WEBIQ_API_KEY;
+    if (!existsSync(file(dir)))
+        return null;
+    return JSON.parse(readFileSync(file(dir), "utf8")).apiKey ?? null;
+}
+export function clearWebIqKey(dir) {
+    rmSync(file(dir), { force: true });
+}

package/dist/tui/app.js CHANGED Viewed

@@ -6,12 +6,30 @@ import { Repl } from "./repl.js";
 import { SetupWizard } from "./setup/wizard.js";
 import { ModelScreen } from "./screens/model.js";
 import { ConfigScreen } from "./screens/config.js";
+import { WebIqKeyScreen } from "./screens/webiq-key.js";
+import { summarizeStatus } from "./status-summary.js";
 import { theme } from "./theme.js";
 const stateColor = {
     ready: theme.ready, starting: theme.starting, crashed: theme.crashed, unhealthy: theme.unhealthy,
 };
 const EMPTY_STATUS = { claude: { user: false, project: false }, codex: { user: false, project: false } };
 const SPINNER = ["✶", "✸", "✹", "✺", "✹", "✷"];
+// Startup overview card. GitHub shows a login STATE (no real token expiry exists), web search shows
+// whether a WebIQ key is configured with the command to fix it when not. `extra` appends detail
+// lines (e.g. worker restart history for /status).
+function statusCard(s, extra = []) {
+    const gh = s.github === "connected" ? "✓ connected" : s.github === "expired" ? "✗ expired — run /login" : "✗ signed out — run /login";
+    const web = s.webSearch === "ready" ? "✓ ready" : "✗ not configured — run /web-search-support";
+    const clients = `claude ${s.clients.claude ? "✓" : "○"}  codex ${s.clients.codex ? "✓" : "○"}`;
+    const tone = s.github === "connected" ? "ok" : "error";
+    return { type: "card", title: "status", tone, lines: [
+            `GitHub login   ${gh}`,
+            `web search     ${web}`,
+            `worker         ${s.worker}`,
+            `clients        ${clients}`,
+            ...extra,
+        ] };
+}
 const fmtElapsed = (ms) => {
     const s = Math.floor(ms / 1000);
     return s >= 60 ? `${Math.floor(s / 60)}m ${s % 60}s` : `${s}s`;
@@ -36,20 +54,24 @@ function ClientBadge({ name, status }) {
     const cell = (label, on) => (_jsxs(Text, { color: on ? theme.ready : theme.muted, children: [label, ":", on ? "✓" : "○"] }));
     return (_jsxs(Text, { color: theme.muted, children: [name, " ", cell("u", status.user), " ", cell("p", status.project)] }));
 }
-export function App({ registry, title, workerState = "starting", initialModel = "—", statusSource, readStatus, modelLimits, onChat, loadModels, setup, info, onModelChange, pickModelOnStart, }) {
+export function App({ registry, title, workerState = "starting", initialModel = "—", statusSource, readStatus, modelLimits, onChat, loadModels, setup, info, onModelChange, pickModelOnStart, login, saveWebIqKey, webSearchReady, startupStatus, githubStatus, }) {
     const cmds = registry.list().map((c) => ({ name: c.name, describe: c.describe }));
-    const [entries, setEntries] = useState([
+    const [entries, setEntries] = useState(() => [
+        ...(startupStatus ? [statusCard(startupStatus)] : []),
         { type: "system", text: "Type a message to chat with the assistant, or /help for commands." },
     ]);
     const [state, setState] = useState(workerState);
     const [status, setStatus] = useState(() => readStatus?.() ?? EMPTY_STATUS);
+    const [webReady, setWebReady] = useState(() => webSearchReady?.() ?? false);
     const [model, setModel] = useState(initialModel);
     const [screen, setScreen] = useState(pickModelOnStart && loadModels ? { kind: "model" } : null);
     const [, setNow] = useState(0); // ticks the live loading line while the assistant streams
     const abortRef = useRef(null); // current turn's interrupt handle
+    const loginInFlight = useRef(false); // guards against starting a second device-login flow
     const add = (e) => setEntries((p) => [...p, e].slice(-100));
     const refreshStatus = () => { if (readStatus)
-        setStatus(readStatus()); };
+        setStatus(readStatus()); if (webSearchReady)
+        setWebReady(webSearchReady()); };
     // esc interrupts an in-flight assistant turn (the Repl doesn't use esc, so this is unambiguous).
     useInput((_input, key) => { if (key.escape)
         abortRef.current?.abort(); });
@@ -91,10 +113,50 @@ export function App({ registry, title, workerState = "starting", initialModel =
             setScreen({ kind: "model" });
             return;
         }
+        if (t === "/web-search-support" && saveWebIqKey) {
+            setScreen({ kind: "webiq-key" });
+            return;
+        }
+        if (t === "/status" && (startupStatus || githubStatus || webSearchReady)) {
+            // Render the live status overview (same card as startup), then the worker restart history.
+            const github = githubStatus ? await githubStatus() : (startupStatus?.github ?? "signed-out");
+            let worker = state, restarts = [];
+            try {
+                const s = await statusSource?.();
+                if (s) {
+                    worker = s.workerState;
+                    restarts = s.restarts.slice(0, 5).map((r) => `  ${r.reason} exit=${r.exitCode ?? "-"} ${r.stderrTail.slice(0, 60)}`);
+                }
+            }
+            catch { /* daemon momentarily down — show what we have */ }
+            const summary = summarizeStatus({
+                hasToken: github !== "signed-out", tokenValid: github === "connected",
+                webSearchReady: webSearchReady?.() ?? webReady, worker,
+                clients: { claude: status.claude.user || status.claude.project, codex: status.codex.user || status.codex.project },
+            });
+            add(statusCard(summary, restarts.length ? ["", "recent restarts:", ...restarts] : []));
+            return;
+        }
         if (t === "/config" && info) {
             setScreen({ kind: "config" });
             return;
         }
+        if (t === "/login" && login) {
+            // Show the verification URL + code right away, then resolve a completion card once the user
+            // authorizes. Done as a special case (not a registry command) because the slash registry only
+            // renders a command's final return value — it can't surface the code mid-poll. Guarded so a
+            // double Enter doesn't start two device-code flows (polling a superseded code 401s).
+            if (loginInFlight.current) {
+                add({ type: "card", title: "/login", tone: "info", lines: ["already waiting for authorization…"] });
+                return;
+            }
+            loginInFlight.current = true;
+            void login((lines) => add({ type: "card", title: "/login", tone: "info", lines }))
+                .then((lines) => add({ type: "card", title: "/login", tone: "ok", lines }))
+                .catch((e) => add({ type: "card", title: "/login", tone: "error", lines: [`login failed: ${e instanceof Error ? e.message : String(e)}`] }))
+                .finally(() => { loginInFlight.current = false; });
+            return;
+        }
         if (setup && loadModels && (t === "/setup-claude" || t === "/setup-codex")) {
             setScreen({ kind: "setup", client: t === "/setup-claude" ? "claude" : "codex" });
             return;
@@ -164,6 +226,9 @@ export function App({ registry, title, workerState = "starting", initialModel =
                     setScreen(null);
             } }));
     }
+    else if (screen?.kind === "webiq-key" && saveWebIqKey) {
+        body = (_jsx(WebIqKeyScreen, { onSubmit: (k) => { saveWebIqKey(k); setWebReady(true); setScreen(null); add({ type: "card", title: "/web-search-support", tone: "ok", lines: ["✓ WebIQ key saved — web search is now enabled for connected clients"] }); }, onCancel: () => { setScreen(null); add({ type: "system", text: "web-search-support cancelled" }); } }));
+    }
     else {
         body = _jsx(Repl, { onSubmit: handle, commands: cmds });
     }
@@ -180,5 +245,5 @@ export function App({ registry, title, workerState = "starting", initialModel =
                         return (_jsxs(Box, { flexDirection: "column", children: [_jsxs(Text, { color: theme.accent, children: ["\u273D ", _jsxs(Text, { color: theme.muted, children: [frame, " ", loadingVerb(elapsed), "\u2026 (esc to interrupt \u00B7 ", fmtElapsed(elapsed), " \u00B7 \u2193 ", fmtTokens(tokens), " tokens \u00B7 thinking)"] })] }), e.text ? _jsx(Text, { color: color, children: e.text }) : null] }, i));
                     }
                     return _jsx(Text, { color: color, children: e.text }, i);
-                }) }), body, _jsxs(Box, { paddingX: 1, children: [_jsx(Text, { color: theme.muted, children: "model " }), _jsx(Text, { color: theme.accent, children: model }), _jsx(Text, { color: theme.muted, children: "  \u00B7  daemon " }), _jsx(Text, { color: stateColor[state], children: state }), _jsx(Text, { color: theme.muted, children: "  \u00B7  " }), _jsx(ClientBadge, { name: "claude", status: status.claude }), _jsx(Text, { color: theme.muted, children: "  " }), _jsx(ClientBadge, { name: "codex", status: status.codex }), _jsx(Text, { color: theme.muted, children: "  \u00B7  /help" })] })] }));
+                }) }), body, _jsxs(Box, { flexDirection: "column", paddingX: 1, children: [_jsxs(Box, { children: [_jsx(Text, { color: theme.muted, children: "model " }), _jsx(Text, { color: theme.accent, children: model }), _jsx(Text, { color: theme.muted, children: "  \u00B7  daemon " }), _jsx(Text, { color: stateColor[state], children: state }), _jsx(Text, { color: theme.muted, children: "  \u00B7  web " }), _jsx(Text, { color: webReady ? theme.ready : theme.muted, children: webReady ? "✓" : "✗ /web-search-support" })] }), _jsxs(Box, { children: [_jsx(ClientBadge, { name: "claude", status: status.claude }), _jsx(Text, { color: theme.muted, children: "  " }), _jsx(ClientBadge, { name: "codex", status: status.codex }), _jsx(Text, { color: theme.muted, children: "  \u00B7  /help" })] })] })] }));
 }

package/dist/tui/assistant/on-chat.js CHANGED Viewed

@@ -1,6 +1,15 @@
 const DEFAULT_TURN_TIMEOUT_MS = 120_000; // 2 minutes — a turn that hasn't replied by then is given up on
-export function makeOnChat(cfg, runner, timeoutMs = DEFAULT_TURN_TIMEOUT_MS) {
+export function makeOnChat(cfg, runner, timeoutMs = DEFAULT_TURN_TIMEOUT_MS, precheck) {
     return async (text, print, model, abort) => {
+        // Gate the turn on auth before firing a doomed request. Without this, a signed-out user's message
+        // hangs until the 120s timeout instead of getting an immediate, actionable hint.
+        if (precheck) {
+            const blocked = await precheck().catch(() => null); // a failed check must never wedge chat
+            if (blocked) {
+                print(blocked);
+                return;
+            }
+        }
         const ctrl = abort ?? new AbortController();
         let timedOut = false;
         // Race the turn against a hard timeout so a hung SDK/upstream can never block the UI forever.

package/dist/tui/screens/webiq-key.js ADDED Viewed

@@ -0,0 +1,30 @@
+import { jsx as _jsx, jsxs as _jsxs } from "react/jsx-runtime";
+import { useState } from "react";
+import { Box, Text, useInput } from "ink";
+import { theme } from "../theme.js";
+// Masked single-line input for the WebIQ API key. Mirrors the Repl's end-of-line editing (append /
+// backspace), but renders bullets instead of the secret. Enter submits a non-empty key; Esc cancels.
+export function WebIqKeyScreen({ onSubmit, onCancel }) {
+    const [value, setValue] = useState("");
+    useInput((input, key) => {
+        if (key.escape) {
+            onCancel();
+            return;
+        }
+        if (key.return) {
+            const k = value.trim();
+            if (k)
+                onSubmit(k);
+            else
+                onCancel();
+            return;
+        }
+        if (key.backspace || key.delete) {
+            setValue((v) => v.slice(0, -1));
+            return;
+        }
+        if (input && !key.ctrl && !key.meta)
+            setValue((v) => v + input);
+    });
+    return (_jsxs(Box, { flexDirection: "column", borderStyle: "round", borderColor: theme.accent, paddingX: 1, marginBottom: 1, children: [_jsx(Text, { color: theme.accent, bold: true, children: "web search support \u2014 paste your WebIQ API key" }), _jsx(Text, { color: theme.muted, children: "enables web_search / web_fetch for connected clients \u00B7 enter to save \u00B7 esc to cancel" }), _jsxs(Box, { children: [_jsx(Text, { color: theme.prompt, children: "key › " }), _jsx(Text, { children: "•".repeat(value.length) }), _jsx(Text, { inverse: true, children: " " })] })] }));
+}

package/dist/tui/setup/codex-toml.js CHANGED Viewed

@@ -3,7 +3,9 @@ import { homedir } from "node:os";
 import { join, dirname } from "node:path";
 // Codex reads ~/.codex/config.toml. copilot-reverse writes a managed provider block there (model,
 // provider, context window) while preserving the user's other top-level keys. Mirrors
-// agent-maestro's `configureCodex`, but uses wire_api="chat" since our proxy is chat/completions.
+// agent-maestro's `configureCodex`. Codex removed wire_api="chat" (codex#7782), so we write
+// "responses" and serve the OpenAI Responses API at /openai/responses (Codex appends /responses to
+// base_url verbatim — no /v1 auto-added).
 export const PROVIDER_ID = "copilot-reverse";
 export function codexTomlPath(home = homedir()) {
     return join(home, ".codex", "config.toml");
@@ -14,34 +16,57 @@ export function applyCodexToml(opts) {
     const path = codexTomlPath(opts.home);
     if (!existsSync(dirname(path)))
         mkdirSync(dirname(path), { recursive: true });
-    // Read existing top-level lines, dropping our managed keys and any prior managed provider table,
-    // but keeping everything else (approval_policy, other providers, etc.) verbatim.
+    // Parse existing content into top-level (pre-table) bare keys vs. table blocks, dropping our
+    // managed keys and any prior managed provider table. We MUST keep top-level keys and tables
+    // separate: in TOML a bare `key = value` after a `[table]` header belongs to that table, so
+    // appending our `model_provider` at the end (after the user's [windows]/[marketplaces] tables)
+    // silently nested it under the last table — Codex then couldn't see it and fell back to "openai".
     const existing = existsSync(path) ? readFileSync(path, "utf8") : "";
-    const kept = [];
-    let inOurTable = false;
+    const keptTopKeys = []; // bare key=value lines before any table
+    const keptTables = []; // everything from the first [table] onward (preserved verbatim)
+    let inTable = false; // have we passed the first table header?
+    let inOurTable = false; // are we inside our own [model_providers.copilot-reverse] block?
     for (const line of existing.split(/\r?\n/)) {
-        const tableMatch = /^\s*\[/.test(line);
-        if (tableMatch)
+        if (/^\s*\[/.test(line)) {
+            inTable = true;
             inOurTable = line.trim() === `[model_providers.${PROVIDER_ID}]`;
+        }
         if (inOurTable)
-            continue; // skip our previously-written provider table
+            continue; // skip our previously-written provider table entirely
         const keyMatch = /^\s*([A-Za-z_][A-Za-z0-9_]*)\s*=/.exec(line);
+        // Drop our managed top keys wherever they appear. They belong at the top level, but a previous
+        // buggy version wrote them AFTER tables (where TOML nests them) — so filter them in the table
+        // region too, otherwise the rewrite would duplicate them.
         if (keyMatch && MANAGED_TOP_KEYS.includes(keyMatch[1]))
-            continue; // skip our managed top keys
-        kept.push(line);
+            continue;
+        (inTable ? keptTables : keptTopKeys).push(line);
     }
-    const head = kept.join("\n").replace(/\n{3,}/g, "\n\n").trim();
-    const managed = [
+    // Reassemble in valid TOML order: ALL top-level keys (ours + the user's) first, then all table
+    // blocks (the user's preserved tables, then our managed provider table last).
+    const topKeys = [
         `model = "${opts.model}"`,
         `model_provider = "${PROVIDER_ID}"`,
         ...(opts.contextWindow ? [`model_context_window = ${opts.contextWindow}`] : []),
-        "",
+        ...keptTopKeys.filter((l) => l.trim()), // the user's other top-level keys (approval_policy, etc.)
+    ];
+    const ourTable = [
         `[model_providers.${PROVIDER_ID}]`,
         `name = "copilot-reverse"`,
         `base_url = "${opts.baseUrl}"`,
-        `wire_api = "chat"`,
-    ].join("\n");
-    const body = (head ? `${head}\n\n` : "") + managed + "\n";
+        `wire_api = "responses"`,
+        // Auth: inline a static bearer token so Codex talks to our local proxy instead of falling back
+        // to the OpenAI login flow. env_key is unreliable here (a standalone Codex CLI won't see our
+        // .env), so we embed the placeholder directly — the worker ignores the key value anyway.
+        `requires_openai_auth = false`,
+        `experimental_bearer_token = "${opts.apiKey ?? "copilot-reverse-local"}"`,
+    ];
+    const userTables = keptTables.join("\n").replace(/\n{3,}/g, "\n\n").trim();
+    const managed = [
+        topKeys.join("\n"),
+        ...(userTables ? [userTables] : []),
+        ourTable.join("\n"),
+    ].join("\n\n");
+    const body = managed + "\n";
     writeFileSync(path, body);
     return { path, changed: MANAGED_TOP_KEYS };
 }

package/dist/tui/slash/commands.js CHANGED Viewed

@@ -45,6 +45,7 @@ export function buildRegistry(ctx, endpoint, opts = {}) {
     reg.add({ name: "/login", describe: "sign in to GitHub (device-code)", run: async () => opts.login ? opts.login() : ["login not available"] });
     reg.add({ name: "/logout", describe: "sign out — remove the stored GitHub token", run: async () => opts.logout ? opts.logout() : ["logout not available"] });
     reg.add({ name: "/model", describe: "switch the chat model", run: async () => ["opening model picker…"] });
+    reg.add({ name: "/web-search-support", describe: "enable web search/fetch (set WebIQ API key)", run: async () => ["opening web-search-support…"] });
     reg.add({ name: "/config", describe: "view & change configuration", run: async () => ["opening config panel…"] });
     reg.add({ name: "/dashboard", describe: "open the web dashboard in your browser", run: async () => {
             if (!opts.dashboardUrl)

package/dist/tui/status-summary.js ADDED Viewed

@@ -0,0 +1,13 @@
+export function githubLoginState(hasToken, tokenValid) {
+    if (!hasToken)
+        return "signed-out";
+    return tokenValid ? "connected" : "expired";
+}
+export function summarizeStatus(i) {
+    return {
+        github: githubLoginState(i.hasToken, i.tokenValid),
+        webSearch: i.webSearchReady ? "ready" : "not-configured",
+        worker: i.worker,
+        clients: i.clients,
+    };
+}

package/dist/version.js CHANGED Viewed

@@ -1,2 +1,2 @@
 // AUTO-GENERATED by scripts/gen-version.mjs from package.json — do not edit.
-export const APP_VERSION = "0.2.0";
+export const APP_VERSION = "0.3.0";

package/dist/worker/anthropic-server.js CHANGED Viewed

@@ -3,8 +3,18 @@ import { anthropicRequestToCanonical, canonicalToAnthropicResponse } from "../co
 import { estimateTokens } from "../core/tokens.js";
 import { errorHint } from "./errors.js";
 import { CopilotAuthError } from "../providers/copilot/token.js";
+import { isGatewayTool } from "../core/server-tools.js";
 const frame = (event, data) => `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`;
-export function mountAnthropic(app, router, onMetric) {
+const safeJson = (s) => { try {
+    return JSON.parse(s);
+}
+catch {
+    return {};
+} };
+// Bounds the gateway tool loop so a model that calls web_search every turn (or a runner that always
+// returns "search more") can never spin forever inside one request.
+const MAX_TOOL_ITERS = 5;
+export function mountAnthropic(app, router, onMetric, runner) {
     // Model discovery — Anthropic list shape. Claude Desktop / Anthropic-protocol clients GET this
     // before chatting; without it they 404 on the connection test.
     app.get("/anthropic/v1/models", (_req, res) => {
@@ -33,61 +43,112 @@ export function mountAnthropic(app, router, onMetric) {
                 // isn't stuck at 0%; the terminal message_delta then reports the exact count.
                 const estInput = estimateTokens(canon);
                 res.write(frame("message_start", { type: "message_start", message: { id, type: "message", role: "assistant", model: canon.model, content: [], stop_reason: null, usage: { input_tokens: estInput, output_tokens: 0, cache_read_input_tokens: 0 } } }));
-                // D3 (interface-freeze §5.4) + mixed text+tool fix (architect, 2026-06-17): the endpoint owns
-                // open/stop bookkeeping with DYNAMIC SEQUENTIAL allocation. We do NOT pre-open an index-0 text block,
-                // and we do NOT map the Copilot tool index straight to the Anthropic block index (that collides with a
-                // text preamble on a mixed turn). Instead, whichever block opens FIRST claims Anthropic index 0, the
-                // next claims 1, etc. This keeps indices contiguous-from-0 in all three cases: pure-text (text@0),
-                // pure-tool (tool@0), and mixed preamble+tool (text@0, tool@1).
+                // D3 (interface-freeze §5.4) + mixed text+tool fix (architect, 2026-06-17) + gateway tool loop
+                // (2026-06): the endpoint owns block open/stop bookkeeping with DYNAMIC SEQUENTIAL allocation,
+                // and `next` spans ALL loop iterations so block indices stay contiguous-from-0 across turns.
+                // Within a turn, text streams live (transparent progress) but tool calls are BUFFERED: only
+                // after the turn ends do we know whether they're gateway tools (run here, then loop) or client
+                // tools (forwarded to the client, exactly as before). Whichever block opens first claims index 0.
                 let next = 0;
-                let textIndex; // Anthropic index of the (single) text block, once opened
-                const toolIndex = new Map(); // Copilot tool index -> Anthropic block index
-                const openedOrder = []; // Anthropic indices in allocation order
-                let stopReason = "stop";
-                let usage;
-                for await (const chunk of provider.stream(canon)) {
-                    if (chunk.done) {
-                        stopReason = chunk.finishReason ?? "stop";
-                        usage = chunk.usage;
-                        break;
-                    }
-                    if (chunk.kind === "text") {
-                        if (textIndex === undefined) {
-                            textIndex = next++;
-                            openedOrder.push(textIndex);
-                            res.write(frame("content_block_start", { type: "content_block_start", index: textIndex, content_block: { type: "text", text: "" } }));
+                let lastPrompt = estInput, lastCached = 0, sumCompletion = 0;
+                let finalStop = "stop";
+                for (let iter = 0; iter < MAX_TOOL_ITERS; iter++) {
+                    let textIndex; // Anthropic index of this turn's text block
+                    const byCopilotIdx = new Map();
+                    const buffered = []; // tool calls seen this turn, in order
+                    let turnStop = "stop";
+                    for await (const chunk of provider.stream(canon)) {
+                        if (chunk.done) {
+                            turnStop = chunk.finishReason ?? "stop";
+                            if (chunk.usage) {
+                                lastPrompt = chunk.usage.promptTokens ?? lastPrompt;
+                                lastCached = chunk.usage.cachedTokens ?? 0;
+                                sumCompletion += chunk.usage.completionTokens ?? 0;
+                            }
+                            break;
                         }
-                        res.write(frame("content_block_delta", { type: "content_block_delta", index: textIndex, delta: { type: "text_delta", text: chunk.delta } }));
-                    }
-                    else if (chunk.kind === "tool_use_start") {
-                        if (!toolIndex.has(chunk.index)) {
-                            const index = next++;
-                            toolIndex.set(chunk.index, index);
-                            openedOrder.push(index);
-                            res.write(frame("content_block_start", { type: "content_block_start", index, content_block: { type: "tool_use", id: chunk.id, name: chunk.name, input: {} } }));
+                        if (chunk.kind === "text") {
+                            if (textIndex === undefined) {
+                                textIndex = next++;
+                                res.write(frame("content_block_start", { type: "content_block_start", index: textIndex, content_block: { type: "text", text: "" } }));
+                            }
+                            res.write(frame("content_block_delta", { type: "content_block_delta", index: textIndex, delta: { type: "text_delta", text: chunk.delta } }));
+                        }
+                        else if (chunk.kind === "tool_use_start") {
+                            if (!byCopilotIdx.has(chunk.index)) {
+                                const t = { id: chunk.id, name: chunk.name, args: "" };
+                                byCopilotIdx.set(chunk.index, t);
+                                buffered.push(t);
+                            }
                         }
+                        else if (chunk.kind === "tool_use_delta") {
+                            const t = byCopilotIdx.get(chunk.index);
+                            if (t)
+                                t.args += chunk.argsDelta;
+                        }
+                    }
+                    if (textIndex !== undefined)
+                        res.write(frame("content_block_stop", { type: "content_block_stop", index: textIndex }));
+                    const gatewayCalls = buffered.filter((t) => isGatewayTool(t.name));
+                    // Invariant: a gateway tool (web_search/web_fetch) must NEVER reach the client — the client
+                    // has no handler for it and would stall. So whenever the model calls gateway tools (and a
+                    // runner is wired), run them here and loop, feeding results back. Any client tools called in
+                    // the SAME turn are deliberately NOT forwarded yet: we drop them this turn and let the model
+                    // re-issue them on the next turn, now informed by the search result. (Forwarding them now
+                    // would end the turn as tool_use and strand the gateway result with nowhere to go.)
+                    if (runner && gatewayCalls.length) {
+                        canon.messages.push({ role: "assistant", content: gatewayCalls.map((t) => ({ type: "tool_use", id: t.id, name: t.name, input: safeJson(t.args) })) });
+                        const results = [];
+                        for (const t of gatewayCalls)
+                            results.push({ type: "tool_result", toolUseId: t.id, content: await runner(t.name, safeJson(t.args)) });
+                        canon.messages.push({ role: "tool", content: results });
+                        continue;
                     }
-                    else if (chunk.kind === "tool_use_delta") {
-                        const index = toolIndex.get(chunk.index);
-                        if (index !== undefined)
-                            res.write(frame("content_block_delta", { type: "content_block_delta", index, delta: { type: "input_json_delta", partial_json: chunk.argsDelta } }));
+                    // Terminal turn (no gateway tools, or no runner): forward any buffered tool calls to the
+                    // client (open/delta/close each at its own freshly-allocated index), then finish.
+                    for (const t of buffered) {
+                        const index = next++;
+                        res.write(frame("content_block_start", { type: "content_block_start", index, content_block: { type: "tool_use", id: t.id, name: t.name, input: {} } }));
+                        if (t.args)
+                            res.write(frame("content_block_delta", { type: "content_block_delta", index, delta: { type: "input_json_delta", partial_json: t.args } }));
+                        res.write(frame("content_block_stop", { type: "content_block_stop", index }));
                     }
+                    finalStop = buffered.length ? "tool_use" : turnStop;
+                    break;
                 }
-                // Close every opened block (ascending Anthropic index) before the terminal frames.
-                for (const index of [...openedOrder].sort((a, b) => a - b))
-                    res.write(frame("content_block_stop", { type: "content_block_stop", index }));
                 // Report real usage (agent-maestro shape): split cached tokens out of input so Claude Code's
-                // context bar is accurate. Falls back to zeros if Copilot didn't return usage.
-                const cached = usage?.cachedTokens ?? 0;
-                const inputTokens = Math.max(0, (usage?.promptTokens ?? estInput) - cached); // fall back to the estimate
-                const deltaUsage = { input_tokens: inputTokens, output_tokens: usage?.completionTokens ?? 0, cache_read_input_tokens: cached };
-                res.write(frame("message_delta", { type: "message_delta", delta: { stop_reason: stopReason === "tool_use" ? "tool_use" : stopReason === "length" ? "max_tokens" : "end_turn" }, usage: deltaUsage }));
+                // context bar is accurate. promptTokens is the last turn's (largest, includes tool results);
+                // output is summed across turns.
+                const inputTokens = Math.max(0, lastPrompt - lastCached);
+                const deltaUsage = { input_tokens: inputTokens, output_tokens: sumCompletion, cache_read_input_tokens: lastCached };
+                res.write(frame("message_delta", { type: "message_delta", delta: { stop_reason: finalStop === "tool_use" ? "tool_use" : finalStop === "length" ? "max_tokens" : "end_turn" }, usage: deltaUsage }));
                 res.write(frame("message_stop", { type: "message_stop" }));
                 res.end();
                 metric(200);
             }
             else {
-                res.json(canonicalToAnthropicResponse(await provider.complete(canon)));
+                // Non-stream: same gateway loop without SSE — run gateway tools and re-complete until the
+                // model answers with text (or a client tool), capped identically.
+                let resp = await provider.complete(canon);
+                for (let iter = 0; runner && iter < MAX_TOOL_ITERS; iter++) {
+                    const toolUses = resp.content.filter((b) => b.type === "tool_use");
+                    const gatewayUses = toolUses.filter((b) => isGatewayTool(b.name));
+                    if (!gatewayUses.length)
+                        break; // no gateway work left — client tools / text are terminal
+                    // Run the gateway tools, feed results back, and continue. Any client tools in the SAME turn
+                    // ride along in the assistant message, but `resp` is then replaced by the next completion,
+                    // so they reach the client only if the model re-issues them on that follow-up turn.
+                    canon.messages.push({ role: "assistant", content: resp.content });
+                    const results = [];
+                    for (const u of gatewayUses)
+                        results.push({ type: "tool_result", toolUseId: u.id, content: await runner(u.name, u.input) });
+                    canon.messages.push({ role: "tool", content: results });
+                    resp = await provider.complete(canon);
+                }
+                // Invariant: never forward a gateway tool_use to the client (it can't handle it). If the cap
+                // was hit with gateway calls still pending, strip them — better a partial answer than a stall.
+                if (runner)
+                    resp = { ...resp, content: resp.content.filter((b) => b.type !== "tool_use" || !isGatewayTool(b.name)) };
+                res.json(canonicalToAnthropicResponse(resp));
                 metric(200);
             }
         }

package/dist/worker/index.js CHANGED Viewed

@@ -4,6 +4,8 @@ import { CopilotAdapter } from "../providers/copilot/adapter.js";
 import { CopilotTokenStore } from "../providers/copilot/token.js";
 import { fetchCopilotModels } from "../providers/copilot/models.js";
 import { readGhToken } from "../shared/creds.js";
+import { readWebIqKey } from "../shared/webiq-key.js";
+import { makeGatewayRunner } from "../core/server-tools.js";
 import { dataDir } from "../shared/paths.js";
 import { defaultConfig } from "../shared/config.js";
 function send(msg) { if (process.send)
@@ -20,7 +22,10 @@ const tokenStore = new CopilotTokenStore(gh);
 const router = new Router([new CopilotAdapter(tokenStore)], cfg.modelMap);
 // Load the live model list so the router can fuzzy-match near-miss ids (e.g. dated Anthropic ids).
 void tokenStore.get().then((t) => fetchCopilotModels(t)).then((ids) => router.setAvailableModels(ids)).catch(() => { });
-const app = createWorkerApp(router, (m) => send({ type: "request-metric", ...m }));
+// Gateway-run web_search / web_fetch: reads the WebIQ key lazily per call (env or data dir), so
+// setting it via /web-search-support takes effect without restarting the worker.
+const gatewayRunner = makeGatewayRunner(() => readWebIqKey(dataDir()));
+const app = createWorkerApp(router, (m) => send({ type: "request-metric", ...m }), gatewayRunner);
 const server = app.listen(port, host, () => send({ type: "ready", port }));
 const hb = setInterval(() => send({ type: "heartbeat", ts: Date.now() }), 5_000);
 process.on("message", (m) => { if (m?.type === "shutdown") {

package/dist/worker/openai-server.js CHANGED Viewed

@@ -1,5 +1,6 @@
 import { randomUUID } from "node:crypto";
 import { openaiRequestToCanonical, canonicalToOpenAIResponse, canonicalChunkToOpenAISSE } from "../core/openai-inbound.js";
+import { responsesRequestToCanonical, canonicalToResponsesResponse, ResponsesSSE } from "../core/responses-inbound.js";
 import { errorHint } from "./errors.js";
 import { CopilotAuthError } from "../providers/copilot/token.js";
 export function mountOpenAI(app, router, onMetric) {
@@ -46,4 +47,65 @@ export function mountOpenAI(app, router, onMetric) {
             metric(status, message);
         }
     });
+    // OpenAI Responses API — Codex speaks ONLY this after codex#7782 removed wire_api="chat". Codex
+    // POSTs {base_url}/responses, so with base_url …/openai the route is /openai/responses. Same
+    // canonical pipeline as chat/completions; the Responses translator handles the item-centric shape.
+    app.post("/openai/responses", async (req, res) => {
+        const start = Date.now(); // request start time; latencyMs in the metric is measured from here
+        const canon = responsesRequestToCanonical(req.body); // NOTE(review): this call and the two below run before the try, so a throw here skips the catch's error response and metric — confirm that is intended
+        canon.model = router.resolveModel(canon.model); // replace the requested model id with the router-resolved one
+        const provider = router.pick(canon.model);
+        const metric = (status, error) => onMetric({ endpoint: "/openai/responses", model: canon.model, status, latencyMs: Date.now() - start, error }); // builds one metric record and hands it to onMetric
+        try {
+            if (canon.stream) { // streaming branch: relay provider chunks to the client as SSE frames
+                res.setHeader("content-type", "text/event-stream");
+                res.setHeader("cache-control", "no-cache");
+                const sse = new ResponsesSSE(`resp_${randomUUID().replace(/-/g, "")}`, canon.model); // response id is "resp_" followed by a UUID with the dashes removed
+                res.write(sse.start());
+                const argsByIdx = new Map(); // tool-call index -> concatenation of the argument deltas received so far
+                let usage; // stays undefined unless the terminal chunk supplies it
+                let finish = "stop"; // kept as "stop" if the stream ends without a done chunk
+                for await (const chunk of provider.stream(canon)) {
+                    if (chunk.done) { // terminal chunk: record finish reason and usage, then stop reading
+                        finish = chunk.finishReason ?? "stop";
+                        usage = chunk.usage;
+                        break;
+                    }
+                    if (chunk.kind === "text")
+                        for (const f of sse.text(chunk.delta))
+                            res.write(f);
+                    else if (chunk.kind === "tool_use_start")
+                        for (const f of sse.toolStart(chunk.index, chunk.id, chunk.name))
+                            res.write(f);
+                    else if (chunk.kind === "tool_use_delta") {
+                        argsByIdx.set(chunk.index, (argsByIdx.get(chunk.index) ?? "") + chunk.argsDelta); // accumulate the arguments per tool index; the map is handed to sse.finish() below
+                        for (const f of sse.toolArgs(chunk.index, chunk.argsDelta))
+                            res.write(f);
+                    }
+                }
+                for (const f of sse.finish(usage, finish, argsByIdx)) // closing frames, written after the chunk loop has ended
+                    res.write(f);
+                res.end();
+                metric(200);
+            }
+            else { // non-streaming branch: a single JSON body built from the completed response
+                res.json(canonicalToResponsesResponse(await provider.complete(canon)));
+                metric(200);
+            }
+        }
+        catch (err) {
+            const raw = err instanceof Error ? err.message : String(err);
+            const hint = errorHint(raw);
+            const message = hint ? `${raw}\n${hint}` : raw; // when errorHint returns something, append it on its own line
+            const status = err instanceof CopilotAuthError ? 401 : 502; // CopilotAuthError maps to 401, every other failure to 502
+            if (!res.headersSent) { // nothing sent yet: reply with a JSON error body and the chosen status
+                res.status(status).json({ error: { type: "error", message } });
+            }
+            else { // headers already sent: write an error frame and close the response instead
+                res.write(`data: ${JSON.stringify({ type: "error", message })}\n\n`);
+                res.end();
+            }
+            metric(status, message); // failures are recorded with the same status and message
+        }
+    });
 }

package/dist/worker/server.js CHANGED Viewed

@@ -1,11 +1,11 @@
 import express from "express";
 import { mountOpenAI } from "./openai-server.js";
 import { mountAnthropic } from "./anthropic-server.js";
-export function createWorkerApp(router, onMetric) {
+export function createWorkerApp(router, onMetric, gatewayRunner) { // gatewayRunner is forwarded only to mountAnthropic below
     const app = express();
     app.use(express.json({ limit: "20mb" })); // JSON body parsing with a 20mb limit
     app.get("/healthz", (_req, res) => res.json({ ok: true })); // health endpoint: always answers { ok: true }
     mountOpenAI(app, router, onMetric);
-    mountAnthropic(app, router, onMetric);
+    mountAnthropic(app, router, onMetric, gatewayRunner);
     return app;
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "copilot-reverse",
-  "version": "0.2.0",
+  "version": "0.3.0",
   "description": "Interactive terminal app that exposes your GitHub Copilot subscription as local OpenAI- and Anthropic-compatible endpoints, with a self-healing daemon and a built-in assistant.",
   "type": "module",
   "license": "MIT",