npm - copilot-reverse - Versions diffs - 0.2.1 → 0.4.0 - Mend

copilot-reverse 0.2.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/dist/cli/index.js +29 -5
package/dist/core/anthropic-inbound.js +24 -6
package/dist/core/responses-inbound.js +143 -0
package/dist/core/server-tools.js +60 -0
package/dist/core/tool-xml.js +9 -1
package/dist/providers/copilot/adapter.js +49 -5
package/dist/providers/copilot/borrow-search.js +86 -0
package/dist/providers/copilot/models.js +14 -0
package/dist/providers/copilot/responses-upstream.js +161 -0
package/dist/providers/webiq/client.js +66 -0
package/dist/shared/webiq-key.js +59 -0
package/dist/tui/app.js +60 -4
package/dist/tui/screens/webiq-key.js +30 -0
package/dist/tui/setup/codex-toml.js +41 -16
package/dist/tui/slash/commands.js +4 -0
package/dist/tui/status-summary.js +13 -0
package/dist/version.js +1 -1
package/dist/worker/anthropic-server.js +105 -44
package/dist/worker/index.js +25 -5
package/dist/worker/openai-server.js +62 -0
package/dist/worker/server.js +2 -2
package/package.json +1 -1

package/dist/cli/index.js CHANGED Viewed

@@ -11,12 +11,14 @@ import { startSupervisor } from "../supervisor/index.js";
 import { runAssistantTurn } from "../tui/assistant/runtime.js";
 import { makeOnChat } from "../tui/assistant/on-chat.js";
 import { readGhToken, clearGhToken } from "../shared/creds.js";
+import { writeWebIqKey, readWebIqKey, clearWebIqKey, readWebSearchMode, writeWebSearchMode, resolveWebSearchBackend } from "../shared/webiq-key.js";
 import { readClientSetup, writeClientSetup } from "../shared/client-setup.js";
 import { readChatModel, writeChatModel } from "../shared/prefs.js";
 import { CopilotTokenStore, isCopilotTokenValid } from "../providers/copilot/token.js";
 import { fetchCopilotModels, fetchModelLimits } from "../providers/copilot/models.js";
 import { applyClaude, applyCodex, resetClaude, resetCodex, CLAUDE_ENV_KEYS, CODEX_ENV_KEYS } from "../tui/setup/apply.js";
 import { readClientStatus } from "../tui/setup/status.js";
+import { summarizeStatus } from "../tui/status-summary.js";
 import { applyCodexToml } from "../tui/setup/codex-toml.js";
 import { claudeCopilotReverseEnv } from "../tui/setup/clients.js";
 import { dataDir } from "../shared/paths.js";
@@ -107,19 +109,21 @@ async function launchTui() {
     void tokenStore.get().then((t) => fetchModelLimits(t)).then((m) => Object.assign(modelLimits, m)).catch(() => { });
     // Apply a client's config (shared by the /setup wizard and the assistant's setup_* tools).
     // For Claude Code we also write the selected model's real context window so the client doesn't
     // assume the default 200K (which makes a 1M model read "context 100%" far too early). For Codex
-    // we write BOTH a .env (legacy) and ~/.codex/config.toml (the native Codex config, with the
-    // model's context window) so either Codex setup style works.
+    // the native config is ~/.codex/config.toml (what the standalone CLI actually reads); we also keep
+    // a legacy .env for older OpenAI-style tooling, but report the config.toml path since that's the
+    // one that matters.
     const applyClient = (clientKind, scope, model) => {
         if (clientKind === "claude") {
             const r = applyClaude(scope, claudeCopilotReverseEnv(anthropicBase, "copilot-reverse-local", model, modelLimits[model]));
             writeClientSetup(dataDir(), { ...readClientSetup(dataDir()), claude: true });
             return r;
         }
-        const r = applyCodex(scope, { OPENAI_BASE_URL: openaiBase, OPENAI_API_KEY: "copilot-reverse-local", OPENAI_MODEL: model });
-        applyCodexToml({ baseUrl: openaiBase, model, contextWindow: modelLimits[model] });
+        applyCodex(scope, { OPENAI_BASE_URL: openaiBase, OPENAI_API_KEY: "copilot-reverse-local", OPENAI_MODEL: model }); // legacy .env
+        const toml = applyCodexToml({ baseUrl: openaiBase, model, contextWindow: modelLimits[model], apiKey: "copilot-reverse-local" });
         writeClientSetup(dataDir(), { ...readClientSetup(dataDir()), codex: true });
-        return r;
+        return toml; // the native config Codex reads — surface this path in the setup card
     };
     const setup = { apply: async (clientKind, scope, model) => applyClient(clientKind, scope, model) };
     const onChat = makeOnChat({
@@ -144,6 +148,16 @@ async function launchTui() {
         }
     });
     const persistedModel = readChatModel(dataDir());
+    // Startup overview. The token was already validated above (re-auth happens before we get here), so
+    // GitHub is connected; web search readiness and configured clients are read from disk.
+    const clientStatus = readClientStatus();
+    const startupStatus = summarizeStatus({
+        hasToken: Boolean(readGhToken(dataDir())),
+        tokenValid: true,
+        webSearch: resolveWebSearchBackend(readWebSearchMode(dataDir()), Boolean(readWebIqKey(dataDir()))),
+        worker: "ready",
+        clients: { claude: clientStatus.claude.user || clientStatus.claude.project, codex: clientStatus.codex.user || clientStatus.codex.project },
+    });
     app = render(React.createElement(App, {
         registry,
         title: "copilot-reverse",
@@ -164,6 +178,16 @@ async function launchTui() {
         onModelChange: (m) => writeChatModel(dataDir(), m),
         pickModelOnStart: !persistedModel,
         login: doLogin,
+        enableWebiq: (k) => { writeWebIqKey(k, dataDir()); writeWebSearchMode(dataDir(), "webiq"); },
+        disableWebiq: () => { clearWebIqKey(dataDir()); },
+        webSearchBackend: () => resolveWebSearchBackend(readWebSearchMode(dataDir()), Boolean(readWebIqKey(dataDir()))),
+        startupStatus,
+        githubStatus: async () => {
+            const token = readGhToken(dataDir());
+            if (!token)
+                return "signed-out";
+            return (await isCopilotTokenValid(token)) ? "connected" : "expired";
+        },
     }));
 }
 const program = new Command();

package/dist/core/anthropic-inbound.js CHANGED Viewed

@@ -1,3 +1,4 @@
+import { GATEWAY_TOOL_DEFS, isGatewayTool } from "./server-tools.js";
 // The Anthropic `system` field may be a plain string or an array of text blocks (the Claude Code
 // SDK sends blocks with cache_control). Flatten either shape to a string — otherwise it stringifies
 // to "[object Object]" and the model gets garbage instructions.
@@ -41,15 +42,32 @@ export function anthropicRequestToCanonical(req) {
     }
     return {
         model: req.model, stream: Boolean(req.stream), temperature: req.temperature, maxTokens: req.max_tokens,
-        // Keep only custom tools with a real JSON-Schema. Anthropic server-side tools (web_search,
-        // bash, computer, …) arrive with a `type` and no `input_schema`; forwarding them produces an
-        // invalid tool the model can't fulfil, and the client hangs forever waiting for a tool_result.
-        tools: req.tools
-            ?.filter((t) => t.input_schema != null && typeof t.input_schema === "object")
-            .map((t) => ({ name: t.name, description: t.description, parameters: t.input_schema })),
+        tools: mapTools(req.tools),
         messages,
     };
 }
/**
 * Translate the Anthropic `tools` array into canonical tool definitions.
 *
 * - A tool carrying an object `input_schema` is a custom tool and passes through.
 * - A schema-less tool whose name is a gateway tool is replaced by the full set of
 *   gateway definitions, injected at most once per request.
 * - Any other schema-less tool is dropped: forwarding a tool nobody can fulfil would
 *   leave the client waiting for a result that never arrives.
 *
 * @param {Array<object>|undefined|null} tools Tools from the inbound request.
 * @returns {Array<object>|undefined} Canonical tools, or undefined when none were supplied.
 */
function mapTools(tools) {
    if (!tools) {
        return undefined;
    }
    const mapped = [];
    let gatewayAdded = false;
    for (const tool of tools) {
        const schema = tool.input_schema;
        const isCustom = schema != null && typeof schema === "object";
        if (isCustom) {
            mapped.push({ name: tool.name, description: tool.description, parameters: schema });
            continue;
        }
        if (!isGatewayTool(tool.name) || gatewayAdded) {
            continue;
        }
        // Inject the whole gateway set once, whichever gateway tool was asked for first.
        mapped.push(...GATEWAY_TOOL_DEFS);
        gatewayAdded = true;
    }
    return mapped;
}
 export function canonicalToAnthropicResponse(r) {
     const content = r.content.map((b) => b.type === "text" ? { type: "text", text: b.text } :
         b.type === "tool_use" ? { type: "tool_use", id: b.id, name: b.name, input: b.input } :

package/dist/core/responses-inbound.js ADDED Viewed

@@ -0,0 +1,143 @@
+import { joinText } from "./canonical.js";
/**
 * Flatten a Responses-style `content` value into a single string.
 *
 * Accepts a plain string, an array of parts (strings or objects with a `text`
 * field), or a single part object that was not wrapped in an array. Parts
 * without text contribute nothing.
 *
 * @param {unknown} content The `content` field of an input item.
 * @returns {string} Concatenated text; "" when there is none.
 */
function partsText(content) {
    if (content == null) {
        return "";
    }
    if (typeof content === "string") {
        return content;
    }
    // A lone part object (or any other non-array value) used to reach `.map` and throw;
    // treat it as a one-element list instead.
    const parts = Array.isArray(content) ? content : [content];
    return parts.map((part) => (typeof part === "string" ? part : part?.text ?? "")).join("");
}
/**
 * Collect the images referenced by a Responses-style `content` array.
 *
 * Only parts of type "input_image" with a usable URL are kept. `image_url` may be
 * either the URL string itself or an object carrying it under `url`.
 *
 * @param {unknown} content The `content` field of an input item.
 * @returns {Array<{type: "image", dataUrl: string}>} Canonical image blocks (empty for non-arrays).
 */
function partsImages(content) {
    const images = [];
    if (!Array.isArray(content)) {
        return images;
    }
    for (const part of content) {
        if (part?.type !== "input_image") {
            continue;
        }
        const ref = part.image_url;
        const url = typeof ref === "string" ? ref : ref?.url;
        if (url) {
            images.push({ type: "image", dataUrl: url });
        }
    }
    return images;
}
/**
 * Best-effort decode of a function call's `arguments` into a plain object.
 *
 * Always returns an object, because the result is used as a tool's `input`:
 * - an already-decoded plain object is returned as is;
 * - a JSON string is parsed, and kept only if it decodes to a plain object;
 * - anything else (empty, malformed, or JSON that is null / a primitive / an array)
 *   yields `{}`.
 *
 * @param {unknown} s Raw `arguments` value from the request.
 * @returns {object} The decoded arguments, or `{}`.
 */
function safeJson(s) {
    const isPlainObject = (v) => v !== null && typeof v === "object" && !Array.isArray(v);
    if (isPlainObject(s)) {
        return s;
    }
    if (typeof s !== "string" || s === "") {
        return {};
    }
    try {
        const parsed = JSON.parse(s);
        return isPlainObject(parsed) ? parsed : {};
    }
    catch {
        // Deliberate best-effort: malformed arguments degrade to "no arguments".
        return {};
    }
}
/**
 * Convert one Responses input item into a canonical message.
 *
 * - `function_call` with a call_id      -> assistant message holding a tool_use block
 * - `function_call_output` with call_id -> tool message holding a tool_result block
 * - anything else is treated as a message item: its role is kept when it is
 *   system/user/assistant and falls back to "user" otherwise; text and images are
 *   gathered from `content`.
 *
 * @param {object} it A single item from the request's `input` array.
 * @returns {{role: string, content: Array<object>}|null} The message, or null when a
 *   message item has neither text nor images.
 */
function itemToMessage(it) {
    const callId = it.call_id;
    if (it.type === "function_call" && callId) {
        const toolUse = { type: "tool_use", id: callId, name: it.name ?? "", input: safeJson(it.arguments) };
        return { role: "assistant", content: [toolUse] };
    }
    if (it.type === "function_call_output" && callId) {
        const toolResult = { type: "tool_result", toolUseId: callId, content: it.output ?? "" };
        return { role: "tool", content: [toolResult] };
    }
    // Default: a plain message item.
    const knownRoles = ["system", "user", "assistant"];
    const role = knownRoles.includes(it.role ?? "") ? it.role : "user";
    const text = partsText(it.content);
    const textBlocks = text ? [{ type: "text", text }] : [];
    const content = [...textBlocks, ...partsImages(it.content)];
    if (content.length === 0) {
        return null;
    }
    return { role, content };
}
/**
 * Convert an OpenAI Responses-API request body into the gateway's canonical request.
 *
 * `instructions` becomes a leading system message. `input` may be a plain string
 * (one user message) or an array of items converted via `itemToMessage`. A missing
 * or otherwise unusable `input` contributes no messages instead of throwing, and
 * array entries that are not objects are skipped.
 *
 * Function tools become canonical `tools`; every other typed tool is listed by its
 * type in `hostedTools` so the outbound /responses translator can forward it.
 *
 * @param {object} req Parsed request body.
 * @returns {{model: *, stream: boolean, temperature: *, maxTokens: *, tools: (Array<object>|undefined), hostedTools: (Array<string>|undefined), messages: Array<object>}}
 */
export function responsesRequestToCanonical(req) {
    const messages = [];
    if (req.instructions)
        messages.push({ role: "system", content: [{ type: "text", text: req.instructions }] });
    if (typeof req.input === "string") {
        messages.push({ role: "user", content: [{ type: "text", text: req.input }] });
    }
    else if (Array.isArray(req.input)) {
        for (const it of req.input) {
            // Guard against null / primitive entries, which would crash the item converter.
            if (it == null || typeof it !== "object")
                continue;
            const m = itemToMessage(it);
            if (m)
                messages.push(m);
        }
    }
    return {
        model: req.model, stream: Boolean(req.stream), temperature: req.temperature, maxTokens: req.max_output_tokens,
        tools: req.tools?.filter((t) => t?.type === "function" && t.name).map((t) => ({ name: t.name, description: t.description, parameters: t.parameters ?? {} })),
        // Hosted tools (web_search etc.) Codex requests for Copilot to run server-side. Keep them so the
        // outbound /responses translator forwards them verbatim.
        hostedTools: req.tools?.filter((t) => t?.type && t.type !== "function").map((t) => t.type),
        messages,
    };
}
/**
 * Build the non-streaming Responses object from a canonical response.
 *
 * All text is joined into a single assistant `message` item with one `output_text`
 * part (omitted when there is no text); each `tool_use` block becomes a
 * `function_call` item whose `arguments` is the JSON-encoded input.
 *
 * @param {{id: string, model: string, content: Array<object>, usage: {promptTokens: number, completionTokens: number}}} r
 * @returns {object} The Responses-API response body.
 */
export function canonicalToResponsesResponse(r) {
    const text = joinText(r.content);
    const messageItems = text
        ? [{
                type: "message", id: `msg_${r.id}`, role: "assistant", status: "completed",
                content: [{ type: "output_text", text, annotations: [] }],
            }]
        : [];
    const callItems = r.content
        .filter((block) => block.type === "tool_use")
        .map((block) => ({
        type: "function_call", id: `fc_${block.id}`, call_id: block.id, name: block.name,
        arguments: JSON.stringify(block.input ?? {}), status: "completed",
    }));
    const { promptTokens, completionTokens } = r.usage;
    return {
        id: r.id,
        object: "response",
        status: "completed",
        model: r.model,
        output: [...messageItems, ...callItems],
        output_text: text,
        usage: { input_tokens: promptTokens, output_tokens: completionTokens, total_tokens: promptTokens + completionTokens },
    };
}
/** Serialize one event object as a single `data:` SSE frame. */
const frame = (event) => `data: ${JSON.stringify(event)}\n\n`;
/**
 * Stateful SSE emitter for a Responses stream.
 *
 * Every event carries a monotonically increasing `sequence_number`. Text streams as
 * one `message` output item; each tool call is its own `function_call` output item.
 * Output indices are allocated sequentially in the order items first appear.
 *
 * The emitter accumulates the streamed text and tool arguments so that the closing
 * `*.done` events and the final `response.completed` carry the complete items
 * (full text, and `call_id` / `name` / `arguments` for tool calls) rather than
 * empty placeholders.
 */
export class ResponsesSSE {
    responseId;
    model;
    seq = 0;
    nextIndex = 0;
    textIndex;
    textItemId;
    /** Text streamed so far; replayed in the closing events. */
    textBuffer = "";
    /** copilotIdx -> { outputIndex, itemId, callId, name, args } */
    toolIndex = new Map();
    /**
     * @param {string} responseId Id reported in the response envelope and used to derive item ids.
     * @param {string} model Model name reported in the response envelope.
     */
    constructor(responseId, model) {
        this.responseId = responseId;
        this.model = model;
    }
    /** Frame one event, stamping it with the next sequence number. */
    ev(type, extra) {
        return frame({ type, sequence_number: this.seq++, ...extra });
    }
    /** The response envelope shared by the created/completed events. */
    envelope(status) {
        return { id: this.responseId, object: "response", status, model: this.model };
    }
    /** @returns {string} The opening `response.created` frame. */
    start() {
        return this.ev("response.created", { response: { ...this.envelope("in_progress"), output: [] } });
    }
    /**
     * Emit a text delta, opening the message item on first use.
     * @param {string} delta Text fragment.
     * @returns {string[]} Frames to write, in order.
     */
    text(delta) {
        const out = [];
        if (this.textIndex === undefined) {
            this.textIndex = this.nextIndex++;
            this.textItemId = `msg_${this.responseId}`;
            out.push(this.ev("response.output_item.added", { output_index: this.textIndex, item: { type: "message", id: this.textItemId, role: "assistant", status: "in_progress", content: [] } }));
            out.push(this.ev("response.content_part.added", { item_id: this.textItemId, output_index: this.textIndex, content_index: 0, part: { type: "output_text", text: "", annotations: [] } }));
        }
        this.textBuffer += delta ?? "";
        out.push(this.ev("response.output_text.delta", { item_id: this.textItemId, output_index: this.textIndex, content_index: 0, delta }));
        return out;
    }
    /**
     * Open a function_call item for an upstream tool-call index (no-op if already open).
     * @param {number} copilotIdx Upstream tool-call index used as the lookup key.
     * @param {string} callId Call id exposed to the client.
     * @param {string} name Function name.
     * @returns {string[]} Frames to write (empty when the call was already started).
     */
    toolStart(copilotIdx, callId, name) {
        if (this.toolIndex.has(copilotIdx))
            return [];
        const outputIndex = this.nextIndex++;
        const itemId = `fc_${callId}`;
        this.toolIndex.set(copilotIdx, { outputIndex, itemId, callId, name, args: "" });
        return [this.ev("response.output_item.added", { output_index: outputIndex, item: { type: "function_call", id: itemId, call_id: callId, name, arguments: "", status: "in_progress" } })];
    }
    /**
     * Emit an arguments delta for a started tool call.
     * @returns {string[]} Frames to write (empty when the call is unknown).
     */
    toolArgs(copilotIdx, deltaArgs) {
        const t = this.toolIndex.get(copilotIdx);
        if (!t)
            return [];
        t.args += deltaArgs ?? "";
        return [this.ev("response.function_call_arguments.delta", { item_id: t.itemId, output_index: t.outputIndex, delta: deltaArgs })];
    }
    /**
     * Close all open items and complete the response.
     *
     * @param {{promptTokens: number, completionTokens: number}} [usage] Token usage, if known.
     * @param {*} [_finishReason] Accepted for interface compatibility; not used.
     * @param {Map<number, string>} [argsByIdx] Final tool arguments keyed by upstream index;
     *   when an entry is absent the arguments accumulated from `toolArgs` are used.
     * @returns {string[]} Frames to write, in order.
     */
    finish(usage, _finishReason, argsByIdx) {
        const out = [];
        const finished = [];
        if (this.textIndex !== undefined) {
            const part = { type: "output_text", text: this.textBuffer, annotations: [] };
            const item = { type: "message", id: this.textItemId, role: "assistant", status: "completed", content: [part] };
            out.push(this.ev("response.output_text.done", { item_id: this.textItemId, output_index: this.textIndex, content_index: 0, text: this.textBuffer }));
            out.push(this.ev("response.content_part.done", { item_id: this.textItemId, output_index: this.textIndex, content_index: 0, part }));
            out.push(this.ev("response.output_item.done", { output_index: this.textIndex, item }));
            finished.push({ outputIndex: this.textIndex, item });
        }
        for (const [copilotIdx, t] of this.toolIndex) {
            const args = argsByIdx?.get(copilotIdx) ?? t.args;
            const item = { type: "function_call", id: t.itemId, call_id: t.callId, name: t.name, arguments: args, status: "completed" };
            out.push(this.ev("response.function_call_arguments.done", { item_id: t.itemId, output_index: t.outputIndex, arguments: args }));
            out.push(this.ev("response.output_item.done", { output_index: t.outputIndex, item }));
            finished.push({ outputIndex: t.outputIndex, item });
        }
        // The completed response lists items in output-index order, which can differ from the
        // emission order above when a tool call started before any text arrived.
        const output = finished.sort((a, b) => a.outputIndex - b.outputIndex).map((entry) => entry.item);
        const u = usage ? { input_tokens: usage.promptTokens, output_tokens: usage.completionTokens, total_tokens: usage.promptTokens + usage.completionTokens } : undefined;
        out.push(this.ev("response.completed", { response: { ...this.envelope("completed"), output, ...(u ? { usage: u } : {}) } }));
        return out;
    }
}

package/dist/core/server-tools.js ADDED Viewed

@@ -0,0 +1,60 @@
+import { webSearch, webFetch, formatSearchResults, formatFetchResult } from "../providers/webiq/client.js";
+import { formatBorrowSources } from "../providers/copilot/borrow-search.js";
// Tools the GATEWAY executes itself instead of handing them to the model's client. They stand in
// for Claude Code's server-side web_search / web_fetch: the model calls them like ordinary function
// tools and the gateway runs them in-process.
/** Build a tool definition that takes exactly one required string argument. */
const singleStringArgTool = (name, description, argName, argDescription) => ({
    name,
    description,
    parameters: {
        type: "object",
        properties: { [argName]: { type: "string", description: argDescription } },
        required: [argName],
    },
});
export const GATEWAY_TOOL_DEFS = [
    singleStringArgTool("web_search", "Search the web for current information. Returns ranked results with titles, URLs, and content snippets.", "query", "The search query."),
    singleStringArgTool("web_fetch", "Fetch and read the content of a specific web page by URL.", "url", "The URL of the page to fetch."),
];
// Name lookup derived from the definitions above.
const GATEWAY_TOOL_NAMES = new Set();
for (const def of GATEWAY_TOOL_DEFS) {
    GATEWAY_TOOL_NAMES.add(def.name);
}
/**
 * @param {string} name Tool name to test.
 * @returns {boolean} True when the gateway itself executes the named tool.
 */
export function isGatewayTool(name) {
    return GATEWAY_TOOL_NAMES.has(name);
}
+const DEFAULT_WEBIQ = { search: webSearch, fetchPage: webFetch };
+// Shown when web search is unavailable (Copilot borrow disabled and no WebIQ key configured).
+const UNAVAILABLE = "web search/fetch not available, please run /webiq to use the key, to get the key please go to https://webiq.microsoft.ai/profiles/";
/**
 * Create the function that executes gateway tools (web_search / web_fetch) in-process.
 *
 * The backend and the WebIQ key are re-read from `cfg` on every call. The runner
 * resolves to a string in every branch shown here: formatted results, an error
 * message, or the "unavailable" notice.
 *
 * @param {object} cfg
 * @param {() => string} cfg.backend Returns the active backend; "unavailable" and "webiq" are
 *   handled explicitly, any other value goes through `cfg.borrow`.
 * @param {() => *} cfg.webiqKey Returns the WebIQ key handed to the WebIQ client.
 * @param {{run: (input: string) => Promise<object>}} cfg.borrow Borrowed-search backend.
 * @param {{search: Function, fetchPage: Function}} [cfg.webiq] WebIQ client override.
 * @returns {(name: string, input: *) => Promise<string>} The tool runner.
 */
export function makeGatewayRunner(cfg) {
    const webiq = cfg.webiq ?? DEFAULT_WEBIQ;
    // Normalise a tool argument to a trimmed string ("" when absent or not a string).
    const trimmedString = (value) => (typeof value === "string" ? value.trim() : "");
    const runSearch = async (query, backend, key) => {
        if (!query) {
            return "web_search error: missing 'query'";
        }
        switch (backend) {
            case "unavailable":
                return UNAVAILABLE;
            case "webiq": {
                const found = await webiq.search(key, { query });
                return found.ok ? formatSearchResults(found.results) : found.error;
            }
            default: {
                const borrowed = await cfg.borrow.run(query);
                return borrowed.ok ? formatBorrowSources(borrowed.sources) : borrowed.error;
            }
        }
    };
    const runFetch = async (url, backend, key) => {
        if (!url) {
            return "web_fetch error: missing 'url'";
        }
        switch (backend) {
            case "unavailable":
                return UNAVAILABLE;
            case "webiq": {
                const page = await webiq.fetchPage(key, { url });
                return page.ok ? formatFetchResult(page) : page.error;
            }
            default: {
                // Copilot's web_search tool also fetches: "Open {url}…" makes gpt-5-mini open that exact page.
                const borrowed = await cfg.borrow.run(`Open ${url} and extract its main content.`);
                if (!borrowed.ok) {
                    return borrowed.error;
                }
                return borrowed.text || formatBorrowSources(borrowed.sources);
            }
        }
    };
    return async (name, input) => {
        const arg = input ?? {};
        const backend = cfg.backend();
        const key = cfg.webiqKey();
        switch (name) {
            case "web_search":
                return runSearch(trimmedString(arg.query), backend, key);
            case "web_fetch":
                return runFetch(trimmedString(arg.url), backend, key);
            default:
                return `unknown gateway tool: ${name}`;
        }
    };
}

package/dist/core/tool-xml.js CHANGED Viewed

@@ -4,7 +4,15 @@ import { randomUUID } from "node:crypto";
 const TRIGGER_RE = /<(?:antml:)?(?:function_calls>|invoke\b)/;
 // Longest suffix of `s` that is a proper prefix of a trigger token — text we must hold back because
 // it might be the front of a sentinel split across chunk boundaries (e.g. "…<inv" then "oke name=").
-const PREFIX_TOKENS = ["<function_calls>", "<function_calls>", "<invoke", "<invoke"];
+// MUST list both the bare and the `antml:`-namespaced sentinels: Copilot streams Claude's tool call
+// token by token, so an opening `<invoke` is routinely split (e.g. "…<a" then "ntml:invoke");
+// if the namespaced forms are missing, that "<a" tail isn't recognized as a partial sentinel, leaks
+// as text, and the remainder no longer matches the trigger — the whole call renders literally.
+// Bare sentinel bodies, plus their namespaced variants built by inserting the prefix after "<" (the
+// literal is assembled here rather than written inline so the namespace can't be stripped from source).
+const NS = "antml" + ":";
+const BARE_TOKENS = ["<function_calls>", "<invoke"];
+const PREFIX_TOKENS = [...BARE_TOKENS, ...BARE_TOKENS.map((t) => "<" + NS + t.slice(1))];
 function heldBackLen(s) {
     let max = 0;
     for (const t of PREFIX_TOKENS) {

package/dist/providers/copilot/adapter.js CHANGED Viewed

@@ -1,6 +1,10 @@
 import { randomUUID } from "node:crypto";
 import { ToolCallExtractor } from "../../core/tool-xml.js";
+import { canonicalToResponsesBody, parseResponsesResult, streamResponses, RESPONSES_URL } from "./responses-upstream.js";
 const CHAT_URL = "https://api.githubcopilot.com/chat/completions";
+// A /chat 400 whose body names one of these means "this model is responses-only" — retry on /responses
+// once. Matches agent-maestro's safety net for models that drop /chat/completions from their endpoints.
+const RESPONSES_HINT_RE = /unsupported_api_for_model|invalid_request_body|does not support|use the responses|model_not_supported/i;
 // Canonical messages -> OpenAI wire messages (Copilot is OpenAI-shaped).
 function toWireMessages(messages) {
     const out = [];
@@ -54,16 +58,31 @@ async function errorDetail(res) {
 export class CopilotAdapter {
     tokenStore;
     fetchFn;
+    endpointsFor;
     name = "copilot";
-    constructor(tokenStore, fetchFn = fetch) {
+    // endpointsFor(model) -> the model's supported_endpoints (e.g. ["/responses"]). When known and it
+    // omits /chat/completions, route to /responses; unknown ([]) keeps the chat path (with a 400 net).
+    constructor(tokenStore, fetchFn = fetch, endpointsFor) {
         this.tokenStore = tokenStore;
         this.fetchFn = fetchFn;
+        this.endpointsFor = endpointsFor;
+    }
+    usesResponses(model) {
+        const eps = this.endpointsFor?.(model);
+        return !!eps && eps.length > 0 && !eps.includes("/chat/completions");
     }
     async complete(req) {
+        if (this.usesResponses(req.model))
+            return this.completeResponses(req);
         const token = await this.tokenStore.get();
         const res = await this.fetchFn(CHAT_URL, { method: "POST", headers: headers(token), body: JSON.stringify(buildBody({ ...req, stream: false })) });
-        if (!res.ok)
-            throw new Error(`copilot completion failed: ${res.status}${await errorDetail(res)}`);
+        if (!res.ok) {
+            const detail = await errorDetail(res);
+            // Safety net: a responses-only model rejected on /chat — retry once on /responses.
+            if (res.status === 400 && RESPONSES_HINT_RE.test(detail))
+                return this.completeResponses(req);
+            throw new Error(`copilot completion failed: ${res.status}${detail}`);
+        }
         const data = (await res.json());
         const choice = data.choices[0];
         const content = [];
@@ -77,11 +96,36 @@ export class CopilotAdapter {
             usage: { promptTokens: data.usage?.prompt_tokens ?? 0, completionTokens: data.usage?.completion_tokens ?? 0 },
         };
     }
+    // /responses variants — used for responses-only models and as the /chat 400 safety-net target.
+    async completeResponses(req) {
+        const token = await this.tokenStore.get();
+        const res = await this.fetchFn(RESPONSES_URL, { method: "POST", headers: headers(token), body: JSON.stringify(canonicalToResponsesBody({ ...req, stream: false })) });
+        if (!res.ok)
+            throw new Error(`copilot responses failed: ${res.status}${await errorDetail(res)}`);
+        return { ...parseResponsesResult(await res.json()), model: req.model };
+    }
+    async *streamResponsesReq(req) {
+        const token = await this.tokenStore.get();
+        const res = await this.fetchFn(RESPONSES_URL, { method: "POST", headers: headers(token), body: JSON.stringify(canonicalToResponsesBody({ ...req, stream: true })) });
+        if (!res.ok || !res.body)
+            throw new Error(`copilot responses stream failed: ${res.status}${await errorDetail(res)}`);
+        yield* streamResponses(res);
+    }
     async *stream(req) {
+        if (this.usesResponses(req.model)) {
+            yield* this.streamResponsesReq(req);
+            return;
+        }
         const token = await this.tokenStore.get();
         const res = await this.fetchFn(CHAT_URL, { method: "POST", headers: headers(token), body: JSON.stringify(buildBody({ ...req, stream: true })) });
-        if (!res.ok || !res.body)
-            throw new Error(`copilot stream failed: ${res.status}${await errorDetail(res)}`);
+        if (!res.ok || !res.body) {
+            const detail = await errorDetail(res);
+            if (res.status === 400 && RESPONSES_HINT_RE.test(detail)) {
+                yield* this.streamResponsesReq(req);
+                return;
+            }
+            throw new Error(`copilot stream failed: ${res.status}${detail}`);
+        }
         const reader = res.body.getReader();
         const decoder = new TextDecoder();
         const startedTools = new Set();

package/dist/providers/copilot/borrow-search.js ADDED Viewed

@@ -0,0 +1,86 @@
+import { RESPONSES_URL } from "./responses-upstream.js";
/**
 * Request headers for the Copilot /responses endpoint: bearer auth and JSON content
 * type, plus the editor identity headers and `openai-intent`.
 *
 * @param {string} token Copilot API token.
 * @returns {Record<string, string>} Header map.
 */
function headers(token) {
    const identity = {
        "editor-version": "vscode/1.95.0",
        "copilot-integration-id": "vscode-chat",
        "openai-intent": "conversation-edits",
    };
    return {
        authorization: `Bearer ${token}`,
        "content-type": "application/json",
        ...identity,
    };
}
/**
 * Collect `{title, url}` for every `url_citation` annotation found on the parts of
 * `message` items, keeping the first occurrence of each URL in encounter order.
 * A citation without a title uses its URL as the title.
 *
 * @param {Array<object>|null|undefined} output The `output` array of a /responses result.
 * @returns {Array<{title: string, url: string}>} De-duplicated sources.
 */
export function extractCitations(output) {
    const byUrl = new Map();
    for (const item of output ?? []) {
        if (item?.type !== "message") {
            continue;
        }
        for (const part of item.content ?? []) {
            for (const ann of part?.annotations ?? []) {
                const isCitation = ann?.type === "url_citation" && Boolean(ann.url);
                if (isCitation && !byUrl.has(ann.url)) {
                    byUrl.set(ann.url, { title: ann.title || ann.url, url: ann.url });
                }
            }
        }
    }
    return [...byUrl.values()];
}
/**
 * Concatenate the text of every `output_text` part on `message` items — i.e. the
 * upstream model's own prose answer. Used for web_fetch, where the extracted page
 * content is the payload; web_search hands back sources instead.
 *
 * @param {Array<object>|null|undefined} output The `output` array of a /responses result.
 * @returns {string} The joined text ("" when there is none).
 */
export function extractText(output) {
    const pieces = [];
    for (const item of output ?? []) {
        if (item?.type !== "message") {
            continue;
        }
        for (const part of item.content ?? []) {
            const hasText = part?.type === "output_text" && part.text;
            if (hasText) {
                pieces.push(part.text);
            }
        }
    }
    return pieces.join("");
}
// Upper bound for one borrowed search, so a stalled upstream fails fast instead of hanging the turn.
const DEFAULT_TIMEOUT_MS = 30_000;
/**
 * Run one internal gpt-5-mini web_search request against the Copilot /responses endpoint.
 *
 * `input` is the full instruction: a query for web_search, or an
 * "Open {url} and extract its main content." instruction for web_fetch.
 *
 * Never throws: every failure — including a missing or non-string `input` — is
 * reported as `{ ok: false, error }` so the gateway tool loop can degrade gracefully.
 *
 * @param {{get: () => Promise<string>}} tokenStore Source of the Copilot API token.
 * @param {string} input Instruction sent as the request `input`.
 * @param {typeof fetch} [fetchFn] Fetch implementation (injectable for tests).
 * @param {number} [timeoutMs] Abort the request after this many milliseconds.
 * @returns {Promise<{ok: true, sources: Array<{title: string, url: string}>, text: string} | {ok: false, error: string}>}
 */
export async function borrowSearch(tokenStore, input, fetchFn = fetch, timeoutMs = DEFAULT_TIMEOUT_MS) {
    // Type-check first: calling .trim() on a non-string would throw and break the contract above.
    if (typeof input !== "string" || !input.trim())
        return { ok: false, error: "borrow search error: empty query" };
    let token;
    try {
        token = await tokenStore.get();
    }
    catch (e) {
        return { ok: false, error: `borrow search unavailable: ${e instanceof Error ? e.message : String(e)}` };
    }
    const ctrl = new AbortController();
    const timer = setTimeout(() => ctrl.abort(), timeoutMs);
    try {
        const res = await fetchFn(RESPONSES_URL, {
            method: "POST", headers: headers(token), signal: ctrl.signal,
            // reasoning.effort "low" keeps the upstream turn short; per the original note, "minimal"
            // is rejected by the API when combined with web_search.
            body: JSON.stringify({ model: "gpt-5-mini", input, stream: false, tools: [{ type: "web_search" }], reasoning: { effort: "low" } }),
        });
        if (!res.ok) {
            const detail = await res.text().catch(() => "");
            return { ok: false, error: `borrow search failed: ${res.status}${detail ? ` — ${detail.slice(0, 200)}` : ""}` };
        }
        const data = (await res.json());
        return { ok: true, sources: extractCitations(data.output ?? []), text: extractText(data.output ?? []) };
    }
    catch (e) {
        // Only our timer aborts this controller, so an aborted signal means the timeout fired —
        // whatever error shape the fetch implementation chose to reject with.
        const timedOut = ctrl.signal.aborted || (e instanceof Error && e.name === "AbortError");
        return { ok: false, error: timedOut ? `borrow search timed out after ${timeoutMs}ms` : "borrow search failed: could not reach Copilot" };
    }
    finally {
        clearTimeout(timer);
    }
}
/**
 * Render borrowed sources as the tool_result text handed back to the model: a
 * 1-based numbered list of title + URL pairs separated by blank lines, so the
 * model can cite them.
 *
 * @param {Array<{title: string, url: string}>} sources Sources to render.
 * @returns {string} The formatted list, or "no results found" when empty.
 */
export function formatBorrowSources(sources) {
    if (!sources.length) {
        return "no results found";
    }
    const entries = [];
    for (const [index, source] of sources.entries()) {
        entries.push(`[${index + 1}] ${source.title}\n${source.url}`);
    }
    return entries.join("\n\n");
}

package/dist/providers/copilot/models.js CHANGED Viewed

@@ -32,6 +32,20 @@ export async function fetchCopilotModels(token, fetchFn = fetch, timeoutMs = DEF
     const ids = [...new Set(data.map((m) => m.id).filter((x) => Boolean(x)))];
     return ids.length ? ids : FALLBACK_MODELS;
 }
/**
 * Fetch a map of model id -> the Copilot API endpoints that model supports
 * (e.g. ["/responses", "ws:/responses"]), used to route each request to the right
 * upstream. Models without an id or without a non-empty `supported_endpoints`
 * array are left out.
 *
 * @param {string} token Copilot API token.
 * @param {typeof fetch} [fetchFn] Fetch implementation.
 * @param {number} [timeoutMs] Timeout passed through to the models request.
 * @returns {Promise<Record<string, string[]>>} Endpoints by model id; `{}` when the
 *   model list could not be obtained.
 */
export async function fetchModelEndpoints(token, fetchFn = fetch, timeoutMs = DEFAULT_TIMEOUT_MS) {
    const models = await getModels(token, fetchFn, timeoutMs);
    const endpointsById = {};
    if (!models) {
        return endpointsById;
    }
    for (const model of models) {
        const endpoints = model.supported_endpoints;
        const hasEndpoints = Array.isArray(endpoints) && endpoints.length;
        if (model.id && hasEndpoints) {
            endpointsById[model.id] = endpoints;
        }
    }
    return endpointsById;
}
 // Map of model id -> its real input/context window, used to size auto-compaction per model and
 // to show the window in the picker. Returns {} on failure/timeout so callers fall back gracefully.
 export async function fetchModelLimits(token, fetchFn = fetch, timeoutMs = DEFAULT_TIMEOUT_MS) {