copilot-reverse 0.2.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -11,12 +11,14 @@ import { startSupervisor } from "../supervisor/index.js";
11
11
  import { runAssistantTurn } from "../tui/assistant/runtime.js";
12
12
  import { makeOnChat } from "../tui/assistant/on-chat.js";
13
13
  import { readGhToken, clearGhToken } from "../shared/creds.js";
14
+ import { writeWebIqKey, readWebIqKey, clearWebIqKey, readWebSearchMode, writeWebSearchMode, resolveWebSearchBackend } from "../shared/webiq-key.js";
14
15
  import { readClientSetup, writeClientSetup } from "../shared/client-setup.js";
15
16
  import { readChatModel, writeChatModel } from "../shared/prefs.js";
16
17
  import { CopilotTokenStore, isCopilotTokenValid } from "../providers/copilot/token.js";
17
18
  import { fetchCopilotModels, fetchModelLimits } from "../providers/copilot/models.js";
18
19
  import { applyClaude, applyCodex, resetClaude, resetCodex, CLAUDE_ENV_KEYS, CODEX_ENV_KEYS } from "../tui/setup/apply.js";
19
20
  import { readClientStatus } from "../tui/setup/status.js";
21
+ import { summarizeStatus } from "../tui/status-summary.js";
20
22
  import { applyCodexToml } from "../tui/setup/codex-toml.js";
21
23
  import { claudeCopilotReverseEnv } from "../tui/setup/clients.js";
22
24
  import { dataDir } from "../shared/paths.js";
@@ -107,19 +109,21 @@ async function launchTui() {
107
109
  void tokenStore.get().then((t) => fetchModelLimits(t)).then((m) => Object.assign(modelLimits, m)).catch(() => { });
108
110
  // Apply a client's config (shared by the /setup wizard and the assistant's setup_* tools).
109
111
  // For Claude Code we also write the selected model's real context window so the client doesn't
112
+ // For Claude Code we also write the selected model's real context window so the client doesn't
110
113
  // assume the default 200K (which makes a 1M model read "context 100%" far too early). For Codex
111
- // we write BOTH a .env (legacy) and ~/.codex/config.toml (the native Codex config, with the
112
- // model's context window) so either Codex setup style works.
114
+ // the native config is ~/.codex/config.toml (what the standalone CLI actually reads); we also keep
115
+ // a legacy .env for older OpenAI-style tooling, but report the config.toml path since that's the
116
+ // one that matters.
113
117
  const applyClient = (clientKind, scope, model) => {
114
118
  if (clientKind === "claude") {
115
119
  const r = applyClaude(scope, claudeCopilotReverseEnv(anthropicBase, "copilot-reverse-local", model, modelLimits[model]));
116
120
  writeClientSetup(dataDir(), { ...readClientSetup(dataDir()), claude: true });
117
121
  return r;
118
122
  }
119
- const r = applyCodex(scope, { OPENAI_BASE_URL: openaiBase, OPENAI_API_KEY: "copilot-reverse-local", OPENAI_MODEL: model });
120
- applyCodexToml({ baseUrl: openaiBase, model, contextWindow: modelLimits[model] });
123
+ applyCodex(scope, { OPENAI_BASE_URL: openaiBase, OPENAI_API_KEY: "copilot-reverse-local", OPENAI_MODEL: model }); // legacy .env
124
+ const toml = applyCodexToml({ baseUrl: openaiBase, model, contextWindow: modelLimits[model], apiKey: "copilot-reverse-local" });
121
125
  writeClientSetup(dataDir(), { ...readClientSetup(dataDir()), codex: true });
122
- return r;
126
+ return toml; // the native config Codex reads — surface this path in the setup card
123
127
  };
124
128
  const setup = { apply: async (clientKind, scope, model) => applyClient(clientKind, scope, model) };
125
129
  const onChat = makeOnChat({
@@ -144,6 +148,16 @@ async function launchTui() {
144
148
  }
145
149
  });
146
150
  const persistedModel = readChatModel(dataDir());
151
+ // Startup overview. The token was already validated above (re-auth happens before we get here), so
152
+ // GitHub is connected; web search readiness and configured clients are read from disk.
153
+ const clientStatus = readClientStatus();
154
+ const startupStatus = summarizeStatus({
155
+ hasToken: Boolean(readGhToken(dataDir())),
156
+ tokenValid: true,
157
+ webSearch: resolveWebSearchBackend(readWebSearchMode(dataDir()), Boolean(readWebIqKey(dataDir()))),
158
+ worker: "ready",
159
+ clients: { claude: clientStatus.claude.user || clientStatus.claude.project, codex: clientStatus.codex.user || clientStatus.codex.project },
160
+ });
147
161
  app = render(React.createElement(App, {
148
162
  registry,
149
163
  title: "copilot-reverse",
@@ -164,6 +178,16 @@ async function launchTui() {
164
178
  onModelChange: (m) => writeChatModel(dataDir(), m),
165
179
  pickModelOnStart: !persistedModel,
166
180
  login: doLogin,
181
+ enableWebiq: (k) => { writeWebIqKey(k, dataDir()); writeWebSearchMode(dataDir(), "webiq"); },
182
+ disableWebiq: () => { clearWebIqKey(dataDir()); },
183
+ webSearchBackend: () => resolveWebSearchBackend(readWebSearchMode(dataDir()), Boolean(readWebIqKey(dataDir()))),
184
+ startupStatus,
185
+ githubStatus: async () => {
186
+ const token = readGhToken(dataDir());
187
+ if (!token)
188
+ return "signed-out";
189
+ return (await isCopilotTokenValid(token)) ? "connected" : "expired";
190
+ },
167
191
  }));
168
192
  }
169
193
  const program = new Command();
@@ -1,3 +1,4 @@
1
+ import { GATEWAY_TOOL_DEFS, isGatewayTool } from "./server-tools.js";
1
2
  // The Anthropic `system` field may be a plain string or an array of text blocks (the Claude Code
2
3
  // SDK sends blocks with cache_control). Flatten either shape to a string — otherwise it stringifies
3
4
  // to "[object Object]" and the model gets garbage instructions.
@@ -41,15 +42,32 @@ export function anthropicRequestToCanonical(req) {
41
42
  }
42
43
  return {
43
44
  model: req.model, stream: Boolean(req.stream), temperature: req.temperature, maxTokens: req.max_tokens,
44
- // Keep only custom tools with a real JSON-Schema. Anthropic server-side tools (web_search,
45
- // bash, computer, …) arrive with a `type` and no `input_schema`; forwarding them produces an
46
- // invalid tool the model can't fulfil, and the client hangs forever waiting for a tool_result.
47
- tools: req.tools
48
- ?.filter((t) => t.input_schema != null && typeof t.input_schema === "object")
49
- .map((t) => ({ name: t.name, description: t.description, parameters: t.input_schema })),
45
+ tools: mapTools(req.tools),
50
46
  messages,
51
47
  };
52
48
  }
49
+ // Custom tools (with a real JSON-Schema) pass through. Anthropic server-side tools arrive with a
50
+ // dated `type` and no input_schema: web_search / web_fetch are converted to gateway function tools
51
+ // (the gateway runs them itself against WebIQ), and every OTHER server tool (bash, computer, …) is
52
+ // dropped — forwarding an unfulfillable tool makes the client hang forever waiting for a result.
53
/**
 * Translate an Anthropic `tools` array into canonical tool definitions.
 *
 * - Tools with an object `input_schema` pass through as `{ name, description, parameters }`.
 * - A schema-less tool whose name is a gateway tool triggers a single injection of the whole
 *   GATEWAY_TOOL_DEFS set.
 * - Every other schema-less tool is dropped.
 *
 * Tool names are kept unique (first definition wins): a client-defined tool that shares a name
 * with an injected gateway def would otherwise be emitted twice in the same tool list.
 *
 * @param {Array<object>|undefined|null} tools Anthropic-style tool entries.
 * @returns {Array<object>|undefined} canonical tools, or undefined when no tools were supplied.
 */
function mapTools(tools) {
    if (!tools)
        return undefined;
    const out = [];
    const seenNames = new Set();
    // Push a definition unless one with the same (string) name is already present.
    const add = (def) => {
        if (typeof def.name === "string") {
            if (seenNames.has(def.name))
                return;
            seenNames.add(def.name);
        }
        out.push(def);
    };
    let injectedGateway = false;
    for (const t of tools) {
        if (t == null)
            continue; // tolerate holes / null entries instead of throwing on property access
        if (t.input_schema != null && typeof t.input_schema === "object") {
            add({ name: t.name, description: t.description, parameters: t.input_schema });
        }
        else if (isGatewayTool(t.name) && !injectedGateway) {
            // Replace the schema-less server tool with the gateway defs. The whole set is injected
            // once, so asking for either tool makes both available.
            for (const def of GATEWAY_TOOL_DEFS)
                add(def);
            injectedGateway = true;
        }
    }
    return out;
}
53
71
  export function canonicalToAnthropicResponse(r) {
54
72
  const content = r.content.map((b) => b.type === "text" ? { type: "text", text: b.text } :
55
73
  b.type === "tool_use" ? { type: "tool_use", id: b.id, name: b.name, input: b.input } :
@@ -0,0 +1,143 @@
1
+ import { joinText } from "./canonical.js";
2
/**
 * Flatten a Responses-style `content` value to plain text.
 *
 * Accepts a string (returned as-is), an array of parts (strings or objects with a `text`
 * field), or a single part object. null/undefined and values without text yield "".
 * A lone non-array value is treated as a one-element list rather than throwing, matching
 * how partsImages tolerates non-array content.
 *
 * @param {unknown} content
 * @returns {string} the concatenated text of all parts.
 */
function partsText(content) {
    if (content == null)
        return "";
    if (typeof content === "string")
        return content;
    const parts = Array.isArray(content) ? content : [content];
    return parts.map((p) => (typeof p === "string" ? p : p?.text ?? "")).join("");
}
9
/**
 * Collect the images carried by a Responses-style `content` array.
 *
 * Only parts whose `type` is "input_image" and that carry a usable URL are kept. `image_url`
 * may be either a plain string or an object with a `url` field. Non-array content yields [].
 *
 * @param {unknown} content
 * @returns {Array<{type: "image", dataUrl: string}>}
 */
function partsImages(content) {
    const images = [];
    if (!Array.isArray(content))
        return images;
    for (const part of content) {
        if (part?.type !== "input_image")
            continue;
        const ref = part.image_url;
        const dataUrl = typeof ref === "string" ? ref : ref?.url;
        if (dataUrl)
            images.push({ type: "image", dataUrl });
    }
    return images;
}
15
/**
 * Parse a JSON string of tool-call arguments without ever throwing.
 *
 * The result is used as a tool_use `input`, so it must be a plain object: empty or malformed
 * input, and valid JSON that is not an object (null, numbers, strings, arrays), all yield {}.
 *
 * @param {string|undefined|null} s raw JSON text.
 * @returns {object} the parsed object, or {} when nothing usable could be parsed.
 */
function safeJson(s) {
    if (!s)
        return {};
    let parsed;
    try {
        parsed = JSON.parse(s);
    }
    catch {
        return {};
    }
    const isPlainObject = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
    return isPlainObject ? parsed : {};
}
21
/**
 * Convert one Responses `input` item into a canonical message.
 *
 * - `function_call` with a call_id        -> assistant message holding one tool_use block.
 * - `function_call_output` with a call_id -> tool message holding one tool_result block.
 * - anything else is treated as a chat message; roles other than system/user/assistant
 *   fall back to "user".
 *
 * @param {object} it a Responses input item.
 * @returns {{role: string, content: Array<object>}|null} null when a message item carries
 *   neither text nor images.
 */
function itemToMessage(it) {
    const hasCallId = Boolean(it.call_id);
    if (hasCallId && it.type === "function_call") {
        const toolUse = { type: "tool_use", id: it.call_id, name: it.name ?? "", input: safeJson(it.arguments) };
        return { role: "assistant", content: [toolUse] };
    }
    if (hasCallId && it.type === "function_call_output") {
        const toolResult = { type: "tool_result", toolUseId: it.call_id, content: it.output ?? "" };
        return { role: "tool", content: [toolResult] };
    }
    // Plain message item: text first, then any images.
    const knownRoles = ["system", "user", "assistant"];
    const role = knownRoles.includes(it.role ?? "") ? it.role : "user";
    const text = partsText(it.content);
    const blocks = text ? [{ type: "text", text }] : [];
    for (const image of partsImages(it.content))
        blocks.push(image);
    if (blocks.length === 0)
        return null;
    return { role, content: blocks };
}
37
/**
 * Convert an OpenAI Responses-style request into the canonical request shape.
 *
 * - `instructions` becomes a leading system message.
 * - A string `input` becomes one user message; an array `input` is converted item by item
 *   via itemToMessage (items that produce no message are skipped).
 * - A missing or non-array `input` contributes no messages instead of throwing, so a request
 *   that carries only `instructions` still converts.
 * - Function tools become canonical `tools`; the `type` of every non-function tool is kept in
 *   `hostedTools`.
 *
 * @param {object} req Responses request body.
 * @returns {{model: *, stream: boolean, temperature: *, maxTokens: *, tools: Array<object>|undefined,
 *   hostedTools: Array<string>|undefined, messages: Array<object>}}
 */
export function responsesRequestToCanonical(req) {
    const messages = [];
    if (req.instructions)
        messages.push({ role: "system", content: [{ type: "text", text: req.instructions }] });
    if (typeof req.input === "string") {
        messages.push({ role: "user", content: [{ type: "text", text: req.input }] });
    }
    else if (Array.isArray(req.input)) {
        for (const it of req.input) {
            if (it == null)
                continue; // a null entry carries nothing to convert
            const m = itemToMessage(it);
            if (m)
                messages.push(m);
        }
    }
    return {
        model: req.model, stream: Boolean(req.stream), temperature: req.temperature, maxTokens: req.max_output_tokens,
        tools: req.tools?.filter((t) => t?.type === "function" && t.name).map((t) => ({ name: t.name, description: t.description, parameters: t.parameters ?? {} })),
        // Hosted (non-function) tools are kept by type so they are not lost in translation.
        hostedTools: req.tools?.filter((t) => t?.type !== "function" && t?.type).map((t) => t.type),
        messages,
    };
}
60
+ // Build the non-stream Responses object: text -> an output_text message item, tool_use -> function_call items.
61
/**
 * Build the non-streaming Responses object from a canonical response.
 *
 * The joined text (when non-empty) becomes a single assistant `message` item holding one
 * `output_text` part; every `tool_use` block becomes a `function_call` item after it.
 *
 * @param {object} r canonical response (`id`, `model`, `content`, `usage`).
 * @returns {object} a completed Responses-shaped object including `output_text` and token usage.
 */
export function canonicalToResponsesResponse(r) {
    const text = joinText(r.content);
    const messageItems = text
        ? [{
                type: "message",
                id: `msg_${r.id}`,
                role: "assistant",
                status: "completed",
                content: [{ type: "output_text", text, annotations: [] }],
            }]
        : [];
    const callItems = [];
    for (const block of r.content) {
        if (block.type !== "tool_use")
            continue;
        callItems.push({
            type: "function_call",
            id: `fc_${block.id}`,
            call_id: block.id,
            name: block.name,
            arguments: JSON.stringify(block.input ?? {}),
            status: "completed",
        });
    }
    const inputTokens = r.usage.promptTokens;
    const outputTokens = r.usage.completionTokens;
    return {
        id: r.id,
        object: "response",
        status: "completed",
        model: r.model,
        output: [...messageItems, ...callItems],
        output_text: text,
        usage: { input_tokens: inputTokens, output_tokens: outputTokens, total_tokens: inputTokens + outputTokens },
    };
}
76
+ // Stateful SSE emitter for the Responses stream. Each event carries a monotonically increasing
77
+ // sequence_number (Codex/agent-maestro require it). Text streams as one output_text message item;
78
+ // each tool call is its own function_call output item. Indices are allocated sequentially.
79
// Serialize one event object as a single SSE `data:` frame.
const frame = (event) => `data: ${JSON.stringify(event)}\n\n`;
/**
 * Stateful SSE emitter for a Responses stream.
 *
 * Every event carries a monotonically increasing `sequence_number`. Text streams as one
 * `message` output item; each tool call is its own `function_call` output item. Output indices
 * are allocated sequentially in the order items first appear.
 *
 * The emitter accumulates streamed text and tool-argument deltas so the closing `*.done`
 * events and the final `response.completed` envelope carry the complete items. Previously the
 * done events carried empty text/content and the function_call done item lacked `call_id`,
 * `name` and `arguments`, so a consumer that reads the finished items rather than the deltas
 * received empty output.
 */
export class ResponsesSSE {
    responseId;
    model;
    seq = 0;
    nextIndex = 0;
    textIndex;
    textItemId;
    // Full text streamed so far, replayed in the closing text events.
    textBuf = "";
    // copilotIdx -> { outputIndex, itemId, callId, name, args }
    toolIndex = new Map();
    /**
     * @param {string} responseId id reported in every response envelope.
     * @param {string} model model name reported in every response envelope.
     */
    constructor(responseId, model) {
        this.responseId = responseId;
        this.model = model;
    }
    /** Frame one event, stamping the next sequence number. */
    ev(type, extra) {
        return frame({ type, sequence_number: this.seq++, ...extra });
    }
    /** Minimal response envelope shared by the created/completed events. */
    envelope(status) {
        return { id: this.responseId, object: "response", status, model: this.model };
    }
    /** @returns {string} the opening `response.created` frame. */
    start() {
        return this.ev("response.created", { response: { ...this.envelope("in_progress"), output: [] } });
    }
    /**
     * Emit a text delta, opening the message item and its text part on first use.
     * @param {string} delta
     * @returns {string[]} frames to write, in order.
     */
    text(delta) {
        const out = [];
        if (this.textIndex === undefined) {
            this.textIndex = this.nextIndex++;
            this.textItemId = `msg_${this.responseId}`;
            out.push(this.ev("response.output_item.added", { output_index: this.textIndex, item: { type: "message", id: this.textItemId, role: "assistant", status: "in_progress", content: [] } }));
            out.push(this.ev("response.content_part.added", { item_id: this.textItemId, output_index: this.textIndex, content_index: 0, part: { type: "output_text", text: "", annotations: [] } }));
        }
        this.textBuf += delta ?? "";
        out.push(this.ev("response.output_text.delta", { item_id: this.textItemId, output_index: this.textIndex, content_index: 0, delta }));
        return out;
    }
    /**
     * Open a function_call item for an upstream tool-call index. Repeated starts for the same
     * index are ignored.
     * @returns {string[]} frames to write (empty when the call is already open).
     */
    toolStart(copilotIdx, callId, name) {
        if (this.toolIndex.has(copilotIdx))
            return [];
        const outputIndex = this.nextIndex++;
        const itemId = `fc_${callId}`;
        this.toolIndex.set(copilotIdx, { outputIndex, itemId, callId, name, args: "" });
        return [this.ev("response.output_item.added", { output_index: outputIndex, item: { type: "function_call", id: itemId, call_id: callId, name, arguments: "", status: "in_progress" } })];
    }
    /**
     * Emit an arguments delta for an open tool call; unknown indices are ignored.
     * @returns {string[]} frames to write.
     */
    toolArgs(copilotIdx, deltaArgs) {
        const t = this.toolIndex.get(copilotIdx);
        if (!t)
            return [];
        t.args += deltaArgs ?? "";
        return [this.ev("response.function_call_arguments.delta", { item_id: t.itemId, output_index: t.outputIndex, delta: deltaArgs })];
    }
    /**
     * Close all open items and complete the response.
     *
     * @param {{promptTokens: number, completionTokens: number}|undefined} usage token counts, if known.
     * @param {*} _finishReason unused.
     * @param {Map<*, string>} [argsByIdx] final accumulated tool args per upstream index; when an
     *   entry is missing, the deltas seen through toolArgs() are used instead.
     * @returns {string[]} frames to write, ending with `response.completed`.
     */
    finish(usage, _finishReason, argsByIdx) {
        const out = [];
        // Completed items placed at their output_index, reported in response.completed.
        const output = [];
        if (this.textIndex !== undefined) {
            const part = { type: "output_text", text: this.textBuf, annotations: [] };
            const item = { type: "message", id: this.textItemId, role: "assistant", status: "completed", content: [part] };
            out.push(this.ev("response.output_text.done", { item_id: this.textItemId, output_index: this.textIndex, content_index: 0, text: this.textBuf }));
            out.push(this.ev("response.content_part.done", { item_id: this.textItemId, output_index: this.textIndex, content_index: 0, part }));
            out.push(this.ev("response.output_item.done", { output_index: this.textIndex, item }));
            output[this.textIndex] = item;
        }
        for (const [copilotIdx, t] of this.toolIndex) {
            const args = argsByIdx?.get(copilotIdx) ?? t.args;
            const item = { type: "function_call", id: t.itemId, call_id: t.callId, name: t.name, arguments: args, status: "completed" };
            out.push(this.ev("response.function_call_arguments.done", { item_id: t.itemId, output_index: t.outputIndex, arguments: args }));
            out.push(this.ev("response.output_item.done", { output_index: t.outputIndex, item }));
            output[t.outputIndex] = item;
        }
        const u = usage ? { input_tokens: usage.promptTokens, output_tokens: usage.completionTokens, total_tokens: usage.promptTokens + usage.completionTokens } : undefined;
        out.push(this.ev("response.completed", { response: { ...this.envelope("completed"), output, ...(u ? { usage: u } : {}) } }));
        return out;
    }
}
@@ -0,0 +1,60 @@
1
+ import { webSearch, webFetch, formatSearchResults, formatFetchResult } from "../providers/webiq/client.js";
2
+ import { formatBorrowSources } from "../providers/copilot/borrow-search.js";
3
+ // Tools the GATEWAY executes itself, rather than forwarding to the model's client. These mirror Claude
4
+ // Code's server-side web_search / web_fetch, which a Copilot-backed gateway must fulfil internally —
5
+ // the model calls them like normal function tools and we run them in-process.
6
// JSON-Schema for a tool that takes exactly one required string argument.
const singleStringArg = (key, description) => ({
    type: "object",
    properties: { [key]: { type: "string", description } },
    required: [key],
});
// Function-tool definitions for the tools the gateway runs itself.
export const GATEWAY_TOOL_DEFS = [
    {
        name: "web_search",
        description: "Search the web for current information. Returns ranked results with titles, URLs, and content snippets.",
        parameters: singleStringArg("query", "The search query."),
    },
    {
        name: "web_fetch",
        description: "Fetch and read the content of a specific web page by URL.",
        parameters: singleStringArg("url", "The URL of the page to fetch."),
    },
];
// Name lookup built once from the definitions above.
const GATEWAY_TOOL_NAMES = new Set();
for (const def of GATEWAY_TOOL_DEFS)
    GATEWAY_TOOL_NAMES.add(def.name);
/**
 * @param {string} name tool name.
 * @returns {boolean} true when `name` is one of the gateway-run tools.
 */
export function isGatewayTool(name) {
    return GATEWAY_TOOL_NAMES.has(name);
}
20
+ const DEFAULT_WEBIQ = { search: webSearch, fetchPage: webFetch };
21
+ // Shown when web search is unavailable (Copilot borrow disabled and no WebIQ key configured).
22
+ const UNAVAILABLE = "web search/fetch not available, please run /webiq to use the key, to get the key please go to https://webiq.microsoft.ai/profiles/";
23
/**
 * Build the function that executes gateway tools (`web_search`, `web_fetch`) in-process.
 *
 * `cfg` supplies:
 *   - `backend()`   -> "unavailable", "webiq", or anything else (treated as the borrow backend)
 *   - `webiqKey()`  -> the key passed to the WebIQ client
 *   - `borrow.run(instruction)` -> the borrow backend
 *   - `webiq` (optional) -> `{ search, fetchPage }`, defaulting to DEFAULT_WEBIQ
 *
 * The returned runner always resolves to a string for the tool result. Exceptions thrown by
 * the configured backend are converted into an error string as well, so one failing lookup
 * degrades the turn instead of rejecting it.
 *
 * @param {object} cfg
 * @returns {(name: string, input: unknown) => Promise<string>}
 */
export function makeGatewayRunner(cfg) {
    const webiq = cfg.webiq ?? DEFAULT_WEBIQ;
    // Trimmed string argument, or "" when it is missing or not a string.
    const stringArg = (arg, key) => (typeof arg[key] === "string" ? arg[key].trim() : "");
    const runSearch = async (arg) => {
        const query = stringArg(arg, "query");
        if (!query)
            return "web_search error: missing 'query'";
        const backend = cfg.backend();
        if (backend === "unavailable")
            return UNAVAILABLE;
        if (backend === "webiq") {
            const out = await webiq.search(cfg.webiqKey(), { query });
            return out.ok ? formatSearchResults(out.results) : out.error;
        }
        const out = await cfg.borrow.run(query);
        return out.ok ? formatBorrowSources(out.sources) : out.error;
    };
    const runFetch = async (arg) => {
        const url = stringArg(arg, "url");
        if (!url)
            return "web_fetch error: missing 'url'";
        const backend = cfg.backend();
        if (backend === "unavailable")
            return UNAVAILABLE;
        if (backend === "webiq") {
            const out = await webiq.fetchPage(cfg.webiqKey(), { url });
            return out.ok ? formatFetchResult(out) : out.error;
        }
        // The borrow backend is asked to open the page; prefer its extracted text and fall back
        // to the formatted source list when there is none.
        const out = await cfg.borrow.run(`Open ${url} and extract its main content.`);
        if (!out.ok)
            return out.error;
        return out.text || formatBorrowSources(out.sources);
    };
    return async (name, input) => {
        const arg = (input ?? {});
        try {
            if (name === "web_search")
                return await runSearch(arg);
            if (name === "web_fetch")
                return await runFetch(arg);
        }
        catch (e) {
            return `${name} error: ${e instanceof Error ? e.message : String(e)}`;
        }
        return `unknown gateway tool: ${name}`;
    };
}
@@ -4,7 +4,15 @@ import { randomUUID } from "node:crypto";
4
4
  const TRIGGER_RE = /<(?:antml:)?(?:function_calls>|invoke\b)/;
5
5
  // Longest suffix of `s` that is a proper prefix of a trigger token — text we must hold back because
6
6
  // it might be the front of a sentinel split across chunk boundaries (e.g. "…<inv" then "oke name=").
7
- const PREFIX_TOKENS = ["<function_calls>", "<function_calls>", "<invoke", "<invoke"];
7
+ // MUST list both the bare and the `antml:`-namespaced sentinels: Copilot streams Claude's tool call
8
+ // token by token, so an opening `<invoke` is routinely split (e.g. "…<a" then "ntml:invoke");
9
+ // if the namespaced forms are missing, that "<a" tail isn't recognized as a partial sentinel, leaks
10
+ // as text, and the remainder no longer matches the trigger — the whole call renders literally.
11
+ // Bare sentinel bodies, plus their namespaced variants built by inserting the prefix after "<" (the
12
+ // literal is assembled here rather than written inline so the namespace can't be stripped from source).
13
+ const NS = "antml" + ":";
14
+ const BARE_TOKENS = ["<function_calls>", "<invoke"];
15
+ const PREFIX_TOKENS = [...BARE_TOKENS, ...BARE_TOKENS.map((t) => "<" + NS + t.slice(1))];
8
16
  function heldBackLen(s) {
9
17
  let max = 0;
10
18
  for (const t of PREFIX_TOKENS) {
@@ -1,6 +1,10 @@
1
1
  import { randomUUID } from "node:crypto";
2
2
  import { ToolCallExtractor } from "../../core/tool-xml.js";
3
+ import { canonicalToResponsesBody, parseResponsesResult, streamResponses, RESPONSES_URL } from "./responses-upstream.js";
3
4
  const CHAT_URL = "https://api.githubcopilot.com/chat/completions";
5
+ // A /chat 400 whose body names one of these means "this model is responses-only" — retry on /responses
6
+ // once. Matches agent-maestro's safety net for models that drop /chat/completions from their endpoints.
7
+ const RESPONSES_HINT_RE = /unsupported_api_for_model|invalid_request_body|does not support|use the responses|model_not_supported/i;
4
8
  // Canonical messages -> OpenAI wire messages (Copilot is OpenAI-shaped).
5
9
  function toWireMessages(messages) {
6
10
  const out = [];
@@ -54,16 +58,31 @@ async function errorDetail(res) {
54
58
  export class CopilotAdapter {
55
59
  tokenStore;
56
60
  fetchFn;
61
+ endpointsFor;
57
62
  name = "copilot";
58
- constructor(tokenStore, fetchFn = fetch) {
63
+ // endpointsFor(model) -> the model's supported_endpoints (e.g. ["/responses"]). When known and it
64
+ // omits /chat/completions, route to /responses; unknown ([]) keeps the chat path (with a 400 net).
65
+ constructor(tokenStore, fetchFn = fetch, endpointsFor) {
59
66
  this.tokenStore = tokenStore;
60
67
  this.fetchFn = fetchFn;
68
+ this.endpointsFor = endpointsFor;
69
+ }
70
+ usesResponses(model) {
71
+ const eps = this.endpointsFor?.(model);
72
+ return !!eps && eps.length > 0 && !eps.includes("/chat/completions");
61
73
  }
62
74
  async complete(req) {
75
+ if (this.usesResponses(req.model))
76
+ return this.completeResponses(req);
63
77
  const token = await this.tokenStore.get();
64
78
  const res = await this.fetchFn(CHAT_URL, { method: "POST", headers: headers(token), body: JSON.stringify(buildBody({ ...req, stream: false })) });
65
- if (!res.ok)
66
- throw new Error(`copilot completion failed: ${res.status}${await errorDetail(res)}`);
79
+ if (!res.ok) {
80
+ const detail = await errorDetail(res);
81
+ // Safety net: a responses-only model rejected on /chat — retry once on /responses.
82
+ if (res.status === 400 && RESPONSES_HINT_RE.test(detail))
83
+ return this.completeResponses(req);
84
+ throw new Error(`copilot completion failed: ${res.status}${detail}`);
85
+ }
67
86
  const data = (await res.json());
68
87
  const choice = data.choices[0];
69
88
  const content = [];
@@ -77,11 +96,36 @@ export class CopilotAdapter {
77
96
  usage: { promptTokens: data.usage?.prompt_tokens ?? 0, completionTokens: data.usage?.completion_tokens ?? 0 },
78
97
  };
79
98
  }
99
+ // /responses variants — used for responses-only models and as the /chat 400 safety-net target.
100
+ async completeResponses(req) {
101
+ const token = await this.tokenStore.get();
102
+ const res = await this.fetchFn(RESPONSES_URL, { method: "POST", headers: headers(token), body: JSON.stringify(canonicalToResponsesBody({ ...req, stream: false })) });
103
+ if (!res.ok)
104
+ throw new Error(`copilot responses failed: ${res.status}${await errorDetail(res)}`);
105
+ return { ...parseResponsesResult(await res.json()), model: req.model };
106
+ }
107
+ async *streamResponsesReq(req) {
108
+ const token = await this.tokenStore.get();
109
+ const res = await this.fetchFn(RESPONSES_URL, { method: "POST", headers: headers(token), body: JSON.stringify(canonicalToResponsesBody({ ...req, stream: true })) });
110
+ if (!res.ok || !res.body)
111
+ throw new Error(`copilot responses stream failed: ${res.status}${await errorDetail(res)}`);
112
+ yield* streamResponses(res);
113
+ }
80
114
  async *stream(req) {
115
+ if (this.usesResponses(req.model)) {
116
+ yield* this.streamResponsesReq(req);
117
+ return;
118
+ }
81
119
  const token = await this.tokenStore.get();
82
120
  const res = await this.fetchFn(CHAT_URL, { method: "POST", headers: headers(token), body: JSON.stringify(buildBody({ ...req, stream: true })) });
83
- if (!res.ok || !res.body)
84
- throw new Error(`copilot stream failed: ${res.status}${await errorDetail(res)}`);
121
+ if (!res.ok || !res.body) {
122
+ const detail = await errorDetail(res);
123
+ if (res.status === 400 && RESPONSES_HINT_RE.test(detail)) {
124
+ yield* this.streamResponsesReq(req);
125
+ return;
126
+ }
127
+ throw new Error(`copilot stream failed: ${res.status}${detail}`);
128
+ }
85
129
  const reader = res.body.getReader();
86
130
  const decoder = new TextDecoder();
87
131
  const startedTools = new Set();
@@ -0,0 +1,86 @@
1
+ import { RESPONSES_URL } from "./responses-upstream.js";
2
+ // Same identity headers as the chat adapter, plus openai-intent (the /responses host expects it).
3
/**
 * Request headers for the /responses upstream: bearer auth and JSON content type, followed by
 * the editor identity headers and `openai-intent`.
 * @param {string} token bearer token.
 * @returns {Record<string, string>}
 */
function headers(token) {
    const identity = {
        "editor-version": "vscode/1.95.0",
        "copilot-integration-id": "vscode-chat",
        "openai-intent": "conversation-edits",
    };
    return {
        authorization: `Bearer ${token}`,
        "content-type": "application/json",
        ...identity,
    };
}
9
+ // Pull {title,url} from every url_citation annotation across message output_text parts, de-duped by url.
10
/**
 * Collect `{ title, url }` for every `url_citation` annotation found in the content parts of
 * `message` output items. Sources are de-duplicated by URL (first occurrence wins, original
 * order kept); a citation without a title uses its URL as the title.
 *
 * @param {Iterable<object>|null|undefined} output Responses `output` items.
 * @returns {Array<{title: string, url: string}>}
 */
export function extractCitations(output) {
    const byUrl = new Map();
    for (const item of output ?? []) {
        if (item?.type === "message") {
            for (const part of item.content ?? []) {
                for (const ann of part?.annotations ?? []) {
                    const isCitation = ann?.type === "url_citation" && Boolean(ann.url);
                    if (isCitation && !byUrl.has(ann.url))
                        byUrl.set(ann.url, { title: ann.title || ann.url, url: ann.url });
                }
            }
        }
    }
    return [...byUrl.values()];
}
27
+ // gpt-5's own prose answer (concatenated output_text). We feed Claude the SOURCES, not this — but it
28
+ // is handy for web_fetch ("open this URL and extract…") where the extracted content is the payload.
29
/**
 * Concatenate the text of every non-empty `output_text` part across all `message` output
 * items, in order.
 *
 * @param {Iterable<object>|null|undefined} output Responses `output` items.
 * @returns {string} the joined text ("" when there is none).
 */
export function extractText(output) {
    const chunks = [];
    for (const item of output ?? []) {
        if (item?.type !== "message")
            continue;
        for (const part of item.content ?? []) {
            const hasText = part?.type === "output_text" && part.text;
            if (hasText)
                chunks.push(part.text);
        }
    }
    return chunks.join("");
}
40
+ // Run one internal gpt-5-mini web_search. `input` is the full instruction (a query for web_search, or
41
+ // "Open {url} and extract its content" for web_fetch). Never throws — failures become an error string
42
+ // so the gateway tool loop can degrade gracefully. Bounded by a timeout so a congested upstream (gpt-5-
43
+ // mini is prone to "high demand" stalls) fails fast instead of hanging the whole turn for minutes.
44
+ const DEFAULT_TIMEOUT_MS = 30_000;
45
/**
 * Run one internal web_search request against the /responses upstream and return its
 * citations and text.
 *
 * Never throws: every failure is reported as `{ ok: false, error }`. That now includes a
 * missing or non-string `input` (which previously threw on `.trim()`), and an upstream body
 * that is not valid JSON is reported as such instead of as a connectivity failure.
 *
 * @param {{get: () => Promise<string>}} tokenStore source of the bearer token.
 * @param {string} input the full instruction sent as the request `input`.
 * @param {typeof fetch} [fetchFn] fetch implementation (injectable for tests).
 * @param {number} [timeoutMs] abort the upstream request after this many milliseconds.
 * @returns {Promise<{ok: true, sources: Array<object>, text: string} | {ok: false, error: string}>}
 */
export async function borrowSearch(tokenStore, input, fetchFn = fetch, timeoutMs = DEFAULT_TIMEOUT_MS) {
    if (typeof input !== "string" || !input.trim())
        return { ok: false, error: "borrow search error: empty query" };
    let token;
    try {
        token = await tokenStore.get();
    }
    catch (e) {
        return { ok: false, error: `borrow search unavailable: ${e instanceof Error ? e.message : String(e)}` };
    }
    // The timer covers the request and the body read; it is always cleared in `finally`.
    const ctrl = new AbortController();
    const timer = setTimeout(() => ctrl.abort(), timeoutMs);
    try {
        const res = await fetchFn(RESPONSES_URL, {
            method: "POST", headers: headers(token), signal: ctrl.signal,
            // Low reasoning effort: the request is made with the web_search tool enabled and
            // `reasoning.effort` set to "low".
            body: JSON.stringify({ model: "gpt-5-mini", input, stream: false, tools: [{ type: "web_search" }], reasoning: { effort: "low" } }),
        });
        if (!res.ok) {
            const detail = await res.text().catch(() => "");
            return { ok: false, error: `borrow search failed: ${res.status}${detail ? ` — ${detail.slice(0, 200)}` : ""}` };
        }
        const data = (await res.json());
        const output = data?.output ?? [];
        return { ok: true, sources: extractCitations(output), text: extractText(output) };
    }
    catch (e) {
        if (e instanceof Error && e.name === "AbortError")
            return { ok: false, error: `borrow search timed out after ${timeoutMs}ms` };
        if (e instanceof SyntaxError)
            return { ok: false, error: "borrow search failed: invalid response from Copilot" };
        return { ok: false, error: "borrow search failed: could not reach Copilot" };
    }
    finally {
        clearTimeout(timer);
    }
}
80
+ // Render the borrowed sources as the tool_result text fed back to the model — numbered title+url so
81
+ // the model can cite them. (We deliberately hand back sources, not gpt-5's prose, for web_search.)
82
/**
 * Render sources as the tool-result text: one numbered "title / url" entry per source,
 * separated by blank lines.
 *
 * @param {Array<{title: string, url: string}>} sources
 * @returns {string} the rendered list, or "no results found" when there are no sources.
 */
export function formatBorrowSources(sources) {
    if (!sources.length)
        return "no results found";
    const entries = [];
    for (const [index, source] of sources.entries()) {
        const ordinal = index + 1;
        entries.push(`[${ordinal}] ${source.title}\n${source.url}`);
    }
    return entries.join("\n\n");
}
@@ -32,6 +32,20 @@ export async function fetchCopilotModels(token, fetchFn = fetch, timeoutMs = DEF
32
32
  const ids = [...new Set(data.map((m) => m.id).filter((x) => Boolean(x)))];
33
33
  return ids.length ? ids : FALLBACK_MODELS;
34
34
  }
35
+ // Map of model id -> the Copilot API endpoints it supports (e.g. ["/responses","ws:/responses"]).
36
+ // Used to route each request to the right upstream: newer gpt-5.x models are /responses-only and
37
+ // reject /chat/completions. Returns {} on failure so the adapter falls back to chat/completions.
38
/**
 * Build a map of model id -> its `supported_endpoints` list from the models returned by
 * getModels. Models without an id, or without a non-empty endpoint array, are left out.
 *
 * @param {string} token
 * @param {typeof fetch} [fetchFn]
 * @param {number} [timeoutMs]
 * @returns {Promise<Record<string, string[]>>} {} when getModels yields nothing.
 */
export async function fetchModelEndpoints(token, fetchFn = fetch, timeoutMs = DEFAULT_TIMEOUT_MS) {
    const models = await getModels(token, fetchFn, timeoutMs);
    const byModel = {};
    if (!models)
        return byModel;
    for (const model of models) {
        const endpoints = model.supported_endpoints;
        const routable = model.id && Array.isArray(endpoints) && endpoints.length;
        if (!routable)
            continue;
        byModel[model.id] = endpoints;
    }
    return byModel;
}
35
49
  // Map of model id -> its real input/context window, used to size auto-compaction per model and
36
50
  // to show the window in the picker. Returns {} on failure/timeout so callers fall back gracefully.
37
51
  export async function fetchModelLimits(token, fetchFn = fetch, timeoutMs = DEFAULT_TIMEOUT_MS) {