@hsupu/copilot-api 0.7.21 → 0.7.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.mjs CHANGED
@@ -21,6 +21,47 @@ import { cors } from "hono/cors";
21
21
  import { trimTrailingSlash } from "hono/trailing-slash";
22
22
  import { streamSSE } from "hono/streaming";
23
23
 
24
+ //#region src/lib/state.ts
25
+ /**
26
+ * Rebuild model lookup indexes from state.models.
27
+ * Called by cacheModels() in production; call directly in tests after setting state.models.
28
+ */
29
+ function rebuildModelIndex() {
30
+ const data = state.models?.data ?? [];
31
+ state.modelIndex = new Map(data.map((m) => [m.id, m]));
32
+ state.modelIds = new Set(data.map((m) => m.id));
33
+ }
34
+ const DEFAULT_MODEL_OVERRIDES = {
35
+ opus: "claude-opus-4.6",
36
+ sonnet: "claude-sonnet-4.6",
37
+ haiku: "claude-haiku-4.5"
38
+ };
39
+ const state = {
40
+ accountType: "individual",
41
+ autoTruncate: true,
42
+ compressToolResultsBeforeTruncate: true,
43
+ contextEditingMode: "off",
44
+ stripServerTools: false,
45
+ dedupToolCalls: false,
46
+ fetchTimeout: 300,
47
+ historyLimit: 200,
48
+ historyMinEntries: 50,
49
+ modelIds: /* @__PURE__ */ new Set(),
50
+ modelIndex: /* @__PURE__ */ new Map(),
51
+ modelOverrides: { ...DEFAULT_MODEL_OVERRIDES },
52
+ rewriteSystemReminders: false,
53
+ showGitHubToken: false,
54
+ shutdownAbortWait: 120,
55
+ shutdownGracefulWait: 60,
56
+ staleRequestMaxAge: 600,
57
+ streamIdleTimeout: 300,
58
+ systemPromptOverrides: [],
59
+ stripReadToolResultTags: false,
60
+ normalizeResponsesCallIds: false,
61
+ verbose: false
62
+ };
63
+
64
+ //#endregion
24
65
  //#region src/lib/utils.ts
25
66
  const sleep = (ms) => new Promise((resolve) => {
26
67
  setTimeout(resolve, ms);
@@ -408,6 +449,7 @@ function updateEntry(id, update) {
408
449
  if (update.pipelineInfo) entry.pipelineInfo = update.pipelineInfo;
409
450
  if (update.durationMs !== void 0) entry.durationMs = update.durationMs;
410
451
  if (update.sseEvents) entry.sseEvents = update.sseEvents;
452
+ if (update.httpHeaders) entry.httpHeaders = update.httpHeaders;
411
453
  if (update.response) {
412
454
  const session = historyState.sessions.get(entry.sessionId);
413
455
  if (session) {
@@ -619,46 +661,6 @@ function exportHistory(format = "json") {
619
661
  return [headers.join(","), ...rows.map((r) => r.map((v) => escapeCsvValue(v)).join(","))].join("\n");
620
662
  }
621
663
 
622
- //#endregion
623
- //#region src/lib/state.ts
624
- /**
625
- * Rebuild model lookup indexes from state.models.
626
- * Called by cacheModels() in production; call directly in tests after setting state.models.
627
- */
628
- function rebuildModelIndex() {
629
- const data = state.models?.data ?? [];
630
- state.modelIndex = new Map(data.map((m) => [m.id, m]));
631
- state.modelIds = new Set(data.map((m) => m.id));
632
- }
633
- const DEFAULT_MODEL_OVERRIDES = {
634
- opus: "claude-opus-4.6",
635
- sonnet: "claude-sonnet-4.6",
636
- haiku: "claude-haiku-4.5"
637
- };
638
- const state = {
639
- accountType: "individual",
640
- autoTruncate: true,
641
- compressToolResultsBeforeTruncate: true,
642
- contextEditingMode: "off",
643
- stripServerTools: false,
644
- dedupToolCalls: false,
645
- fetchTimeout: 300,
646
- historyLimit: 200,
647
- historyMinEntries: 50,
648
- modelIds: /* @__PURE__ */ new Set(),
649
- modelIndex: /* @__PURE__ */ new Map(),
650
- modelOverrides: { ...DEFAULT_MODEL_OVERRIDES },
651
- rewriteSystemReminders: false,
652
- showGitHubToken: false,
653
- shutdownAbortWait: 120,
654
- shutdownGracefulWait: 60,
655
- staleRequestMaxAge: 600,
656
- streamIdleTimeout: 300,
657
- systemPromptOverrides: [],
658
- stripReadToolResultTags: false,
659
- verbose: false
660
- };
661
-
662
664
  //#endregion
663
665
  //#region src/lib/history/memory-pressure.ts
664
666
  /**
@@ -747,7 +749,7 @@ function startMemoryPressureMonitor() {
747
749
  consola.error("[memory] Error in memory pressure check:", error);
748
750
  });
749
751
  }, CHECK_INTERVAL_MS);
750
- if (timer && "unref" in timer) timer.unref();
752
+ if ("unref" in timer) timer.unref();
751
753
  }
752
754
  /** Stop the memory pressure monitor */
753
755
  function stopMemoryPressureMonitor() {
@@ -910,6 +912,8 @@ async function applyConfigToState() {
910
912
  if (config.fetch_timeout !== void 0) state.fetchTimeout = config.fetch_timeout;
911
913
  if (config.stream_idle_timeout !== void 0) state.streamIdleTimeout = config.stream_idle_timeout;
912
914
  if (config.stale_request_max_age !== void 0) state.staleRequestMaxAge = config.stale_request_max_age;
915
+ const responsesConfig = config["openai-responses"];
916
+ if (responsesConfig && responsesConfig.normalize_call_ids !== void 0) state.normalizeResponsesCallIds = responsesConfig.normalize_call_ids;
913
917
  const currentMtime = getConfigMtimeMs();
914
918
  if (hasApplied && currentMtime !== lastAppliedMtimeMs) consola.info("[config] Reloaded config.yaml");
915
919
  hasApplied = true;
@@ -1081,97 +1085,6 @@ function initProxyBun(options) {
1081
1085
  consola.debug(`Proxy configured (Bun env): ${formatProxyDisplay(options.url)}`);
1082
1086
  }
1083
1087
 
1084
- //#endregion
1085
- //#region src/lib/copilot-api.ts
1086
- const standardHeaders = () => ({
1087
- "content-type": "application/json",
1088
- accept: "application/json"
1089
- });
1090
- const COPILOT_VERSION = "0.38.0";
1091
- const EDITOR_PLUGIN_VERSION = `copilot-chat/${COPILOT_VERSION}`;
1092
- const USER_AGENT = `GitHubCopilotChat/${COPILOT_VERSION}`;
1093
- /** Copilot Chat API version (for chat/completions requests) */
1094
- const COPILOT_API_VERSION = "2025-05-01";
1095
- /** Copilot internal API version (for token & usage endpoints) */
1096
- const COPILOT_INTERNAL_API_VERSION = "2025-04-01";
1097
- /** GitHub public API version (for /user, repos, etc.) */
1098
- const GITHUB_API_VERSION = "2022-11-28";
1099
- /**
1100
- * Session-level interaction ID.
1101
- * Used to correlate all requests within a single server session.
1102
- * Unlike x-request-id (per-request UUID), this stays constant for the server lifetime.
1103
- */
1104
- const INTERACTION_ID = randomUUID();
1105
- const copilotBaseUrl = (state) => state.accountType === "individual" ? "https://api.githubcopilot.com" : `https://api.${state.accountType}.githubcopilot.com`;
1106
- const copilotHeaders = (state, opts) => {
1107
- const headers = {
1108
- Authorization: `Bearer ${state.copilotToken}`,
1109
- "content-type": standardHeaders()["content-type"],
1110
- "copilot-integration-id": "vscode-chat",
1111
- "editor-version": `vscode/${state.vsCodeVersion}`,
1112
- "editor-plugin-version": EDITOR_PLUGIN_VERSION,
1113
- "user-agent": USER_AGENT,
1114
- "openai-intent": opts?.intent ?? "conversation-panel",
1115
- "x-github-api-version": COPILOT_API_VERSION,
1116
- "x-request-id": randomUUID(),
1117
- "X-Interaction-Id": INTERACTION_ID,
1118
- "x-vscode-user-agent-library-version": "electron-fetch"
1119
- };
1120
- if (opts?.vision) headers["copilot-vision-request"] = "true";
1121
- if (opts?.modelRequestHeaders) {
1122
- const coreKeysLower = new Set(Object.keys(headers).map((k) => k.toLowerCase()));
1123
- for (const [key, value] of Object.entries(opts.modelRequestHeaders)) if (!coreKeysLower.has(key.toLowerCase())) headers[key] = value;
1124
- }
1125
- return headers;
1126
- };
1127
- const GITHUB_API_BASE_URL = "https://api.github.com";
1128
- const githubHeaders = (state) => ({
1129
- ...standardHeaders(),
1130
- authorization: `token ${state.githubToken}`,
1131
- "editor-version": `vscode/${state.vsCodeVersion}`,
1132
- "editor-plugin-version": EDITOR_PLUGIN_VERSION,
1133
- "user-agent": USER_AGENT,
1134
- "x-github-api-version": GITHUB_API_VERSION,
1135
- "x-vscode-user-agent-library-version": "electron-fetch"
1136
- });
1137
- const GITHUB_BASE_URL = "https://github.com";
1138
- const GITHUB_CLIENT_ID = "Iv1.b507a08c87ecfe98";
1139
- const GITHUB_APP_SCOPES = ["read:user"].join(" ");
1140
- /** Fallback VSCode version when GitHub API is unavailable */
1141
- const VSCODE_VERSION_FALLBACK = "1.104.3";
1142
- /** GitHub API endpoint for latest VSCode release */
1143
- const VSCODE_RELEASE_URL = "https://api.github.com/repos/microsoft/vscode/releases/latest";
1144
- /** Fetch the latest VSCode version and cache in global state */
1145
- async function cacheVSCodeVersion() {
1146
- const response = await getVSCodeVersion();
1147
- state.vsCodeVersion = response;
1148
- consola.info(`Using VSCode version: ${response}`);
1149
- }
1150
- /** Fetch the latest VSCode version from GitHub releases, falling back to a hardcoded version */
1151
- async function getVSCodeVersion() {
1152
- const controller = new AbortController();
1153
- const timeout = setTimeout(() => {
1154
- controller.abort();
1155
- }, 5e3);
1156
- try {
1157
- const response = await fetch(VSCODE_RELEASE_URL, {
1158
- signal: controller.signal,
1159
- headers: {
1160
- Accept: "application/vnd.github.v3+json",
1161
- "User-Agent": "copilot-api"
1162
- }
1163
- });
1164
- if (!response.ok) return VSCODE_VERSION_FALLBACK;
1165
- const version = (await response.json()).tag_name;
1166
- if (version && /^\d+\.\d+\.\d+$/.test(version)) return version;
1167
- return VSCODE_VERSION_FALLBACK;
1168
- } catch {
1169
- return VSCODE_VERSION_FALLBACK;
1170
- } finally {
1171
- clearTimeout(timeout);
1172
- }
1173
- }
1174
-
1175
1088
  //#endregion
1176
1089
  //#region src/lib/sanitize-system-reminder.ts
1177
1090
  /**
@@ -2014,63 +1927,162 @@ function getErrorMessage(error, fallback = "Unknown error") {
2014
1927
  }
2015
1928
 
2016
1929
  //#endregion
2017
- //#region src/lib/token/copilot-client.ts
2018
- /** Copilot API client token and usage */
2019
- const getCopilotToken = async () => {
2020
- const response = await fetch(`${GITHUB_API_BASE_URL}/copilot_internal/v2/token`, { headers: {
2021
- ...githubHeaders(state),
2022
- "x-github-api-version": COPILOT_INTERNAL_API_VERSION
2023
- } });
2024
- if (!response.ok) throw await HTTPError.fromResponse("Failed to get Copilot token", response);
2025
- return await response.json();
2026
- };
2027
- const getCopilotUsage = async () => {
2028
- const response = await fetch(`${GITHUB_API_BASE_URL}/copilot_internal/user`, { headers: {
2029
- ...githubHeaders(state),
2030
- "x-github-api-version": COPILOT_INTERNAL_API_VERSION
2031
- } });
2032
- if (!response.ok) throw await HTTPError.fromResponse("Failed to get Copilot usage", response);
2033
- return await response.json();
2034
- };
2035
-
2036
- //#endregion
2037
- //#region src/lib/token/copilot-token-manager.ts
1930
+ //#region src/lib/copilot-api.ts
1931
+ const standardHeaders = () => ({
1932
+ "content-type": "application/json",
1933
+ accept: "application/json"
1934
+ });
1935
+ const COPILOT_VERSION = "0.38.0";
1936
+ const EDITOR_PLUGIN_VERSION = `copilot-chat/${COPILOT_VERSION}`;
1937
+ const USER_AGENT = `GitHubCopilotChat/${COPILOT_VERSION}`;
1938
+ /** Copilot Chat API version (for chat/completions requests) */
1939
+ const COPILOT_API_VERSION = "2025-05-01";
1940
+ /** Copilot internal API version (for token & usage endpoints) */
1941
+ const COPILOT_INTERNAL_API_VERSION = "2025-04-01";
1942
+ /** GitHub public API version (for /user, repos, etc.) */
1943
+ const GITHUB_API_VERSION = "2022-11-28";
2038
1944
  /**
2039
- * Manages Copilot token lifecycle including automatic refresh.
2040
- * Depends on GitHubTokenManager for authentication.
2041
- *
2042
- * All refresh paths (scheduled + on-demand via 401) go through `refresh()`,
2043
- * which deduplicates concurrent callers and reschedules the next refresh based
2044
- * on the server's `refresh_in` value.
1945
+ * Session-level interaction ID.
1946
+ * Used to correlate all requests within a single server session.
1947
+ * Unlike x-request-id (per-request UUID), this stays constant for the server lifetime.
2045
1948
  */
2046
- var CopilotTokenManager = class {
2047
- githubTokenManager;
2048
- currentToken = null;
2049
- refreshTimeout = null;
2050
- minRefreshIntervalMs;
2051
- maxRetries;
2052
- /** Shared promise to prevent concurrent refresh attempts */
2053
- refreshInFlight = null;
2054
- constructor(options) {
2055
- this.githubTokenManager = options.githubTokenManager;
2056
- this.minRefreshIntervalMs = (options.minRefreshIntervalSeconds ?? 60) * 1e3;
2057
- this.maxRetries = options.maxRetries ?? 3;
2058
- }
2059
- /**
2060
- * Get the current Copilot token info.
2061
- */
2062
- getCurrentToken() {
2063
- return this.currentToken;
1949
+ const INTERACTION_ID = randomUUID();
1950
+ const copilotBaseUrl = (state) => state.accountType === "individual" ? "https://api.githubcopilot.com" : `https://api.${state.accountType}.githubcopilot.com`;
1951
+ const copilotHeaders = (state, opts) => {
1952
+ const headers = {
1953
+ Authorization: `Bearer ${state.copilotToken}`,
1954
+ "content-type": standardHeaders()["content-type"],
1955
+ "copilot-integration-id": "vscode-chat",
1956
+ "editor-version": `vscode/${state.vsCodeVersion}`,
1957
+ "editor-plugin-version": EDITOR_PLUGIN_VERSION,
1958
+ "user-agent": USER_AGENT,
1959
+ "openai-intent": opts?.intent ?? "conversation-panel",
1960
+ "x-github-api-version": COPILOT_API_VERSION,
1961
+ "x-request-id": randomUUID(),
1962
+ "X-Interaction-Id": INTERACTION_ID,
1963
+ "x-vscode-user-agent-library-version": "electron-fetch"
1964
+ };
1965
+ if (opts?.vision) headers["copilot-vision-request"] = "true";
1966
+ if (opts?.modelRequestHeaders) {
1967
+ const coreKeysLower = new Set(Object.keys(headers).map((k) => k.toLowerCase()));
1968
+ for (const [key, value] of Object.entries(opts.modelRequestHeaders)) if (!coreKeysLower.has(key.toLowerCase())) headers[key] = value;
2064
1969
  }
2065
- /**
2066
- * Initialize the Copilot token and start automatic refresh.
2067
- */
2068
- async initialize() {
2069
- const tokenInfo = await this.fetchCopilotToken();
2070
- state.copilotToken = tokenInfo.token;
2071
- consola.debug("GitHub Copilot Token fetched successfully!");
2072
- this.scheduleRefresh(tokenInfo.refreshIn);
2073
- return tokenInfo;
1970
+ return headers;
1971
+ };
1972
+ const GITHUB_API_BASE_URL = "https://api.github.com";
1973
+ const githubHeaders = (state) => ({
1974
+ ...standardHeaders(),
1975
+ authorization: `token ${state.githubToken}`,
1976
+ "editor-version": `vscode/${state.vsCodeVersion}`,
1977
+ "editor-plugin-version": EDITOR_PLUGIN_VERSION,
1978
+ "user-agent": USER_AGENT,
1979
+ "x-github-api-version": GITHUB_API_VERSION,
1980
+ "x-vscode-user-agent-library-version": "electron-fetch"
1981
+ });
1982
+ const GITHUB_BASE_URL = "https://github.com";
1983
+ const GITHUB_CLIENT_ID = "Iv1.b507a08c87ecfe98";
1984
+ const GITHUB_APP_SCOPES = ["read:user"].join(" ");
1985
+ /** Fallback VSCode version when GitHub API is unavailable */
1986
+ const VSCODE_VERSION_FALLBACK = "1.104.3";
1987
+ /** GitHub API endpoint for latest VSCode release */
1988
+ const VSCODE_RELEASE_URL = "https://api.github.com/repos/microsoft/vscode/releases/latest";
1989
+ /** Fetch the latest VSCode version and cache in global state */
1990
+ async function cacheVSCodeVersion() {
1991
+ const response = await getVSCodeVersion();
1992
+ state.vsCodeVersion = response;
1993
+ consola.info(`Using VSCode version: ${response}`);
1994
+ }
1995
+ /** Fetch the latest VSCode version from GitHub releases, falling back to a hardcoded version */
1996
+ async function getVSCodeVersion() {
1997
+ const controller = new AbortController();
1998
+ const timeout = setTimeout(() => {
1999
+ controller.abort();
2000
+ }, 5e3);
2001
+ try {
2002
+ const response = await fetch(VSCODE_RELEASE_URL, {
2003
+ signal: controller.signal,
2004
+ headers: {
2005
+ Accept: "application/vnd.github.v3+json",
2006
+ "User-Agent": "copilot-api"
2007
+ }
2008
+ });
2009
+ if (!response.ok) return VSCODE_VERSION_FALLBACK;
2010
+ const version = (await response.json()).tag_name;
2011
+ if (version && /^\d+\.\d+\.\d+$/.test(version)) return version;
2012
+ return VSCODE_VERSION_FALLBACK;
2013
+ } catch {
2014
+ return VSCODE_VERSION_FALLBACK;
2015
+ } finally {
2016
+ clearTimeout(timeout);
2017
+ }
2018
+ }
2019
+
2020
+ //#endregion
2021
+ //#region src/lib/token/copilot-client.ts
2022
+ /** Copilot API client — token and usage */
2023
+ const getCopilotToken = async () => {
2024
+ const response = await fetch(`${GITHUB_API_BASE_URL}/copilot_internal/v2/token`, {
2025
+ headers: {
2026
+ ...githubHeaders(state),
2027
+ "x-github-api-version": COPILOT_INTERNAL_API_VERSION
2028
+ },
2029
+ signal: AbortSignal.timeout(15e3)
2030
+ });
2031
+ if (!response.ok) throw await HTTPError.fromResponse("Failed to get Copilot token", response);
2032
+ return await response.json();
2033
+ };
2034
+ const getCopilotUsage = async () => {
2035
+ const response = await fetch(`${GITHUB_API_BASE_URL}/copilot_internal/user`, {
2036
+ headers: {
2037
+ ...githubHeaders(state),
2038
+ "x-github-api-version": COPILOT_INTERNAL_API_VERSION
2039
+ },
2040
+ signal: AbortSignal.timeout(15e3)
2041
+ });
2042
+ if (!response.ok) throw await HTTPError.fromResponse("Failed to get Copilot usage", response);
2043
+ return await response.json();
2044
+ };
2045
+
2046
+ //#endregion
2047
+ //#region src/lib/token/copilot-token-manager.ts
2048
+ /**
2049
+ * Manages Copilot token lifecycle including automatic refresh.
2050
+ * Depends on GitHubTokenManager for authentication.
2051
+ *
2052
+ * All refresh paths (scheduled + on-demand via 401) go through `refresh()`,
2053
+ * which deduplicates concurrent callers and reschedules the next refresh based
2054
+ * on the server's `refresh_in` value.
2055
+ */
2056
+ var CopilotTokenManager = class {
2057
+ githubTokenManager;
2058
+ currentToken = null;
2059
+ refreshTimeout = null;
2060
+ minRefreshIntervalMs;
2061
+ maxRetries;
2062
+ /** Shared promise to prevent concurrent refresh attempts */
2063
+ refreshInFlight = null;
2064
+ /** Set when a refresh attempt fails; cleared on next success */
2065
+ _refreshNeeded = false;
2066
+ constructor(options) {
2067
+ this.githubTokenManager = options.githubTokenManager;
2068
+ this.minRefreshIntervalMs = (options.minRefreshIntervalSeconds ?? 60) * 1e3;
2069
+ this.maxRetries = options.maxRetries ?? 3;
2070
+ }
2071
+ /**
2072
+ * Get the current Copilot token info.
2073
+ */
2074
+ getCurrentToken() {
2075
+ return this.currentToken;
2076
+ }
2077
+ /**
2078
+ * Initialize the Copilot token and start automatic refresh.
2079
+ */
2080
+ async initialize() {
2081
+ const tokenInfo = await this.fetchCopilotToken();
2082
+ state.copilotToken = tokenInfo.token;
2083
+ consola.debug("GitHub Copilot Token fetched successfully!");
2084
+ this.scheduleRefresh(tokenInfo.refreshIn);
2085
+ return tokenInfo;
2074
2086
  }
2075
2087
  /**
2076
2088
  * Fetch a new Copilot token from the API.
@@ -2105,10 +2117,12 @@ var CopilotTokenManager = class {
2105
2117
  }
2106
2118
  }
2107
2119
  const delay = Math.min(1e3 * 2 ** attempt, 3e4);
2108
- consola.warn(`Token refresh attempt ${attempt + 1}/${this.maxRetries} failed, retrying in ${delay}ms`);
2120
+ const reason = error instanceof Error ? formatErrorWithCause(error) : String(error);
2121
+ consola.warn(`Token refresh attempt ${attempt + 1}/${this.maxRetries} failed: ${reason}, retrying in ${delay}ms`);
2109
2122
  await new Promise((resolve) => setTimeout(resolve, delay));
2110
2123
  }
2111
- consola.error("All token refresh attempts failed:", lastError);
2124
+ const reason = lastError instanceof Error ? formatErrorWithCause(lastError) : String(lastError);
2125
+ consola.error(`All token refresh attempts failed: ${reason}`);
2112
2126
  return null;
2113
2127
  }
2114
2128
  /**
@@ -2171,10 +2185,12 @@ var CopilotTokenManager = class {
2171
2185
  }
2172
2186
  this.refreshInFlight = this.fetchTokenWithRetry().then((tokenInfo) => {
2173
2187
  if (tokenInfo) {
2188
+ this._refreshNeeded = false;
2174
2189
  state.copilotToken = tokenInfo.token;
2175
2190
  this.scheduleRefresh(tokenInfo.refreshIn);
2176
2191
  consola.verbose(`[CopilotToken] Token refreshed (next refresh_in=${tokenInfo.refreshIn}s)`);
2177
2192
  } else {
2193
+ this._refreshNeeded = true;
2178
2194
  consola.error("[CopilotToken] Token refresh failed, keeping existing token");
2179
2195
  this.scheduleRefresh(300);
2180
2196
  }
@@ -2185,6 +2201,16 @@ var CopilotTokenManager = class {
2185
2201
  return this.refreshInFlight;
2186
2202
  }
2187
2203
  /**
2204
+ * Proactively ensure the token is valid before sending a request.
2205
+ * Triggers a refresh if the token is expired/expiring or the last
2206
+ * refresh attempt failed. Concurrent callers share the same in-flight
2207
+ * refresh via `refresh()`.
2208
+ */
2209
+ async ensureValidToken() {
2210
+ if (!this.isExpiredOrExpiring() && !this._refreshNeeded) return;
2211
+ await this.refresh();
2212
+ }
2213
+ /**
2188
2214
  * Check if the current token is expired or about to expire.
2189
2215
  */
2190
2216
  isExpiredOrExpiring(marginSeconds = 60) {
@@ -2656,6 +2682,14 @@ function getCopilotTokenManager() {
2656
2682
  function stopTokenRefresh() {
2657
2683
  copilotTokenManager?.stopAutoRefresh();
2658
2684
  }
2685
+ /**
2686
+ * Proactively ensure the Copilot token is valid.
2687
+ * Triggers a refresh if the token is expired/expiring or the last
2688
+ * background refresh failed. No-op if the manager is not initialized.
2689
+ */
2690
+ async function ensureValidCopilotToken() {
2691
+ await copilotTokenManager?.ensureValidToken();
2692
+ }
2659
2693
 
2660
2694
  //#endregion
2661
2695
  //#region src/auth.ts
@@ -2765,6 +2799,15 @@ const checkUsage = defineCommand({
2765
2799
  function createFetchSignal() {
2766
2800
  return state.fetchTimeout > 0 ? AbortSignal.timeout(state.fetchTimeout * 1e3) : void 0;
2767
2801
  }
2802
+ /**
2803
+ * Populate a HeadersCapture object with request and response headers.
2804
+ * Should be called immediately after fetch(), before !response.ok check,
2805
+ * so headers are captured even for error responses.
2806
+ */
2807
+ function captureHttpHeaders(capture, requestHeaders, response) {
2808
+ capture.request = { ...requestHeaders };
2809
+ capture.response = Object.fromEntries(response.headers.entries());
2810
+ }
2768
2811
 
2769
2812
  //#endregion
2770
2813
  //#region src/lib/models/client.ts
@@ -3449,6 +3492,7 @@ function createRequestContext(opts) {
3449
3492
  let _pipelineInfo = null;
3450
3493
  let _preprocessInfo = null;
3451
3494
  let _sseEvents = null;
3495
+ let _httpHeaders = null;
3452
3496
  const _sanitizationHistory = [];
3453
3497
  let _queueWaitMs = 0;
3454
3498
  const _attempts = [];
@@ -3485,6 +3529,9 @@ function createRequestContext(opts) {
3485
3529
  get preprocessInfo() {
3486
3530
  return _preprocessInfo;
3487
3531
  },
3532
+ get httpHeaders() {
3533
+ return _httpHeaders;
3534
+ },
3488
3535
  get attempts() {
3489
3536
  return _attempts;
3490
3537
  },
@@ -3523,6 +3570,12 @@ function createRequestContext(opts) {
3523
3570
  setSseEvents(events) {
3524
3571
  _sseEvents = events.length > 0 ? events : null;
3525
3572
  },
3573
+ setHttpHeaders(capture) {
3574
+ if (capture.request && capture.response) _httpHeaders = {
3575
+ request: capture.request,
3576
+ response: capture.response
3577
+ };
3578
+ },
3526
3579
  beginAttempt(attemptOpts) {
3527
3580
  const attempt = {
3528
3581
  index: _attempts.length,
@@ -3653,6 +3706,7 @@ function createRequestContext(opts) {
3653
3706
  if (lastTruncation) entry.truncation = lastTruncation;
3654
3707
  if (_pipelineInfo) entry.pipelineInfo = _pipelineInfo;
3655
3708
  if (_sseEvents) entry.sseEvents = _sseEvents;
3709
+ if (_httpHeaders) entry.httpHeaders = _httpHeaders;
3656
3710
  if (_attempts.length > 1) entry.attempts = _attempts.map((a) => ({
3657
3711
  index: a.index,
3658
3712
  strategy: a.strategy,
@@ -4728,7 +4782,7 @@ const setupClaudeCode = defineCommand({
4728
4782
 
4729
4783
  //#endregion
4730
4784
  //#region package.json
4731
- var version = "0.7.21";
4785
+ var version = "0.7.22";
4732
4786
 
4733
4787
  //#endregion
4734
4788
  //#region src/lib/context/error-persistence.ts
@@ -4845,7 +4899,8 @@ function handleHistoryEvent(event) {
4845
4899
  updateEntry(entryData.id, {
4846
4900
  response,
4847
4901
  durationMs: entryData.durationMs,
4848
- sseEvents: entryData.sseEvents
4902
+ sseEvents: entryData.sseEvents,
4903
+ httpHeaders: entryData.httpHeaders
4849
4904
  });
4850
4905
  break;
4851
4906
  }
@@ -5838,6 +5893,7 @@ const createResponses = async (payload, opts) => {
5838
5893
  body: JSON.stringify(payload),
5839
5894
  signal: fetchSignal
5840
5895
  });
5896
+ if (opts?.headersCapture) captureHttpHeaders(opts.headersCapture, headers, response);
5841
5897
  if (!response.ok) {
5842
5898
  consola.error("Failed to create responses", response);
5843
5899
  throw await HTTPError.fromResponse("Failed to create responses", response, payload.model);
@@ -5954,15 +6010,20 @@ function createTokenRefreshStrategy() {
5954
6010
  * centralizes that configuration to avoid duplication.
5955
6011
  */
5956
6012
  /** Create the FormatAdapter for Responses API pipeline execution */
5957
- function createResponsesAdapter(selectedModel) {
6013
+ function createResponsesAdapter(selectedModel, headersCapture) {
5958
6014
  return {
5959
6015
  format: "openai-responses",
5960
- sanitize: (p) => ({
5961
- payload: p,
5962
- blocksRemoved: 0,
5963
- systemReminderRemovals: 0
5964
- }),
5965
- execute: (p) => executeWithAdaptiveRateLimit(() => createResponses(p, { resolvedModel: selectedModel })),
6016
+ sanitize: (p) => {
6017
+ return {
6018
+ payload: state.normalizeResponsesCallIds ? normalizeCallIds(p) : p,
6019
+ blocksRemoved: 0,
6020
+ systemReminderRemovals: 0
6021
+ };
6022
+ },
6023
+ execute: (p) => executeWithAdaptiveRateLimit(() => createResponses(p, {
6024
+ resolvedModel: selectedModel,
6025
+ headersCapture
6026
+ })),
5966
6027
  logPayloadSize: (p) => {
5967
6028
  const count = typeof p.input === "string" ? 1 : p.input.length;
5968
6029
  consola.debug(`Responses payload: ${count} input item(s), model: ${p.model}`);
@@ -5973,6 +6034,36 @@ function createResponsesAdapter(selectedModel) {
5973
6034
  function createResponsesStrategies() {
5974
6035
  return [createNetworkRetryStrategy(), createTokenRefreshStrategy()];
5975
6036
  }
6037
+ const CALL_PREFIX = "call_";
6038
+ const FC_PREFIX = "fc_";
6039
+ /**
6040
+ * Normalize function call IDs in Responses API input.
6041
+ * Converts Chat Completions format `call_xxx` IDs to Responses format `fc_xxx` IDs
6042
+ * on `function_call` and `function_call_output` items.
6043
+ */
6044
+ function normalizeCallIds(payload) {
6045
+ if (typeof payload.input === "string") return payload;
6046
+ let count = 0;
6047
+ const normalizedInput = payload.input.map((item) => {
6048
+ if (item.type !== "function_call" && item.type !== "function_call_output") return item;
6049
+ const newItem = { ...item };
6050
+ if (newItem.id?.startsWith(CALL_PREFIX)) {
6051
+ newItem.id = FC_PREFIX + newItem.id.slice(5);
6052
+ count++;
6053
+ }
6054
+ if (newItem.call_id?.startsWith(CALL_PREFIX)) {
6055
+ newItem.call_id = FC_PREFIX + newItem.call_id.slice(5);
6056
+ count++;
6057
+ }
6058
+ return newItem;
6059
+ });
6060
+ if (count === 0) return payload;
6061
+ consola.debug(`[responses] Normalized ${count} call ID(s) (call_ → fc_)`);
6062
+ return {
6063
+ ...payload,
6064
+ input: normalizedInput
6065
+ };
6066
+ }
5976
6067
 
5977
6068
  //#endregion
5978
6069
  //#region src/routes/responses/ws.ts
@@ -6054,17 +6145,20 @@ async function handleResponseCreate(ws, payload) {
6054
6145
  model: resolvedModel,
6055
6146
  clientModel: requestedModel
6056
6147
  });
6057
- const adapter = createResponsesAdapter(selectedModel);
6148
+ const headersCapture = {};
6149
+ const adapter = createResponsesAdapter(selectedModel, headersCapture);
6058
6150
  const strategies = createResponsesStrategies();
6059
6151
  try {
6060
- const iterator = (await executeRequestPipeline({
6152
+ const pipelineResult = await executeRequestPipeline({
6061
6153
  adapter,
6062
6154
  strategies,
6063
6155
  payload,
6064
6156
  originalPayload: payload,
6065
6157
  model: selectedModel,
6066
6158
  maxRetries: 1
6067
- })).response[Symbol.asyncIterator]();
6159
+ });
6160
+ reqCtx.setHttpHeaders(headersCapture);
6161
+ const iterator = pipelineResult.response[Symbol.asyncIterator]();
6068
6162
  const acc = createResponsesStreamAccumulator();
6069
6163
  const idleTimeoutMs = state.streamIdleTimeout > 0 ? state.streamIdleTimeout * 1e3 : 0;
6070
6164
  const shutdownSignal = getShutdownSignal();
@@ -6092,6 +6186,7 @@ async function handleResponseCreate(ws, payload) {
6092
6186
  reqCtx.complete(responseData);
6093
6187
  ws.close(1e3, "done");
6094
6188
  } catch (error) {
6189
+ reqCtx.setHttpHeaders(headersCapture);
6095
6190
  reqCtx.fail(resolvedModel, error);
6096
6191
  const message = error instanceof Error ? error.message : String(error);
6097
6192
  consola.error(`[WS] Responses API error: ${message}`);
@@ -6879,6 +6974,7 @@ const createChatCompletions = async (payload, opts) => {
6879
6974
  body: JSON.stringify(payload),
6880
6975
  signal: fetchSignal
6881
6976
  });
6977
+ if (opts?.headersCapture) captureHttpHeaders(opts.headersCapture, headers, response);
6882
6978
  if (!response.ok) {
6883
6979
  consola.error("Failed to create chat completions", response);
6884
6980
  throw await HTTPError.fromResponse("Failed to create chat completions", response, payload.model);
@@ -7262,10 +7358,14 @@ async function handleChatCompletion(c) {
7262
7358
  */
7263
7359
  async function executeRequest(opts) {
7264
7360
  const { c, payload, originalPayload, selectedModel, reqCtx } = opts;
7361
+ const headersCapture = {};
7265
7362
  const adapter = {
7266
7363
  format: "openai-chat-completions",
7267
7364
  sanitize: (p) => sanitizeOpenAIMessages(p),
7268
- execute: (p) => executeWithAdaptiveRateLimit(() => createChatCompletions(p, { resolvedModel: selectedModel })),
7365
+ execute: (p) => executeWithAdaptiveRateLimit(() => createChatCompletions(p, {
7366
+ resolvedModel: selectedModel,
7367
+ headersCapture
7368
+ })),
7269
7369
  logPayloadSize: (p) => logPayloadSizeInfo(p, selectedModel)
7270
7370
  };
7271
7371
  const strategies = [
@@ -7280,7 +7380,7 @@ async function executeRequest(opts) {
7280
7380
  ];
7281
7381
  let truncateResult;
7282
7382
  try {
7283
- const response = (await executeRequestPipeline({
7383
+ const result = await executeRequestPipeline({
7284
7384
  adapter,
7285
7385
  strategies,
7286
7386
  payload,
@@ -7293,7 +7393,9 @@ async function executeRequest(opts) {
7293
7393
  if (retryTruncateResult) truncateResult = retryTruncateResult;
7294
7394
  if (reqCtx.tuiLogId) tuiLogger.updateRequest(reqCtx.tuiLogId, { tags: ["truncated", `retry-${attempt + 1}`] });
7295
7395
  }
7296
- })).response;
7396
+ });
7397
+ reqCtx.setHttpHeaders(headersCapture);
7398
+ const response = result.response;
7297
7399
  if (isNonStreaming(response)) return handleNonStreamingResponse(c, response, reqCtx, truncateResult);
7298
7400
  consola.debug("Streaming response");
7299
7401
  reqCtx.transition("streaming");
@@ -7310,6 +7412,7 @@ async function executeRequest(opts) {
7310
7412
  });
7311
7413
  });
7312
7414
  } catch (error) {
7415
+ reqCtx.setHttpHeaders(headersCapture);
7313
7416
  reqCtx.fail(payload.model, error);
7314
7417
  throw error;
7315
7418
  }
@@ -9132,6 +9235,7 @@ async function createAnthropicMessages(payload, opts) {
9132
9235
  body: JSON.stringify(wire),
9133
9236
  signal: fetchSignal
9134
9237
  });
9238
+ if (opts?.headersCapture) captureHttpHeaders(opts.headersCapture, headers, response);
9135
9239
  if (!response.ok) {
9136
9240
  consola.debug("Request failed:", {
9137
9241
  model,
@@ -9147,6 +9251,161 @@ async function createAnthropicMessages(payload, opts) {
9147
9251
  return await response.json();
9148
9252
  }
9149
9253
 
9254
+ //#endregion
9255
+ //#region src/lib/anthropic/message-mapping.ts
9256
+ /**
9257
+ * Check if two messages likely correspond to the same original message.
9258
+ * Used by buildMessageMapping to handle cases where sanitization removes
9259
+ * content blocks within a message (changing its shape) or removes entire messages.
9260
+ */
9261
+ function messagesMatch(orig, rewritten) {
9262
+ if (orig.role !== rewritten.role) return false;
9263
+ if (typeof orig.content === "string" && typeof rewritten.content === "string") return rewritten.content.startsWith(orig.content.slice(0, 100)) || orig.content.startsWith(rewritten.content.slice(0, 100));
9264
+ const origBlocks = Array.isArray(orig.content) ? orig.content : [];
9265
+ const rwBlocks = Array.isArray(rewritten.content) ? rewritten.content : [];
9266
+ if (origBlocks.length === 0 || rwBlocks.length === 0) return true;
9267
+ const ob = origBlocks[0];
9268
+ const rb = rwBlocks[0];
9269
+ if (ob.type !== rb.type) return false;
9270
+ if (ob.type === "tool_use" && rb.type === "tool_use") return ob.id === rb.id;
9271
+ if (ob.type === "tool_result" && rb.type === "tool_result") return ob.tool_use_id === rb.tool_use_id;
9272
+ return true;
9273
+ }
9274
+ /**
9275
+ * Build messageMapping (rwIdx → origIdx) for the direct Anthropic path.
9276
+ * Uses a two-pointer approach since rewritten messages maintain the same relative
9277
+ * order as originals (all transformations are deletions, never reorderings).
9278
+ */
9279
+ function buildMessageMapping(original, rewritten) {
9280
+ const mapping = [];
9281
+ let origIdx = 0;
9282
+ for (const element of rewritten) while (origIdx < original.length) {
9283
+ if (messagesMatch(original[origIdx], element)) {
9284
+ mapping.push(origIdx);
9285
+ origIdx++;
9286
+ break;
9287
+ }
9288
+ origIdx++;
9289
+ }
9290
+ while (mapping.length < rewritten.length) mapping.push(-1);
9291
+ return mapping;
9292
+ }
9293
+
9294
+ //#endregion
9295
+ //#region src/lib/anthropic/server-tool-filter.ts
9296
+ /**
9297
+ * Server tool block filter for Anthropic SSE streams and non-streaming responses.
9298
+ *
9299
+ * Always active — matching vscode-copilot-chat behavior, which intercepts
9300
+ * server_tool_use and *_tool_result blocks unconditionally. These are server-side
9301
+ * artifacts (e.g. tool_search injected by copilot-api, web_search) that clients
9302
+ * don't expect and most SDKs can't validate.
9303
+ *
9304
+ * Also provides logging for server tool blocks (called before filtering,
9305
+ * so information is never lost even when blocks are stripped).
9306
+ */
9307
+ /** Check if a block type is a server-side tool result (ends with _tool_result, but not plain tool_result) */
9308
+ function isServerToolResultType(type) {
9309
+ return type !== "tool_result" && type.endsWith("_tool_result");
9310
+ }
9311
+ /**
9312
+ * Check if a content block is a server-side tool block.
9313
+ * Matches `server_tool_use` (any name) and all server tool result types
9314
+ * (web_search_tool_result, tool_search_tool_result, code_execution_tool_result, etc.).
9315
+ */
9316
+ function isServerToolBlock(block) {
9317
+ if (block.type === "server_tool_use") return true;
9318
+ return isServerToolResultType(block.type);
9319
+ }
9320
+ /**
9321
+ * Log a single server tool block (server_tool_use or *_tool_result).
9322
+ * No-op for non-server-tool blocks — safe to call unconditionally.
9323
+ *
9324
+ * Called before filtering, so information is never lost even when blocks are stripped.
9325
+ */
9326
+ function logServerToolBlock(block) {
9327
+ if (block.type === "server_tool_use") {
9328
+ consola.debug(`[ServerTool] server_tool_use: ${block.name}`);
9329
+ return;
9330
+ }
9331
+ if (!isServerToolResultType(block.type)) return;
9332
+ const content = block.content;
9333
+ if (!content) return;
9334
+ const contentType = content.type;
9335
+ if (contentType === "tool_search_tool_search_result") {
9336
+ const toolNames = content.tool_references?.map((r) => r.tool_name).filter(Boolean) ?? [];
9337
+ consola.debug(`[ServerTool] tool_search result: discovered ${toolNames.length} tools${toolNames.length > 0 ? ` [${toolNames.join(", ")}]` : ""}`);
9338
+ } else if (contentType === "tool_search_tool_result_error") consola.warn(`[ServerTool] tool_search error: ${content.error_code}`);
9339
+ else consola.debug(`[ServerTool] ${block.type}: ${contentType ?? "unknown"}`);
9340
+ }
9341
+ /**
9342
+ * Log all server tool blocks from a non-streaming response content array.
9343
+ * Must be called before filterServerToolBlocksFromResponse() to preserve info.
9344
+ */
9345
+ function logServerToolBlocks(content) {
9346
+ for (const block of content) logServerToolBlock(block);
9347
+ }
9348
+ /**
9349
+ * Filters server tool blocks from the SSE stream before forwarding to the client.
9350
+ * Handles index remapping so block indices remain dense/sequential after filtering.
9351
+ *
9352
+ * Always active — matching vscode-copilot-chat behavior, which intercepts
9353
+ * server_tool_use and *_tool_result blocks unconditionally. These are server-side
9354
+ * artifacts (e.g. tool_search injected by copilot-api, web_search) that clients
9355
+ * don't expect and most SDKs can't validate.
9356
+ */
9357
+ function createServerToolBlockFilter() {
9358
+ const filteredIndices = /* @__PURE__ */ new Set();
9359
+ const clientIndexMap = /* @__PURE__ */ new Map();
9360
+ let nextClientIndex = 0;
9361
+ function getClientIndex(apiIndex) {
9362
+ let idx = clientIndexMap.get(apiIndex);
9363
+ if (idx === void 0) {
9364
+ idx = nextClientIndex++;
9365
+ clientIndexMap.set(apiIndex, idx);
9366
+ }
9367
+ return idx;
9368
+ }
9369
+ return { rewriteEvent(parsed, rawData) {
9370
+ if (!parsed) return rawData;
9371
+ if (parsed.type === "content_block_start") {
9372
+ const block = parsed.content_block;
9373
+ if (isServerToolBlock(block)) {
9374
+ filteredIndices.add(parsed.index);
9375
+ return null;
9376
+ }
9377
+ if (filteredIndices.size === 0) {
9378
+ getClientIndex(parsed.index);
9379
+ return rawData;
9380
+ }
9381
+ const clientIndex = getClientIndex(parsed.index);
9382
+ if (clientIndex === parsed.index) return rawData;
9383
+ const obj = JSON.parse(rawData);
9384
+ obj.index = clientIndex;
9385
+ return JSON.stringify(obj);
9386
+ }
9387
+ if (parsed.type === "content_block_delta" || parsed.type === "content_block_stop") {
9388
+ if (filteredIndices.has(parsed.index)) return null;
9389
+ if (filteredIndices.size === 0) return rawData;
9390
+ const clientIndex = getClientIndex(parsed.index);
9391
+ if (clientIndex === parsed.index) return rawData;
9392
+ const obj = JSON.parse(rawData);
9393
+ obj.index = clientIndex;
9394
+ return JSON.stringify(obj);
9395
+ }
9396
+ return rawData;
9397
+ } };
9398
+ }
9399
+ /** Filter server tool blocks from a non-streaming response */
9400
+ function filterServerToolBlocksFromResponse(response) {
9401
+ const filtered = response.content.filter((block) => !isServerToolBlock(block));
9402
+ if (filtered.length === response.content.length) return response;
9403
+ return {
9404
+ ...response,
9405
+ content: filtered
9406
+ };
9407
+ }
9408
+
9150
9409
  //#endregion
9151
9410
  //#region src/lib/anthropic/stream-accumulator.ts
9152
9411
  /**
@@ -9269,10 +9528,6 @@ function handleContentBlockStart(index, block, acc) {
9269
9528
  }
9270
9529
  acc.contentBlocks[index] = newBlock;
9271
9530
  }
9272
- /** Check if a block type is a server-side tool result (ends with _tool_result, but not plain tool_result) */
9273
- function isServerToolResultType(type) {
9274
- return type !== "tool_result" && type.endsWith("_tool_result");
9275
- }
9276
9531
  function handleContentBlockDelta(index, delta, acc, copilotAnnotations) {
9277
9532
  const block = acc.contentBlocks[index];
9278
9533
  if (!block) return;
@@ -9322,7 +9577,7 @@ function handleMessageDelta(delta, usage, acc) {
9322
9577
  }
9323
9578
 
9324
9579
  //#endregion
9325
- //#region src/lib/anthropic/handlers.ts
9580
+ //#region src/lib/anthropic/sse.ts
9326
9581
  /**
9327
9582
  * Check if a model supports direct Anthropic API.
9328
9583
  * Returns a decision with reason so callers can log/display the routing rationale.
@@ -9383,46 +9638,6 @@ async function* processAnthropicStream(response, acc, clientAbortSignal) {
9383
9638
  }
9384
9639
  }
9385
9640
 
9386
- //#endregion
9387
- //#region src/lib/anthropic/message-mapping.ts
9388
- /**
9389
- * Check if two messages likely correspond to the same original message.
9390
- * Used by buildMessageMapping to handle cases where sanitization removes
9391
- * content blocks within a message (changing its shape) or removes entire messages.
9392
- */
9393
- function messagesMatch(orig, rewritten) {
9394
- if (orig.role !== rewritten.role) return false;
9395
- if (typeof orig.content === "string" && typeof rewritten.content === "string") return rewritten.content.startsWith(orig.content.slice(0, 100)) || orig.content.startsWith(rewritten.content.slice(0, 100));
9396
- const origBlocks = Array.isArray(orig.content) ? orig.content : [];
9397
- const rwBlocks = Array.isArray(rewritten.content) ? rewritten.content : [];
9398
- if (origBlocks.length === 0 || rwBlocks.length === 0) return true;
9399
- const ob = origBlocks[0];
9400
- const rb = rwBlocks[0];
9401
- if (ob.type !== rb.type) return false;
9402
- if (ob.type === "tool_use" && rb.type === "tool_use") return ob.id === rb.id;
9403
- if (ob.type === "tool_result" && rb.type === "tool_result") return ob.tool_use_id === rb.tool_use_id;
9404
- return true;
9405
- }
9406
- /**
9407
- * Build messageMapping (rwIdx → origIdx) for the direct Anthropic path.
9408
- * Uses a two-pointer approach since rewritten messages maintain the same relative
9409
- * order as originals (all transformations are deletions, never reorderings).
9410
- */
9411
- function buildMessageMapping(original, rewritten) {
9412
- const mapping = [];
9413
- let origIdx = 0;
9414
- for (const element of rewritten) while (origIdx < original.length) {
9415
- if (messagesMatch(original[origIdx], element)) {
9416
- mapping.push(origIdx);
9417
- origIdx++;
9418
- break;
9419
- }
9420
- origIdx++;
9421
- }
9422
- while (mapping.length < rewritten.length) mapping.push(-1);
9423
- return mapping;
9424
- }
9425
-
9426
9641
  //#endregion
9427
9642
  //#region src/lib/repetition-detector.ts
9428
9643
  /**
@@ -9713,10 +9928,14 @@ async function handleDirectAnthropicCompletion(c, anthropicPayload, reqCtx) {
9713
9928
  if (initialSanitized.thinking && initialSanitized.thinking.type !== "disabled") tags.push(`thinking:${initialSanitized.thinking.type}`);
9714
9929
  if (tags.length > 0) tuiLogger.updateRequest(reqCtx.tuiLogId, { tags });
9715
9930
  }
9931
+ const headersCapture = {};
9716
9932
  const adapter = {
9717
9933
  format: "anthropic-messages",
9718
9934
  sanitize: (p) => sanitizeAnthropicMessages(preprocessTools(p)),
9719
- execute: (p) => executeWithAdaptiveRateLimit(() => createAnthropicMessages(p, { resolvedModel: selectedModel })),
9935
+ execute: (p) => executeWithAdaptiveRateLimit(() => createAnthropicMessages(p, {
9936
+ resolvedModel: selectedModel,
9937
+ headersCapture
9938
+ })),
9720
9939
  logPayloadSize: (p) => logPayloadSizeInfoAnthropic(p, selectedModel)
9721
9940
  };
9722
9941
  const strategies = [
@@ -9764,6 +9983,7 @@ async function handleDirectAnthropicCompletion(c, anthropicPayload, reqCtx) {
9764
9983
  }
9765
9984
  }
9766
9985
  });
9986
+ reqCtx.setHttpHeaders(headersCapture);
9767
9987
  const response = result.response;
9768
9988
  const effectivePayload = result.effectivePayload;
9769
9989
  if (Symbol.asyncIterator in response) {
@@ -9783,6 +10003,7 @@ async function handleDirectAnthropicCompletion(c, anthropicPayload, reqCtx) {
9783
10003
  }
9784
10004
  return handleDirectAnthropicNonStreamingResponse(c, response, reqCtx, truncateResult);
9785
10005
  } catch (error) {
10006
+ reqCtx.setHttpHeaders(headersCapture);
9786
10007
  reqCtx.fail(anthropicPayload.model, error);
9787
10008
  throw error;
9788
10009
  }
@@ -9798,7 +10019,7 @@ async function handleDirectAnthropicStreamingResponse(opts) {
9798
10019
  let eventsIn = 0;
9799
10020
  let currentBlockType = "";
9800
10021
  let firstEventLogged = false;
9801
- const serverToolFilter = state.stripServerTools ? createServerToolBlockFilter() : null;
10022
+ const serverToolFilter = createServerToolBlockFilter();
9802
10023
  try {
9803
10024
  for await (const { raw: rawEvent, parsed } of processAnthropicStream(response, acc, clientAbortSignal)) {
9804
10025
  const dataLen = rawEvent.data?.length ?? 0;
@@ -9818,8 +10039,7 @@ async function handleDirectAnthropicStreamingResponse(opts) {
9818
10039
  currentBlockType = parsed.content_block.type;
9819
10040
  consola.debug(`[Stream] Block #${parsed.index} start: ${currentBlockType} at +${Date.now() - streamStartMs}ms`);
9820
10041
  const block = parsed.content_block;
9821
- if (block.type === "server_tool_use") consola.debug(`[ServerTool] server_tool_use: ${block.name}`);
9822
- else if (block.type !== "tool_result" && block.type.endsWith("_tool_result")) logServerToolResult(block);
10042
+ logServerToolBlock(block);
9823
10043
  } else if (parsed?.type === "content_block_stop") {
9824
10044
  const offset = Date.now() - streamStartMs;
9825
10045
  consola.debug(`[Stream] Block #${parsed.index} stop (${currentBlockType}) at +${offset}ms, cumulative ↓${bytesIn}B ${eventsIn}ev`);
@@ -9834,7 +10054,7 @@ async function handleDirectAnthropicStreamingResponse(opts) {
9834
10054
  const delta = parsed.delta;
9835
10055
  if (delta.type === "text_delta" && delta.text) checkRepetition(delta.text);
9836
10056
  }
9837
- const forwardData = serverToolFilter ? serverToolFilter.rewriteEvent(parsed, rawEvent.data ?? "") : rawEvent.data ?? "";
10057
+ const forwardData = serverToolFilter.rewriteEvent(parsed, rawEvent.data ?? "");
9838
10058
  if (forwardData === null) continue;
9839
10059
  await stream.writeSSE({
9840
10060
  data: forwardData,
@@ -9889,31 +10109,9 @@ function handleDirectAnthropicNonStreamingResponse(c, response, reqCtx, truncate
9889
10109
  let finalResponse = response;
9890
10110
  if (state.verbose && truncateResult?.wasTruncated) finalResponse = prependMarkerToResponse(response, createTruncationMarker$1(truncateResult));
9891
10111
  logServerToolBlocks(finalResponse.content);
9892
- if (state.stripServerTools) finalResponse = filterServerToolBlocksFromResponse(finalResponse);
10112
+ finalResponse = filterServerToolBlocksFromResponse(finalResponse);
9893
10113
  return c.json(finalResponse);
9894
10114
  }
9895
- /**
9896
- * Log information extracted from a server tool result block.
9897
- * Called before filtering, so information is never lost even when blocks are stripped.
9898
- */
9899
- function logServerToolResult(block) {
9900
- const content = block.content;
9901
- if (!content) return;
9902
- const contentType = content.type;
9903
- if (contentType === "tool_search_tool_search_result") {
9904
- const toolNames = content.tool_references?.map((r) => r.tool_name).filter(Boolean) ?? [];
9905
- consola.debug(`[ServerTool] tool_search result: discovered ${toolNames.length} tools${toolNames.length > 0 ? ` [${toolNames.join(", ")}]` : ""}`);
9906
- } else if (contentType === "tool_search_tool_result_error") consola.warn(`[ServerTool] tool_search error: ${content.error_code}`);
9907
- else consola.debug(`[ServerTool] ${block.type}: ${contentType ?? "unknown"}`);
9908
- }
9909
- /**
9910
- * Log server tool blocks from a non-streaming response.
9911
- * Must be called before filterServerToolBlocksFromResponse() to preserve info.
9912
- */
9913
- function logServerToolBlocks(content) {
9914
- for (const block of content) if (block.type === "server_tool_use") consola.debug(`[ServerTool] server_tool_use: ${block.name}`);
9915
- else if (block.type !== "tool_result" && block.type.endsWith("_tool_result")) logServerToolResult(block);
9916
- }
9917
10115
  /** Convert SanitizationStats to the format expected by rewrites */
9918
10116
  function toSanitizationInfo(stats) {
9919
10117
  return {
@@ -9925,75 +10123,6 @@ function toSanitizationInfo(stats) {
9925
10123
  systemReminderRemovals: stats.systemReminderRemovals
9926
10124
  };
9927
10125
  }
9928
- /**
9929
- * Check if a content block is a server-side tool block.
9930
- * Matches `server_tool_use` (any name) and all server tool result types
9931
- * (web_search_tool_result, tool_search_tool_result, code_execution_tool_result, etc.).
9932
- */
9933
- function isServerToolBlock(block) {
9934
- if (block.type === "server_tool_use") return true;
9935
- return block.type !== "tool_result" && block.type.endsWith("_tool_result");
9936
- }
9937
- /**
9938
- * Filters server tool blocks from the SSE stream before forwarding to the client.
9939
- * Handles index remapping so block indices remain dense/sequential after filtering.
9940
- *
9941
- * Only active when stripServerTools is enabled — in that mode, server tools
9942
- * were stripped from the request, so any server_tool_use blocks in the response
9943
- * are unexpected artifacts. When disabled (default), server tool blocks are
9944
- * transparently forwarded per Anthropic protocol.
9945
- */
9946
- function createServerToolBlockFilter() {
9947
- const filteredIndices = /* @__PURE__ */ new Set();
9948
- const clientIndexMap = /* @__PURE__ */ new Map();
9949
- let nextClientIndex = 0;
9950
- function getClientIndex(apiIndex) {
9951
- let idx = clientIndexMap.get(apiIndex);
9952
- if (idx === void 0) {
9953
- idx = nextClientIndex++;
9954
- clientIndexMap.set(apiIndex, idx);
9955
- }
9956
- return idx;
9957
- }
9958
- return { rewriteEvent(parsed, rawData) {
9959
- if (!parsed) return rawData;
9960
- if (parsed.type === "content_block_start") {
9961
- const block = parsed.content_block;
9962
- if (isServerToolBlock(block)) {
9963
- filteredIndices.add(parsed.index);
9964
- return null;
9965
- }
9966
- if (filteredIndices.size === 0) {
9967
- getClientIndex(parsed.index);
9968
- return rawData;
9969
- }
9970
- const clientIndex = getClientIndex(parsed.index);
9971
- if (clientIndex === parsed.index) return rawData;
9972
- const obj = JSON.parse(rawData);
9973
- obj.index = clientIndex;
9974
- return JSON.stringify(obj);
9975
- }
9976
- if (parsed.type === "content_block_delta" || parsed.type === "content_block_stop") {
9977
- if (filteredIndices.has(parsed.index)) return null;
9978
- if (filteredIndices.size === 0) return rawData;
9979
- const clientIndex = getClientIndex(parsed.index);
9980
- if (clientIndex === parsed.index) return rawData;
9981
- const obj = JSON.parse(rawData);
9982
- obj.index = clientIndex;
9983
- return JSON.stringify(obj);
9984
- }
9985
- return rawData;
9986
- } };
9987
- }
9988
- /** Filter server tool blocks from a non-streaming response */
9989
- function filterServerToolBlocksFromResponse(response) {
9990
- const filtered = response.content.filter((block) => !isServerToolBlock(block));
9991
- if (filtered.length === response.content.length) return response;
9992
- return {
9993
- ...response,
9994
- content: filtered
9995
- };
9996
- }
9997
10126
 
9998
10127
  //#endregion
9999
10128
  //#region src/routes/messages/route.ts
@@ -10102,7 +10231,8 @@ async function handleResponses(c) {
10102
10231
  async function handleDirectResponses(opts) {
10103
10232
  const { c, payload, reqCtx } = opts;
10104
10233
  const selectedModel = state.modelIndex.get(payload.model);
10105
- const adapter = createResponsesAdapter(selectedModel);
10234
+ const headersCapture = {};
10235
+ const adapter = createResponsesAdapter(selectedModel, headersCapture);
10106
10236
  const strategies = createResponsesStrategies();
10107
10237
  try {
10108
10238
  const pipelineResult = await executeRequestPipeline({
@@ -10114,6 +10244,7 @@ async function handleDirectResponses(opts) {
10114
10244
  maxRetries: 1,
10115
10245
  requestContext: reqCtx
10116
10246
  });
10247
+ reqCtx.setHttpHeaders(headersCapture);
10117
10248
  const response = pipelineResult.response;
10118
10249
  reqCtx.addQueueWaitMs(pipelineResult.queueWaitMs);
10119
10250
  if (!payload.stream) {
@@ -10186,6 +10317,7 @@ async function handleDirectResponses(opts) {
10186
10317
  }
10187
10318
  });
10188
10319
  } catch (error) {
10320
+ reqCtx.setHttpHeaders(headersCapture);
10189
10321
  reqCtx.fail(payload.model, error);
10190
10322
  throw error;
10191
10323
  }
@@ -10268,6 +10400,7 @@ server.notFound((c) => {
10268
10400
  });
10269
10401
  server.use(async (_c, next) => {
10270
10402
  await applyConfigToState();
10403
+ await ensureValidCopilotToken();
10271
10404
  await next();
10272
10405
  });
10273
10406
  server.use(tuiMiddleware());