@jeffreycao/copilot-api 1.10.18 → 1.10.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  import { _ as setModelMappings, a as getModelMappings, c as getReasoningEffortForModel, d as isResponsesApiContextManagementModel, f as isResponsesApiWebSearchEnabled, g as resolveMappedModel, i as getExtraPromptForModel, l as getSmallModel, n as getClaudeTokenMultiplier, o as getProviderConfig, p as isResponsesApiWebSocketEnabled, r as getConfig, s as getRawProviderConfig, t as getAnthropicApiKey, u as isMessagesApiEnabled, y as PATHS } from "./config-DtIF3Zvk.js";
2
- import { B as forwardError, C as prepareMessageProxyHeaders, E as compactMessageSections, F as createPooledWebSocketStream, I as createWebSocketUrl, M as generateTraceId, N as requestContext, O as compactSystemPromptStarts, P as resolveTraceId$1, R as state, S as prepareInteractionHeaders, T as compactAutoContinuePromptStarts, _ as getCopilotUsage, b as copilotWebSocketHeaders, d as generateRequestIdFromPayload, f as getRootSessionId, g as sleep, h as parseUserIdMetadata, j as forwardCodexResponses, m as isNullish, p as getUUID, r as setupCodexToken, s as cacheModels, v as copilotBaseUrl, x as prepareForCompact, y as copilotHeaders, z as HTTPError } from "./token-DktKOFot.js";
2
+ import { B as forwardError, C as prepareMessageProxyHeaders, E as compactMessageSections, F as createPooledWebSocketStream, I as createWebSocketUrl, M as generateTraceId, N as requestContext, O as compactSystemPromptStarts, P as resolveTraceId$1, R as state, S as prepareInteractionHeaders, T as compactAutoContinuePromptStarts, _ as getCopilotUsage, b as copilotWebSocketHeaders, d as generateRequestIdFromPayload, f as getRootSessionId, g as sleep, h as parseUserIdMetadata, j as forwardCodexResponses, m as isNullish, p as getUUID, r as setupCodexToken, s as cacheModels, v as copilotBaseUrl, x as prepareForCompact, y as copilotHeaders, z as HTTPError } from "./token-DkKHbyN6.js";
3
3
  import { a as isDeferredToolName, c as parseMcpToolSearchSentinel, d as shouldEnableResponsesToolSearch, i as isBridgeToolSearchName, l as resolveBridgeToolSearchName, o as listDeferredToolNames, r as formatToolSearchBridgeArguments, s as normalizeToolSearchBridgeArguments, t as BRIDGE_TOOL_SEARCH_NAME, u as selectDeferredToolsByNames } from "./tool-search-wA-fLduL.js";
4
4
  import consola from "consola";
5
5
  import { createHash } from "node:crypto";
@@ -1749,37 +1749,6 @@ function getAnthropicToolUseBlocks(toolCalls) {
1749
1749
  }));
1750
1750
  }
1751
1751
  //#endregion
1752
- //#region src/routes/provider/messages/count-tokens-handler.ts
1753
- const logger$6 = createHandlerLogger("provider-count-tokens-handler");
1754
- async function handleProviderCountTokens(c) {
1755
- const provider = c.req.param("provider");
1756
- return await handleProviderCountTokensForProvider(c, {
1757
- payload: await c.req.json(),
1758
- provider
1759
- });
1760
- }
1761
- async function handleProviderCountTokensForProvider(c, options) {
1762
- const { payload: anthropicPayload, provider } = options;
1763
- const modelId = anthropicPayload.model.trim();
1764
- const providerConfig = await resolveProviderConfig(provider);
1765
- if (!providerConfig) return c.json({ error: {
1766
- message: `Provider '${provider}' not found or disabled`,
1767
- type: "invalid_request_error"
1768
- } }, 404);
1769
- const modelConfig = providerConfig.models?.[modelId];
1770
- const tokenCount = await getTokenCount(translateToOpenAI(anthropicPayload, providerConfig.type === "openai-compatible" || providerConfig.type === "openai-responses" ? {
1771
- supportPdf: modelConfig?.supportPdf,
1772
- toolContentSupportType: modelConfig?.toolContentSupportType ?? []
1773
- } : void 0), createFallbackModel(modelId));
1774
- const finalTokenCount = tokenCount.input + tokenCount.output;
1775
- logger$6.debug("provider.count_tokens.success", {
1776
- provider,
1777
- model: anthropicPayload.model,
1778
- input_tokens: finalTokenCount
1779
- });
1780
- return c.json({ input_tokens: finalTokenCount });
1781
- }
1782
- //#endregion
1783
1752
  //#region src/lib/models.ts
1784
1753
  const findEndpointModel = (sdkModelId) => {
1785
1754
  const models = state.models?.data ?? [];
@@ -1829,893 +1798,1275 @@ const normalizeSdkModelId = (sdkModelId) => {
1829
1798
  version: pattern5[1]
1830
1799
  };
1831
1800
  };
1832
- //#endregion
1833
- //#region src/routes/messages/count-tokens-handler.ts
1834
- const resolveCountTokensModel = (modelId, findModel = findEndpointModel) => {
1835
- const selectedModel = findModel(modelId);
1836
- if (selectedModel) return {
1837
- fallback: false,
1838
- model: selectedModel
1839
- };
1840
- return {
1841
- fallback: true,
1842
- model: createFallbackModel(modelId.trim())
1843
- };
1844
- };
1845
- /**
1846
- * Forwards token counting to Anthropic's real /v1/messages/count_tokens endpoint.
1847
- * Returns the result on success, or null to fall through to estimation.
1848
- */
1849
- async function countTokensViaAnthropic(c, payload) {
1850
- if (!payload.model.startsWith("claude")) return null;
1851
- const apiKey = getAnthropicApiKey();
1852
- if (!apiKey) return null;
1853
- const model = payload.model.replaceAll(".", "-");
1854
- const res = await fetch("https://api.anthropic.com/v1/messages/count_tokens", {
1855
- method: "POST",
1856
- headers: {
1857
- "content-type": "application/json",
1858
- "x-api-key": apiKey,
1859
- "anthropic-version": "2023-06-01",
1860
- "anthropic-beta": "token-counting-2024-11-01"
1861
- },
1862
- body: JSON.stringify({
1863
- ...payload,
1864
- model
1865
- })
1801
+ const SYSTEM_REMINDER_START = "<system-reminder>";
1802
+ const SYSTEM_REMINDER_END = "</system-reminder>";
1803
+ const SUBAGENT_START_HOOK_ADDITIONAL_PREFIX = "SubagentStart hook additional";
1804
+ const IDE_EXECUTE_CODE_TOOL = "mcp__ide__executeCode";
1805
+ const IDE_GET_DIAGNOSTICS_TOOL = "mcp__ide__getDiagnostics";
1806
+ const IDE_GET_DIAGNOSTICS_DESCRIPTION = "Get language diagnostics from VS Code. Returns errors, warnings, information, and hints for files in the workspace.";
1807
+ const PDF_FILE_READ_PREFIX = "PDF file read:";
1808
+ const createTextBlock = (text) => ({
1809
+ type: "text",
1810
+ text
1811
+ });
1812
+ const appendTextSegment = (base, addition) => {
1813
+ if (base.length === 0) return addition;
1814
+ if (addition.length === 0) return base;
1815
+ return `${base}\n\n${addition}`;
1816
+ };
1817
+ const ensureSystemReminderText = (text) => {
1818
+ if (text.startsWith(SYSTEM_REMINDER_START)) return text;
1819
+ return `${SYSTEM_REMINDER_START}\n${text.trim()}\n${SYSTEM_REMINDER_END}`;
1820
+ };
1821
+ const normalizeSystemStringForMerge = (text) => {
1822
+ if (!text.startsWith(SUBAGENT_START_HOOK_ADDITIONAL_PREFIX)) return ensureSystemReminderText(text);
1823
+ const lineBreakMatch = /\r?\n/.exec(text);
1824
+ if (!lineBreakMatch) return [createTextBlock(ensureSystemReminderText(text))];
1825
+ const firstLine = text.slice(0, lineBreakMatch.index);
1826
+ const rest = text.slice(lineBreakMatch.index + lineBreakMatch[0].length);
1827
+ return [createTextBlock(ensureSystemReminderText(firstLine)), ...rest.length > 0 ? [createTextBlock(ensureSystemReminderText(rest))] : []];
1828
+ };
1829
+ const normalizeSystemContentForMerge = (content) => {
1830
+ if (typeof content === "string") return normalizeSystemStringForMerge(content);
1831
+ return content.map((block) => block.text.startsWith(SYSTEM_REMINDER_START) ? block : {
1832
+ ...block,
1833
+ text: ensureSystemReminderText(block.text)
1866
1834
  });
1867
- if (!res.ok) {
1868
- consola.warn("Anthropic count_tokens failed:", res.status, await res.text().catch(() => ""), "- falling back to estimation");
1869
- return null;
1870
- }
1871
- const result = await res.json();
1872
- consola.info("Token count (Anthropic API):", result.input_tokens);
1873
- return c.json(result);
1874
- }
1875
- /**
1876
- * Handles token counting for Anthropic messages.
1877
- *
1878
- * When an Anthropic API key is available (via config or ANTHROPIC_API_KEY env var)
1879
- * and the model is a Claude model, forwards to Anthropic's free /v1/messages/count_tokens
1880
- * endpoint for accurate counts. Otherwise falls back to GPT tokenizer estimation.
1881
- */
1882
- async function handleCountTokens(c) {
1883
- const anthropicPayload = await c.req.json();
1884
- anthropicPayload.model = resolveMappedModel(anthropicPayload.model);
1885
- const providerModelAlias = parseProviderModelAlias(anthropicPayload.model);
1886
- if (providerModelAlias) {
1887
- anthropicPayload.model = providerModelAlias.model;
1888
- return await handleProviderCountTokensForProvider(c, {
1889
- payload: anthropicPayload,
1890
- provider: providerModelAlias.provider
1891
- });
1835
+ };
1836
+ const toSystemTextBlocks = (content) => {
1837
+ return typeof content === "string" ? [createTextBlock(content)] : [...content];
1838
+ };
1839
+ const mergeSystemPromptContent = (current, addition) => {
1840
+ if (current === void 0) return typeof addition === "string" ? addition : [...addition];
1841
+ if (typeof current === "string" && typeof addition === "string") return appendTextSegment(current, addition);
1842
+ return [...toSystemTextBlocks(current), ...toSystemTextBlocks(addition)];
1843
+ };
1844
+ const prependSystemContentToUserMessage = (message, addition) => {
1845
+ if (typeof message.content === "string" && typeof addition === "string") {
1846
+ message.content = appendTextSegment(addition, message.content);
1847
+ return;
1892
1848
  }
1893
- const anthropicResult = await countTokensViaAnthropic(c, anthropicPayload);
1894
- if (anthropicResult) return anthropicResult;
1895
- const anthropicBeta = c.req.header("anthropic-beta");
1896
- const openAIPayload = translateToOpenAI(anthropicPayload);
1897
- const requestedModel = anthropicPayload.model;
1898
- const resolve = resolveCountTokensModel(requestedModel);
1899
- const selectedModel = resolve.model;
1900
- anthropicPayload.model = selectedModel.id;
1901
- if (resolve.fallback) consola.warn(`Model '${requestedModel}' not found, using o200k_base fallback tokenizer`);
1902
- const tokenCount = await getTokenCount(openAIPayload, selectedModel);
1903
- if (anthropicPayload.tools && anthropicPayload.tools.length > 0) {
1904
- let addToolSystemPromptCount = false;
1905
- if (anthropicBeta) {
1906
- const toolsLength = anthropicPayload.tools.length;
1907
- addToolSystemPromptCount = !anthropicPayload.tools.some((tool) => tool.name.startsWith("mcp__") || tool.name === "Skill" && toolsLength === 1);
1908
- }
1909
- if (addToolSystemPromptCount) {
1910
- if (anthropicPayload.model.startsWith("claude")) tokenCount.input = tokenCount.input + 346;
1911
- else if (anthropicPayload.model.startsWith("grok")) tokenCount.input = tokenCount.input + 120;
1849
+ if (Array.isArray(message.content)) {
1850
+ const lastToolResultIndex = message.content.findLastIndex((block) => block.type === "tool_result");
1851
+ if (lastToolResultIndex >= 0) {
1852
+ message.content = [
1853
+ ...message.content.slice(0, lastToolResultIndex + 1),
1854
+ ...toSystemTextBlocks(addition),
1855
+ ...message.content.slice(lastToolResultIndex + 1)
1856
+ ];
1857
+ return;
1912
1858
  }
1913
1859
  }
1914
- let finalTokenCount = tokenCount.input + tokenCount.output;
1915
- if (anthropicPayload.model.startsWith("claude")) finalTokenCount = Math.round(finalTokenCount * getClaudeTokenMultiplier());
1916
- consola.info("Token count:", finalTokenCount);
1917
- return c.json({ input_tokens: finalTokenCount });
1918
- }
1919
- //#endregion
1920
- //#region src/lib/codex-rate-limit.ts
1921
- const codexRateLimitScopes = ["primary", "secondary"];
1922
- const formatCodexRateLimitResetAt = (resetAt) => {
1923
- const date = /* @__PURE__ */ new Date(resetAt * 1e3);
1924
- return Number.isNaN(date.getTime()) ? String(resetAt) : date.toLocaleString();
1860
+ message.content = [...toSystemTextBlocks(addition), ...typeof message.content === "string" ? [createTextBlock(message.content)] : message.content];
1925
1861
  };
1926
- const logCodexRateLimitsEvent = (event) => {
1927
- if (!event || typeof event !== "object") return;
1928
- const eventRecord = event;
1929
- if (eventRecord.type !== "codex.rate_limits") return;
1930
- const rateLimits = eventRecord.rate_limits;
1931
- if (!rateLimits || typeof rateLimits !== "object") return;
1932
- const planType = typeof eventRecord.plan_type === "string" ? eventRecord.plan_type : null;
1933
- const rateLimitRecord = rateLimits;
1934
- const allowed = typeof rateLimitRecord.allowed === "boolean" ? rateLimitRecord.allowed : null;
1935
- const limitReached = typeof rateLimitRecord.limit_reached === "boolean" ? rateLimitRecord.limit_reached : null;
1936
- for (const scope of codexRateLimitScopes) {
1937
- const window = rateLimitRecord[scope];
1938
- if (!isCodexRateLimitWindow(window)) continue;
1939
- const summary = [];
1940
- if (allowed !== null) summary.push(`allowed=${allowed}`);
1941
- if (limitReached !== null) summary.push(`limit_reached=${limitReached}`);
1942
- summary.push(`used=${window.used_percent}%`, `reset_at=${formatCodexRateLimitResetAt(window.reset_at)}`);
1943
- const label = planType ? `Codex ${scope} rate limit (${planType})` : `Codex ${scope} rate limit`;
1944
- consola.log(`${label}: ${summary.join(", ")}`);
1862
+ const normalizeSystemMessages = (payload) => {
1863
+ if (!payload.messages.some((msg) => msg.role === "system")) return;
1864
+ const normalizedMessages = [];
1865
+ let system = payload.system;
1866
+ for (const message of payload.messages) {
1867
+ if (message.role === "system") {
1868
+ const normalizedContent = normalizeSystemContentForMerge(message.content);
1869
+ const previousMessage = normalizedMessages.at(-1);
1870
+ if (previousMessage?.role === "user") prependSystemContentToUserMessage(previousMessage, normalizedContent);
1871
+ else if (!previousMessage) system = mergeSystemPromptContent(system, normalizedContent);
1872
+ continue;
1873
+ }
1874
+ normalizedMessages.push(message);
1945
1875
  }
1876
+ payload.messages = normalizedMessages;
1877
+ payload.system = system;
1946
1878
  };
1947
- const isCodexRateLimitWindow = (value) => {
1948
- if (!value || typeof value !== "object") return false;
1949
- const record = value;
1950
- return typeof record.reset_after_seconds === "number" && typeof record.reset_at === "number" && typeof record.used_percent === "number" && typeof record.window_minutes === "number";
1879
+ const isVersionAtLeast = (version, minimumMajor, minimumMinor) => {
1880
+ const [majorPart, minorPart = "0"] = version.split(".");
1881
+ const major = Number.parseInt(majorPart, 10);
1882
+ const minor = Number.parseInt(minorPart, 10);
1883
+ if (!Number.isInteger(major) || !Number.isInteger(minor)) return false;
1884
+ return major > minimumMajor || major === minimumMajor && minor >= minimumMinor;
1951
1885
  };
1952
- //#endregion
1953
- //#region src/routes/messages/stream-translation.ts
1954
- function isToolBlockOpen(state) {
1955
- if (!state.contentBlockOpen) return false;
1956
- return Object.values(state.toolCalls).some((tc) => tc.anthropicBlockIndex === state.contentBlockIndex);
1957
- }
1958
- function translateChunkToAnthropicEvents(chunk, state) {
1959
- const events = [];
1960
- if (chunk.choices.length === 0) {
1961
- completePendingMessage(state, events, chunk);
1962
- return events;
1886
+ const shouldSummarizeThinkingDisplayForModel = (model) => {
1887
+ const normalized = normalizeSdkModelId(model);
1888
+ return Boolean(normalized && isVersionAtLeast(normalized.version, 4, 7));
1889
+ };
1890
+ const getBlockCacheControl = (block) => {
1891
+ if (!block || block.type === "thinking") return;
1892
+ const cacheControl = block.cache_control;
1893
+ if (!cacheControl || typeof cacheControl !== "object") return;
1894
+ return cacheControl;
1895
+ };
1896
+ const getLastMessageContentCacheControl = (lastMessage) => {
1897
+ if (!lastMessage || !Array.isArray(lastMessage.content)) return;
1898
+ const cacheControl = getBlockCacheControl(lastMessage.content.at(-1));
1899
+ return cacheControl ? { ...cacheControl } : void 0;
1900
+ };
1901
+ const applyLastMessageCacheControl = (anthropicPayload, lastMessageCacheControl) => {
1902
+ const cacheControl = lastMessageCacheControl ?? { type: "ephemeral" };
1903
+ const lastMessage = anthropicPayload.messages.at(-1);
1904
+ if (!lastMessage || !Array.isArray(lastMessage.content)) return;
1905
+ const lastBlock = lastMessage.content.at(-1);
1906
+ if (!lastBlock || lastBlock.type === "thinking" || lastBlock.cache_control) return;
1907
+ lastBlock.cache_control = { ...cacheControl };
1908
+ };
1909
+ const getCompactCandidateText = (message) => {
1910
+ if (message.role !== "user") return "";
1911
+ if (typeof message.content === "string") return message.content;
1912
+ return message.content.filter((block) => block.type === "text").map((block) => block.text.startsWith("<system-reminder>") ? "" : block.text).filter((text) => text.length > 0).join("\n\n");
1913
+ };
1914
+ const isCompactMessage = (lastMessage) => {
1915
+ const text = getCompactCandidateText(lastMessage);
1916
+ if (!text) return false;
1917
+ return text.includes("CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.") && text.includes("Your task is to create a detailed summary of the conversation so far") && compactMessageSections.some((section) => text.includes(section));
1918
+ };
1919
+ const isCompactAutoContinueMessage = (lastMessage) => {
1920
+ const text = getCompactCandidateText(lastMessage);
1921
+ return Boolean(text) && compactAutoContinuePromptStarts.some((promptStart) => text.startsWith(promptStart));
1922
+ };
1923
+ const getCompactType = (anthropicPayload) => {
1924
+ const lastMessage = anthropicPayload.messages.at(-1);
1925
+ if (lastMessage && isCompactMessage(lastMessage)) return 1;
1926
+ if (lastMessage && isCompactAutoContinueMessage(lastMessage)) return 2;
1927
+ const system = anthropicPayload.system;
1928
+ if (typeof system === "string") return compactSystemPromptStarts.some((promptStart) => system.startsWith(promptStart)) ? 1 : 0;
1929
+ if (!Array.isArray(system)) return 0;
1930
+ if (system.some((msg) => typeof msg.text === "string" && compactSystemPromptStarts.some((promptStart) => msg.text.startsWith(promptStart)))) return 1;
1931
+ return 0;
1932
+ };
1933
+ const mergeContentWithText = (tr, textBlock) => {
1934
+ if (typeof tr.content === "string") return {
1935
+ ...tr,
1936
+ content: `${tr.content}\n\n${textBlock.text}`
1937
+ };
1938
+ if (hasToolRef(tr)) return tr;
1939
+ return {
1940
+ ...tr,
1941
+ content: [...tr.content, stripContentBlockCacheControl(textBlock)]
1942
+ };
1943
+ };
1944
+ const mergeContentWithTexts = (tr, textBlocks) => {
1945
+ if (typeof tr.content === "string") {
1946
+ const appendedTexts = textBlocks.map((tb) => tb.text).join("\n\n");
1947
+ return {
1948
+ ...tr,
1949
+ content: `${tr.content}\n\n${appendedTexts}`
1950
+ };
1963
1951
  }
1964
- const choice = chunk.choices[0];
1965
- const { delta } = choice;
1966
- handleMessageStart(state, events, chunk);
1967
- handleThinkingText(delta, state, events);
1968
- handleContent(delta, state, events);
1969
- handleToolCalls(delta, state, events);
1970
- handleFinish(choice, state, {
1971
- events,
1972
- chunk
1973
- });
1974
- return events;
1975
- }
1976
- function flushPendingAnthropicStreamEvents(state) {
1977
- const events = [];
1978
- completePendingMessage(state, events);
1979
- return events;
1980
- }
1981
- function completePendingMessage(state, events, chunk) {
1982
- if (!state.pendingMessageDelta) return;
1983
- if (chunk?.usage) state.pendingMessageDelta.usage = getAnthropicUsageFromOpenAIChunk(chunk);
1984
- events.push(state.pendingMessageDelta, { type: "message_stop" });
1985
- state.pendingMessageDelta = void 0;
1986
- }
1987
- function handleFinish(choice, state, context) {
1988
- const { events, chunk } = context;
1989
- if (choice.finish_reason && choice.finish_reason.length > 0) {
1990
- if (state.contentBlockOpen) {
1991
- const toolBlockOpen = isToolBlockOpen(state);
1992
- context.events.push({
1993
- type: "content_block_stop",
1994
- index: state.contentBlockIndex
1995
- });
1996
- state.contentBlockOpen = false;
1997
- state.contentBlockIndex++;
1998
- if (!toolBlockOpen) handleReasoningOpaque(choice.delta, events, state);
1999
- }
2000
- flushDeferredContent(state, events);
2001
- state.pendingMessageDelta = {
2002
- type: "message_delta",
2003
- delta: {
2004
- stop_reason: mapOpenAIStopReasonToAnthropic(choice.finish_reason),
2005
- stop_sequence: null
2006
- },
2007
- usage: getAnthropicUsageFromOpenAIChunk(chunk)
2008
- };
2009
- if (chunk.usage) completePendingMessage(state, events, chunk);
2010
- }
2011
- }
2012
- function getAnthropicUsageFromOpenAIChunk(chunk) {
2013
- const { cachedTokens, cacheCreationTokens, inputTokens } = getOpenAIChunkUsageTokens(chunk);
1952
+ if (hasToolRef(tr)) return tr;
2014
1953
  return {
2015
- input_tokens: inputTokens,
2016
- output_tokens: chunk.usage?.completion_tokens ?? 0,
2017
- ...chunk.usage?.prompt_tokens_details?.cache_creation_input_tokens !== void 0 && { cache_creation_input_tokens: cacheCreationTokens },
2018
- ...chunk.usage?.prompt_tokens_details?.cached_tokens !== void 0 && { cache_read_input_tokens: cachedTokens }
1954
+ ...tr,
1955
+ content: [...tr.content, ...textBlocks.map(stripContentBlockCacheControl)]
1956
+ };
1957
+ };
1958
+ const mergeContentWithAttachments = (tr, attachments) => {
1959
+ const cleanAttachments = attachments.map(stripContentBlockCacheControl);
1960
+ if (typeof tr.content === "string") return {
1961
+ ...tr,
1962
+ content: [{
1963
+ type: "text",
1964
+ text: tr.content
1965
+ }, ...cleanAttachments]
2019
1966
  };
2020
- }
2021
- function getOpenAIChunkUsageTokens(chunk) {
2022
- const promptTokens = chunk.usage?.prompt_tokens ?? 0;
2023
- const cachedTokens = chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0;
2024
- const cacheCreationTokens = chunk.usage?.prompt_tokens_details?.cache_creation_input_tokens ?? 0;
2025
1967
  return {
2026
- cacheCreationTokens,
2027
- cachedTokens,
2028
- inputTokens: Math.max(0, promptTokens - cachedTokens - cacheCreationTokens)
1968
+ ...tr,
1969
+ content: [...tr.content, ...cleanAttachments]
2029
1970
  };
2030
- }
2031
- function handleToolCalls(delta, state, events) {
2032
- if (delta.tool_calls && delta.tool_calls.length > 0) {
2033
- closeThinkingBlockIfOpen(state, events);
2034
- handleReasoningOpaqueInToolCalls(state, events, delta);
2035
- for (const toolCall of delta.tool_calls) {
2036
- if (toolCall.id && toolCall.function?.name) {
2037
- if (state.contentBlockOpen) {
2038
- events.push({
2039
- type: "content_block_stop",
2040
- index: state.contentBlockIndex
2041
- });
2042
- state.contentBlockIndex++;
2043
- state.contentBlockOpen = false;
2044
- }
2045
- const anthropicBlockIndex = state.contentBlockIndex;
2046
- state.toolCalls[toolCall.index] = {
2047
- id: toolCall.id,
2048
- name: toolCall.function.name,
2049
- anthropicBlockIndex
2050
- };
2051
- events.push({
2052
- type: "content_block_start",
2053
- index: anthropicBlockIndex,
2054
- content_block: {
2055
- type: "tool_use",
2056
- id: toolCall.id,
2057
- name: toolCall.function.name,
2058
- input: {}
2059
- }
2060
- });
2061
- state.contentBlockOpen = true;
2062
- }
2063
- if (toolCall.function?.arguments) {
2064
- const toolCallInfo = state.toolCalls[toolCall.index];
2065
- if (toolCallInfo) events.push({
2066
- type: "content_block_delta",
2067
- index: toolCallInfo.anthropicBlockIndex,
2068
- delta: {
2069
- type: "input_json_delta",
2070
- partial_json: toolCall.function.arguments
2071
- }
2072
- });
1971
+ };
1972
+ const stripContentBlockCacheControl = (block) => {
1973
+ if (!Object.hasOwn(block, "cache_control")) return block;
1974
+ const copy = { ...block };
1975
+ delete copy.cache_control;
1976
+ return copy;
1977
+ };
1978
+ const isAttachmentBlock = (block) => {
1979
+ return block.type === "image" || block.type === "document";
1980
+ };
1981
+ const getMergeableToolResultIndices = (toolResults) => {
1982
+ return toolResults.flatMap((block, index) => block.is_error || hasToolRef(block) ? [] : [index]);
1983
+ };
1984
+ const mergeAttachmentsIntoToolResults = (toolResults, attachmentsByToolResultIndex) => {
1985
+ if (attachmentsByToolResultIndex.size === 0) return toolResults;
1986
+ return toolResults.map((block, index) => {
1987
+ const matchedAttachments = attachmentsByToolResultIndex.get(index);
1988
+ if (!matchedAttachments) return block;
1989
+ return mergeContentWithAttachments(block, [...matchedAttachments].sort((left, right) => left.order - right.order).map(({ attachment }) => attachment));
1990
+ });
1991
+ };
1992
+ const assignAttachmentsToToolResults = (target, attachments, options) => {
1993
+ const { toolResultIndices } = options;
1994
+ const fallbackToolResultIndices = options.fallbackToolResultIndices ?? toolResultIndices;
1995
+ if (attachments.length === 0) return;
1996
+ if (toolResultIndices.length > 0 && toolResultIndices.length === attachments.length) {
1997
+ for (const [index, toolResultIndex] of toolResultIndices.entries()) {
1998
+ const currentAttachments = target.get(toolResultIndex);
1999
+ if (currentAttachments) {
2000
+ currentAttachments.push(attachments[index]);
2001
+ continue;
2073
2002
  }
2003
+ target.set(toolResultIndex, [attachments[index]]);
2074
2004
  }
2005
+ return;
2075
2006
  }
2076
- }
2077
- function handleReasoningOpaqueInToolCalls(state, events, delta) {
2078
- if (state.contentBlockOpen && !isToolBlockOpen(state)) {
2079
- events.push({
2080
- type: "content_block_stop",
2081
- index: state.contentBlockIndex
2082
- });
2083
- state.contentBlockIndex++;
2084
- state.contentBlockOpen = false;
2007
+ const lastToolResultIndex = fallbackToolResultIndices.at(-1);
2008
+ if (lastToolResultIndex === void 0) return;
2009
+ const currentAttachments = target.get(lastToolResultIndex);
2010
+ if (currentAttachments) {
2011
+ currentAttachments.push(...attachments);
2012
+ return;
2085
2013
  }
2086
- handleReasoningOpaque(delta, events, state);
2087
- }
2088
- function handleContent(delta, state, events) {
2089
- if (delta.content && delta.content.length > 0) {
2090
- closeThinkingBlockIfOpen(state, events);
2091
- if (isToolBlockOpen(state) || hasToolCallDelta(delta)) {
2092
- state.deferredContent = `${state.deferredContent ?? ""}${delta.content}`;
2093
- return;
2014
+ target.set(lastToolResultIndex, [...attachments]);
2015
+ };
2016
+ const startsWithPdfFileRead = (toolResult) => {
2017
+ if (typeof toolResult.content === "string") return toolResult.content.startsWith(PDF_FILE_READ_PREFIX);
2018
+ if (toolResult.content.some((block) => block.type === "document")) return false;
2019
+ if (toolResult.content.length === 0) return false;
2020
+ const firstBlock = toolResult.content[0];
2021
+ if (firstBlock.type !== "text") return false;
2022
+ return firstBlock.text.startsWith(PDF_FILE_READ_PREFIX);
2023
+ };
2024
+ const collectMergeableUserContent = (content) => {
2025
+ const toolResults = [];
2026
+ const textBlocks = [];
2027
+ const attachments = [];
2028
+ for (const [order, block] of content.entries()) {
2029
+ if (block.type === "tool_result") {
2030
+ toolResults.push(block);
2031
+ continue;
2094
2032
  }
2095
- if (!state.contentBlockOpen) {
2096
- events.push({
2097
- type: "content_block_start",
2098
- index: state.contentBlockIndex,
2099
- content_block: {
2100
- type: "text",
2101
- text: ""
2102
- }
2033
+ if (block.type === "text") {
2034
+ textBlocks.push(block);
2035
+ continue;
2036
+ }
2037
+ if (isAttachmentBlock(block)) {
2038
+ attachments.push({
2039
+ attachment: block,
2040
+ order
2103
2041
  });
2104
- state.contentBlockOpen = true;
2042
+ continue;
2105
2043
  }
2106
- events.push({
2107
- type: "content_block_delta",
2108
- index: state.contentBlockIndex,
2109
- delta: {
2110
- type: "text_delta",
2111
- text: delta.content
2112
- }
2113
- });
2044
+ return null;
2114
2045
  }
2115
- if (delta.content === "" && delta.reasoning_opaque && delta.reasoning_opaque.length > 0 && state.thinkingBlockOpen) {
2116
- events.push({
2117
- type: "content_block_delta",
2118
- index: state.contentBlockIndex,
2119
- delta: {
2120
- type: "signature_delta",
2121
- signature: delta.reasoning_opaque
2122
- }
2123
- }, {
2124
- type: "content_block_stop",
2125
- index: state.contentBlockIndex
2126
- });
2127
- state.contentBlockIndex++;
2128
- state.thinkingBlockOpen = false;
2046
+ return {
2047
+ toolResults,
2048
+ textBlocks,
2049
+ attachments
2050
+ };
2051
+ };
2052
+ const mergeAttachmentsForToolResults = (toolResults, attachments) => {
2053
+ if (attachments.length === 0) return toolResults;
2054
+ const documentBlocks = attachments.filter(({ attachment }) => attachment.type === "document");
2055
+ const mergeableToolResultIndices = getMergeableToolResultIndices(toolResults);
2056
+ const pdfReadToolResultIndices = mergeableToolResultIndices.filter((index) => startsWithPdfFileRead(toolResults[index]));
2057
+ const attachmentsByToolResultIndex = /* @__PURE__ */ new Map();
2058
+ let remainingAttachments = attachments;
2059
+ let countMatchToolResultIndices = mergeableToolResultIndices;
2060
+ if (documentBlocks.length > 0 && pdfReadToolResultIndices.length > 0) {
2061
+ const matchedDocumentCount = Math.min(pdfReadToolResultIndices.length, documentBlocks.length);
2062
+ const matchedDocuments = documentBlocks.slice(0, matchedDocumentCount);
2063
+ const matchedDocumentOrders = new Set(matchedDocuments.map(({ order }) => order));
2064
+ const matchedPdfToolResultIndices = pdfReadToolResultIndices.slice(0, matchedDocumentCount);
2065
+ const matchedPdfToolResultIndexSet = new Set(matchedPdfToolResultIndices);
2066
+ assignAttachmentsToToolResults(attachmentsByToolResultIndex, matchedDocuments, { toolResultIndices: matchedPdfToolResultIndices });
2067
+ countMatchToolResultIndices = mergeableToolResultIndices.filter((index) => !matchedPdfToolResultIndexSet.has(index));
2068
+ remainingAttachments = attachments.filter(({ attachment, order }) => attachment.type !== "document" || !matchedDocumentOrders.has(order));
2129
2069
  }
2130
- }
2131
- function hasToolCallDelta(delta) {
2132
- return Boolean(delta.tool_calls && delta.tool_calls.length > 0);
2133
- }
2134
- function flushDeferredContent(state, events) {
2135
- if (!state.deferredContent) return;
2136
- if (!state.contentBlockOpen) {
2137
- events.push({
2138
- type: "content_block_start",
2139
- index: state.contentBlockIndex,
2140
- content_block: {
2141
- type: "text",
2142
- text: ""
2143
- }
2144
- });
2145
- state.contentBlockOpen = true;
2146
- }
2147
- events.push({
2148
- type: "content_block_delta",
2149
- index: state.contentBlockIndex,
2150
- delta: {
2151
- type: "text_delta",
2152
- text: state.deferredContent
2153
- }
2154
- }, {
2155
- type: "content_block_stop",
2156
- index: state.contentBlockIndex
2070
+ assignAttachmentsToToolResults(attachmentsByToolResultIndex, remainingAttachments, {
2071
+ toolResultIndices: countMatchToolResultIndices,
2072
+ fallbackToolResultIndices: mergeableToolResultIndices
2157
2073
  });
2158
- state.deferredContent = void 0;
2159
- state.contentBlockOpen = false;
2160
- state.contentBlockIndex++;
2161
- }
2162
- function handleMessageStart(state, events, chunk) {
2163
- if (!state.messageStartSent) {
2164
- const { cachedTokens, cacheCreationTokens, inputTokens } = getOpenAIChunkUsageTokens(chunk);
2165
- events.push({
2166
- type: "message_start",
2167
- message: {
2168
- id: chunk.id,
2169
- type: "message",
2170
- role: "assistant",
2171
- content: [],
2172
- model: chunk.model,
2173
- stop_reason: null,
2174
- stop_sequence: null,
2175
- usage: {
2176
- input_tokens: inputTokens,
2177
- output_tokens: 0,
2178
- ...chunk.usage?.prompt_tokens_details?.cache_creation_input_tokens !== void 0 && { cache_creation_input_tokens: cacheCreationTokens },
2179
- ...chunk.usage?.prompt_tokens_details?.cached_tokens !== void 0 && { cache_read_input_tokens: cachedTokens }
2180
- }
2181
- }
2182
- });
2183
- state.messageStartSent = true;
2074
+ return mergeAttachmentsIntoToolResults(toolResults, attachmentsByToolResultIndex);
2075
+ };
2076
/**
 * Merges the text blocks and attachments gathered from a user message's
 * content into its tool results.
 *
 * @param content - content passed through to collectMergeableUserContent
 * @returns the result of mergeAttachmentsForToolResults, or null when the
 *   content is not mergeable, has no tool results, or has neither text
 *   blocks nor attachments to merge
 */
const mergeUserMessageContent = (content) => {
  const collected = collectMergeableUserContent(content);
  if (!collected) return null;

  const { toolResults, textBlocks, attachments } = collected;
  const hasTextBlocks = textBlocks.length > 0;

  // Merging needs at least one tool result and something to fold into it.
  if (toolResults.length === 0) return null;
  if (!hasTextBlocks && attachments.length === 0) return null;

  // Text is merged first; attachments are then applied on top of that result.
  const resultsWithText = hasTextBlocks ? mergeToolResult(toolResults, textBlocks) : toolResults;
  return mergeAttachmentsForToolResults(resultsWithText, attachments);
};
2083
/**
 * Distributes text blocks over tool results.
 *
 * When there are exactly as many text blocks as tool results they are paired
 * by position via mergeContentWithText. Otherwise all text blocks are merged
 * into the last tool result via mergeContentWithTexts and the other tool
 * results are returned untouched.
 *
 * @param toolResults - tool result blocks
 * @param textBlocks - text blocks to merge
 * @returns a new array with one entry per tool result
 */
const mergeToolResult = (toolResults, textBlocks) => {
  if (textBlocks.length === toolResults.length) {
    return toolResults.map((toolResult, position) => mergeContentWithText(toolResult, textBlocks[position]));
  }

  const finalPosition = toolResults.length - 1;
  return toolResults.map((toolResult, position) => {
    if (position !== finalPosition) return toolResult;
    return mergeContentWithTexts(toolResult, textBlocks);
  });
};
2088
+ const stripToolReferenceTurnBoundary = (anthropicPayload) => {
2089
+ for (const msg of anthropicPayload.messages) {
2090
+ if (msg.role !== "user" || !Array.isArray(msg.content)) continue;
2091
+ if (!msg.content.some((block) => block.type === "tool_result" && hasToolRef(block))) continue;
2092
+ msg.content = msg.content.filter((block) => block.type !== "text" || block.text.trim() !== "Tool loaded.");
2184
2093
  }
2185
- }
2186
- function handleReasoningOpaque(delta, events, state) {
2187
- if (delta.reasoning_opaque && delta.reasoning_opaque.length > 0) {
2188
- events.push({
2189
- type: "content_block_start",
2190
- index: state.contentBlockIndex,
2191
- content_block: {
2192
- type: "thinking",
2193
- thinking: ""
2194
- }
2195
- }, {
2196
- type: "content_block_delta",
2197
- index: state.contentBlockIndex,
2198
- delta: {
2199
- type: "thinking_delta",
2200
- thinking: THINKING_TEXT$1
2201
- }
2202
- }, {
2203
- type: "content_block_delta",
2204
- index: state.contentBlockIndex,
2205
- delta: {
2206
- type: "signature_delta",
2207
- signature: delta.reasoning_opaque
2208
- }
2209
- }, {
2210
- type: "content_block_stop",
2211
- index: state.contentBlockIndex
2212
- });
2213
- state.contentBlockIndex++;
2094
+ };
2095
+ const mergeToolResultForClaude = (anthropicPayload, options) => {
2096
+ const lastMessageIndex = anthropicPayload.messages.length - 1;
2097
+ for (const [index, msg] of anthropicPayload.messages.entries()) {
2098
+ if (options?.skipLastMessage && index === lastMessageIndex) continue;
2099
+ if (msg.role !== "user" || !Array.isArray(msg.content)) continue;
2100
+ const mergedContent = mergeUserMessageContent(msg.content);
2101
+ if (mergedContent) msg.content = mergedContent;
2214
2102
  }
2215
- }
2216
- function handleThinkingText(delta, state, events) {
2217
- const reasoningText = delta.reasoning_text ?? delta.reasoning_content;
2218
- if (reasoningText && reasoningText.length > 0) {
2219
- if (state.contentBlockOpen) {
2220
- delta.content = reasoningText;
2221
- delta.reasoning_text = void 0;
2222
- delta.reasoning_content = void 0;
2223
- return;
2224
- }
2225
- if (!state.thinkingBlockOpen) {
2226
- events.push({
2227
- type: "content_block_start",
2228
- index: state.contentBlockIndex,
2229
- content_block: {
2230
- type: "thinking",
2231
- thinking: ""
2232
- }
2233
- });
2234
- state.thinkingBlockOpen = true;
2103
+ };
2104
/**
 * Rewrites payload.tools in place:
 *  - drops the tool named IDE_EXECUTE_CODE_TOOL unless it has defer_loading set;
 *  - replaces the description of the tool named IDE_GET_DIAGNOSTICS_TOOL with
 *    IDE_GET_DIAGNOSTICS_DESCRIPTION (on a shallow copy of the tool);
 *  - keeps every other tool as is.
 *
 * Does nothing when the payload has no tools or an empty tool list.
 *
 * @param payload - request payload whose `tools` array is sanitized
 */
const sanitizeIdeTools = (payload) => {
  const { tools } = payload;
  if (!tools || tools.length === 0) return;

  const isDroppedTool = (tool) => tool.name === IDE_EXECUTE_CODE_TOOL && !tool.defer_loading;
  const rewriteTool = (tool) => {
    if (tool.name !== IDE_GET_DIAGNOSTICS_TOOL) return tool;
    return {
      ...tool,
      description: IDE_GET_DIAGNOSTICS_DESCRIPTION
    };
  };

  payload.tools = tools.filter((tool) => !isDroppedTool(tool)).map(rewriteTool);
};
2115
/**
 * Tells whether a block's content is an array that contains at least one
 * item of type "tool_reference".
 *
 * @param block - block whose `content` is inspected
 * @returns true when such an item exists, false otherwise (including when
 *   `content` is not an array)
 */
const hasToolRef = (block) => {
  const { content } = block;
  if (!Array.isArray(content)) return false;
  return content.some((item) => item.type === "tool_reference");
};
2118
+ const stripCacheControl = (payload) => {
2119
+ if (Array.isArray(payload.system)) for (const block of payload.system) {
2120
+ const cacheControl = block.cache_control;
2121
+ if (cacheControl && typeof cacheControl === "object") {
2122
+ const { scope, ...rest } = cacheControl;
2123
+ block.cache_control = rest;
2235
2124
  }
2236
- events.push({
2237
- type: "content_block_delta",
2238
- index: state.contentBlockIndex,
2239
- delta: {
2240
- type: "thinking_delta",
2241
- thinking: reasoningText
2242
- }
2243
- });
2244
2125
  }
2245
- }
2246
- function closeThinkingBlockIfOpen(state, events) {
2247
- if (state.thinkingBlockOpen) {
2248
- events.push({
2249
- type: "content_block_delta",
2250
- index: state.contentBlockIndex,
2251
- delta: {
2252
- type: "signature_delta",
2253
- signature: ""
2254
- }
2255
- }, {
2256
- type: "content_block_stop",
2257
- index: state.contentBlockIndex
2258
- });
2259
- state.contentBlockIndex++;
2260
- state.thinkingBlockOpen = false;
2126
+ };
2127
/**
 * Removes unusable thinking blocks from assistant messages, in place.
 *
 * A thinking block is kept only when it has non-empty thinking text other
 * than the "Thinking..." placeholder and a non-empty signature that does not
 * contain "@". Non-thinking blocks are always kept. Messages that are not
 * from the assistant, or whose content is not an array, are left untouched.
 *
 * @param payload - request payload whose `messages` are filtered
 */
const filterAssistantThinkingBlocks = (payload) => {
  const shouldKeep = (block) => {
    if (block.type !== "thinking") return true;
    const { thinking, signature } = block;
    return thinking && thinking !== "Thinking..." && signature && !signature.includes("@");
  };

  for (const msg of payload.messages) {
    if (msg.role !== "assistant") continue;
    if (!Array.isArray(msg.content)) continue;
    msg.content = msg.content.filter(shouldKeep);
  }
};
2133
/**
 * Prepares a payload for the Messages API, mutating it in place.
 *
 * Always runs stripCacheControl and filterAssistantThinkingBlocks. Then, when
 * the selected model reports `adaptive_thinking` support and tool_choice is
 * not of type "any" or "tool", it:
 *  - replaces `payload.thinking` with `{ type: "adaptive" }`, adding
 *    `display: "summarized"` when the payload carried no thinking config or
 *    when shouldSummarizeThinkingDisplayForModel says so;
 *  - sets `payload.output_config` to the effort from
 *    getReasoningEffortForModel, with "none"/"minimal" raised to "low" and,
 *    when the model lists supported efforts that exclude it, replaced by the
 *    last listed effort.
 *
 * @param payload - Messages API payload to adjust
 * @param selectedModel - model descriptor (may be undefined)
 */
const prepareMessagesApiPayload = (payload, selectedModel) => {
  stripCacheControl(payload);
  filterAssistantThinkingBlocks(payload);

  // Captured before payload.thinking is overwritten below.
  const thinkingWasRequested = Boolean(payload.thinking);
  const toolChoiceType = payload.tool_choice?.type;
  const toolUseIsForced = toolChoiceType === "any" || toolChoiceType === "tool";

  if (!selectedModel?.capabilities.supports.adaptive_thinking || toolUseIsForced) return;

  const thinking = { type: "adaptive" };
  payload.thinking = thinking;
  const summarizeForModel = shouldSummarizeThinkingDisplayForModel(payload.model);
  if (!thinkingWasRequested || summarizeForModel) thinking.display = "summarized";

  let effort = getReasoningEffortForModel(payload.model);
  if (effort === "none" || effort === "minimal") effort = "low";

  const supportedEfforts = selectedModel.capabilities.supports.reasoning_effort;
  if (supportedEfforts && !supportedEfforts.includes(effort)) {
    effort = supportedEfforts.at(-1);
  }
  payload.output_config = { effort };
};
2150
+ //#endregion
2151
+ //#region src/routes/provider/messages/count-tokens-handler.ts
2152
+ const logger$6 = createHandlerLogger("provider-count-tokens-handler");
2153
/**
 * Route handler: reads the `provider` route parameter and the JSON request
 * body, then delegates to handleProviderCountTokensForProvider.
 *
 * @param c - request context exposing `req.param` and `req.json`
 * @returns whatever handleProviderCountTokensForProvider resolves to
 */
async function handleProviderCountTokens(c) {
  const providerName = c.req.param("provider");
  const requestBody = await c.req.json();
  return await handleProviderCountTokensForProvider(c, {
    payload: requestBody,
    provider: providerName
  });
}
2160
/**
 * Counts tokens for an Anthropic-style payload addressed to a named provider.
 *
 * Responds with a 404 JSON error when resolveProviderConfig yields nothing
 * for the provider. Otherwise the payload is translated with
 * translateToOpenAI (passing per-model `supportPdf` /
 * `toolContentSupportType` options only for "openai-compatible" and
 * "openai-responses" providers), counted with getTokenCount against
 * createFallbackModel(modelId), and the sum of the input and output counts is
 * returned as `input_tokens`.
 *
 * @param c - request context used to build the JSON response
 * @param options - `{ payload, provider }`: the request payload and the
 *   provider name
 * @returns the JSON response
 */
async function handleProviderCountTokensForProvider(c, options) {
  const anthropicPayload = options.payload;
  const provider = options.provider;

  normalizeSystemMessages(anthropicPayload);
  const modelId = anthropicPayload.model.trim();

  const providerConfig = await resolveProviderConfig(provider);
  if (!providerConfig) {
    const error = {
      message: `Provider '${provider}' not found or disabled`,
      type: "invalid_request_error"
    };
    return c.json({ error }, 404);
  }

  const modelConfig = providerConfig.models?.[modelId];
  const usesOpenAIOptions = providerConfig.type === "openai-compatible" || providerConfig.type === "openai-responses";
  const translateOptions = usesOpenAIOptions ? {
    supportPdf: modelConfig?.supportPdf,
    toolContentSupportType: modelConfig?.toolContentSupportType ?? []
  } : void 0;

  const openAIPayload = translateToOpenAI(anthropicPayload, translateOptions);
  const tokenCount = await getTokenCount(openAIPayload, createFallbackModel(modelId));
  const finalTokenCount = tokenCount.input + tokenCount.output;

  logger$6.debug("provider.count_tokens.success", {
    provider,
    model: anthropicPayload.model,
    input_tokens: finalTokenCount
  });
  return c.json({ input_tokens: finalTokenCount });
}
2263
2182
  //#endregion
2264
- //#region src/services/copilot/create-responses.ts
2265
- const createResponses = async (payload, { vision, initiator, subagentMarker, requestId, sessionId, compactType, transport = "http" }) => {
2266
- if (!state.copilotToken) throw new Error("Copilot token not found");
2267
- const headers = {
2268
- ...copilotHeaders(state, requestId, vision),
2269
- "x-initiator": initiator
2183
+ //#region src/routes/messages/count-tokens-handler.ts
2184
+ const resolveCountTokensModel = (modelId, findModel = findEndpointModel) => {
2185
+ const selectedModel = findModel(modelId);
2186
+ if (selectedModel) return {
2187
+ fallback: false,
2188
+ model: selectedModel
2189
+ };
2190
+ return {
2191
+ fallback: true,
2192
+ model: createFallbackModel(modelId.trim())
2270
2193
  };
2271
- prepareInteractionHeaders(sessionId, Boolean(subagentMarker), headers);
2272
- prepareForCompact(headers, compactType);
2273
- payload.service_tier = void 0;
2274
- consola.log(`<-- model: ${payload.model}`);
2275
- const effectiveTransport = compactType === 1 ? "http" : transport;
2276
- if (payload.stream === true && effectiveTransport === "websocket") return createPooledResponsesWebSocketStream(prepareResponsesWebSocketRequest(payload, headers, {
2277
- requestId,
2278
- subagentMarker
2279
- }));
2280
- return await createHttpResponses(payload, headers);
2281
2194
  };
2282
- const createHttpResponses = async (payload, headers) => {
2283
- const response = await fetch(`${copilotBaseUrl(state)}/responses`, {
2195
+ /**
2196
+ * Forwards token counting to Anthropic's real /v1/messages/count_tokens endpoint.
2197
+ * Returns the result on success, or null to fall through to estimation.
2198
+ */
2199
+ async function countTokensViaAnthropic(c, payload) {
2200
+ if (!payload.model.startsWith("claude")) return null;
2201
+ const apiKey = getAnthropicApiKey();
2202
+ if (!apiKey) return null;
2203
+ const model = payload.model.replaceAll(".", "-");
2204
+ const res = await fetch("https://api.anthropic.com/v1/messages/count_tokens", {
2284
2205
  method: "POST",
2285
- headers,
2286
- body: JSON.stringify(payload)
2206
+ headers: {
2207
+ "content-type": "application/json",
2208
+ "x-api-key": apiKey,
2209
+ "anthropic-version": "2023-06-01",
2210
+ "anthropic-beta": "token-counting-2024-11-01"
2211
+ },
2212
+ body: JSON.stringify({
2213
+ ...payload,
2214
+ model
2215
+ })
2287
2216
  });
2288
- logCopilotRateLimits(response.headers);
2289
- if (!response.ok) {
2290
- consola.error("Failed to create responses", response);
2291
- throw new HTTPError("Failed to create responses", response);
2217
+ if (!res.ok) {
2218
+ consola.warn("Anthropic count_tokens failed:", res.status, await res.text().catch(() => ""), "- falling back to estimation");
2219
+ return null;
2292
2220
  }
2293
- if (payload.stream) return events(response);
2294
- return await response.json();
2295
- };
2296
- const prepareResponsesWebSocketRequest = (payload, preparedHeaders, options) => {
2297
- const initiator = getResponsesWebSocketInitiator(preparedHeaders);
2298
- return {
2299
- headers: copilotWebSocketHeaders(preparedHeaders),
2300
- poolKey: buildResponsesWebSocketPoolKey(payload, options),
2301
- payload: buildResponsesWebSocketPayload(payload, initiator),
2302
- url: buildResponsesWebSocketUrl(copilotBaseUrl(state))
2303
- };
2304
- };
2305
- const buildResponsesWebSocketPoolKey = (payload, { requestId, subagentMarker }) => {
2306
- const tokenFingerprint = state.copilotToken ? createHash("sha256").update(state.copilotToken).digest("hex").slice(0, 16) : "missing-token";
2307
- const subagentKey = subagentMarker ? [
2308
- subagentMarker.session_id,
2309
- subagentMarker.agent_id,
2310
- subagentMarker.agent_type
2311
- ].join(":") : "main";
2312
- return [
2313
- tokenFingerprint,
2314
- payload.model,
2315
- requestId,
2316
- subagentKey
2317
- ].map(encodePoolKeyPart).join("|");
2318
- };
2319
- const getResponsesWebSocketInitiator = (preparedHeaders) => {
2320
- return getHeaderValue(preparedHeaders, "x-initiator")?.toLowerCase() === "agent" ? "agent" : "user";
2321
- };
2322
- const createPooledResponsesWebSocketStream = (request) => createPooledWebSocketStream(request, {
2323
- createChunk: createResponsesWebSocketStreamChunk,
2324
- isTerminalChunk: isTerminalResponsesStreamChunk,
2325
- openErrorMessage: "Failed to create responses websocket",
2326
- streamErrorMessage: "Responses websocket stream error",
2327
- terminalChunkMissingMessage: "Responses websocket ended without a terminal response"
2328
- });
2329
- const buildResponsesWebSocketPayload = (payload, initiator) => {
2330
- const websocketPayload = {
2331
- ...payload,
2332
- type: "response.create",
2333
- initiator
2334
- };
2335
- delete websocketPayload.stream;
2336
- delete websocketPayload["background"];
2337
- delete websocketPayload.service_tier;
2338
- return websocketPayload;
2339
- };
2340
- const buildResponsesWebSocketUrl = (baseUrl) => {
2341
- return createWebSocketUrl(`${baseUrl.replace(/\/+$/u, "")}/responses`);
2342
- };
2343
- const getHeaderValue = (headers, headerName) => {
2344
- const normalizedHeaderName = headerName.toLowerCase();
2345
- return Object.entries(headers).find(([key]) => key.toLowerCase() === normalizedHeaderName)?.[1];
2346
- };
2347
- const encodePoolKeyPart = (value) => encodeURIComponent(value);
2348
- const createResponsesWebSocketStreamChunk = (data) => {
2349
- if (data === "[DONE]") return { data };
2350
- try {
2351
- const parsed = JSON.parse(data);
2352
- if (parsed.type === "response.completed") logCopilotQuotaSnapshots(parsed.copilot_quota_snapshots);
2353
- return {
2354
- data: JSON.stringify(parsed),
2355
- event: typeof parsed.type === "string" ? parsed.type : void 0,
2356
- id: typeof parsed.id === "string" ? parsed.id : void 0
2357
- };
2358
- } catch {
2359
- return { data };
2221
+ const result = await res.json();
2222
+ consola.info("Token count (Anthropic API):", result.input_tokens);
2223
+ return c.json(result);
2224
+ }
2225
+ /**
2226
+ * Handles token counting for Anthropic messages.
2227
+ *
2228
+ * When an Anthropic API key is available (via config or ANTHROPIC_API_KEY env var)
2229
+ * and the model is a Claude model, forwards to Anthropic's free /v1/messages/count_tokens
2230
+ * endpoint for accurate counts. Otherwise falls back to GPT tokenizer estimation.
2231
+ */
2232
+ async function handleCountTokens(c) {
2233
+ const anthropicPayload = await c.req.json();
2234
+ anthropicPayload.model = resolveMappedModel(anthropicPayload.model);
2235
+ normalizeSystemMessages(anthropicPayload);
2236
+ const providerModelAlias = parseProviderModelAlias(anthropicPayload.model);
2237
+ if (providerModelAlias) {
2238
+ anthropicPayload.model = providerModelAlias.model;
2239
+ return await handleProviderCountTokensForProvider(c, {
2240
+ payload: anthropicPayload,
2241
+ provider: providerModelAlias.provider
2242
+ });
2360
2243
  }
2361
- };
2362
- const isTerminalResponsesStreamChunk = (chunk) => {
2363
- if (!chunk.data || chunk.data === "[DONE]") return false;
2364
- try {
2365
- const parsed = JSON.parse(chunk.data);
2366
- return parsed.type === "response.completed" || parsed.type === "response.failed" || parsed.type === "response.incomplete" || parsed.type === "error";
2367
- } catch {
2368
- return false;
2244
+ const anthropicResult = await countTokensViaAnthropic(c, anthropicPayload);
2245
+ if (anthropicResult) return anthropicResult;
2246
+ const anthropicBeta = c.req.header("anthropic-beta");
2247
+ const openAIPayload = translateToOpenAI(anthropicPayload);
2248
+ const requestedModel = anthropicPayload.model;
2249
+ const resolve = resolveCountTokensModel(requestedModel);
2250
+ const selectedModel = resolve.model;
2251
+ anthropicPayload.model = selectedModel.id;
2252
+ if (resolve.fallback) consola.warn(`Model '${requestedModel}' not found, using o200k_base fallback tokenizer`);
2253
+ const tokenCount = await getTokenCount(openAIPayload, selectedModel);
2254
+ if (anthropicPayload.tools && anthropicPayload.tools.length > 0) {
2255
+ let addToolSystemPromptCount = false;
2256
+ if (anthropicBeta) {
2257
+ const toolsLength = anthropicPayload.tools.length;
2258
+ addToolSystemPromptCount = !anthropicPayload.tools.some((tool) => tool.name.startsWith("mcp__") || tool.name === "Skill" && toolsLength === 1);
2259
+ }
2260
+ if (addToolSystemPromptCount) {
2261
+ if (anthropicPayload.model.startsWith("claude")) tokenCount.input = tokenCount.input + 346;
2262
+ else if (anthropicPayload.model.startsWith("grok")) tokenCount.input = tokenCount.input + 120;
2263
+ }
2369
2264
  }
2370
- };
2265
+ let finalTokenCount = tokenCount.input + tokenCount.output;
2266
+ if (anthropicPayload.model.startsWith("claude")) finalTokenCount = Math.round(finalTokenCount * getClaudeTokenMultiplier());
2267
+ consola.info("Token count:", finalTokenCount);
2268
+ return c.json({ input_tokens: finalTokenCount });
2269
+ }
2371
2270
  //#endregion
2372
- //#region src/routes/messages/responses-translation.ts
2373
- const MESSAGE_TYPE = "message";
2374
- const COMPACTION_SIGNATURE_PREFIX = "cm1#";
2375
- const COMPACTION_SIGNATURE_SEPARATOR = "@";
2376
- const THINKING_TEXT = "Thinking...";
2377
- const buildPromptCacheKey = (basePromptCacheKey, subagentAgentId) => {
2378
- if (!basePromptCacheKey) return null;
2379
- const normalizedSubagentAgentId = subagentAgentId?.trim() || null;
2380
- if (!normalizedSubagentAgentId) return basePromptCacheKey;
2381
- return `${basePromptCacheKey}:agent:${normalizedSubagentAgentId}`;
2382
- };
2383
- const translateAnthropicMessagesToResponsesPayload = (payload, subagentAgentId) => {
2384
- const input = [];
2385
- const applyPhase = shouldApplyPhase(payload.model);
2386
- const toolSearchEnabled = shouldEnableResponsesToolSearch({
2387
- model: payload.model,
2388
- tools: payload.tools
2389
- });
2390
- const translationState = {
2391
- originalTools: payload.tools ?? [],
2392
- toolSearchEnabled,
2393
- toolUseNameById: /* @__PURE__ */ new Map()
2394
- };
2395
- for (const message of payload.messages) input.push(...translateMessage(message, payload.model, applyPhase, translationState));
2396
- const hasOriginalTools = Array.isArray(payload.tools) && payload.tools.length > 0;
2397
- const translatedTools = convertAnthropicTools(payload.tools, toolSearchEnabled);
2398
- const toolChoice = convertAnthropicToolChoice(payload.tool_choice, toolSearchEnabled);
2399
- const { sessionId: metadataPromptCacheKey } = parseUserIdMetadata(payload.metadata?.user_id);
2400
- const sessionAffinity = requestContext.getStore()?.sessionAffinity?.trim() || null;
2401
- const promptCacheKey = buildPromptCacheKey(metadataPromptCacheKey ?? sessionAffinity, subagentAgentId);
2402
- const responsesPayload = {
2403
- model: payload.model,
2404
- input,
2405
- instructions: translateSystemPrompt(payload.system, payload.model),
2406
- temperature: 1,
2407
- top_p: payload.top_p ?? null,
2408
- max_output_tokens: Math.max(payload.max_tokens, 12800),
2409
- tools: translatedTools,
2410
- tool_choice: toolChoice,
2411
- metadata: payload.metadata ? { ...payload.metadata } : null,
2412
- stream: payload.stream ?? null,
2413
- store: false,
2414
- parallel_tool_calls: true,
2415
- reasoning: {
2416
- effort: getReasoningEffortForModel(payload.model),
2417
- summary: "detailed"
2418
- },
2419
- include: ["reasoning.encrypted_content"]
2420
- };
2421
- if (hasOriginalTools) responsesPayload.prompt_cache_key = promptCacheKey;
2422
- return responsesPayload;
2423
- };
2424
- const encodeCompactionCarrierSignature = (compaction) => {
2425
- return `${COMPACTION_SIGNATURE_PREFIX}${compaction.encrypted_content}${COMPACTION_SIGNATURE_SEPARATOR}${compaction.id}`;
2271
+ //#region src/lib/codex-rate-limit.ts
2272
+ const codexRateLimitScopes = ["primary", "secondary"];
2273
/**
 * Formats a reset timestamp for logging.
 *
 * The value is multiplied by 1000 and passed to `Date`; when that produces an
 * invalid date the raw value is returned as a string instead.
 *
 * @param resetAt - timestamp (multiplied by 1000 before being given to Date)
 * @returns the locale-formatted date, or `String(resetAt)` for invalid dates
 */
const formatCodexRateLimitResetAt = (resetAt) => {
  const resetDate = /* @__PURE__ */ new Date(resetAt * 1e3);
  if (Number.isNaN(resetDate.getTime())) return String(resetAt);
  return resetDate.toLocaleString();
};
2427
- const decodeCompactionCarrierSignature = (signature) => {
2428
- if (signature.startsWith(COMPACTION_SIGNATURE_PREFIX)) {
2429
- const raw = signature.slice(4);
2430
- const separatorIndex = raw.indexOf(COMPACTION_SIGNATURE_SEPARATOR);
2431
- if (separatorIndex <= 0 || separatorIndex === raw.length - 1) return;
2432
- const encrypted_content = raw.slice(0, separatorIndex);
2433
- const id = raw.slice(separatorIndex + 1);
2434
- if (!encrypted_content) return;
2435
- return {
2436
- id,
2437
- encrypted_content
2438
- };
2277
/**
 * Logs one line per rate-limit window found in a "codex.rate_limits" event.
 *
 * Events that are not objects, have another `type`, or carry no
 * `rate_limits` object are ignored. For each scope in codexRateLimitScopes
 * whose entry passes isCodexRateLimitWindow, a summary is logged through
 * consola: the optional `allowed` / `limit_reached` booleans, the used
 * percentage and the formatted reset time, labelled with the plan type when
 * one is present.
 *
 * @param event - candidate event value
 */
const logCodexRateLimitsEvent = (event) => {
  if (!event || typeof event !== "object") return;
  if (event.type !== "codex.rate_limits") return;

  const limits = event.rate_limits;
  if (!limits || typeof limits !== "object") return;

  const planType = typeof event.plan_type === "string" ? event.plan_type : null;
  const planSuffix = planType ? ` (${planType})` : "";

  // These flags are shared by every scope, so build their summary parts once.
  const sharedParts = [];
  if (typeof limits.allowed === "boolean") sharedParts.push(`allowed=${limits.allowed}`);
  if (typeof limits.limit_reached === "boolean") sharedParts.push(`limit_reached=${limits.limit_reached}`);

  for (const scope of codexRateLimitScopes) {
    const scopeWindow = limits[scope];
    if (!isCodexRateLimitWindow(scopeWindow)) continue;

    const parts = [
      ...sharedParts,
      `used=${scopeWindow.used_percent}%`,
      `reset_at=${formatCodexRateLimitResetAt(scopeWindow.reset_at)}`
    ];
    consola.log(`Codex ${scope} rate limit${planSuffix}: ${parts.join(", ")}`);
  }
};
2441
- const translateMessage = (message, model, applyPhase, state) => {
2442
- if (message.role === "user") return translateUserMessage(message, state);
2443
- return translateAssistantMessage(message, model, applyPhase, state);
2444
- };
2445
- const translateUserMessage = (message, state) => {
2446
- if (typeof message.content === "string") return [createMessage("user", message.content)];
2447
- if (!Array.isArray(message.content)) return [];
2448
- const items = [];
2449
- const pendingContent = [];
2450
- for (const block of message.content) {
2451
- if (block.type === "tool_result") {
2452
- flushPendingContent(pendingContent, items, { role: "user" });
2453
- items.push(createToolCallOutput(block, state));
2454
- continue;
2455
- }
2456
- const converted = translateUserContentBlock(block);
2457
- if (converted.length > 0) pendingContent.push(...converted);
2458
- }
2459
- flushPendingContent(pendingContent, items, { role: "user" });
2460
- return items;
2298
/**
 * Checks that a value is an object whose `reset_after_seconds`, `reset_at`,
 * `used_percent` and `window_minutes` properties are all numbers.
 *
 * @param value - candidate rate-limit window
 * @returns true when all four properties are of type number
 */
const isCodexRateLimitWindow = (value) => {
  if (!value || typeof value !== "object") return false;
  const numericFields = ["reset_after_seconds", "reset_at", "used_percent", "window_minutes"];
  return numericFields.every((field) => typeof value[field] === "number");
};
2462
- const translateAssistantMessage = (message, model, applyPhase, state) => {
2463
- const assistantPhase = resolveAssistantPhase(model, message.content, applyPhase);
2464
- if (typeof message.content === "string") return [createMessage("assistant", message.content, assistantPhase)];
2465
- if (!Array.isArray(message.content)) return [];
2466
- const items = [];
2467
- const pendingContent = [];
2468
- for (const block of message.content) {
2469
- if (block.type === "tool_use") {
2470
- state.toolUseNameById.set(block.id, block.name);
2471
- flushPendingContent(pendingContent, items, {
2472
- role: "assistant",
2473
- phase: assistantPhase
2303
+ //#endregion
2304
+ //#region src/routes/messages/stream-translation.ts
2305
/**
 * Tells whether the currently open content block belongs to a tool call.
 *
 * @param state - stream state with `contentBlockOpen`, `contentBlockIndex`
 *   and a `toolCalls` record
 * @returns true when a block is open and some recorded tool call has
 *   `anthropicBlockIndex` equal to the current block index
 */
function isToolBlockOpen(state) {
  if (!state.contentBlockOpen) return false;
  for (const toolCall of Object.values(state.toolCalls)) {
    if (toolCall.anthropicBlockIndex === state.contentBlockIndex) return true;
  }
  return false;
}
2309
/**
 * Translates one OpenAI-style stream chunk into a list of Anthropic stream
 * events.
 *
 * A chunk without choices only completes a pending message (via
 * completePendingMessage). Otherwise the first choice is run through the
 * handlers in a fixed order: message start, thinking text, content, tool
 * calls, finish.
 *
 * @param chunk - stream chunk with a `choices` array
 * @param state - mutable stream translation state shared across chunks
 * @returns the events produced for this chunk
 */
function translateChunkToAnthropicEvents(chunk, state) {
  const events = [];
  const { choices } = chunk;

  if (choices.length === 0) {
    completePendingMessage(state, events, chunk);
    return events;
  }

  const firstChoice = choices[0];
  const delta = firstChoice.delta;

  handleMessageStart(state, events, chunk);
  // Handler order matters: each one may open or close blocks on `state`.
  for (const handleDelta of [handleThinkingText, handleContent, handleToolCalls]) {
    handleDelta(delta, state, events);
  }
  handleFinish(firstChoice, state, {
    events,
    chunk
  });
  return events;
}
2327
/**
 * Collects the events produced by completing any pending message on the
 * given state, without a chunk.
 *
 * @param state - mutable stream translation state
 * @returns the events emitted by completePendingMessage (possibly empty)
 */
function flushPendingAnthropicStreamEvents(state) {
  const flushedEvents = [];
  completePendingMessage(state, flushedEvents);
  return flushedEvents;
}
2332
/**
 * Emits the pending `message_delta` (if any) followed by a `message_stop`
 * event, then clears the pending delta from the state.
 *
 * When the chunk carries usage, the pending delta's usage is replaced with
 * the value computed by getAnthropicUsageFromOpenAIChunk before it is
 * emitted.
 *
 * @param state - stream state holding `pendingMessageDelta`
 * @param events - array the events are appended to
 * @param chunk - optional chunk whose usage refreshes the pending delta
 */
function completePendingMessage(state, events, chunk) {
  const pendingDelta = state.pendingMessageDelta;
  if (!pendingDelta) return;

  if (chunk?.usage) {
    pendingDelta.usage = getAnthropicUsageFromOpenAIChunk(chunk);
  }

  events.push(pendingDelta);
  events.push({ type: "message_stop" });
  state.pendingMessageDelta = void 0;
}
2338
+ function handleFinish(choice, state, context) {
2339
+ const { events, chunk } = context;
2340
+ if (choice.finish_reason && choice.finish_reason.length > 0) {
2341
+ if (state.contentBlockOpen) {
2342
+ const toolBlockOpen = isToolBlockOpen(state);
2343
+ context.events.push({
2344
+ type: "content_block_stop",
2345
+ index: state.contentBlockIndex
2474
2346
  });
2475
- items.push(createToolCall(block, state));
2476
- continue;
2347
+ state.contentBlockOpen = false;
2348
+ state.contentBlockIndex++;
2349
+ if (!toolBlockOpen) handleReasoningOpaque(choice.delta, events, state);
2477
2350
  }
2478
- if (block.type === "thinking" && block.signature) {
2479
- const compactionContent = createCompactionContent(block);
2480
- if (compactionContent) {
2481
- flushPendingContent(pendingContent, items, {
2482
- role: "assistant",
2483
- phase: assistantPhase
2351
+ flushDeferredContent(state, events);
2352
+ state.pendingMessageDelta = {
2353
+ type: "message_delta",
2354
+ delta: {
2355
+ stop_reason: mapOpenAIStopReasonToAnthropic(choice.finish_reason),
2356
+ stop_sequence: null
2357
+ },
2358
+ usage: getAnthropicUsageFromOpenAIChunk(chunk)
2359
+ };
2360
+ if (chunk.usage) completePendingMessage(state, events, chunk);
2361
+ }
2362
+ }
2363
/**
 * Builds an Anthropic-style usage object from a chunk's OpenAI-style usage.
 *
 * `input_tokens` comes from getOpenAIChunkUsageTokens and `output_tokens`
 * from `usage.completion_tokens` (0 when absent). The cache fields are only
 * included when the corresponding entry of `usage.prompt_tokens_details` is
 * not undefined.
 *
 * @param chunk - chunk that may carry a `usage` object
 * @returns usage with `input_tokens`, `output_tokens` and optional
 *   `cache_creation_input_tokens` / `cache_read_input_tokens`
 */
function getAnthropicUsageFromOpenAIChunk(chunk) {
  const tokens = getOpenAIChunkUsageTokens(chunk);
  const details = chunk.usage?.prompt_tokens_details;

  const usage = {
    input_tokens: tokens.inputTokens,
    output_tokens: chunk.usage?.completion_tokens ?? 0
  };
  if (details?.cache_creation_input_tokens !== void 0) {
    usage.cache_creation_input_tokens = tokens.cacheCreationTokens;
  }
  if (details?.cached_tokens !== void 0) {
    usage.cache_read_input_tokens = tokens.cachedTokens;
  }
  return usage;
}
2372
/**
 * Extracts token counts from a chunk's usage, defaulting missing values to 0.
 *
 * `inputTokens` is the prompt token count minus the cached and
 * cache-creation counts, clamped so it never goes below 0.
 *
 * @param chunk - chunk that may carry a `usage` object
 * @returns `{ cacheCreationTokens, cachedTokens, inputTokens }`
 */
function getOpenAIChunkUsageTokens(chunk) {
  const usage = chunk.usage;
  const details = usage?.prompt_tokens_details;

  const promptTokens = usage?.prompt_tokens ?? 0;
  const cachedTokens = details?.cached_tokens ?? 0;
  const cacheCreationTokens = details?.cache_creation_input_tokens ?? 0;
  const uncachedTokens = promptTokens - cachedTokens - cacheCreationTokens;

  return {
    cacheCreationTokens,
    cachedTokens,
    inputTokens: Math.max(0, uncachedTokens)
  };
}
2382
+ function handleToolCalls(delta, state, events) {
2383
+ if (delta.tool_calls && delta.tool_calls.length > 0) {
2384
+ closeThinkingBlockIfOpen(state, events);
2385
+ handleReasoningOpaqueInToolCalls(state, events, delta);
2386
+ for (const toolCall of delta.tool_calls) {
2387
+ if (toolCall.id && toolCall.function?.name) {
2388
+ if (state.contentBlockOpen) {
2389
+ events.push({
2390
+ type: "content_block_stop",
2391
+ index: state.contentBlockIndex
2392
+ });
2393
+ state.contentBlockIndex++;
2394
+ state.contentBlockOpen = false;
2395
+ }
2396
+ const anthropicBlockIndex = state.contentBlockIndex;
2397
+ state.toolCalls[toolCall.index] = {
2398
+ id: toolCall.id,
2399
+ name: toolCall.function.name,
2400
+ anthropicBlockIndex
2401
+ };
2402
+ events.push({
2403
+ type: "content_block_start",
2404
+ index: anthropicBlockIndex,
2405
+ content_block: {
2406
+ type: "tool_use",
2407
+ id: toolCall.id,
2408
+ name: toolCall.function.name,
2409
+ input: {}
2410
+ }
2484
2411
  });
2485
- items.push(compactionContent);
2486
- continue;
2412
+ state.contentBlockOpen = true;
2487
2413
  }
2488
- if (block.signature.includes("@")) {
2489
- flushPendingContent(pendingContent, items, {
2490
- role: "assistant",
2491
- phase: assistantPhase
2414
+ if (toolCall.function?.arguments) {
2415
+ const toolCallInfo = state.toolCalls[toolCall.index];
2416
+ if (toolCallInfo) events.push({
2417
+ type: "content_block_delta",
2418
+ index: toolCallInfo.anthropicBlockIndex,
2419
+ delta: {
2420
+ type: "input_json_delta",
2421
+ partial_json: toolCall.function.arguments
2422
+ }
2492
2423
  });
2493
- items.push(createReasoningContent(block));
2494
- continue;
2495
2424
  }
2496
2425
  }
2497
- const converted = translateAssistantContentBlock(block);
2498
- if (converted) pendingContent.push(converted);
2499
2426
  }
2500
- flushPendingContent(pendingContent, items, {
2501
- role: "assistant",
2502
- phase: assistantPhase
2503
- });
2504
- return items;
2505
- };
2506
- const translateUserContentBlock = (block) => {
2507
- switch (block.type) {
2508
- case "text": return [createTextContent(block.text)];
2509
- case "image": return [createImageContent(block)];
2510
- case "document": return [createFileContent(block)];
2511
- default: return [];
2427
+ }
2428
+ function handleReasoningOpaqueInToolCalls(state, events, delta) {
2429
+ if (state.contentBlockOpen && !isToolBlockOpen(state)) {
2430
+ events.push({
2431
+ type: "content_block_stop",
2432
+ index: state.contentBlockIndex
2433
+ });
2434
+ state.contentBlockIndex++;
2435
+ state.contentBlockOpen = false;
2512
2436
  }
2513
- };
2514
- const translateAssistantContentBlock = (block) => {
2515
- switch (block.type) {
2516
- case "text": return createOutPutTextContent(block.text);
2517
- default: return;
2437
+ handleReasoningOpaque(delta, events, state);
2438
+ }
2439
+ function handleContent(delta, state, events) {
2440
+ if (delta.content && delta.content.length > 0) {
2441
+ closeThinkingBlockIfOpen(state, events);
2442
+ if (isToolBlockOpen(state) || hasToolCallDelta(delta)) {
2443
+ state.deferredContent = `${state.deferredContent ?? ""}${delta.content}`;
2444
+ return;
2445
+ }
2446
+ if (!state.contentBlockOpen) {
2447
+ events.push({
2448
+ type: "content_block_start",
2449
+ index: state.contentBlockIndex,
2450
+ content_block: {
2451
+ type: "text",
2452
+ text: ""
2453
+ }
2454
+ });
2455
+ state.contentBlockOpen = true;
2456
+ }
2457
+ events.push({
2458
+ type: "content_block_delta",
2459
+ index: state.contentBlockIndex,
2460
+ delta: {
2461
+ type: "text_delta",
2462
+ text: delta.content
2463
+ }
2464
+ });
2518
2465
  }
2519
- };
2520
- const flushPendingContent = (pendingContent, target, message) => {
2521
- if (pendingContent.length === 0) return;
2522
- const messageContent = [...pendingContent];
2523
- target.push(createMessage(message.role, messageContent, message.phase));
2524
- pendingContent.length = 0;
2525
- };
2526
- const createMessage = (role, content, phase) => ({
2527
- type: MESSAGE_TYPE,
2528
- role,
2529
- content,
2530
- ...role === "assistant" && phase ? { phase } : {}
2531
- });
2532
- const resolveAssistantPhase = (_model, content, applyPhase) => {
2533
- if (!applyPhase) return;
2534
- if (typeof content === "string") return "final_answer";
2535
- if (!Array.isArray(content)) return;
2536
- if (!content.some((block) => block.type === "text")) return;
2537
- return content.some((block) => block.type === "tool_use") ? "commentary" : "final_answer";
2538
- };
2539
- const shouldApplyPhase = (_model) => {
2540
- return true;
2541
- };
2542
- const createTextContent = (text) => ({
2543
- type: "input_text",
2544
- text
2545
- });
2546
- const createOutPutTextContent = (text) => ({
2547
- type: "output_text",
2548
- text
2549
- });
2550
- const createImageContent = (block) => ({
2551
- type: "input_image",
2552
- image_url: `data:${block.source.media_type};base64,${block.source.data}`,
2553
- detail: "auto"
2554
- });
2555
- const createFileContent = (block) => ({
2556
- type: "input_file",
2557
- file_data: `data:${block.source.media_type};base64,${block.source.data}`,
2558
- filename: block.title ?? "document.pdf"
2559
- });
2560
- const createReasoningContent = (block) => {
2561
- const { encryptedContent, id } = parseReasoningSignature(block.signature);
2562
- const thinking = block.thinking === "Thinking..." ? "" : block.thinking;
2563
- return {
2564
- id,
2565
- type: "reasoning",
2566
- summary: thinking ? [{
2567
- type: "summary_text",
2568
- text: thinking
2569
- }] : [],
2570
- encrypted_content: encryptedContent
2571
- };
2572
- };
2573
- const createCompactionContent = (block) => {
2574
- const compaction = decodeCompactionCarrierSignature(block.signature);
2575
- if (!compaction) return;
2576
- return {
2577
- id: compaction.id,
2578
- type: "compaction",
2579
- encrypted_content: compaction.encrypted_content
2466
+ if (delta.content === "" && delta.reasoning_opaque && delta.reasoning_opaque.length > 0 && state.thinkingBlockOpen) {
2467
+ events.push({
2468
+ type: "content_block_delta",
2469
+ index: state.contentBlockIndex,
2470
+ delta: {
2471
+ type: "signature_delta",
2472
+ signature: delta.reasoning_opaque
2473
+ }
2474
+ }, {
2475
+ type: "content_block_stop",
2476
+ index: state.contentBlockIndex
2477
+ });
2478
+ state.contentBlockIndex++;
2479
+ state.thinkingBlockOpen = false;
2480
+ }
2481
+ }
2482
/**
 * Reports whether a streamed delta carries at least one tool call.
 * @param {{ tool_calls?: unknown[] }} delta
 * @returns {boolean}
 */
function hasToolCallDelta(delta) {
  const toolCalls = delta.tool_calls;
  if (!toolCalls) return false;
  return toolCalls.length > 0;
}
2485
/**
 * Emits any text buffered in `state.deferredContent` as a complete text block.
 *
 * Opens a text block if none is open, pushes the buffered text as one delta,
 * closes the block, clears the buffer and advances the block index.
 * Does nothing when the buffer is empty or unset.
 * @param {object} state - Mutable stream-translation state.
 * @param {object[]} events - Output list that receives the generated events.
 */
function flushDeferredContent(state, events) {
  const text = state.deferredContent;
  if (!text) return;
  const index = state.contentBlockIndex;
  if (!state.contentBlockOpen) {
    events.push({
      type: "content_block_start",
      index,
      content_block: { type: "text", text: "" }
    });
    state.contentBlockOpen = true;
  }
  events.push(
    { type: "content_block_delta", index, delta: { type: "text_delta", text } },
    { type: "content_block_stop", index }
  );
  state.deferredContent = void 0;
  state.contentBlockOpen = false;
  state.contentBlockIndex++;
}
2513
/**
 * Emits the `message_start` event once per stream.
 *
 * Token counts come from `getOpenAIChunkUsageTokens(chunk)`. The cache fields
 * are included only when the matching `prompt_tokens_details` entry is
 * present on the chunk.
 * @param {object} state - Mutable stream state; `messageStartSent` is set to true.
 * @param {object[]} events - Output list that receives the event.
 * @param {object} chunk - Upstream chunk supplying `id`, `model` and `usage`.
 */
function handleMessageStart(state, events, chunk) {
  if (state.messageStartSent) return;
  const { cachedTokens, cacheCreationTokens, inputTokens } = getOpenAIChunkUsageTokens(chunk);
  const promptDetails = chunk.usage?.prompt_tokens_details;
  const usage = { input_tokens: inputTokens, output_tokens: 0 };
  if (promptDetails?.cache_creation_input_tokens !== void 0) usage.cache_creation_input_tokens = cacheCreationTokens;
  if (promptDetails?.cached_tokens !== void 0) usage.cache_read_input_tokens = cachedTokens;
  events.push({
    type: "message_start",
    message: {
      id: chunk.id,
      type: "message",
      role: "assistant",
      content: [],
      model: chunk.model,
      stop_reason: null,
      stop_sequence: null,
      usage
    }
  });
  state.messageStartSent = true;
}
2537
/**
 * Emits a self-contained thinking block for an opaque reasoning payload.
 *
 * When `delta.reasoning_opaque` is non-empty, pushes start, placeholder
 * thinking text, signature and stop events at the current block index, then
 * advances the index.
 * @param {object} delta - Streamed delta that may carry `reasoning_opaque`.
 * @param {object[]} events - Output list that receives the generated events.
 * @param {object} state - Mutable stream state holding `contentBlockIndex`.
 */
function handleReasoningOpaque(delta, events, state) {
  const signature = delta.reasoning_opaque;
  if (!signature || !(signature.length > 0)) return;
  const index = state.contentBlockIndex;
  events.push(
    { type: "content_block_start", index, content_block: { type: "thinking", thinking: "" } },
    { type: "content_block_delta", index, delta: { type: "thinking_delta", thinking: THINKING_TEXT$1 } },
    { type: "content_block_delta", index, delta: { type: "signature_delta", signature } },
    { type: "content_block_stop", index }
  );
  state.contentBlockIndex++;
}
2567
/**
 * Translates streamed reasoning text into thinking-block events.
 *
 * Reads `delta.reasoning_text`, falling back to `delta.reasoning_content`.
 * If a content block is already open, the reasoning text is instead written
 * to `delta.content` (replacing whatever was there) and both reasoning fields
 * are cleared; no event is emitted in that case.
 * NOTE(review): presumably a later content handler then emits it as text.
 * @param {object} delta - Streamed delta; mutated in the reroute case.
 * @param {object} state - Mutable stream state.
 * @param {object[]} events - Output list that receives the generated events.
 */
function handleThinkingText(delta, state, events) {
  const reasoningText = delta.reasoning_text ?? delta.reasoning_content;
  if (!reasoningText || !(reasoningText.length > 0)) return;
  if (state.contentBlockOpen) {
    delta.content = reasoningText;
    delta.reasoning_text = void 0;
    delta.reasoning_content = void 0;
    return;
  }
  if (!state.thinkingBlockOpen) {
    events.push({
      type: "content_block_start",
      index: state.contentBlockIndex,
      content_block: { type: "thinking", thinking: "" }
    });
    state.thinkingBlockOpen = true;
  }
  events.push({
    type: "content_block_delta",
    index: state.contentBlockIndex,
    delta: { type: "thinking_delta", thinking: reasoningText }
  });
}
2597
/**
 * Closes the current thinking block, if one is open.
 *
 * Pushes an empty signature delta followed by a stop event, advances the
 * block index and clears `thinkingBlockOpen`.
 * @param {object} state - Mutable stream state.
 * @param {object[]} events - Output list that receives the generated events.
 */
function closeThinkingBlockIfOpen(state, events) {
  if (!state.thinkingBlockOpen) return;
  const index = state.contentBlockIndex;
  events.push(
    { type: "content_block_delta", index, delta: { type: "signature_delta", signature: "" } },
    { type: "content_block_stop", index }
  );
  state.contentBlockIndex++;
  state.thinkingBlockOpen = false;
}
2614
+ //#endregion
2615
+ //#region src/services/copilot/create-responses.ts
2616
/**
 * Sends a Responses API request upstream.
 *
 * Builds the request headers, clears `payload.service_tier` on the caller's
 * payload, then uses the pooled WebSocket transport only when the payload is
 * streaming, `transport` is "websocket" and `compactType` is not 1.
 * Every other case goes over HTTP.
 * @param {object} payload - Responses API payload (mutated: `service_tier` is unset).
 * @param {object} options - Request options; `transport` defaults to "http".
 * @returns {Promise<unknown>} Whatever the chosen transport returns.
 * @throws {Error} When no Copilot token is available.
 */
const createResponses = async (payload, { vision, initiator, subagentMarker, requestId, sessionId, compactType, transport = "http" }) => {
  if (!state.copilotToken) throw new Error("Copilot token not found");
  const headers = {
    ...copilotHeaders(state, requestId, vision),
    "x-initiator": initiator
  };
  prepareInteractionHeaders(sessionId, Boolean(subagentMarker), headers);
  prepareForCompact(headers, compactType);
  payload.service_tier = void 0;
  consola.log(`<-- model: ${payload.model}`);
  // compactType 1 always goes over HTTP, whatever transport was requested.
  const wantsWebSocket = compactType !== 1 && transport === "websocket";
  if (payload.stream === true && wantsWebSocket) {
    const request = prepareResponsesWebSocketRequest(payload, headers, {
      requestId,
      subagentMarker
    });
    return createPooledResponsesWebSocketStream(request);
  }
  return await createHttpResponses(payload, headers);
};
2582
- const parseReasoningSignature = (signature) => {
2583
- const splitIndex = signature.lastIndexOf("@");
2584
- if (splitIndex <= 0 || splitIndex === signature.length - 1) return {
2585
- encryptedContent: signature,
2586
- id: ""
2587
- };
2633
/**
 * POSTs the payload to the upstream `/responses` endpoint.
 * @param {object} payload - Responses API payload, serialized as JSON.
 * @param {object} headers - Prepared request headers.
 * @returns {Promise<unknown>} `events(response)` when `payload.stream` is
 *   truthy, otherwise the parsed JSON body.
 * @throws {HTTPError} When the upstream response is not OK.
 */
const createHttpResponses = async (payload, headers) => {
  const endpoint = `${copilotBaseUrl(state)}/responses`;
  const response = await fetch(endpoint, {
    method: "POST",
    headers,
    body: JSON.stringify(payload)
  });
  logCopilotRateLimits(response.headers);
  if (!response.ok) {
    consola.error("Failed to create responses", response);
    throw new HTTPError("Failed to create responses", response);
  }
  return payload.stream ? events(response) : await response.json();
};
2647
+ const prepareResponsesWebSocketRequest = (payload, preparedHeaders, options) => {
2648
+ const initiator = getResponsesWebSocketInitiator(preparedHeaders);
2588
2649
  return {
2589
- encryptedContent: signature.slice(0, splitIndex),
2590
- id: signature.slice(splitIndex + 1)
2650
+ headers: copilotWebSocketHeaders(preparedHeaders),
2651
+ poolKey: buildResponsesWebSocketPoolKey(payload, options),
2652
+ payload: buildResponsesWebSocketPayload(payload, initiator),
2653
+ url: buildResponsesWebSocketUrl(copilotBaseUrl(state))
2591
2654
  };
2592
2655
  };
2593
- const createFunctionToolCall = (block, state) => ({
2594
- type: "function_call",
2595
- call_id: block.id,
2596
- name: block.name,
2597
- arguments: JSON.stringify(block.input),
2598
- status: "completed",
2599
- ...state.toolSearchEnabled && isDeferredToolName(block.name) ? { namespace: block.name } : {}
2600
- });
2601
- const createToolSearchCall = (block) => ({
2602
- type: "tool_search_call",
2603
- call_id: block.id,
2604
- arguments: normalizeToolSearchBridgeArguments(block.input),
2605
- execution: "client",
2606
- status: "completed"
2607
- });
2608
- const createToolCall = (block, state) => {
2609
- if (state.toolSearchEnabled && isBridgeToolSearchName(block.name)) return createToolSearchCall(block);
2610
- return createFunctionToolCall(block, state);
2656
/**
 * Builds the key used to pool Responses WebSocket connections.
 *
 * The key joins four URI-encoded parts with "|": a 16-hex-character SHA-256
 * fingerprint of the Copilot token (or "missing-token"), the model, the
 * request id, and the subagent identity (or "main").
 * @param {object} payload - Responses payload; only `model` is read.
 * @param {{ requestId?: string, subagentMarker?: object }} options
 * @returns {string}
 */
const buildResponsesWebSocketPoolKey = (payload, { requestId, subagentMarker }) => {
  let tokenFingerprint = "missing-token";
  if (state.copilotToken) {
    tokenFingerprint = createHash("sha256").update(state.copilotToken).digest("hex").slice(0, 16);
  }
  let subagentKey = "main";
  if (subagentMarker) {
    const { session_id, agent_id, agent_type } = subagentMarker;
    subagentKey = [session_id, agent_id, agent_type].join(":");
  }
  const parts = [tokenFingerprint, payload.model, requestId, subagentKey];
  return parts.map(encodePoolKeyPart).join("|");
};
2612
- const createFunctionCallOutput = (block) => ({
2613
- type: "function_call_output",
2614
- call_id: block.tool_use_id,
2615
- output: convertToolResultContent(block.content),
2616
- status: block.is_error ? "incomplete" : "completed"
2670
/**
 * Derives the WebSocket initiator from the prepared `x-initiator` header.
 * @param {object} preparedHeaders - Prepared request headers.
 * @returns {"agent" | "user"} "agent" only when the header equals "agent"
 *   (case-insensitive); "user" otherwise, including when it is missing.
 */
const getResponsesWebSocketInitiator = (preparedHeaders) => {
  const rawInitiator = getHeaderValue(preparedHeaders, "x-initiator");
  if (rawInitiator?.toLowerCase() === "agent") return "agent";
  return "user";
};
2673
/**
 * Opens a pooled WebSocket stream for a prepared Responses request, passing
 * the Responses-specific chunk factory, terminal-chunk test and error messages.
 * @param {object} request - Prepared WebSocket request.
 * @returns {unknown} Whatever `createPooledWebSocketStream` returns.
 */
const createPooledResponsesWebSocketStream = (request) => {
  // Built per call: the referenced helpers are defined later in the module.
  const streamOptions = {
    createChunk: createResponsesWebSocketStreamChunk,
    isTerminalChunk: isTerminalResponsesStreamChunk,
    openErrorMessage: "Failed to create responses websocket",
    streamErrorMessage: "Responses websocket stream error",
    terminalChunkMissingMessage: "Responses websocket ended without a terminal response"
  };
  return createPooledWebSocketStream(request, streamOptions);
};
2618
- const createToolCallOutput = (block, state) => {
2619
- const toolUseName = state.toolUseNameById.get(block.tool_use_id);
2620
- if (state.toolSearchEnabled && isBridgeToolSearchName(toolUseName ?? "")) return createToolSearchOutput(block, state.originalTools);
2621
- return createFunctionCallOutput(block);
2680
/**
 * Builds the WebSocket message for a Responses request.
 *
 * Copies the payload, sets `type` to "response.create" and adds `initiator`,
 * then removes `stream`, `background` and `service_tier`.
 * The input payload is not modified.
 * @param {object} payload - Responses API payload.
 * @param {string} initiator - Initiator value to embed in the message.
 * @returns {object}
 */
const buildResponsesWebSocketPayload = (payload, initiator) => {
  const omittedFields = ["stream", "background", "service_tier"];
  const websocketPayload = {
    ...payload,
    type: "response.create",
    initiator
  };
  for (const field of omittedFields) delete websocketPayload[field];
  return websocketPayload;
};
2623
- const createToolSearchOutput = (block, originalTools) => {
2624
- const referencedToolNames = resolveToolSearchReferencedToolNames(block.content, originalTools);
2625
- return {
2626
- type: "tool_search_output",
2627
- call_id: block.tool_use_id,
2628
- tools: referencedToolNames.map((toolName) => convertDeferredToolToNamespace(resolveDeferredTool(toolName, originalTools))),
2629
- execution: "client",
2630
- status: block.is_error ? "incomplete" : "completed"
2631
- };
2691
/**
 * Builds the WebSocket URL of the `/responses` endpoint.
 * @param {string} baseUrl - Base URL; trailing slashes are stripped first.
 * @returns {unknown} The result of `createWebSocketUrl` for `<base>/responses`.
 */
const buildResponsesWebSocketUrl = (baseUrl) => {
  const trimmedBase = baseUrl.replace(/\/+$/u, "");
  return createWebSocketUrl(`${trimmedBase}/responses`);
};
2633
- const resolveToolSearchReferencedToolNames = (content, originalTools) => {
2634
- const explicitReferences = extractToolReferenceNames(content);
2635
- if (explicitReferences.length > 0) return uniqueToolNames(explicitReferences);
2636
- const sentinel = extractMcpToolSearchSentinel(content);
2637
- if (sentinel) return selectDeferredToolsByNames(sentinel.names, originalTools).map((tool) => tool.name);
2638
- return [];
2694
/**
 * Looks up a header on a plain object by case-insensitive name.
 * @param {Record<string, string>} headers - Header name/value map.
 * @param {string} headerName - Name to look up.
 * @returns {string | undefined} Value of the first matching key, if any.
 */
const getHeaderValue = (headers, headerName) => {
  const wanted = headerName.toLowerCase();
  for (const [key, value] of Object.entries(headers)) {
    if (key.toLowerCase() === wanted) return value;
  }
  return undefined;
};
2640
- const extractToolReferenceNames = (content) => {
2641
- if (!Array.isArray(content)) return [];
2642
- return content.flatMap((block) => block.type === "tool_reference" ? [block.tool_name] : []);
2698
/**
 * URI-encodes one pool-key segment so the "|" joiner cannot appear inside it.
 * @param {unknown} value - Segment value; coerced to a string by the encoder.
 * @returns {string}
 */
const encodePoolKeyPart = (value) => {
  return encodeURIComponent(value);
};
2699
/**
 * Converts a raw WebSocket text frame into a stream chunk.
 *
 * "[DONE]" and anything that fails to parse as a JSON object are passed
 * through as `{ data }`. Parsed frames are re-serialized and annotated with
 * `event` and `id` when those fields are strings. A "response.completed"
 * frame also has its quota snapshots logged.
 * @param {string} data - Raw frame text.
 * @returns {{ data: string, event?: string, id?: string }}
 */
const createResponsesWebSocketStreamChunk = (data) => {
  if (data === "[DONE]") return { data };
  try {
    const message = JSON.parse(data);
    // Destructuring throws for a JSON `null`, which falls through to the catch.
    const { type, id } = message;
    if (type === "response.completed") logCopilotQuotaSnapshots(message.copilot_quota_snapshots);
    return {
      data: JSON.stringify(message),
      event: typeof type === "string" ? type : void 0,
      id: typeof id === "string" ? id : void 0
    };
  } catch {
    return { data };
  }
};
2644
- const extractMcpToolSearchSentinel = (content) => {
2645
- if (typeof content === "string") return parseMcpToolSearchSentinel(content);
2646
- for (const block of content) {
2647
- if (block.type !== "text") continue;
2648
- const sentinel = parseMcpToolSearchSentinel(block.text);
2649
- if (sentinel) return sentinel;
2713
+ const isTerminalResponsesStreamChunk = (chunk) => {
2714
+ if (!chunk.data || chunk.data === "[DONE]") return false;
2715
+ try {
2716
+ const parsed = JSON.parse(chunk.data);
2717
+ return parsed.type === "response.completed" || parsed.type === "response.failed" || parsed.type === "response.incomplete" || parsed.type === "error";
2718
+ } catch {
2719
+ return false;
2650
2720
  }
2651
- return null;
2652
2721
  };
2653
- const resolveDeferredTool = (toolName, originalTools) => {
2654
- const tool = originalTools.find((candidate) => candidate.name === toolName);
2655
- if (tool && isDeferredToolName(tool.name)) return tool;
2656
- throw createInvalidRequestError(`Tool reference '${toolName}' has no corresponding deferred tool definition`);
2722
+ //#endregion
2723
+ //#region src/routes/messages/responses-translation.ts
2724
+ const MESSAGE_TYPE = "message";
2725
+ const COMPACTION_SIGNATURE_PREFIX = "cm1#";
2726
+ const COMPACTION_SIGNATURE_SEPARATOR = "@";
2727
+ const THINKING_TEXT = "Thinking...";
2728
/**
 * Derives the prompt cache key, optionally scoped to a subagent.
 * @param {string | null | undefined} basePromptCacheKey - Base key.
 * @param {string | null | undefined} subagentAgentId - Subagent id; surrounding
 *   whitespace is ignored.
 * @returns {string | null} null without a base key; the base key alone when
 *   the agent id is blank; otherwise `<base>:agent:<id>`.
 */
const buildPromptCacheKey = (basePromptCacheKey, subagentAgentId) => {
  if (!basePromptCacheKey) return null;
  const agentId = subagentAgentId?.trim();
  return agentId ? `${basePromptCacheKey}:agent:${agentId}` : basePromptCacheKey;
};
2658
- const uniqueToolNames = (toolNames) => [...new Set(toolNames)];
2659
- const createInvalidRequestError = (message) => new HTTPError(message, new Response(JSON.stringify({ error: {
2660
- message,
2661
- type: "invalid_request_error"
2662
- } }), {
2663
- status: 400,
2664
- headers: { "content-type": "application/json" }
2665
- }));
2666
- const translateSystemPrompt = (system, model) => {
2667
- if (!system) return null;
2668
- const extraPrompt = getExtraPromptForModel(model);
2669
- if (typeof system === "string") return system + extraPrompt;
2670
- const text = system.map((block, index) => {
2671
- if (index === 0) return block.text + "\n\n" + extraPrompt + "\n\n";
2672
- return block.text;
2673
- }).join(" ");
2674
- return text.length > 0 ? text : null;
2734
/**
 * Translates an Anthropic Messages payload into a Responses API payload.
 *
 * Messages are translated in order, sharing one translation state that
 * tracks tool-use names. `temperature` is fixed at 1 and `max_output_tokens`
 * is raised to at least 12800. `prompt_cache_key` is attached only when the
 * request declares at least one tool.
 * @param {object} payload - Anthropic Messages request payload.
 * @param {string | null | undefined} subagentAgentId - Scopes the cache key.
 * @returns {object} Responses API request payload.
 */
const translateAnthropicMessagesToResponsesPayload = (payload, subagentAgentId) => {
  const { model, tools } = payload;
  const input = [];
  const applyPhase = shouldApplyPhase(model);
  const toolSearchEnabled = shouldEnableResponsesToolSearch({ model, tools });
  const translationState = {
    originalTools: tools ?? [],
    toolSearchEnabled,
    toolUseNameById: /* @__PURE__ */ new Map()
  };
  for (const message of payload.messages) {
    const translated = translateMessage(message, model, applyPhase, translationState);
    input.push(...translated);
  }
  const hasOriginalTools = Array.isArray(tools) && tools.length > 0;
  const translatedTools = convertAnthropicTools(tools, toolSearchEnabled);
  const toolChoice = convertAnthropicToolChoice(payload.tool_choice, toolSearchEnabled);
  // The cache key prefers the session id from metadata over session affinity.
  const { sessionId: metadataPromptCacheKey } = parseUserIdMetadata(payload.metadata?.user_id);
  const sessionAffinity = requestContext.getStore()?.sessionAffinity?.trim() || null;
  const promptCacheKey = buildPromptCacheKey(metadataPromptCacheKey ?? sessionAffinity, subagentAgentId);
  const responsesPayload = {
    model,
    input,
    instructions: translateSystemPrompt(payload.system, model),
    temperature: 1,
    top_p: payload.top_p ?? null,
    max_output_tokens: Math.max(payload.max_tokens, 12800),
    tools: translatedTools,
    tool_choice: toolChoice,
    metadata: payload.metadata ? { ...payload.metadata } : null,
    stream: payload.stream ?? null,
    store: false,
    parallel_tool_calls: true,
    reasoning: {
      effort: getReasoningEffortForModel(model),
      summary: "detailed"
    },
    include: ["reasoning.encrypted_content"]
  };
  if (hasOriginalTools) responsesPayload.prompt_cache_key = promptCacheKey;
  return responsesPayload;
};
2676
- const convertAnthropicTools = (tools, toolSearchEnabled) => {
2677
- if (!tools || tools.length === 0) return null;
2678
- const converted = [];
2679
- let addedToolSearch = false;
2680
- const searchableToolNames = toolSearchEnabled ? listDeferredToolNames(tools) : [];
2681
- for (const tool of tools) {
2682
- if (isBridgeToolSearchName(tool.name)) {
2683
- if (toolSearchEnabled && !addedToolSearch) {
2684
- converted.push(createResponsesToolSearchDefinition(searchableToolNames));
2685
- addedToolSearch = true;
2686
- }
2775
/**
 * Packs a compaction item into a signature string of the form
 * `<prefix><encrypted_content><separator><id>`.
 * @param {{ encrypted_content: string, id: string }} compaction
 * @returns {string}
 */
const encodeCompactionCarrierSignature = (compaction) => {
  const { encrypted_content, id } = compaction;
  return `${COMPACTION_SIGNATURE_PREFIX}${encrypted_content}${COMPACTION_SIGNATURE_SEPARATOR}${id}`;
};
2778
/**
 * Unpacks a signature produced by `encodeCompactionCarrierSignature`.
 *
 * The payload is split at the FIRST separator, so the encrypted content must
 * not contain the separator, while the id may.
 * @param {string} signature - Candidate carrier signature.
 * @returns {{ id: string, encrypted_content: string } | undefined} The decoded
 *   parts, or undefined when the prefix is missing or either part is empty.
 */
const decodeCompactionCarrierSignature = (signature) => {
  if (!signature.startsWith(COMPACTION_SIGNATURE_PREFIX)) return;
  // Strip by the prefix's real length so the two cannot drift apart.
  const raw = signature.slice(COMPACTION_SIGNATURE_PREFIX.length);
  const separatorIndex = raw.indexOf(COMPACTION_SIGNATURE_SEPARATOR);
  // An index of 0 or less means empty content; a trailing separator means an empty id.
  if (separatorIndex <= 0 || separatorIndex === raw.length - 1) return;
  return {
    id: raw.slice(separatorIndex + 1),
    encrypted_content: raw.slice(0, separatorIndex)
  };
};
2792
/**
 * Dispatches one Anthropic message to the translator for its role.
 * Any role other than "user" is handled as an assistant message.
 * @returns {object[]} Translated Responses input items.
 */
const translateMessage = (message, model, applyPhase, state) => {
  return message.role === "user"
    ? translateUserMessage(message, state)
    : translateAssistantMessage(message, model, applyPhase, state);
};
2796
+ const translateUserMessage = (message, state) => {
2797
+ if (typeof message.content === "string") return [createMessage("user", message.content)];
2798
+ if (!Array.isArray(message.content)) return [];
2799
+ const items = [];
2800
+ const pendingContent = [];
2801
+ for (const block of message.content) {
2802
+ if (block.type === "tool_result") {
2803
+ flushPendingContent(pendingContent, items, { role: "user" });
2804
+ items.push(createToolCallOutput(block, state));
2687
2805
  continue;
2688
2806
  }
2689
- if (toolSearchEnabled && isDeferredToolName(tool.name)) {
2690
- converted.push(convertDeferredToolToNamespace(tool));
2807
+ const converted = translateUserContentBlock(block);
2808
+ if (converted.length > 0) pendingContent.push(...converted);
2809
+ }
2810
+ flushPendingContent(pendingContent, items, { role: "user" });
2811
+ return items;
2812
+ };
2813
+ const translateAssistantMessage = (message, model, applyPhase, state) => {
2814
+ const assistantPhase = resolveAssistantPhase(model, message.content, applyPhase);
2815
+ if (typeof message.content === "string") return [createMessage("assistant", message.content, assistantPhase)];
2816
+ if (!Array.isArray(message.content)) return [];
2817
+ const items = [];
2818
+ const pendingContent = [];
2819
+ for (const block of message.content) {
2820
+ if (block.type === "tool_use") {
2821
+ state.toolUseNameById.set(block.id, block.name);
2822
+ flushPendingContent(pendingContent, items, {
2823
+ role: "assistant",
2824
+ phase: assistantPhase
2825
+ });
2826
+ items.push(createToolCall(block, state));
2691
2827
  continue;
2692
2828
  }
2693
- converted.push(convertToolToFunction(tool));
2829
+ if (block.type === "thinking" && block.signature) {
2830
+ const compactionContent = createCompactionContent(block);
2831
+ if (compactionContent) {
2832
+ flushPendingContent(pendingContent, items, {
2833
+ role: "assistant",
2834
+ phase: assistantPhase
2835
+ });
2836
+ items.push(compactionContent);
2837
+ continue;
2838
+ }
2839
+ if (block.signature.includes("@")) {
2840
+ flushPendingContent(pendingContent, items, {
2841
+ role: "assistant",
2842
+ phase: assistantPhase
2843
+ });
2844
+ items.push(createReasoningContent(block));
2845
+ continue;
2846
+ }
2847
+ }
2848
+ const converted = translateAssistantContentBlock(block);
2849
+ if (converted) pendingContent.push(converted);
2694
2850
  }
2695
- return converted;
2851
+ flushPendingContent(pendingContent, items, {
2852
+ role: "assistant",
2853
+ phase: assistantPhase
2854
+ });
2855
+ return items;
2696
2856
  };
2697
- const createResponsesToolSearchDefinition = (searchableToolNames) => ({
2698
- type: "tool_search",
2699
- execution: "client",
2700
- description: "Load deferred tools by exact name before using them. Return only the searchable tool names you need for the next step.",
2701
- parameters: {
2702
- type: "object",
2703
- properties: { names: {
2704
- type: "array",
2705
- description: "Exact deferred tool names to load.",
2706
- items: {
2707
- type: "string",
2708
- enum: searchableToolNames
2709
- },
2710
- minItems: 1
2711
- } },
2712
- required: ["names"],
2713
- additionalProperties: false
2857
/**
 * Converts one user content block into Responses input content parts.
 * @param {object} block - Anthropic user content block.
 * @returns {object[]} One part for text, image or document blocks; an empty
 *   array for every other block type.
 */
const translateUserContentBlock = (block) => {
  const { type } = block;
  if (type === "text") return [createTextContent(block.text)];
  if (type === "image") return [createImageContent(block)];
  if (type === "document") return [createFileContent(block)];
  return [];
};
2865
/**
 * Converts one assistant content block into an output content part.
 * @param {object} block - Anthropic assistant content block.
 * @returns {object | undefined} An output-text part for text blocks;
 *   undefined for every other block type.
 */
const translateAssistantContentBlock = (block) => {
  if (block.type === "text") return createOutPutTextContent(block.text);
  return undefined;
};
2871
/**
 * Moves buffered content parts into `target` as a single message.
 *
 * The message receives a copy of the buffer, and the buffer is then emptied
 * in place. Nothing happens when the buffer is already empty.
 * @param {object[]} pendingContent - Buffer of content parts (mutated).
 * @param {object[]} target - Item list that receives the message.
 * @param {{ role: string, phase?: string }} message - Role and phase to use.
 */
const flushPendingContent = (pendingContent, target, message) => {
  if (pendingContent.length === 0) return;
  const snapshot = Array.from(pendingContent);
  const { role, phase } = message;
  target.push(createMessage(role, snapshot, phase));
  pendingContent.length = 0;
};
2877
/**
 * Builds a Responses message item.
 * @param {string} role - Message role.
 * @param {string | object[]} content - Message content.
 * @param {string} [phase] - Included only for assistant messages when truthy.
 * @returns {object}
 */
const createMessage = (role, content, phase) => {
  const message = { type: MESSAGE_TYPE, role, content };
  if (role === "assistant" && phase) message.phase = phase;
  return message;
};
2716
- const convertToolToFunction = (tool) => ({
2717
- type: "function",
2718
- name: tool.name,
2883
/**
 * Picks the phase label for an assistant message.
 * @param {string} _model - Unused.
 * @param {string | object[]} content - Assistant message content.
 * @param {boolean} applyPhase - When falsy, no phase is produced.
 * @returns {"final_answer" | "commentary" | undefined} "final_answer" for
 *   string content or text without tool use; "commentary" for text alongside
 *   tool use; undefined when there is no text block or no usable content.
 */
const resolveAssistantPhase = (_model, content, applyPhase) => {
  if (!applyPhase) return;
  if (typeof content === "string") return "final_answer";
  if (!Array.isArray(content)) return;
  const hasBlockOfType = (type) => content.some((block) => block.type === type);
  if (!hasBlockOfType("text")) return;
  return hasBlockOfType("tool_use") ? "commentary" : "final_answer";
};
2890
/**
 * Decides whether assistant messages get a phase label.
 * Currently always true; the model argument is ignored.
 * @param {string} _model - Unused.
 * @returns {boolean}
 */
const shouldApplyPhase = (_model) => true;
2893
/**
 * Wraps text as a Responses `input_text` content part.
 * @param {string} text
 * @returns {{ type: "input_text", text: string }}
 */
const createTextContent = (text) => {
  return { type: "input_text", text };
};
2897
/**
 * Wraps text as a Responses `output_text` content part.
 * @param {string} text
 * @returns {{ type: "output_text", text: string }}
 */
const createOutPutTextContent = (text) => {
  return { type: "output_text", text };
};
2901
/**
 * Converts an Anthropic image block into a Responses `input_image` part.
 *
 * Base64 sources are inlined as a data URL. URL sources
 * (`{ type: "url", url }`) are passed through unchanged; without this they
 * would be rendered as the meaningless `data:undefined;base64,undefined`.
 * @param {{ source: { type?: string, media_type?: string, data?: string, url?: string } }} block
 * @returns {{ type: "input_image", image_url: string, detail: "auto" }}
 */
const createImageContent = (block) => {
  const { source } = block;
  const isRemoteImage = source.type === "url" && typeof source.url === "string";
  const imageUrl = isRemoteImage ? source.url : `data:${source.media_type};base64,${source.data}`;
  return {
    type: "input_image",
    image_url: imageUrl,
    detail: "auto"
  };
};
2906
/**
 * Converts an Anthropic document block into a Responses `input_file` part,
 * inlining the source as a base64 data URL.
 * NOTE(review): assumes a base64 document source — confirm other source
 * types are filtered out upstream.
 * @param {{ source: { media_type: string, data: string }, title?: string }} block
 * @returns {{ type: "input_file", file_data: string, filename: string }}
 *   `filename` falls back to "document.pdf" when the block has no title.
 */
const createFileContent = (block) => {
  const { media_type, data } = block.source;
  const fileData = `data:${media_type};base64,${data}`;
  return {
    type: "input_file",
    file_data: fileData,
    filename: block.title ?? "document.pdf"
  };
};
2911
/**
 * Rebuilds a Responses `reasoning` item from an Anthropic thinking block.
 *
 * The block signature supplies the encrypted content and the item id. The
 * "Thinking..." placeholder text is dropped and not echoed as a summary.
 * @param {{ signature: string, thinking: string }} block
 * @returns {{ id: string, type: "reasoning", summary: object[], encrypted_content: string }}
 */
const createReasoningContent = (block) => {
  const { encryptedContent, id } = parseReasoningSignature(block.signature);
  const thinking = block.thinking === "Thinking..." ? "" : block.thinking;
  const summary = [];
  if (thinking) summary.push({ type: "summary_text", text: thinking });
  return {
    id,
    type: "reasoning",
    summary,
    encrypted_content: encryptedContent
  };
};
2924
/**
 * Rebuilds a Responses `compaction` item from a thinking block whose
 * signature is a compaction carrier.
 * @param {{ signature: string }} block
 * @returns {{ id: string, type: "compaction", encrypted_content: string } | undefined}
 *   undefined when the signature does not decode as a compaction carrier.
 */
const createCompactionContent = (block) => {
  const decoded = decodeCompactionCarrierSignature(block.signature);
  if (!decoded) return;
  const { id, encrypted_content } = decoded;
  return { id, type: "compaction", encrypted_content };
};
2933
/**
 * Splits a reasoning signature of the form `<encrypted>@<id>` at the LAST "@".
 * @param {string} signature
 * @returns {{ encryptedContent: string, id: string }} When there is no "@",
 *   or it is the first or last character, the whole signature is returned as
 *   the encrypted content with an empty id.
 */
const parseReasoningSignature = (signature) => {
  const at = signature.lastIndexOf("@");
  const hasBothParts = at > 0 && at !== signature.length - 1;
  if (!hasBothParts) return { encryptedContent: signature, id: "" };
  return {
    encryptedContent: signature.slice(0, at),
    id: signature.slice(at + 1)
  };
};
2944
/**
 * Converts an Anthropic `tool_use` block into a Responses `function_call` item.
 *
 * When tool search is enabled and the tool name is a deferred tool, the name
 * is also set as `namespace`.
 * @param {{ id: string, name: string, input: unknown }} block
 * @param {{ toolSearchEnabled: boolean }} state - Translation state.
 * @returns {object}
 */
const createFunctionToolCall = (block, state) => {
  const call = {
    type: "function_call",
    call_id: block.id,
    name: block.name,
    arguments: JSON.stringify(block.input),
    status: "completed"
  };
  if (state.toolSearchEnabled && isDeferredToolName(block.name)) call.namespace = block.name;
  return call;
};
2952
/**
 * Converts a bridge tool-search `tool_use` block into a client-executed
 * Responses `tool_search_call` item.
 * @param {{ id: string, input: unknown }} block
 * @returns {object}
 */
const createToolSearchCall = (block) => {
  const searchArguments = normalizeToolSearchBridgeArguments(block.input);
  return {
    type: "tool_search_call",
    call_id: block.id,
    arguments: searchArguments,
    execution: "client",
    status: "completed"
  };
};
2959
/**
 * Converts a `tool_use` block into the matching Responses call item: a
 * tool-search call for the bridge tool-search tool when tool search is
 * enabled, a function call otherwise.
 * @param {{ name: string }} block
 * @param {{ toolSearchEnabled: boolean }} state - Translation state.
 * @returns {object}
 */
const createToolCall = (block, state) => {
  const isToolSearch = state.toolSearchEnabled && isBridgeToolSearchName(block.name);
  return isToolSearch ? createToolSearchCall(block) : createFunctionToolCall(block, state);
};
2963
/**
 * Converts an Anthropic `tool_result` block into a Responses
 * `function_call_output` item.
 * @param {{ tool_use_id: string, content: unknown, is_error?: boolean }} block
 * @returns {object} `status` is "incomplete" for error results, otherwise "completed".
 */
const createFunctionCallOutput = (block) => {
  const output = convertToolResultContent(block.content);
  const status = block.is_error ? "incomplete" : "completed";
  return {
    type: "function_call_output",
    call_id: block.tool_use_id,
    output,
    status
  };
};
2969
/**
 * Converts a `tool_result` block into the matching Responses output item.
 *
 * The originating tool name is looked up by `tool_use_id`. Results of the
 * bridge tool-search tool become tool-search outputs when tool search is
 * enabled; everything else becomes a function-call output.
 * @param {{ tool_use_id: string }} block
 * @param {object} state - Translation state with `toolUseNameById`,
 *   `toolSearchEnabled` and `originalTools`.
 * @returns {object}
 */
const createToolCallOutput = (block, state) => {
  const toolUseName = state.toolUseNameById.get(block.tool_use_id) ?? "";
  if (state.toolSearchEnabled && isBridgeToolSearchName(toolUseName)) {
    return createToolSearchOutput(block, state.originalTools);
  }
  return createFunctionCallOutput(block);
};
2974
/**
 * Converts a tool-search `tool_result` block into a client-executed
 * Responses `tool_search_output` item listing the referenced tools.
 * @param {{ tool_use_id: string, content: unknown, is_error?: boolean }} block
 * @param {object[]} originalTools - Tool definitions from the request.
 * @returns {object} `status` is "incomplete" for error results, otherwise "completed".
 */
const createToolSearchOutput = (block, originalTools) => {
  const referencedToolNames = resolveToolSearchReferencedToolNames(block.content, originalTools);
  const tools = referencedToolNames.map((toolName) => {
    const deferredTool = resolveDeferredTool(toolName, originalTools);
    return convertDeferredToolToNamespace(deferredTool);
  });
  return {
    type: "tool_search_output",
    call_id: block.tool_use_id,
    tools,
    execution: "client",
    status: block.is_error ? "incomplete" : "completed"
  };
};
2984
/**
 * Works out which tool names a tool-search result refers to.
 *
 * Explicit tool references take precedence and are de-duplicated. Otherwise
 * the names from an MCP tool-search sentinel are matched against the
 * deferred tools. With neither present the result is empty.
 * @param {unknown} content - `tool_result` content.
 * @param {object[]} originalTools - Tool definitions from the request.
 * @returns {string[]}
 */
const resolveToolSearchReferencedToolNames = (content, originalTools) => {
  const explicitReferences = extractToolReferenceNames(content);
  if (explicitReferences.length > 0) return uniqueToolNames(explicitReferences);
  const sentinel = extractMcpToolSearchSentinel(content);
  if (!sentinel) return [];
  const matchedTools = selectDeferredToolsByNames(sentinel.names, originalTools);
  return matchedTools.map((tool) => tool.name);
};
2991
/**
 * Collects the `tool_name` of every `tool_reference` block in the content.
 * @param {unknown} content - `tool_result` content.
 * @returns {string[]} Empty when the content is not an array.
 */
const extractToolReferenceNames = (content) => {
  if (!Array.isArray(content)) return [];
  return content
    .filter((block) => block.type === "tool_reference")
    .map((block) => block.tool_name);
};
2995
/**
 * Finds the first MCP tool-search sentinel in `tool_result` content.
 *
 * String content is parsed directly; array content is scanned text block by
 * text block. Content that is neither a string nor an array (for example a
 * `tool_result` with no content) yields null rather than a TypeError from
 * iterating it.
 * @param {unknown} content - `tool_result` content.
 * @returns {unknown} The parsed sentinel; null when array or non-array content
 *   holds none. For string content the parser's result is returned unchanged.
 */
const extractMcpToolSearchSentinel = (content) => {
  if (typeof content === "string") return parseMcpToolSearchSentinel(content);
  if (!Array.isArray(content)) return null;
  for (const block of content) {
    if (block.type !== "text") continue;
    const sentinel = parseMcpToolSearchSentinel(block.text);
    if (sentinel) return sentinel;
  }
  return null;
};
3004
/**
 * Looks up a referenced tool and checks that it is a deferred tool.
 * @param {string} toolName - Name taken from a tool reference.
 * @param {object[]} originalTools - Tool definitions from the request.
 * @returns {object} The matching tool definition.
 * @throws An invalid-request error when the name is unknown or the tool is
 *   not a deferred tool.
 */
const resolveDeferredTool = (toolName, originalTools) => {
  const match = originalTools.find((candidate) => candidate.name === toolName);
  const isUsable = match && isDeferredToolName(match.name);
  if (!isUsable) {
    throw createInvalidRequestError(`Tool reference '${toolName}' has no corresponding deferred tool definition`);
  }
  return match;
};
3009
/**
 * Removes duplicate tool names, keeping first-seen order.
 * @param {string[]} toolNames
 * @returns {string[]}
 */
const uniqueToolNames = (toolNames) => Array.from(new Set(toolNames));
3010
/**
 * Builds an HTTPError carrying a synthetic 400 JSON response whose body is
 * `{ error: { message, type: "invalid_request_error" } }`.
 * @param {string} message - Error message, also placed in the body.
 * @returns {HTTPError}
 */
const createInvalidRequestError = (message) => {
  const body = JSON.stringify({
    error: {
      message,
      type: "invalid_request_error"
    }
  });
  const response = new Response(body, {
    status: 400,
    headers: { "content-type": "application/json" }
  });
  return new HTTPError(message, response);
};
3017
/**
 * Flattens an Anthropic system prompt into a Responses `instructions` string.
 *
 * A string prompt gets the model's extra prompt appended directly. For an
 * array of blocks, the extra prompt is inserted after the first block
 * (wrapped in blank lines) and the blocks are joined with single spaces.
 * @param {string | { text: string }[] | null | undefined} system
 * @param {string} model - Used to look up the extra prompt.
 * @returns {string | null} null for a missing prompt or an empty result.
 */
const translateSystemPrompt = (system, model) => {
  if (!system) return null;
  const extraPrompt = getExtraPromptForModel(model);
  if (typeof system === "string") return system + extraPrompt;
  const parts = system.map((block, index) =>
    index === 0 ? `${block.text}\n\n${extraPrompt}\n\n` : block.text
  );
  const text = parts.join(" ");
  return text.length > 0 ? text : null;
};
3027
/**
 * Converts Anthropic tool definitions into Responses tool definitions.
 *
 * Bridge tool-search tools collapse into at most one tool-search definition,
 * and only when tool search is enabled; otherwise they are dropped. With tool
 * search enabled, deferred tools become namespaces. Everything else becomes a
 * plain function tool. Input order is preserved.
 * @param {object[] | null | undefined} tools - Anthropic tool definitions.
 * @param {boolean} toolSearchEnabled
 * @returns {object[] | null} null when no tools were supplied.
 */
const convertAnthropicTools = (tools, toolSearchEnabled) => {
  if (!tools || tools.length === 0) return null;
  const converted = [];
  const searchableToolNames = toolSearchEnabled ? listDeferredToolNames(tools) : [];
  let addedToolSearch = false;
  for (const tool of tools) {
    if (isBridgeToolSearchName(tool.name)) {
      if (toolSearchEnabled && !addedToolSearch) {
        converted.push(createResponsesToolSearchDefinition(searchableToolNames));
        addedToolSearch = true;
      }
    } else if (toolSearchEnabled && isDeferredToolName(tool.name)) {
      converted.push(convertDeferredToolToNamespace(tool));
    } else {
      converted.push(convertToolToFunction(tool));
    }
  }
  return converted;
};
3048
/**
 * Builds the client-executed `tool_search` tool definition.
 * @param {string[]} searchableToolNames - Deferred tool names allowed in the
 *   `names` argument (used as the schema enum).
 * @returns {object}
 */
const createResponsesToolSearchDefinition = (searchableToolNames) => {
  const namesSchema = {
    type: "array",
    description: "Exact deferred tool names to load.",
    items: {
      type: "string",
      enum: searchableToolNames
    },
    minItems: 1
  };
  const parameters = {
    type: "object",
    properties: { names: namesSchema },
    required: ["names"],
    additionalProperties: false
  };
  return {
    type: "tool_search",
    execution: "client",
    description: "Load deferred tools by exact name before using them. Return only the searchable tool names you need for the next step.",
    parameters
  };
};
3067
+ const convertToolToFunction = (tool) => ({
3068
+ type: "function",
3069
+ name: tool.name,
2719
3070
  parameters: normalizeToolSchema(tool.input_schema),
2720
3071
  strict: false,
2721
3072
  ...tool.description ? { description: tool.description } : {}
@@ -3663,11 +4014,12 @@ async function handleProviderMessagesForProvider(c, options) {
3663
4014
  } }, 404);
3664
4015
  try {
3665
4016
  const modelConfig = providerConfig.models?.[payload.model];
3666
- applyModelDefaults(payload, modelConfig);
3667
4017
  debugJson(logger$5, "provider.messages.request", {
3668
4018
  payload,
3669
4019
  provider
3670
4020
  });
4021
+ normalizeSystemMessages(payload);
4022
+ applyModelDefaults(payload, modelConfig);
3671
4023
  if (providerConfig.type === "openai-responses") return await handleOpenAIResponsesProviderMessages(c, {
3672
4024
  modelConfig,
3673
4025
  payload,
@@ -3678,736 +4030,465 @@ async function handleProviderMessagesForProvider(c, options) {
3678
4030
  modelConfig,
3679
4031
  payload,
3680
4032
  provider,
3681
- providerConfig
3682
- });
3683
- applyMissingExtraBody(payload, { extraBody: modelConfig?.extraBody });
3684
- const upstreamResponse = await forwardProviderMessages(providerConfig, payload, c.req.raw.headers);
3685
- if (!upstreamResponse.ok) {
3686
- logger$5.error("Failed to create responses", upstreamResponse);
3687
- throw new HTTPError("Failed to create responses", upstreamResponse);
3688
- }
3689
- const contentType = upstreamResponse.headers.get("content-type") ?? "";
3690
- if (Boolean(payload.stream) && contentType.includes("text/event-stream")) return streamProviderMessages({
3691
- c,
3692
- payload,
3693
- provider,
3694
- providerConfig,
3695
- upstreamResponse
3696
- });
3697
- return respondProviderMessagesJson(c, {
3698
- body: await upstreamResponse.json(),
3699
- payload,
3700
- provider,
3701
- providerConfig
3702
- });
3703
- } catch (error) {
3704
- logger$5.error("provider.messages.error", {
3705
- provider,
3706
- error
3707
- });
3708
- throw error;
3709
- }
3710
- }
3711
- const handleOpenAIResponsesProviderMessages = async (c, options) => {
3712
- const { payload, provider, providerConfig } = options;
3713
- const selectedModel = providerConfig.name === "codex" ? getModels().data.find((model) => model.id === payload.model) : void 0;
3714
- const responsesPayload = translateAnthropicMessagesToResponsesPayload(payload);
3715
- applyResponsesApiContextManagement(responsesPayload, selectedModel?.capabilities.limits.max_prompt_tokens);
3716
- compactInputByLatestCompaction(responsesPayload);
3717
- debugJson(logger$5, "provider.messages.responses.request", {
3718
- payload: responsesPayload,
3719
- provider
3720
- });
3721
- if (providerConfig.name === "codex") {
3722
- const upstreamResponse = await forwardCodexResponses(responsesPayload, c.req.raw.headers, providerConfig.baseUrl);
3723
- if (responsesPayload.stream && isResponsesStream$1(upstreamResponse)) return streamResponsesProviderMessages({
3724
- c,
3725
- payload,
3726
- provider,
3727
- providerConfig,
3728
- upstreamResponse
3729
- });
3730
- return respondResponsesProviderMessagesJson(c, {
3731
- body: upstreamResponse,
3732
- payload,
3733
- provider,
3734
- providerConfig
3735
- });
3736
- }
3737
- const upstreamResponse = await forwardProviderResponses(providerConfig, responsesPayload, c.req.raw.headers);
3738
- if (!upstreamResponse.ok) {
3739
- logger$5.error("Failed to create provider responses", upstreamResponse);
3740
- throw new HTTPError("Failed to create provider responses", upstreamResponse);
3741
- }
3742
- if (responsesPayload.stream) return streamResponsesProviderMessages({
3743
- c,
3744
- payload,
3745
- provider,
3746
- providerConfig,
3747
- upstreamResponse: events(upstreamResponse)
3748
- });
3749
- return respondResponsesProviderMessagesJson(c, {
3750
- body: await upstreamResponse.json(),
3751
- payload,
3752
- provider,
3753
- providerConfig
3754
- });
3755
- };
3756
- const applyModelDefaults = (payload, modelConfig) => {
3757
- payload.temperature ??= modelConfig?.temperature;
3758
- payload.top_p ??= modelConfig?.topP;
3759
- payload.top_k ??= modelConfig?.topK;
3760
- };
3761
- const applyMissingExtraBody = (payload, options) => {
3762
- for (const [key, value] of Object.entries(options.extraBody ?? {})) if (!Object.hasOwn(payload, key)) payload[key] = value;
3763
- };
3764
- const getRequestThinkingBudget = (payload) => {
3765
- const budget = payload.thinking?.budget_tokens;
3766
- if (typeof budget !== "number" || !Number.isFinite(budget)) return;
3767
- return budget;
3768
- };
3769
- const applyOpenAICompatibleThinkingBudget = (payload, source) => {
3770
- const thinkingBudget = getRequestThinkingBudget(source);
3771
- if (thinkingBudget !== void 0) {
3772
- payload.thinking_budget = thinkingBudget;
3773
- return;
3774
- }
3775
- if (payload.thinking_budget === void 0) delete payload.thinking_budget;
3776
- };
3777
- const applyOpenAICompatibleExtraBodyThinkingBudget = (payload, options) => {
3778
- const { extraBody } = options;
3779
- if (!extraBody || !Object.hasOwn(extraBody, "thinking_budget")) return;
3780
- const rawPayload = payload;
3781
- rawPayload.thinking_budget = extraBody.thinking_budget;
3782
- };
3783
- const handleOpenAICompatibleProviderMessages = async (c, options) => {
3784
- const { modelConfig, payload, provider, providerConfig } = options;
3785
- const openAIPayload = createOpenAICompatiblePayload(payload, modelConfig);
3786
- debugJson(logger$5, "provider.messages.openai_compatible.request", {
3787
- payload: openAIPayload,
3788
- provider
3789
- });
3790
- const upstreamResponse = await forwardProviderChatCompletions(providerConfig, openAIPayload, c.req.raw.headers);
3791
- if (!upstreamResponse.ok) {
3792
- logger$5.error("Failed to create openai-compatible responses", upstreamResponse);
3793
- throw new HTTPError("Failed to create openai-compatible responses", upstreamResponse);
3794
- }
3795
- const contentType = upstreamResponse.headers.get("content-type") ?? "";
3796
- if (Boolean(openAIPayload.stream) && contentType.includes("text/event-stream")) return streamOpenAICompatibleProviderMessages({
3797
- c,
3798
- payload,
3799
- provider,
3800
- upstreamResponse
3801
- });
3802
- return respondOpenAICompatibleProviderMessagesJson(c, {
3803
- body: await upstreamResponse.json(),
3804
- payload,
3805
- provider
3806
- });
3807
- };
3808
- const createOpenAICompatiblePayload = (payload, modelConfig) => {
3809
- const openAIPayload = translateToOpenAI(payload, {
3810
- supportPdf: modelConfig?.supportPdf,
3811
- toolContentSupportType: modelConfig?.toolContentSupportType ?? []
3812
- });
3813
- applyOpenAICompatibleThinkingBudget(openAIPayload, payload);
3814
- if (payload.top_k !== void 0) openAIPayload.top_k = payload.top_k;
3815
- if (openAIPayload.stream) openAIPayload.stream_options = { include_usage: true };
3816
- normalizeOpenAICompatibleReasoningContent(openAIPayload);
3817
- applyOpenAICompatibleRequestOverrides(openAIPayload, {
3818
- extraBody: modelConfig?.extraBody,
3819
- source: payload
3820
- });
3821
- applyMissingExtraBody(openAIPayload, { extraBody: modelConfig?.extraBody });
3822
- applyOpenAICompatibleExtraBodyThinkingBudget(openAIPayload, { extraBody: modelConfig?.extraBody });
3823
- if (!Object.hasOwn(openAIPayload, "parallel_tool_calls")) openAIPayload.parallel_tool_calls = true;
3824
- if (modelConfig?.contextCache !== false) applyOpenAICompatibleContextCache(openAIPayload);
3825
- return openAIPayload;
3826
- };
3827
- const normalizeOpenAICompatibleReasoningContent = (payload) => {
3828
- for (const message of payload.messages) {
3829
- if (message.role !== "assistant") continue;
3830
- if (message.reasoning_content === void 0 && message.reasoning_text !== void 0) message.reasoning_content = message.reasoning_text;
3831
- delete message.reasoning_text;
3832
- delete message.reasoning_opaque;
3833
- }
3834
- };
3835
- const applyOpenAICompatibleRequestOverrides = (payload, options) => {
3836
- const allowedKeys = new Set(Object.keys(options.extraBody ?? {}));
3837
- for (const key of allowedKeys) if (Object.hasOwn(options.source, key)) payload[key] = options.source[key];
3838
- };
3839
- const applyOpenAICompatibleContextCache = (payload) => {
3840
- const messageIndexes = selectContextCacheMessageIndexes(payload.messages);
3841
- for (const messageIndex of messageIndexes) applyContextCacheControl(payload.messages[messageIndex]);
3842
- };
3843
- const selectContextCacheMessageIndexes = (messages) => {
3844
- const cacheableIndexes = messages.flatMap((message, index) => isContextCacheMarkerEligible(message) ? [index] : []);
3845
- const systemIndexes = cacheableIndexes.filter((index) => messages[index]?.role === "system").slice(0, 2);
3846
- const finalIndexes = cacheableIndexes.filter((index) => messages[index]?.role !== "system").slice(-2);
3847
- return uniqueIndexes$1([...systemIndexes, ...finalIndexes]).sort((a, b) => a - b);
3848
- };
3849
- const uniqueIndexes$1 = (indexes) => [...new Set(indexes)].slice(0, OPENAI_COMPATIBLE_CONTEXT_CACHE_MARKER_LIMIT);
3850
- const isContextCacheMarkerEligible = (message) => {
3851
- if (!OPENAI_COMPATIBLE_CONTEXT_CACHE_ROLES.has(message.role)) return false;
3852
- if (typeof message.content === "string") return message.content.length > 0;
3853
- return Array.isArray(message.content) && message.content.length > 0;
3854
- };
3855
- const applyContextCacheControl = (message) => {
3856
- if (!message) return;
3857
- if (typeof message.content === "string") {
3858
- message.content = [{
3859
- type: "text",
3860
- text: message.content,
3861
- cache_control: { ...OPENAI_COMPATIBLE_CONTEXT_CACHE_CONTROL }
3862
- }];
3863
- return;
3864
- }
3865
- if (!Array.isArray(message.content)) return;
3866
- const lastPart = message.content.at(-1);
3867
- if (!lastPart) return;
3868
- setContextCacheControl(lastPart);
3869
- };
3870
- const setContextCacheControl = (part) => {
3871
- part.cache_control = { ...OPENAI_COMPATIBLE_CONTEXT_CACHE_CONTROL };
3872
- };
3873
- const streamProviderMessages = ({ c, payload, provider, providerConfig, upstreamResponse }) => {
3874
- logger$5.debug("provider.messages.streaming");
3875
- const recordUsage = createProviderMessagesUsageRecorder(payload, provider);
3876
- return streamSSE(c, async (stream) => {
3877
- let usage = {};
3878
- for await (const chunk of events(upstreamResponse)) {
3879
- logger$5.debug("provider.messages.raw_stream_event:", chunk.data);
3880
- const eventName = chunk.event;
3881
- if (eventName === "ping") {
3882
- await stream.writeSSE({
3883
- event: "ping",
3884
- data: "{\"type\":\"ping\"}"
3885
- });
3886
- continue;
3887
- }
3888
- let data = chunk.data;
3889
- if (!data) continue;
3890
- if (chunk.data === "[DONE]") break;
3891
- const parsed = parseProviderStreamEvent(data, providerConfig);
3892
- if (parsed) {
3893
- usage = mergeAnthropicUsage(usage, parsed.usage);
3894
- data = parsed.data;
3895
- }
3896
- await stream.writeSSE({
3897
- event: eventName,
3898
- data
3899
- });
3900
- }
3901
- recordUsage(usage);
3902
- });
3903
- };
3904
- const streamOpenAICompatibleProviderMessages = ({ c, payload, provider, upstreamResponse }) => {
3905
- logger$5.debug("provider.messages.openai_compatible.streaming");
3906
- const recordUsage = createProviderMessagesUsageRecorder(payload, provider);
3907
- return streamSSE(c, async (stream) => {
3908
- let usage = {};
3909
- const streamState = {
3910
- messageStartSent: false,
3911
- contentBlockIndex: 0,
3912
- contentBlockOpen: false,
3913
- toolCalls: {},
3914
- thinkingBlockOpen: false
3915
- };
3916
- for await (const chunk of events(upstreamResponse)) {
3917
- logger$5.debug("provider.messages.openai_compatible.raw_stream_event:", chunk.data);
3918
- if (chunk.event === "ping") {
3919
- await stream.writeSSE({
3920
- event: "ping",
3921
- data: "{\"type\":\"ping\"}"
3922
- });
3923
- continue;
3924
- }
3925
- if (!chunk.data || chunk.data === "[DONE]") {
3926
- if (chunk.data === "[DONE]") break;
3927
- continue;
3928
- }
3929
- const parsed = parseOpenAICompatibleStreamChunk(chunk.data);
3930
- if (!parsed) continue;
3931
- if (parsed.usage) usage = normalizeOpenAIUsage(parsed.usage);
3932
- const events = translateChunkToAnthropicEvents(parsed, streamState);
3933
- for (const event of events) {
3934
- const eventData = JSON.stringify(event);
3935
- debugLazy(logger$5, () => ["provider.messages.openai_compatible.translated_event:", eventData]);
3936
- await stream.writeSSE({
3937
- event: event.type,
3938
- data: eventData
3939
- });
3940
- }
3941
- }
3942
- for (const event of flushPendingAnthropicStreamEvents(streamState)) {
3943
- const eventData = JSON.stringify(event);
3944
- debugLazy(logger$5, () => ["provider.messages.openai_compatible.translated_event:", eventData]);
3945
- await stream.writeSSE({
3946
- event: event.type,
3947
- data: eventData
3948
- });
3949
- }
3950
- recordUsage(usage);
3951
- });
3952
- };
3953
- const streamResponsesProviderMessages = ({ c, payload, provider, providerConfig, upstreamResponse }) => {
3954
- logger$5.debug("provider.messages.responses.streaming", { provider });
3955
- const recordUsage = createProviderMessagesUsageRecorder(payload, provider);
3956
- return streamSSE(c, async (stream) => {
3957
- let usage = {};
3958
- const streamState = createResponsesStreamState({ toolSearchName: resolveBridgeToolSearchName(payload.tools) });
3959
- for await (const chunk of upstreamResponse) {
3960
- logger$5.debug("provider.messages.responses.raw_stream_event:", chunk.data);
3961
- if (chunk.event === "ping") {
3962
- await stream.writeSSE({
3963
- event: "ping",
3964
- data: "{\"type\":\"ping\"}"
3965
- });
3966
- continue;
3967
- }
3968
- if (!chunk.data || chunk.data === "[DONE]") {
3969
- if (chunk.data === "[DONE]") break;
3970
- continue;
3971
- }
3972
- const parsed = parseResponsesProviderStreamChunk(chunk.data, providerConfig);
3973
- if (!parsed) continue;
3974
- if (parsed.type === "response.completed" || parsed.type === "response.failed" || parsed.type === "response.incomplete") usage = normalizeResponsesUsage(parsed.response.usage);
3975
- const events = translateResponsesStreamEvent(parsed, streamState);
3976
- for (const event of events) {
3977
- const eventData = JSON.stringify(event);
3978
- debugLazy(logger$5, () => ["provider.messages.responses.translated_event:", eventData]);
3979
- await stream.writeSSE({
3980
- event: event.type,
3981
- data: eventData
3982
- });
3983
- }
3984
- }
3985
- if (!streamState.messageCompleted) {
3986
- const errorEvent = buildErrorEvent(`${provider} stream ended without a completion event`);
3987
- await stream.writeSSE({
3988
- event: errorEvent.type,
3989
- data: JSON.stringify(errorEvent)
3990
- });
3991
- }
3992
- recordUsage(usage);
3993
- });
3994
- };
3995
- const isResponsesStream$1 = (value) => {
3996
- return Boolean(value) && typeof value[Symbol.asyncIterator] === "function";
3997
- };
3998
- const parseOpenAICompatibleStreamChunk = (data) => {
3999
- try {
4000
- return JSON.parse(data);
4001
- } catch (error) {
4002
- logger$5.error("provider.messages.openai_compatible.parse_chunk_error", {
4003
- data,
4004
- error
4033
+ providerConfig
4034
+ });
4035
+ applyMissingExtraBody(payload, { extraBody: modelConfig?.extraBody });
4036
+ debugJson(logger$5, "Translated provider.messages.request", {
4037
+ payload,
4038
+ provider
4039
+ });
4040
+ const upstreamResponse = await forwardProviderMessages(providerConfig, payload, c.req.raw.headers);
4041
+ if (!upstreamResponse.ok) {
4042
+ logger$5.error("Failed to create responses", upstreamResponse);
4043
+ throw new HTTPError("Failed to create responses", upstreamResponse);
4044
+ }
4045
+ const contentType = upstreamResponse.headers.get("content-type") ?? "";
4046
+ if (Boolean(payload.stream) && contentType.includes("text/event-stream")) return streamProviderMessages({
4047
+ c,
4048
+ payload,
4049
+ provider,
4050
+ providerConfig,
4051
+ upstreamResponse
4052
+ });
4053
+ return respondProviderMessagesJson(c, {
4054
+ body: await upstreamResponse.json(),
4055
+ payload,
4056
+ provider,
4057
+ providerConfig
4005
4058
  });
4006
- return null;
4007
- }
4008
- };
4009
- const parseResponsesProviderStreamChunk = (data, providerConfig) => {
4010
- try {
4011
- const parsed = JSON.parse(data);
4012
- if (providerConfig.name === "codex") logCodexRateLimitsEvent(parsed);
4013
- return parsed;
4014
4059
  } catch (error) {
4015
- logger$5.error("provider.messages.responses.parse_chunk_error", {
4016
- provider: providerConfig.name,
4017
- data,
4060
+ logger$5.error("provider.messages.error", {
4061
+ provider,
4018
4062
  error
4019
4063
  });
4020
- return null;
4064
+ throw error;
4021
4065
  }
4022
- };
4023
- const parseProviderStreamEvent = (data, providerConfig) => {
4024
- try {
4025
- const parsed = JSON.parse(data);
4026
- if (parsed.type === "message_start") {
4027
- adjustInputTokens(providerConfig, parsed.message.usage);
4028
- return {
4029
- data: JSON.stringify(parsed),
4030
- model: parsed.message.model,
4031
- usage: normalizeAnthropicUsage(parsed.message.usage)
4032
- };
4033
- }
4034
- if (parsed.type === "message_delta") {
4035
- adjustInputTokens(providerConfig, parsed.usage);
4036
- return {
4037
- data: JSON.stringify(parsed),
4038
- usage: normalizeAnthropicUsage(parsed.usage)
4039
- };
4040
- }
4041
- return {
4042
- data: JSON.stringify(parsed),
4043
- usage: {}
4044
- };
4045
- } catch (error) {
4046
- logger$5.error("provider.messages.streaming.adjust_tokens_error", {
4047
- error,
4048
- originalData: data
4066
+ }
4067
+ const handleOpenAIResponsesProviderMessages = async (c, options) => {
4068
+ const { payload, provider, providerConfig } = options;
4069
+ const selectedModel = providerConfig.name === "codex" ? getModels().data.find((model) => model.id === payload.model) : void 0;
4070
+ const responsesPayload = translateAnthropicMessagesToResponsesPayload(payload);
4071
+ applyResponsesApiContextManagement(responsesPayload, selectedModel?.capabilities.limits.max_prompt_tokens);
4072
+ compactInputByLatestCompaction(responsesPayload);
4073
+ debugJson(logger$5, "provider.messages.responses.request", {
4074
+ payload: responsesPayload,
4075
+ provider
4076
+ });
4077
+ if (providerConfig.name === "codex") {
4078
+ const upstreamResponse = await forwardCodexResponses(responsesPayload, c.req.raw.headers, providerConfig.baseUrl);
4079
+ if (responsesPayload.stream && isResponsesStream$1(upstreamResponse)) return streamResponsesProviderMessages({
4080
+ c,
4081
+ payload,
4082
+ provider,
4083
+ providerConfig,
4084
+ upstreamResponse
4085
+ });
4086
+ return respondResponsesProviderMessagesJson(c, {
4087
+ body: upstreamResponse,
4088
+ payload,
4089
+ provider,
4090
+ providerConfig
4049
4091
  });
4050
- return null;
4051
4092
  }
4093
+ const upstreamResponse = await forwardProviderResponses(providerConfig, responsesPayload, c.req.raw.headers);
4094
+ if (!upstreamResponse.ok) {
4095
+ logger$5.error("Failed to create provider responses", upstreamResponse);
4096
+ throw new HTTPError("Failed to create provider responses", upstreamResponse);
4097
+ }
4098
+ if (responsesPayload.stream) return streamResponsesProviderMessages({
4099
+ c,
4100
+ payload,
4101
+ provider,
4102
+ providerConfig,
4103
+ upstreamResponse: events(upstreamResponse)
4104
+ });
4105
+ return respondResponsesProviderMessagesJson(c, {
4106
+ body: await upstreamResponse.json(),
4107
+ payload,
4108
+ provider,
4109
+ providerConfig
4110
+ });
4052
4111
  };
4053
- const respondProviderMessagesJson = (c, options) => {
4054
- const { body, payload, provider, providerConfig } = options;
4055
- const recordUsage = createProviderMessagesUsageRecorder(payload, provider);
4056
- adjustInputTokens(providerConfig, body.usage);
4057
- recordUsage(normalizeAnthropicUsage(body.usage));
4058
- debugJson(logger$5, "provider.messages.no_stream result:", body);
4059
- return c.json(body);
4060
- };
4061
- const respondOpenAICompatibleProviderMessagesJson = (c, options) => {
4062
- const { body, payload, provider } = options;
4063
- createProviderMessagesUsageRecorder(payload, provider)(normalizeOpenAIUsage(body.usage));
4064
- const anthropicResponse = translateToAnthropic(body);
4065
- debugJson(logger$5, "provider.messages.openai_compatible.no_stream result:", anthropicResponse);
4066
- return c.json(anthropicResponse);
4112
+ const applyModelDefaults = (payload, modelConfig) => {
4113
+ payload.temperature ??= modelConfig?.temperature;
4114
+ payload.top_p ??= modelConfig?.topP;
4115
+ payload.top_k ??= modelConfig?.topK;
4067
4116
  };
4068
- const respondResponsesProviderMessagesJson = (c, options) => {
4069
- const { body, payload, provider, providerConfig } = options;
4070
- createProviderMessagesUsageRecorder(payload, provider)(normalizeResponsesUsage(body.usage));
4071
- const anthropicResponse = translateResponsesResultToAnthropic(body, { toolSearchName: resolveBridgeToolSearchName(payload.tools) });
4072
- debugJson(logger$5, "provider.messages.responses.no_stream result:", anthropicResponse);
4073
- if (providerConfig.name === "codex") logger$5.debug("provider.messages.codex.no_stream.result");
4074
- return c.json(anthropicResponse);
4117
+ const applyMissingExtraBody = (payload, options) => {
4118
+ for (const [key, value] of Object.entries(options.extraBody ?? {})) if (!Object.hasOwn(payload, key)) payload[key] = value;
4075
4119
  };
4076
- const createProviderMessagesUsageRecorder = (payload, provider) => createProviderTokenUsageRecorder({
4077
- endpoint: "provider_messages",
4078
- model: payload.model,
4079
- providerName: provider,
4080
- sessionId: parseUserIdMetadata(payload.metadata?.user_id).sessionId
4081
- });
4082
- const adjustInputTokens = (providerConfig, usage) => {
4083
- if (!providerConfig.adjustInputTokens || !usage) return;
4084
- usage.input_tokens = Math.max(0, (usage.input_tokens ?? 0) - (usage.cache_read_input_tokens ?? 0) - (usage.cache_creation_input_tokens ?? 0));
4085
- debugJson(logger$5, "provider.messages.adjusted_usage:", usage);
4120
+ const getRequestThinkingBudget = (payload) => {
4121
+ const budget = payload.thinking?.budget_tokens;
4122
+ if (typeof budget !== "number" || !Number.isFinite(budget)) return;
4123
+ return budget;
4086
4124
  };
4087
- //#endregion
4088
- //#region src/services/copilot/create-messages.ts
4089
- const INTERLEAVED_THINKING_BETA = "interleaved-thinking-2025-05-14";
4090
- const allowedAnthropicBetas = new Set([
4091
- INTERLEAVED_THINKING_BETA,
4092
- "context-management-2025-06-27",
4093
- "advanced-tool-use-2025-11-20"
4094
- ]);
4095
- const buildAnthropicBetaHeader = (anthropicBetaHeader, thinking, _model) => {
4096
- const isAdaptiveThinking = thinking?.type === "adaptive";
4097
- if (anthropicBetaHeader) {
4098
- const uniqueFilteredBetas = [...anthropicBetaHeader.split(",").map((item) => item.trim()).filter((item) => item.length > 0).filter((item) => allowedAnthropicBetas.has(item))];
4099
- if (uniqueFilteredBetas.length > 0) return uniqueFilteredBetas.join(",");
4125
+ const applyOpenAICompatibleThinkingBudget = (payload, source) => {
4126
+ const thinkingBudget = getRequestThinkingBudget(source);
4127
+ if (thinkingBudget !== void 0) {
4128
+ payload.thinking_budget = thinkingBudget;
4100
4129
  return;
4101
4130
  }
4102
- if (thinking?.budget_tokens && !isAdaptiveThinking) return INTERLEAVED_THINKING_BETA;
4131
+ if (payload.thinking_budget === void 0) delete payload.thinking_budget;
4103
4132
  };
4104
- const createMessages = async (payload, anthropicBetaHeader, options) => {
4105
- if (!state.copilotToken) throw new Error("Copilot token not found");
4106
- const enableVision = payload.messages.some((message) => {
4107
- if (!Array.isArray(message.content)) return false;
4108
- return message.content.some((block) => block.type === "image" || block.type === "tool_result" && Array.isArray(block.content) && block.content.some((inner) => inner.type === "image"));
4109
- });
4110
- let isInitiateRequest = false;
4111
- const lastMessage = payload.messages.at(-1);
4112
- if (lastMessage?.role === "user") isInitiateRequest = Array.isArray(lastMessage.content) ? lastMessage.content.some((block) => block.type !== "tool_result") : true;
4113
- const headers = {
4114
- ...copilotHeaders(state, options.requestId, enableVision),
4115
- "x-initiator": isInitiateRequest ? "user" : "agent"
4116
- };
4117
- prepareInteractionHeaders(options.sessionId, Boolean(options.subagentMarker), headers);
4118
- prepareForCompact(headers, options.compactType);
4119
- const { safetyIdentifier, sessionId } = parseUserIdMetadata(payload.metadata?.user_id);
4120
- if (safetyIdentifier && sessionId && payload.model !== "claude-opus-4.8") prepareMessageProxyHeaders(headers);
4121
- const anthropicBeta = buildAnthropicBetaHeader(anthropicBetaHeader, payload.thinking, payload.model);
4122
- if (anthropicBeta) headers["anthropic-beta"] = anthropicBeta;
4123
- consola.log(`<-- model: ${payload.model}`);
4124
- const response = await fetch(`${copilotBaseUrl(state)}/v1/messages`, {
4125
- method: "POST",
4126
- headers,
4127
- body: JSON.stringify(payload)
4133
+ const applyOpenAICompatibleExtraBodyThinkingBudget = (payload, options) => {
4134
+ const { extraBody } = options;
4135
+ if (!extraBody || !Object.hasOwn(extraBody, "thinking_budget")) return;
4136
+ const rawPayload = payload;
4137
+ rawPayload.thinking_budget = extraBody.thinking_budget;
4138
+ };
4139
+ const handleOpenAICompatibleProviderMessages = async (c, options) => {
4140
+ const { modelConfig, payload, provider, providerConfig } = options;
4141
+ const openAIPayload = createOpenAICompatiblePayload(payload, modelConfig);
4142
+ debugJson(logger$5, "provider.messages.openai_compatible.request", {
4143
+ payload: openAIPayload,
4144
+ provider
4128
4145
  });
4129
- logCopilotRateLimits(response.headers);
4130
- if (!response.ok) {
4131
- consola.error("Failed to create messages", response);
4132
- throw new HTTPError("Failed to create messages", response);
4146
+ const upstreamResponse = await forwardProviderChatCompletions(providerConfig, openAIPayload, c.req.raw.headers);
4147
+ if (!upstreamResponse.ok) {
4148
+ logger$5.error("Failed to create openai-compatible responses", upstreamResponse);
4149
+ throw new HTTPError("Failed to create openai-compatible responses", upstreamResponse);
4133
4150
  }
4134
- if (payload.stream) return events(response);
4135
- return await response.json();
4136
- };
4137
- const IDE_EXECUTE_CODE_TOOL = "mcp__ide__executeCode";
4138
- const IDE_GET_DIAGNOSTICS_TOOL = "mcp__ide__getDiagnostics";
4139
- const IDE_GET_DIAGNOSTICS_DESCRIPTION = "Get language diagnostics from VS Code. Returns errors, warnings, information, and hints for files in the workspace.";
4140
- const PDF_FILE_READ_PREFIX = "PDF file read:";
4141
- const isVersionAtLeast = (version, minimumMajor, minimumMinor) => {
4142
- const [majorPart, minorPart = "0"] = version.split(".");
4143
- const major = Number.parseInt(majorPart, 10);
4144
- const minor = Number.parseInt(minorPart, 10);
4145
- if (!Number.isInteger(major) || !Number.isInteger(minor)) return false;
4146
- return major > minimumMajor || major === minimumMajor && minor >= minimumMinor;
4147
- };
4148
- const shouldSummarizeThinkingDisplayForModel = (model) => {
4149
- const normalized = normalizeSdkModelId(model);
4150
- return Boolean(normalized && isVersionAtLeast(normalized.version, 4, 7));
4151
- };
4152
- const getBlockCacheControl = (block) => {
4153
- if (!block || block.type === "thinking") return;
4154
- const cacheControl = block.cache_control;
4155
- if (!cacheControl || typeof cacheControl !== "object") return;
4156
- return cacheControl;
4157
- };
4158
- const getLastMessageContentCacheControl = (lastMessage) => {
4159
- if (!lastMessage || !Array.isArray(lastMessage.content)) return;
4160
- const cacheControl = getBlockCacheControl(lastMessage.content.at(-1));
4161
- return cacheControl ? { ...cacheControl } : void 0;
4162
- };
4163
- const applyLastMessageCacheControl = (anthropicPayload, lastMessageCacheControl) => {
4164
- const cacheControl = lastMessageCacheControl ?? { type: "ephemeral" };
4165
- const lastMessage = anthropicPayload.messages.at(-1);
4166
- if (!lastMessage || !Array.isArray(lastMessage.content)) return;
4167
- const lastBlock = lastMessage.content.at(-1);
4168
- if (!lastBlock || lastBlock.type === "thinking" || lastBlock.cache_control) return;
4169
- lastBlock.cache_control = { ...cacheControl };
4151
+ const contentType = upstreamResponse.headers.get("content-type") ?? "";
4152
+ if (Boolean(openAIPayload.stream) && contentType.includes("text/event-stream")) return streamOpenAICompatibleProviderMessages({
4153
+ c,
4154
+ payload,
4155
+ provider,
4156
+ upstreamResponse
4157
+ });
4158
+ return respondOpenAICompatibleProviderMessagesJson(c, {
4159
+ body: await upstreamResponse.json(),
4160
+ payload,
4161
+ provider
4162
+ });
4170
4163
  };
4171
- const getCompactCandidateText = (message) => {
4172
- if (message.role !== "user") return "";
4173
- if (typeof message.content === "string") return message.content;
4174
- return message.content.filter((block) => block.type === "text").map((block) => block.text.startsWith("<system-reminder>") ? "" : block.text).filter((text) => text.length > 0).join("\n\n");
4164
+ const createOpenAICompatiblePayload = (payload, modelConfig) => {
4165
+ const openAIPayload = translateToOpenAI(payload, {
4166
+ supportPdf: modelConfig?.supportPdf,
4167
+ toolContentSupportType: modelConfig?.toolContentSupportType ?? []
4168
+ });
4169
+ applyOpenAICompatibleThinkingBudget(openAIPayload, payload);
4170
+ if (payload.top_k !== void 0) openAIPayload.top_k = payload.top_k;
4171
+ if (openAIPayload.stream) openAIPayload.stream_options = { include_usage: true };
4172
+ normalizeOpenAICompatibleReasoningContent(openAIPayload);
4173
+ applyOpenAICompatibleRequestOverrides(openAIPayload, {
4174
+ extraBody: modelConfig?.extraBody,
4175
+ source: payload
4176
+ });
4177
+ applyMissingExtraBody(openAIPayload, { extraBody: modelConfig?.extraBody });
4178
+ applyOpenAICompatibleExtraBodyThinkingBudget(openAIPayload, { extraBody: modelConfig?.extraBody });
4179
+ if (!Object.hasOwn(openAIPayload, "parallel_tool_calls")) openAIPayload.parallel_tool_calls = true;
4180
+ if (modelConfig?.contextCache !== false) applyOpenAICompatibleContextCache(openAIPayload);
4181
+ return openAIPayload;
4175
4182
  };
4176
- const isCompactMessage = (lastMessage) => {
4177
- const text = getCompactCandidateText(lastMessage);
4178
- if (!text) return false;
4179
- return text.includes("CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.") && text.includes("Your task is to create a detailed summary of the conversation so far") && compactMessageSections.some((section) => text.includes(section));
4183
+ const normalizeOpenAICompatibleReasoningContent = (payload) => {
4184
+ for (const message of payload.messages) {
4185
+ if (message.role !== "assistant") continue;
4186
+ if (message.reasoning_content === void 0 && message.reasoning_text !== void 0) message.reasoning_content = message.reasoning_text;
4187
+ delete message.reasoning_text;
4188
+ delete message.reasoning_opaque;
4189
+ }
4180
4190
  };
4181
- const isCompactAutoContinueMessage = (lastMessage) => {
4182
- const text = getCompactCandidateText(lastMessage);
4183
- return Boolean(text) && compactAutoContinuePromptStarts.some((promptStart) => text.startsWith(promptStart));
4191
+ const applyOpenAICompatibleRequestOverrides = (payload, options) => {
4192
+ const allowedKeys = new Set(Object.keys(options.extraBody ?? {}));
4193
+ for (const key of allowedKeys) if (Object.hasOwn(options.source, key)) payload[key] = options.source[key];
4184
4194
  };
4185
- const getCompactType = (anthropicPayload) => {
4186
- const lastMessage = anthropicPayload.messages.at(-1);
4187
- if (lastMessage && isCompactMessage(lastMessage)) return 1;
4188
- if (lastMessage && isCompactAutoContinueMessage(lastMessage)) return 2;
4189
- const system = anthropicPayload.system;
4190
- if (typeof system === "string") return compactSystemPromptStarts.some((promptStart) => system.startsWith(promptStart)) ? 1 : 0;
4191
- if (!Array.isArray(system)) return 0;
4192
- if (system.some((msg) => typeof msg.text === "string" && compactSystemPromptStarts.some((promptStart) => msg.text.startsWith(promptStart)))) return 1;
4193
- return 0;
4195
/**
 * Applies a context-cache marker to the messages chosen by
 * `selectContextCacheMessageIndexes`, mutating them through
 * `applyContextCacheControl`.
 *
 * @param {{ messages: object[] }} payload - Request payload whose messages are marked.
 * @returns {void}
 */
const applyOpenAICompatibleContextCache = (payload) => {
  const selectedPositions = selectContextCacheMessageIndexes(payload.messages);
  selectedPositions.forEach((position) => {
    applyContextCacheControl(payload.messages[position]);
  });
};
4195
- const mergeContentWithText = (tr, textBlock) => {
4196
- if (typeof tr.content === "string") return {
4197
- ...tr,
4198
- content: `${tr.content}\n\n${textBlock.text}`
4199
- };
4200
- if (hasToolRef(tr)) return tr;
4201
- return {
4202
- ...tr,
4203
- content: [...tr.content, stripContentBlockCacheControl(textBlock)]
4204
- };
4199
/**
 * Chooses which message positions should receive a context-cache marker.
 *
 * Among the messages accepted by `isContextCacheMarkerEligible`, the first two
 * with role "system" and the last two with any other role are kept. The combined
 * list is passed through `uniqueIndexes$1` and returned in ascending order.
 *
 * @param {object[]} messages - Conversation messages.
 * @returns {number[]} Ascending indexes into `messages`.
 */
const selectContextCacheMessageIndexes = (messages) => {
  const systemPositions = [];
  const otherPositions = [];
  messages.forEach((message, position) => {
    if (!isContextCacheMarkerEligible(message)) return;
    const bucket = message?.role === "system" ? systemPositions : otherPositions;
    bucket.push(position);
  });
  const candidates = systemPositions.slice(0, 2).concat(otherPositions.slice(-2));
  return uniqueIndexes$1(candidates).sort((left, right) => left - right);
};
4206
- const mergeContentWithTexts = (tr, textBlocks) => {
4207
- if (typeof tr.content === "string") {
4208
- const appendedTexts = textBlocks.map((tb) => tb.text).join("\n\n");
4209
- return {
4210
- ...tr,
4211
- content: `${tr.content}\n\n${appendedTexts}`
4212
- };
4213
- }
4214
- if (hasToolRef(tr)) return tr;
4215
- return {
4216
- ...tr,
4217
- content: [...tr.content, ...textBlocks.map(stripContentBlockCacheControl)]
4218
- };
4205
/**
 * De-duplicates `indexes` (first occurrence wins, order preserved) and keeps at
 * most OPENAI_COMPATIBLE_CONTEXT_CACHE_MARKER_LIMIT entries.
 *
 * @param {Iterable<number>} indexes
 * @returns {number[]} A new array.
 */
const uniqueIndexes$1 = (indexes) => {
  const distinct = Array.from(new Set(indexes));
  return distinct.slice(0, OPENAI_COMPATIBLE_CONTEXT_CACHE_MARKER_LIMIT);
};
4206
/**
 * Reports whether a message may carry a context-cache marker.
 *
 * The role must be a member of OPENAI_COMPATIBLE_CONTEXT_CACHE_ROLES and the
 * content must be a non-empty string or a non-empty array.
 *
 * @param {{ role: string, content: unknown }} message
 * @returns {boolean}
 */
const isContextCacheMarkerEligible = (message) => {
  const roleAllowed = OPENAI_COMPATIBLE_CONTEXT_CACHE_ROLES.has(message.role);
  if (!roleAllowed) return false;
  const body = message.content;
  const isSupportedShape = typeof body === "string" || Array.isArray(body);
  return isSupportedShape && body.length > 0;
};
4220
- const mergeContentWithAttachments = (tr, attachments) => {
4221
- const cleanAttachments = attachments.map(stripContentBlockCacheControl);
4222
- if (typeof tr.content === "string") return {
4223
- ...tr,
4224
- content: [{
4211
+ const applyContextCacheControl = (message) => {
4212
+ if (!message) return;
4213
+ if (typeof message.content === "string") {
4214
+ message.content = [{
4225
4215
  type: "text",
4226
- text: tr.content
4227
- }, ...cleanAttachments]
4228
- };
4229
- return {
4230
- ...tr,
4231
- content: [...tr.content, ...cleanAttachments]
4232
- };
4233
- };
4234
- const stripContentBlockCacheControl = (block) => {
4235
- if (!Object.hasOwn(block, "cache_control")) return block;
4236
- const copy = { ...block };
4237
- delete copy.cache_control;
4238
- return copy;
4216
+ text: message.content,
4217
+ cache_control: { ...OPENAI_COMPATIBLE_CONTEXT_CACHE_CONTROL }
4218
+ }];
4219
+ return;
4220
+ }
4221
+ if (!Array.isArray(message.content)) return;
4222
+ const lastPart = message.content.at(-1);
4223
+ if (!lastPart) return;
4224
+ setContextCacheControl(lastPart);
4239
4225
  };
4240
- const isAttachmentBlock = (block) => {
4241
- return block.type === "image" || block.type === "document";
4226
/**
 * Stamps a content part with its own shallow copy of
 * OPENAI_COMPATIBLE_CONTEXT_CACHE_CONTROL, so parts never share the same
 * `cache_control` object.
 *
 * @param {object} part - Content part to mutate.
 * @returns {void}
 */
const setContextCacheControl = (part) => {
  const marker = Object.assign({}, OPENAI_COMPATIBLE_CONTEXT_CACHE_CONTROL);
  part.cache_control = marker;
};
4243
- const getMergeableToolResultIndices = (toolResults) => {
4244
- return toolResults.flatMap((block, index) => block.is_error || hasToolRef(block) ? [] : [index]);
4229
/**
 * Relays an upstream provider's server-sent events to the client.
 *
 * - "ping" events are answered with a fixed ping payload.
 * - Events with empty data are skipped; a "[DONE]" data value ends the relay.
 * - Every other event goes through `parseProviderStreamEvent`; when that yields
 *   a result, its usage is merged into the running total and its `data` replaces
 *   the raw data. When it yields nothing, the raw data is forwarded unchanged.
 * - After the loop finishes, the accumulated usage is handed to the recorder.
 *
 * @param {object} args
 * @param {object} args.c - Request context passed to `streamSSE`.
 * @param {object} args.payload - Original request payload (used for usage recording).
 * @param {string} args.provider - Provider name (used for usage recording).
 * @param {object} args.providerConfig - Provider configuration forwarded to the parser.
 * @param {object} args.upstreamResponse - Upstream response consumed through `events()`.
 * @returns {*} Whatever `streamSSE` returns.
 */
const streamProviderMessages = ({ c, payload, provider, providerConfig, upstreamResponse }) => {
  logger$5.debug("provider.messages.streaming");
  const recordUsage = createProviderMessagesUsageRecorder(payload, provider);
  return streamSSE(c, async (stream) => {
    let usage = {};
    for await (const chunk of events(upstreamResponse)) {
      logger$5.debug("provider.messages.raw_stream_event:", chunk.data);
      const eventName = chunk.event;
      if (eventName === "ping") {
        await stream.writeSSE({ event: "ping", data: "{\"type\":\"ping\"}" });
        continue;
      }
      const rawData = chunk.data;
      if (!rawData) continue;
      if (chunk.data === "[DONE]") break;
      const parsed = parseProviderStreamEvent(rawData, providerConfig);
      if (parsed) usage = mergeAnthropicUsage(usage, parsed.usage);
      // Fall back to the raw data when the event could not be parsed.
      const outgoingData = parsed ? parsed.data : rawData;
      await stream.writeSSE({ event: eventName, data: outgoingData });
    }
    recordUsage(usage);
  });
};
4246
- const mergeAttachmentsIntoToolResults = (toolResults, attachmentsByToolResultIndex) => {
4247
- if (attachmentsByToolResultIndex.size === 0) return toolResults;
4248
- return toolResults.map((block, index) => {
4249
- const matchedAttachments = attachmentsByToolResultIndex.get(index);
4250
- if (!matchedAttachments) return block;
4251
- return mergeContentWithAttachments(block, [...matchedAttachments].sort((left, right) => left.order - right.order).map(({ attachment }) => attachment));
4260
/**
 * Streams an OpenAI-compatible upstream response to the client as translated
 * events.
 *
 * Each upstream chunk is parsed with `parseOpenAICompatibleStreamChunk`,
 * translated by `translateChunkToAnthropicEvents` (which shares `streamState`
 * across chunks), and every resulting event is written as one SSE message whose
 * `event` is the event's `type` and whose `data` is its JSON form. "ping" events
 * are answered with a fixed ping payload, empty chunks and unparseable chunks are
 * skipped, and "[DONE]" ends the loop. After the loop, events returned by
 * `flushPendingAnthropicStreamEvents` are written and the latest usage seen is
 * recorded.
 *
 * @param {object} args
 * @param {object} args.c - Request context passed to `streamSSE`.
 * @param {object} args.payload - Original request payload (used for usage recording).
 * @param {string} args.provider - Provider name (used for usage recording).
 * @param {object} args.upstreamResponse - Upstream response consumed through `events()`.
 * @returns {*} Whatever `streamSSE` returns.
 */
const streamOpenAICompatibleProviderMessages = ({ c, payload, provider, upstreamResponse }) => {
  logger$5.debug("provider.messages.openai_compatible.streaming");
  const recordUsage = createProviderMessagesUsageRecorder(payload, provider);
  return streamSSE(c, async (stream) => {
    let usage = {};
    const streamState = {
      messageStartSent: false,
      contentBlockIndex: 0,
      contentBlockOpen: false,
      toolCalls: {},
      thinkingBlockOpen: false
    };
    // Serializes, logs and writes each translated event in order.
    const writeTranslatedEvents = async (translatedEvents) => {
      for (const translated of translatedEvents) {
        const eventData = JSON.stringify(translated);
        debugLazy(logger$5, () => ["provider.messages.openai_compatible.translated_event:", eventData]);
        await stream.writeSSE({ event: translated.type, data: eventData });
      }
    };
    for await (const chunk of events(upstreamResponse)) {
      logger$5.debug("provider.messages.openai_compatible.raw_stream_event:", chunk.data);
      if (chunk.event === "ping") {
        await stream.writeSSE({ event: "ping", data: "{\"type\":\"ping\"}" });
        continue;
      }
      if (chunk.data === "[DONE]") break;
      if (!chunk.data) continue;
      const parsed = parseOpenAICompatibleStreamChunk(chunk.data);
      if (!parsed) continue;
      // The most recent usage report replaces any earlier one.
      if (parsed.usage) usage = normalizeOpenAIUsage(parsed.usage);
      await writeTranslatedEvents(translateChunkToAnthropicEvents(parsed, streamState));
    }
    await writeTranslatedEvents(flushPendingAnthropicStreamEvents(streamState));
    recordUsage(usage);
  });
};
4254
- const assignAttachmentsToToolResults = (target, attachments, options) => {
4255
- const { toolResultIndices } = options;
4256
- const fallbackToolResultIndices = options.fallbackToolResultIndices ?? toolResultIndices;
4257
- if (attachments.length === 0) return;
4258
- if (toolResultIndices.length > 0 && toolResultIndices.length === attachments.length) {
4259
- for (const [index, toolResultIndex] of toolResultIndices.entries()) {
4260
- const currentAttachments = target.get(toolResultIndex);
4261
- if (currentAttachments) {
4262
- currentAttachments.push(attachments[index]);
4309
+ const streamResponsesProviderMessages = ({ c, payload, provider, providerConfig, upstreamResponse }) => {
4310
+ logger$5.debug("provider.messages.responses.streaming", { provider });
4311
+ const recordUsage = createProviderMessagesUsageRecorder(payload, provider);
4312
+ return streamSSE(c, async (stream) => {
4313
+ let usage = {};
4314
+ const streamState = createResponsesStreamState({ toolSearchName: resolveBridgeToolSearchName(payload.tools) });
4315
+ for await (const chunk of upstreamResponse) {
4316
+ logger$5.debug("provider.messages.responses.raw_stream_event:", chunk.data);
4317
+ if (chunk.event === "ping") {
4318
+ await stream.writeSSE({
4319
+ event: "ping",
4320
+ data: "{\"type\":\"ping\"}"
4321
+ });
4263
4322
  continue;
4264
4323
  }
4265
- target.set(toolResultIndex, [attachments[index]]);
4324
+ if (!chunk.data || chunk.data === "[DONE]") {
4325
+ if (chunk.data === "[DONE]") break;
4326
+ continue;
4327
+ }
4328
+ const parsed = parseResponsesProviderStreamChunk(chunk.data, providerConfig);
4329
+ if (!parsed) continue;
4330
+ if (parsed.type === "response.completed" || parsed.type === "response.failed" || parsed.type === "response.incomplete") usage = normalizeResponsesUsage(parsed.response.usage);
4331
+ const events = translateResponsesStreamEvent(parsed, streamState);
4332
+ for (const event of events) {
4333
+ const eventData = JSON.stringify(event);
4334
+ debugLazy(logger$5, () => ["provider.messages.responses.translated_event:", eventData]);
4335
+ await stream.writeSSE({
4336
+ event: event.type,
4337
+ data: eventData
4338
+ });
4339
+ }
4340
+ }
4341
+ if (!streamState.messageCompleted) {
4342
+ const errorEvent = buildErrorEvent(`${provider} stream ended without a completion event`);
4343
+ await stream.writeSSE({
4344
+ event: errorEvent.type,
4345
+ data: JSON.stringify(errorEvent)
4346
+ });
4266
4347
  }
4267
- return;
4268
- }
4269
- const lastToolResultIndex = fallbackToolResultIndices.at(-1);
4270
- if (lastToolResultIndex === void 0) return;
4271
- const currentAttachments = target.get(lastToolResultIndex);
4272
- if (currentAttachments) {
4273
- currentAttachments.push(...attachments);
4274
- return;
4348
+ recordUsage(usage);
4349
+ });
4350
+ };
4351
/**
 * Tells whether `value` is async-iterable, i.e. truthy and exposing a function
 * under `Symbol.asyncIterator`.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
const isResponsesStream$1 = (value) => {
  if (!value) return false;
  const iteratorFactory = value[Symbol.asyncIterator];
  return typeof iteratorFactory === "function";
};
4354
+ const parseOpenAICompatibleStreamChunk = (data) => {
4355
+ try {
4356
+ return JSON.parse(data);
4357
+ } catch (error) {
4358
+ logger$5.error("provider.messages.openai_compatible.parse_chunk_error", {
4359
+ data,
4360
+ error
4361
+ });
4362
+ return null;
4275
4363
  }
4276
- target.set(lastToolResultIndex, [...attachments]);
4277
4364
  };
4278
- const startsWithPdfFileRead = (toolResult) => {
4279
- if (typeof toolResult.content === "string") return toolResult.content.startsWith(PDF_FILE_READ_PREFIX);
4280
- if (toolResult.content.some((block) => block.type === "document")) return false;
4281
- if (toolResult.content.length === 0) return false;
4282
- const firstBlock = toolResult.content[0];
4283
- if (firstBlock.type !== "text") return false;
4284
- return firstBlock.text.startsWith(PDF_FILE_READ_PREFIX);
4365
/**
 * Parses one JSON-encoded stream chunk from a Responses-style provider.
 *
 * When the provider is named "codex", the parsed event is also passed to
 * `logCodexRateLimitsEvent`. Any error raised while parsing or logging is
 * reported through the logger and turned into a `null` result.
 *
 * @param {string} data - Raw chunk data.
 * @param {{ name: string }} providerConfig - Provider configuration.
 * @returns {object | null} The parsed event, or `null` on failure.
 */
const parseResponsesProviderStreamChunk = (data, providerConfig) => {
  let parsedEvent;
  try {
    parsedEvent = JSON.parse(data);
    if (providerConfig.name === "codex") {
      logCodexRateLimitsEvent(parsedEvent);
    }
  } catch (error) {
    logger$5.error("provider.messages.responses.parse_chunk_error", {
      provider: providerConfig.name,
      data,
      error
    });
    return null;
  }
  return parsedEvent;
};
4286
- const collectMergeableUserContent = (content) => {
4287
- const toolResults = [];
4288
- const textBlocks = [];
4289
- const attachments = [];
4290
- for (const [order, block] of content.entries()) {
4291
- if (block.type === "tool_result") {
4292
- toolResults.push(block);
4293
- continue;
4294
- }
4295
- if (block.type === "text") {
4296
- textBlocks.push(block);
4297
- continue;
4379
+ const parseProviderStreamEvent = (data, providerConfig) => {
4380
+ try {
4381
+ const parsed = JSON.parse(data);
4382
+ if (parsed.type === "message_start") {
4383
+ adjustInputTokens(providerConfig, parsed.message.usage);
4384
+ return {
4385
+ data: JSON.stringify(parsed),
4386
+ model: parsed.message.model,
4387
+ usage: normalizeAnthropicUsage(parsed.message.usage)
4388
+ };
4298
4389
  }
4299
- if (isAttachmentBlock(block)) {
4300
- attachments.push({
4301
- attachment: block,
4302
- order
4303
- });
4304
- continue;
4390
+ if (parsed.type === "message_delta") {
4391
+ adjustInputTokens(providerConfig, parsed.usage);
4392
+ return {
4393
+ data: JSON.stringify(parsed),
4394
+ usage: normalizeAnthropicUsage(parsed.usage)
4395
+ };
4305
4396
  }
4397
+ return {
4398
+ data: JSON.stringify(parsed),
4399
+ usage: {}
4400
+ };
4401
+ } catch (error) {
4402
+ logger$5.error("provider.messages.streaming.adjust_tokens_error", {
4403
+ error,
4404
+ originalData: data
4405
+ });
4306
4406
  return null;
4307
4407
  }
4308
- return {
4309
- toolResults,
4310
- textBlocks,
4311
- attachments
4312
- };
4313
4408
  };
4314
- const mergeAttachmentsForToolResults = (toolResults, attachments) => {
4315
- if (attachments.length === 0) return toolResults;
4316
- const documentBlocks = attachments.filter(({ attachment }) => attachment.type === "document");
4317
- const mergeableToolResultIndices = getMergeableToolResultIndices(toolResults);
4318
- const pdfReadToolResultIndices = mergeableToolResultIndices.filter((index) => startsWithPdfFileRead(toolResults[index]));
4319
- const attachmentsByToolResultIndex = /* @__PURE__ */ new Map();
4320
- let remainingAttachments = attachments;
4321
- let countMatchToolResultIndices = mergeableToolResultIndices;
4322
- if (documentBlocks.length > 0 && pdfReadToolResultIndices.length > 0) {
4323
- const matchedDocumentCount = Math.min(pdfReadToolResultIndices.length, documentBlocks.length);
4324
- const matchedDocuments = documentBlocks.slice(0, matchedDocumentCount);
4325
- const matchedDocumentOrders = new Set(matchedDocuments.map(({ order }) => order));
4326
- const matchedPdfToolResultIndices = pdfReadToolResultIndices.slice(0, matchedDocumentCount);
4327
- const matchedPdfToolResultIndexSet = new Set(matchedPdfToolResultIndices);
4328
- assignAttachmentsToToolResults(attachmentsByToolResultIndex, matchedDocuments, { toolResultIndices: matchedPdfToolResultIndices });
4329
- countMatchToolResultIndices = mergeableToolResultIndices.filter((index) => !matchedPdfToolResultIndexSet.has(index));
4330
- remainingAttachments = attachments.filter(({ attachment, order }) => attachment.type !== "document" || !matchedDocumentOrders.has(order));
4331
- }
4332
- assignAttachmentsToToolResults(attachmentsByToolResultIndex, remainingAttachments, {
4333
- toolResultIndices: countMatchToolResultIndices,
4334
- fallbackToolResultIndices: mergeableToolResultIndices
4335
- });
4336
- return mergeAttachmentsIntoToolResults(toolResults, attachmentsByToolResultIndex);
4409
/**
 * Sends a non-streaming provider response back to the client as JSON.
 *
 * `adjustInputTokens` may rewrite `body.usage` in place before the usage is
 * normalized and recorded, so the returned body carries the adjusted figures.
 *
 * @param {object} c - Request context exposing `json()`.
 * @param {{ body: object, payload: object, provider: string, providerConfig: object }} options
 * @returns {*} Whatever `c.json` returns.
 */
const respondProviderMessagesJson = (c, options) => {
  const recordUsage = createProviderMessagesUsageRecorder(options.payload, options.provider);
  const responseBody = options.body;
  adjustInputTokens(options.providerConfig, responseBody.usage);
  const normalizedUsage = normalizeAnthropicUsage(responseBody.usage);
  recordUsage(normalizedUsage);
  debugJson(logger$5, "provider.messages.no_stream result:", responseBody);
  return c.json(responseBody);
};
4338
- const mergeUserMessageContent = (content) => {
4339
- const mergeableContent = collectMergeableUserContent(content);
4340
- if (!mergeableContent) return null;
4341
- const { toolResults, textBlocks, attachments } = mergeableContent;
4342
- if (toolResults.length === 0 || textBlocks.length === 0 && attachments.length === 0) return null;
4343
- return mergeAttachmentsForToolResults(textBlocks.length === 0 ? toolResults : mergeToolResult(toolResults, textBlocks), attachments);
4417
/**
 * Sends a non-streaming OpenAI-compatible response back to the client.
 *
 * Usage taken from `body.usage` is normalized and recorded first; the body is
 * then converted with `translateToAnthropic` and returned as JSON.
 *
 * @param {object} c - Request context exposing `json()`.
 * @param {{ body: object, payload: object, provider: string }} options
 * @returns {*} Whatever `c.json` returns.
 */
const respondOpenAICompatibleProviderMessagesJson = (c, options) => {
  const recordUsage = createProviderMessagesUsageRecorder(options.payload, options.provider);
  recordUsage(normalizeOpenAIUsage(options.body.usage));
  const translatedBody = translateToAnthropic(options.body);
  debugJson(logger$5, "provider.messages.openai_compatible.no_stream result:", translatedBody);
  return c.json(translatedBody);
};
4345
- const mergeToolResult = (toolResults, textBlocks) => {
4346
- if (toolResults.length === textBlocks.length) return toolResults.map((tr, i) => mergeContentWithText(tr, textBlocks[i]));
4347
- const lastIndex = toolResults.length - 1;
4348
- return toolResults.map((tr, i) => i === lastIndex ? mergeContentWithTexts(tr, textBlocks) : tr);
4424
/**
 * Sends a non-streaming Responses-style provider result back to the client.
 *
 * Usage from `body.usage` is normalized and recorded, then the body is converted
 * with `translateResponsesResultToAnthropic`, using the tool-search name resolved
 * from the request's tools. An extra debug line is logged for the "codex"
 * provider.
 *
 * @param {object} c - Request context exposing `json()`.
 * @param {{ body: object, payload: object, provider: string, providerConfig: { name: string } }} options
 * @returns {*} Whatever `c.json` returns.
 */
const respondResponsesProviderMessagesJson = (c, options) => {
  const recordUsage = createProviderMessagesUsageRecorder(options.payload, options.provider);
  recordUsage(normalizeResponsesUsage(options.body.usage));
  const toolSearchName = resolveBridgeToolSearchName(options.payload.tools);
  const translatedBody = translateResponsesResultToAnthropic(options.body, { toolSearchName });
  debugJson(logger$5, "provider.messages.responses.no_stream result:", translatedBody);
  if (options.providerConfig.name === "codex") {
    logger$5.debug("provider.messages.codex.no_stream.result");
  }
  return c.json(translatedBody);
};
4350
- const stripToolReferenceTurnBoundary = (anthropicPayload) => {
4351
- for (const msg of anthropicPayload.messages) {
4352
- if (msg.role !== "user" || !Array.isArray(msg.content)) continue;
4353
- if (!msg.content.some((block) => block.type === "tool_result" && hasToolRef(block))) continue;
4354
- msg.content = msg.content.filter((block) => block.type !== "text" || block.text.trim() !== "Tool loaded.");
4355
- }
4432
/**
 * Builds the token-usage recorder for the "provider_messages" endpoint.
 *
 * The session id is taken from the result of `parseUserIdMetadata` applied to
 * `payload.metadata.user_id` (which may be absent).
 *
 * @param {{ model: string, metadata?: { user_id?: string } }} payload - Request payload.
 * @param {string} provider - Provider name.
 * @returns {*} Whatever `createProviderTokenUsageRecorder` returns; callers in this file invoke it with a usage object.
 */
const createProviderMessagesUsageRecorder = (payload, provider) => {
  const model = payload.model;
  const { sessionId } = parseUserIdMetadata(payload.metadata?.user_id);
  return createProviderTokenUsageRecorder({
    endpoint: "provider_messages",
    model,
    providerName: provider,
    sessionId
  });
};
4438
/**
 * Subtracts cache-read and cache-creation tokens from `usage.input_tokens`,
 * in place, clamping the result at zero.
 *
 * Does nothing unless `providerConfig.adjustInputTokens` is truthy and a usage
 * object is present. Missing counters are treated as 0.
 *
 * @param {{ adjustInputTokens?: boolean }} providerConfig
 * @param {{ input_tokens?: number, cache_read_input_tokens?: number, cache_creation_input_tokens?: number } | null | undefined} usage
 * @returns {void}
 */
const adjustInputTokens = (providerConfig, usage) => {
  if (!providerConfig.adjustInputTokens) return;
  if (!usage) return;
  const reportedInput = usage.input_tokens ?? 0;
  const cacheRead = usage.cache_read_input_tokens ?? 0;
  const cacheCreation = usage.cache_creation_input_tokens ?? 0;
  usage.input_tokens = Math.max(0, reportedInput - cacheRead - cacheCreation);
  debugJson(logger$5, "provider.messages.adjusted_usage:", usage);
};
4357
- const mergeToolResultForClaude = (anthropicPayload, options) => {
4358
- const lastMessageIndex = anthropicPayload.messages.length - 1;
4359
- for (const [index, msg] of anthropicPayload.messages.entries()) {
4360
- if (options?.skipLastMessage && index === lastMessageIndex) continue;
4361
- if (msg.role !== "user" || !Array.isArray(msg.content)) continue;
4362
- const mergedContent = mergeUserMessageContent(msg.content);
4363
- if (mergedContent) msg.content = mergedContent;
4443
+ //#endregion
4444
+ //#region src/services/copilot/create-messages.ts
4445
/** Beta flag sent when a thinking budget is requested without adaptive thinking. */
const INTERLEAVED_THINKING_BETA = "interleaved-thinking-2025-05-14";
/** Beta flags that are allowed through to the upstream `anthropic-beta` header. */
const allowedAnthropicBetas = new Set([
  INTERLEAVED_THINKING_BETA,
  "context-management-2025-06-27",
  "advanced-tool-use-2025-11-20"
]);
/**
 * Computes the value of the outgoing `anthropic-beta` header.
 *
 * When the caller supplied a header, it is split on commas, each entry is
 * trimmed, entries outside `allowedAnthropicBetas` are dropped, and repeated
 * entries are collapsed so each allowed flag is forwarded once, in first-seen
 * order. If nothing survives, no header is produced; the thinking configuration
 * is not consulted in that case.
 *
 * Without a caller header, the interleaved-thinking flag is returned when
 * `thinking.budget_tokens` is truthy and the thinking type is not "adaptive".
 *
 * @param {string | undefined | null} anthropicBetaHeader - Incoming comma-separated header value.
 * @param {{ type?: string, budget_tokens?: number } | undefined | null} thinking - Thinking configuration from the payload.
 * @param {string} [_model] - Unused; kept so existing call sites remain valid.
 * @returns {string | undefined} Header value, or `undefined` when no header should be sent.
 */
const buildAnthropicBetaHeader = (anthropicBetaHeader, thinking, _model) => {
  if (anthropicBetaHeader) {
    const allowedRequested = anthropicBetaHeader
      .split(",")
      .map((item) => item.trim())
      .filter((item) => allowedAnthropicBetas.has(item));
    // A Set keeps insertion order, so duplicates collapse onto their first occurrence.
    const uniqueFilteredBetas = [...new Set(allowedRequested)];
    return uniqueFilteredBetas.length > 0 ? uniqueFilteredBetas.join(",") : undefined;
  }
  const isAdaptiveThinking = thinking?.type === "adaptive";
  if (thinking?.budget_tokens && !isAdaptiveThinking) return INTERLEAVED_THINKING_BETA;
  return undefined;
};
4366
- const sanitizeIdeTools = (payload) => {
4367
- if (!payload.tools || payload.tools.length === 0) return;
4368
- payload.tools = payload.tools.flatMap((tool) => {
4369
- if (tool.name === IDE_EXECUTE_CODE_TOOL && !tool.defer_loading) return [];
4370
- if (tool.name === IDE_GET_DIAGNOSTICS_TOOL) return [{
4371
- ...tool,
4372
- description: IDE_GET_DIAGNOSTICS_DESCRIPTION
4373
- }];
4374
- return [tool];
4460
+ const createMessages = async (payload, anthropicBetaHeader, options) => {
4461
+ if (!state.copilotToken) throw new Error("Copilot token not found");
4462
+ const enableVision = payload.messages.some((message) => {
4463
+ if (!Array.isArray(message.content)) return false;
4464
+ return message.content.some((block) => block.type === "image" || block.type === "tool_result" && Array.isArray(block.content) && block.content.some((inner) => inner.type === "image"));
4375
4465
  });
4376
- };
4377
- const hasToolRef = (block) => {
4378
- return Array.isArray(block.content) && block.content.some((c) => c.type === "tool_reference");
4379
- };
4380
- const stripCacheControl = (payload) => {
4381
- if (Array.isArray(payload.system)) for (const block of payload.system) {
4382
- const cacheControl = block.cache_control;
4383
- if (cacheControl && typeof cacheControl === "object") {
4384
- const { scope, ...rest } = cacheControl;
4385
- block.cache_control = rest;
4386
- }
4387
- }
4388
- };
4389
- const filterAssistantThinkingBlocks = (payload) => {
4390
- for (const msg of payload.messages) if (msg.role === "assistant" && Array.isArray(msg.content)) msg.content = msg.content.filter((block) => {
4391
- if (block.type !== "thinking") return true;
4392
- return block.thinking && block.thinking !== "Thinking..." && block.signature && !block.signature.includes("@");
4466
+ let isInitiateRequest = false;
4467
+ const lastMessage = payload.messages.at(-1);
4468
+ if (lastMessage?.role === "user") isInitiateRequest = Array.isArray(lastMessage.content) ? lastMessage.content.some((block) => block.type !== "tool_result") : true;
4469
+ const headers = {
4470
+ ...copilotHeaders(state, options.requestId, enableVision),
4471
+ "x-initiator": isInitiateRequest ? "user" : "agent"
4472
+ };
4473
+ prepareInteractionHeaders(options.sessionId, Boolean(options.subagentMarker), headers);
4474
+ prepareForCompact(headers, options.compactType);
4475
+ const { safetyIdentifier, sessionId } = parseUserIdMetadata(payload.metadata?.user_id);
4476
+ if (safetyIdentifier && sessionId && payload.model !== "claude-opus-4.8") prepareMessageProxyHeaders(headers);
4477
+ const anthropicBeta = buildAnthropicBetaHeader(anthropicBetaHeader, payload.thinking, payload.model);
4478
+ if (anthropicBeta) headers["anthropic-beta"] = anthropicBeta;
4479
+ consola.log(`<-- model: ${payload.model}`);
4480
+ const response = await fetch(`${copilotBaseUrl(state)}/v1/messages`, {
4481
+ method: "POST",
4482
+ headers,
4483
+ body: JSON.stringify(payload)
4393
4484
  });
4394
- };
4395
- const prepareMessagesApiPayload = (payload, selectedModel) => {
4396
- stripCacheControl(payload);
4397
- filterAssistantThinkingBlocks(payload);
4398
- const hasThinking = Boolean(payload.thinking);
4399
- const toolChoice = payload.tool_choice;
4400
- const disableThink = toolChoice?.type === "any" || toolChoice?.type === "tool";
4401
- if (selectedModel?.capabilities.supports.adaptive_thinking && !disableThink) {
4402
- payload.thinking = { type: "adaptive" };
4403
- if (!hasThinking) payload.thinking.display = "summarized";
4404
- if (shouldSummarizeThinkingDisplayForModel(payload.model)) payload.thinking.display = "summarized";
4405
- let effort = getReasoningEffortForModel(payload.model);
4406
- if (effort === "none" || effort === "minimal") effort = "low";
4407
- const reasoningEffort = selectedModel.capabilities.supports.reasoning_effort;
4408
- if (reasoningEffort && !reasoningEffort.includes(effort)) effort = reasoningEffort.at(-1);
4409
- payload.output_config = { effort };
4485
+ logCopilotRateLimits(response.headers);
4486
+ if (!response.ok) {
4487
+ consola.error("Failed to create messages", response);
4488
+ throw new HTTPError("Failed to create messages", response);
4410
4489
  }
4490
+ if (payload.stream) return events(response);
4491
+ return await response.json();
4411
4492
  };
4412
4493
  //#endregion
4413
4494
  //#region src/routes/messages/api-flows.ts
@@ -4699,8 +4780,9 @@ async function handleCompletion(c) {
4699
4780
  provider: providerModelAlias.provider
4700
4781
  });
4701
4782
  }
4702
- await checkRateLimit(state);
4703
4783
  debugJson(logger$4, "Anthropic request payload:", anthropicPayload);
4784
+ normalizeSystemMessages(anthropicPayload);
4785
+ await checkRateLimit(state);
4704
4786
  sanitizeIdeTools(anthropicPayload);
4705
4787
  const subagentMarker = parseSubagentMarkerFromFirstUser(anthropicPayload);
4706
4788
  if (subagentMarker) debugJson(logger$4, "Detected Subagent marker:", subagentMarker);
@@ -5211,4 +5293,4 @@ server.route("/:provider/v1/models", providerModelRoutes);
5211
5293
  //#endregion
5212
5294
  export { server };
5213
5295
 
5214
- //# sourceMappingURL=server-CGmI5FOl.js.map
5296
+ //# sourceMappingURL=server-DcJiCgxI.js.map