@nick3/copilot-api 1.9.15 → 1.10.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/README.md +56 -7
  2. package/README.zh-CN.md +55 -6
  3. package/dist/{account-DjCbqJ2Q.js → account-COtMmvzU.js} +2 -2
  4. package/dist/{account-DjCbqJ2Q.js.map → account-COtMmvzU.js.map} +1 -1
  5. package/dist/admin/assets/{index-BRnD4-DB.js → index-DG4TRVMu.js} +36 -36
  6. package/dist/admin/index.html +1 -1
  7. package/dist/{auth--I1utaB6.js → auth-B0y-2njL.js} +3 -3
  8. package/dist/{auth--I1utaB6.js.map → auth-B0y-2njL.js.map} +1 -1
  9. package/dist/{check-usage-DHvjdha4.js → check-usage-DdevqHE5.js} +3 -3
  10. package/dist/{check-usage-DHvjdha4.js.map → check-usage-DdevqHE5.js.map} +1 -1
  11. package/dist/{get-copilot-token-ZbmbVF0I.js → get-copilot-token-8Rm-rVsp.js} +2 -2
  12. package/dist/{get-copilot-token-ZbmbVF0I.js.map → get-copilot-token-8Rm-rVsp.js.map} +1 -1
  13. package/dist/main.js +6 -4
  14. package/dist/main.js.map +1 -1
  15. package/dist/mcp-9Hgepkc5.js +37 -0
  16. package/dist/mcp-9Hgepkc5.js.map +1 -0
  17. package/dist/{poll-access-token-CIPDXrcm.js → poll-access-token-BAgM2-7k.js} +62 -6
  18. package/dist/poll-access-token-BAgM2-7k.js.map +1 -0
  19. package/dist/{quota-refresh-scheduler-runtime-XD2fDa2K.js → proxy-YVh74m0I.js} +67 -8
  20. package/dist/proxy-YVh74m0I.js.map +1 -0
  21. package/dist/{request-outbound-Cy6huWjK.js → request-outbound-BJjWS_jF.js} +1 -1
  22. package/dist/{request-outbound-CxvpSkOn.js → request-outbound-Pu1kp2x8.js} +3 -1
  23. package/dist/request-outbound-Pu1kp2x8.js.map +1 -0
  24. package/dist/{server-BDCnb3Ao.js → server-DmDAepfa.js} +667 -77
  25. package/dist/server-DmDAepfa.js.map +1 -0
  26. package/dist/{start-DkBnp9d8.js → start-D37Bi12h.js} +5 -52
  27. package/dist/start-D37Bi12h.js.map +1 -0
  28. package/dist/tool-search-BrN7M0Dd.js +110 -0
  29. package/dist/tool-search-BrN7M0Dd.js.map +1 -0
  30. package/package.json +3 -6
  31. package/dist/poll-access-token-CIPDXrcm.js.map +0 -1
  32. package/dist/quota-refresh-scheduler-runtime-XD2fDa2K.js.map +0 -1
  33. package/dist/request-outbound-CxvpSkOn.js.map +0 -1
  34. package/dist/server-BDCnb3Ao.js.map +0 -1
  35. package/dist/start-DkBnp9d8.js.map +0 -1
@@ -1,21 +1,23 @@
1
- import { A as captureOutboundHeadersSnapshot, D as prepareMessageProxyHeaders, E as prepareInteractionHeaders, M as requestContext, N as resolveTraceId, O as accountFromState, T as prepareForCompact, _ as HTTPError, b as copilotHeaders, c as getUUID, d as parseUserIdMetadata, f as resolveAffinityKey, g as getCopilotUsage, h as getDeviceCode, k as state, l as isNullish, m as getGitHubUser, o as generateRequestIdFromPayload, p as sleep, s as getRootSessionId, t as pollAccessToken, u as normalizeStableSessionId, v as forwardError, w as normalizeDomain, y as copilotBaseUrl } from "./poll-access-token-CIPDXrcm.js";
2
- import { a as getAccountClientIdentityByLoginAndApp, b as getCurrentIdentityEnvironment, d as loadRegistry, g as saveRegistry, h as saveAccountToken, l as listAccountsFromRegistry, m as removeAccountToken, p as removeAccountFromRegistry, r as addAccountToRegistry, t as isAccountType } from "./account-DjCbqJ2Q.js";
1
+ import { A as state, D as prepareInteractionHeaders, E as prepareForCompact, I as compactAutoContinuePromptStarts, L as compactMessageSections, N as requestContext, O as prepareMessageProxyHeaders, P as resolveTraceId, S as copilotWebSocketHeaders, T as normalizeDomain, _ as HTTPError, b as copilotHeaders, c as getUUID, d as parseUserIdMetadata, f as resolveAffinityKey, g as getCopilotUsage, h as getDeviceCode, j as captureOutboundHeadersSnapshot, k as accountFromState, l as isNullish, m as getGitHubUser, o as generateRequestIdFromPayload, p as sleep, s as getRootSessionId, t as pollAccessToken, u as normalizeStableSessionId, v as forwardError, y as copilotBaseUrl, z as compactSystemPromptStarts } from "./poll-access-token-BAgM2-7k.js";
2
+ import { a as getAccountClientIdentityByLoginAndApp, b as getCurrentIdentityEnvironment, d as loadRegistry, g as saveRegistry, h as saveAccountToken, l as listAccountsFromRegistry, m as removeAccountToken, p as removeAccountFromRegistry, r as addAccountToRegistry, t as isAccountType } from "./account-COtMmvzU.js";
3
3
  import { r as ensurePaths, t as PATHS } from "./paths-CclKwouX.js";
4
- import { i as getRequestOutboundStore, r as getRedactedHeaderKeys } from "./request-outbound-CxvpSkOn.js";
5
- import { A as getReasoningEffortForModel, B as shouldCompactUseSmallModel, C as getConfig, D as getModelAliasesInfo, E as getModelAliases, F as isMessagesApiEnabled, I as isResponsesApiContextManagementModel, L as isResponsesApiWebSearchEnabled, M as isAccountAffinityEnabled, N as isForceAgentEnabled, O as getModelRefreshIntervalMs, P as isMessageStartInputTokensFallbackEnabled, R as mergeConfigWithDefaults, S as getClaudeTokenMultiplier, T as getLogLevel, _ as flushPendingCapture, a as accountsManager, b as getAliasTargetSet, c as extractResponsesUsageFromStreamEvent, d as getStatsStore, f as normalizeChatCompletionsUsage, g as copilotFetch, h as toLocalDateString, i as updateQuotaRefreshSchedulerFromConfig, j as getSmallModel, k as getProviderConfig, l as getClientIpInfo, m as normalizeMessagesUsage, o as applySharedSessionAffinityRetention, p as normalizeEmbeddingsUsage, s as extractResponsesUsageFromResult, u as getRequestHistoryStore, v as isDevModeEnabled, w as getExtraPromptForModel, x as getAnthropicApiKey, y as PROVIDER_TYPE_ANTHROPIC, z as resolveModelAlias } from "./quota-refresh-scheduler-runtime-XD2fDa2K.js";
4
+ import { i as getRequestOutboundStore, r as getRedactedHeaderKeys } from "./request-outbound-Pu1kp2x8.js";
5
+ import { a as isDeferredToolName, c as parseMcpToolSearchSentinel, i as isBridgeToolSearchName, l as selectDeferredToolsByNames, o as listDeferredToolNames, r as formatToolSearchBridgeArguments, s as normalizeToolSearchBridgeArguments, t as BRIDGE_TOOL_SEARCH_NAME, u as shouldEnableResponsesToolSearch } from "./tool-search-BrN7M0Dd.js";
6
+ import { A as getModelRefreshIntervalMs, B as isResponsesApiWebSocketEnabled, C as getAnthropicApiKey, D as getLogLevel, E as getExtraPromptForModel, F as isForceAgentEnabled, H as resolveModelAlias, I as isMessageStartInputTokensFallbackEnabled, L as isMessagesApiEnabled, M as getReasoningEffortForModel, N as getSmallModel, O as getModelAliases, P as isAccountAffinityEnabled, R as isResponsesApiContextManagementModel, S as getAliasTargetSet, T as getConfig, U as shouldCompactUseSmallModel, V as mergeConfigWithDefaults, _ as toLocalDateString, b as isDevModeEnabled, c as applySharedSessionAffinityRetention, d as getClientIpInfo, f as getRequestHistoryStore, g as normalizeMessagesUsage, h as normalizeEmbeddingsUsage, j as getProviderConfig, k as getModelAliasesInfo, l as extractResponsesUsageFromResult, m as normalizeChatCompletionsUsage, o as updateQuotaRefreshSchedulerFromConfig, p as getStatsStore, s as accountsManager, t as getProxyEnvDispatcher, u as extractResponsesUsageFromStreamEvent, v as copilotFetch, w as getClaudeTokenMultiplier, x as PROVIDER_TYPE_ANTHROPIC, y as flushPendingCapture, z as isResponsesApiWebSearchEnabled } from "./proxy-YVh74m0I.js";
6
7
  import consola from "consola";
7
8
  import fs, { readFile } from "node:fs/promises";
8
9
  import { createHash, randomUUID, timingSafeEqual } from "node:crypto";
9
10
  import * as path$1 from "node:path";
10
11
  import path from "node:path";
11
12
  import { fileURLToPath } from "node:url";
13
+ import fs$1, { existsSync } from "node:fs";
12
14
  import { Hono } from "hono";
13
15
  import { cors } from "hono/cors";
14
16
  import { logger } from "hono/logger";
15
- import fs$1, { existsSync } from "node:fs";
16
17
  import { streamSSE } from "hono/streaming";
17
18
  import { events } from "fetch-event-stream";
18
19
  import util from "node:util";
20
+ import { WebSocket } from "undici";
19
21
  //#region src/lib/request-auth.ts
20
22
  const LEGACY_API_KEY_ENV_VAR = "COPILOT_API_KEY";
21
23
  let warnedLegacyEnvFallback = false;
@@ -350,10 +352,14 @@ const copilotRateLimitHeaders = {
350
352
  session: "x-usage-ratelimit-session",
351
353
  weekly: "x-usage-ratelimit-weekly"
352
354
  };
355
/**
 * Maps each rate-limit window type to the key used to look up its entry in a
 * quota-snapshots object. Frozen so this shared lookup table cannot be
 * mutated at runtime.
 */
const copilotQuotaSnapshotKeys = Object.freeze({
  session: "5Hour-Session-RateLimits",
  weekly: "Weekly-Session-RateLimits"
});
353
359
  const hasGetMethod = (headers) => {
354
360
  return "get" in headers && typeof headers.get === "function";
355
361
  };
356
- const getHeaderValue = (headers, headerName) => {
362
+ const getHeaderValue$1 = (headers, headerName) => {
357
363
  if (hasGetMethod(headers)) return headers.get(headerName);
358
364
  const normalizedHeaderName = headerName.toLowerCase();
359
365
  return Object.entries(headers).find(([key]) => key.toLowerCase() === normalizedHeaderName)?.[1] ?? null;
@@ -370,7 +376,7 @@ const parseCopilotRateLimitHeader = (headerValue) => {
370
376
  };
371
377
  const getCopilotRateLimitUsage = (headers, type) => {
372
378
  const headerName = copilotRateLimitHeaders[type];
373
- const headerValue = getHeaderValue(headers, headerName);
379
+ const headerValue = getHeaderValue$1(headers, headerName);
374
380
  if (!headerValue) return null;
375
381
  const parsed = parseCopilotRateLimitHeader(headerValue);
376
382
  if (!parsed) return null;
@@ -379,15 +385,39 @@ const getCopilotRateLimitUsage = (headers, type) => {
379
385
  ...parsed
380
386
  };
381
387
  };
388
/**
 * Builds a rate-limit usage record for `type` from a quota-snapshots object.
 *
 * @param {object | null | undefined} snapshots - Snapshot map keyed by the names in `copilotQuotaSnapshotKeys`.
 * @param {string} type - Rate-limit window type.
 * @returns {{ remaining: string, resetAt: string, type: string } | null}
 *   `null` when the entry is missing or fails `isCopilotQuotaSnapshot`.
 */
const getCopilotRateLimitUsageFromSnapshots = (snapshots, type) => {
  const snapshotKey = copilotQuotaSnapshotKeys[type];
  const candidate = snapshots?.[snapshotKey];
  if (isCopilotQuotaSnapshot(candidate)) {
    const { percent_remaining: percentRemaining, reset_date: resetAt } = candidate;
    return {
      remaining: String(percentRemaining),
      resetAt,
      type
    };
  }
  return null;
};
382
397
  const logCopilotRateLimits = (headers) => {
383
398
  for (const type of copilotRateLimitTypes) {
384
399
  const usage = getCopilotRateLimitUsage(headers, type);
385
400
  if (!usage) continue;
386
- const d = new Date(usage.resetAt);
387
- const dateStr = Number.isNaN(d.getTime()) ? usage.resetAt : d.toLocaleString();
388
- consola.info(`Copilot ${usage.type} quota remaining: ${usage.remaining}, resets at: ${dateStr}`);
401
+ logCopilotRateLimitUsage(usage);
402
+ }
403
+ };
404
/**
 * Logs the remaining quota for every known rate-limit type that has a valid
 * entry in `snapshots`; types without a usable snapshot are skipped.
 *
 * @param {object | null | undefined} snapshots - Quota-snapshots object.
 */
const logCopilotQuotaSnapshots = (snapshots) => {
  for (const rateLimitType of copilotRateLimitTypes) {
    const snapshotUsage = getCopilotRateLimitUsageFromSnapshots(snapshots, rateLimitType);
    if (snapshotUsage) {
      logCopilotRateLimitUsage(snapshotUsage);
    }
  }
};
411
/**
 * Writes one info-level log line describing a rate-limit usage record.
 * The reset time is shown in locale format when `resetAt` parses as a date,
 * otherwise the raw value is printed unchanged.
 *
 * @param {{ type: string, remaining: string, resetAt: string }} usage
 */
const logCopilotRateLimitUsage = (usage) => {
  const resetDate = new Date(usage.resetAt);
  let resetLabel = usage.resetAt;
  if (!Number.isNaN(resetDate.getTime())) {
    resetLabel = resetDate.toLocaleString();
  }
  consola.info(`Copilot ${usage.type} quota remaining: ${usage.remaining}, resets at: ${resetLabel}`);
};
416
/**
 * Structural check for a quota snapshot: a non-null object whose
 * `entitlement`, `percent_remaining`, `overage_permitted`, `overage_count`
 * and `reset_date` fields have the expected primitive types.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
const isCopilotQuotaSnapshot = (value) => {
  if (!value || typeof value !== "object") return false;
  const expectedFieldTypes = [
    ["entitlement", "string"],
    ["percent_remaining", "number"],
    ["overage_permitted", "boolean"],
    ["overage_count", "number"],
    ["reset_date", "string"]
  ];
  return expectedFieldTypes.every(([field, expectedType]) => typeof value[field] === expectedType);
};
391
421
  //#endregion
392
422
  //#region src/lib/request-initiator.ts
393
423
  function resolveEffectiveInitiator(baseInitiator, options) {
@@ -663,17 +693,29 @@ const getTokenCount = async (payload, model) => {
663
693
  output: outputTokens
664
694
  };
665
695
  };
666
- const compactSystemPromptStarts = ["You are a helpful AI assistant tasked with summarizing conversations", "You are an anchored context summarization assistant for coding sessions."];
667
- const compactAutoContinuePromptStarts = [
668
- "This session is being continued from a previous conversation that ran out of context. The summary below covers the earlier portion of the conversation.",
669
- "Continue if you have next steps, or stop and ask for clarification if you are unsure how to proceed.",
670
- "The previous request exceeded the provider's size limit due to large media attachments. The conversation was compacted and media files were removed from context."
671
- ];
672
- const compactMessageSections = ["Pending Tasks:", "Current Work:"];
673
696
  const IDE_EXECUTE_CODE_TOOL = "mcp__ide__executeCode";
674
697
  const IDE_GET_DIAGNOSTICS_TOOL = "mcp__ide__getDiagnostics";
675
698
  const IDE_GET_DIAGNOSTICS_DESCRIPTION = "Get language diagnostics from VS Code. Returns errors, warnings, information, and hints for files in the workspace.";
676
699
  const PDF_FILE_READ_PREFIX = "PDF file read:";
700
/**
 * Returns the `cache_control` object carried by a content block.
 *
 * @param {object | null | undefined} block - Content block to inspect.
 * @returns {object | undefined} The block's own `cache_control` reference, or
 *   `undefined` for missing blocks, `thinking` blocks, and blocks whose
 *   `cache_control` is absent or not an object.
 */
const getBlockCacheControl = (block) => {
  const isEligibleBlock = Boolean(block) && block.type !== "thinking";
  if (!isEligibleBlock) return void 0;
  const { cache_control: candidate } = block;
  return candidate && typeof candidate === "object" ? candidate : void 0;
};
706
/**
 * Reads the cache-control setting of the final content block of a message.
 *
 * @param {object | null | undefined} lastMessage - Message whose `content` may be an array of blocks.
 * @returns {object | undefined} A shallow copy of the final block's
 *   `cache_control`, or `undefined` when the content is not an array or the
 *   final block has no usable cache control.
 */
const getLastMessageContentCacheControl = (lastMessage) => {
  const blocks = lastMessage?.content;
  if (!Array.isArray(blocks)) return void 0;
  const found = getBlockCacheControl(blocks.at(-1));
  if (!found) return void 0;
  return { ...found };
};
711
/**
 * Attaches a cache-control marker to the final content block of the final
 * message in `anthropicPayload`, mutating that block in place.
 *
 * Nothing happens when the last message has no array content, or when the
 * final block is missing, is a `thinking` block, or already has
 * `cache_control`.
 *
 * @param {{ messages: Array<object> }} anthropicPayload
 * @param {object | null | undefined} lastMessageCacheControl - Marker to copy;
 *   defaults to `{ type: "ephemeral" }` when nullish.
 */
const applyLastMessageCacheControl = (anthropicPayload, lastMessageCacheControl) => {
  const tailMessage = anthropicPayload.messages.at(-1);
  const blocks = tailMessage?.content;
  if (!Array.isArray(blocks)) return;
  const tailBlock = blocks.at(-1);
  const canAttach = Boolean(tailBlock) && tailBlock.type !== "thinking" && !tailBlock.cache_control;
  if (!canAttach) return;
  // Copy so the caller's object is never shared with the payload.
  tailBlock.cache_control = { ...(lastMessageCacheControl ?? { type: "ephemeral" }) };
};
677
719
  const getCompactCandidateText = (message) => {
678
720
  if (message.role !== "user") return "";
679
721
  if (typeof message.content === "string") return message.content;
@@ -1734,6 +1776,7 @@ const CONFIG_KEYS = new Set([
1734
1776
  "modelRefreshIntervalHours",
1735
1777
  "sessionAffinityRetentionDays",
1736
1778
  "useMessagesApi",
1779
+ "useResponsesApiWebSocket",
1737
1780
  "useResponsesApiWebSearch",
1738
1781
  "devMode",
1739
1782
  "quotaRefresh"
@@ -2225,6 +2268,7 @@ const CONFIG_PATCH_HANDLERS = {
2225
2268
  modelRefreshIntervalHours: (next, value) => applyOptionalNumber(next, "modelRefreshIntervalHours", value),
2226
2269
  sessionAffinityRetentionDays: (next, value) => applyOptionalNumber(next, "sessionAffinityRetentionDays", value),
2227
2270
  useMessagesApi: (next, value) => applyOptionalBoolean(next, "useMessagesApi", value),
2271
+ useResponsesApiWebSocket: (next, value) => applyOptionalBoolean(next, "useResponsesApiWebSocket", value),
2228
2272
  useResponsesApiWebSearch: (next, value) => applyOptionalBoolean(next, "useResponsesApiWebSearch", value),
2229
2273
  devMode: applyDevModeConfig,
2230
2274
  quotaRefresh: applyQuotaRefreshConfig
@@ -4462,6 +4506,11 @@ const parseProviderModelAlias = (model) => {
4462
4506
  provider
4463
4507
  };
4464
4508
  };
4509
/**
 * Parses `model` as a provider model alias and keeps the result only when
 * `resolveProvider` returns a truthy value for the parsed provider.
 *
 * @param {string} model - Model identifier to parse.
 * @param {(provider: string) => unknown} resolveProvider - Provider lookup.
 * @returns {object | null} The parsed alias, or `null` when parsing fails or
 *   the provider does not resolve.
 */
const resolveExistingProviderModelAlias = (model, resolveProvider) => {
  const parsedAlias = parseProviderModelAlias(model);
  if (parsedAlias && resolveProvider(parsedAlias.provider)) {
    return parsedAlias;
  }
  return null;
};
4465
4514
  const createFallbackModel = (modelId) => ({
4466
4515
  capabilities: {
4467
4516
  family: "provider",
@@ -4531,6 +4580,9 @@ async function handleProviderCountTokensForProvider(c, options) {
4531
4580
  }
4532
4581
  //#endregion
4533
4582
  //#region src/routes/messages/count-tokens-handler.ts
4583
/**
 * Picks the provider-config resolver for a request: the value stored on the
 * context under `"providerConfigResolver"` when present, otherwise the
 * module's `getProviderConfig`.
 *
 * @param {{ get: (key: string) => unknown }} c - Request context.
 * @returns {Function}
 */
const getProviderConfigResolver$1 = (c) => {
  const overrideResolver = c.get("providerConfigResolver");
  if (overrideResolver === null || overrideResolver === void 0) {
    return getProviderConfig;
  }
  return overrideResolver;
};
4534
4586
  const resolveCountTokensModel = (modelId, findModel = findEndpointModel) => {
4535
4587
  const selectedModel = findModel(modelId);
4536
4588
  if (selectedModel) return {
@@ -4581,7 +4633,8 @@ async function countTokensViaAnthropic(c, payload) {
4581
4633
  */
4582
4634
  async function handleCountTokens(c) {
4583
4635
  const anthropicPayload = await c.req.json();
4584
- const providerModelAlias = parseProviderModelAlias(anthropicPayload.model);
4636
+ anthropicPayload.model = resolveModelAlias(anthropicPayload.model);
4637
+ const providerModelAlias = resolveExistingProviderModelAlias(anthropicPayload.model, getProviderConfigResolver$1(c));
4585
4638
  if (providerModelAlias) {
4586
4639
  anthropicPayload.model = providerModelAlias.model;
4587
4640
  return await handleProviderCountTokensForProvider(c, {
@@ -4617,7 +4670,8 @@ async function handleCountTokens(c) {
4617
4670
  }
4618
4671
  //#endregion
4619
4672
  //#region src/services/copilot/create-responses.ts
4620
- const createResponses = async (payload, { vision, initiator, upstreamRequestId, subagentMarker, sessionId, compactType, requestId, fetchImpl }, account) => {
4673
+ const RESPONSES_WEBSOCKET_IDLE_TIMEOUT_MS = 6e4;
4674
+ const createResponses = async (payload, { vision, initiator, upstreamRequestId, subagentMarker, sessionId, compactType, requestId, fetchImpl, transport = "http" }, account) => {
4621
4675
  const ctx = account ?? accountFromState();
4622
4676
  if (!ctx.copilotToken) throw new Error("Copilot token not found");
4623
4677
  const effectiveInitiator = resolveEffectiveInitiator(initiator, {
@@ -4632,14 +4686,30 @@ const createResponses = async (payload, { vision, initiator, upstreamRequestId,
4632
4686
  prepareForCompact(headers, compactType);
4633
4687
  payload.service_tier = void 0;
4634
4688
  captureOutboundHeadersSnapshot(headers);
4635
- const response = await copilotFetch(`${copilotBaseUrl(ctx)}/responses`, {
4689
+ consola.log(`<-- model: ${payload.model}`);
4690
+ if ((compactType === 1 ? "http" : transport) === "websocket") {
4691
+ const stream = createPooledResponsesWebSocketStream(prepareResponsesWebSocketRequest(payload, headers, {
4692
+ copilotToken: ctx.copilotToken,
4693
+ requestId: requestId ?? upstreamRequestId ?? "missing-request-id",
4694
+ subagentMarker
4695
+ }), copilotBaseUrl(ctx));
4696
+ if (payload.stream) return stream;
4697
+ return await consumeResponsesWebSocketStream(stream);
4698
+ }
4699
+ return await createHttpResponses(payload, headers, ctx, {
4700
+ fetchImpl,
4701
+ requestId
4702
+ });
4703
+ };
4704
+ const createHttpResponses = async (payload, headers, account, options) => {
4705
+ const response = await copilotFetch(`${copilotBaseUrl(account)}/responses`, {
4636
4706
  method: "POST",
4637
4707
  headers,
4638
4708
  body: JSON.stringify(payload)
4639
4709
  }, {
4640
- requestId,
4710
+ requestId: options.requestId,
4641
4711
  callSite: "responses",
4642
- fetchImpl
4712
+ fetchImpl: options.fetchImpl
4643
4713
  });
4644
4714
  logCopilotRateLimits(response.headers);
4645
4715
  if (!response.ok) {
@@ -4649,21 +4719,337 @@ const createResponses = async (payload, { vision, initiator, upstreamRequestId,
4649
4719
  if (payload.stream) return events(response);
4650
4720
  return await response.json();
4651
4721
  };
4722
/**
 * Assembles everything needed to send one Responses request over a
 * websocket: the handshake headers, the pool key, and the message payload.
 *
 * @param {object} payload - Responses request payload.
 * @param {object} preparedHeaders - Headers already prepared for the request.
 * @param {{ copilotToken: string, requestId: string, subagentMarker?: object }} options
 * @returns {{ headers: object, poolKey: string, payload: object }}
 */
const prepareResponsesWebSocketRequest = (payload, preparedHeaders, options) => {
  const initiator = getResponsesWebSocketInitiator(preparedHeaders);
  const headers = copilotWebSocketHeaders(preparedHeaders);
  const poolKey = buildResponsesWebSocketPoolKey(payload, options);
  const websocketPayload = buildResponsesWebSocketPayload(payload, initiator);
  return {
    headers,
    poolKey,
    payload: websocketPayload
  };
};
4730
/**
 * Builds the pool key identifying which pooled websocket a request may use.
 *
 * The key joins four URI-encoded parts with `|`: a 16-hex-character SHA-256
 * fingerprint of the token (or `"missing-token"`), the payload model, the
 * request id, and a subagent key (`session:agent:type`, or `"main"`).
 *
 * @param {{ model: string }} payload
 * @param {{ copilotToken?: string, requestId: string, subagentMarker?: object }} options
 * @returns {string}
 */
const buildResponsesWebSocketPoolKey = (payload, { copilotToken, requestId, subagentMarker }) => {
  // Only a truncated hash of the token goes into the key, never the token itself.
  let tokenFingerprint = "missing-token";
  if (copilotToken) {
    tokenFingerprint = createHash("sha256").update(copilotToken).digest("hex").slice(0, 16);
  }
  let subagentKey = "main";
  if (subagentMarker) {
    const { session_id: sessionId, agent_id: agentId, agent_type: agentType } = subagentMarker;
    subagentKey = [sessionId, agentId, agentType].join(":");
  }
  const keyParts = [tokenFingerprint, payload.model, requestId, subagentKey];
  return keyParts.map((part) => encodeURIComponent(part)).join("|");
};
4744
/**
 * Derives the initiator from the prepared headers: `"agent"` when the
 * `x-initiator` header equals "agent" (case-insensitive), otherwise `"user"`.
 *
 * @param {object} preparedHeaders
 * @returns {"agent" | "user"}
 */
const getResponsesWebSocketInitiator = (preparedHeaders) => {
  const declaredInitiator = getHeaderValue(preparedHeaders, "x-initiator");
  if (declaredInitiator?.toLowerCase() === "agent") {
    return "agent";
  }
  return "user";
};
4747
+ const createPooledResponsesWebSocketStream = (request, baseUrl) => runResponsesWebSocketRequest(request, baseUrl);
4748
/**
 * Converts a Responses payload into a websocket `response.create` message.
 *
 * The result is a shallow copy of `payload` without `stream`, `background`
 * and `service_tier`, with `type` set to `"response.create"` and `initiator`
 * set to the given value. The input object is not modified.
 *
 * @param {object} payload
 * @param {string} initiator
 * @returns {object}
 */
const buildResponsesWebSocketPayload = (payload, initiator) => {
  const {
    stream: _omittedStream,
    background: _omittedBackground,
    service_tier: _omittedServiceTier,
    ...forwardedFields
  } = payload ?? {};
  return {
    ...forwardedFields,
    type: "response.create",
    initiator
  };
};
4759
/**
 * Derives the websocket endpoint URL for the `/responses` route.
 *
 * Trailing slashes on `baseUrl` are dropped before appending `/responses`;
 * `https:` becomes `wss:` and `http:` becomes `ws:`. Any other protocol is
 * left unchanged.
 *
 * @param {string} baseUrl
 * @returns {string}
 */
const buildResponsesWebSocketUrl = (baseUrl) => {
  const trimmedBase = baseUrl.replace(/\/+$/u, "");
  const endpoint = new URL(`${trimmedBase}/responses`);
  const socketProtocols = {
    "https:": "wss:",
    "http:": "ws:"
  };
  const socketProtocol = socketProtocols[endpoint.protocol];
  if (socketProtocol) {
    endpoint.protocol = socketProtocol;
  }
  return endpoint.toString();
};
4765
+ const responsesWebSocketPool = /* @__PURE__ */ new Map();
4766
+ const responsesWebSocketActiveRequests = /* @__PURE__ */ new Map();
4767
/**
 * Sends one Responses request over a websocket and yields each received
 * message as a stream chunk until a terminal chunk arrives.
 *
 * The websocket entry stays available for reuse only when the response
 * reached a terminal chunk. On any other exit (an error, the socket ending
 * early, or the consumer abandoning the generator mid-response) the entry is
 * evicted and closed, so a later request on the same pool key cannot receive
 * leftover events from this response.
 *
 * @param {{ headers: object, poolKey: string, payload: object }} request
 * @param {string} baseUrl
 * @yields {{ data: string, event?: string, id?: string }}
 * @throws {Error} When the socket is unavailable, errors, or ends without a
 *   terminal chunk.
 */
const runResponsesWebSocketRequest = async function* (request, baseUrl) {
  const { entry, pooled } = getResponsesWebSocketRequestTarget(request, baseUrl);
  const release = acquireResponsesWebSocketEntry(request.poolKey, entry, pooled);
  let reachedTerminalChunk = false;
  try {
    const websocket = await getReadyResponsesWebSocket(request.poolKey, entry, pooled);
    websocket.send(JSON.stringify(request.payload));
    for await (const data of createWebSocketMessageStream(websocket)) {
      const chunk = createResponsesWebSocketStreamChunk(data);
      const isTerminal = isTerminalResponsesStreamChunk(chunk);
      // Record completion before yielding: the consumer may stop iterating
      // as soon as it sees the terminal chunk, which resumes us in `finally`.
      if (isTerminal) reachedTerminalChunk = true;
      yield chunk;
      if (isTerminal) return;
    }
    throw new Error("Responses websocket ended without a terminal response");
  } catch (error) {
    // Any failure, even one injected after the terminal chunk, evicts the entry.
    reachedTerminalChunk = false;
    throw toError(error);
  } finally {
    if (!reachedTerminalChunk) removeResponsesWebSocketPoolEntry(request.poolKey, entry);
    release();
  }
};
4787
/**
 * Chooses the websocket entry a request should use.
 *
 * - If the pool key already has an active request, a fresh entry is created
 *   and reported as not pooled.
 * - Otherwise an existing open pooled entry is reused and its idle timer cleared.
 * - Otherwise a new entry is created and stored in the pool.
 *
 * @param {{ poolKey: string, headers: object }} request
 * @param {string} baseUrl
 * @returns {{ entry: object, pooled: boolean }}
 */
const getResponsesWebSocketRequestTarget = (request, baseUrl) => {
  const { poolKey } = request;
  const keyIsBusy = getResponsesWebSocketActiveRequestCount(poolKey) > 0;
  if (keyIsBusy) {
    return {
      entry: createResponsesWebSocketEntry(request, baseUrl),
      pooled: false
    };
  }
  const pooledEntry = responsesWebSocketPool.get(poolKey);
  if (!pooledEntry || pooledEntry.closed) {
    const freshEntry = createResponsesWebSocketEntry(request, baseUrl);
    responsesWebSocketPool.set(poolKey, freshEntry);
    return {
      entry: freshEntry,
      pooled: true
    };
  }
  clearResponsesWebSocketIdleTimer(pooledEntry);
  return {
    entry: pooledEntry,
    pooled: true
  };
};
4807
/**
 * Creates a websocket entry and starts opening its connection.
 *
 * The entry is evicted via `removeResponsesWebSocketPoolEntry` when the
 * socket later emits `close` or `error`, or when the connection fails.
 *
 * @param {{ poolKey: string, headers: object }} request
 * @param {string} baseUrl
 * @returns {{ closed: boolean, idleTimer: unknown, requestCount: number, websocketPromise: Promise<object> }}
 */
const createResponsesWebSocketEntry = (request, baseUrl) => {
  const websocketPromise = openResponsesWebSocket({
    headers: request.headers,
    url: buildResponsesWebSocketUrl(baseUrl)
  });
  const entry = {
    closed: false,
    idleTimer: null,
    requestCount: 0,
    websocketPromise
  };
  const evictEntry = () => {
    removeResponsesWebSocketPoolEntry(request.poolKey, entry);
  };
  websocketPromise.then((websocket) => {
    for (const eventName of ["close", "error"]) {
      websocket.addEventListener(eventName, evictEntry);
    }
  }).catch(evictEntry);
  return entry;
};
4829
/**
 * Marks `entry` as in use by one more request and returns a release function.
 *
 * The release function runs at most once. It decrements the counters and,
 * if the entry is idle and still open, either schedules an idle close (when
 * the entry is the current pooled entry for `poolKey`) or evicts it.
 *
 * @param {string} poolKey
 * @param {object} entry
 * @param {boolean} pooled - Whether the entry was taken from or stored in the pool.
 * @returns {() => void}
 */
const acquireResponsesWebSocketEntry = (poolKey, entry, pooled) => {
  clearResponsesWebSocketIdleTimer(entry);
  incrementResponsesWebSocketActiveRequestCount(poolKey);
  entry.requestCount += 1;
  let alreadyReleased = false;
  return () => {
    if (alreadyReleased) return;
    alreadyReleased = true;
    entry.requestCount -= 1;
    decrementResponsesWebSocketActiveRequestCount(poolKey);
    const nothingLeftToDo = entry.closed || entry.requestCount > 0;
    if (nothingLeftToDo) return;
    const isCurrentPooledEntry = pooled && responsesWebSocketPool.get(poolKey) === entry;
    if (isCurrentPooledEntry) {
      scheduleResponsesWebSocketIdleClose(poolKey, entry);
    } else {
      removeResponsesWebSocketPoolEntry(poolKey, entry);
    }
  };
};
4847
/**
 * Waits for the entry's websocket and verifies it is still usable.
 *
 * @param {string} poolKey
 * @param {object} entry
 * @param {boolean} pooled
 * @returns {Promise<object>} The open websocket.
 * @throws {Error} When the entry was closed, was replaced in the pool while
 *   connecting, or the socket is not in the OPEN state (in which case the
 *   entry is also evicted).
 */
const getReadyResponsesWebSocket = async (poolKey, entry, pooled) => {
  const unavailableError = () => new Error("Responses websocket became unavailable before the request started");
  if (entry.closed) throw unavailableError();
  const websocket = await entry.websocketPromise;
  const replacedWhileConnecting = pooled && responsesWebSocketPool.get(poolKey) !== entry;
  if (entry.closed || replacedWhileConnecting) throw unavailableError();
  if (websocket.readyState === WebSocket.OPEN) return websocket;
  removeResponsesWebSocketPoolEntry(poolKey, entry);
  throw unavailableError();
};
4857
/**
 * (Re)starts the idle timer for `entry`; when it fires after
 * `RESPONSES_WEBSOCKET_IDLE_TIMEOUT_MS` the entry is evicted. The timer is
 * passed to `unrefTimer`.
 *
 * @param {string} poolKey
 * @param {object} entry
 */
const scheduleResponsesWebSocketIdleClose = (poolKey, entry) => {
  clearResponsesWebSocketIdleTimer(entry);
  const closeWhenIdle = () => {
    removeResponsesWebSocketPoolEntry(poolKey, entry);
  };
  const idleTimer = setTimeout(closeWhenIdle, RESPONSES_WEBSOCKET_IDLE_TIMEOUT_MS);
  entry.idleTimer = idleTimer;
  unrefTimer(idleTimer);
};
4864
/**
 * Cancels the entry's pending idle timer, if any, and resets `idleTimer` to `null`.
 *
 * @param {{ idleTimer: unknown }} entry
 */
const clearResponsesWebSocketIdleTimer = (entry) => {
  const { idleTimer } = entry;
  if (!idleTimer) return;
  clearTimeout(idleTimer);
  entry.idleTimer = null;
};
4870
/**
 * Returns how many requests are currently active for `poolKey` (0 when none are recorded).
 *
 * @param {string} poolKey
 * @returns {number}
 */
const getResponsesWebSocketActiveRequestCount = (poolKey) => {
  const recorded = responsesWebSocketActiveRequests.get(poolKey);
  return recorded ?? 0;
};
/**
 * Adds one active request for `poolKey`.
 *
 * @param {string} poolKey
 */
const incrementResponsesWebSocketActiveRequestCount = (poolKey) => {
  const increased = getResponsesWebSocketActiveRequestCount(poolKey) + 1;
  responsesWebSocketActiveRequests.set(poolKey, increased);
};
/**
 * Removes one active request for `poolKey`, deleting the map entry once the
 * count would reach zero or below.
 *
 * @param {string} poolKey
 */
const decrementResponsesWebSocketActiveRequestCount = (poolKey) => {
  const remaining = getResponsesWebSocketActiveRequestCount(poolKey) - 1;
  if (remaining > 0) {
    responsesWebSocketActiveRequests.set(poolKey, remaining);
  } else {
    responsesWebSocketActiveRequests.delete(poolKey);
  }
};
4882
/**
 * Evicts `entry`: removes it from the pool if it is the entry stored under
 * `poolKey`, then (once per entry) marks it closed, cancels its idle timer,
 * and closes its websocket when the connection promise settles.
 *
 * @param {string} poolKey
 * @param {object} entry
 */
const removeResponsesWebSocketPoolEntry = (poolKey, entry) => {
  const isStoredUnderKey = responsesWebSocketPool.get(poolKey) === entry;
  if (isStoredUnderKey) {
    responsesWebSocketPool.delete(poolKey);
  }
  if (!entry.closed) {
    entry.closed = true;
    clearResponsesWebSocketIdleTimer(entry);
    // A failed connection has nothing to close, so the rejection is ignored.
    entry.websocketPromise.then(closeResponsesWebSocket).catch(() => {});
  }
};
4889
/**
 * Calls `timer.unref()` when the timer handle is an object exposing that
 * method; any other handle is ignored.
 *
 * `null` is rejected explicitly because `typeof null === "object"` and the
 * `in` operator throws a TypeError on `null`.
 *
 * @param {unknown} timer - Handle returned by `setTimeout`.
 */
const unrefTimer = (timer) => {
  if (timer === null || typeof timer !== "object") return;
  if (typeof timer.unref === "function") timer.unref();
};
4892
/**
 * Builds an Error for a websocket failure.
 *
 * When the event carries a reason (`event.error`, else `event.message`) that
 * is neither `undefined` nor an empty string, the reason's message is
 * appended to `message` and the reason is attached as `cause`.
 *
 * @param {string} message - Base error message.
 * @param {{ error?: unknown, message?: unknown } | null | undefined} event
 * @returns {Error}
 */
const createResponsesWebSocketError = (message, event) => {
  const reason = event?.error ?? event?.message;
  const hasReason = reason !== void 0 && reason !== "";
  if (hasReason) {
    const cause = toError(reason);
    return new Error(`${message}: ${cause.message}`, { cause });
  }
  return new Error(message);
};
4898
/**
 * Opens a websocket to `url` with the given handshake headers, passing the
 * dispatcher from `getProxyEnvDispatcher()` when one is returned.
 *
 * @param {{ headers: object, url: string }} options
 * @returns {Promise<object>} Resolves with the websocket on `open`; rejects
 *   with a "Failed to create responses websocket" error on `error`.
 */
const openResponsesWebSocket = async ({ headers, url }) => {
  const connecting = new Promise((resolve, reject) => {
    const dispatcher = getProxyEnvDispatcher();
    const socketOptions = dispatcher ? {
      dispatcher,
      headers
    } : { headers };
    const websocket = new WebSocket(url, socketOptions);
    // Both handlers detach themselves so only the first outcome settles the promise.
    const detach = () => {
      websocket.removeEventListener("open", handleOpen);
      websocket.removeEventListener("error", handleError);
    };
    function handleOpen() {
      detach();
      resolve(websocket);
    }
    function handleError(event) {
      detach();
      reject(createResponsesWebSocketError("Failed to create responses websocket", event));
    }
    websocket.addEventListener("open", handleOpen);
    websocket.addEventListener("error", handleError);
  });
  return await connecting;
};
4919
/**
 * Turns a websocket's `message` events into an async stream of normalized
 * message data.
 *
 * Messages are queued as they arrive and yielded in order. The stream ends
 * when the socket closes and throws when it reports an error; queued messages
 * are drained before either outcome. A socket that is already CLOSED when the
 * stream starts will never emit `close`, so that state is read up front
 * instead of waiting for an event. All listeners are removed when the
 * generator finishes.
 *
 * @param {object} websocket - Object exposing `addEventListener` /
 *   `removeEventListener` and, optionally, `readyState`.
 * @yields {string} Normalized message data.
 * @throws {Error} When the websocket emits `error`.
 */
const createWebSocketMessageStream = async function* (websocket) {
  // readyState value for CLOSED in the WebSocket API.
  const WEBSOCKET_CLOSED = 3;
  const queue = [];
  let closed = websocket.readyState === WEBSOCKET_CLOSED;
  let error = null;
  let notify = null;
  const wake = () => {
    notify?.();
    notify = null;
  };
  const onMessage = (event) => {
    // Normalization is async; queue the promise so arrival order is preserved.
    queue.push(normalizeWebSocketMessageData(event.data));
    wake();
  };
  const onClose = () => {
    closed = true;
    wake();
  };
  const onError = (event) => {
    error = createResponsesWebSocketError("Responses websocket stream error", event);
    wake();
  };
  websocket.addEventListener("message", onMessage);
  websocket.addEventListener("close", onClose);
  websocket.addEventListener("error", onError);
  try {
    while (true) {
      if (queue.length > 0) {
        yield await queue.shift();
        continue;
      }
      if (error) throw toError(error);
      if (closed) break;
      await new Promise((resolve) => {
        notify = resolve;
      });
    }
  } finally {
    websocket.removeEventListener("message", onMessage);
    websocket.removeEventListener("close", onClose);
    websocket.removeEventListener("error", onError);
  }
};
4962
/**
 * Converts websocket message data to a string.
 *
 * Strings pass through, `ArrayBuffer`s and typed-array/DataView views are
 * decoded with `TextDecoder`, objects with a `text()` method are read through
 * it, and anything else is converted with `String()`.
 *
 * @param {unknown} data
 * @returns {Promise<string>}
 */
const normalizeWebSocketMessageData = async (data) => {
  if (typeof data === "string") return data;
  const isRawBuffer = data instanceof ArrayBuffer;
  if (isRawBuffer || ArrayBuffer.isView(data)) {
    // Decode only the bytes the view covers, not its whole backing buffer.
    const bytes = isRawBuffer ? data : new Uint8Array(data.buffer, data.byteOffset, data.byteLength);
    return new TextDecoder().decode(bytes);
  }
  return isTextReadable(data) ? await data.text() : String(data);
};
4972
/**
 * Reports whether `value` is a non-null object with a callable `text` member.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
const isTextReadable = (value) => {
  const isObject = Boolean(value) && typeof value === "object";
  return isObject && "text" in value && typeof value.text === "function";
};
4976
/**
 * Returns `value` unchanged when it is already an Error; otherwise wraps its
 * string form in a new Error.
 *
 * @param {unknown} value
 * @returns {Error}
 */
const toError = (value) => (value instanceof Error ? value : new Error(String(value)));
4980
/**
 * Looks up a header value by case-insensitive name.
 *
 * Accepts either a plain object of headers or any container exposing a
 * `get(name)` method (such as a `Headers` instance). `Object.entries` yields
 * nothing for a `Headers` instance, so those are read through `get`.
 *
 * @param {object} headers - Plain header record or `Headers`-like object.
 * @param {string} headerName
 * @returns {string | undefined} The value, or `undefined` when absent.
 */
const getHeaderValue = (headers, headerName) => {
  // Normalize a `null` miss from get() to `undefined`, matching the plain-object path.
  if (typeof headers?.get === "function") return headers.get(headerName) ?? void 0;
  const normalizedHeaderName = headerName.toLowerCase();
  return Object.entries(headers).find(([key]) => key.toLowerCase() === normalizedHeaderName)?.[1];
};
4984
/**
 * URI-encodes one component of a websocket pool key. Takes a single argument
 * so it can be passed directly to `Array.prototype.map`.
 *
 * @param {unknown} value
 * @returns {string}
 */
const encodePoolKeyPart = (value) => {
  return encodeURIComponent(value);
};
4985
/**
 * Wraps one websocket message in a stream chunk.
 *
 * `"[DONE]"` and anything that fails to parse or process are passed through
 * as `{ data }`. Parsed messages are re-serialized into `data`, with `event`
 * and `id` taken from string `type` and `id` fields. For
 * `response.completed` messages the attached `copilot_quota_snapshots` are
 * logged.
 *
 * @param {string} data
 * @returns {{ data: string, event?: string, id?: string }}
 */
const createResponsesWebSocketStreamChunk = (data) => {
  const passthroughChunk = { data };
  if (data === "[DONE]") return passthroughChunk;
  const stringOrUndefined = (candidate) => typeof candidate === "string" ? candidate : void 0;
  try {
    const message = JSON.parse(data);
    const { type, id } = message;
    if (type === "response.completed") {
      logCopilotQuotaSnapshots(message.copilot_quota_snapshots);
    }
    return {
      data: JSON.stringify(message),
      event: stringOrUndefined(type),
      id: stringOrUndefined(id)
    };
  } catch {
    return passthroughChunk;
  }
};
4999
/**
 * Reports whether a stream chunk ends the response: its JSON `type` is
 * `response.completed`, `response.failed`, `response.incomplete` or `error`.
 * Empty, `"[DONE]"` and unparseable chunks are never terminal.
 *
 * @param {{ data?: string }} chunk
 * @returns {boolean}
 */
const isTerminalResponsesStreamChunk = (chunk) => {
  const { data } = chunk;
  if (!data || data === "[DONE]") return false;
  let eventType;
  try {
    eventType = JSON.parse(data).type;
  } catch {
    return false;
  }
  return [
    "response.completed",
    "response.failed",
    "response.incomplete",
    "error"
  ].includes(eventType);
};
5008
/**
 * Drains a Responses websocket chunk stream and returns the final response
 * object, for callers that did not ask for streaming.
 *
 * Empty chunks, `"[DONE]"` and frames that are not JSON objects are skipped,
 * matching how `isTerminalResponsesStreamChunk` treats unparseable frames;
 * a stray non-JSON frame therefore no longer aborts the request with a
 * SyntaxError.
 *
 * @param {AsyncIterable<{ data?: string }>} stream
 * @returns {Promise<unknown>} The `response` field of the first
 *   `response.completed`, `response.failed` or `response.incomplete` event.
 * @throws {Error} On an `error` event, or when the stream ends without a
 *   terminal event.
 */
const consumeResponsesWebSocketStream = async (stream) => {
  for await (const chunk of stream) {
    if (!chunk.data || chunk.data === "[DONE]") continue;
    let event;
    try {
      event = JSON.parse(chunk.data);
    } catch {
      continue;
    }
    if (!event || typeof event !== "object") continue;
    if (event.type === "error") {
      // NOTE(review): a nested `error.message` shape is assumed as a fallback; confirm against upstream.
      const message = event.message ?? event.error?.message ?? "Responses websocket returned an error event";
      throw new Error(message);
    }
    if (event.type === "response.completed" || event.type === "response.failed" || event.type === "response.incomplete") return event.response;
  }
  throw new Error("Responses websocket ended without a terminal response");
};
5017
/**
 * Closes the websocket if it is still connecting or open; sockets in any
 * other state are left alone.
 *
 * @param {{ readyState: number, close: () => void }} websocket
 */
const closeResponsesWebSocket = (websocket) => {
  const closableStates = [WebSocket.CONNECTING, WebSocket.OPEN];
  if (closableStates.includes(websocket.readyState)) {
    websocket.close();
  }
};
4652
5020
  //#endregion
4653
5021
  //#region src/routes/messages/responses-translation.ts
4654
5022
  const MESSAGE_TYPE = "message";
4655
5023
  const COMPACTION_SIGNATURE_PREFIX = "cm1#";
4656
5024
  const COMPACTION_SIGNATURE_SEPARATOR = "@";
4657
5025
  const THINKING_TEXT = "Thinking...";
4658
- const translateAnthropicMessagesToResponsesPayload = (payload, modelOverride) => {
4659
- const model = modelOverride ?? payload.model;
5026
/**
 * Builds a prompt cache key, optionally scoped to a subagent.
 *
 * @param {string | null | undefined} basePromptCacheKey
 * @param {string | null | undefined} subagentAgentId - Surrounding whitespace is ignored.
 * @returns {string | null} `null` without a base key; the base key alone
 *   without a non-blank agent id; otherwise `<base>:agent:<agentId>`.
 */
const buildPromptCacheKey = (basePromptCacheKey, subagentAgentId) => {
  if (!basePromptCacheKey) return null;
  const agentSuffix = subagentAgentId?.trim();
  return agentSuffix ? `${basePromptCacheKey}:agent:${agentSuffix}` : basePromptCacheKey;
};
5032
+ const translateAnthropicMessagesToResponsesPayload = (payload, options = {}) => {
5033
+ const model = options.modelOverride ?? payload.model;
4660
5034
  const input = [];
4661
5035
  const applyPhase = shouldApplyPhase(payload.model);
4662
- for (const message of payload.messages) input.push(...translateMessage(message, payload.model, applyPhase));
4663
- const translatedTools = convertAnthropicTools(payload.tools);
4664
- const toolChoice = convertAnthropicToolChoice(payload.tool_choice);
4665
- const { sessionId: promptCacheKey } = parseUserIdMetadata(payload.metadata?.user_id);
4666
- return {
5036
+ const toolSearchEnabled = shouldEnableResponsesToolSearch({
5037
+ model: payload.model,
5038
+ tools: payload.tools
5039
+ });
5040
+ const translationState = {
5041
+ originalTools: payload.tools ?? [],
5042
+ toolSearchEnabled,
5043
+ toolUseNameById: /* @__PURE__ */ new Map()
5044
+ };
5045
+ for (const message of payload.messages) input.push(...translateMessage(message, payload.model, applyPhase, translationState));
5046
+ const hasOriginalTools = Array.isArray(payload.tools) && payload.tools.length > 0;
5047
+ const translatedTools = convertAnthropicTools(payload.tools, toolSearchEnabled);
5048
+ const toolChoice = convertAnthropicToolChoice(payload.tool_choice, toolSearchEnabled);
5049
+ const { sessionId: metadataPromptCacheKey } = parseUserIdMetadata(payload.metadata?.user_id);
5050
+ const sessionAffinity = requestContext.getStore()?.sessionAffinity?.trim() || null;
5051
+ const promptCacheKey = buildPromptCacheKey(metadataPromptCacheKey ?? sessionAffinity, options.subagentAgentId);
5052
+ const responsesPayload = {
4667
5053
  model,
4668
5054
  input,
4669
5055
  instructions: translateSystemPrompt(payload.system, model),
@@ -4673,7 +5059,6 @@ const translateAnthropicMessagesToResponsesPayload = (payload, modelOverride) =>
4673
5059
  tools: translatedTools,
4674
5060
  tool_choice: toolChoice,
4675
5061
  metadata: payload.metadata ? { ...payload.metadata } : null,
4676
- prompt_cache_key: promptCacheKey,
4677
5062
  stream: payload.stream ?? null,
4678
5063
  store: false,
4679
5064
  parallel_tool_calls: true,
@@ -4683,6 +5068,8 @@ const translateAnthropicMessagesToResponsesPayload = (payload, modelOverride) =>
4683
5068
  },
4684
5069
  include: ["reasoning.encrypted_content"]
4685
5070
  };
5071
+ if (hasOriginalTools) responsesPayload.prompt_cache_key = promptCacheKey;
5072
+ return responsesPayload;
4686
5073
  };
4687
5074
  const encodeCompactionCarrierSignature = (compaction) => {
4688
5075
  return `${COMPACTION_SIGNATURE_PREFIX}${compaction.encrypted_content}${COMPACTION_SIGNATURE_SEPARATOR}${compaction.id}`;
@@ -4701,11 +5088,11 @@ const decodeCompactionCarrierSignature = (signature) => {
4701
5088
  };
4702
5089
  }
4703
5090
  };
4704
/**
 * Translates one Anthropic conversation message into Responses API input items.
 *
 * Messages whose role is "user" go to translateUserMessage; every other role
 * goes to translateAssistantMessage.
 *
 * @param message Anthropic message (`role` plus `content`).
 * @param model Model identifier forwarded to the assistant translator.
 * @param applyPhase Phase flag forwarded to the assistant translator.
 * @param state Per-request translation state forwarded to both translators.
 * @returns Whatever the selected translator returns.
 */
const translateMessage = (message, model, applyPhase, state) => {
  const isUserTurn = message.role === "user";
  return isUserTurn
    ? translateUserMessage(message, state)
    : translateAssistantMessage(message, model, applyPhase, state);
};
4708
- const translateUserMessage = (message) => {
5095
+ const translateUserMessage = (message, state) => {
4709
5096
  if (typeof message.content === "string") return [createMessage("user", message.content)];
4710
5097
  if (!Array.isArray(message.content)) return [];
4711
5098
  const items = [];
@@ -4713,7 +5100,7 @@ const translateUserMessage = (message) => {
4713
5100
  for (const block of message.content) {
4714
5101
  if (block.type === "tool_result") {
4715
5102
  flushPendingContent(pendingContent, items, { role: "user" });
4716
- items.push(createFunctionCallOutput(block));
5103
+ items.push(createToolCallOutput(block, state));
4717
5104
  continue;
4718
5105
  }
4719
5106
  const converted = translateUserContentBlock(block);
@@ -4722,7 +5109,7 @@ const translateUserMessage = (message) => {
4722
5109
  flushPendingContent(pendingContent, items, { role: "user" });
4723
5110
  return items;
4724
5111
  };
4725
- const translateAssistantMessage = (message, model, applyPhase) => {
5112
+ const translateAssistantMessage = (message, model, applyPhase, state) => {
4726
5113
  const assistantPhase = resolveAssistantPhase(model, message.content, applyPhase);
4727
5114
  if (typeof message.content === "string") return [createMessage("assistant", message.content, assistantPhase)];
4728
5115
  if (!Array.isArray(message.content)) return [];
@@ -4730,11 +5117,12 @@ const translateAssistantMessage = (message, model, applyPhase) => {
4730
5117
  const pendingContent = [];
4731
5118
  for (const block of message.content) {
4732
5119
  if (block.type === "tool_use") {
5120
+ state.toolUseNameById.set(block.id, block.name);
4733
5121
  flushPendingContent(pendingContent, items, {
4734
5122
  role: "assistant",
4735
5123
  phase: assistantPhase
4736
5124
  });
4737
- items.push(createFunctionToolCall(block));
5125
+ items.push(createToolCall(block, state));
4738
5126
  continue;
4739
5127
  }
4740
5128
  if (block.type === "thinking" && block.signature) {
@@ -4852,19 +5240,79 @@ const parseReasoningSignature$1 = (signature) => {
4852
5240
  id: signature.slice(splitIndex + 1)
4853
5241
  };
4854
5242
  };
4855
/**
 * Builds a Responses API `function_call` input item from an Anthropic `tool_use` block.
 *
 * @param block Anthropic `tool_use` block (`id`, `name`, `input`).
 * @param state Optional translation state. When `state.toolSearchEnabled` is truthy and
 *   `isDeferredToolName(block.name)` holds, the item also carries `namespace: block.name`.
 * @returns The `function_call` item; `status` is always "completed".
 */
const createFunctionToolCall = (block, state) => {
  // `state` was added to the signature after the fact; optional chaining keeps the
  // original one-argument call shape working instead of throwing on `undefined`.
  const isNamespaced = Boolean(state?.toolSearchEnabled) && isDeferredToolName(block.name);
  return {
    type: "function_call",
    call_id: block.id,
    name: block.name,
    // JSON.stringify(undefined) returns undefined, which would silently drop `arguments`
    // when the item is serialized; fall back to an empty object for a missing input.
    arguments: JSON.stringify(block.input ?? {}),
    status: "completed",
    ...(isNamespaced ? { namespace: block.name } : {})
  };
};
5251
/**
 * Builds a client-executed `tool_search_call` input item from an Anthropic `tool_use` block.
 *
 * @param block Anthropic `tool_use` block; `block.id` becomes `call_id` and `block.input`
 *   is passed through normalizeToolSearchBridgeArguments.
 * @returns The `tool_search_call` item with `execution: "client"` and `status: "completed"`.
 */
const createToolSearchCall = (block) => {
  const searchArguments = normalizeToolSearchBridgeArguments(block.input);
  return {
    type: "tool_search_call",
    call_id: block.id,
    arguments: searchArguments,
    execution: "client",
    status: "completed"
  };
};
5258
/**
 * Picks the Responses item shape for an Anthropic `tool_use` block.
 *
 * A block is emitted as a tool-search call only when tool search is enabled in `state`
 * and `isBridgeToolSearchName(block.name)` holds; otherwise it becomes a function call.
 *
 * @param block Anthropic `tool_use` block.
 * @param state Translation state (reads `toolSearchEnabled`).
 * @returns A `tool_search_call` or `function_call` item.
 */
const createToolCall = (block, state) => {
  const isSearchBridgeCall = state.toolSearchEnabled && isBridgeToolSearchName(block.name);
  return isSearchBridgeCall
    ? createToolSearchCall(block)
    : createFunctionToolCall(block, state);
};
4862
5262
  const createFunctionCallOutput = (block) => ({
4863
5263
  type: "function_call_output",
4864
5264
  call_id: block.tool_use_id,
4865
5265
  output: convertToolResultContent(block.content),
4866
5266
  status: block.is_error ? "incomplete" : "completed"
4867
5267
  });
5268
/**
 * Picks the Responses item shape for an Anthropic `tool_result` block.
 *
 * The tool name is looked up via `state.toolUseNameById` using the result's
 * `tool_use_id`; a missing entry is treated as the empty name.
 *
 * @param block Anthropic `tool_result` block.
 * @param state Translation state (`toolUseNameById`, `toolSearchEnabled`, `originalTools`).
 * @returns A `tool_search_output` item when tool search is enabled and the originating
 *   call was the bridge search tool; otherwise a `function_call_output` item.
 */
const createToolCallOutput = (block, state) => {
  const originatingToolName = state.toolUseNameById.get(block.tool_use_id) ?? "";
  const answersToolSearch = state.toolSearchEnabled && isBridgeToolSearchName(originatingToolName);
  if (!answersToolSearch) return createFunctionCallOutput(block);
  return createToolSearchOutput(block, state.originalTools);
};
5273
/**
 * Builds a client-executed `tool_search_output` item from an Anthropic `tool_result` block.
 *
 * Each tool name referenced by the result is resolved against `originalTools` and
 * converted to a namespace definition, in order.
 *
 * @param block Anthropic `tool_result` block (`tool_use_id`, `content`, `is_error`).
 * @param originalTools Tool definitions from the incoming request.
 * @returns The `tool_search_output` item.
 * @throws Whatever resolveDeferredTool throws for a name it cannot resolve.
 */
const createToolSearchOutput = (block, originalTools) => {
  const loadedTools = [];
  for (const toolName of resolveToolSearchReferencedToolNames(block.content, originalTools)) {
    const deferredTool = resolveDeferredTool(toolName, originalTools);
    loadedTools.push(convertDeferredToolToNamespace(deferredTool));
  }
  return {
    type: "tool_search_output",
    call_id: block.tool_use_id,
    tools: loadedTools,
    execution: "client",
    status: block.is_error ? "incomplete" : "completed"
  };
};
5283
/**
 * Determines which tool names a tool-search result refers to.
 *
 * Explicit `tool_reference` blocks win and are de-duplicated. Without any, the content
 * is checked for a sentinel whose `names` are matched against `originalTools` via
 * selectDeferredToolsByNames.
 *
 * @param content `tool_result` content (string or array of blocks).
 * @param originalTools Tool definitions from the incoming request.
 * @returns Tool names; an empty array when nothing is referenced.
 */
const resolveToolSearchReferencedToolNames = (content, originalTools) => {
  const referenced = extractToolReferenceNames(content);
  if (referenced.length !== 0) return uniqueToolNames(referenced);
  const sentinel = extractMcpToolSearchSentinel(content);
  if (!sentinel) return [];
  const matchedTools = selectDeferredToolsByNames(sentinel.names, originalTools);
  return matchedTools.map((tool) => tool.name);
};
5290
/**
 * Collects the `tool_name` of every `tool_reference` block in a tool-result content array.
 *
 * @param content `tool_result` content; anything that is not an array yields no names.
 * @returns Tool names in their original order, duplicates included.
 */
const extractToolReferenceNames = (content) => {
  if (!Array.isArray(content)) return [];
  return content
    .filter((block) => block.type === "tool_reference")
    .map((block) => block.tool_name);
};
5294
/**
 * Finds the first tool-search sentinel in a `tool_result` content value.
 *
 * String content is parsed directly. Array content is scanned block by block and the
 * first `text` block for which parseMcpToolSearchSentinel returns a truthy value wins.
 *
 * @param content `tool_result` content: a string, an array of blocks, or absent.
 * @returns The parsed sentinel, or `null` when the array form contains none or the
 *   content is neither a string nor an array.
 */
const extractMcpToolSearchSentinel = (content) => {
  if (typeof content === "string") return parseMcpToolSearchSentinel(content);
  // A tool result may have no content at all; iterating undefined/null with for...of
  // would throw a TypeError, so treat anything that is not an array as "no sentinel".
  if (!Array.isArray(content)) return null;
  for (const block of content) {
    if (block?.type !== "text") continue;
    const sentinel = parseMcpToolSearchSentinel(block.text);
    if (sentinel) return sentinel;
  }
  return null;
};
5303
/**
 * Looks up a deferred tool definition by exact name.
 *
 * @param toolName Name referenced by a tool-search result.
 * @param originalTools Tool definitions from the incoming request.
 * @returns The first tool named `toolName`, provided isDeferredToolName accepts its name.
 * @throws The error built by createInvalidRequestError when no such deferred tool exists.
 */
const resolveDeferredTool = (toolName, originalTools) => {
  const match = originalTools.find((candidate) => candidate.name === toolName);
  const isUsable = Boolean(match) && isDeferredToolName(match.name);
  if (!isUsable) throw createInvalidRequestError(`Tool reference '${toolName}' has no corresponding deferred tool definition`);
  return match;
};
5308
/**
 * Removes duplicate tool names, keeping the first occurrence of each in order.
 *
 * @param toolNames Iterable of tool names.
 * @returns A new array without duplicates.
 */
const uniqueToolNames = (toolNames) => Array.from(new Set(toolNames));
5309
/**
 * Creates an HTTPError wrapping a synthetic 400 JSON response.
 *
 * The response body is `{ error: { message, type: "invalid_request_error" } }`.
 *
 * @param message Human-readable explanation, used for both the error and the body.
 * @returns The HTTPError instance (not thrown here).
 */
const createInvalidRequestError = (message) => {
  const body = JSON.stringify({
    error: {
      message,
      type: "invalid_request_error"
    }
  });
  const response = new Response(body, {
    status: 400,
    headers: { "content-type": "application/json" }
  });
  return new HTTPError(message, response);
};
4868
5316
  const translateSystemPrompt = (system, model) => {
4869
5317
  if (!system) return null;
4870
5318
  const extraPrompt = getExtraPromptForModel(model);
@@ -4875,31 +5323,83 @@ const translateSystemPrompt = (system, model) => {
4875
5323
  }).join(" ");
4876
5324
  return text.length > 0 ? text : null;
4877
5325
  };
4878
/**
 * Converts Anthropic tool definitions into Responses API tool definitions.
 *
 * - Tools accepted by isBridgeToolSearchName are never forwarded as functions. With tool
 *   search enabled, the first one is replaced by a single `tool_search` definition; the
 *   rest (and all of them when tool search is disabled) are dropped.
 * - With tool search enabled, tools accepted by isDeferredToolName become namespaces.
 * - Everything else becomes a plain function tool.
 *
 * @param tools Anthropic tool definitions, possibly missing or empty.
 * @param toolSearchEnabled Whether the tool-search translation is active.
 * @returns The converted tools, or `null` when `tools` is missing or empty.
 */
const convertAnthropicTools = (tools, toolSearchEnabled) => {
  if (!tools || tools.length === 0) return null;
  const searchableToolNames = toolSearchEnabled ? listDeferredToolNames(tools) : [];
  let searchDefinitionEmitted = false;
  return tools.flatMap((tool) => {
    if (isBridgeToolSearchName(tool.name)) {
      if (!toolSearchEnabled || searchDefinitionEmitted) return [];
      const searchDefinition = createResponsesToolSearchDefinition(searchableToolNames);
      searchDefinitionEmitted = true;
      return [searchDefinition];
    }
    const asNamespace = toolSearchEnabled && isDeferredToolName(tool.name);
    return [asNamespace ? convertDeferredToolToNamespace(tool) : convertToolToFunction(tool)];
  });
};
5347
/**
 * Builds the client-executed `tool_search` tool definition.
 *
 * Its single required parameter, `names`, is a non-empty array of strings restricted
 * to `searchableToolNames`.
 *
 * @param searchableToolNames Names allowed in the `names` argument (used as the enum).
 * @returns The `tool_search` tool definition.
 */
const createResponsesToolSearchDefinition = (searchableToolNames) => {
  const namesSchema = {
    type: "array",
    description: "Exact deferred tool names to load.",
    items: {
      type: "string",
      enum: searchableToolNames
    },
    minItems: 1
  };
  const parameters = {
    type: "object",
    properties: { names: namesSchema },
    required: ["names"],
    additionalProperties: false
  };
  return {
    type: "tool_search",
    execution: "client",
    description: "Load deferred tools by exact name before using them. Return only the searchable tool names you need for the next step.",
    parameters
  };
};
5366
/**
 * Converts one Anthropic tool definition into a non-strict Responses function tool.
 *
 * @param tool Anthropic tool (`name`, `input_schema`, optional `description`).
 * @returns The function tool; `description` is present only when the source has a
 *   truthy one.
 */
const convertToolToFunction = (tool) => {
  const definition = {
    type: "function",
    name: tool.name,
    parameters: normalizeToolSchema(tool.input_schema),
    strict: false
  };
  if (tool.description) definition.description = tool.description;
  return definition;
};
5373
/**
 * Wraps one Anthropic tool definition in a Responses `namespace` holding a single
 * function tool marked `defer_loading: true`.
 *
 * The namespace and the inner function share the tool's name and, when truthy, its
 * description.
 *
 * @param tool Anthropic tool (`name`, `input_schema`, optional `description`).
 * @returns The namespace definition.
 */
const convertDeferredToolToNamespace = (tool) => {
  const optionalDescription = tool.description ? { description: tool.description } : {};
  const deferredFunction = {
    type: "function",
    name: tool.name,
    parameters: normalizeToolSchema(tool.input_schema),
    strict: false,
    defer_loading: true,
    ...optionalDescription
  };
  return {
    type: "namespace",
    name: tool.name,
    ...optionalDescription,
    tools: [deferredFunction]
  };
};
5386
/**
 * Maps an Anthropic `tool_choice` onto the Responses API `tool_choice` value.
 *
 * - missing choice, "auto", or an unknown type -> "auto"
 * - "any" -> "required"
 * - "none" -> "none"
 * - "tool" -> `{ type: "function", name }`, except that a missing name, or (with tool
 *   search enabled) a name accepted by isBridgeToolSearchName, yields "auto"
 *
 * @param choice Anthropic tool choice, possibly missing.
 * @param toolSearchEnabled Whether the tool-search translation is active.
 * @returns The Responses tool choice.
 */
const convertAnthropicToolChoice = (choice, toolSearchEnabled) => {
  if (!choice) return "auto";
  const { type, name } = choice;
  if (type === "any") return "required";
  if (type === "none") return "none";
  if (type !== "tool" || !name) return "auto";
  if (toolSearchEnabled && isBridgeToolSearchName(name)) return "auto";
  return {
    type: "function",
    name
  };
};
4901
- const translateResponsesResultToAnthropic = (response) => {
4902
- const contentBlocks = mapOutputToAnthropicContent(response.output);
5401
+ const translateResponsesResultToAnthropic = (response, options) => {
5402
+ const contentBlocks = mapOutputToAnthropicContent(response.output, options);
4903
5403
  const usage = mapResponsesUsage(response);
4904
5404
  let anthropicContent = fallbackContentBlocks(response.output_text);
4905
5405
  if (contentBlocks.length > 0) anthropicContent = contentBlocks;
@@ -4915,7 +5415,7 @@ const translateResponsesResultToAnthropic = (response) => {
4915
5415
  usage
4916
5416
  };
4917
5417
  };
4918
- const mapOutputToAnthropicContent = (output) => {
5418
+ const mapOutputToAnthropicContent = (output, options) => {
4919
5419
  const contentBlocks = [];
4920
5420
  for (const item of output) switch (item.type) {
4921
5421
  case "reasoning": {
@@ -4932,6 +5432,12 @@ const mapOutputToAnthropicContent = (output) => {
4932
5432
  if (toolUseBlock) contentBlocks.push(toolUseBlock);
4933
5433
  break;
4934
5434
  }
5435
+ case "tool_search_call": {
5436
+ const toolUseBlock = createToolSearchUseContentBlock(item, options?.toolSearchName);
5437
+ if (toolUseBlock) contentBlocks.push(toolUseBlock);
5438
+ break;
5439
+ }
5440
+ case "tool_search_output": break;
4935
5441
  case "message": {
4936
5442
  const combinedText = combineMessageTextContent(item.content);
4937
5443
  if (combinedText.length > 0) contentBlocks.push({
@@ -4993,15 +5499,29 @@ const extractReasoningText = (item) => {
4993
5499
  };
4994
5500
  const createToolUseContentBlock = (call) => {
4995
5501
  const toolId = call.call_id;
4996
- if (!call.name || !toolId) return null;
4997
- const input = parseFunctionCallArguments(call.arguments);
5502
+ const toolName = resolveToolUseName(call);
5503
+ if (!toolName || !toolId) return null;
5504
+ return {
5505
+ type: "tool_use",
5506
+ id: toolId,
5507
+ name: toolName,
5508
+ input: parseFunctionCallArguments(call.arguments)
5509
+ };
5510
+ };
5511
/**
 * Converts a Responses `tool_search_call` output item into an Anthropic `tool_use` block.
 *
 * @param call Responses tool-search item (`call_id`, `arguments`).
 * @param toolSearchName Name reported to the client; defaults to BRIDGE_TOOL_SEARCH_NAME.
 * @returns The `tool_use` block, or `null` when the call has no `call_id`.
 */
const createToolSearchUseContentBlock = (call, toolSearchName = BRIDGE_TOOL_SEARCH_NAME) => {
  const { call_id: toolId } = call;
  return toolId
    ? {
        type: "tool_use",
        id: toolId,
        name: toolSearchName,
        input: parseToolSearchArguments(call.arguments)
      }
    : null;
};
5521
/**
 * Chooses the tool name to report to the client for a Responses function call.
 *
 * @param call Responses function-call item.
 * @returns `call.namespace` when it is a non-empty string, otherwise `call.name`.
 */
const resolveToolUseName = (call) => {
  const { namespace, name } = call;
  const hasNamespace = typeof namespace === "string" && namespace.length > 0;
  return hasNamespace ? namespace : name;
};
5005
5525
  const createCompactionThinkingBlock = (item) => {
5006
5526
  if (!item.id || !item.encrypted_content) return null;
5007
5527
  return {
@@ -5027,6 +5547,9 @@ const parseFunctionCallArguments = (rawArguments) => {
5027
5547
  }
5028
5548
  return { raw_arguments: rawArguments };
5029
5549
  };
5550
/**
 * Converts the arguments of a Responses tool-search call into the `input` of the
 * Anthropic `tool_use` block by delegating to formatToolSearchBridgeArguments.
 *
 * @param argumentsValue Arguments carried by the `tool_search_call` item.
 * @returns Whatever formatToolSearchBridgeArguments returns.
 */
const parseToolSearchArguments = (argumentsValue) => formatToolSearchBridgeArguments(argumentsValue);
5030
5553
  const fallbackContentBlocks = (outputText) => {
5031
5554
  if (!outputText) return [];
5032
5555
  return [{
@@ -5037,7 +5560,7 @@ const fallbackContentBlocks = (outputText) => {
5037
5560
  const mapResponsesStopReason = (response) => {
5038
5561
  const { status, incomplete_details: incompleteDetails } = response;
5039
5562
  if (status === "completed") {
5040
- if (response.output.some((item) => item.type === "function_call")) return "tool_use";
5563
+ if (response.output.some((item) => item.type === "function_call" || item.type === "tool_search_call")) return "tool_use";
5041
5564
  return "end_turn";
5042
5565
  }
5043
5566
  if (status === "incomplete") {
@@ -5971,14 +6494,15 @@ const updateWhitespaceRunState = (previousCount, chunk) => {
5971
6494
  exceeded: false
5972
6495
  };
5973
6496
  };
5974
/**
 * Creates the mutable bookkeeping object used while translating one Responses stream.
 *
 * Every call returns fresh Map/Set instances, so state is never shared between streams.
 *
 * @param options Optional settings; `options.toolSearchName` overrides the tool name
 *   reported for tool-search calls (default "mcp__tool_search__search").
 * @returns The initial stream state.
 */
const createResponsesStreamState = (options) => {
  const toolSearchName = options?.toolSearchName ?? "mcp__tool_search__search";
  return {
    messageStartSent: false,
    messageCompleted: false,
    nextContentBlockIndex: 0,
    blockIndexByKey: /* @__PURE__ */ new Map(),
    openBlocks: /* @__PURE__ */ new Set(),
    blockHasDelta: /* @__PURE__ */ new Set(),
    functionCallStateByOutputIndex: /* @__PURE__ */ new Map(),
    toolSearchName
  };
};
5983
6507
  const translateResponsesStreamEvent = (rawEvent, state) => {
5984
6508
  switch (rawEvent.type) {
@@ -6003,7 +6527,7 @@ const handleResponseCreated = (rawEvent, state) => {
6003
6527
  };
6004
6528
  const handleOutputItemAdded$1 = (rawEvent, state) => {
6005
6529
  const events = new Array();
6006
- const functionCallDetails = extractFunctionCallDetails(rawEvent);
6530
+ const functionCallDetails = extractFunctionCallDetails(rawEvent, state);
6007
6531
  if (!functionCallDetails) return events;
6008
6532
  const { outputIndex, toolCallId, name, initialArguments } = functionCallDetails;
6009
6533
  const blockIndex = openFunctionCallBlock(state, {
@@ -6030,6 +6554,28 @@ const handleOutputItemDone$1 = (rawEvent, state) => {
6030
6554
  const item = rawEvent.item;
6031
6555
  const itemType = item.type;
6032
6556
  const outputIndex = rawEvent.output_index;
6557
+ if (itemType === "tool_search_call") {
6558
+ const blockIndex = openFunctionCallBlock(state, {
6559
+ outputIndex,
6560
+ toolCallId: item.call_id,
6561
+ name: state.toolSearchName,
6562
+ events
6563
+ });
6564
+ const finalArguments = stringifyToolSearchArguments(item.arguments);
6565
+ if (!state.blockHasDelta.has(blockIndex) && finalArguments) {
6566
+ events.push({
6567
+ type: "content_block_delta",
6568
+ index: blockIndex,
6569
+ delta: {
6570
+ type: "input_json_delta",
6571
+ partial_json: finalArguments
6572
+ }
6573
+ });
6574
+ state.blockHasDelta.add(blockIndex);
6575
+ }
6576
+ state.functionCallStateByOutputIndex.delete(outputIndex);
6577
+ return events;
6578
+ }
6033
6579
  if (itemType === "compaction") {
6034
6580
  if (!item.id || !item.encrypted_content) return events;
6035
6581
  const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events);
@@ -6365,24 +6911,47 @@ const openFunctionCallBlock = (state, params) => {
6365
6911
  }
6366
6912
  return blockIndex;
6367
6913
  };
6368
/**
 * Extracts what is needed to open a tool-use block from an output-item stream event.
 *
 * - `tool_search_call` items use `state.toolSearchName` and start with empty arguments.
 * - `function_call` items use resolveToolUseName and the item's own arguments.
 *
 * @param rawEvent Stream event carrying `item` and `output_index`.
 * @param state Stream state (reads `toolSearchName`).
 * @returns `{ outputIndex, toolCallId, name, initialArguments }`, or `undefined` for
 *   any other item type.
 */
const extractFunctionCallDetails = (rawEvent, state) => {
  const { item, output_index: outputIndex } = rawEvent;
  switch (item.type) {
    case "tool_search_call":
      return {
        outputIndex,
        toolCallId: item.call_id,
        name: state.toolSearchName,
        initialArguments: ""
      };
    case "function_call":
      return {
        outputIndex,
        toolCallId: item.call_id,
        name: resolveToolUseName(item),
        initialArguments: item.arguments
      };
    default:
      return;
  }
};
6378
- //#endregion
6379
- //#region src/routes/responses/utils.ts
6931
/**
 * Serializes tool-search arguments to the JSON text sent as an `input_json_delta`.
 *
 * @param argumentsValue Arguments carried by a `tool_search_call` item.
 * @returns The JSON string of formatToolSearchBridgeArguments(argumentsValue), or
 *   `undefined` when formatting or serialization throws. The caller only emits a
 *   delta for a truthy result, so a failure here is deliberately non-fatal.
 */
const stringifyToolSearchArguments = (argumentsValue) => {
  let serialized;
  try {
    serialized = JSON.stringify(formatToolSearchBridgeArguments(argumentsValue));
  } catch {
    serialized = void 0;
  }
  return serialized;
};
6938
/**
 * Lookup table through which the Responses helpers in this module reach their
 * feature-flag predicates, instead of calling them by bare name.
 * NOTE(review): presumably this indirection exists so the predicates can be swapped
 * out (e.g. in tests) — confirm against the original source module.
 */
const responsesUtilsDependencies = {
  // Consulted by applyResponsesApiContextManagement with the payload's model.
  isResponsesApiContextManagementModel,
  // Consulted by getResponsesTransportForModel before choosing a transport.
  isResponsesApiWebSocketEnabled
};
6380
6942
  const getResponsesRequestOptions = (payload) => {
6381
6943
  return {
6382
6944
  vision: hasVisionInput$1(payload),
6383
6945
  initiator: hasAgentInitiator(payload) ? "agent" : "user"
6384
6946
  };
6385
6947
  };
6948
/**
 * Chooses the transport for a Responses API call from the model's supported endpoints.
 *
 * @param selectedModel Model descriptor; a missing model or missing
 *   `supported_endpoints` counts as supporting nothing.
 * @param options Optional settings. A `compactType` of exactly 1 rules out WebSocket
 *   (NOTE(review): the meaning of compact type 1 is defined elsewhere — confirm).
 * @returns "websocket" when WebSocket is enabled, not ruled out, and the model lists
 *   "ws:/responses"; otherwise "http" when it lists "/responses"; otherwise `null`.
 */
const getResponsesTransportForModel = (selectedModel, options = {}) => {
  const endpoints = selectedModel?.supported_endpoints ?? [];
  const webSocketEnabled = responsesUtilsDependencies.isResponsesApiWebSocketEnabled();
  const webSocketAllowed = options.compactType !== 1 && webSocketEnabled;
  if (webSocketAllowed && endpoints.includes("ws:/responses")) return "websocket";
  return endpoints.includes("/responses") ? "http" : null;
};
6386
6955
  const hasAgentInitiator = (payload) => {
6387
6956
  const items = getPayloadItems(payload);
6388
6957
  if (isForceAgentEnabled()) return items.some((item) => isAgentRole(item));
@@ -6407,7 +6976,7 @@ const createCompactionContextManagement = (compactThreshold) => [{
6407
6976
  }];
6408
6977
  const applyResponsesApiContextManagement = (payload, maxPromptTokens) => {
6409
6978
  if (payload.context_management !== void 0) return;
6410
- if (!isResponsesApiContextManagementModel(payload.model)) return;
6979
+ if (!responsesUtilsDependencies.isResponsesApiContextManagementModel(payload.model)) return;
6411
6980
  payload.context_management = createCompactionContextManagement(resolveResponsesCompactThreshold(maxPromptTokens));
6412
6981
  };
6413
6982
  const compactInputByLatestCompaction = (payload) => {
@@ -6677,6 +7246,14 @@ const logger$3 = createHandlerLogger("messages-handler");
6677
7246
  const CHAT_COMPLETIONS_ENDPOINT = "/chat/completions";
6678
7247
  const RESPONSES_ENDPOINT$1 = "/responses";
6679
7248
  const MESSAGES_ENDPOINT = "/v1/messages";
7249
/**
 * Returns the provider-config resolver to use for a request.
 *
 * @param c Request context exposing `get(key)`.
 * @returns The value stored under "providerConfigResolver" on the context, or
 *   getProviderConfig when that value is null or undefined.
 */
const getProviderConfigResolver = (c) => {
  const injectedResolver = c.get("providerConfigResolver");
  return injectedResolver ?? getProviderConfig;
};
7252
/**
 * Resolves a provider model alias for the target of a model alias.
 *
 * @param model Model name from the request.
 * @param providerConfigResolver Resolver passed to resolveExistingProviderModelAlias.
 * @returns `null` when resolveModelAlias leaves the name unchanged; otherwise the
 *   result of resolveExistingProviderModelAlias for the aliased target.
 */
const resolveProviderTargetModelAlias = (model, providerConfigResolver) => {
  const aliasTarget = resolveModelAlias(model);
  return aliasTarget === model
    ? null
    : resolveExistingProviderModelAlias(aliasTarget, providerConfigResolver);
};
6680
7257
  function normalizeProviderAliasUsage(usage) {
6681
7258
  const tokensInput = usage.inputTokens === void 0 ? void 0 : Math.max(0, usage.inputTokens);
6682
7259
  const tokensCachedInput = usage.cacheReadInputTokens;
@@ -6769,11 +7346,12 @@ async function handleProviderAliasCompletion(c, options) {
6769
7346
  }
6770
7347
  async function handleCompletion(c) {
6771
7348
  const anthropicPayload = await c.req.json();
6772
- const providerModelAlias = parseProviderModelAlias(anthropicPayload.model);
6773
- if (providerModelAlias) return await handleProviderAliasCompletion(c, {
7349
+ const providerConfigResolver = getProviderConfigResolver(c);
7350
+ const providerTargetModelAlias = resolveExistingProviderModelAlias(anthropicPayload.model, providerConfigResolver) ?? resolveProviderTargetModelAlias(anthropicPayload.model, providerConfigResolver);
7351
+ if (providerTargetModelAlias) return await handleProviderAliasCompletion(c, {
6774
7352
  payload: anthropicPayload,
6775
- provider: providerModelAlias.provider,
6776
- providerModel: providerModelAlias.model
7353
+ provider: providerTargetModelAlias.provider,
7354
+ providerModel: providerTargetModelAlias.model
6777
7355
  });
6778
7356
  await checkRateLimit(state);
6779
7357
  const store = getRequestHistoryStore();
@@ -6800,11 +7378,13 @@ async function handleCompletion(c) {
6800
7378
  const originalRequestModel = anthropicPayload.model;
6801
7379
  if (anthropicBeta && isWarmupProbeRequest(anthropicPayload)) anthropicPayload.model = getSmallModel();
6802
7380
  if (compactType !== 0) logger$3.debug("Compact request type:", compactType);
7381
+ const lastMessageCacheControl = getLastMessageContentCacheControl(anthropicPayload.messages.at(-1));
6803
7382
  if (compactType === 1 && shouldCompactUseSmallModel()) anthropicPayload.model = getSmallModel();
6804
7383
  if (compactType === 0) {
6805
7384
  stripToolReferenceTurnBoundary(anthropicPayload);
6806
7385
  mergeToolResultForClaude(anthropicPayload);
6807
7386
  }
7387
+ applyLastMessageCacheControl(anthropicPayload, lastMessageCacheControl);
6808
7388
  const upstreamRequestId = generateRequestIdFromPayload(anthropicPayload, sessionId);
6809
7389
  logger$3.debug("Generated request ID:", upstreamRequestId);
6810
7390
  const clientModel = anthropicPayload.model;
@@ -7010,11 +7590,15 @@ const handleWithChatCompletions = async (params) => {
7010
7590
  };
7011
7591
  const handleWithResponsesApi = async (params) => {
7012
7592
  const { c, anthropicPayload, openAIPayload, subagentMarker, sessionId, selectedModel, instr, compactType } = params;
7013
- const responsesPayload = translateAnthropicMessagesToResponsesPayload(anthropicPayload, selectedModel.id);
7593
+ const responsesPayload = translateAnthropicMessagesToResponsesPayload(anthropicPayload, {
7594
+ modelOverride: selectedModel.id,
7595
+ subagentAgentId: subagentMarker?.agent_id
7596
+ });
7014
7597
  applyResponsesApiContextManagement(responsesPayload, selectedModel.capabilities.limits.max_prompt_tokens);
7015
7598
  compactInputByLatestCompaction(responsesPayload);
7016
7599
  debugJson(logger$3, "Translated Responses payload:", responsesPayload);
7017
7600
  const { vision, initiator } = getResponsesRequestOptions(responsesPayload);
7601
+ const transport = getResponsesTransportForModel(selectedModel, { compactType }) ?? "http";
7018
7602
  const effectiveInitiator = resolveEffectiveInitiator(initiator, {
7019
7603
  isCompact: compactType !== 0,
7020
7604
  isSubagent: Boolean(subagentMarker)
@@ -7030,7 +7614,8 @@ const handleWithResponsesApi = async (params) => {
7030
7614
  subagentMarker,
7031
7615
  sessionId,
7032
7616
  compactType,
7033
- requestId: instr.requestId
7617
+ requestId: instr.requestId,
7618
+ transport
7034
7619
  }, ctx);
7035
7620
  instr.confirmAffinity?.();
7036
7621
  instr.confirmOwnership?.();
@@ -7840,6 +8425,7 @@ const handleResponses = async (c) => {
7840
8425
  compactInputByLatestCompaction(upstreamPayload);
7841
8426
  const premiumRemainingBefore = account.premiumRemaining;
7842
8427
  const premiumUnlimitedBefore = account.unlimited;
8428
+ const transport = getResponsesTransportForModel(selectedModel) ?? "http";
7843
8429
  const { vision, initiator } = getResponsesRequestOptions(upstreamPayload);
7844
8430
  request.initiator = initiator;
7845
8431
  if (state.manualApprove) await awaitApproval();
@@ -7858,7 +8444,8 @@ const handleResponses = async (c) => {
7858
8444
  vision,
7859
8445
  initiator,
7860
8446
  premiumRemainingBefore,
7861
- premiumUnlimitedBefore
8447
+ premiumUnlimitedBefore,
8448
+ transport
7862
8449
  });
7863
8450
  return handleNonStreamingResponses({
7864
8451
  c,
@@ -7871,7 +8458,8 @@ const handleResponses = async (c) => {
7871
8458
  vision,
7872
8459
  initiator,
7873
8460
  premiumRemainingBefore,
7874
- premiumUnlimitedBefore
8461
+ premiumUnlimitedBefore,
8462
+ transport
7875
8463
  });
7876
8464
  };
7877
8465
  async function observeRequestError(accountId, error, affinity) {
@@ -7979,7 +8567,7 @@ function extractUsageFromChunkData(data) {
7979
8567
  }
7980
8568
  }
7981
8569
  async function handleStreamingResponses(params) {
7982
- const { c, store, request, payload, selection, clientModel, accountCtx, vision, initiator, premiumRemainingBefore, premiumUnlimitedBefore } = params;
8570
+ const { c, store, request, payload, selection, clientModel, accountCtx, vision, initiator, premiumRemainingBefore, premiumUnlimitedBefore, transport } = params;
7983
8571
  let response;
7984
8572
  try {
7985
8573
  response = await createResponses(payload, {
@@ -7987,7 +8575,8 @@ async function handleStreamingResponses(params) {
7987
8575
  initiator,
7988
8576
  upstreamRequestId: request.upstreamRequestId,
7989
8577
  sessionId: request.upstreamSessionId,
7990
- requestId: request.requestId
8578
+ requestId: request.requestId,
8579
+ transport
7991
8580
  }, accountCtx);
7992
8581
  selection.confirmAffinity?.();
7993
8582
  } catch (error) {
@@ -8175,7 +8764,7 @@ async function streamResponsesAndLog(params) {
8175
8764
  }
8176
8765
  }
8177
8766
  async function handleNonStreamingResponses(params) {
8178
- const { c, store, request, payload, selection, clientModel, accountCtx, vision, initiator, premiumRemainingBefore, premiumUnlimitedBefore } = params;
8767
+ const { c, store, request, payload, selection, clientModel, accountCtx, vision, initiator, premiumRemainingBefore, premiumUnlimitedBefore, transport } = params;
8179
8768
  const { account, reservation, selectedModel, endpoint, costUnits } = selection;
8180
8769
  let usage = {};
8181
8770
  let errorState = { httpStatus: 200 };
@@ -8186,7 +8775,8 @@ async function handleNonStreamingResponses(params) {
8186
8775
  initiator,
8187
8776
  upstreamRequestId: request.upstreamRequestId,
8188
8777
  sessionId: request.upstreamSessionId,
8189
- requestId: request.requestId
8778
+ requestId: request.requestId,
8779
+ transport
8190
8780
  }, accountCtx);
8191
8781
  if (isAsyncIterable$1(response)) throw new Error("Upstream returned a stream unexpectedly");
8192
8782
  selection.confirmAffinity?.();
@@ -8337,4 +8927,4 @@ server.route("/:provider/v1/models", providerModelRoutes);
8337
8927
  //#endregion
8338
8928
  export { server };
8339
8929
 
8340
- //# sourceMappingURL=server-BDCnb3Ao.js.map
8930
+ //# sourceMappingURL=server-DmDAepfa.js.map