@nick3/copilot-api 1.9.15 → 1.10.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. package/README.md +3 -1
  2. package/README.zh-CN.md +3 -1
  3. package/dist/{account-DjCbqJ2Q.js → account-COtMmvzU.js} +2 -2
  4. package/dist/{account-DjCbqJ2Q.js.map → account-COtMmvzU.js.map} +1 -1
  5. package/dist/admin/assets/{index-BRnD4-DB.js → index-DG4TRVMu.js} +36 -36
  6. package/dist/admin/index.html +1 -1
  7. package/dist/{auth--I1utaB6.js → auth-B0y-2njL.js} +3 -3
  8. package/dist/{auth--I1utaB6.js.map → auth-B0y-2njL.js.map} +1 -1
  9. package/dist/{check-usage-DHvjdha4.js → check-usage-DdevqHE5.js} +3 -3
  10. package/dist/{check-usage-DHvjdha4.js.map → check-usage-DdevqHE5.js.map} +1 -1
  11. package/dist/{get-copilot-token-ZbmbVF0I.js → get-copilot-token-8Rm-rVsp.js} +2 -2
  12. package/dist/{get-copilot-token-ZbmbVF0I.js.map → get-copilot-token-8Rm-rVsp.js.map} +1 -1
  13. package/dist/main.js +3 -3
  14. package/dist/{poll-access-token-CIPDXrcm.js → poll-access-token-BAgM2-7k.js} +62 -6
  15. package/dist/poll-access-token-BAgM2-7k.js.map +1 -0
  16. package/dist/{quota-refresh-scheduler-runtime-XD2fDa2K.js → proxy-BwmADhKh.js} +67 -8
  17. package/dist/proxy-BwmADhKh.js.map +1 -0
  18. package/dist/{request-outbound-Cy6huWjK.js → request-outbound-BJjWS_jF.js} +1 -1
  19. package/dist/{request-outbound-CxvpSkOn.js → request-outbound-Pu1kp2x8.js} +3 -1
  20. package/dist/request-outbound-Pu1kp2x8.js.map +1 -0
  21. package/dist/{server-BDCnb3Ao.js → server-DxQsi1x2.js} +429 -43
  22. package/dist/server-DxQsi1x2.js.map +1 -0
  23. package/dist/{start-DkBnp9d8.js → start-8QHzPrcg.js} +5 -52
  24. package/dist/start-8QHzPrcg.js.map +1 -0
  25. package/package.json +1 -1
  26. package/dist/poll-access-token-CIPDXrcm.js.map +0 -1
  27. package/dist/quota-refresh-scheduler-runtime-XD2fDa2K.js.map +0 -1
  28. package/dist/request-outbound-CxvpSkOn.js.map +0 -1
  29. package/dist/server-BDCnb3Ao.js.map +0 -1
  30. package/dist/start-DkBnp9d8.js.map +0 -1
@@ -1,21 +1,22 @@
1
- import { A as captureOutboundHeadersSnapshot, D as prepareMessageProxyHeaders, E as prepareInteractionHeaders, M as requestContext, N as resolveTraceId, O as accountFromState, T as prepareForCompact, _ as HTTPError, b as copilotHeaders, c as getUUID, d as parseUserIdMetadata, f as resolveAffinityKey, g as getCopilotUsage, h as getDeviceCode, k as state, l as isNullish, m as getGitHubUser, o as generateRequestIdFromPayload, p as sleep, s as getRootSessionId, t as pollAccessToken, u as normalizeStableSessionId, v as forwardError, w as normalizeDomain, y as copilotBaseUrl } from "./poll-access-token-CIPDXrcm.js";
2
- import { a as getAccountClientIdentityByLoginAndApp, b as getCurrentIdentityEnvironment, d as loadRegistry, g as saveRegistry, h as saveAccountToken, l as listAccountsFromRegistry, m as removeAccountToken, p as removeAccountFromRegistry, r as addAccountToRegistry, t as isAccountType } from "./account-DjCbqJ2Q.js";
1
+ import { A as state, D as prepareInteractionHeaders, E as prepareForCompact, I as compactAutoContinuePromptStarts, L as compactMessageSections, N as requestContext, O as prepareMessageProxyHeaders, P as resolveTraceId, S as copilotWebSocketHeaders, T as normalizeDomain, _ as HTTPError, b as copilotHeaders, c as getUUID, d as parseUserIdMetadata, f as resolveAffinityKey, g as getCopilotUsage, h as getDeviceCode, j as captureOutboundHeadersSnapshot, k as accountFromState, l as isNullish, m as getGitHubUser, o as generateRequestIdFromPayload, p as sleep, s as getRootSessionId, t as pollAccessToken, u as normalizeStableSessionId, v as forwardError, y as copilotBaseUrl, z as compactSystemPromptStarts } from "./poll-access-token-BAgM2-7k.js";
2
+ import { a as getAccountClientIdentityByLoginAndApp, b as getCurrentIdentityEnvironment, d as loadRegistry, g as saveRegistry, h as saveAccountToken, l as listAccountsFromRegistry, m as removeAccountToken, p as removeAccountFromRegistry, r as addAccountToRegistry, t as isAccountType } from "./account-COtMmvzU.js";
3
3
  import { r as ensurePaths, t as PATHS } from "./paths-CclKwouX.js";
4
- import { i as getRequestOutboundStore, r as getRedactedHeaderKeys } from "./request-outbound-CxvpSkOn.js";
5
- import { A as getReasoningEffortForModel, B as shouldCompactUseSmallModel, C as getConfig, D as getModelAliasesInfo, E as getModelAliases, F as isMessagesApiEnabled, I as isResponsesApiContextManagementModel, L as isResponsesApiWebSearchEnabled, M as isAccountAffinityEnabled, N as isForceAgentEnabled, O as getModelRefreshIntervalMs, P as isMessageStartInputTokensFallbackEnabled, R as mergeConfigWithDefaults, S as getClaudeTokenMultiplier, T as getLogLevel, _ as flushPendingCapture, a as accountsManager, b as getAliasTargetSet, c as extractResponsesUsageFromStreamEvent, d as getStatsStore, f as normalizeChatCompletionsUsage, g as copilotFetch, h as toLocalDateString, i as updateQuotaRefreshSchedulerFromConfig, j as getSmallModel, k as getProviderConfig, l as getClientIpInfo, m as normalizeMessagesUsage, o as applySharedSessionAffinityRetention, p as normalizeEmbeddingsUsage, s as extractResponsesUsageFromResult, u as getRequestHistoryStore, v as isDevModeEnabled, w as getExtraPromptForModel, x as getAnthropicApiKey, y as PROVIDER_TYPE_ANTHROPIC, z as resolveModelAlias } from "./quota-refresh-scheduler-runtime-XD2fDa2K.js";
4
+ import { i as getRequestOutboundStore, r as getRedactedHeaderKeys } from "./request-outbound-Pu1kp2x8.js";
5
+ import { A as getModelRefreshIntervalMs, B as isResponsesApiWebSocketEnabled, C as getAnthropicApiKey, D as getLogLevel, E as getExtraPromptForModel, F as isForceAgentEnabled, H as resolveModelAlias, I as isMessageStartInputTokensFallbackEnabled, L as isMessagesApiEnabled, M as getReasoningEffortForModel, N as getSmallModel, O as getModelAliases, P as isAccountAffinityEnabled, R as isResponsesApiContextManagementModel, S as getAliasTargetSet, T as getConfig, U as shouldCompactUseSmallModel, V as mergeConfigWithDefaults, _ as toLocalDateString, b as isDevModeEnabled, c as applySharedSessionAffinityRetention, d as getClientIpInfo, f as getRequestHistoryStore, g as normalizeMessagesUsage, h as normalizeEmbeddingsUsage, j as getProviderConfig, k as getModelAliasesInfo, l as extractResponsesUsageFromResult, m as normalizeChatCompletionsUsage, o as updateQuotaRefreshSchedulerFromConfig, p as getStatsStore, s as accountsManager, t as getProxyEnvDispatcher, u as extractResponsesUsageFromStreamEvent, v as copilotFetch, w as getClaudeTokenMultiplier, x as PROVIDER_TYPE_ANTHROPIC, y as flushPendingCapture, z as isResponsesApiWebSearchEnabled } from "./proxy-BwmADhKh.js";
6
6
  import consola from "consola";
7
7
  import fs, { readFile } from "node:fs/promises";
8
8
  import { createHash, randomUUID, timingSafeEqual } from "node:crypto";
9
9
  import * as path$1 from "node:path";
10
10
  import path from "node:path";
11
11
  import { fileURLToPath } from "node:url";
12
+ import fs$1, { existsSync } from "node:fs";
12
13
  import { Hono } from "hono";
13
14
  import { cors } from "hono/cors";
14
15
  import { logger } from "hono/logger";
15
- import fs$1, { existsSync } from "node:fs";
16
16
  import { streamSSE } from "hono/streaming";
17
17
  import { events } from "fetch-event-stream";
18
18
  import util from "node:util";
19
+ import { WebSocket } from "undici";
19
20
  //#region src/lib/request-auth.ts
20
21
  const LEGACY_API_KEY_ENV_VAR = "COPILOT_API_KEY";
21
22
  let warnedLegacyEnvFallback = false;
@@ -350,10 +351,14 @@ const copilotRateLimitHeaders = {
350
351
  session: "x-usage-ratelimit-session",
351
352
  weekly: "x-usage-ratelimit-weekly"
352
353
  };
354
+ const copilotQuotaSnapshotKeys = {
355
+ session: "5Hour-Session-RateLimits",
356
+ weekly: "Weekly-Session-RateLimits"
357
+ };
353
358
  const hasGetMethod = (headers) => {
354
359
  return "get" in headers && typeof headers.get === "function";
355
360
  };
356
- const getHeaderValue = (headers, headerName) => {
361
+ const getHeaderValue$1 = (headers, headerName) => {
357
362
  if (hasGetMethod(headers)) return headers.get(headerName);
358
363
  const normalizedHeaderName = headerName.toLowerCase();
359
364
  return Object.entries(headers).find(([key]) => key.toLowerCase() === normalizedHeaderName)?.[1] ?? null;
@@ -370,7 +375,7 @@ const parseCopilotRateLimitHeader = (headerValue) => {
370
375
  };
371
376
  const getCopilotRateLimitUsage = (headers, type) => {
372
377
  const headerName = copilotRateLimitHeaders[type];
373
- const headerValue = getHeaderValue(headers, headerName);
378
+ const headerValue = getHeaderValue$1(headers, headerName);
374
379
  if (!headerValue) return null;
375
380
  const parsed = parseCopilotRateLimitHeader(headerValue);
376
381
  if (!parsed) return null;
@@ -379,15 +384,39 @@ const getCopilotRateLimitUsage = (headers, type) => {
379
384
  ...parsed
380
385
  };
381
386
  };
387
+ const getCopilotRateLimitUsageFromSnapshots = (snapshots, type) => {
388
+ const snapshot = snapshots?.[copilotQuotaSnapshotKeys[type]];
389
+ if (!isCopilotQuotaSnapshot(snapshot)) return null;
390
+ return {
391
+ remaining: String(snapshot.percent_remaining),
392
+ resetAt: snapshot.reset_date,
393
+ type
394
+ };
395
+ };
382
396
  const logCopilotRateLimits = (headers) => {
383
397
  for (const type of copilotRateLimitTypes) {
384
398
  const usage = getCopilotRateLimitUsage(headers, type);
385
399
  if (!usage) continue;
386
- const d = new Date(usage.resetAt);
387
- const dateStr = Number.isNaN(d.getTime()) ? usage.resetAt : d.toLocaleString();
388
- consola.info(`Copilot ${usage.type} quota remaining: ${usage.remaining}, resets at: ${dateStr}`);
400
+ logCopilotRateLimitUsage(usage);
389
401
  }
390
402
  };
403
+ const logCopilotQuotaSnapshots = (snapshots) => {
404
+ for (const type of copilotRateLimitTypes) {
405
+ const usage = getCopilotRateLimitUsageFromSnapshots(snapshots, type);
406
+ if (!usage) continue;
407
+ logCopilotRateLimitUsage(usage);
408
+ }
409
+ };
410
+ const logCopilotRateLimitUsage = (usage) => {
411
+ const d = new Date(usage.resetAt);
412
+ const dateStr = Number.isNaN(d.getTime()) ? usage.resetAt : d.toLocaleString();
413
+ consola.info(`Copilot ${usage.type} quota remaining: ${usage.remaining}, resets at: ${dateStr}`);
414
+ };
/**
 * Type guard for a single Copilot quota snapshot record.
 *
 * A value qualifies only when it is a non-null object whose `entitlement`,
 * `percent_remaining`, `overage_permitted`, `overage_count` and `reset_date`
 * fields all have the expected primitive type.
 *
 * @param {unknown} value - Candidate snapshot.
 * @returns {boolean} True when every required field is present with the right type.
 */
const isCopilotQuotaSnapshot = (value) => {
	if (!value || typeof value !== "object") return false;
	// Checked in declaration order; `every` stops at the first mismatch.
	const requiredFieldTypes = [
		["entitlement", "string"],
		["percent_remaining", "number"],
		["overage_permitted", "boolean"],
		["overage_count", "number"],
		["reset_date", "string"]
	];
	return requiredFieldTypes.every(([field, expectedType]) => typeof value[field] === expectedType);
};
391
420
  //#endregion
392
421
  //#region src/lib/request-initiator.ts
393
422
  function resolveEffectiveInitiator(baseInitiator, options) {
@@ -663,13 +692,6 @@ const getTokenCount = async (payload, model) => {
663
692
  output: outputTokens
664
693
  };
665
694
  };
666
- const compactSystemPromptStarts = ["You are a helpful AI assistant tasked with summarizing conversations", "You are an anchored context summarization assistant for coding sessions."];
667
- const compactAutoContinuePromptStarts = [
668
- "This session is being continued from a previous conversation that ran out of context. The summary below covers the earlier portion of the conversation.",
669
- "Continue if you have next steps, or stop and ask for clarification if you are unsure how to proceed.",
670
- "The previous request exceeded the provider's size limit due to large media attachments. The conversation was compacted and media files were removed from context."
671
- ];
672
- const compactMessageSections = ["Pending Tasks:", "Current Work:"];
673
695
  const IDE_EXECUTE_CODE_TOOL = "mcp__ide__executeCode";
674
696
  const IDE_GET_DIAGNOSTICS_TOOL = "mcp__ide__getDiagnostics";
675
697
  const IDE_GET_DIAGNOSTICS_DESCRIPTION = "Get language diagnostics from VS Code. Returns errors, warnings, information, and hints for files in the workspace.";
@@ -1734,6 +1756,7 @@ const CONFIG_KEYS = new Set([
1734
1756
  "modelRefreshIntervalHours",
1735
1757
  "sessionAffinityRetentionDays",
1736
1758
  "useMessagesApi",
1759
+ "useResponsesApiWebSocket",
1737
1760
  "useResponsesApiWebSearch",
1738
1761
  "devMode",
1739
1762
  "quotaRefresh"
@@ -2225,6 +2248,7 @@ const CONFIG_PATCH_HANDLERS = {
2225
2248
  modelRefreshIntervalHours: (next, value) => applyOptionalNumber(next, "modelRefreshIntervalHours", value),
2226
2249
  sessionAffinityRetentionDays: (next, value) => applyOptionalNumber(next, "sessionAffinityRetentionDays", value),
2227
2250
  useMessagesApi: (next, value) => applyOptionalBoolean(next, "useMessagesApi", value),
2251
+ useResponsesApiWebSocket: (next, value) => applyOptionalBoolean(next, "useResponsesApiWebSocket", value),
2228
2252
  useResponsesApiWebSearch: (next, value) => applyOptionalBoolean(next, "useResponsesApiWebSearch", value),
2229
2253
  devMode: applyDevModeConfig,
2230
2254
  quotaRefresh: applyQuotaRefreshConfig
@@ -4462,6 +4486,11 @@ const parseProviderModelAlias = (model) => {
4462
4486
  provider
4463
4487
  };
4464
4488
  };
4489
+ const resolveExistingProviderModelAlias = (model, resolveProvider) => {
4490
+ const alias = parseProviderModelAlias(model);
4491
+ if (!alias) return null;
4492
+ return resolveProvider(alias.provider) ? alias : null;
4493
+ };
4465
4494
  const createFallbackModel = (modelId) => ({
4466
4495
  capabilities: {
4467
4496
  family: "provider",
@@ -4531,6 +4560,9 @@ async function handleProviderCountTokensForProvider(c, options) {
4531
4560
  }
4532
4561
  //#endregion
4533
4562
  //#region src/routes/messages/count-tokens-handler.ts
4563
+ const getProviderConfigResolver$1 = (c) => {
4564
+ return c.get("providerConfigResolver") ?? getProviderConfig;
4565
+ };
4534
4566
  const resolveCountTokensModel = (modelId, findModel = findEndpointModel) => {
4535
4567
  const selectedModel = findModel(modelId);
4536
4568
  if (selectedModel) return {
@@ -4581,7 +4613,8 @@ async function countTokensViaAnthropic(c, payload) {
4581
4613
  */
4582
4614
  async function handleCountTokens(c) {
4583
4615
  const anthropicPayload = await c.req.json();
4584
- const providerModelAlias = parseProviderModelAlias(anthropicPayload.model);
4616
+ anthropicPayload.model = resolveModelAlias(anthropicPayload.model);
4617
+ const providerModelAlias = resolveExistingProviderModelAlias(anthropicPayload.model, getProviderConfigResolver$1(c));
4585
4618
  if (providerModelAlias) {
4586
4619
  anthropicPayload.model = providerModelAlias.model;
4587
4620
  return await handleProviderCountTokensForProvider(c, {
@@ -4617,7 +4650,8 @@ async function handleCountTokens(c) {
4617
4650
  }
4618
4651
  //#endregion
4619
4652
  //#region src/services/copilot/create-responses.ts
4620
- const createResponses = async (payload, { vision, initiator, upstreamRequestId, subagentMarker, sessionId, compactType, requestId, fetchImpl }, account) => {
4653
+ const RESPONSES_WEBSOCKET_IDLE_TIMEOUT_MS = 6e4;
4654
+ const createResponses = async (payload, { vision, initiator, upstreamRequestId, subagentMarker, sessionId, compactType, requestId, fetchImpl, transport = "http" }, account) => {
4621
4655
  const ctx = account ?? accountFromState();
4622
4656
  if (!ctx.copilotToken) throw new Error("Copilot token not found");
4623
4657
  const effectiveInitiator = resolveEffectiveInitiator(initiator, {
@@ -4632,14 +4666,30 @@ const createResponses = async (payload, { vision, initiator, upstreamRequestId,
4632
4666
  prepareForCompact(headers, compactType);
4633
4667
  payload.service_tier = void 0;
4634
4668
  captureOutboundHeadersSnapshot(headers);
4635
- const response = await copilotFetch(`${copilotBaseUrl(ctx)}/responses`, {
4669
+ consola.log(`<-- model: ${payload.model}`);
4670
+ if ((compactType === 1 ? "http" : transport) === "websocket") {
4671
+ const stream = createPooledResponsesWebSocketStream(prepareResponsesWebSocketRequest(payload, headers, {
4672
+ copilotToken: ctx.copilotToken,
4673
+ requestId: requestId ?? upstreamRequestId ?? "missing-request-id",
4674
+ subagentMarker
4675
+ }), copilotBaseUrl(ctx));
4676
+ if (payload.stream) return stream;
4677
+ return await consumeResponsesWebSocketStream(stream);
4678
+ }
4679
+ return await createHttpResponses(payload, headers, ctx, {
4680
+ fetchImpl,
4681
+ requestId
4682
+ });
4683
+ };
4684
+ const createHttpResponses = async (payload, headers, account, options) => {
4685
+ const response = await copilotFetch(`${copilotBaseUrl(account)}/responses`, {
4636
4686
  method: "POST",
4637
4687
  headers,
4638
4688
  body: JSON.stringify(payload)
4639
4689
  }, {
4640
- requestId,
4690
+ requestId: options.requestId,
4641
4691
  callSite: "responses",
4642
- fetchImpl
4692
+ fetchImpl: options.fetchImpl
4643
4693
  });
4644
4694
  logCopilotRateLimits(response.headers);
4645
4695
  if (!response.ok) {
@@ -4649,21 +4699,328 @@ const createResponses = async (payload, { vision, initiator, upstreamRequestId,
4649
4699
  if (payload.stream) return events(response);
4650
4700
  return await response.json();
4651
4701
  };
4702
+ const prepareResponsesWebSocketRequest = (payload, preparedHeaders, options) => {
4703
+ const initiator = getResponsesWebSocketInitiator(preparedHeaders);
4704
+ return {
4705
+ headers: copilotWebSocketHeaders(preparedHeaders),
4706
+ poolKey: buildResponsesWebSocketPoolKey(payload, options),
4707
+ payload: buildResponsesWebSocketPayload(payload, initiator)
4708
+ };
4709
+ };
4710
+ const buildResponsesWebSocketPoolKey = (payload, { copilotToken, requestId, subagentMarker }) => {
4711
+ const tokenFingerprint = copilotToken ? createHash("sha256").update(copilotToken).digest("hex").slice(0, 16) : "missing-token";
4712
+ const subagentKey = subagentMarker ? [
4713
+ subagentMarker.session_id,
4714
+ subagentMarker.agent_id,
4715
+ subagentMarker.agent_type
4716
+ ].join(":") : "main";
4717
+ return [
4718
+ tokenFingerprint,
4719
+ payload.model,
4720
+ requestId,
4721
+ subagentKey
4722
+ ].map(encodePoolKeyPart).join("|");
4723
+ };
4724
+ const getResponsesWebSocketInitiator = (preparedHeaders) => {
4725
+ return getHeaderValue(preparedHeaders, "x-initiator")?.toLowerCase() === "agent" ? "agent" : "user";
4726
+ };
4727
+ const createPooledResponsesWebSocketStream = (request, baseUrl) => runResponsesWebSocketRequest(request, baseUrl);
/**
 * Builds the message sent over the Responses websocket for one request.
 *
 * The result is a shallow copy of `payload` with `type` forced to
 * "response.create" and `initiator` attached; the `stream`, `background` and
 * `service_tier` fields are removed from the copy. The caller's payload object
 * is not mutated.
 *
 * @param {object} payload - Responses API payload.
 * @param {string} initiator - Initiator value to attach to the message.
 * @returns {object} The websocket message object.
 */
const buildResponsesWebSocketPayload = (payload, initiator) => {
	const omittedFields = ["stream", "background", "service_tier"];
	const message = {
		...payload,
		type: "response.create",
		initiator
	};
	for (const field of omittedFields) delete message[field];
	return message;
};
/**
 * Derives the websocket URL of the `/responses` endpoint from a base URL.
 *
 * Trailing slashes on the base are dropped before `/responses` is appended.
 * `https:` is rewritten to `wss:` and `http:` to `ws:`; any other protocol is
 * left as-is.
 *
 * @param {string} baseUrl - Base URL of the upstream API.
 * @returns {string} Serialized websocket URL.
 */
const buildResponsesWebSocketUrl = (baseUrl) => {
	const trimmedBase = baseUrl.replace(/\/+$/u, "");
	const endpoint = new URL(`${trimmedBase}/responses`);
	switch (endpoint.protocol) {
		case "https:":
			endpoint.protocol = "wss:";
			break;
		case "http:":
			endpoint.protocol = "ws:";
			break;
		default:
			break;
	}
	return endpoint.toString();
};
4745
+ const responsesWebSocketPool = /* @__PURE__ */ new Map();
4746
+ const responsesWebSocketActiveRequests = /* @__PURE__ */ new Map();
/**
 * Sends one `response.create` request over a (possibly pooled) Responses
 * websocket and yields each received frame as a stream chunk until a terminal
 * chunk is seen.
 *
 * The entry is only left available for reuse when a terminal chunk was
 * reached. If the consumer stops iterating early, or any error occurs, the
 * entry is evicted and its socket closed: the upstream may still be streaming
 * the previous response on that socket, and a later request reusing it would
 * otherwise read those stale events.
 *
 * @param {{ headers: object, poolKey: string, payload: object }} request - Prepared websocket request.
 * @param {string} baseUrl - Base URL used to open a new socket when needed.
 * @yields {{ data: string, event?: string, id?: string }} Stream chunks.
 * @throws {Error} When the socket is unavailable, errors, or closes before a terminal chunk.
 */
const runResponsesWebSocketRequest = async function* (request, baseUrl) {
	const { entry, pooled } = getResponsesWebSocketRequestTarget(request, baseUrl);
	const release = acquireResponsesWebSocketEntry(request.poolKey, entry, pooled);
	let socketReusable = false;
	try {
		const websocket = await getReadyResponsesWebSocket(request.poolKey, entry, pooled);
		websocket.send(JSON.stringify(request.payload));
		for await (const data of createWebSocketMessageStream(websocket)) {
			const chunk = createResponsesWebSocketStreamChunk(data);
			if (isTerminalResponsesStreamChunk(chunk)) {
				// Mark before yielding: a consumer may stop iterating exactly on
				// the terminal chunk, and the socket is clean at that point.
				socketReusable = true;
				yield chunk;
				return;
			}
			yield chunk;
		}
		throw new Error("Responses websocket ended without a terminal response");
	} catch (error) {
		socketReusable = false;
		throw toError(error);
	} finally {
		// Evict before releasing so release() sees the entry as closed and does
		// not schedule it for idle reuse.
		if (!socketReusable) removeResponsesWebSocketPoolEntry(request.poolKey, entry);
		release();
	}
};
4767
+ const getResponsesWebSocketRequestTarget = (request, baseUrl) => {
4768
+ if (getResponsesWebSocketActiveRequestCount(request.poolKey) > 0) return {
4769
+ entry: createResponsesWebSocketEntry(request, baseUrl),
4770
+ pooled: false
4771
+ };
4772
+ const existing = responsesWebSocketPool.get(request.poolKey);
4773
+ if (existing && !existing.closed) {
4774
+ clearResponsesWebSocketIdleTimer(existing);
4775
+ return {
4776
+ entry: existing,
4777
+ pooled: true
4778
+ };
4779
+ }
4780
+ const entry = createResponsesWebSocketEntry(request, baseUrl);
4781
+ responsesWebSocketPool.set(request.poolKey, entry);
4782
+ return {
4783
+ entry,
4784
+ pooled: true
4785
+ };
4786
+ };
4787
+ const createResponsesWebSocketEntry = (request, baseUrl) => {
4788
+ const entry = {
4789
+ closed: false,
4790
+ idleTimer: null,
4791
+ requestCount: 0,
4792
+ websocketPromise: openResponsesWebSocket({
4793
+ headers: request.headers,
4794
+ url: buildResponsesWebSocketUrl(baseUrl)
4795
+ })
4796
+ };
4797
+ entry.websocketPromise.then((websocket) => {
4798
+ websocket.addEventListener("close", () => {
4799
+ removeResponsesWebSocketPoolEntry(request.poolKey, entry);
4800
+ });
4801
+ websocket.addEventListener("error", () => {
4802
+ removeResponsesWebSocketPoolEntry(request.poolKey, entry);
4803
+ });
4804
+ }).catch(() => {
4805
+ removeResponsesWebSocketPoolEntry(request.poolKey, entry);
4806
+ });
4807
+ return entry;
4808
+ };
4809
+ const acquireResponsesWebSocketEntry = (poolKey, entry, pooled) => {
4810
+ clearResponsesWebSocketIdleTimer(entry);
4811
+ incrementResponsesWebSocketActiveRequestCount(poolKey);
4812
+ entry.requestCount += 1;
4813
+ let released = false;
4814
+ return () => {
4815
+ if (released) return;
4816
+ released = true;
4817
+ entry.requestCount -= 1;
4818
+ decrementResponsesWebSocketActiveRequestCount(poolKey);
4819
+ if (entry.closed || entry.requestCount > 0) return;
4820
+ if (pooled && responsesWebSocketPool.get(poolKey) === entry) {
4821
+ scheduleResponsesWebSocketIdleClose(poolKey, entry);
4822
+ return;
4823
+ }
4824
+ removeResponsesWebSocketPoolEntry(poolKey, entry);
4825
+ };
4826
+ };
4827
+ const getReadyResponsesWebSocket = async (poolKey, entry, pooled) => {
4828
+ if (entry.closed) throw new Error("Responses websocket became unavailable before the request started");
4829
+ const websocket = await entry.websocketPromise;
4830
+ if (entry.closed || pooled && responsesWebSocketPool.get(poolKey) !== entry) throw new Error("Responses websocket became unavailable before the request started");
4831
+ if (websocket.readyState !== WebSocket.OPEN) {
4832
+ removeResponsesWebSocketPoolEntry(poolKey, entry);
4833
+ throw new Error("Responses websocket became unavailable before the request started");
4834
+ }
4835
+ return websocket;
4836
+ };
4837
+ const scheduleResponsesWebSocketIdleClose = (poolKey, entry) => {
4838
+ clearResponsesWebSocketIdleTimer(entry);
4839
+ entry.idleTimer = setTimeout(() => {
4840
+ removeResponsesWebSocketPoolEntry(poolKey, entry);
4841
+ }, RESPONSES_WEBSOCKET_IDLE_TIMEOUT_MS);
4842
+ unrefTimer(entry.idleTimer);
4843
+ };
4844
+ const clearResponsesWebSocketIdleTimer = (entry) => {
4845
+ if (entry.idleTimer) {
4846
+ clearTimeout(entry.idleTimer);
4847
+ entry.idleTimer = null;
4848
+ }
4849
+ };
4850
+ const getResponsesWebSocketActiveRequestCount = (poolKey) => responsesWebSocketActiveRequests.get(poolKey) ?? 0;
4851
+ const incrementResponsesWebSocketActiveRequestCount = (poolKey) => {
4852
+ responsesWebSocketActiveRequests.set(poolKey, getResponsesWebSocketActiveRequestCount(poolKey) + 1);
4853
+ };
4854
+ const decrementResponsesWebSocketActiveRequestCount = (poolKey) => {
4855
+ const nextCount = getResponsesWebSocketActiveRequestCount(poolKey) - 1;
4856
+ if (nextCount <= 0) {
4857
+ responsesWebSocketActiveRequests.delete(poolKey);
4858
+ return;
4859
+ }
4860
+ responsesWebSocketActiveRequests.set(poolKey, nextCount);
4861
+ };
4862
+ const removeResponsesWebSocketPoolEntry = (poolKey, entry) => {
4863
+ if (responsesWebSocketPool.get(poolKey) === entry) responsesWebSocketPool.delete(poolKey);
4864
+ if (entry.closed) return;
4865
+ entry.closed = true;
4866
+ clearResponsesWebSocketIdleTimer(entry);
4867
+ entry.websocketPromise.then(closeResponsesWebSocket).catch(() => {});
4868
+ };
/**
 * Calls `unref()` on a timer handle when the runtime provides one, so a pending
 * timer does not keep the process alive.
 *
 * Handles without an `unref` method (for example numeric timer ids) and
 * null/undefined are ignored. The explicit null check matters because
 * `typeof null` is "object" and the `in` operator throws on null.
 *
 * @param {unknown} timer - Value returned by `setTimeout`/`setInterval`.
 * @returns {void}
 */
const unrefTimer = (timer) => {
	if (timer !== null && typeof timer === "object" && typeof timer.unref === "function") timer.unref();
};
4872
+ const createResponsesWebSocketError = (message, event) => {
4873
+ const reason = event?.error ?? event?.message;
4874
+ if (reason === void 0 || reason === "") return new Error(message);
4875
+ const cause = toError(reason);
4876
+ return new Error(`${message}: ${cause.message}`, { cause });
4877
+ };
4878
+ const openResponsesWebSocket = async ({ headers, url }) => await new Promise((resolve, reject) => {
4879
+ const dispatcher = getProxyEnvDispatcher();
4880
+ const websocket = new WebSocket(url, dispatcher ? {
4881
+ dispatcher,
4882
+ headers
4883
+ } : { headers });
4884
+ const cleanup = () => {
4885
+ websocket.removeEventListener("open", onOpen);
4886
+ websocket.removeEventListener("error", onError);
4887
+ };
4888
+ const onOpen = () => {
4889
+ cleanup();
4890
+ resolve(websocket);
4891
+ };
4892
+ const onError = (event) => {
4893
+ cleanup();
4894
+ reject(createResponsesWebSocketError("Failed to create responses websocket", event));
4895
+ };
4896
+ websocket.addEventListener("open", onOpen);
4897
+ websocket.addEventListener("error", onError);
4898
+ });
4899
+ const createWebSocketMessageStream = async function* (websocket) {
4900
+ const queue = [];
4901
+ let closed = false;
4902
+ let error = null;
4903
+ let notify = null;
4904
+ const wake = () => {
4905
+ notify?.();
4906
+ notify = null;
4907
+ };
4908
+ const onMessage = (event) => {
4909
+ queue.push(normalizeWebSocketMessageData(event.data));
4910
+ wake();
4911
+ };
4912
+ const onClose = () => {
4913
+ closed = true;
4914
+ wake();
4915
+ };
4916
+ const onError = (event) => {
4917
+ error = createResponsesWebSocketError("Responses websocket stream error", event);
4918
+ wake();
4919
+ };
4920
+ websocket.addEventListener("message", onMessage);
4921
+ websocket.addEventListener("close", onClose);
4922
+ websocket.addEventListener("error", onError);
4923
+ try {
4924
+ while (true) {
4925
+ const item = queue.shift();
4926
+ if (item) {
4927
+ yield await item;
4928
+ continue;
4929
+ }
4930
+ if (error) throw toError(error);
4931
+ if (closed) break;
4932
+ await new Promise((resolve) => {
4933
+ notify = resolve;
4934
+ });
4935
+ }
4936
+ } finally {
4937
+ websocket.removeEventListener("message", onMessage);
4938
+ websocket.removeEventListener("close", onClose);
4939
+ websocket.removeEventListener("error", onError);
4940
+ }
4941
+ };
/**
 * Converts the `data` of a websocket message event into a string.
 *
 * Strings are returned unchanged; `ArrayBuffer`s and typed-array/DataView
 * views are decoded with `TextDecoder`; objects exposing a `text()` method have
 * that method awaited; anything else is coerced with `String()`.
 *
 * @param {unknown} data - Raw message data.
 * @returns {Promise<string>} The message as text.
 */
const normalizeWebSocketMessageData = async (data) => {
	if (typeof data === "string") return data;
	if (data instanceof ArrayBuffer) return new TextDecoder().decode(data);
	if (ArrayBuffer.isView(data)) {
		// Decode only the window the view covers, not the whole backing buffer.
		const bytes = new Uint8Array(data.buffer, data.byteOffset, data.byteLength);
		return new TextDecoder().decode(bytes);
	}
	const exposesText = Boolean(data) && typeof data === "object" && "text" in data && typeof data.text === "function";
	return exposesText ? await data.text() : String(data);
};
/**
 * Reports whether a value is a non-null object with a callable `text` member.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} True when `value.text` exists and is a function.
 */
const isTextReadable = (value) => {
	const isObject = Boolean(value) && typeof value === "object";
	return isObject && "text" in value && typeof value.text === "function";
};
/**
 * Normalizes a thrown or rejected value into an `Error`.
 *
 * @param {unknown} value - Any value.
 * @returns {Error} `value` itself when it already is an Error, otherwise a new
 *   Error whose message is `String(value)`.
 */
const toError = (value) => value instanceof Error ? value : new Error(String(value));
/**
 * Looks up a header value case-insensitively.
 *
 * Accepts either a plain header record or a container exposing a `get(name)`
 * method (such as a `Headers` instance), matching the other header lookup in
 * this file. A plain `Object.entries` scan finds nothing on a `Headers`
 * instance, so that case is routed through `get`.
 *
 * @param {object | null | undefined} headers - Header container.
 * @param {string} headerName - Header name, in any letter case.
 * @returns {string | undefined} The header value, or `undefined` when absent
 *   or when `headers` is not an object.
 */
const getHeaderValue = (headers, headerName) => {
	if (!headers || typeof headers !== "object") return void 0;
	// Normalize the `null` that Headers#get returns for a missing header, so
	// callers keep seeing `undefined`.
	if (typeof headers.get === "function") return headers.get(headerName) ?? void 0;
	const normalizedHeaderName = headerName.toLowerCase();
	return Object.entries(headers).find(([key]) => key.toLowerCase() === normalizedHeaderName)?.[1];
};
4964
+ const encodePoolKeyPart = (value) => encodeURIComponent(value);
/**
 * Wraps one websocket text frame as a stream chunk.
 *
 * The literal "[DONE]" and any frame that cannot be processed as JSON are
 * passed through as `{ data }`. A parsed frame is re-serialized into `data`,
 * and its string `type` / `id` fields are exposed as `event` / `id`
 * (`undefined` when not strings). A "response.completed" frame additionally
 * has its `copilot_quota_snapshots` logged.
 *
 * @param {string} data - Text of a websocket message.
 * @returns {{ data: string, event?: string, id?: string }} Stream chunk.
 */
const createResponsesWebSocketStreamChunk = (data) => {
	if (data === "[DONE]") return { data };
	try {
		const message = JSON.parse(data);
		// Destructuring a JSON `null` throws, which lands in the catch below and
		// passes the frame through unchanged.
		const { type, id } = message;
		if (type === "response.completed") logCopilotQuotaSnapshots(message.copilot_quota_snapshots);
		const serialized = JSON.stringify(message);
		return {
			data: serialized,
			event: typeof type === "string" ? type : void 0,
			id: typeof id === "string" ? id : void 0
		};
	} catch {
		return { data };
	}
};
/**
 * Reports whether a stream chunk ends the current response.
 *
 * A chunk is terminal when its JSON `type` is "response.completed",
 * "response.failed", "response.incomplete" or "error". Empty data, "[DONE]"
 * and unparsable data are never terminal.
 *
 * @param {{ data?: string }} chunk - Stream chunk.
 * @returns {boolean} True for a terminal chunk.
 */
const isTerminalResponsesStreamChunk = (chunk) => {
	const { data } = chunk;
	if (!data || data === "[DONE]") return false;
	const terminalTypes = [
		"response.completed",
		"response.failed",
		"response.incomplete",
		"error"
	];
	try {
		return terminalTypes.includes(JSON.parse(data).type);
	} catch {
		return false;
	}
};
/**
 * Drains a Responses websocket chunk stream and resolves with the final
 * response object, for callers that did not request streaming.
 *
 * Chunks with empty data, the "[DONE]" marker, and frames that are not JSON
 * objects are skipped: createResponsesWebSocketStreamChunk forwards unparsable
 * frames as raw data, so a stray frame must not abort the request with a
 * SyntaxError.
 *
 * @param {AsyncIterable<{ data?: string }>} stream - Chunk stream to consume.
 * @returns {Promise<unknown>} The `response` field of the first
 *   "response.completed", "response.failed" or "response.incomplete" event.
 * @throws {Error} On an "error" event, or when the stream ends without a
 *   terminal event.
 */
const consumeResponsesWebSocketStream = async (stream) => {
	for await (const chunk of stream) {
		if (!chunk.data || chunk.data === "[DONE]") continue;
		let event;
		try {
			event = JSON.parse(chunk.data);
		} catch {
			continue;
		}
		if (!event || typeof event !== "object") continue;
		if (event.type === "error") {
			// Accept both a top-level `message` and a nested `error.message`, and
			// never throw an Error with an empty message.
			const message = event.message ?? event.error?.message ?? "Responses websocket returned an error event";
			throw new Error(message);
		}
		if (event.type === "response.completed" || event.type === "response.failed" || event.type === "response.incomplete") return event.response;
	}
	throw new Error("Responses websocket ended without a terminal response");
};
4997
+ const closeResponsesWebSocket = (websocket) => {
4998
+ if (websocket.readyState === WebSocket.CONNECTING || websocket.readyState === WebSocket.OPEN) websocket.close();
4999
+ };
4652
5000
  //#endregion
4653
5001
  //#region src/routes/messages/responses-translation.ts
4654
5002
  const MESSAGE_TYPE = "message";
4655
5003
  const COMPACTION_SIGNATURE_PREFIX = "cm1#";
4656
5004
  const COMPACTION_SIGNATURE_SEPARATOR = "@";
4657
5005
  const THINKING_TEXT = "Thinking...";
4658
- const translateAnthropicMessagesToResponsesPayload = (payload, modelOverride) => {
4659
- const model = modelOverride ?? payload.model;
/**
 * Derives the prompt cache key for a request.
 *
 * @param {string | null | undefined} basePromptCacheKey - Base key; a falsy
 *   value yields `null`.
 * @param {string | null | undefined} subagentAgentId - Optional subagent id.
 *   When it is non-empty after trimming, `:agent:<id>` is appended to the base key.
 * @returns {string | null} The cache key, or `null` when there is no base key.
 */
const buildPromptCacheKey = (basePromptCacheKey, subagentAgentId) => {
	if (!basePromptCacheKey) return null;
	const agentId = subagentAgentId?.trim();
	return agentId ? `${basePromptCacheKey}:agent:${agentId}` : basePromptCacheKey;
};
5012
+ const translateAnthropicMessagesToResponsesPayload = (payload, options = {}) => {
5013
+ const model = options.modelOverride ?? payload.model;
4660
5014
  const input = [];
4661
5015
  const applyPhase = shouldApplyPhase(payload.model);
4662
5016
  for (const message of payload.messages) input.push(...translateMessage(message, payload.model, applyPhase));
5017
+ const hasOriginalTools = Array.isArray(payload.tools) && payload.tools.length > 0;
4663
5018
  const translatedTools = convertAnthropicTools(payload.tools);
4664
5019
  const toolChoice = convertAnthropicToolChoice(payload.tool_choice);
4665
- const { sessionId: promptCacheKey } = parseUserIdMetadata(payload.metadata?.user_id);
4666
- return {
5020
+ const { sessionId: metadataPromptCacheKey } = parseUserIdMetadata(payload.metadata?.user_id);
5021
+ const sessionAffinity = requestContext.getStore()?.sessionAffinity?.trim() || null;
5022
+ const promptCacheKey = buildPromptCacheKey(metadataPromptCacheKey ?? sessionAffinity, options.subagentAgentId);
5023
+ const responsesPayload = {
4667
5024
  model,
4668
5025
  input,
4669
5026
  instructions: translateSystemPrompt(payload.system, model),
@@ -4673,7 +5030,6 @@ const translateAnthropicMessagesToResponsesPayload = (payload, modelOverride) =>
4673
5030
  tools: translatedTools,
4674
5031
  tool_choice: toolChoice,
4675
5032
  metadata: payload.metadata ? { ...payload.metadata } : null,
4676
- prompt_cache_key: promptCacheKey,
4677
5033
  stream: payload.stream ?? null,
4678
5034
  store: false,
4679
5035
  parallel_tool_calls: true,
@@ -4683,6 +5039,8 @@ const translateAnthropicMessagesToResponsesPayload = (payload, modelOverride) =>
4683
5039
  },
4684
5040
  include: ["reasoning.encrypted_content"]
4685
5041
  };
5042
+ if (hasOriginalTools) responsesPayload.prompt_cache_key = promptCacheKey;
5043
+ return responsesPayload;
4686
5044
  };
4687
5045
  const encodeCompactionCarrierSignature = (compaction) => {
4688
5046
  return `${COMPACTION_SIGNATURE_PREFIX}${compaction.encrypted_content}${COMPACTION_SIGNATURE_SEPARATOR}${compaction.id}`;
@@ -6375,14 +6733,23 @@ const extractFunctionCallDetails = (rawEvent) => {
6375
6733
  initialArguments: item.arguments
6376
6734
  };
6377
6735
  };
6378
- //#endregion
6379
- //#region src/routes/responses/utils.ts
6736
+ const responsesUtilsDependencies = {
6737
+ isResponsesApiContextManagementModel,
6738
+ isResponsesApiWebSocketEnabled
6739
+ };
6380
6740
  const getResponsesRequestOptions = (payload) => {
6381
6741
  return {
6382
6742
  vision: hasVisionInput$1(payload),
6383
6743
  initiator: hasAgentInitiator(payload) ? "agent" : "user"
6384
6744
  };
6385
6745
  };
6746
+ const getResponsesTransportForModel = (selectedModel, options = {}) => {
6747
+ const supportedEndpoints = selectedModel?.supported_endpoints ?? [];
6748
+ const useWebSocket = responsesUtilsDependencies.isResponsesApiWebSocketEnabled();
6749
+ if (options.compactType !== 1 && useWebSocket && supportedEndpoints.includes("ws:/responses")) return "websocket";
6750
+ if (supportedEndpoints.includes("/responses")) return "http";
6751
+ return null;
6752
+ };
6386
6753
  const hasAgentInitiator = (payload) => {
6387
6754
  const items = getPayloadItems(payload);
6388
6755
  if (isForceAgentEnabled()) return items.some((item) => isAgentRole(item));
@@ -6407,7 +6774,7 @@ const createCompactionContextManagement = (compactThreshold) => [{
6407
6774
  }];
6408
6775
  const applyResponsesApiContextManagement = (payload, maxPromptTokens) => {
6409
6776
  if (payload.context_management !== void 0) return;
6410
- if (!isResponsesApiContextManagementModel(payload.model)) return;
6777
+ if (!responsesUtilsDependencies.isResponsesApiContextManagementModel(payload.model)) return;
6411
6778
  payload.context_management = createCompactionContextManagement(resolveResponsesCompactThreshold(maxPromptTokens));
6412
6779
  };
6413
6780
  const compactInputByLatestCompaction = (payload) => {
@@ -6677,6 +7044,14 @@ const logger$3 = createHandlerLogger("messages-handler");
6677
7044
  const CHAT_COMPLETIONS_ENDPOINT = "/chat/completions";
6678
7045
  const RESPONSES_ENDPOINT$1 = "/responses";
6679
7046
  const MESSAGES_ENDPOINT = "/v1/messages";
7047
+ const getProviderConfigResolver = (c) => {
7048
+ return c.get("providerConfigResolver") ?? getProviderConfig;
7049
+ };
7050
+ const resolveProviderTargetModelAlias = (model, providerConfigResolver) => {
7051
+ const targetModel = resolveModelAlias(model);
7052
+ if (targetModel === model) return null;
7053
+ return resolveExistingProviderModelAlias(targetModel, providerConfigResolver);
7054
+ };
6680
7055
  function normalizeProviderAliasUsage(usage) {
6681
7056
  const tokensInput = usage.inputTokens === void 0 ? void 0 : Math.max(0, usage.inputTokens);
6682
7057
  const tokensCachedInput = usage.cacheReadInputTokens;
@@ -6769,11 +7144,12 @@ async function handleProviderAliasCompletion(c, options) {
6769
7144
  }
6770
7145
  async function handleCompletion(c) {
6771
7146
  const anthropicPayload = await c.req.json();
6772
- const providerModelAlias = parseProviderModelAlias(anthropicPayload.model);
6773
- if (providerModelAlias) return await handleProviderAliasCompletion(c, {
7147
+ const providerConfigResolver = getProviderConfigResolver(c);
7148
+ const providerTargetModelAlias = resolveExistingProviderModelAlias(anthropicPayload.model, providerConfigResolver) ?? resolveProviderTargetModelAlias(anthropicPayload.model, providerConfigResolver);
7149
+ if (providerTargetModelAlias) return await handleProviderAliasCompletion(c, {
6774
7150
  payload: anthropicPayload,
6775
- provider: providerModelAlias.provider,
6776
- providerModel: providerModelAlias.model
7151
+ provider: providerTargetModelAlias.provider,
7152
+ providerModel: providerTargetModelAlias.model
6777
7153
  });
6778
7154
  await checkRateLimit(state);
6779
7155
  const store = getRequestHistoryStore();
@@ -7010,11 +7386,15 @@ const handleWithChatCompletions = async (params) => {
7010
7386
  };
7011
7387
  const handleWithResponsesApi = async (params) => {
7012
7388
  const { c, anthropicPayload, openAIPayload, subagentMarker, sessionId, selectedModel, instr, compactType } = params;
7013
- const responsesPayload = translateAnthropicMessagesToResponsesPayload(anthropicPayload, selectedModel.id);
7389
+ const responsesPayload = translateAnthropicMessagesToResponsesPayload(anthropicPayload, {
7390
+ modelOverride: selectedModel.id,
7391
+ subagentAgentId: subagentMarker?.agent_id
7392
+ });
7014
7393
  applyResponsesApiContextManagement(responsesPayload, selectedModel.capabilities.limits.max_prompt_tokens);
7015
7394
  compactInputByLatestCompaction(responsesPayload);
7016
7395
  debugJson(logger$3, "Translated Responses payload:", responsesPayload);
7017
7396
  const { vision, initiator } = getResponsesRequestOptions(responsesPayload);
7397
+ const transport = getResponsesTransportForModel(selectedModel, { compactType }) ?? "http";
7018
7398
  const effectiveInitiator = resolveEffectiveInitiator(initiator, {
7019
7399
  isCompact: compactType !== 0,
7020
7400
  isSubagent: Boolean(subagentMarker)
@@ -7030,7 +7410,8 @@ const handleWithResponsesApi = async (params) => {
7030
7410
  subagentMarker,
7031
7411
  sessionId,
7032
7412
  compactType,
7033
- requestId: instr.requestId
7413
+ requestId: instr.requestId,
7414
+ transport
7034
7415
  }, ctx);
7035
7416
  instr.confirmAffinity?.();
7036
7417
  instr.confirmOwnership?.();
@@ -7840,6 +8221,7 @@ const handleResponses = async (c) => {
7840
8221
  compactInputByLatestCompaction(upstreamPayload);
7841
8222
  const premiumRemainingBefore = account.premiumRemaining;
7842
8223
  const premiumUnlimitedBefore = account.unlimited;
8224
+ const transport = getResponsesTransportForModel(selectedModel) ?? "http";
7843
8225
  const { vision, initiator } = getResponsesRequestOptions(upstreamPayload);
7844
8226
  request.initiator = initiator;
7845
8227
  if (state.manualApprove) await awaitApproval();
@@ -7858,7 +8240,8 @@ const handleResponses = async (c) => {
7858
8240
  vision,
7859
8241
  initiator,
7860
8242
  premiumRemainingBefore,
7861
- premiumUnlimitedBefore
8243
+ premiumUnlimitedBefore,
8244
+ transport
7862
8245
  });
7863
8246
  return handleNonStreamingResponses({
7864
8247
  c,
@@ -7871,7 +8254,8 @@ const handleResponses = async (c) => {
7871
8254
  vision,
7872
8255
  initiator,
7873
8256
  premiumRemainingBefore,
7874
- premiumUnlimitedBefore
8257
+ premiumUnlimitedBefore,
8258
+ transport
7875
8259
  });
7876
8260
  };
7877
8261
  async function observeRequestError(accountId, error, affinity) {
@@ -7979,7 +8363,7 @@ function extractUsageFromChunkData(data) {
7979
8363
  }
7980
8364
  }
7981
8365
  async function handleStreamingResponses(params) {
7982
- const { c, store, request, payload, selection, clientModel, accountCtx, vision, initiator, premiumRemainingBefore, premiumUnlimitedBefore } = params;
8366
+ const { c, store, request, payload, selection, clientModel, accountCtx, vision, initiator, premiumRemainingBefore, premiumUnlimitedBefore, transport } = params;
7983
8367
  let response;
7984
8368
  try {
7985
8369
  response = await createResponses(payload, {
@@ -7987,7 +8371,8 @@ async function handleStreamingResponses(params) {
7987
8371
  initiator,
7988
8372
  upstreamRequestId: request.upstreamRequestId,
7989
8373
  sessionId: request.upstreamSessionId,
7990
- requestId: request.requestId
8374
+ requestId: request.requestId,
8375
+ transport
7991
8376
  }, accountCtx);
7992
8377
  selection.confirmAffinity?.();
7993
8378
  } catch (error) {
@@ -8175,7 +8560,7 @@ async function streamResponsesAndLog(params) {
8175
8560
  }
8176
8561
  }
8177
8562
  async function handleNonStreamingResponses(params) {
8178
- const { c, store, request, payload, selection, clientModel, accountCtx, vision, initiator, premiumRemainingBefore, premiumUnlimitedBefore } = params;
8563
+ const { c, store, request, payload, selection, clientModel, accountCtx, vision, initiator, premiumRemainingBefore, premiumUnlimitedBefore, transport } = params;
8179
8564
  const { account, reservation, selectedModel, endpoint, costUnits } = selection;
8180
8565
  let usage = {};
8181
8566
  let errorState = { httpStatus: 200 };
@@ -8186,7 +8571,8 @@ async function handleNonStreamingResponses(params) {
8186
8571
  initiator,
8187
8572
  upstreamRequestId: request.upstreamRequestId,
8188
8573
  sessionId: request.upstreamSessionId,
8189
- requestId: request.requestId
8574
+ requestId: request.requestId,
8575
+ transport
8190
8576
  }, accountCtx);
8191
8577
  if (isAsyncIterable$1(response)) throw new Error("Upstream returned a stream unexpectedly");
8192
8578
  selection.confirmAffinity?.();
@@ -8337,4 +8723,4 @@ server.route("/:provider/v1/models", providerModelRoutes);
8337
8723
  //#endregion
8338
8724
  export { server };
8339
8725
 
8340
- //# sourceMappingURL=server-BDCnb3Ao.js.map
8726
+ //# sourceMappingURL=server-DxQsi1x2.js.map