@nick3/copilot-api 1.9.2 → 1.10.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/README.md +114 -89
  2. package/README.zh-CN.md +114 -86
  3. package/dist/{account-MllYSdRC.js → account-COtMmvzU.js} +20 -24
  4. package/dist/account-COtMmvzU.js.map +1 -0
  5. package/dist/admin/assets/index-CBMFCvqO.css +1 -0
  6. package/dist/admin/assets/index-DG4TRVMu.js +110 -0
  7. package/dist/admin/index.html +2 -2
  8. package/dist/{auth-DZoQA-kn.js → auth-B0y-2njL.js} +103 -112
  9. package/dist/auth-B0y-2njL.js.map +1 -0
  10. package/dist/{check-usage-DEbsehjH.js → check-usage-DdevqHE5.js} +6 -7
  11. package/dist/{check-usage-DEbsehjH.js.map → check-usage-DdevqHE5.js.map} +1 -1
  12. package/dist/{debug-BJfZVBB7.js → debug-BMo6ltbp.js} +6 -6
  13. package/dist/debug-BMo6ltbp.js.map +1 -0
  14. package/dist/{get-copilot-token-4mCKt94e.js → get-copilot-token-8Rm-rVsp.js} +3 -4
  15. package/dist/{get-copilot-token-4mCKt94e.js.map → get-copilot-token-8Rm-rVsp.js.map} +1 -1
  16. package/dist/main.js +6 -7
  17. package/dist/main.js.map +1 -1
  18. package/dist/{paths-DGlr310R.js → paths-CclKwouX.js} +3 -5
  19. package/dist/{paths-DGlr310R.js.map → paths-CclKwouX.js.map} +1 -1
  20. package/dist/{poll-access-token-Dvk6Ho0R.js → poll-access-token-BAgM2-7k.js} +65 -21
  21. package/dist/poll-access-token-BAgM2-7k.js.map +1 -0
  22. package/dist/{accounts-manager-BM66oT38.js → proxy-BwmADhKh.js} +494 -52
  23. package/dist/proxy-BwmADhKh.js.map +1 -0
  24. package/dist/request-outbound-BJjWS_jF.js +2 -0
  25. package/dist/{request-outbound-qyTeXbzy.js → request-outbound-Pu1kp2x8.js} +16 -9
  26. package/dist/request-outbound-Pu1kp2x8.js.map +1 -0
  27. package/dist/{server-DR9ZR_MN.js → server-DxQsi1x2.js} +2106 -935
  28. package/dist/server-DxQsi1x2.js.map +1 -0
  29. package/dist/{start-DDhYUFQR.js → start-8QHzPrcg.js} +11 -60
  30. package/dist/start-8QHzPrcg.js.map +1 -0
  31. package/package.json +20 -7
  32. package/dist/account-MllYSdRC.js.map +0 -1
  33. package/dist/accounts-manager-BM66oT38.js.map +0 -1
  34. package/dist/admin/assets/index-8eGib92I.js +0 -107
  35. package/dist/admin/assets/index-B2qj1asn.css +0 -1
  36. package/dist/auth-DZoQA-kn.js.map +0 -1
  37. package/dist/debug-BJfZVBB7.js.map +0 -1
  38. package/dist/poll-access-token-Dvk6Ho0R.js.map +0 -1
  39. package/dist/request-outbound-DhI9-SrV.js +0 -4
  40. package/dist/request-outbound-qyTeXbzy.js.map +0 -1
  41. package/dist/server-DR9ZR_MN.js.map +0 -1
  42. package/dist/start-DDhYUFQR.js.map +0 -1
@@ -1,23 +1,22 @@
1
- import { A as captureOutboundHeadersSnapshot, D as prepareMessageProxyHeaders, E as prepareInteractionHeaders, M as requestContext, N as resolveTraceId, O as accountFromState, T as prepareForCompact, _ as HTTPError, b as copilotHeaders, c as getUUID, d as parseUserIdMetadata, f as resolveAffinityKey, g as getCopilotUsage, h as getDeviceCode, k as state, l as isNullish, m as getGitHubUser, o as generateRequestIdFromPayload, p as sleep, s as getRootSessionId, t as pollAccessToken, u as normalizeStableSessionId, v as forwardError, w as normalizeDomain, y as copilotBaseUrl } from "./poll-access-token-Dvk6Ho0R.js";
2
- import { _ as DEFAULT_IDENTITY_ENTERPRISE_DOMAIN, a as getAccountClientIdentityByLoginAndApp, b as getCurrentIdentityEnvironment, d as loadRegistry, g as saveRegistry, h as saveAccountToken, l as listAccountsFromRegistry, m as removeAccountToken, p as removeAccountFromRegistry, r as addAccountToRegistry, t as isAccountType } from "./account-MllYSdRC.js";
3
- import { r as ensurePaths, t as PATHS } from "./paths-DGlr310R.js";
4
- import "./get-copilot-token-4mCKt94e.js";
5
- import { i as getRequestOutboundStore, r as getRedactedHeaderKeys } from "./request-outbound-qyTeXbzy.js";
6
- import { A as isMessageStartInputTokensFallbackEnabled, C as getModelAliasesInfo, D as getSmallModel, E as getReasoningEffortForModel, F as resolveModelAlias, I as shouldCompactUseSmallModel, M as isResponsesApiContextManagementModel, N as isResponsesApiWebSearchEnabled, O as isAccountAffinityEnabled, P as mergeConfigWithDefaults, S as getModelAliases, T as getProviderConfig, _ as getAnthropicApiKey, a as getClientIpInfo, b as getExtraPromptForModel, c as normalizeChatCompletionsUsage, d as toLocalDateString, f as copilotFetch, g as getAliasTargetSet, h as PROVIDER_TYPE_ANTHROPIC, i as extractResponsesUsageFromStreamEvent, j as isMessagesApiEnabled, k as isForceAgentEnabled, l as normalizeEmbeddingsUsage, m as isDevModeEnabled, n as applySharedSessionAffinityRetention, o as getRequestHistoryStore, p as flushPendingCapture, r as extractResponsesUsageFromResult, s as getStatsStore, t as accountsManager, u as normalizeMessagesUsage, v as getClaudeTokenMultiplier, w as getModelRefreshIntervalMs, x as getLogLevel, y as getConfig } from "./accounts-manager-BM66oT38.js";
1
+ import { A as state, D as prepareInteractionHeaders, E as prepareForCompact, I as compactAutoContinuePromptStarts, L as compactMessageSections, N as requestContext, O as prepareMessageProxyHeaders, P as resolveTraceId, S as copilotWebSocketHeaders, T as normalizeDomain, _ as HTTPError, b as copilotHeaders, c as getUUID, d as parseUserIdMetadata, f as resolveAffinityKey, g as getCopilotUsage, h as getDeviceCode, j as captureOutboundHeadersSnapshot, k as accountFromState, l as isNullish, m as getGitHubUser, o as generateRequestIdFromPayload, p as sleep, s as getRootSessionId, t as pollAccessToken, u as normalizeStableSessionId, v as forwardError, y as copilotBaseUrl, z as compactSystemPromptStarts } from "./poll-access-token-BAgM2-7k.js";
2
+ import { a as getAccountClientIdentityByLoginAndApp, b as getCurrentIdentityEnvironment, d as loadRegistry, g as saveRegistry, h as saveAccountToken, l as listAccountsFromRegistry, m as removeAccountToken, p as removeAccountFromRegistry, r as addAccountToRegistry, t as isAccountType } from "./account-COtMmvzU.js";
3
+ import { r as ensurePaths, t as PATHS } from "./paths-CclKwouX.js";
4
+ import { i as getRequestOutboundStore, r as getRedactedHeaderKeys } from "./request-outbound-Pu1kp2x8.js";
5
+ import { A as getModelRefreshIntervalMs, B as isResponsesApiWebSocketEnabled, C as getAnthropicApiKey, D as getLogLevel, E as getExtraPromptForModel, F as isForceAgentEnabled, H as resolveModelAlias, I as isMessageStartInputTokensFallbackEnabled, L as isMessagesApiEnabled, M as getReasoningEffortForModel, N as getSmallModel, O as getModelAliases, P as isAccountAffinityEnabled, R as isResponsesApiContextManagementModel, S as getAliasTargetSet, T as getConfig, U as shouldCompactUseSmallModel, V as mergeConfigWithDefaults, _ as toLocalDateString, b as isDevModeEnabled, c as applySharedSessionAffinityRetention, d as getClientIpInfo, f as getRequestHistoryStore, g as normalizeMessagesUsage, h as normalizeEmbeddingsUsage, j as getProviderConfig, k as getModelAliasesInfo, l as extractResponsesUsageFromResult, m as normalizeChatCompletionsUsage, o as updateQuotaRefreshSchedulerFromConfig, p as getStatsStore, s as accountsManager, t as getProxyEnvDispatcher, u as extractResponsesUsageFromStreamEvent, v as copilotFetch, w as getClaudeTokenMultiplier, x as PROVIDER_TYPE_ANTHROPIC, y as flushPendingCapture, z as isResponsesApiWebSearchEnabled } from "./proxy-BwmADhKh.js";
7
6
  import consola from "consola";
8
7
  import fs, { readFile } from "node:fs/promises";
9
- import { randomUUID, timingSafeEqual } from "node:crypto";
8
+ import { createHash, randomUUID, timingSafeEqual } from "node:crypto";
10
9
  import * as path$1 from "node:path";
11
10
  import path from "node:path";
11
+ import { fileURLToPath } from "node:url";
12
+ import fs$1, { existsSync } from "node:fs";
12
13
  import { Hono } from "hono";
13
14
  import { cors } from "hono/cors";
14
15
  import { logger } from "hono/logger";
15
- import fs$1, { existsSync } from "node:fs";
16
16
  import { streamSSE } from "hono/streaming";
17
17
  import { events } from "fetch-event-stream";
18
- import { fileURLToPath } from "node:url";
19
18
  import util from "node:util";
20
-
19
+ import { WebSocket } from "undici";
21
20
  //#region src/lib/request-auth.ts
22
21
  const LEGACY_API_KEY_ENV_VAR = "COPILOT_API_KEY";
23
22
  let warnedLegacyEnvFallback = false;
@@ -88,8 +87,8 @@ function timingSafeKeyCompare(a, b) {
88
87
  }
89
88
  function createAuthMiddleware(options = {}) {
90
89
  const getApiKeys = options.getApiKeys ?? getConfiguredApiKeys;
91
- const allowUnauthenticatedPaths = new Set((options.allowUnauthenticatedPaths ?? ["/"]).map((path$2) => normalizePathname(path$2)));
92
- const allowUnauthenticatedPathPrefixes = (options.allowUnauthenticatedPathPrefixes ?? []).map((path$2) => normalizePathname(path$2));
90
+ const allowUnauthenticatedPaths = new Set((options.allowUnauthenticatedPaths ?? ["/"]).map((path) => normalizePathname(path)));
91
+ const allowUnauthenticatedPathPrefixes = (options.allowUnauthenticatedPathPrefixes ?? []).map((path) => normalizePathname(path));
93
92
  const allowOptionsBypass = options.allowOptionsBypass ?? true;
94
93
  return async (c, next) => {
95
94
  if (allowOptionsBypass && c.req.method === "OPTIONS") return next();
@@ -103,7 +102,6 @@ function createAuthMiddleware(options = {}) {
103
102
  return next();
104
103
  };
105
104
  }
106
-
107
105
  //#endregion
108
106
  //#region src/lib/trace.ts
109
107
  const traceIdMiddleware = async (c, next) => {
@@ -120,7 +118,6 @@ const traceIdMiddleware = async (c, next) => {
120
118
  await next();
121
119
  });
122
120
  };
123
-
124
121
  //#endregion
125
122
  //#region src/routes/admin-api/auth-sessions.ts
126
123
  function buildOauthUrls(enterpriseDomain) {
@@ -277,7 +274,6 @@ var AuthSessionManager = class {
277
274
  }
278
275
  };
279
276
  const authSessionManager = new AuthSessionManager();
280
-
281
277
  //#endregion
282
278
  //#region src/routes/admin-api/config-writer.ts
283
279
  async function writeConfigFile(config) {
@@ -298,7 +294,6 @@ async function writeConfigFile(config) {
298
294
  throw error;
299
295
  }
300
296
  }
301
-
302
297
  //#endregion
303
298
  //#region src/lib/models.ts
304
299
  const getAvailableModels = () => (accountsManager.getFirstAccountModels()?.data ?? []).filter((model) => model.model_picker_enabled || model.capabilities.type === "embeddings");
@@ -349,7 +344,6 @@ const _normalizeSdkModelId = (sdkModelId) => {
349
344
  version: pattern5[1]
350
345
  };
351
346
  };
352
-
353
347
  //#endregion
354
348
  //#region src/lib/copilot-rate-limit.ts
355
349
  const copilotRateLimitTypes = ["session", "weekly"];
@@ -357,10 +351,14 @@ const copilotRateLimitHeaders = {
357
351
  session: "x-usage-ratelimit-session",
358
352
  weekly: "x-usage-ratelimit-weekly"
359
353
  };
354
+ const copilotQuotaSnapshotKeys = {
355
+ session: "5Hour-Session-RateLimits",
356
+ weekly: "Weekly-Session-RateLimits"
357
+ };
360
358
  const hasGetMethod = (headers) => {
361
359
  return "get" in headers && typeof headers.get === "function";
362
360
  };
363
- const getHeaderValue = (headers, headerName) => {
361
+ const getHeaderValue$1 = (headers, headerName) => {
364
362
  if (hasGetMethod(headers)) return headers.get(headerName);
365
363
  const normalizedHeaderName = headerName.toLowerCase();
366
364
  return Object.entries(headers).find(([key]) => key.toLowerCase() === normalizedHeaderName)?.[1] ?? null;
@@ -377,7 +375,7 @@ const parseCopilotRateLimitHeader = (headerValue) => {
377
375
  };
378
376
  const getCopilotRateLimitUsage = (headers, type) => {
379
377
  const headerName = copilotRateLimitHeaders[type];
380
- const headerValue = getHeaderValue(headers, headerName);
378
+ const headerValue = getHeaderValue$1(headers, headerName);
381
379
  if (!headerValue) return null;
382
380
  const parsed = parseCopilotRateLimitHeader(headerValue);
383
381
  if (!parsed) return null;
@@ -386,23 +384,45 @@ const getCopilotRateLimitUsage = (headers, type) => {
386
384
  ...parsed
387
385
  };
388
386
  };
387
+ const getCopilotRateLimitUsageFromSnapshots = (snapshots, type) => {
388
+ const snapshot = snapshots?.[copilotQuotaSnapshotKeys[type]];
389
+ if (!isCopilotQuotaSnapshot(snapshot)) return null;
390
+ return {
391
+ remaining: String(snapshot.percent_remaining),
392
+ resetAt: snapshot.reset_date,
393
+ type
394
+ };
395
+ };
389
396
  const logCopilotRateLimits = (headers) => {
390
397
  for (const type of copilotRateLimitTypes) {
391
398
  const usage = getCopilotRateLimitUsage(headers, type);
392
399
  if (!usage) continue;
393
- const d = new Date(usage.resetAt);
394
- const dateStr = Number.isNaN(d.getTime()) ? usage.resetAt : d.toLocaleString();
395
- consola.info(`Copilot ${usage.type} quota remaining: ${usage.remaining}, resets at: ${dateStr}`);
400
+ logCopilotRateLimitUsage(usage);
396
401
  }
397
402
  };
398
-
403
+ const logCopilotQuotaSnapshots = (snapshots) => {
404
+ for (const type of copilotRateLimitTypes) {
405
+ const usage = getCopilotRateLimitUsageFromSnapshots(snapshots, type);
406
+ if (!usage) continue;
407
+ logCopilotRateLimitUsage(usage);
408
+ }
409
+ };
410
+ const logCopilotRateLimitUsage = (usage) => {
411
+ const d = new Date(usage.resetAt);
412
+ const dateStr = Number.isNaN(d.getTime()) ? usage.resetAt : d.toLocaleString();
413
+ consola.info(`Copilot ${usage.type} quota remaining: ${usage.remaining}, resets at: ${dateStr}`);
414
+ };
415
+ const isCopilotQuotaSnapshot = (value) => {
416
+ if (!value || typeof value !== "object") return false;
417
+ const record = value;
418
+ return typeof record.entitlement === "string" && typeof record.percent_remaining === "number" && typeof record.overage_permitted === "boolean" && typeof record.overage_count === "number" && typeof record.reset_date === "string";
419
+ };
399
420
  //#endregion
400
421
  //#region src/lib/request-initiator.ts
401
422
  function resolveEffectiveInitiator(baseInitiator, options) {
402
423
  if (options.isCompact || options.isSubagent) return "agent";
403
424
  return baseInitiator;
404
425
  }
405
-
406
426
  //#endregion
407
427
  //#region src/services/copilot/create-chat-completions.ts
408
428
  function isGpt5MiniFamily(modelId) {
@@ -425,7 +445,7 @@ const getChatInitiator = (messages) => {
425
445
  const createChatCompletions = async (payload, account, options) => {
426
446
  const ctx = account ?? accountFromState();
427
447
  if (!ctx.copilotToken) throw new Error("Copilot token not found");
428
- const enableVision = payload.messages.some((x) => typeof x.content !== "string" && x.content?.some((x$1) => x$1.type === "image_url"));
448
+ const enableVision = payload.messages.some((x) => typeof x.content !== "string" && x.content?.some((x) => x.type === "image_url"));
429
449
  const effectiveInitiator = resolveEffectiveInitiator(options?.initiator ?? getChatInitiator(payload.messages), {
430
450
  isCompact: Boolean(options?.compactType),
431
451
  isSubagent: Boolean(options?.subagentMarker)
@@ -444,7 +464,8 @@ const createChatCompletions = async (payload, account, options) => {
444
464
  body: JSON.stringify(upstreamPayload)
445
465
  }, {
446
466
  requestId: options?.requestId,
447
- callSite: "chat-completions"
467
+ callSite: "chat-completions",
468
+ fetchImpl: options?.fetchImpl
448
469
  });
449
470
  logCopilotRateLimits(response.headers);
450
471
  if (!response.ok) {
@@ -454,7 +475,6 @@ const createChatCompletions = async (payload, account, options) => {
454
475
  if (payload.stream) return events(response);
455
476
  return await response.json();
456
477
  };
457
-
458
478
  //#endregion
459
479
  //#region src/lib/tokenizer.ts
460
480
  const ENCODING_MAP = {
@@ -485,7 +505,10 @@ const calculateToolCallsTokens = (toolCalls, encoder, constants) => {
485
505
  const calculateContentPartsTokens = (contentParts, encoder) => {
486
506
  let tokens = 0;
487
507
  for (const part of contentParts) if (part.type === "image_url") tokens += encoder.encode(part.image_url.url).length + 85;
488
- else if (part.text) tokens += encoder.encode(part.text).length;
508
+ else if (part.type === "file") {
509
+ tokens += encoder.encode(part.file.file_data).length;
510
+ if (part.file.filename) tokens += encoder.encode(part.file.filename).length;
511
+ } else if (part.text) tokens += encoder.encode(part.text).length;
489
512
  return tokens;
490
513
  };
491
514
  /**
@@ -669,29 +692,6 @@ const getTokenCount = async (payload, model) => {
669
692
  output: outputTokens
670
693
  };
671
694
  };
672
-
673
- //#endregion
674
- //#region src/lib/compact.ts
675
- const COMPACT_REQUEST = 1;
676
- const COMPACT_AUTO_CONTINUE = 2;
677
- const compactSystemPromptStart = "You are a helpful AI assistant tasked with summarizing conversations";
678
- const compactOpenCodeSystemPromptStart = "You are an anchored context summarization assistant for coding sessions.";
679
- const compactSystemPromptStarts = [compactSystemPromptStart, compactOpenCodeSystemPromptStart];
680
- const compactTextOnlyGuard = "CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.";
681
- const compactSummaryPromptStart = "Your task is to create a detailed summary of the conversation so far";
682
- const compactAutoContinueClaudeCodePromptStart = "This session is being continued from a previous conversation that ran out of context. The summary below covers the earlier portion of the conversation.";
683
- const compactAutoContinueOpenCodePromptStart = "Continue if you have next steps, or stop and ask for clarification if you are unsure how to proceed.";
684
- const compactAutoContinueOpenCodePromptStart2 = "The previous request exceeded the provider's size limit due to large media attachments. The conversation was compacted and media files were removed from context.";
685
- const compactAutoContinuePromptStarts = [
686
- compactAutoContinueClaudeCodePromptStart,
687
- compactAutoContinueOpenCodePromptStart,
688
- compactAutoContinueOpenCodePromptStart2
689
- ];
690
- const compactMessageSections = ["Pending Tasks:", "Current Work:"];
691
-
692
- //#endregion
693
- //#region src/routes/messages/preprocess.ts
694
- const TOOL_REFERENCE_TURN_BOUNDARY = "Tool loaded.";
695
695
  const IDE_EXECUTE_CODE_TOOL = "mcp__ide__executeCode";
696
696
  const IDE_GET_DIAGNOSTICS_TOOL = "mcp__ide__getDiagnostics";
697
697
  const IDE_GET_DIAGNOSTICS_DESCRIPTION = "Get language diagnostics from VS Code. Returns errors, warnings, information, and hints for files in the workspace.";
@@ -704,7 +704,7 @@ const getCompactCandidateText = (message) => {
704
704
  const isCompactMessage = (lastMessage) => {
705
705
  const text = getCompactCandidateText(lastMessage);
706
706
  if (!text) return false;
707
- return text.includes(compactTextOnlyGuard) && text.includes(compactSummaryPromptStart) && compactMessageSections.some((section) => text.includes(section));
707
+ return text.includes("CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.") && text.includes("Your task is to create a detailed summary of the conversation so far") && compactMessageSections.some((section) => text.includes(section));
708
708
  };
709
709
  const isCompactAutoContinueMessage = (lastMessage) => {
710
710
  const text = getCompactCandidateText(lastMessage);
@@ -712,12 +712,12 @@ const isCompactAutoContinueMessage = (lastMessage) => {
712
712
  };
713
713
  const getCompactType = (anthropicPayload) => {
714
714
  const lastMessage = anthropicPayload.messages.at(-1);
715
- if (lastMessage && isCompactMessage(lastMessage)) return COMPACT_REQUEST;
716
- if (lastMessage && isCompactAutoContinueMessage(lastMessage)) return COMPACT_AUTO_CONTINUE;
715
+ if (lastMessage && isCompactMessage(lastMessage)) return 1;
716
+ if (lastMessage && isCompactAutoContinueMessage(lastMessage)) return 2;
717
717
  const system = anthropicPayload.system;
718
- if (typeof system === "string") return compactSystemPromptStarts.some((promptStart) => system.startsWith(promptStart)) ? COMPACT_REQUEST : 0;
718
+ if (typeof system === "string") return compactSystemPromptStarts.some((promptStart) => system.startsWith(promptStart)) ? 1 : 0;
719
719
  if (!Array.isArray(system)) return 0;
720
- if (system.some((msg) => typeof msg.text === "string" && compactSystemPromptStarts.some((promptStart) => msg.text.startsWith(promptStart)))) return COMPACT_REQUEST;
720
+ if (system.some((msg) => typeof msg.text === "string" && compactSystemPromptStarts.some((promptStart) => msg.text.startsWith(promptStart)))) return 1;
721
721
  return 0;
722
722
  };
723
723
  const mergeContentWithText = (tr, textBlock) => {
@@ -778,9 +778,9 @@ const assignAttachmentsToToolResults = (target, attachments, options) => {
778
778
  if (attachments.length === 0) return;
779
779
  if (toolResultIndices.length > 0 && toolResultIndices.length === attachments.length) {
780
780
  for (const [index, toolResultIndex] of toolResultIndices.entries()) {
781
- const currentAttachments$1 = target.get(toolResultIndex);
782
- if (currentAttachments$1) {
783
- currentAttachments$1.push(attachments[index]);
781
+ const currentAttachments = target.get(toolResultIndex);
782
+ if (currentAttachments) {
783
+ currentAttachments.push(attachments[index]);
784
784
  continue;
785
785
  }
786
786
  target.set(toolResultIndex, [attachments[index]]);
@@ -872,7 +872,7 @@ const stripToolReferenceTurnBoundary = (anthropicPayload) => {
872
872
  for (const msg of anthropicPayload.messages) {
873
873
  if (msg.role !== "user" || !Array.isArray(msg.content)) continue;
874
874
  if (!msg.content.some((block) => block.type === "tool_result" && hasToolRef(block))) continue;
875
- msg.content = msg.content.filter((block) => block.type !== "text" || block.text.trim() !== TOOL_REFERENCE_TURN_BOUNDARY);
875
+ msg.content = msg.content.filter((block) => block.type !== "text" || block.text.trim() !== "Tool loaded.");
876
876
  }
877
877
  };
878
878
  const mergeToolResultForClaude = (anthropicPayload, options) => {
@@ -947,7 +947,6 @@ const prepareMessagesApiPayload = (payload, selectedModel) => {
947
947
  payload.output_config = { effort };
948
948
  }
949
949
  };
950
-
951
950
  //#endregion
952
951
  //#region src/routes/messages/utils.ts
953
952
  function mapOpenAIStopReasonToAnthropic(finishReason) {
@@ -959,14 +958,17 @@ function mapOpenAIStopReasonToAnthropic(finishReason) {
959
958
  content_filter: "end_turn"
960
959
  }[finishReason];
961
960
  }
962
- const estimateInputTokens = async (payload, selectedModel, logger$7) => {
961
+ const estimateInputTokens = async (payload, selectedModel, logger) => {
963
962
  try {
964
963
  return (await getTokenCount(payload, selectedModel)).input;
965
964
  } catch (error) {
966
- logger$7.warn("Failed to estimate input tokens for message_start", error);
965
+ logger.warn("Failed to estimate input tokens for message_start", error);
967
966
  return;
968
967
  }
969
968
  };
969
+ function stringifyOwnerKeys$1(keys) {
970
+ return keys && keys.length > 0 ? JSON.stringify(keys) : void 0;
971
+ }
970
972
  const isWarmupProbeRequest = (payload) => {
971
973
  const lastMsg = payload.messages.at(-1);
972
974
  if (!lastMsg || lastMsg.role !== "user" || !Array.isArray(lastMsg.content)) return false;
@@ -983,7 +985,7 @@ const isWarmupProbeRequest = (payload) => {
983
985
  return false;
984
986
  };
985
987
  const handleSelectionFailure = (context) => {
986
- const { c, store, requestId, startedAtMs, method, path: path$2, streamRequested, clientModel, clientIp, clientIpSource, userAgent, userId, safetyIdentifier, promptCacheKey, initiator, isSubagent, affinityKeyUsed, affinityKeySource, selectionReason, selection } = context;
988
+ const { c, store, requestId, startedAtMs, method, path, streamRequested, clientModel, clientIp, clientIpSource, userAgent, userId, safetyIdentifier, promptCacheKey, initiator, isSubagent, affinityKeyUsed, affinityKeySource, selectionReason, responsesItemOwnerLookupKeys, selection } = context;
987
989
  const finishedAtMs = Date.now();
988
990
  store.insert({
989
991
  requestId,
@@ -991,7 +993,7 @@ const handleSelectionFailure = (context) => {
991
993
  finishedAtMs,
992
994
  durationMs: finishedAtMs - startedAtMs,
993
995
  method,
994
- path: path$2,
996
+ path,
995
997
  stream: streamRequested,
996
998
  clientModel,
997
999
  clientIp,
@@ -1004,6 +1006,7 @@ const handleSelectionFailure = (context) => {
1004
1006
  isSubagent,
1005
1007
  affinityKeyUsed,
1006
1008
  affinityKeySource,
1009
+ responsesItemOwnerLookupKeysJson: stringifyOwnerKeys$1(responsesItemOwnerLookupKeys),
1007
1010
  httpStatus: selection.reason === "MODEL_NOT_SUPPORTED" ? 400 : 429,
1008
1011
  selectionReason: selectionReason ?? selection.selectionReason,
1009
1012
  selectionFailureReason: selection.reason
@@ -1027,16 +1030,19 @@ const maybeBlockOriginalModelName = (context) => {
1027
1030
  }
1028
1031
  });
1029
1032
  };
1030
-
1031
1033
  //#endregion
1032
1034
  //#region src/routes/messages/non-stream-translation.ts
1033
- const THINKING_TEXT = "Thinking...";
1034
- function translateToOpenAI(payload) {
1035
+ const THINKING_TEXT$1 = "Thinking...";
1036
+ const COPILOT_TOOL_CONTENT_SUPPORT_TYPE = ["array", "image"];
1037
+ function translateToOpenAI(payload, options = {}) {
1035
1038
  const modelId = payload.model;
1036
1039
  const thinkingBudget = getThinkingBudget(payload, getAvailableModels().find((m) => m.id === modelId));
1037
1040
  return {
1038
1041
  model: modelId,
1039
- messages: translateAnthropicMessagesToOpenAI(payload, modelId, thinkingBudget),
1042
+ messages: translateAnthropicMessagesToOpenAI(payload, modelId, {
1043
+ supportPdf: options.supportPdf ?? false,
1044
+ toolContentSupportType: options.toolContentSupportType ?? COPILOT_TOOL_CONTENT_SUPPORT_TYPE
1045
+ }),
1040
1046
  max_tokens: payload.max_tokens,
1041
1047
  stop: payload.stop_sequences,
1042
1048
  stream: payload.stream,
@@ -1059,9 +1065,9 @@ function getThinkingBudget(payload, model) {
1059
1065
  }
1060
1066
  }
1061
1067
  }
1062
- function translateAnthropicMessagesToOpenAI(payload, modelId, _thinkingBudget) {
1068
+ function translateAnthropicMessagesToOpenAI(payload, modelId, capabilities) {
1063
1069
  const systemMessages = handleSystemPrompt(payload.system);
1064
- const otherMessages = payload.messages.flatMap((message) => message.role === "user" ? handleUserMessage(message) : handleAssistantMessage(message, modelId));
1070
+ const otherMessages = payload.messages.flatMap((message) => message.role === "user" ? handleUserMessage(message, capabilities) : handleAssistantMessage(message, modelId, capabilities));
1065
1071
  return [...systemMessages, ...otherMessages];
1066
1072
  }
1067
1073
  function handleSystemPrompt(system) {
@@ -1077,19 +1083,21 @@ function handleSystemPrompt(system) {
1077
1083
  }).join("\n\n")
1078
1084
  }];
1079
1085
  }
1080
- function handleUserMessage(message) {
1086
+ function handleUserMessage(message, capabilities) {
1081
1087
  const newMessages = [];
1082
1088
  if (Array.isArray(message.content)) {
1083
1089
  const toolResultBlocks = message.content.filter((block) => block.type === "tool_result");
1084
1090
  const otherBlocks = message.content.filter((block) => block.type !== "tool_result");
1085
- for (const block of toolResultBlocks) newMessages.push({
1086
- role: "tool",
1087
- tool_call_id: block.tool_use_id,
1088
- content: mapContent(block.content)
1089
- });
1091
+ const movedToolResultUserMessages = [];
1092
+ for (const block of toolResultBlocks) {
1093
+ const result = handleToolResultBlock(block, capabilities);
1094
+ newMessages.push(result.toolMessage);
1095
+ if (result.movedUserMessage) movedToolResultUserMessages.push(result.movedUserMessage);
1096
+ }
1097
+ newMessages.push(...movedToolResultUserMessages);
1090
1098
  if (otherBlocks.length > 0) newMessages.push({
1091
1099
  role: "user",
1092
- content: mapContent(otherBlocks)
1100
+ content: mapContent(otherBlocks, { supportPdf: capabilities.supportPdf })
1093
1101
  });
1094
1102
  } else newMessages.push({
1095
1103
  role: "user",
@@ -1097,20 +1105,74 @@ function handleUserMessage(message) {
1097
1105
  });
1098
1106
  return newMessages;
1099
1107
  }
1100
- function handleAssistantMessage(message, modelId) {
1108
+ function handleToolResultBlock(block, capabilities) {
1109
+ if (typeof block.content === "string") return { toolMessage: createToolMessage(block.tool_use_id, block.content) };
1110
+ if (!Array.isArray(block.content)) return { toolMessage: createToolMessage(block.tool_use_id, "") };
1111
+ const support = getToolContentSupport(capabilities);
1112
+ const hasImage = block.content.some((block) => block.type === "image");
1113
+ const hasDocument = block.content.some((block) => block.type === "document");
1114
+ const content = mapContent(block.content, { supportPdf: capabilities.supportPdf });
1115
+ const hasPdfFile = hasDocument && capabilities.supportPdf;
1116
+ const shouldMoveImageToUserMessage = hasImage && !support.image;
1117
+ const shouldMovePdfToUserMessage = hasPdfFile && !support.pdf;
1118
+ if (shouldMoveImageToUserMessage || shouldMovePdfToUserMessage) return {
1119
+ movedUserMessage: createToolResultUserMessage(block, capabilities.supportPdf),
1120
+ toolMessage: createToolMessage(block.tool_use_id, getTextToolContent(content) || "Rich tool result content was moved to a user message because this upstream does not support it in tool messages.")
1121
+ };
1122
+ const hasRichContent = hasImage || hasPdfFile;
1123
+ if (support.array || hasRichContent) return { toolMessage: createToolMessage(block.tool_use_id, content) };
1124
+ return { toolMessage: createToolMessage(block.tool_use_id, getTextToolContent(content)) };
1125
+ }
1126
+ function getTextToolContent(content) {
1127
+ if (!Array.isArray(content)) return content ?? "";
1128
+ return content.flatMap((part) => part.type === "text" && part.text.length > 0 ? [part.text] : []).join("\n");
1129
+ }
1130
+ function getToolContentSupport(capabilities) {
1131
+ return {
1132
+ array: capabilities.toolContentSupportType.includes("array"),
1133
+ image: capabilities.toolContentSupportType.includes("image"),
1134
+ pdf: capabilities.supportPdf && capabilities.toolContentSupportType.includes("pdf")
1135
+ };
1136
+ }
1137
+ function createToolMessage(toolCallId, content) {
1138
+ return {
1139
+ role: "tool",
1140
+ tool_call_id: toolCallId,
1141
+ content
1142
+ };
1143
+ }
1144
+ function createToolResultUserMessage(block, supportPdf) {
1145
+ const prefix = {
1146
+ type: "text",
1147
+ text: `Tool result for ${block.tool_use_id}:`
1148
+ };
1149
+ const content = mapContent(block.content, { supportPdf });
1150
+ if (Array.isArray(content)) return {
1151
+ role: "user",
1152
+ content: [prefix, ...content]
1153
+ };
1154
+ return {
1155
+ role: "user",
1156
+ content: [prefix, {
1157
+ type: "text",
1158
+ text: content ?? ""
1159
+ }]
1160
+ };
1161
+ }
1162
+ function handleAssistantMessage(message, modelId, capabilities) {
1101
1163
  if (!Array.isArray(message.content)) return [{
1102
1164
  role: "assistant",
1103
1165
  content: mapContent(message.content)
1104
1166
  }];
1105
1167
  const toolUseBlocks = message.content.filter((block) => block.type === "tool_use");
1106
1168
  let thinkingBlocks = message.content.filter((block) => block.type === "thinking");
1107
- if (modelId.startsWith("claude")) thinkingBlocks = thinkingBlocks.filter((b) => b.thinking && b.thinking !== THINKING_TEXT && b.signature && !b.signature.includes("@"));
1108
- const thinkingContents = thinkingBlocks.filter((b) => b.thinking && b.thinking !== THINKING_TEXT).map((b) => b.thinking);
1169
+ if (modelId.startsWith("claude")) thinkingBlocks = thinkingBlocks.filter((b) => b.thinking && b.thinking !== "Thinking..." && b.signature && !b.signature.includes("@"));
1170
+ const thinkingContents = thinkingBlocks.filter((b) => b.thinking && b.thinking !== "Thinking...").map((b) => b.thinking);
1109
1171
  const allThinkingContent = thinkingContents.length > 0 ? thinkingContents.join("\n\n") : void 0;
1110
1172
  const signature = thinkingBlocks.find((b) => b.signature)?.signature;
1111
1173
  return toolUseBlocks.length > 0 ? [{
1112
1174
  role: "assistant",
1113
- content: mapContent(message.content),
1175
+ content: mapContent(message.content, { supportPdf: capabilities.supportPdf }),
1114
1176
  reasoning_text: allThinkingContent,
1115
1177
  reasoning_opaque: signature,
1116
1178
  tool_calls: toolUseBlocks.map((toolUse) => ({
@@ -1123,12 +1185,12 @@ function handleAssistantMessage(message, modelId) {
1123
1185
  }))
1124
1186
  }] : [{
1125
1187
  role: "assistant",
1126
- content: mapContent(message.content),
1188
+ content: mapContent(message.content, { supportPdf: capabilities.supportPdf }),
1127
1189
  reasoning_text: allThinkingContent,
1128
1190
  reasoning_opaque: signature
1129
1191
  }];
1130
1192
  }
1131
- function mapContent(content) {
1193
+ function mapContent(content, options = {}) {
1132
1194
  if (typeof content === "string") return content;
1133
1195
  if (!Array.isArray(content)) return null;
1134
1196
  const contentParts = [];
@@ -1146,7 +1208,7 @@ function mapContent(content) {
1146
1208
  });
1147
1209
  break;
1148
1210
  case "document":
1149
- contentParts.push(createDocumentTextPart());
1211
+ contentParts.push(options.supportPdf ? createDocumentFilePart(block) : createDocumentTextPart());
1150
1212
  break;
1151
1213
  case "tool_reference":
1152
1214
  contentParts.push({
@@ -1155,12 +1217,22 @@ function mapContent(content) {
1155
1217
  });
1156
1218
  break;
1157
1219
  }
1220
+ if (contentParts.length === 0) return "";
1158
1221
  return contentParts;
1159
1222
  }
1160
1223
  function createDocumentTextPart() {
1161
1224
  return {
1162
1225
  type: "text",
1163
- text: "A PDF document was attached, but this api cannot send PDF inputs directly. Analyze using other tools."
1226
+ text: "PDF/document content is not supported by this Chat Completions upstream. Use the available text extracted from the document."
1227
+ };
1228
+ }
1229
/**
 * Converts an Anthropic `document` block into a Chat Completions content part.
 *
 * - Plain-text sources (`source.type === "text"`) carry raw text in `data`,
 *   not base64, so they are forwarded as an ordinary text part instead of
 *   being mislabelled as a base64 data URL.
 * - Sources that provide string `media_type` and `data` become a `file` part
 *   holding a base64 data URL (unchanged behavior for base64 documents).
 * - Anything else (URL/content sources, missing `source`) cannot be expressed
 *   as a data URL; the shared placeholder text part is returned rather than
 *   emitting `data:undefined;base64,undefined` or throwing.
 *
 * @param {object} block Anthropic document content block.
 * @returns {object} A `file` part, or a `text` part when no data URL can be built.
 */
function createDocumentFilePart(block) {
	const source = block?.source;
	if (source?.type === "text" && typeof source.data === "string") return {
		type: "text",
		text: source.data
	};
	if (typeof source?.media_type !== "string" || typeof source.data !== "string") return createDocumentTextPart();
	return {
		type: "file",
		file: {
			file_data: `data:${source.media_type};base64,${source.data}`,
			filename: block.title ?? "document.pdf"
		}
	};
}
1166
1238
  function translateAnthropicToolsToOpenAI(anthropicTools) {
@@ -1205,7 +1277,7 @@ function translateToAnthropic(response) {
1205
1277
  let stopReason = response.choices[0]?.finish_reason ?? null;
1206
1278
  for (const choice of response.choices) {
1207
1279
  const textBlocks = getAnthropicTextBlocks(choice.message.content);
1208
- const thinkBlocks = getAnthropicThinkBlocks(choice.message.reasoning_text, choice.message.reasoning_opaque);
1280
+ const thinkBlocks = getAnthropicThinkBlocks(getOpenAIReasoningText(choice.message), choice.message.reasoning_opaque);
1209
1281
  const toolUseBlocks = getAnthropicToolUseBlocks(choice.message.tool_calls);
1210
1282
  assistantContentBlocks.push(...thinkBlocks, ...textBlocks, ...toolUseBlocks);
1211
1283
  if (choice.finish_reason === "tool_calls" || stopReason === "stop") stopReason = choice.finish_reason;
@@ -1218,12 +1290,24 @@ function translateToAnthropic(response) {
1218
1290
  content: assistantContentBlocks,
1219
1291
  stop_reason: mapOpenAIStopReasonToAnthropic(stopReason),
1220
1292
  stop_sequence: null,
1221
- usage: {
1222
- input_tokens: (response.usage?.prompt_tokens ?? 0) - (response.usage?.prompt_tokens_details?.cached_tokens ?? 0),
1223
- output_tokens: response.usage?.completion_tokens ?? 0,
1224
- ...response.usage?.prompt_tokens_details?.cached_tokens !== void 0 && { cache_read_input_tokens: response.usage.prompt_tokens_details.cached_tokens }
1225
- }
1293
+ usage: mapOpenAIChatCompletionUsage(response)
1294
+ };
1295
+ }
1296
/**
 * Maps the `usage` section of a Chat Completions response onto Anthropic-style
 * usage counters.
 *
 * `input_tokens` is the prompt token count minus cache-read and cache-creation
 * tokens, clamped at zero. The cache counters are only emitted when the
 * upstream reported the corresponding field.
 *
 * @param {object} response Chat Completions response (its `usage` may be absent).
 * @returns {object} Usage object with `input_tokens`, `output_tokens` and optional cache counters.
 */
function mapOpenAIChatCompletionUsage(response) {
	const { usage: upstreamUsage } = response;
	const details = upstreamUsage?.prompt_tokens_details;
	const reportedRead = details?.cached_tokens;
	const reportedCreation = details?.cache_creation_input_tokens;
	const readCount = reportedRead ?? 0;
	const creationCount = reportedCreation ?? 0;
	const uncachedPromptTokens = (upstreamUsage?.prompt_tokens ?? 0) - readCount - creationCount;
	return {
		input_tokens: Math.max(0, uncachedPromptTokens),
		output_tokens: upstreamUsage?.completion_tokens ?? 0,
		// Spreading `false` adds nothing, so each counter appears only when reported.
		...(reportedCreation !== undefined && { cache_creation_input_tokens: creationCount }),
		...(reportedRead !== undefined && { cache_read_input_tokens: readCount })
	};
}
1309
/**
 * Picks the reasoning text from a Chat Completions message: `reasoning_text`
 * when it is neither null nor undefined, otherwise `reasoning_content`.
 *
 * @param {object} message Chat Completions message.
 * @returns {*} The selected reasoning value (may be undefined).
 */
function getOpenAIReasoningText(message) {
	const { reasoning_text: primary } = message;
	if (primary !== null && primary !== undefined) return primary;
	return message.reasoning_content;
}
1228
1312
  function getAnthropicTextBlocks(messageContent) {
1229
1313
  if (typeof messageContent === "string" && messageContent.length > 0) return [{
@@ -1244,7 +1328,7 @@ function getAnthropicThinkBlocks(reasoningText, reasoningOpaque) {
1244
1328
  }];
1245
1329
  if (reasoningOpaque && reasoningOpaque.length > 0) return [{
1246
1330
  type: "thinking",
1247
- thinking: THINKING_TEXT,
1331
+ thinking: THINKING_TEXT$1,
1248
1332
  signature: reasoningOpaque
1249
1333
  }];
1250
1334
  return [];
@@ -1258,7 +1342,6 @@ function getAnthropicToolUseBlocks(toolCalls) {
1258
1342
  input: JSON.parse(toolCall.function.arguments)
1259
1343
  }));
1260
1344
  }
1261
-
1262
1345
  //#endregion
1263
1346
  //#region src/routes/admin-api/replay-translation.ts
1264
1347
  function translateForReplay(input) {
@@ -1276,7 +1359,6 @@ function translateForReplay(input) {
1276
1359
  if (upstreamEndpoint.includes("/responses")) return rawText;
1277
1360
  return null;
1278
1361
  }
1279
-
1280
1362
  //#endregion
1281
1363
  //#region src/routes/admin-api/replay.ts
1282
1364
  const replayRoutes = new Hono();
@@ -1596,7 +1678,6 @@ function requireDevMode(c) {
1596
1678
  } }, 403);
1597
1679
  return null;
1598
1680
  }
1599
-
1600
1681
  //#endregion
1601
1682
  //#region src/routes/admin-api/route.ts
1602
1683
  const ADMIN_TOKEN = process.env.ADMIN_TOKEN?.trim() || void 0;
@@ -1675,8 +1756,10 @@ const CONFIG_KEYS = new Set([
1675
1756
  "modelRefreshIntervalHours",
1676
1757
  "sessionAffinityRetentionDays",
1677
1758
  "useMessagesApi",
1759
+ "useResponsesApiWebSocket",
1678
1760
  "useResponsesApiWebSearch",
1679
- "devMode"
1761
+ "devMode",
1762
+ "quotaRefresh"
1680
1763
  ]);
1681
1764
  const REASONING_EFFORTS = new Set([
1682
1765
  "none",
@@ -1837,7 +1920,7 @@ function applyProviderType(provider, value, field) {
1837
1920
  const parsed = parseOptionalString(value.type, `${field}.type`);
1838
1921
  if ("error" in parsed) return parsed.error;
1839
1922
  if ("value" in parsed) {
1840
- if (parsed.value !== PROVIDER_TYPE_ANTHROPIC) return `${field}.type must be "${PROVIDER_TYPE_ANTHROPIC}"`;
1923
+ if (parsed.value !== "anthropic") return `${field}.type must be "${PROVIDER_TYPE_ANTHROPIC}"`;
1841
1924
  provider.type = PROVIDER_TYPE_ANTHROPIC;
1842
1925
  }
1843
1926
  }
@@ -2103,6 +2186,48 @@ function applyDevModeConfig(next, value) {
2103
2186
  }
2104
2187
  next.devMode = parsed.value;
2105
2188
  }
2189
+ const QUOTA_REFRESH_KEYS = new Set([
2190
+ "enabled",
2191
+ "intervalMinutes",
2192
+ "startupDelaySeconds",
2193
+ "staggerMinSeconds",
2194
+ "staggerMaxSeconds"
2195
+ ]);
2196
/**
 * Validates a `quotaRefresh` config patch.
 *
 * @param {*} value Raw patch value.
 * @returns {object} `{ clear: true }` for null/undefined, `{ error }` when the
 *   value is not a plain object, has an unsupported key, or a field fails its
 *   validator (the validator's own result object is returned in that case),
 *   otherwise `{ value }` holding only the fields that produced a value.
 */
function parseQuotaRefreshConfig(value) {
	if (value === null || value === undefined) return { clear: true };
	if (!isPlainObject(value)) return { error: "quotaRefresh must be an object" };
	const unsupportedKey = Object.keys(value).find((key) => !QUOTA_REFRESH_KEYS.has(key));
	if (unsupportedKey !== undefined) return { error: `quotaRefresh.${unsupportedKey} is not supported` };
	// Field name -> validator, in the order the fields are checked.
	const fieldParsers = [
		["enabled", parseOptionalBoolean],
		["intervalMinutes", parseOptionalNonNegativeNumber],
		["startupDelaySeconds", parseOptionalNonNegativeNumber],
		["staggerMinSeconds", parseOptionalNonNegativeNumber],
		["staggerMaxSeconds", parseOptionalNonNegativeNumber]
	];
	const out = {};
	for (const [field, parseField] of fieldParsers) {
		if (!Object.hasOwn(value, field)) continue;
		const outcome = parseField(value[field], `quotaRefresh.${field}`);
		if ("error" in outcome) return outcome;
		if ("value" in outcome) out[field] = outcome.value;
	}
	return { value: out };
}
2219
/**
 * Applies a `quotaRefresh` patch to the config object being built.
 *
 * @param {object} next Config object that is mutated in place.
 * @param {*} value Raw patch value for `quotaRefresh`.
 * @returns {string|undefined} The validation error message, or undefined once
 *   the patch was applied (cleared, set, or shallow-merged over the existing value).
 */
function applyQuotaRefreshConfig(next, value) {
	const outcome = parseQuotaRefreshConfig(value);
	if ("error" in outcome) return outcome.error;
	if ("clear" in outcome) {
		delete next.quotaRefresh;
		return;
	}
	const existing = next.quotaRefresh;
	if (existing === undefined) {
		next.quotaRefresh = outcome.value;
		return;
	}
	next.quotaRefresh = {
		...existing,
		...outcome.value
	};
}
2106
2231
  const CONFIG_PATCH_HANDLERS = {
2107
2232
  auth: applyAuthConfig,
2108
2233
  extraPrompts: applyExtraPrompts,
@@ -2123,8 +2248,10 @@ const CONFIG_PATCH_HANDLERS = {
2123
2248
  modelRefreshIntervalHours: (next, value) => applyOptionalNumber(next, "modelRefreshIntervalHours", value),
2124
2249
  sessionAffinityRetentionDays: (next, value) => applyOptionalNumber(next, "sessionAffinityRetentionDays", value),
2125
2250
  useMessagesApi: (next, value) => applyOptionalBoolean(next, "useMessagesApi", value),
2251
+ useResponsesApiWebSocket: (next, value) => applyOptionalBoolean(next, "useResponsesApiWebSocket", value),
2126
2252
  useResponsesApiWebSearch: (next, value) => applyOptionalBoolean(next, "useResponsesApiWebSearch", value),
2127
- devMode: applyDevModeConfig
2253
+ devMode: applyDevModeConfig,
2254
+ quotaRefresh: applyQuotaRefreshConfig
2128
2255
  };
2129
2256
  function applyConfigPatch(base, input) {
2130
2257
  const next = { ...base };
@@ -2190,6 +2317,7 @@ adminApiRoutes.post("/config", async (c) => {
2190
2317
  const merged = mergeConfigWithDefaults();
2191
2318
  accountsManager.setAccountAffinityEnabled(isAccountAffinityEnabled());
2192
2319
  accountsManager.setModelsRefreshIntervalMs(getModelRefreshIntervalMs());
2320
+ updateQuotaRefreshSchedulerFromConfig();
2193
2321
  applySharedSessionAffinityRetention();
2194
2322
  return c.json({
2195
2323
  ...merged,
@@ -2510,7 +2638,7 @@ adminApiRoutes.post("/accounts/:id/reauth", async (c) => {
2510
2638
  const { oauthApp } = getCurrentIdentityEnvironment();
2511
2639
  const resolvedEnterpriseDomain = (await getAccountClientIdentityByLoginAndApp(accountId, oauthApp))?.enterpriseDomain;
2512
2640
  let enterpriseDomain;
2513
- if (resolvedEnterpriseDomain && resolvedEnterpriseDomain !== DEFAULT_IDENTITY_ENTERPRISE_DOMAIN) enterpriseDomain = resolvedEnterpriseDomain;
2641
+ if (resolvedEnterpriseDomain && resolvedEnterpriseDomain !== "public") enterpriseDomain = resolvedEnterpriseDomain;
2514
2642
  const result = await authSessionManager.startAuth({
2515
2643
  accountType: account.accountType,
2516
2644
  enterpriseDomain,
@@ -2569,14 +2697,14 @@ adminApiRoutes.get("/stats/premium-daily", (c) => {
2569
2697
  message: "Hourly granularity is only supported for ranges up to 35 days.",
2570
2698
  type: "bad_request"
2571
2699
  });
2572
- const result$1 = statsStore.getHourlyPremiumStats({
2700
+ const result = statsStore.getHourlyPremiumStats({
2573
2701
  fromMs,
2574
2702
  toMs,
2575
2703
  accountId
2576
2704
  });
2577
2705
  return c.json({
2578
- daily: result$1.daily,
2579
- by_account: result$1.byAccount,
2706
+ daily: result.daily,
2707
+ by_account: result.byAccount,
2580
2708
  range: {
2581
2709
  from: resolvedFrom,
2582
2710
  to: resolvedTo,
@@ -2600,7 +2728,6 @@ adminApiRoutes.get("/stats/premium-daily", (c) => {
2600
2728
  });
2601
2729
  });
2602
2730
  adminApiRoutes.route("/", replayRoutes);
2603
-
2604
2731
  //#endregion
2605
2732
  //#region src/routes/admin/route.ts
2606
2733
  function resolveAdminDistDir() {
@@ -3239,13 +3366,11 @@ adminRoutes.get("*", async (c) => {
3239
3366
  return c.html(html);
3240
3367
  }
3241
3368
  });
3242
-
3243
3369
  //#endregion
3244
3370
  //#region src/lib/approval.ts
3245
3371
  const awaitApproval = async () => {
3246
3372
  if (!await consola.prompt(`Accept incoming request?`, { type: "confirm" })) throw new HTTPError("Request rejected", Response.json({ message: "Request rejected" }, { status: 403 }));
3247
3373
  };
3248
-
3249
3374
  //#endregion
3250
3375
  //#region src/lib/handler-utils.ts
3251
3376
  function truncate(value, max = 2e3) {
@@ -3354,7 +3479,57 @@ function getUserVisibleErrorMessage(details) {
3354
3479
  function shouldMarkAccountFailed(details) {
3355
3480
  return details.unauthorized && !details.ownershipMismatch && details.upstreamErrorMessageReadFailed !== true;
3356
3481
  }
3357
-
3482
+ //#endregion
3483
+ //#region src/lib/process-cleanup.ts
3484
/** Cleanup callbacks registered through `registerProcessCleanup`. */
const cleanupHandlers = /* @__PURE__ */ new Set();
/** Promise of the in-flight (or finished) asynchronous cleanup run, if one was started. */
let cleanupPromise = null;
/** Cleanup progress: "idle" -> "running" -> "done". */
let cleanupState = "idle";
/** Guards against installing the process listeners more than once. */
let runtimeInitialized$1 = false;
/**
 * Installs the process listeners that trigger cleanup. Runs once; later calls
 * are no-ops.
 *
 * - `beforeExit` starts the asynchronous cleanup run.
 * - `exit` runs the synchronous fallback.
 * - `SIGINT` / `SIGTERM` run the asynchronous cleanup and then exit with code 0.
 */
function initializeProcessCleanupRuntime() {
	if (runtimeInitialized$1) return;
	runtimeInitialized$1 = true;
	process.once("beforeExit", () => {
		// Intentionally not awaited: runProcessCleanups never rejects.
		void runProcessCleanups();
	});
	process.once("exit", runProcessCleanupsSync);
	process.once("SIGINT", () => {
		shutdownProcess(0);
	});
	process.once("SIGTERM", () => {
		shutdownProcess(0);
	});
}
/**
 * Synchronous, best-effort cleanup used from the `exit` listener. Does nothing
 * unless no cleanup has been started yet; handler errors are ignored so that
 * every handler gets a chance to run.
 */
function runProcessCleanupsSync() {
	if (cleanupState !== "idle") return;
	cleanupState = "done";
	for (const handler of Array.from(cleanupHandlers)) try {
		handler();
	} catch {}
}
/**
 * Runs every registered cleanup handler once, awaiting each in registration
 * order. Concurrent and repeated calls share the same run.
 *
 * A handler that throws or rejects no longer aborts the run: the remaining
 * handlers still execute and the state always reaches "done". Previously one
 * failure skipped the rest, left the state stuck at "running", and rejected
 * the un-awaited promise started from `beforeExit`.
 *
 * @returns {Promise<void>} Resolves when all handlers have been attempted.
 */
async function runProcessCleanups() {
	if (cleanupPromise) return cleanupPromise;
	if (cleanupState === "done") return;
	cleanupState = "running";
	cleanupPromise = (async () => {
		for (const handler of Array.from(cleanupHandlers)) try {
			await handler();
		} catch {
			// Deliberate best-effort, matching runProcessCleanupsSync: one failing
			// handler must not prevent the others from running during shutdown.
		}
		cleanupState = "done";
	})();
	return cleanupPromise;
}
/**
 * Runs the asynchronous cleanups and then terminates the process.
 *
 * @param {number} exitCode Exit code passed to `process.exit`.
 */
async function shutdownProcess(exitCode) {
	try {
		await runProcessCleanups();
	} finally {
		process.exit(exitCode);
	}
}
/**
 * Registers a cleanup handler and makes sure the process listeners are installed.
 *
 * @param {Function} handler Sync or async cleanup callback.
 * @returns {Function} Call it to unregister the handler.
 */
function registerProcessCleanup(handler) {
	initializeProcessCleanupRuntime();
	cleanupHandlers.add(handler);
	return () => {
		cleanupHandlers.delete(handler);
	};
}
3358
3533
  //#endregion
3359
3534
  //#region src/lib/logger.ts
3360
3535
  const LOG_RETENTION_MS = 10080 * 60 * 1e3;
@@ -3374,9 +3549,6 @@ const logBuffers = /* @__PURE__ */ new Map();
3374
3549
  let runtimeInitialized = false;
3375
3550
  let flushInterval;
3376
3551
  let cleanupInterval;
3377
- let exitHandler;
3378
- let sigintHandler;
3379
- let sigtermHandler;
3380
3552
  const ensureLogDirectory = () => {
3381
3553
  if (!fs$1.existsSync(logDir)) fs$1.mkdirSync(logDir, { recursive: true });
3382
3554
  };
@@ -3454,18 +3626,7 @@ const initializeLoggerRuntime = () => {
3454
3626
  maybeUnref(flushInterval);
3455
3627
  cleanupInterval = setInterval(cleanupOldLogs, CLEANUP_INTERVAL_MS);
3456
3628
  maybeUnref(cleanupInterval);
3457
- exitHandler = cleanup;
3458
- sigintHandler = () => {
3459
- cleanup();
3460
- process.exit(0);
3461
- };
3462
- sigtermHandler = () => {
3463
- cleanup();
3464
- process.exit(0);
3465
- };
3466
- process.once("exit", exitHandler);
3467
- process.once("SIGINT", sigintHandler);
3468
- process.once("SIGTERM", sigtermHandler);
3629
+ registerProcessCleanup(cleanup);
3469
3630
  };
3470
3631
  const getLogStream = (filePath) => {
3471
3632
  initializeLoggerRuntime();
@@ -3502,15 +3663,15 @@ const shouldWriteFileLog = (type, logLevel = resolveLogLevel()) => {
3502
3663
  };
3503
3664
  const resolveLogLevel = () => testLogLevelOverride ?? getLogLevel();
3504
3665
  const isDebugFileLoggingEnabled = () => resolveLogLevel() === "debug";
3505
/**
 * Emits a debug log whose arguments are produced on demand.
 * `factory` is only invoked when debug file logging is enabled.
 *
 * @param {object} logger Logger exposing `debug(...args)`.
 * @param {Function} factory Returns the array of arguments to log.
 */
const debugLazy = (logger, factory) => {
	if (isDebugFileLoggingEnabled()) logger.debug(...factory());
};
3509
/**
 * Debug-logs `label` followed by the JSON serialization of `value`.
 * Serialization is deferred through `debugLazy`.
 *
 * @param {object} logger Logger exposing `debug(...args)`.
 * @param {string} label Log label.
 * @param {*} value Value to serialize.
 */
const debugJson = (logger, label, value) => {
	const buildArgs = () => [label, JSON.stringify(value)];
	debugLazy(logger, buildArgs);
};
3512
/**
 * Debug-logs `label` followed by the last `tailLength` characters of the JSON
 * serialization of `value`. Serialization is deferred through `debugLazy`.
 *
 * `JSON.stringify` returns `undefined` for values such as `undefined` or a
 * function; those are rendered with `String(value)` instead of throwing a
 * TypeError on `.slice`.
 *
 * @param {object} logger Logger exposing `debug(...args)`.
 * @param {string} label Log label.
 * @param {{value: *, tailLength?: number}} options Value to serialize and tail length (default 400).
 */
const debugJsonTail = (logger, label, { value, tailLength = 400 }) => {
	debugLazy(logger, () => {
		const serialized = JSON.stringify(value) ?? String(value);
		return [label, serialized.slice(-tailLength)];
	});
};
3515
3676
  const getConsolaLevel = () => {
3516
3677
  const logLevel = resolveLogLevel();
@@ -3543,33 +3704,31 @@ const createHandlerLogger = (name) => {
3543
3704
  } });
3544
3705
  return instance;
3545
3706
  };
3546
-
3547
3707
  //#endregion
3548
3708
  //#region src/lib/rate-limit.ts
3549
/**
 * Enforces the minimum spacing between requests configured in `state`.
 *
 * - No `rateLimitSeconds`: returns immediately.
 * - First request, or more than `rateLimitSeconds` since the last one: records
 *   the current time and returns.
 * - Otherwise throws an `HTTPError` carrying a 429 response, or, when
 *   `rateLimitWait` is set, sleeps for the remaining time before returning.
 *
 * After waiting, the timestamp recorded is the time the request actually
 * proceeds. Storing the pre-sleep time would let the following request
 * through before a full interval has passed.
 *
 * @param {object} state Holds `rateLimitSeconds`, `rateLimitWait` and `lastRequestTimestamp`.
 * @returns {Promise<void>}
 * @throws {HTTPError} When the limit is hit and waiting is disabled.
 */
async function checkRateLimit(state) {
	if (state.rateLimitSeconds === void 0) return;
	const now = Date.now();
	if (!state.lastRequestTimestamp) {
		state.lastRequestTimestamp = now;
		return;
	}
	const elapsedSeconds = (now - state.lastRequestTimestamp) / 1e3;
	if (elapsedSeconds > state.rateLimitSeconds) {
		state.lastRequestTimestamp = now;
		return;
	}
	const waitTimeSeconds = Math.ceil(state.rateLimitSeconds - elapsedSeconds);
	if (!state.rateLimitWait) {
		consola.warn(`Rate limit exceeded. Need to wait ${waitTimeSeconds} more seconds.`);
		throw new HTTPError("Rate limit exceeded", Response.json({ message: "Rate limit exceeded" }, { status: 429 }));
	}
	const waitTimeMs = waitTimeSeconds * 1e3;
	consola.warn(`Rate limit reached. Waiting ${waitTimeSeconds} seconds before proceeding...`);
	await sleep(waitTimeMs);
	// Re-read the clock: `now` was captured before the wait and is stale.
	state.lastRequestTimestamp = Date.now();
	consola.info("Rate limit wait completed, proceeding with request");
}
3572
-
3573
3732
  //#endregion
3574
3733
  //#region src/routes/chat-completions/support.ts
3575
3734
  const CHAT_COMPLETIONS_ENDPOINT$1 = "/chat/completions";
@@ -3579,13 +3738,13 @@ function buildRequestContext$1(c) {
3579
3738
  const requestId = randomUUID();
3580
3739
  const startedAtMs = Date.now();
3581
3740
  const method = c.req.raw.method;
3582
- const path$2 = new URL(c.req.url, "http://local").pathname;
3741
+ const path = new URL(c.req.url, "http://local").pathname;
3583
3742
  const { ip: clientIp, source: clientIpSource } = getClientIpInfo(c);
3584
3743
  return {
3585
3744
  requestId,
3586
3745
  startedAtMs,
3587
3746
  method,
3588
- path: path$2,
3747
+ path,
3589
3748
  clientIp,
3590
3749
  clientIpSource,
3591
3750
  userAgent: c.req.header("user-agent") ?? void 0
@@ -3678,7 +3837,6 @@ function selectionFailureResponse$2(c, params) {
3678
3837
  type: "rate_limit_error"
3679
3838
  } }, 429);
3680
3839
  }
3681
-
3682
3840
  //#endregion
3683
3841
  //#region src/routes/chat-completions/handler.ts
3684
3842
  const logger$6 = createHandlerLogger("chat-completions-handler");
@@ -3746,7 +3904,7 @@ async function handleCompletion$1(c) {
3746
3904
  request.selectionReason = selection.selectionReason;
3747
3905
  const premiumRemainingBefore = account.premiumRemaining;
3748
3906
  const premiumUnlimitedBefore = account.unlimited;
3749
- if (selectedModel.id === GPT_5_4_MODEL_ID) {
3907
+ if (selectedModel.id === "gpt-5.4") {
3750
3908
  await accountsManager.finalizeQuota(account, reservation);
3751
3909
  recordUnsupportedChatCompletionsModel(store, {
3752
3910
  request,
@@ -4136,7 +4294,6 @@ async function handleNonStreamingRequest(params) {
4136
4294
  }
4137
4295
  }
4138
4296
  const isNonStreaming$1 = (response) => Object.hasOwn(response, "choices");
4139
-
4140
4297
  //#endregion
4141
4298
  //#region src/routes/chat-completions/route.ts
4142
4299
  const completionRoutes = new Hono();
@@ -4147,7 +4304,6 @@ completionRoutes.post("/", async (c) => {
4147
4304
  return await forwardError(c, error);
4148
4305
  }
4149
4306
  });
4150
-
4151
4307
  //#endregion
4152
4308
  //#region src/services/copilot/create-embeddings.ts
4153
4309
  const createEmbeddings = async (payload, account, options) => {
@@ -4167,7 +4323,6 @@ const createEmbeddings = async (payload, account, options) => {
4167
4323
  if (!response.ok) throw new HTTPError("Failed to create embeddings", response);
4168
4324
  return await response.json();
4169
4325
  };
4170
-
4171
4326
  //#endregion
4172
4327
  //#region src/routes/embeddings/route.ts
4173
4328
  const embeddingRoutes = new Hono();
@@ -4178,13 +4333,13 @@ embeddingRoutes.post("/", async (c) => {
4178
4333
  const requestId = randomUUID();
4179
4334
  const startedAtMs = Date.now();
4180
4335
  const method = c.req.raw.method;
4181
- const path$2 = new URL(c.req.url, "http://local").pathname;
4336
+ const path = new URL(c.req.url, "http://local").pathname;
4182
4337
  const { ip: clientIp, source: clientIpSource } = getClientIpInfo(c);
4183
4338
  const ctx = {
4184
4339
  requestId,
4185
4340
  startedAtMs,
4186
4341
  method,
4187
- path: path$2,
4342
+ path,
4188
4343
  clientIp,
4189
4344
  clientIpSource,
4190
4345
  userAgent: c.req.header("user-agent") ?? void 0
@@ -4318,9 +4473,107 @@ async function runEmbeddingsWithAccount({ c, store, ctx, payload, clientModel, s
4318
4473
  });
4319
4474
  }
4320
4475
  }
4321
-
4476
+ //#endregion
4477
+ //#region src/lib/provider-model.ts
4478
/**
 * Splits a `provider/model` alias at its first slash.
 *
 * Returns null when `model` is not a string (so an invalid payload yields
 * "no alias" instead of a TypeError before any validation runs), when there
 * is no slash, or when either side is empty after trimming.
 *
 * @param {*} model Requested model identifier.
 * @returns {{model: string, provider: string}|null} Trimmed parts, or null.
 */
const parseProviderModelAlias = (model) => {
	if (typeof model !== "string") return null;
	const separatorIndex = model.indexOf("/");
	if (separatorIndex <= 0 || separatorIndex === model.length - 1) return null;
	const provider = model.slice(0, separatorIndex).trim();
	const providerModel = model.slice(separatorIndex + 1).trim();
	if (!provider || !providerModel) return null;
	return {
		model: providerModel,
		provider
	};
};
4489
/**
 * Parses a `provider/model` alias and keeps it only when `resolveProvider`
 * returns a truthy value for the provider part.
 *
 * @param {string} model Requested model identifier.
 * @param {Function} resolveProvider Looks up a provider by name.
 * @returns {{model: string, provider: string}|null} The alias, or null.
 */
const resolveExistingProviderModelAlias = (model, resolveProvider) => {
	const alias = parseProviderModelAlias(model);
	if (alias && resolveProvider(alias.provider)) return alias;
	return null;
};
4494
/**
 * Builds a placeholder model descriptor for an id that has no known model
 * entry. The tokenizer is set to `o200k_base`.
 *
 * @param {string} modelId Model id, used for both `id` and `name`.
 * @returns {object} Model descriptor.
 */
const createFallbackModel = (modelId) => {
	const capabilities = {
		family: "provider",
		limits: {},
		object: "model_capabilities",
		supports: {},
		tokenizer: "o200k_base",
		type: "chat"
	};
	return {
		capabilities,
		id: modelId,
		model_picker_enabled: false,
		name: modelId,
		object: "model",
		preview: false,
		vendor: "provider",
		version: "unknown"
	};
};
4511
+ //#endregion
4512
+ //#region src/routes/provider/messages/count-tokens-handler.ts
4513
+ const logger$5 = createHandlerLogger("provider-count-tokens-handler");
4514
/**
 * Looks up a provider's config, using the resolver stored on the request
 * context under "providerConfigResolver" when present and `getProviderConfig`
 * otherwise.
 *
 * @param {object} c Request context exposing `get(key)`.
 * @param {string} provider Provider name.
 * @returns {*} Whatever the chosen resolver returns.
 */
const resolveProviderConfig$2 = (c, provider) => {
	const lookup = c.get("providerConfigResolver") ?? getProviderConfig;
	return lookup(provider);
};
4517
/**
 * Route handler: reads the `provider` route parameter and the JSON body, then
 * delegates to `handleProviderCountTokensForProvider`.
 *
 * @param {object} c Request context.
 * @returns {Promise<*>} The delegated handler's response.
 */
async function handleProviderCountTokens(c) {
	const provider = c.req.param("provider");
	const payload = await c.req.json();
	return await handleProviderCountTokensForProvider(c, {
		payload,
		provider
	});
}
4524
/**
 * Estimates the token count of an Anthropic-style payload for a configured provider.
 *
 * Responds 404 when the provider cannot be resolved, 400 when `model` is not a
 * string or `messages` is not an array, 500 when translation or counting
 * throws, and otherwise `{ input_tokens }` with the sum of the counted input
 * and output tokens.
 *
 * @param {object} c Request context.
 * @param {{payload: object, provider: string}} options Parsed body and provider name.
 * @returns {Promise<*>} JSON response.
 */
async function handleProviderCountTokensForProvider(c, options) {
	const anthropicPayload = options.payload;
	const provider = options.provider;
	const providerConfig = resolveProviderConfig$2(c, provider);
	if (!providerConfig) {
		const error = {
			message: `Provider '${provider}' not found or disabled`,
			type: "invalid_request_error"
		};
		return c.json({ error }, 404);
	}
	const hasStringModel = typeof anthropicPayload.model === "string";
	if (!hasStringModel || !Array.isArray(anthropicPayload.messages)) {
		const error = {
			message: "Invalid Anthropic messages count_tokens payload",
			type: "invalid_request_error"
		};
		return c.json({ error }, 400);
	}
	const modelId = anthropicPayload.model.trim();
	const modelConfig = providerConfig.models?.[modelId];
	// Translation options are only passed for "openai-compatible" providers.
	let translationOptions;
	if (providerConfig.type === "openai-compatible") translationOptions = {
		supportPdf: modelConfig?.supportPdf,
		toolContentSupportType: modelConfig?.toolContentSupportType ?? []
	};
	try {
		const openAIPayload = translateToOpenAI(anthropicPayload, translationOptions);
		const tokenizerModel = findEndpointModel(modelId) ?? createFallbackModel(modelId);
		const tokenCount = await getTokenCount(openAIPayload, tokenizerModel);
		const finalTokenCount = tokenCount.input + tokenCount.output;
		logger$5.debug("provider.count_tokens.success", {
			provider,
			model: anthropicPayload.model,
			input_tokens: finalTokenCount
		});
		return c.json({ input_tokens: finalTokenCount });
	} catch (error) {
		logger$5.error("provider.count_tokens.error", {
			provider,
			error
		});
		const body = { error: {
			message: "Failed to count provider tokens",
			type: "internal_server_error"
		} };
		return c.json(body, 500);
	}
}
4322
4561
  //#endregion
4323
4562
  //#region src/routes/messages/count-tokens-handler.ts
4563
/**
 * Returns the provider-config resolver stored on the request context under
 * "providerConfigResolver", or `getProviderConfig` when none is set.
 *
 * @param {object} c Request context exposing `get(key)`.
 * @returns {Function} Resolver taking a provider name.
 */
const getProviderConfigResolver$1 = (c) => c.get("providerConfigResolver") ?? getProviderConfig;
4566
/**
 * Resolves the model used for token counting.
 *
 * @param {string} modelId Requested model id.
 * @param {Function} [findModel=findEndpointModel] Model lookup.
 * @returns {{fallback: boolean, model: object}} The found model with
 *   `fallback: false`, or a fallback model built from the trimmed id with
 *   `fallback: true`.
 */
const resolveCountTokensModel = (modelId, findModel = findEndpointModel) => {
	const knownModel = findModel(modelId);
	return knownModel ? {
		fallback: false,
		model: knownModel
	} : {
		fallback: true,
		model: createFallbackModel(modelId.trim())
	};
};
4324
4577
  /**
4325
4578
  * Forwards token counting to Anthropic's real /v1/messages/count_tokens endpoint.
4326
4579
  * Returns the result on success, or null to fall through to estimation.
@@ -4359,43 +4612,46 @@ async function countTokensViaAnthropic(c, payload) {
4359
4612
  * endpoint for accurate counts. Otherwise falls back to GPT tokenizer estimation.
4360
4613
  */
4361
4614
  async function handleCountTokens(c) {
4362
- try {
4363
- const anthropicPayload = await c.req.json();
4364
- const anthropicResult = await countTokensViaAnthropic(c, anthropicPayload);
4365
- if (anthropicResult) return anthropicResult;
4366
- const anthropicBeta = c.req.header("anthropic-beta");
4367
- const openAIPayload = translateToOpenAI(anthropicPayload);
4368
- const selectedModel = findEndpointModel(anthropicPayload.model);
4369
- anthropicPayload.model = selectedModel?.id ?? anthropicPayload.model;
4370
- if (!selectedModel) {
4371
- consola.warn("Model not found, returning default token count");
4372
- return c.json({ input_tokens: 1 });
4615
+ const anthropicPayload = await c.req.json();
4616
+ anthropicPayload.model = resolveModelAlias(anthropicPayload.model);
4617
+ const providerModelAlias = resolveExistingProviderModelAlias(anthropicPayload.model, getProviderConfigResolver$1(c));
4618
+ if (providerModelAlias) {
4619
+ anthropicPayload.model = providerModelAlias.model;
4620
+ return await handleProviderCountTokensForProvider(c, {
4621
+ payload: anthropicPayload,
4622
+ provider: providerModelAlias.provider
4623
+ });
4624
+ }
4625
+ const anthropicResult = await countTokensViaAnthropic(c, anthropicPayload);
4626
+ if (anthropicResult) return anthropicResult;
4627
+ const anthropicBeta = c.req.header("anthropic-beta");
4628
+ const openAIPayload = translateToOpenAI(anthropicPayload);
4629
+ const requestedModel = anthropicPayload.model;
4630
+ const resolve = resolveCountTokensModel(requestedModel);
4631
+ const selectedModel = resolve.model;
4632
+ anthropicPayload.model = selectedModel.id;
4633
+ if (resolve.fallback) consola.warn(`Model '${requestedModel}' not found, using o200k_base fallback tokenizer`);
4634
+ const tokenCount = await getTokenCount(openAIPayload, selectedModel);
4635
+ if (anthropicPayload.tools && anthropicPayload.tools.length > 0) {
4636
+ let addToolSystemPromptCount = false;
4637
+ if (anthropicBeta) {
4638
+ const toolsLength = anthropicPayload.tools.length;
4639
+ addToolSystemPromptCount = !anthropicPayload.tools.some((tool) => tool.name.startsWith("mcp__") || tool.name === "Skill" && toolsLength === 1);
4373
4640
  }
4374
- const tokenCount = await getTokenCount(openAIPayload, selectedModel);
4375
- if (anthropicPayload.tools && anthropicPayload.tools.length > 0) {
4376
- let addToolSystemPromptCount = false;
4377
- if (anthropicBeta) {
4378
- const toolsLength = anthropicPayload.tools.length;
4379
- addToolSystemPromptCount = !anthropicPayload.tools.some((tool) => tool.name.startsWith("mcp__") || tool.name === "Skill" && toolsLength === 1);
4380
- }
4381
- if (addToolSystemPromptCount) {
4382
- if (anthropicPayload.model.startsWith("claude")) tokenCount.input = tokenCount.input + 346;
4383
- else if (anthropicPayload.model.startsWith("grok")) tokenCount.input = tokenCount.input + 120;
4384
- }
4641
+ if (addToolSystemPromptCount) {
4642
+ if (anthropicPayload.model.startsWith("claude")) tokenCount.input = tokenCount.input + 346;
4643
+ else if (anthropicPayload.model.startsWith("grok")) tokenCount.input = tokenCount.input + 120;
4385
4644
  }
4386
- let finalTokenCount = tokenCount.input + tokenCount.output;
4387
- if (anthropicPayload.model.startsWith("claude")) finalTokenCount = Math.round(finalTokenCount * getClaudeTokenMultiplier());
4388
- consola.info("Token count:", finalTokenCount);
4389
- return c.json({ input_tokens: finalTokenCount });
4390
- } catch (error) {
4391
- consola.error("Error counting tokens:", error);
4392
- return c.json({ input_tokens: 1 });
4393
4645
  }
4646
+ let finalTokenCount = tokenCount.input + tokenCount.output;
4647
+ if (anthropicPayload.model.startsWith("claude")) finalTokenCount = Math.round(finalTokenCount * getClaudeTokenMultiplier());
4648
+ consola.info("Token count:", finalTokenCount);
4649
+ return c.json({ input_tokens: finalTokenCount });
4394
4650
  }
4395
-
4396
4651
  //#endregion
4397
4652
  //#region src/services/copilot/create-responses.ts
4398
- const createResponses = async (payload, { vision, initiator, upstreamRequestId, subagentMarker, sessionId, compactType, requestId }, account) => {
4653
+ const RESPONSES_WEBSOCKET_IDLE_TIMEOUT_MS = 6e4;
4654
+ const createResponses = async (payload, { vision, initiator, upstreamRequestId, subagentMarker, sessionId, compactType, requestId, fetchImpl, transport = "http" }, account) => {
4399
4655
  const ctx = account ?? accountFromState();
4400
4656
  if (!ctx.copilotToken) throw new Error("Copilot token not found");
4401
4657
  const effectiveInitiator = resolveEffectiveInitiator(initiator, {
@@ -4410,13 +4666,30 @@ const createResponses = async (payload, { vision, initiator, upstreamRequestId,
4410
4666
  prepareForCompact(headers, compactType);
4411
4667
  payload.service_tier = void 0;
4412
4668
  captureOutboundHeadersSnapshot(headers);
4413
- const response = await copilotFetch(`${copilotBaseUrl(ctx)}/responses`, {
4669
+ consola.log(`<-- model: ${payload.model}`);
4670
+ if ((compactType === 1 ? "http" : transport) === "websocket") {
4671
+ const stream = createPooledResponsesWebSocketStream(prepareResponsesWebSocketRequest(payload, headers, {
4672
+ copilotToken: ctx.copilotToken,
4673
+ requestId: requestId ?? upstreamRequestId ?? "missing-request-id",
4674
+ subagentMarker
4675
+ }), copilotBaseUrl(ctx));
4676
+ if (payload.stream) return stream;
4677
+ return await consumeResponsesWebSocketStream(stream);
4678
+ }
4679
+ return await createHttpResponses(payload, headers, ctx, {
4680
+ fetchImpl,
4681
+ requestId
4682
+ });
4683
+ };
4684
+ const createHttpResponses = async (payload, headers, account, options) => {
4685
+ const response = await copilotFetch(`${copilotBaseUrl(account)}/responses`, {
4414
4686
  method: "POST",
4415
4687
  headers,
4416
4688
  body: JSON.stringify(payload)
4417
4689
  }, {
4418
- requestId,
4419
- callSite: "responses"
4690
+ requestId: options.requestId,
4691
+ callSite: "responses",
4692
+ fetchImpl: options.fetchImpl
4420
4693
  });
4421
4694
  logCopilotRateLimits(response.headers);
4422
4695
  if (!response.ok) {
@@ -4426,22 +4699,328 @@ const createResponses = async (payload, { vision, initiator, upstreamRequestId,
4426
4699
  if (payload.stream) return events(response);
4427
4700
  return await response.json();
4428
4701
  };
4429
-
4702
/**
 * Converts an already-prepared request (payload + headers) into the shape
 * consumed by the websocket transport.
 *
 * @param payload Responses payload that would otherwise be POSTed.
 * @param preparedHeaders Headers prepared for the outbound request.
 * @param options Values forwarded to the pool-key builder.
 * @returns An object with websocket `headers`, the `poolKey`, and the websocket `payload`.
 */
const prepareResponsesWebSocketRequest = (payload, preparedHeaders, options) => {
	// The initiator is read first, before the headers are handed to any other helper.
	const websocketInitiator = getResponsesWebSocketInitiator(preparedHeaders);
	const headers = copilotWebSocketHeaders(preparedHeaders);
	const poolKey = buildResponsesWebSocketPoolKey(payload, options);
	const websocketPayload = buildResponsesWebSocketPayload(payload, websocketInitiator);
	return {
		headers,
		poolKey,
		payload: websocketPayload
	};
};
4710
/**
 * Builds the string key under which a responses websocket is pooled.
 *
 * The key joins four URI-encoded parts with `|`: a 16-hex-char SHA-256
 * fingerprint of the Copilot token (or `"missing-token"`), the payload model,
 * the request id, and either `"main"` or the subagent's
 * `session_id:agent_id:agent_type`.
 *
 * @param payload Responses payload; only `model` is read.
 * @param options `{ copilotToken, requestId, subagentMarker }`.
 * @returns The pool key string.
 */
const buildResponsesWebSocketPoolKey = (payload, { copilotToken, requestId, subagentMarker }) => {
	let tokenFingerprint = "missing-token";
	if (copilotToken) {
		// Only a short digest prefix is kept so the raw token never appears in the key.
		tokenFingerprint = createHash("sha256").update(copilotToken).digest("hex").slice(0, 16);
	}
	let subagentKey = "main";
	if (subagentMarker) {
		const { session_id, agent_id, agent_type } = subagentMarker;
		subagentKey = [session_id, agent_id, agent_type].join(":");
	}
	const parts = [tokenFingerprint, payload.model, requestId, subagentKey];
	return parts.map((part) => encodeURIComponent(part)).join("|");
};
4724
/**
 * Derives the websocket `initiator` field from the prepared headers.
 *
 * @param preparedHeaders Header map that may contain `x-initiator`.
 * @returns `"agent"` when the header equals "agent" (case-insensitive), otherwise `"user"`.
 */
const getResponsesWebSocketInitiator = (preparedHeaders) => {
	const headerValue = getHeaderValue(preparedHeaders, "x-initiator");
	if (headerValue?.toLowerCase() === "agent") return "agent";
	return "user";
};
4727
/**
 * Entry point for streaming a prepared request over a websocket;
 * delegates directly to `runResponsesWebSocketRequest`.
 */
const createPooledResponsesWebSocketStream = (request, baseUrl) => {
	return runResponsesWebSocketRequest(request, baseUrl);
};
4728
/**
 * Builds the message sent over the websocket for a responses request.
 *
 * Copies the payload, sets `type` to `"response.create"` and `initiator` to
 * the given value, and drops the `stream`, `background` and `service_tier`
 * fields. The input payload is not modified.
 *
 * @param payload Responses payload.
 * @param initiator Initiator value to embed.
 * @returns A new object ready to be JSON-serialized.
 */
const buildResponsesWebSocketPayload = (payload, initiator) => {
	const websocketPayload = Object.assign({}, payload, {
		type: "response.create",
		initiator
	});
	for (const omittedField of ["stream", "background", "service_tier"]) delete websocketPayload[omittedField];
	return websocketPayload;
};
4739
/**
 * Produces the websocket URL of the `/responses` endpoint for a base URL.
 *
 * Trailing slashes on the base are removed, `https:` becomes `wss:` and
 * `http:` becomes `ws:`; any other protocol is left untouched.
 *
 * @param baseUrl Absolute base URL.
 * @returns The serialized endpoint URL.
 */
const buildResponsesWebSocketUrl = (baseUrl) => {
	const trimmedBase = baseUrl.replace(/\/+$/u, "");
	const endpoint = new URL(`${trimmedBase}/responses`);
	switch (endpoint.protocol) {
		case "https:":
			endpoint.protocol = "wss:";
			break;
		case "http:":
			endpoint.protocol = "ws:";
			break;
		default:
			break;
	}
	return endpoint.toString();
};
4745
/** Pooled websocket entries, keyed by pool key. */
const responsesWebSocketPool = /* @__PURE__ */ new Map();
/** Count of in-flight requests per pool key; keys are removed once the count drops to zero. */
const responsesWebSocketActiveRequests = /* @__PURE__ */ new Map();
4747
/**
 * Sends one responses request over a (possibly pooled) websocket and yields
 * each received message as a stream chunk until a terminal chunk arrives.
 *
 * The pool entry is discarded whenever the exchange does not reach a terminal
 * chunk: on errors, when the socket closes early, and when the consumer stops
 * iterating part-way through. Otherwise a later request with the same pool key
 * could reuse a socket that is still delivering the previous response.
 *
 * @param request `{ headers, poolKey, payload }` as built by `prepareResponsesWebSocketRequest`.
 * @param baseUrl Base URL the websocket URL is derived from.
 * @yields Stream chunks produced by `createResponsesWebSocketStreamChunk`.
 * @throws Error when the socket is unavailable, errors, or ends without a terminal chunk.
 */
const runResponsesWebSocketRequest = async function* (request, baseUrl) {
	const { entry, pooled } = getResponsesWebSocketRequestTarget(request, baseUrl);
	const release = acquireResponsesWebSocketEntry(request.poolKey, entry, pooled);
	let reachedTerminalChunk = false;
	try {
		const websocket = await getReadyResponsesWebSocket(request.poolKey, entry, pooled);
		websocket.send(JSON.stringify(request.payload));
		for await (const data of createWebSocketMessageStream(websocket)) {
			const chunk = createResponsesWebSocketStreamChunk(data);
			const terminal = isTerminalResponsesStreamChunk(chunk);
			// Record completion before yielding: a consumer that stops right at the
			// terminal chunk has still received the complete response.
			if (terminal) reachedTerminalChunk = true;
			yield chunk;
			if (terminal) return;
		}
		throw new Error("Responses websocket ended without a terminal response");
	} catch (error) {
		removeResponsesWebSocketPoolEntry(request.poolKey, entry);
		throw toError(error);
	} finally {
		// Also covers early `return()` from the consumer, which bypasses `catch`.
		if (!reachedTerminalChunk) removeResponsesWebSocketPoolEntry(request.poolKey, entry);
		release();
	}
};
4767
/**
 * Chooses the websocket entry a request should use.
 *
 * - If the pool key already has active requests, a fresh entry is created
 *   that is NOT stored in the pool (`pooled: false`).
 * - Else, if the pool holds an open entry for the key, it is reused and its idle timer is cleared.
 * - Else a new entry is created and stored in the pool.
 *
 * @returns `{ entry, pooled }`.
 */
const getResponsesWebSocketRequestTarget = (request, baseUrl) => {
	const { poolKey } = request;
	if (getResponsesWebSocketActiveRequestCount(poolKey) > 0) {
		const dedicatedEntry = createResponsesWebSocketEntry(request, baseUrl);
		return { entry: dedicatedEntry, pooled: false };
	}
	const pooledEntry = responsesWebSocketPool.get(poolKey);
	if (pooledEntry && !pooledEntry.closed) {
		clearResponsesWebSocketIdleTimer(pooledEntry);
		return { entry: pooledEntry, pooled: true };
	}
	const freshEntry = createResponsesWebSocketEntry(request, baseUrl);
	responsesWebSocketPool.set(poolKey, freshEntry);
	return { entry: freshEntry, pooled: true };
};
4787
/**
 * Creates a websocket entry and starts opening its connection immediately.
 *
 * The entry is removed (and thereby marked closed) when the connection fails
 * to open or when the opened socket later emits `close` or `error`.
 *
 * @returns `{ closed, idleTimer, requestCount, websocketPromise }`.
 */
const createResponsesWebSocketEntry = (request, baseUrl) => {
	const websocketPromise = openResponsesWebSocket({
		headers: request.headers,
		url: buildResponsesWebSocketUrl(baseUrl)
	});
	const entry = {
		closed: false,
		idleTimer: null,
		requestCount: 0,
		websocketPromise
	};
	const discardEntry = () => {
		removeResponsesWebSocketPoolEntry(request.poolKey, entry);
	};
	websocketPromise.then((websocket) => {
		for (const eventName of ["close", "error"]) websocket.addEventListener(eventName, discardEntry);
	}).catch(discardEntry);
	return entry;
};
4809
/**
 * Marks an entry as in use by one more request and returns an idempotent
 * release callback.
 *
 * On release, once the entry has no remaining requests and is not closed, it
 * is either scheduled for idle close (when it is the pool's current entry for
 * the key) or removed outright.
 *
 * @returns A function that undoes the acquisition; repeated calls are ignored.
 */
const acquireResponsesWebSocketEntry = (poolKey, entry, pooled) => {
	clearResponsesWebSocketIdleTimer(entry);
	incrementResponsesWebSocketActiveRequestCount(poolKey);
	entry.requestCount += 1;
	let alreadyReleased = false;
	const release = () => {
		if (alreadyReleased) return;
		alreadyReleased = true;
		entry.requestCount -= 1;
		decrementResponsesWebSocketActiveRequestCount(poolKey);
		const nothingLeftToDo = entry.closed || entry.requestCount > 0;
		if (nothingLeftToDo) return;
		const isCurrentPoolEntry = pooled && responsesWebSocketPool.get(poolKey) === entry;
		if (isCurrentPoolEntry) scheduleResponsesWebSocketIdleClose(poolKey, entry);
		else removeResponsesWebSocketPoolEntry(poolKey, entry);
	};
	return release;
};
4827
/**
 * Waits for an entry's websocket and verifies it is still usable.
 *
 * @returns The open websocket.
 * @throws Error when the entry was closed, was replaced in the pool, or the
 *   socket is not in the OPEN state (in which case the entry is also removed).
 */
const getReadyResponsesWebSocket = async (poolKey, entry, pooled) => {
	const unavailableError = () => new Error("Responses websocket became unavailable before the request started");
	if (entry.closed) throw unavailableError();
	const websocket = await entry.websocketPromise;
	// State may have changed while the connection was being awaited.
	if (entry.closed) throw unavailableError();
	if (pooled && responsesWebSocketPool.get(poolKey) !== entry) throw unavailableError();
	if (websocket.readyState === WebSocket.OPEN) return websocket;
	removeResponsesWebSocketPoolEntry(poolKey, entry);
	throw unavailableError();
};
4837
/**
 * (Re)starts the idle timer for an entry; when it fires the entry is removed
 * from the pool. The timer is passed to `unrefTimer`.
 */
const scheduleResponsesWebSocketIdleClose = (poolKey, entry) => {
	clearResponsesWebSocketIdleTimer(entry);
	const closeIdleEntry = () => {
		removeResponsesWebSocketPoolEntry(poolKey, entry);
	};
	const timer = setTimeout(closeIdleEntry, RESPONSES_WEBSOCKET_IDLE_TIMEOUT_MS);
	entry.idleTimer = timer;
	unrefTimer(timer);
};
4844
/**
 * Cancels an entry's pending idle timer, if any, and resets `idleTimer` to `null`.
 */
const clearResponsesWebSocketIdleTimer = (entry) => {
	const pendingTimer = entry.idleTimer;
	if (!pendingTimer) return;
	clearTimeout(pendingTimer);
	entry.idleTimer = null;
};
4850
/** Returns the number of in-flight requests recorded for a pool key (0 when none). */
const getResponsesWebSocketActiveRequestCount = (poolKey) => {
	const recorded = responsesWebSocketActiveRequests.get(poolKey);
	return recorded ?? 0;
};
4851
/** Records one more in-flight request for a pool key. */
const incrementResponsesWebSocketActiveRequestCount = (poolKey) => {
	const current = getResponsesWebSocketActiveRequestCount(poolKey);
	responsesWebSocketActiveRequests.set(poolKey, current + 1);
};
4854
/**
 * Records one fewer in-flight request for a pool key; the key is deleted
 * from the map once the count reaches zero (or below).
 */
const decrementResponsesWebSocketActiveRequestCount = (poolKey) => {
	const remaining = getResponsesWebSocketActiveRequestCount(poolKey) - 1;
	if (remaining <= 0) responsesWebSocketActiveRequests.delete(poolKey);
	else responsesWebSocketActiveRequests.set(poolKey, remaining);
};
4862
/**
 * Retires an entry: removes it from the pool if it is the pool's current
 * entry for the key, marks it closed, cancels its idle timer, and closes the
 * websocket once (and if) it finishes opening. Safe to call more than once.
 */
const removeResponsesWebSocketPoolEntry = (poolKey, entry) => {
	const isCurrentPoolEntry = responsesWebSocketPool.get(poolKey) === entry;
	if (isCurrentPoolEntry) responsesWebSocketPool.delete(poolKey);
	if (!entry.closed) {
		entry.closed = true;
		clearResponsesWebSocketIdleTimer(entry);
		// A failed open leaves nothing to close, so the rejection is deliberately ignored.
		entry.websocketPromise.then(closeResponsesWebSocket).catch(() => {});
	}
};
4869
/**
 * Calls `timer.unref()` when the timer handle is an object exposing such a
 * method (as Node.js timer handles do), so a pending timer does not keep the
 * process alive. Numeric handles and `null`/`undefined` are ignored.
 *
 * @param timer Value returned by `setTimeout`.
 */
const unrefTimer = (timer) => {
	// `typeof null` is "object", so null must be excluded before touching properties.
	if (timer === null || typeof timer !== "object") return;
	if (typeof timer.unref === "function") timer.unref();
};
4872
/**
 * Builds an Error for a websocket failure.
 *
 * When the event carries an `error` (preferred) or a non-empty `message`, that
 * reason is appended to the message and attached as `cause`.
 *
 * @param message Base error message.
 * @param event Optional event object from the websocket.
 * @returns The constructed Error.
 */
const createResponsesWebSocketError = (message, event) => {
	const reason = event?.error ?? event?.message;
	const hasReason = reason !== void 0 && reason !== "";
	if (!hasReason) return new Error(message);
	const cause = toError(reason);
	return new Error(`${message}: ${cause.message}`, { cause });
};
4878
/**
 * Opens a websocket and resolves with it once the `open` event fires.
 *
 * The dispatcher returned by `getProxyEnvDispatcher()` is passed along with
 * the headers when one is available.
 *
 * @param options `{ headers, url }`.
 * @returns Promise resolving to the opened websocket.
 * @throws (rejects) when an `error` event fires before `open`.
 */
const openResponsesWebSocket = async ({ headers, url }) => {
	return await new Promise((resolve, reject) => {
		const dispatcher = getProxyEnvDispatcher();
		const socketOptions = dispatcher ? { dispatcher, headers } : { headers };
		const websocket = new WebSocket(url, socketOptions);
		// Both handlers are one-shot: whichever fires first detaches the pair.
		const detachHandlers = () => {
			websocket.removeEventListener("open", handleOpen);
			websocket.removeEventListener("error", handleError);
		};
		const handleOpen = () => {
			detachHandlers();
			resolve(websocket);
		};
		const handleError = (event) => {
			detachHandlers();
			reject(createResponsesWebSocketError("Failed to create responses websocket", event));
		};
		websocket.addEventListener("open", handleOpen);
		websocket.addEventListener("error", handleError);
	});
};
4899
/**
 * Adapts a websocket's events into an async iterator of message payloads.
 *
 * Messages are buffered in arrival order. Buffered messages are always
 * delivered before a recorded error is thrown or a close ends the iteration.
 * Listeners are detached when iteration finishes for any reason.
 *
 * @param websocket Event-emitting websocket.
 * @yields Normalized message data, one item per `message` event.
 * @throws Error when the websocket emits `error`.
 */
const createWebSocketMessageStream = async function* (websocket) {
	const pendingMessages = [];
	let socketClosed = false;
	let streamError = null;
	let resumeWaiter = null;
	const wakeWaiter = () => {
		resumeWaiter?.();
		resumeWaiter = null;
	};
	const handlers = {
		message: (event) => {
			// Normalization is async; the promise itself is queued to preserve order.
			pendingMessages.push(normalizeWebSocketMessageData(event.data));
			wakeWaiter();
		},
		close: () => {
			socketClosed = true;
			wakeWaiter();
		},
		error: (event) => {
			streamError = createResponsesWebSocketError("Responses websocket stream error", event);
			wakeWaiter();
		}
	};
	websocket.addEventListener("message", handlers.message);
	websocket.addEventListener("close", handlers.close);
	websocket.addEventListener("error", handlers.error);
	try {
		for (;;) {
			if (pendingMessages.length > 0) {
				yield await pendingMessages.shift();
				continue;
			}
			if (streamError) throw toError(streamError);
			if (socketClosed) break;
			// Nothing buffered yet: sleep until the next event wakes us.
			await new Promise((resolve) => {
				resumeWaiter = resolve;
			});
		}
	} finally {
		websocket.removeEventListener("message", handlers.message);
		websocket.removeEventListener("close", handlers.close);
		websocket.removeEventListener("error", handlers.error);
	}
};
4942
/**
 * Converts websocket message data to a string.
 *
 * Strings pass through; `ArrayBuffer`s and typed-array/DataView views are
 * decoded with `TextDecoder`; objects exposing a `text()` method are read
 * through it; anything else is coerced with `String()`.
 *
 * @param data Raw `event.data` value.
 * @returns Promise resolving to the text form.
 */
const normalizeWebSocketMessageData = async (data) => {
	if (typeof data === "string") return data;
	const decodeBytes = (bytes) => new TextDecoder().decode(bytes);
	if (data instanceof ArrayBuffer) return decodeBytes(data);
	if (ArrayBuffer.isView(data)) {
		// Respect the view's window into its underlying buffer.
		const { buffer, byteOffset, byteLength } = data;
		return decodeBytes(new Uint8Array(buffer, byteOffset, byteLength));
	}
	const hasTextMethod = Boolean(data) && typeof data === "object" && "text" in data && typeof data.text === "function";
	if (hasTextMethod) return await data.text();
	return String(data);
};
4952
/**
 * Tells whether a value is an object exposing a callable `text` member.
 *
 * @param value Any value.
 * @returns `true` only for non-null objects whose `text` property is a function.
 */
const isTextReadable = (value) => {
	const isObjectWithText = Boolean(value) && typeof value === "object" && "text" in value;
	return isObjectWithText ? typeof value.text === "function" : false;
};
4956
/**
 * Ensures a thrown/received value is an `Error`.
 *
 * @param value Any value.
 * @returns The value itself when it already is an Error, otherwise a new Error
 *   whose message is `String(value)`.
 */
const toError = (value) => {
	return value instanceof Error ? value : new Error(String(value));
};
4960
/**
 * Looks up a header in a plain header object, ignoring the case of the name.
 *
 * @param headers Object whose own enumerable keys are header names.
 * @param headerName Name to search for.
 * @returns The value of the first matching key, or `undefined`.
 */
const getHeaderValue = (headers, headerName) => {
	const wanted = headerName.toLowerCase();
	for (const [name, value] of Object.entries(headers)) {
		if (name.toLowerCase() === wanted) return value;
	}
	return undefined;
};
4964
/** URI-encodes one component of a pool key so the `|` separator stays unambiguous. */
const encodePoolKeyPart = (value) => {
	return encodeURIComponent(value);
};
4965
/**
 * Wraps one websocket message in the chunk shape used for streamed responses.
 *
 * JSON messages are re-serialized into `data`, with `event` and `id` taken
 * from the message's string `type`/`id` fields. A `response.completed`
 * message also has its `copilot_quota_snapshots` passed to the quota logger.
 * `"[DONE]"` and anything that cannot be processed as JSON is returned as raw `{ data }`.
 *
 * @param data Message text.
 * @returns `{ data, event?, id? }`.
 */
const createResponsesWebSocketStreamChunk = (data) => {
	if (data === "[DONE]") return { data };
	const stringOrUndefined = (candidate) => typeof candidate === "string" ? candidate : void 0;
	try {
		const message = JSON.parse(data);
		// Destructuring throws for a JSON `null`, which falls through to the raw form below.
		const { type, id } = message;
		if (type === "response.completed") logCopilotQuotaSnapshots(message.copilot_quota_snapshots);
		return {
			data: JSON.stringify(message),
			event: stringOrUndefined(type),
			id: stringOrUndefined(id)
		};
	} catch {
		return { data };
	}
};
4979
/**
 * Tells whether a chunk ends a response exchange.
 *
 * @param chunk Stream chunk with an optional JSON `data` string.
 * @returns `true` when the parsed `type` is `response.completed`,
 *   `response.failed`, `response.incomplete` or `error`; `false` for empty
 *   data, `"[DONE]"` and unparsable data.
 */
const isTerminalResponsesStreamChunk = (chunk) => {
	const { data } = chunk;
	if (!data || data === "[DONE]") return false;
	const terminalTypes = [
		"response.completed",
		"response.failed",
		"response.incomplete",
		"error"
	];
	try {
		return terminalTypes.includes(JSON.parse(data).type);
	} catch {
		return false;
	}
};
4988
/**
 * Drains a responses websocket stream and returns the final response object,
 * for callers that asked for a non-streaming result.
 *
 * Chunks with no data, `"[DONE]"` markers, and chunks whose data is not a
 * JSON object are skipped. The chunk builder passes unparsable frames through
 * as raw data, so they must not abort the whole request here.
 *
 * @param stream Async iterable of `{ data }` chunks.
 * @returns The `response` field of the first `response.completed`,
 *   `response.failed` or `response.incomplete` event.
 * @throws Error when an `error` event is received or the stream ends without a terminal event.
 */
const consumeResponsesWebSocketStream = async (stream) => {
	for await (const chunk of stream) {
		if (!chunk.data || chunk.data === "[DONE]") continue;
		let event;
		try {
			event = JSON.parse(chunk.data);
		} catch {
			continue;
		}
		if (event === null || typeof event !== "object") continue;
		if (event.type === "error") {
			// Prefer the top-level message; fall back to a nested `error.message`
			// (defensive) and finally to a fixed text instead of "undefined".
			const message = event.message ?? event.error?.message ?? "Responses websocket returned an error event";
			throw new Error(String(message));
		}
		if (event.type === "response.completed" || event.type === "response.failed" || event.type === "response.incomplete") return event.response;
	}
	throw new Error("Responses websocket ended without a terminal response");
};
4997
/** Closes a websocket that is still connecting or open; other states are left alone. */
const closeResponsesWebSocket = (websocket) => {
	const { readyState } = websocket;
	const stillLive = readyState === WebSocket.CONNECTING || readyState === WebSocket.OPEN;
	if (stillLive) websocket.close();
};
4430
5000
  //#endregion
4431
5001
  //#region src/routes/messages/responses-translation.ts
4432
5002
  const MESSAGE_TYPE = "message";
4433
5003
  const COMPACTION_SIGNATURE_PREFIX = "cm1#";
4434
5004
  const COMPACTION_SIGNATURE_SEPARATOR = "@";
4435
- const THINKING_TEXT$1 = "Thinking...";
4436
- const translateAnthropicMessagesToResponsesPayload = (payload, modelOverride) => {
4437
- const model = modelOverride ?? payload.model;
5005
+ const THINKING_TEXT = "Thinking...";
5006
/**
 * Derives the prompt cache key, optionally scoped to a subagent.
 *
 * @param basePromptCacheKey Base key; a falsy value yields `null`.
 * @param subagentAgentId Optional subagent id; ignored when blank after trimming.
 * @returns `null`, the base key, or `"<base>:agent:<trimmed id>"`.
 */
const buildPromptCacheKey = (basePromptCacheKey, subagentAgentId) => {
	if (!basePromptCacheKey) return null;
	const agentId = subagentAgentId?.trim();
	return agentId ? `${basePromptCacheKey}:agent:${agentId}` : basePromptCacheKey;
};
5012
+ const translateAnthropicMessagesToResponsesPayload = (payload, options = {}) => {
5013
+ const model = options.modelOverride ?? payload.model;
4438
5014
  const input = [];
4439
5015
  const applyPhase = shouldApplyPhase(payload.model);
4440
5016
  for (const message of payload.messages) input.push(...translateMessage(message, payload.model, applyPhase));
5017
+ const hasOriginalTools = Array.isArray(payload.tools) && payload.tools.length > 0;
4441
5018
  const translatedTools = convertAnthropicTools(payload.tools);
4442
5019
  const toolChoice = convertAnthropicToolChoice(payload.tool_choice);
4443
- const { sessionId: promptCacheKey } = parseUserIdMetadata(payload.metadata?.user_id);
4444
- return {
5020
+ const { sessionId: metadataPromptCacheKey } = parseUserIdMetadata(payload.metadata?.user_id);
5021
+ const sessionAffinity = requestContext.getStore()?.sessionAffinity?.trim() || null;
5022
+ const promptCacheKey = buildPromptCacheKey(metadataPromptCacheKey ?? sessionAffinity, options.subagentAgentId);
5023
+ const responsesPayload = {
4445
5024
  model,
4446
5025
  input,
4447
5026
  instructions: translateSystemPrompt(payload.system, model),
@@ -4451,7 +5030,6 @@ const translateAnthropicMessagesToResponsesPayload = (payload, modelOverride) =>
4451
5030
  tools: translatedTools,
4452
5031
  tool_choice: toolChoice,
4453
5032
  metadata: payload.metadata ? { ...payload.metadata } : null,
4454
- prompt_cache_key: promptCacheKey,
4455
5033
  stream: payload.stream ?? null,
4456
5034
  store: false,
4457
5035
  parallel_tool_calls: true,
@@ -4461,6 +5039,8 @@ const translateAnthropicMessagesToResponsesPayload = (payload, modelOverride) =>
4461
5039
  },
4462
5040
  include: ["reasoning.encrypted_content"]
4463
5041
  };
5042
+ if (hasOriginalTools) responsesPayload.prompt_cache_key = promptCacheKey;
5043
+ return responsesPayload;
4464
5044
  };
4465
5045
  const encodeCompactionCarrierSignature = (compaction) => {
4466
5046
  return `${COMPACTION_SIGNATURE_PREFIX}${compaction.encrypted_content}${COMPACTION_SIGNATURE_SEPARATOR}${compaction.id}`;
@@ -4598,8 +5178,8 @@ const createFileContent = (block) => ({
4598
5178
  filename: block.title ?? "document.pdf"
4599
5179
  });
4600
5180
  const createReasoningContent = (block) => {
4601
- const { encryptedContent, id } = parseReasoningSignature(block.signature);
4602
- const thinking = block.thinking === THINKING_TEXT$1 ? "" : block.thinking;
5181
+ const { encryptedContent, id } = parseReasoningSignature$1(block.signature);
5182
+ const thinking = block.thinking === "Thinking..." ? "" : block.thinking;
4603
5183
  return {
4604
5184
  id,
4605
5185
  type: "reasoning",
@@ -4619,7 +5199,7 @@ const createCompactionContent = (block) => {
4619
5199
  encrypted_content: compaction.encrypted_content
4620
5200
  };
4621
5201
  };
4622
- const parseReasoningSignature = (signature) => {
5202
+ const parseReasoningSignature$1 = (signature) => {
4623
5203
  const splitIndex = signature.lastIndexOf("@");
4624
5204
  if (splitIndex <= 0 || splitIndex === signature.length - 1) return {
4625
5205
  encryptedContent: signature,
@@ -4765,7 +5345,7 @@ const extractReasoningText = (item) => {
4765
5345
  continue;
4766
5346
  }
4767
5347
  };
4768
- if (!item.summary || item.summary.length === 0) return THINKING_TEXT$1;
5348
+ if (!item.summary || item.summary.length === 0) return THINKING_TEXT;
4769
5349
  collectFromBlocks(item.summary);
4770
5350
  return segments.join("").trim();
4771
5351
  };
@@ -4784,7 +5364,7 @@ const createCompactionThinkingBlock = (item) => {
4784
5364
  if (!item.id || !item.encrypted_content) return null;
4785
5365
  return {
4786
5366
  type: "thinking",
4787
- thinking: THINKING_TEXT$1,
5367
+ thinking: THINKING_TEXT,
4788
5368
  signature: encodeCompactionCarrierSignature({
4789
5369
  id: item.id,
4790
5370
  encrypted_content: item.encrypted_content
@@ -4859,7 +5439,869 @@ const convertToolResultContent = (content) => {
4859
5439
  }
4860
5440
  return "";
4861
5441
  };
4862
-
5442
+ //#endregion
5443
+ //#region src/routes/messages/responses-item-ownership.ts
5444
/**
 * Builds an ownership key for a responses item attribute.
 *
 * @param kind Label of the attribute (this file uses `"id"` and `"encrypted_content"`).
 * @param value Attribute value; only its SHA-256 hex digest appears in the key.
 * @returns `"responses-item-owner:<kind>:<sha256 hex>"`.
 */
function buildResponsesItemOwnershipKey(kind, value) {
	const digest = createHash("sha256").update(value).digest("hex");
	return ["responses-item-owner", kind, digest].join(":");
}
5447
/**
 * Collects ownership keys from the signed thinking blocks of the assistant
 * messages in an Anthropic-style payload.
 *
 * @param payload Object with a `messages` list.
 * @returns De-duplicated ownership keys.
 */
function extractAnthropicResponsesItemOwnerKeys(payload) {
	const collected = [];
	for (const message of payload.messages) {
		const hasBlockContent = message.role === "assistant" && Array.isArray(message.content);
		if (!hasBlockContent) continue;
		for (const block of message.content) {
			if (block.type === "thinking" && block.signature) addSignatureOwnerKeys(collected, block.signature);
		}
	}
	return unique(collected);
}
5458
/**
 * Collects ownership keys from every output item of a responses result.
 *
 * @param result Object with an iterable `output`.
 * @returns De-duplicated ownership keys.
 */
function extractResponsesResultOwnerKeys(result) {
	const collected = [];
	for (const outputItem of result.output) {
		addOutputItemOwnerKeys(collected, outputItem);
	}
	return unique(collected);
}
5463
/**
 * Collects ownership keys carried by a single responses stream event.
 *
 * `response.output_item.done` contributes its item; `response.completed` and
 * `response.incomplete` contribute all output items of their response; every
 * other event type contributes nothing.
 *
 * @param event Stream event.
 * @returns De-duplicated ownership keys (possibly empty).
 */
function extractResponsesStreamEventOwnerKeys(event) {
	switch (event.type) {
		case "response.output_item.done": {
			const collected = [];
			addOutputItemOwnerKeys(collected, event.item);
			return unique(collected);
		}
		case "response.completed":
		case "response.incomplete":
			return extractResponsesResultOwnerKeys(event.response);
		default:
			return [];
	}
}
5472
/**
 * Appends the ownership keys encoded in a thinking-block signature.
 *
 * Signatures starting with `cm1#` are decoded as compaction carriers; all
 * others are parsed as `<encrypted content>@<id>` reasoning signatures.
 * Signatures that cannot be decoded contribute nothing.
 *
 * @param keys Array that receives the keys (mutated).
 * @param signature Signature string.
 */
function addSignatureOwnerKeys(keys, signature) {
	const isCompactionCarrier = signature.startsWith("cm1#");
	if (isCompactionCarrier) {
		const compaction = decodeCompactionCarrierSignature(signature);
		if (compaction) addRawOwnerKeys(keys, compaction.id, compaction.encrypted_content);
		return;
	}
	const reasoning = parseReasoningSignature(signature);
	if (reasoning) addRawOwnerKeys(keys, reasoning.id, reasoning.encryptedContent);
}
5482
/**
 * Splits a reasoning signature of the form `<encrypted content>@<id>` at its
 * last `@`.
 *
 * @param signature Signature string.
 * @returns `{ encryptedContent, id }`, or `undefined` when there is no `@` or
 *   either side of it would be empty.
 */
function parseReasoningSignature(signature) {
	const separatorAt = signature.lastIndexOf("@");
	const missingEncryptedContent = separatorAt <= 0;
	const missingId = separatorAt === signature.length - 1;
	if (missingEncryptedContent || missingId) return;
	const encryptedContent = signature.slice(0, separatorAt);
	const id = signature.slice(separatorAt + 1);
	return { encryptedContent, id };
}
5490
/**
 * Appends the ownership keys of an output item, provided the item is of a
 * type that carries ownership (see `isOwnerBearingOutputItem`).
 *
 * @param keys Array that receives the keys (mutated).
 * @param item Output item.
 */
function addOutputItemOwnerKeys(keys, item) {
	if (isOwnerBearingOutputItem(item)) {
		addRawOwnerKeys(keys, item.id, item.encrypted_content);
	}
}
5494
/** Returns `true` for output items of type `reasoning` or `compaction`. */
function isOwnerBearingOutputItem(item) {
	const { type } = item;
	return type === "reasoning" || type === "compaction";
}
5497
/**
 * Appends ownership keys for an item's id and encrypted content; falsy values
 * are skipped.
 *
 * @param keys Array that receives the keys (mutated).
 * @param id Item id, if any.
 * @param encryptedContent Encrypted content, if any.
 */
function addRawOwnerKeys(keys, id, encryptedContent) {
	const candidates = [["id", id], ["encrypted_content", encryptedContent]];
	for (const [kind, value] of candidates) {
		if (value) keys.push(buildResponsesItemOwnershipKey(kind, value));
	}
}
5501
/** Returns a new array with duplicates removed, keeping first-occurrence order. */
function unique(values) {
	return Array.from(new Set(values));
}
5504
+ //#endregion
5505
+ //#region src/routes/messages/stream-translation.ts
5506
/**
 * Tells whether the currently open content block belongs to a tool call.
 *
 * @param state Stream state with `contentBlockOpen`, `contentBlockIndex` and `toolCalls`.
 * @returns `true` when a block is open and some recorded tool call uses its index.
 */
function isToolBlockOpen(state) {
	if (!state.contentBlockOpen) return false;
	for (const toolCall of Object.values(state.toolCalls)) {
		if (toolCall.anthropicBlockIndex === state.contentBlockIndex) return true;
	}
	return false;
}
5510
/**
 * Translates one OpenAI-style streaming chunk into Anthropic stream events,
 * updating `state` as it goes.
 *
 * A chunk without choices only completes a pending message (if any).
 * Otherwise the first choice is processed in a fixed order: message start,
 * thinking text, content, tool calls, finish.
 *
 * @param chunk Streaming chunk with `choices`.
 * @param state Mutable translation state shared across chunks.
 * @returns Events produced by this chunk, in order.
 */
function translateChunkToAnthropicEvents(chunk, state) {
	const events = [];
	if (chunk.choices.length === 0) {
		completePendingMessage(state, events, chunk);
		return events;
	}
	const [choice] = chunk.choices;
	const delta = choice.delta;
	handleMessageStart(state, events, chunk);
	// Order matters: thinking-text handling may rewrite `delta.content`.
	handleThinkingText(delta, state, events);
	handleContent(delta, state, events);
	handleToolCalls(delta, state, events);
	handleFinish(choice, state, { events, chunk });
	return events;
}
5528
/**
 * Emits the events of a still-pending message completion, if there is one.
 *
 * @param state Mutable translation state.
 * @returns The flushed events (empty when nothing was pending).
 */
function flushPendingAnthropicStreamEvents(state) {
	const flushed = [];
	completePendingMessage(state, flushed);
	return flushed;
}
5533
/**
 * Emits the deferred `message_delta` followed by `message_stop`, then clears it.
 *
 * When the given chunk carries usage, the pending delta's usage is refreshed
 * from it first.
 *
 * @param state Mutable translation state holding `pendingMessageDelta`.
 * @param events Array that receives the events (mutated).
 * @param chunk Optional chunk that may carry `usage`.
 */
function completePendingMessage(state, events, chunk) {
	const pending = state.pendingMessageDelta;
	if (!pending) return;
	if (chunk?.usage) pending.usage = getAnthropicUsageFromOpenAIChunk(chunk, state);
	events.push(pending);
	events.push({ type: "message_stop" });
	state.pendingMessageDelta = void 0;
}
5539
/**
 * Handles a choice that carries a finish reason.
 *
 * Closes the open content block (emitting any opaque reasoning afterwards
 * unless the block was a tool block), flushes deferred text, and stores the
 * `message_delta` as pending. The pending message is completed immediately
 * only when this chunk already includes usage.
 *
 * @param choice Streaming choice.
 * @param state Mutable translation state.
 * @param context `{ events, chunk }`.
 */
function handleFinish(choice, state, context) {
	const { events, chunk } = context;
	const finishReason = choice.finish_reason;
	if (!(finishReason && finishReason.length > 0)) return;
	if (state.contentBlockOpen) {
		// Must be sampled before the block is closed and the index advances.
		const closingToolBlock = isToolBlockOpen(state);
		events.push({
			type: "content_block_stop",
			index: state.contentBlockIndex
		});
		state.contentBlockOpen = false;
		state.contentBlockIndex++;
		if (!closingToolBlock) handleReasoningOpaque(choice.delta, events, state);
	}
	flushDeferredContent(state, events);
	const stopDelta = {
		stop_reason: mapOpenAIStopReasonToAnthropic(choice.finish_reason),
		stop_sequence: null
	};
	state.pendingMessageDelta = {
		type: "message_delta",
		delta: stopDelta,
		usage: getAnthropicUsageFromOpenAIChunk(chunk, state)
	};
	if (chunk.usage) completePendingMessage(state, events, chunk);
}
5564
/**
 * Builds an Anthropic-style usage object from a chunk's usage data.
 *
 * `cache_creation_input_tokens` is included only when the chunk reports it;
 * `cache_read_input_tokens` only when a cache-read count is known.
 *
 * @param chunk Streaming chunk (may lack `usage`).
 * @param state Optional translation state used as fallback for token counts.
 * @returns `{ input_tokens, output_tokens, ... }`.
 */
function getAnthropicUsageFromOpenAIChunk(chunk, state) {
	const tokens = getOpenAIChunkUsageTokens(chunk, state);
	const usage = {
		input_tokens: tokens.inputTokens,
		output_tokens: chunk.usage?.completion_tokens ?? 0
	};
	const reportsCacheCreation = chunk.usage?.prompt_tokens_details?.cache_creation_input_tokens !== void 0;
	if (reportsCacheCreation) usage.cache_creation_input_tokens = tokens.cacheCreationTokens;
	if (tokens.cacheReadTokens !== void 0) usage.cache_read_input_tokens = tokens.cacheReadTokens;
	return usage;
}
5573
/**
 * Computes token counts for a chunk.
 *
 * With `usage.prompt_tokens` present, input tokens are the prompt tokens minus
 * cached and cache-creation tokens (never below zero). Without it, input
 * tokens fall back to the state's historical input + output tokens, then to
 * its estimated input tokens, then to 0; the cache-read count then comes from
 * the state's historical cached input tokens.
 *
 * @param chunk Streaming chunk (may lack `usage`).
 * @param state Optional translation state.
 * @returns `{ cacheCreationTokens, cachedTokens, inputTokens, cacheReadTokens }`.
 */
function getOpenAIChunkUsageTokens(chunk, state) {
	const usage = chunk.usage;
	const details = usage?.prompt_tokens_details;
	const cachedTokens = details?.cached_tokens ?? 0;
	const cacheCreationTokens = details?.cache_creation_input_tokens ?? 0;
	const promptTokens = usage?.prompt_tokens;
	if (promptTokens !== void 0) {
		const uncachedTokens = promptTokens - cachedTokens - cacheCreationTokens;
		return {
			cacheCreationTokens,
			cachedTokens,
			inputTokens: Math.max(0, uncachedTokens),
			cacheReadTokens: details?.cached_tokens
		};
	}
	let historicalTotal;
	if (state?.historicalInputTokens !== void 0) {
		historicalTotal = state.historicalInputTokens + (state.historicalOutputTokens ?? 0);
	}
	return {
		cacheCreationTokens,
		cachedTokens,
		inputTokens: historicalTotal ?? state?.estimatedInputTokens ?? 0,
		cacheReadTokens: state?.historicalCachedInputTokens
	};
}
5592
/**
 * Translates tool-call deltas into Anthropic `tool_use` block events.
 *
 * A delta entry with both an id and a function name starts a new `tool_use`
 * block (closing any open block first) and is recorded in `state.toolCalls`
 * under its OpenAI index. Argument fragments are emitted as
 * `input_json_delta` on the block recorded for that index.
 *
 * @param delta Choice delta that may contain `tool_calls`.
 * @param state Mutable translation state.
 * @param events Array that receives the events (mutated).
 */
function handleToolCalls(delta, state, events) {
	if (!(delta.tool_calls && delta.tool_calls.length > 0)) return;
	closeThinkingBlockIfOpen(state, events);
	handleReasoningOpaqueInToolCalls(state, events, delta);
	for (const toolCall of delta.tool_calls) {
		const startsNewToolCall = toolCall.id && toolCall.function?.name;
		if (startsNewToolCall) {
			if (state.contentBlockOpen) {
				events.push({
					type: "content_block_stop",
					index: state.contentBlockIndex
				});
				state.contentBlockIndex++;
				state.contentBlockOpen = false;
			}
			const blockIndex = state.contentBlockIndex;
			const { id } = toolCall;
			const { name } = toolCall.function;
			state.toolCalls[toolCall.index] = {
				id,
				name,
				anthropicBlockIndex: blockIndex
			};
			events.push({
				type: "content_block_start",
				index: blockIndex,
				content_block: {
					type: "tool_use",
					id,
					name,
					input: {}
				}
			});
			state.contentBlockOpen = true;
		}
		const argumentsFragment = toolCall.function?.arguments;
		if (!argumentsFragment) continue;
		// Fragments for an index that never announced a tool call are dropped.
		const recordedCall = state.toolCalls[toolCall.index];
		if (!recordedCall) continue;
		events.push({
			type: "content_block_delta",
			index: recordedCall.anthropicBlockIndex,
			delta: {
				type: "input_json_delta",
				partial_json: argumentsFragment
			}
		});
	}
}
5638
/**
 * Emits opaque reasoning that arrives alongside tool calls, first closing an
 * open content block when that block is not a tool block.
 *
 * @param state Mutable translation state.
 * @param events Array that receives the events (mutated).
 * @param delta Choice delta that may contain `reasoning_opaque`.
 */
function handleReasoningOpaqueInToolCalls(state, events, delta) {
	const nonToolBlockOpen = state.contentBlockOpen && !isToolBlockOpen(state);
	if (nonToolBlockOpen) {
		const closingIndex = state.contentBlockIndex;
		events.push({
			type: "content_block_stop",
			index: closingIndex
		});
		state.contentBlockIndex = closingIndex + 1;
		state.contentBlockOpen = false;
	}
	handleReasoningOpaque(delta, events, state);
}
5649
/**
 * Translates text content of a delta into Anthropic text-block events.
 *
 * Non-empty text closes an open thinking block, then is either deferred
 * (while a tool block is open or the same delta carries tool calls) or
 * emitted as a `text_delta`, opening a text block first when needed.
 *
 * Separately, an empty-string content combined with opaque reasoning while a
 * thinking block is open emits the signature and closes that thinking block.
 *
 * @param delta Choice delta.
 * @param state Mutable translation state.
 * @param events Array that receives the events (mutated).
 */
function handleContent(delta, state, events) {
	const hasText = delta.content && delta.content.length > 0;
	if (hasText) {
		closeThinkingBlockIfOpen(state, events);
		const mustDefer = isToolBlockOpen(state) || hasToolCallDelta(delta);
		if (mustDefer) {
			// Text cannot interleave with a tool block; hold it until it can be flushed.
			state.deferredContent = `${state.deferredContent ?? ""}${delta.content}`;
			return;
		}
		if (!state.contentBlockOpen) {
			events.push({
				type: "content_block_start",
				index: state.contentBlockIndex,
				content_block: {
					type: "text",
					text: ""
				}
			});
			state.contentBlockOpen = true;
		}
		events.push({
			type: "content_block_delta",
			index: state.contentBlockIndex,
			delta: {
				type: "text_delta",
				text: delta.content
			}
		});
	}
	const signature = delta.reasoning_opaque;
	const signsOpenThinkingBlock = delta.content === "" && signature && signature.length > 0 && state.thinkingBlockOpen;
	if (!signsOpenThinkingBlock) return;
	const thinkingIndex = state.contentBlockIndex;
	events.push({
		type: "content_block_delta",
		index: thinkingIndex,
		delta: {
			type: "signature_delta",
			signature
		}
	}, {
		type: "content_block_stop",
		index: thinkingIndex
	});
	state.contentBlockIndex++;
	state.thinkingBlockOpen = false;
}
5692
/** Returns `true` when the delta carries at least one tool call. */
function hasToolCallDelta(delta) {
	const toolCalls = delta.tool_calls;
	return Boolean(toolCalls && toolCalls.length > 0);
}
5695
/**
 * Emits text that was deferred earlier as one complete text block.
 *
 * Opens a text block when none is open, emits the deferred text as a single
 * `text_delta`, closes the block, advances the block index and clears the
 * deferred text. Does nothing when there is no deferred text.
 *
 * @param state Mutable translation state.
 * @param events Array that receives the events (mutated).
 */
function flushDeferredContent(state, events) {
	const deferredText = state.deferredContent;
	if (!deferredText) return;
	const blockIndex = state.contentBlockIndex;
	if (!state.contentBlockOpen) {
		events.push({
			type: "content_block_start",
			index: blockIndex,
			content_block: {
				type: "text",
				text: ""
			}
		});
		state.contentBlockOpen = true;
	}
	events.push({
		type: "content_block_delta",
		index: blockIndex,
		delta: {
			type: "text_delta",
			text: deferredText
		}
	});
	events.push({
		type: "content_block_stop",
		index: blockIndex
	});
	state.deferredContent = void 0;
	state.contentBlockOpen = false;
	state.contentBlockIndex = blockIndex + 1;
}
5723
/**
 * Emits the `message_start` event once per message.
 *
 * Its usage block reports input tokens (output is 0 at this point) and
 * includes cache fields only when the corresponding counts are available.
 *
 * @param state Mutable translation state; `messageStartSent` is set afterwards.
 * @param events Array that receives the event (mutated).
 * @param chunk Streaming chunk providing `id`, `model` and optional `usage`.
 */
function handleMessageStart(state, events, chunk) {
	if (state.messageStartSent) return;
	const { cacheCreationTokens, inputTokens, cacheReadTokens } = getOpenAIChunkUsageTokens(chunk, state);
	const usage = {
		input_tokens: inputTokens,
		output_tokens: 0
	};
	if (chunk.usage?.prompt_tokens_details?.cache_creation_input_tokens !== void 0) usage.cache_creation_input_tokens = cacheCreationTokens;
	if (cacheReadTokens !== void 0) usage.cache_read_input_tokens = cacheReadTokens;
	events.push({
		type: "message_start",
		message: {
			id: chunk.id,
			type: "message",
			role: "assistant",
			content: [],
			model: chunk.model,
			stop_reason: null,
			stop_sequence: null,
			usage
		}
	});
	state.messageStartSent = true;
}
5747
/**
 * Emits a complete, self-contained thinking block for opaque reasoning.
 *
 * The block carries a placeholder thinking text and the opaque value as its
 * signature; the block index is advanced afterwards. Nothing happens when the
 * delta has no (non-empty) `reasoning_opaque`.
 *
 * @param delta Choice delta.
 * @param events Array that receives the events (mutated).
 * @param state Mutable translation state.
 */
function handleReasoningOpaque(delta, events, state) {
	const signature = delta.reasoning_opaque;
	if (!(signature && signature.length > 0)) return;
	const index = state.contentBlockIndex;
	const blockEvents = [
		{
			type: "content_block_start",
			index,
			content_block: {
				type: "thinking",
				thinking: ""
			}
		},
		{
			type: "content_block_delta",
			index,
			delta: {
				type: "thinking_delta",
				thinking: THINKING_TEXT$1
			}
		},
		{
			type: "content_block_delta",
			index,
			delta: {
				type: "signature_delta",
				signature
			}
		},
		{
			type: "content_block_stop",
			index
		}
	];
	events.push(...blockEvents);
	state.contentBlockIndex++;
}
5777
/**
 * Translates reasoning text (`reasoning_text`, else `reasoning_content`) into
 * thinking-block events.
 *
 * While a content block is open, the reasoning text is instead moved into
 * `delta.content` (replacing it) and the reasoning fields are cleared, so the
 * subsequent content handling emits it as ordinary text. Otherwise a thinking
 * block is opened if necessary and the text is emitted as a `thinking_delta`.
 *
 * @param delta Choice delta (may be mutated as described).
 * @param state Mutable translation state.
 * @param events Array that receives the events (mutated).
 */
function handleThinkingText(delta, state, events) {
	const reasoningText = delta.reasoning_text ?? delta.reasoning_content;
	if (!(reasoningText && reasoningText.length > 0)) return;
	if (state.contentBlockOpen) {
		delta.content = reasoningText;
		delta.reasoning_text = void 0;
		delta.reasoning_content = void 0;
		return;
	}
	const index = state.contentBlockIndex;
	if (!state.thinkingBlockOpen) {
		events.push({
			type: "content_block_start",
			index,
			content_block: {
				type: "thinking",
				thinking: ""
			}
		});
		state.thinkingBlockOpen = true;
	}
	events.push({
		type: "content_block_delta",
		index,
		delta: {
			type: "thinking_delta",
			thinking: reasoningText
		}
	});
}
5807
/**
 * Closes an open thinking block by emitting an empty signature followed by
 * `content_block_stop`, then advances the block index.
 *
 * @param state Mutable translation state.
 * @param events Array that receives the events (mutated).
 */
function closeThinkingBlockIfOpen(state, events) {
	if (!state.thinkingBlockOpen) return;
	const index = state.contentBlockIndex;
	events.push({
		type: "content_block_delta",
		index,
		delta: {
			type: "signature_delta",
			signature: ""
		}
	});
	events.push({
		type: "content_block_stop",
		index
	});
	state.contentBlockIndex = index + 1;
	state.thinkingBlockOpen = false;
}
5824
+ //#endregion
5825
+ //#region src/services/providers/anthropic-proxy.ts
5826
/** Request headers copied to the upstream provider for every provider type. */
const SHARED_FORWARDABLE_HEADERS = [
	"accept",
	"user-agent"
];
/** Request headers copied to the upstream provider only for `anthropic` providers. */
const ANTHROPIC_FORWARDABLE_HEADERS = [
	"anthropic-version",
	"anthropic-beta"
];
/** Upstream response headers removed before the response is handed back to the client. */
const STRIPPED_RESPONSE_HEADERS = [
	"connection",
	"content-encoding",
	"content-length",
	"keep-alive",
	"proxy-authenticate",
	"proxy-authorization",
	"te",
	"trailer",
	"transfer-encoding",
	"upgrade"
];
5840
/**
 * Builds the headers sent to an upstream provider.
 *
 * Starts from JSON `content-type`/`accept` defaults plus the provider's
 * credential (`authorization: Bearer …` when `authType` is `"authorization"`,
 * otherwise `x-api-key`). Non-empty shared forwardable headers from the
 * incoming request override the defaults; Anthropic-specific headers are
 * forwarded only for providers of type `"anthropic"`.
 *
 * @param providerConfig Provider settings (`authType`, `apiKey`, `type`).
 * @param requestHeaders Incoming request headers, read via `.get(name)`.
 * @returns Plain header object.
 */
function buildProviderUpstreamHeaders(providerConfig, requestHeaders) {
	const headers = {
		"content-type": "application/json",
		accept: "application/json"
	};
	if (providerConfig.authType === "authorization") headers.authorization = `Bearer ${providerConfig.apiKey}`;
	else headers["x-api-key"] = providerConfig.apiKey;
	const copyFromRequest = (headerNames) => {
		for (const headerName of headerNames) {
			const incomingValue = requestHeaders.get(headerName);
			if (incomingValue) headers[headerName] = incomingValue;
		}
	};
	copyFromRequest(SHARED_FORWARDABLE_HEADERS);
	if (providerConfig.type === "anthropic") copyFromRequest(ANTHROPIC_FORWARDABLE_HEADERS);
	return headers;
}
5860
+ function createProviderProxyResponse(upstreamResponse) {
5861
+ const headers = new Headers(upstreamResponse.headers);
5862
+ for (const headerName of STRIPPED_RESPONSE_HEADERS) headers.delete(headerName);
5863
+ return new Response(upstreamResponse.body, {
5864
+ headers,
5865
+ status: upstreamResponse.status,
5866
+ statusText: upstreamResponse.statusText
5867
+ });
5868
+ }
5869
+ async function forwardProviderMessages(providerConfig, payload, requestHeaders, fetchImpl = fetch) {
5870
+ return await fetchImpl(`${providerConfig.baseUrl}/v1/messages`, {
5871
+ method: "POST",
5872
+ headers: buildProviderUpstreamHeaders(providerConfig, requestHeaders),
5873
+ body: JSON.stringify(payload)
5874
+ });
5875
+ }
5876
+ async function forwardProviderChatCompletions(providerConfig, payload, requestHeaders, fetchImpl = fetch) {
5877
+ return await fetchImpl(`${providerConfig.baseUrl}/v1/chat/completions`, {
5878
+ method: "POST",
5879
+ headers: buildProviderUpstreamHeaders(providerConfig, requestHeaders),
5880
+ body: JSON.stringify(payload)
5881
+ });
5882
+ }
5883
+ async function forwardProviderModels(providerConfig, requestHeaders, fetchImpl = fetch) {
5884
+ return await fetchImpl(`${providerConfig.baseUrl}/v1/models`, {
5885
+ method: "GET",
5886
+ headers: buildProviderUpstreamHeaders(providerConfig, requestHeaders)
5887
+ });
5888
+ }
5889
+ //#endregion
5890
+ //#region src/routes/provider/messages/handler.ts
5891
+ const logger$4 = createHandlerLogger("provider-messages-handler");
5892
/**
 * Returns the fetch implementation stored on the context under
 * "providerFetch", falling back to the global `fetch` when that value is
 * null or undefined.
 */
const getProviderFetch$1 = (c) => {
  const injectedFetch = c.get("providerFetch");
  return injectedFetch ?? fetch;
};
5893
/**
 * Looks up the configuration for `provider`.
 *
 * Uses the resolver stored on the context under "providerConfigResolver" when
 * one is set; otherwise falls back to `getProviderConfig`.
 */
const resolveProviderConfig$1 = (c, provider) => {
  const resolver = c.get("providerConfigResolver") ?? getProviderConfig;
  return resolver(provider);
};
5896
// Upper bound applied when de-duplicating the message indexes chosen for cache markers.
const OPENAI_COMPATIBLE_CONTEXT_CACHE_MARKER_LIMIT = 4;
// Template copied onto a content part as its `cache_control` value.
const OPENAI_COMPATIBLE_CONTEXT_CACHE_CONTROL = { type: "ephemeral" };
// Message roles that may receive a cache marker.
const OPENAI_COMPATIBLE_CONTEXT_CACHE_ROLES = new Set(["system", "user", "assistant", "tool"]);
5904
/**
 * Writes a single SSE "error" event carrying `message` to the stream.
 *
 * Best effort: a failure while serializing or writing is logged as a warning
 * and not rethrown.
 *
 * @param {object} stream - Object exposing an async `writeSSE` method.
 * @param {string} message - Text placed in `error.message` of the event body.
 */
const writeProviderStreamError = async (stream, message) => {
  try {
    const errorBody = {
      error: {
        message,
        type: "api_error"
      },
      type: "error"
    };
    await stream.writeSSE({
      event: "error",
      data: JSON.stringify(errorBody)
    });
  } catch (writeFailure) {
    logger$4.warn("Failed to write provider stream error event", writeFailure);
  }
};
5920
/**
 * Route entry point: reads the `provider` path parameter and the JSON request
 * body, then delegates to handleProviderMessagesForProvider.
 */
async function handleProviderMessages(c) {
  const provider = c.req.param("provider");
  const payload = await c.req.json();
  return await handleProviderMessagesForProvider(c, { payload, provider });
}
5927
/**
 * Handles a messages request for one named provider.
 *
 * Responds with a 404 JSON error (and reports it through
 * `instrumentation.onError`) when the provider cannot be resolved. Otherwise
 * applies the per-model defaults and either hands off to the
 * OpenAI-compatible path or forwards the payload to the provider's messages
 * endpoint, answering with an SSE stream or a JSON body. Errors raised while
 * processing are logged and rethrown.
 *
 * @param {object} c - Request context.
 * @param {{instrumentation?: object, payload: object, provider: string}} options
 */
async function handleProviderMessagesForProvider(c, options) {
  const { instrumentation, payload, provider } = options;
  const providerConfig = resolveProviderConfig$1(c, provider);

  if (!providerConfig) {
    const message = `Provider '${provider}' not found or disabled`;
    const notFound = {
      errorMessage: message,
      errorName: "ProviderNotFoundError",
      errorStatus: 404,
      httpStatus: 404
    };
    instrumentation?.onError?.(notFound);
    const errorBody = { error: {
      message,
      type: "invalid_request_error"
    } };
    return c.json(errorBody, 404);
  }

  try {
    const modelConfig = providerConfig.models?.[payload.model];
    applyModelDefaults(payload, modelConfig);
    debugJson(logger$4, "provider.messages.request", { payload, provider });

    if (providerConfig.type === "openai-compatible") {
      return await handleOpenAICompatibleProviderMessages(c, {
        instrumentation,
        modelConfig,
        payload,
        provider,
        providerConfig
      });
    }

    applyMissingExtraBody(payload, { extraBody: modelConfig?.extraBody });
    const upstream = await forwardProviderMessages(providerConfig, payload, c.req.raw.headers, getProviderFetch$1(c));
    if (!upstream.ok) {
      logger$4.error("Failed to create responses", upstream);
      throw new HTTPError("Failed to create responses", upstream);
    }

    // Stream only when the client asked for it AND the upstream actually answered with SSE.
    const contentType = upstream.headers.get("content-type") ?? "";
    const shouldStream = Boolean(payload.stream) && contentType.includes("text/event-stream");
    if (shouldStream) {
      return streamProviderMessages({
        c,
        instrumentation,
        payload,
        provider,
        providerConfig,
        upstreamResponse: upstream
      });
    }

    const body = await upstream.json();
    return respondProviderMessagesJson(c, {
      body,
      instrumentation,
      payload,
      provider,
      providerConfig
    });
  } catch (error) {
    logger$4.error("provider.messages.error", { provider, error });
    throw error;
  }
}
5987
/**
 * Fills `temperature`, `top_p` and `top_k` on `payload` from the per-model
 * configuration (`temperature`, `topP`, `topK`) when the payload value is
 * null or undefined.
 *
 * A sampling key is only written when the model configuration actually
 * defines a default. When there is no default, a null/undefined payload entry
 * is removed instead of being stored as an own property holding `undefined`:
 * callers that later test `Object.hasOwn(payload, key)` would otherwise treat
 * the key as client-supplied and skip their own fallback value for it.
 *
 * @param {object} payload - Request payload, mutated in place.
 * @param {object} [modelConfig] - Optional per-model settings.
 */
const applyModelDefaults = (payload, modelConfig) => {
  const defaults = [
    ["temperature", modelConfig?.temperature],
    ["top_p", modelConfig?.topP],
    ["top_k", modelConfig?.topK]
  ];
  for (const [key, defaultValue] of defaults) {
    // A concrete client value (including 0) always wins.
    if (payload[key] !== null && payload[key] !== undefined) continue;
    if (defaultValue !== undefined) payload[key] = defaultValue;
    else delete payload[key];
  }
};
5992
/**
 * Copies each entry of `options.extraBody` onto `payload` unless the payload
 * already has an own property with that key. A missing `extraBody` is treated
 * as empty.
 */
const applyMissingExtraBody = (payload, options) => {
  const extraEntries = Object.entries(options.extraBody ?? {});
  for (const [key, value] of extraEntries) {
    if (Object.hasOwn(payload, key)) continue;
    payload[key] = value;
  }
};
5995
/**
 * Serves a messages request through a provider of type "openai-compatible".
 *
 * Converts the payload with createOpenAICompatiblePayload, sends it to the
 * provider's chat-completions endpoint and answers with either a translated
 * SSE stream or a translated JSON body. Throws HTTPError when the upstream
 * response is not ok.
 */
const handleOpenAICompatibleProviderMessages = async (c, options) => {
  const { instrumentation, modelConfig, payload, provider, providerConfig } = options;

  const chatPayload = createOpenAICompatiblePayload(payload, modelConfig);
  debugJson(logger$4, "provider.messages.openai_compatible.request", {
    payload: chatPayload,
    provider
  });

  const upstream = await forwardProviderChatCompletions(providerConfig, chatPayload, c.req.raw.headers, getProviderFetch$1(c));
  if (!upstream.ok) {
    logger$4.error("Failed to create openai-compatible responses", upstream);
    throw new HTTPError("Failed to create openai-compatible responses", upstream);
  }

  const contentType = upstream.headers.get("content-type") ?? "";
  const shouldStream = Boolean(chatPayload.stream) && contentType.includes("text/event-stream");
  if (!shouldStream) {
    const body = await upstream.json();
    return respondOpenAICompatibleProviderMessagesJson(c, {
      body,
      instrumentation,
      payload,
      provider
    });
  }

  return streamOpenAICompatibleProviderMessages({
    c,
    instrumentation,
    payload,
    provider,
    upstreamResponse: upstream
  });
};
6022
/**
 * Builds the chat-completions request body for an OpenAI-compatible provider.
 *
 * Steps, in order: translate the payload with translateToOpenAI, carry over
 * `top_k`, request usage in streamed responses, normalize assistant reasoning
 * fields, re-apply client values for keys named in `extraBody`, fill remaining
 * `extraBody` keys, default `parallel_tool_calls` to true, and add cache
 * markers unless `modelConfig.contextCache` is `false`.
 */
const createOpenAICompatiblePayload = (payload, modelConfig) => {
  const extraBody = modelConfig?.extraBody;
  const translated = translateToOpenAI(payload, {
    supportPdf: modelConfig?.supportPdf,
    toolContentSupportType: modelConfig?.toolContentSupportType ?? []
  });

  if (payload.top_k !== void 0) {
    translated.top_k = payload.top_k;
  }
  if (translated.stream) {
    translated.stream_options = { include_usage: true };
  }

  normalizeOpenAICompatibleReasoningContent(translated);
  applyOpenAICompatibleRequestOverrides(translated, { extraBody, source: payload });
  applyMissingExtraBody(translated, { extraBody });

  if (!Object.hasOwn(translated, "parallel_tool_calls")) {
    translated.parallel_tool_calls = true;
  }
  if (modelConfig?.contextCache !== false) {
    applyOpenAICompatibleContextCache(translated);
  }
  return translated;
};
6039
/**
 * Normalizes the reasoning fields of every assistant message in
 * `payload.messages`, in place.
 *
 * `reasoning_text` is copied into `reasoning_content` when the latter is
 * undefined; `reasoning_text` and `reasoning_opaque` are then removed.
 * Messages with any other role are left untouched.
 */
const normalizeOpenAICompatibleReasoningContent = (payload) => {
  for (const message of payload.messages) {
    if (message.role === "assistant") {
      const { reasoning_content: existing, reasoning_text: fallback } = message;
      if (existing === void 0 && fallback !== void 0) {
        message.reasoning_content = fallback;
      }
      delete message.reasoning_text;
      delete message.reasoning_opaque;
    }
  }
};
6047
/**
 * For every key named in `options.extraBody`, copies the value from
 * `options.source` onto `payload` when the source has that key as an own
 * property. Keys not present in `extraBody` are never copied.
 */
const applyOpenAICompatibleRequestOverrides = (payload, options) => {
  const overridableKeys = Object.keys(options.extraBody ?? {});
  overridableKeys.forEach((key) => {
    if (!Object.hasOwn(options.source, key)) return;
    payload[key] = options.source[key];
  });
};
6051
/**
 * Adds a cache marker to each message of `payload.messages` chosen by
 * selectContextCacheMessageIndexes. Mutates the messages in place.
 */
const applyOpenAICompatibleContextCache = (payload) => {
  const selected = selectContextCacheMessageIndexes(payload.messages);
  selected.forEach((messageIndex) => {
    applyContextCacheControl(payload.messages[messageIndex]);
  });
};
6055
/**
 * Picks which message indexes receive a cache marker.
 *
 * Among the eligible messages it takes the first two with role "system" and
 * the last two with any other role, de-duplicates them through uniqueIndexes
 * and returns the result sorted ascending.
 */
const selectContextCacheMessageIndexes = (messages) => {
  const systemIndexes = [];
  const otherIndexes = [];
  messages.forEach((message, index) => {
    if (!isContextCacheMarkerEligible(message)) return;
    const bucket = message.role === "system" ? systemIndexes : otherIndexes;
    bucket.push(index);
  });

  const chosen = [...systemIndexes.slice(0, 2), ...otherIndexes.slice(-2)];
  return uniqueIndexes(chosen).sort((left, right) => left - right);
};
6061
/**
 * Removes duplicate indexes (first occurrence wins) and keeps at most
 * OPENAI_COMPATIBLE_CONTEXT_CACHE_MARKER_LIMIT of them.
 */
const uniqueIndexes = (indexes) => {
  const deduplicated = Array.from(new Set(indexes));
  return deduplicated.slice(0, OPENAI_COMPATIBLE_CONTEXT_CACHE_MARKER_LIMIT);
};
6062
/**
 * Tells whether a message may carry a cache marker: its role must be in
 * OPENAI_COMPATIBLE_CONTEXT_CACHE_ROLES and its content must be a non-empty
 * string or a non-empty array.
 */
const isContextCacheMarkerEligible = (message) => {
  if (!OPENAI_COMPATIBLE_CONTEXT_CACHE_ROLES.has(message.role)) return false;
  const { content } = message;
  const isTextOrParts = typeof content === "string" || Array.isArray(content);
  return isTextOrParts && content.length > 0;
};
6067
/**
 * Marks one message for caching, in place.
 *
 * String content is replaced by a single text part carrying `cache_control`;
 * array content gets `cache_control` set on its last part. A missing message,
 * any other content type, or an array without a truthy last part is left
 * unchanged.
 */
const applyContextCacheControl = (message) => {
  if (!message) return;

  const { content } = message;
  if (typeof content === "string") {
    message.content = [{
      type: "text",
      text: content,
      cache_control: { ...OPENAI_COMPATIBLE_CONTEXT_CACHE_CONTROL }
    }];
    return;
  }

  if (Array.isArray(content)) {
    const tailPart = content.at(-1);
    if (tailPart) setContextCacheControl(tailPart);
  }
};
6082
/**
 * Sets `part.cache_control` to a fresh shallow copy of
 * OPENAI_COMPATIBLE_CONTEXT_CACHE_CONTROL.
 */
const setContextCacheControl = (part) => {
  part.cache_control = Object.assign({}, OPENAI_COMPATIBLE_CONTEXT_CACHE_CONTROL);
};
6085
/**
 * Relays an upstream messages SSE stream to the client.
 *
 * Ping events are re-emitted as-is, empty events are skipped, and a "[DONE]"
 * payload ends the loop as completed. Every other event is passed through
 * parseProviderStreamEvent (which may rewrite usage numbers), written out, and
 * its usage merged into the running total. An error event is reported through
 * `instrumentation.onError` and stops the relay. If the upstream ends without
 * a completion signal, or anything throws, the failure is reported and an
 * error event is written to the client.
 */
const streamProviderMessages = ({ c, instrumentation, providerConfig, upstreamResponse }) => {
  logger$4.debug("provider.messages.streaming");
  return streamSSE(c, async (stream) => {
    let usage = {};
    try {
      let completed = false;
      for await (const chunk of events(upstreamResponse)) {
        logger$4.debug("provider.messages.raw_stream_event:", chunk.data);
        const { event: eventName, data: rawData } = chunk;

        if (eventName === "ping") {
          await stream.writeSSE({
            event: "ping",
            data: "{\"type\":\"ping\"}"
          });
          continue;
        }
        if (!rawData) continue;
        if (rawData === "[DONE]") {
          completed = true;
          break;
        }

        const parsed = parseProviderStreamEvent(rawData, providerConfig);
        usage = mergeAnthropicUsage(usage, parsed.usage);
        const outgoingData = parsed.data;
        await stream.writeSSE({
          event: eventName,
          data: outgoingData
        });

        // The error event has already been forwarded above; report it and stop.
        if (parsed.error || eventName === "error") {
          const fallbackDetails = {
            errorMessage: outgoingData,
            errorName: "ProviderStreamError",
            httpStatus: 500
          };
          instrumentation?.onError?.(parsed.error ?? fallbackDetails);
          return;
        }
        completed = completed || parsed.done;
      }

      if (!completed) {
        throw new Error("Provider messages stream ended before completion");
      }
      instrumentation?.onComplete?.(usage);
    } catch (error) {
      const details = await extractErrorObservability(error);
      logger$4.warn("provider.messages.streaming.error", error);
      instrumentation?.onError?.(details);
      await writeProviderStreamError(stream, getUserVisibleErrorMessage(details));
    }
  });
};
6134
/**
 * Translates an upstream chat-completions SSE stream into message stream
 * events for the client.
 *
 * Ping events are re-emitted, empty events are skipped, and "[DONE]" ends the
 * loop as completed. Every other chunk is parsed, its usage (when present)
 * replaces the running usage, and the events produced by
 * translateChunkToAnthropicEvents are written out. After the loop any events
 * returned by flushPendingAnthropicStreamEvents are written too. Seeing a
 * "message_stop" event also counts as completion. Ending without completion,
 * or any thrown error, is reported and an error event is written.
 */
const streamOpenAICompatibleProviderMessages = ({ c, instrumentation, upstreamResponse }) => {
  logger$4.debug("provider.messages.openai_compatible.streaming");
  return streamSSE(c, async (stream) => {
    let usage = {};
    const streamState = {
      messageStartSent: false,
      contentBlockIndex: 0,
      contentBlockOpen: false,
      toolCalls: {},
      thinkingBlockOpen: false
    };
    try {
      let completed = false;

      // Serializes and writes each translated event, tracking completion.
      const writeTranslatedEvents = async (translatedEvents) => {
        for (const translatedEvent of translatedEvents) {
          const eventData = JSON.stringify(translatedEvent);
          debugLazy(logger$4, () => ["provider.messages.openai_compatible.translated_event:", eventData]);
          await stream.writeSSE({
            event: translatedEvent.type,
            data: eventData
          });
          completed ||= translatedEvent.type === "message_stop";
        }
      };

      for await (const chunk of events(upstreamResponse)) {
        logger$4.debug("provider.messages.openai_compatible.raw_stream_event:", chunk.data);
        if (chunk.event === "ping") {
          await stream.writeSSE({
            event: "ping",
            data: "{\"type\":\"ping\"}"
          });
          continue;
        }
        if (!chunk.data) continue;
        if (chunk.data === "[DONE]") {
          completed = true;
          break;
        }

        const parsedChunk = parseOpenAICompatibleStreamChunk(chunk.data);
        if (parsedChunk.usage) usage = normalizeOpenAIUsage(parsedChunk.usage);
        await writeTranslatedEvents(translateChunkToAnthropicEvents(parsedChunk, streamState));
      }

      await writeTranslatedEvents(flushPendingAnthropicStreamEvents(streamState));

      if (!completed) {
        throw new Error("OpenAI-compatible provider messages stream ended before completion");
      }
      instrumentation?.onComplete?.(usage);
    } catch (error) {
      const details = await extractErrorObservability(error);
      logger$4.warn("provider.messages.openai_compatible.streaming.error", error);
      instrumentation?.onError?.(details);
      await writeProviderStreamError(stream, getUserVisibleErrorMessage(details));
    }
  });
};
6193
/**
 * Parses one chat-completions stream payload.
 *
 * @param {string} data - Raw JSON text of the SSE event.
 * @returns {object} The parsed chunk.
 * @throws {Error} When the text is not valid JSON (original error attached as
 *   `cause`), or when the chunk carries an `error` field that yields a
 *   non-empty message.
 */
const parseOpenAICompatibleStreamChunk = (data) => {
  let chunk;
  try {
    chunk = JSON.parse(data);
  } catch (error) {
    logger$4.error("provider.messages.openai_compatible.parse_chunk_error", { data, error });
    throw new Error("Failed to parse OpenAI-compatible stream chunk", { cause: error });
  }

  const upstreamErrorMessage = getOpenAICompatibleStreamErrorMessage(chunk.error);
  if (upstreamErrorMessage) {
    throw new Error(upstreamErrorMessage);
  }
  return chunk;
};
6208
/**
 * Derives a message from the `error` field of a stream chunk.
 *
 * @param {unknown} error - Value of the chunk's `error` field.
 * @returns {string|null} The string itself when `error` is a string; for a
 *   non-null object its string `message`, or the JSON of the whole object when
 *   `message` is not a string; `null` for everything else.
 */
const getOpenAICompatibleStreamErrorMessage = (error) => {
  switch (typeof error) {
    case "string":
      return error;
    case "object": {
      if (error === null) return null;
      const { message } = error;
      if (typeof message === "string") return message;
      return JSON.stringify(error);
    }
    default:
      return null;
  }
};
6214
/**
 * Parses one messages stream event and extracts what the relay needs.
 *
 * Usage carried by "message_start" and "message_delta" events is passed
 * through adjustInputTokens before the event is re-serialized, so the returned
 * `data` reflects any adjustment.
 *
 * @param {string} data - Raw JSON text of the event.
 * @param {object} providerConfig - Provider settings forwarded to adjustInputTokens.
 * @returns {{data: string, done: boolean, usage: object, model?: string, error?: object}}
 *   `done` is true only for "message_stop"; `model` is set for
 *   "message_start"; `error` is set for "error" events.
 * @throws {Error} When parsing or field access fails (original error as `cause`).
 */
const parseProviderStreamEvent = (data, providerConfig) => {
  try {
    const event = JSON.parse(data);
    const extras = {};
    let done = false;
    let usage = {};

    switch (event.type) {
      case "message_start":
        adjustInputTokens(providerConfig, event.message.usage);
        extras.model = event.message.model;
        usage = normalizeAnthropicUsage(event.message.usage);
        break;
      case "message_delta":
        adjustInputTokens(providerConfig, event.usage);
        usage = normalizeAnthropicUsage(event.usage);
        break;
      case "message_stop":
        done = true;
        break;
      case "error":
        extras.error = {
          errorMessage: event.error.message,
          errorName: event.error.type,
          httpStatus: 500
        };
        break;
      default:
        break;
    }

    return {
      data: JSON.stringify(event),
      done,
      ...extras,
      usage
    };
  } catch (error) {
    logger$4.error("provider.messages.streaming.adjust_tokens_error", {
      error,
      originalData: data
    });
    throw new Error("Failed to parse provider messages stream event", { cause: error });
  }
};
6262
/**
 * Sends a non-streamed provider messages result as JSON.
 *
 * The body's usage is passed through adjustInputTokens first, then the body is
 * logged, serialized with `c.json`, and its normalized usage reported through
 * `instrumentation.onComplete`.
 */
const respondProviderMessagesJson = (c, options) => {
  const { body, instrumentation, providerConfig } = options;
  adjustInputTokens(providerConfig, body.usage);
  debugJson(logger$4, "provider.messages.no_stream result:", body);

  const jsonResponse = c.json(body);
  const reportedUsage = normalizeAnthropicUsage(body.usage);
  instrumentation?.onComplete?.(reportedUsage);
  return jsonResponse;
};
6270
/**
 * Sends a non-streamed chat-completions result as JSON.
 *
 * The upstream body is converted with translateToAnthropic, logged and
 * serialized with `c.json`; the normalized usage of the ORIGINAL upstream body
 * is reported through `instrumentation.onComplete`.
 */
const respondOpenAICompatibleProviderMessagesJson = (c, options) => {
  const { body, instrumentation } = options;
  const translatedBody = translateToAnthropic(body);
  debugJson(logger$4, "provider.messages.openai_compatible.no_stream result:", translatedBody);

  const jsonResponse = c.json(translatedBody);
  const reportedUsage = normalizeOpenAIUsage(body.usage);
  instrumentation?.onComplete?.(reportedUsage);
  return jsonResponse;
};
6278
/**
 * Converts a chat-completions usage object into the internal usage shape.
 *
 * `inputTokens` is `prompt_tokens` minus the cache-creation and cached token
 * counts from `prompt_tokens_details`, floored at 0; it stays undefined when
 * `prompt_tokens` is undefined.
 *
 * @param {object} [usage] - Upstream usage object; may be missing.
 * @returns {{inputTokens: (number|undefined), outputTokens: *, cacheCreationInputTokens: *, cacheReadInputTokens: *}}
 */
const normalizeOpenAIUsage = (usage) => {
  const promptDetails = usage?.prompt_tokens_details;
  const cacheCreationInputTokens = promptDetails?.cache_creation_input_tokens;
  const cacheReadInputTokens = promptDetails?.cached_tokens;

  let inputTokens;
  if (usage?.prompt_tokens !== void 0) {
    const uncached = usage.prompt_tokens - (cacheCreationInputTokens ?? 0) - (cacheReadInputTokens ?? 0);
    inputTokens = Math.max(0, uncached);
  }

  return {
    inputTokens,
    outputTokens: usage?.completion_tokens,
    cacheCreationInputTokens,
    cacheReadInputTokens
  };
};
6288
/**
 * Maps a snake_case messages usage object onto the internal camelCase usage
 * shape. A missing usage object yields all-undefined fields.
 */
const normalizeAnthropicUsage = (usage) => {
  const {
    input_tokens: inputTokens,
    output_tokens: outputTokens,
    cache_creation_input_tokens: cacheCreationInputTokens,
    cache_read_input_tokens: cacheReadInputTokens
  } = usage ?? {};
  return {
    inputTokens,
    outputTokens,
    cacheCreationInputTokens,
    cacheReadInputTokens
  };
};
6294
/**
 * Combines two internal usage objects field by field: the value from `next`
 * wins unless it is null or undefined, in which case the value from `current`
 * is kept. Returns a new object.
 */
const mergeAnthropicUsage = (current, next) => {
  const usageFields = ["inputTokens", "outputTokens", "cacheCreationInputTokens", "cacheReadInputTokens"];
  return Object.fromEntries(usageFields.map((field) => [field, next[field] ?? current[field]]));
};
6300
/**
 * When `providerConfig.adjustInputTokens` is truthy, rewrites
 * `usage.input_tokens` in place to exclude cache-read and cache-creation
 * tokens (floored at 0) and logs the adjusted usage. Does nothing when the
 * flag is off or `usage` is missing.
 */
const adjustInputTokens = (providerConfig, usage) => {
  const enabled = Boolean(providerConfig.adjustInputTokens);
  if (!enabled || !usage) return;

  const reportedInput = usage.input_tokens ?? 0;
  const cacheRead = usage.cache_read_input_tokens ?? 0;
  const cacheCreation = usage.cache_creation_input_tokens ?? 0;
  usage.input_tokens = Math.max(0, reportedInput - cacheRead - cacheCreation);
  debugJson(logger$4, "provider.messages.adjusted_usage:", usage);
};
4863
6305
  //#endregion
4864
6306
  //#region src/routes/messages/responses-stream-translation.ts
4865
6307
  const MAX_CONSECUTIVE_FUNCTION_CALL_WHITESPACE = 20;
@@ -4896,40 +6338,40 @@ const createResponsesStreamState = () => ({
4896
6338
  blockHasDelta: /* @__PURE__ */ new Set(),
4897
6339
  functionCallStateByOutputIndex: /* @__PURE__ */ new Map()
4898
6340
  });
4899
/**
 * Dispatches one raw Responses stream event to the handler for its `type` and
 * returns that handler's result. "response.completed" and
 * "response.incomplete" share a handler; unknown types produce an empty array.
 */
const translateResponsesStreamEvent = (rawEvent, state) => {
  let handler;
  switch (rawEvent.type) {
    case "response.created":
      handler = handleResponseCreated;
      break;
    case "response.output_item.added":
      handler = handleOutputItemAdded$1;
      break;
    case "response.reasoning_summary_text.delta":
      handler = handleReasoningSummaryTextDelta;
      break;
    case "response.output_text.delta":
      handler = handleOutputTextDelta;
      break;
    case "response.reasoning_summary_text.done":
      handler = handleReasoningSummaryTextDone;
      break;
    case "response.output_text.done":
      handler = handleOutputTextDone;
      break;
    case "response.output_item.done":
      handler = handleOutputItemDone$1;
      break;
    case "response.function_call_arguments.delta":
      handler = handleFunctionCallArgumentsDelta;
      break;
    case "response.function_call_arguments.done":
      handler = handleFunctionCallArgumentsDone;
      break;
    case "response.completed":
    case "response.incomplete":
      handler = handleResponseCompleted;
      break;
    case "response.failed":
      handler = handleResponseFailed;
      break;
    case "error":
      handler = handleErrorEvent;
      break;
    default:
      return [];
  }
  return handler(rawEvent, state);
};
4917
/**
 * Handles "response.created" by delegating to messageStart with the stream
 * state and the event's `response`.
 */
const handleResponseCreated = (rawEvent, state) => messageStart(state, rawEvent.response);
4920
- const handleOutputItemAdded$1 = (rawEvent, state$1) => {
4921
- const events$1 = new Array();
6362
+ const handleOutputItemAdded$1 = (rawEvent, state) => {
6363
+ const events = new Array();
4922
6364
  const functionCallDetails = extractFunctionCallDetails(rawEvent);
4923
- if (!functionCallDetails) return events$1;
6365
+ if (!functionCallDetails) return events;
4924
6366
  const { outputIndex, toolCallId, name, initialArguments } = functionCallDetails;
4925
- const blockIndex = openFunctionCallBlock(state$1, {
6367
+ const blockIndex = openFunctionCallBlock(state, {
4926
6368
  outputIndex,
4927
6369
  toolCallId,
4928
6370
  name,
4929
- events: events$1
6371
+ events
4930
6372
  });
4931
6373
  if (initialArguments !== void 0 && initialArguments.length > 0) {
4932
- events$1.push({
6374
+ events.push({
4933
6375
  type: "content_block_delta",
4934
6376
  index: blockIndex,
4935
6377
  delta: {
@@ -4937,29 +6379,29 @@ const handleOutputItemAdded$1 = (rawEvent, state$1) => {
4937
6379
  partial_json: initialArguments
4938
6380
  }
4939
6381
  });
4940
- state$1.blockHasDelta.add(blockIndex);
6382
+ state.blockHasDelta.add(blockIndex);
4941
6383
  }
4942
- return events$1;
6384
+ return events;
4943
6385
  };
4944
- const handleOutputItemDone$1 = (rawEvent, state$1) => {
4945
- const events$1 = new Array();
6386
+ const handleOutputItemDone$1 = (rawEvent, state) => {
6387
+ const events = new Array();
4946
6388
  const item = rawEvent.item;
4947
6389
  const itemType = item.type;
4948
6390
  const outputIndex = rawEvent.output_index;
4949
6391
  if (itemType === "compaction") {
4950
- if (!item.id || !item.encrypted_content) return events$1;
4951
- const blockIndex$1 = openThinkingBlockIfNeeded(state$1, outputIndex, events$1);
4952
- if (!state$1.blockHasDelta.has(blockIndex$1)) events$1.push({
6392
+ if (!item.id || !item.encrypted_content) return events;
6393
+ const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events);
6394
+ if (!state.blockHasDelta.has(blockIndex)) events.push({
4953
6395
  type: "content_block_delta",
4954
- index: blockIndex$1,
6396
+ index: blockIndex,
4955
6397
  delta: {
4956
6398
  type: "thinking_delta",
4957
- thinking: THINKING_TEXT$1
6399
+ thinking: THINKING_TEXT
4958
6400
  }
4959
6401
  });
4960
- events$1.push({
6402
+ events.push({
4961
6403
  type: "content_block_delta",
4962
- index: blockIndex$1,
6404
+ index: blockIndex,
4963
6405
  delta: {
4964
6406
  type: "signature_delta",
4965
6407
  signature: encodeCompactionCarrierSignature({
@@ -4968,22 +6410,22 @@ const handleOutputItemDone$1 = (rawEvent, state$1) => {
4968
6410
  })
4969
6411
  }
4970
6412
  });
4971
- state$1.blockHasDelta.add(blockIndex$1);
4972
- return events$1;
6413
+ state.blockHasDelta.add(blockIndex);
6414
+ return events;
4973
6415
  }
4974
- if (itemType !== "reasoning") return events$1;
4975
- const blockIndex = openThinkingBlockIfNeeded(state$1, outputIndex, events$1);
6416
+ if (itemType !== "reasoning") return events;
6417
+ const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events);
4976
6418
  const signature = (item.encrypted_content ?? "") + "@" + item.id;
4977
6419
  if (signature) {
4978
- if (!item.summary || item.summary.length === 0) events$1.push({
6420
+ if (!item.summary || item.summary.length === 0) events.push({
4979
6421
  type: "content_block_delta",
4980
6422
  index: blockIndex,
4981
6423
  delta: {
4982
6424
  type: "thinking_delta",
4983
- thinking: THINKING_TEXT$1
6425
+ thinking: THINKING_TEXT
4984
6426
  }
4985
6427
  });
4986
- events$1.push({
6428
+ events.push({
4987
6429
  type: "content_block_delta",
4988
6430
  index: blockIndex,
4989
6431
  delta: {
@@ -4991,25 +6433,25 @@ const handleOutputItemDone$1 = (rawEvent, state$1) => {
4991
6433
  signature
4992
6434
  }
4993
6435
  });
4994
- state$1.blockHasDelta.add(blockIndex);
6436
+ state.blockHasDelta.add(blockIndex);
4995
6437
  }
4996
- return events$1;
6438
+ return events;
4997
6439
  };
4998
- const handleFunctionCallArgumentsDelta = (rawEvent, state$1) => {
4999
- const events$1 = new Array();
6440
+ const handleFunctionCallArgumentsDelta = (rawEvent, state) => {
6441
+ const events = new Array();
5000
6442
  const outputIndex = rawEvent.output_index;
5001
6443
  const deltaText = rawEvent.delta;
5002
- if (!deltaText) return events$1;
5003
- const blockIndex = openFunctionCallBlock(state$1, {
6444
+ if (!deltaText) return events;
6445
+ const blockIndex = openFunctionCallBlock(state, {
5004
6446
  outputIndex,
5005
- events: events$1
6447
+ events
5006
6448
  });
5007
- const functionCallState = state$1.functionCallStateByOutputIndex.get(outputIndex);
5008
- if (!functionCallState) return handleFunctionCallArgumentsValidationError(new FunctionCallArgumentsValidationError("Received function call arguments delta without an open tool call block."), state$1, events$1);
6449
+ const functionCallState = state.functionCallStateByOutputIndex.get(outputIndex);
6450
+ if (!functionCallState) return handleFunctionCallArgumentsValidationError(new FunctionCallArgumentsValidationError("Received function call arguments delta without an open tool call block."), state, events);
5009
6451
  const { nextCount, exceeded } = updateWhitespaceRunState(functionCallState.consecutiveWhitespaceCount, deltaText);
5010
- if (exceeded) return handleFunctionCallArgumentsValidationError(new FunctionCallArgumentsValidationError("Received function call arguments delta containing more than 20 consecutive whitespace characters."), state$1, events$1);
6452
+ if (exceeded) return handleFunctionCallArgumentsValidationError(new FunctionCallArgumentsValidationError("Received function call arguments delta containing more than 20 consecutive whitespace characters."), state, events);
5011
6453
  functionCallState.consecutiveWhitespaceCount = nextCount;
5012
- events$1.push({
6454
+ events.push({
5013
6455
  type: "content_block_delta",
5014
6456
  index: blockIndex,
5015
6457
  delta: {
@@ -5017,19 +6459,19 @@ const handleFunctionCallArgumentsDelta = (rawEvent, state$1) => {
5017
6459
  partial_json: deltaText
5018
6460
  }
5019
6461
  });
5020
- state$1.blockHasDelta.add(blockIndex);
5021
- return events$1;
6462
+ state.blockHasDelta.add(blockIndex);
6463
+ return events;
5022
6464
  };
5023
- const handleFunctionCallArgumentsDone = (rawEvent, state$1) => {
5024
- const events$1 = new Array();
6465
+ const handleFunctionCallArgumentsDone = (rawEvent, state) => {
6466
+ const events = new Array();
5025
6467
  const outputIndex = rawEvent.output_index;
5026
- const blockIndex = openFunctionCallBlock(state$1, {
6468
+ const blockIndex = openFunctionCallBlock(state, {
5027
6469
  outputIndex,
5028
- events: events$1
6470
+ events
5029
6471
  });
5030
6472
  const finalArguments = typeof rawEvent.arguments === "string" ? rawEvent.arguments : void 0;
5031
- if (!state$1.blockHasDelta.has(blockIndex) && finalArguments) {
5032
- events$1.push({
6473
+ if (!state.blockHasDelta.has(blockIndex) && finalArguments) {
6474
+ events.push({
5033
6475
  type: "content_block_delta",
5034
6476
  index: blockIndex,
5035
6477
  delta: {
@@ -5037,23 +6479,23 @@ const handleFunctionCallArgumentsDone = (rawEvent, state$1) => {
5037
6479
  partial_json: finalArguments
5038
6480
  }
5039
6481
  });
5040
- state$1.blockHasDelta.add(blockIndex);
6482
+ state.blockHasDelta.add(blockIndex);
5041
6483
  }
5042
- state$1.functionCallStateByOutputIndex.delete(outputIndex);
5043
- return events$1;
6484
+ state.functionCallStateByOutputIndex.delete(outputIndex);
6485
+ return events;
5044
6486
  };
5045
- const handleOutputTextDelta = (rawEvent, state$1) => {
5046
- const events$1 = new Array();
6487
+ const handleOutputTextDelta = (rawEvent, state) => {
6488
+ const events = new Array();
5047
6489
  const outputIndex = rawEvent.output_index;
5048
6490
  const contentIndex = rawEvent.content_index;
5049
6491
  const deltaText = rawEvent.delta;
5050
- if (!deltaText) return events$1;
5051
- const blockIndex = openTextBlockIfNeeded(state$1, {
6492
+ if (!deltaText) return events;
6493
+ const blockIndex = openTextBlockIfNeeded(state, {
5052
6494
  outputIndex,
5053
6495
  contentIndex,
5054
- events: events$1
6496
+ events
5055
6497
  });
5056
- events$1.push({
6498
+ events.push({
5057
6499
  type: "content_block_delta",
5058
6500
  index: blockIndex,
5059
6501
  delta: {
@@ -5061,15 +6503,15 @@ const handleOutputTextDelta = (rawEvent, state$1) => {
5061
6503
  text: deltaText
5062
6504
  }
5063
6505
  });
5064
- state$1.blockHasDelta.add(blockIndex);
5065
- return events$1;
6506
+ state.blockHasDelta.add(blockIndex);
6507
+ return events;
5066
6508
  };
5067
- const handleReasoningSummaryTextDelta = (rawEvent, state$1) => {
6509
+ const handleReasoningSummaryTextDelta = (rawEvent, state) => {
5068
6510
  const outputIndex = rawEvent.output_index;
5069
6511
  const deltaText = rawEvent.delta;
5070
- const events$1 = new Array();
5071
- const blockIndex = openThinkingBlockIfNeeded(state$1, outputIndex, events$1);
5072
- events$1.push({
6512
+ const events = new Array();
6513
+ const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events);
6514
+ events.push({
5073
6515
  type: "content_block_delta",
5074
6516
  index: blockIndex,
5075
6517
  delta: {
@@ -5077,15 +6519,15 @@ const handleReasoningSummaryTextDelta = (rawEvent, state$1) => {
5077
6519
  thinking: deltaText
5078
6520
  }
5079
6521
  });
5080
- state$1.blockHasDelta.add(blockIndex);
5081
- return events$1;
6522
+ state.blockHasDelta.add(blockIndex);
6523
+ return events;
5082
6524
  };
5083
- const handleReasoningSummaryTextDone = (rawEvent, state$1) => {
6525
+ const handleReasoningSummaryTextDone = (rawEvent, state) => {
5084
6526
  const outputIndex = rawEvent.output_index;
5085
6527
  const text = rawEvent.text;
5086
- const events$1 = new Array();
5087
- const blockIndex = openThinkingBlockIfNeeded(state$1, outputIndex, events$1);
5088
- if (text && !state$1.blockHasDelta.has(blockIndex)) events$1.push({
6528
+ const events = new Array();
6529
+ const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events);
6530
+ if (text && !state.blockHasDelta.has(blockIndex)) events.push({
5089
6531
  type: "content_block_delta",
5090
6532
  index: blockIndex,
5091
6533
  delta: {
@@ -5093,19 +6535,19 @@ const handleReasoningSummaryTextDone = (rawEvent, state$1) => {
5093
6535
  thinking: text
5094
6536
  }
5095
6537
  });
5096
- return events$1;
6538
+ return events;
5097
6539
  };
5098
- const handleOutputTextDone = (rawEvent, state$1) => {
5099
- const events$1 = new Array();
6540
+ const handleOutputTextDone = (rawEvent, state) => {
6541
+ const events = new Array();
5100
6542
  const outputIndex = rawEvent.output_index;
5101
6543
  const contentIndex = rawEvent.content_index;
5102
6544
  const text = rawEvent.text;
5103
- const blockIndex = openTextBlockIfNeeded(state$1, {
6545
+ const blockIndex = openTextBlockIfNeeded(state, {
5104
6546
  outputIndex,
5105
6547
  contentIndex,
5106
- events: events$1
6548
+ events
5107
6549
  });
5108
- if (text && !state$1.blockHasDelta.has(blockIndex)) events$1.push({
6550
+ if (text && !state.blockHasDelta.has(blockIndex)) events.push({
5109
6551
  type: "content_block_delta",
5110
6552
  index: blockIndex,
5111
6553
  delta: {
@@ -5113,14 +6555,15 @@ const handleOutputTextDone = (rawEvent, state$1) => {
5113
6555
  text
5114
6556
  }
5115
6557
  });
5116
- return events$1;
6558
+ return events;
5117
6559
  };
5118
- const handleResponseCompleted = (rawEvent, state$1) => {
6560
+ const handleResponseCompleted = (rawEvent, state) => {
5119
6561
  const response = rawEvent.response;
5120
- const events$1 = new Array();
5121
- closeAllOpenBlocks(state$1, events$1);
6562
+ const events = new Array();
6563
+ closeAllOpenBlocks(state, events);
6564
+ state.responseStatus = response.status;
5122
6565
  const anthropic = translateResponsesResultToAnthropic(response);
5123
- events$1.push({
6566
+ events.push({
5124
6567
  type: "message_delta",
5125
6568
  delta: {
5126
6569
  stop_reason: anthropic.stop_reason,
@@ -5128,39 +6571,40 @@ const handleResponseCompleted = (rawEvent, state$1) => {
5128
6571
  },
5129
6572
  usage: anthropic.usage
5130
6573
  }, { type: "message_stop" });
5131
- state$1.messageCompleted = true;
5132
- return events$1;
6574
+ state.messageCompleted = true;
6575
+ return events;
5133
6576
  };
5134
- const handleResponseFailed = (rawEvent, state$1) => {
6577
+ const handleResponseFailed = (rawEvent, state) => {
5135
6578
  const response = rawEvent.response;
5136
- const events$1 = new Array();
5137
- closeAllOpenBlocks(state$1, events$1);
6579
+ const events = new Array();
6580
+ closeAllOpenBlocks(state, events);
6581
+ state.responseStatus = response.status;
5138
6582
  const message = response.error?.message ?? "The response failed due to an unknown error.";
5139
- events$1.push(buildErrorEvent(message));
5140
- state$1.messageCompleted = true;
5141
- return events$1;
6583
+ events.push(buildErrorEvent(message));
6584
+ state.messageCompleted = true;
6585
+ return events;
5142
6586
  };
5143
- const handleErrorEvent = (rawEvent, state$1) => {
6587
+ const handleErrorEvent = (rawEvent, state) => {
5144
6588
  const message = typeof rawEvent.message === "string" ? rawEvent.message : "An unexpected error occurred during streaming.";
5145
- state$1.messageCompleted = true;
6589
+ state.messageCompleted = true;
5146
6590
  return [buildErrorEvent(message)];
5147
6591
  };
5148
- const handleFunctionCallArgumentsValidationError = (error, state$1, events$1 = []) => {
6592
+ const handleFunctionCallArgumentsValidationError = (error, state, events = []) => {
5149
6593
  const reason = error.message;
5150
- closeAllOpenBlocks(state$1, events$1);
5151
- state$1.messageCompleted = true;
5152
- events$1.push(buildErrorEvent(reason));
5153
- return events$1;
6594
+ closeAllOpenBlocks(state, events);
6595
+ state.messageCompleted = true;
6596
+ events.push(buildErrorEvent(reason));
6597
+ return events;
5154
6598
  };
5155
- const messageStart = (state$1, response) => {
5156
- state$1.messageStartSent = true;
6599
+ const messageStart = (state, response) => {
6600
+ state.messageStartSent = true;
5157
6601
  const inputCachedTokens = response.usage?.input_tokens_details?.cached_tokens;
5158
6602
  const upstreamInputTokens = response.usage?.input_tokens;
5159
- const historicalInputTokens = state$1.historicalInputTokens;
5160
- const historicalOutputTokens = state$1.historicalOutputTokens ?? 0;
6603
+ const historicalInputTokens = state.historicalInputTokens;
6604
+ const historicalOutputTokens = state.historicalOutputTokens ?? 0;
5161
6605
  const historicalTotalTokens = historicalInputTokens !== void 0 ? historicalInputTokens + historicalOutputTokens : void 0;
5162
- const inputTokens = upstreamInputTokens !== void 0 ? upstreamInputTokens - (inputCachedTokens ?? 0) : historicalTotalTokens ?? state$1.estimatedInputTokens ?? 0;
5163
- const cacheReadTokens = upstreamInputTokens !== void 0 ? inputCachedTokens ?? 0 : state$1.historicalCachedInputTokens ?? 0;
6606
+ const inputTokens = upstreamInputTokens !== void 0 ? upstreamInputTokens - (inputCachedTokens ?? 0) : historicalTotalTokens ?? state.estimatedInputTokens ?? 0;
6607
+ const cacheReadTokens = upstreamInputTokens !== void 0 ? inputCachedTokens ?? 0 : state.historicalCachedInputTokens ?? 0;
5164
6608
  return [{
5165
6609
  type: "message_start",
5166
6610
  message: {
@@ -5179,18 +6623,18 @@ const messageStart = (state$1, response) => {
5179
6623
  }
5180
6624
  }];
5181
6625
  };
5182
- const openTextBlockIfNeeded = (state$1, params) => {
5183
- const { outputIndex, contentIndex, events: events$1 } = params;
6626
+ const openTextBlockIfNeeded = (state, params) => {
6627
+ const { outputIndex, contentIndex, events } = params;
5184
6628
  const key = getBlockKey(outputIndex, contentIndex);
5185
- let blockIndex = state$1.blockIndexByKey.get(key);
6629
+ let blockIndex = state.blockIndexByKey.get(key);
5186
6630
  if (blockIndex === void 0) {
5187
- blockIndex = state$1.nextContentBlockIndex;
5188
- state$1.nextContentBlockIndex += 1;
5189
- state$1.blockIndexByKey.set(key, blockIndex);
6631
+ blockIndex = state.nextContentBlockIndex;
6632
+ state.nextContentBlockIndex += 1;
6633
+ state.blockIndexByKey.set(key, blockIndex);
5190
6634
  }
5191
- if (!state$1.openBlocks.has(blockIndex)) {
5192
- closeOpenBlocks(state$1, events$1);
5193
- events$1.push({
6635
+ if (!state.openBlocks.has(blockIndex)) {
6636
+ closeOpenBlocks(state, events);
6637
+ events.push({
5194
6638
  type: "content_block_start",
5195
6639
  index: blockIndex,
5196
6640
  content_block: {
@@ -5198,21 +6642,21 @@ const openTextBlockIfNeeded = (state$1, params) => {
5198
6642
  text: ""
5199
6643
  }
5200
6644
  });
5201
- state$1.openBlocks.add(blockIndex);
6645
+ state.openBlocks.add(blockIndex);
5202
6646
  }
5203
6647
  return blockIndex;
5204
6648
  };
5205
- const openThinkingBlockIfNeeded = (state$1, outputIndex, events$1) => {
6649
+ const openThinkingBlockIfNeeded = (state, outputIndex, events) => {
5206
6650
  const key = getBlockKey(outputIndex, 0);
5207
- let blockIndex = state$1.blockIndexByKey.get(key);
6651
+ let blockIndex = state.blockIndexByKey.get(key);
5208
6652
  if (blockIndex === void 0) {
5209
- blockIndex = state$1.nextContentBlockIndex;
5210
- state$1.nextContentBlockIndex += 1;
5211
- state$1.blockIndexByKey.set(key, blockIndex);
6653
+ blockIndex = state.nextContentBlockIndex;
6654
+ state.nextContentBlockIndex += 1;
6655
+ state.blockIndexByKey.set(key, blockIndex);
5212
6656
  }
5213
- if (!state$1.openBlocks.has(blockIndex)) {
5214
- closeOpenBlocks(state$1, events$1);
5215
- events$1.push({
6657
+ if (!state.openBlocks.has(blockIndex)) {
6658
+ closeOpenBlocks(state, events);
6659
+ events.push({
5216
6660
  type: "content_block_start",
5217
6661
  index: blockIndex,
5218
6662
  content_block: {
@@ -5220,25 +6664,25 @@ const openThinkingBlockIfNeeded = (state$1, outputIndex, events$1) => {
5220
6664
  thinking: ""
5221
6665
  }
5222
6666
  });
5223
- state$1.openBlocks.add(blockIndex);
6667
+ state.openBlocks.add(blockIndex);
5224
6668
  }
5225
6669
  return blockIndex;
5226
6670
  };
5227
- const closeBlockIfOpen = (state$1, blockIndex, events$1) => {
5228
- if (!state$1.openBlocks.has(blockIndex)) return;
5229
- events$1.push({
6671
+ const closeBlockIfOpen = (state, blockIndex, events) => {
6672
+ if (!state.openBlocks.has(blockIndex)) return;
6673
+ events.push({
5230
6674
  type: "content_block_stop",
5231
6675
  index: blockIndex
5232
6676
  });
5233
- state$1.openBlocks.delete(blockIndex);
5234
- state$1.blockHasDelta.delete(blockIndex);
6677
+ state.openBlocks.delete(blockIndex);
6678
+ state.blockHasDelta.delete(blockIndex);
5235
6679
  };
5236
- const closeOpenBlocks = (state$1, events$1) => {
5237
- for (const blockIndex of state$1.openBlocks) closeBlockIfOpen(state$1, blockIndex, events$1);
6680
+ const closeOpenBlocks = (state, events) => {
6681
+ for (const blockIndex of state.openBlocks) closeBlockIfOpen(state, blockIndex, events);
5238
6682
  };
5239
- const closeAllOpenBlocks = (state$1, events$1) => {
5240
- closeOpenBlocks(state$1, events$1);
5241
- state$1.functionCallStateByOutputIndex.clear();
6683
+ const closeAllOpenBlocks = (state, events) => {
6684
+ closeOpenBlocks(state, events);
6685
+ state.functionCallStateByOutputIndex.clear();
5242
6686
  };
5243
6687
  const buildErrorEvent = (message) => ({
5244
6688
  type: "error",
@@ -5248,24 +6692,24 @@ const buildErrorEvent = (message) => ({
5248
6692
  }
5249
6693
  });
5250
6694
  const getBlockKey = (outputIndex, contentIndex) => `${outputIndex}:${contentIndex}`;
5251
- const openFunctionCallBlock = (state$1, params) => {
5252
- const { outputIndex, toolCallId, name, events: events$1 } = params;
5253
- let functionCallState = state$1.functionCallStateByOutputIndex.get(outputIndex);
6695
+ const openFunctionCallBlock = (state, params) => {
6696
+ const { outputIndex, toolCallId, name, events } = params;
6697
+ let functionCallState = state.functionCallStateByOutputIndex.get(outputIndex);
5254
6698
  if (!functionCallState) {
5255
- const blockIndex$1 = state$1.nextContentBlockIndex;
5256
- state$1.nextContentBlockIndex += 1;
6699
+ const blockIndex = state.nextContentBlockIndex;
6700
+ state.nextContentBlockIndex += 1;
5257
6701
  functionCallState = {
5258
- blockIndex: blockIndex$1,
5259
- toolCallId: toolCallId ?? `tool_call_${blockIndex$1}`,
6702
+ blockIndex,
6703
+ toolCallId: toolCallId ?? `tool_call_${blockIndex}`,
5260
6704
  name: name ?? "function",
5261
6705
  consecutiveWhitespaceCount: 0
5262
6706
  };
5263
- state$1.functionCallStateByOutputIndex.set(outputIndex, functionCallState);
6707
+ state.functionCallStateByOutputIndex.set(outputIndex, functionCallState);
5264
6708
  }
5265
6709
  const { blockIndex } = functionCallState;
5266
- if (!state$1.openBlocks.has(blockIndex)) {
5267
- closeOpenBlocks(state$1, events$1);
5268
- events$1.push({
6710
+ if (!state.openBlocks.has(blockIndex)) {
6711
+ closeOpenBlocks(state, events);
6712
+ events.push({
5269
6713
  type: "content_block_start",
5270
6714
  index: blockIndex,
5271
6715
  content_block: {
@@ -5275,7 +6719,7 @@ const openFunctionCallBlock = (state$1, params) => {
5275
6719
  input: {}
5276
6720
  }
5277
6721
  });
5278
- state$1.openBlocks.add(blockIndex);
6722
+ state.openBlocks.add(blockIndex);
5279
6723
  }
5280
6724
  return blockIndex;
5281
6725
  };
@@ -5289,15 +6733,23 @@ const extractFunctionCallDetails = (rawEvent) => {
5289
6733
  initialArguments: item.arguments
5290
6734
  };
5291
6735
  };
5292
-
5293
- //#endregion
5294
- //#region src/routes/responses/utils.ts
6736
+ const responsesUtilsDependencies = {
6737
+ isResponsesApiContextManagementModel,
6738
+ isResponsesApiWebSocketEnabled
6739
+ };
5295
6740
  const getResponsesRequestOptions = (payload) => {
5296
6741
  return {
5297
6742
  vision: hasVisionInput$1(payload),
5298
6743
  initiator: hasAgentInitiator(payload) ? "agent" : "user"
5299
6744
  };
5300
6745
  };
6746
+ const getResponsesTransportForModel = (selectedModel, options = {}) => {
6747
+ const supportedEndpoints = selectedModel?.supported_endpoints ?? [];
6748
+ const useWebSocket = responsesUtilsDependencies.isResponsesApiWebSocketEnabled();
6749
+ if (options.compactType !== 1 && useWebSocket && supportedEndpoints.includes("ws:/responses")) return "websocket";
6750
+ if (supportedEndpoints.includes("/responses")) return "http";
6751
+ return null;
6752
+ };
5301
6753
  const hasAgentInitiator = (payload) => {
5302
6754
  const items = getPayloadItems(payload);
5303
6755
  if (isForceAgentEnabled()) return items.some((item) => isAgentRole(item));
@@ -5322,7 +6774,7 @@ const createCompactionContextManagement = (compactThreshold) => [{
5322
6774
  }];
5323
6775
  const applyResponsesApiContextManagement = (payload, maxPromptTokens) => {
5324
6776
  if (payload.context_management !== void 0) return;
5325
- if (!isResponsesApiContextManagementModel(payload.model)) return;
6777
+ if (!responsesUtilsDependencies.isResponsesApiContextManagementModel(payload.model)) return;
5326
6778
  payload.context_management = createCompactionContextManagement(resolveResponsesCompactThreshold(maxPromptTokens));
5327
6779
  };
5328
6780
  const compactInputByLatestCompaction = (payload) => {
@@ -5380,7 +6832,7 @@ function getStreamChunkFields(chunk) {
5380
6832
  data: c.data
5381
6833
  };
5382
6834
  }
5383
- const isAsyncIterable = (value) => Boolean(value) && typeof value[Symbol.asyncIterator] === "function";
6835
+ const isAsyncIterable$1 = (value) => Boolean(value) && typeof value[Symbol.asyncIterator] === "function";
5384
6836
  const containsVisionContent = (value) => {
5385
6837
  if (!value) return false;
5386
6838
  if (Array.isArray(value)) return value.some((entry) => containsVisionContent(entry));
@@ -5390,7 +6842,6 @@ const containsVisionContent = (value) => {
5390
6842
  if (Array.isArray(record.content)) return record.content.some((entry) => containsVisionContent(entry));
5391
6843
  return false;
5392
6844
  };
5393
-
5394
6845
  //#endregion
5395
6846
  //#region src/services/copilot/create-messages.ts
5396
6847
  const isAgentMessage = (msg) => {
@@ -5453,297 +6904,36 @@ const buildMessagesHeaders = ({ ctx, enableVision, initiator, options, payload }
5453
6904
  if (shouldUseMessageProxyHeaders(payload)) prepareMessageProxyHeaders(headers);
5454
6905
  const anthropicBeta = buildAnthropicBetaHeader(options?.anthropicBetaHeader, payload.thinking, payload.model);
5455
6906
  if (anthropicBeta) headers["anthropic-beta"] = anthropicBeta;
5456
- return headers;
5457
- };
5458
- const createMessages = async (payload, account, options) => {
5459
- const ctx = account ?? accountFromState();
5460
- if (!ctx.copilotToken) throw new Error("Copilot token not found");
5461
- const headers = buildMessagesHeaders({
5462
- ctx,
5463
- enableVision: hasVisionInput(payload),
5464
- initiator: options?.initiator ?? getMessagesInitiator(payload),
5465
- options,
5466
- payload
5467
- });
5468
- captureOutboundHeadersSnapshot(headers);
5469
- const response = await copilotFetch(`${copilotBaseUrl(ctx)}/v1/messages`, {
5470
- method: "POST",
5471
- headers,
5472
- body: JSON.stringify(payload)
5473
- }, {
5474
- requestId: options?.requestId,
5475
- callSite: "messages"
5476
- });
5477
- logCopilotRateLimits(response.headers);
5478
- if (!response.ok) {
5479
- consola.error("Failed to create messages", response);
5480
- throw new HTTPError("Failed to create messages", response);
5481
- }
5482
- if (payload.stream) return events(response);
5483
- return await response.json();
5484
- };
5485
-
5486
- //#endregion
5487
- //#region src/routes/messages/stream-translation.ts
5488
- function isToolBlockOpen(state$1) {
5489
- if (!state$1.contentBlockOpen) return false;
5490
- return Object.values(state$1.toolCalls).some((tc) => tc.anthropicBlockIndex === state$1.contentBlockIndex);
5491
- }
5492
- function translateChunkToAnthropicEvents(chunk, state$1) {
5493
- const events$1 = [];
5494
- if (chunk.choices.length === 0) return events$1;
5495
- const choice = chunk.choices[0];
5496
- const { delta } = choice;
5497
- handleMessageStart(state$1, events$1, chunk);
5498
- handleThinkingText(delta, state$1, events$1);
5499
- handleContent(delta, state$1, events$1);
5500
- handleToolCalls(delta, state$1, events$1);
5501
- handleFinish(choice, state$1, {
5502
- events: events$1,
5503
- chunk
5504
- });
5505
- return events$1;
5506
- }
5507
- function handleFinish(choice, state$1, context) {
5508
- const { events: events$1, chunk } = context;
5509
- if (choice.finish_reason && choice.finish_reason.length > 0) {
5510
- if (state$1.contentBlockOpen) {
5511
- const toolBlockOpen = isToolBlockOpen(state$1);
5512
- context.events.push({
5513
- type: "content_block_stop",
5514
- index: state$1.contentBlockIndex
5515
- });
5516
- state$1.contentBlockOpen = false;
5517
- state$1.contentBlockIndex++;
5518
- if (!toolBlockOpen) handleReasoningOpaque(choice.delta, events$1, state$1);
5519
- }
5520
- events$1.push({
5521
- type: "message_delta",
5522
- delta: {
5523
- stop_reason: mapOpenAIStopReasonToAnthropic(choice.finish_reason),
5524
- stop_sequence: null
5525
- },
5526
- usage: {
5527
- input_tokens: (chunk.usage?.prompt_tokens ?? 0) - (chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0),
5528
- output_tokens: chunk.usage?.completion_tokens ?? 0,
5529
- ...chunk.usage?.prompt_tokens_details?.cached_tokens !== void 0 && { cache_read_input_tokens: chunk.usage.prompt_tokens_details.cached_tokens }
5530
- }
5531
- }, { type: "message_stop" });
5532
- }
5533
- }
5534
- function handleToolCalls(delta, state$1, events$1) {
5535
- if (delta.tool_calls && delta.tool_calls.length > 0) {
5536
- closeThinkingBlockIfOpen(state$1, events$1);
5537
- handleReasoningOpaqueInToolCalls(state$1, events$1, delta);
5538
- for (const toolCall of delta.tool_calls) {
5539
- if (toolCall.id && toolCall.function?.name) {
5540
- if (state$1.contentBlockOpen) {
5541
- events$1.push({
5542
- type: "content_block_stop",
5543
- index: state$1.contentBlockIndex
5544
- });
5545
- state$1.contentBlockIndex++;
5546
- state$1.contentBlockOpen = false;
5547
- }
5548
- const anthropicBlockIndex = state$1.contentBlockIndex;
5549
- state$1.toolCalls[toolCall.index] = {
5550
- id: toolCall.id,
5551
- name: toolCall.function.name,
5552
- anthropicBlockIndex
5553
- };
5554
- events$1.push({
5555
- type: "content_block_start",
5556
- index: anthropicBlockIndex,
5557
- content_block: {
5558
- type: "tool_use",
5559
- id: toolCall.id,
5560
- name: toolCall.function.name,
5561
- input: {}
5562
- }
5563
- });
5564
- state$1.contentBlockOpen = true;
5565
- }
5566
- if (toolCall.function?.arguments) {
5567
- const toolCallInfo = state$1.toolCalls[toolCall.index];
5568
- if (toolCallInfo) events$1.push({
5569
- type: "content_block_delta",
5570
- index: toolCallInfo.anthropicBlockIndex,
5571
- delta: {
5572
- type: "input_json_delta",
5573
- partial_json: toolCall.function.arguments
5574
- }
5575
- });
5576
- }
5577
- }
5578
- }
5579
- }
5580
- function handleReasoningOpaqueInToolCalls(state$1, events$1, delta) {
5581
- if (state$1.contentBlockOpen && !isToolBlockOpen(state$1)) {
5582
- events$1.push({
5583
- type: "content_block_stop",
5584
- index: state$1.contentBlockIndex
5585
- });
5586
- state$1.contentBlockIndex++;
5587
- state$1.contentBlockOpen = false;
5588
- }
5589
- handleReasoningOpaque(delta, events$1, state$1);
5590
- }
5591
- function handleContent(delta, state$1, events$1) {
5592
- if (delta.content && delta.content.length > 0) {
5593
- closeThinkingBlockIfOpen(state$1, events$1);
5594
- if (isToolBlockOpen(state$1)) {
5595
- events$1.push({
5596
- type: "content_block_stop",
5597
- index: state$1.contentBlockIndex
5598
- });
5599
- state$1.contentBlockIndex++;
5600
- state$1.contentBlockOpen = false;
5601
- }
5602
- if (!state$1.contentBlockOpen) {
5603
- events$1.push({
5604
- type: "content_block_start",
5605
- index: state$1.contentBlockIndex,
5606
- content_block: {
5607
- type: "text",
5608
- text: ""
5609
- }
5610
- });
5611
- state$1.contentBlockOpen = true;
5612
- }
5613
- events$1.push({
5614
- type: "content_block_delta",
5615
- index: state$1.contentBlockIndex,
5616
- delta: {
5617
- type: "text_delta",
5618
- text: delta.content
5619
- }
5620
- });
5621
- }
5622
- if (delta.content === "" && delta.reasoning_opaque && delta.reasoning_opaque.length > 0 && state$1.thinkingBlockOpen) {
5623
- events$1.push({
5624
- type: "content_block_delta",
5625
- index: state$1.contentBlockIndex,
5626
- delta: {
5627
- type: "signature_delta",
5628
- signature: delta.reasoning_opaque
5629
- }
5630
- }, {
5631
- type: "content_block_stop",
5632
- index: state$1.contentBlockIndex
5633
- });
5634
- state$1.contentBlockIndex++;
5635
- state$1.thinkingBlockOpen = false;
5636
- }
5637
- }
5638
- function handleMessageStart(state$1, events$1, chunk) {
5639
- if (!state$1.messageStartSent) {
5640
- const cachedTokens = chunk.usage?.prompt_tokens_details?.cached_tokens;
5641
- const upstreamPromptTokens = chunk.usage?.prompt_tokens;
5642
- const historicalInputTokens = state$1.historicalInputTokens;
5643
- const historicalOutputTokens = state$1.historicalOutputTokens ?? 0;
5644
- const historicalTotalTokens = historicalInputTokens !== void 0 ? historicalInputTokens + historicalOutputTokens : void 0;
5645
- const inputTokens = upstreamPromptTokens !== void 0 ? upstreamPromptTokens - (cachedTokens ?? 0) : historicalTotalTokens ?? state$1.estimatedInputTokens ?? 0;
5646
- const cacheReadTokens = upstreamPromptTokens !== void 0 ? cachedTokens : state$1.historicalCachedInputTokens;
5647
- events$1.push({
5648
- type: "message_start",
5649
- message: {
5650
- id: chunk.id,
5651
- type: "message",
5652
- role: "assistant",
5653
- content: [],
5654
- model: chunk.model,
5655
- stop_reason: null,
5656
- stop_sequence: null,
5657
- usage: {
5658
- input_tokens: inputTokens,
5659
- output_tokens: 0,
5660
- ...cacheReadTokens !== void 0 && { cache_read_input_tokens: cacheReadTokens }
5661
- }
5662
- }
5663
- });
5664
- state$1.messageStartSent = true;
5665
- }
5666
- }
5667
- function handleReasoningOpaque(delta, events$1, state$1) {
5668
- if (delta.reasoning_opaque && delta.reasoning_opaque.length > 0) {
5669
- events$1.push({
5670
- type: "content_block_start",
5671
- index: state$1.contentBlockIndex,
5672
- content_block: {
5673
- type: "thinking",
5674
- thinking: ""
5675
- }
5676
- }, {
5677
- type: "content_block_delta",
5678
- index: state$1.contentBlockIndex,
5679
- delta: {
5680
- type: "thinking_delta",
5681
- thinking: THINKING_TEXT
5682
- }
5683
- }, {
5684
- type: "content_block_delta",
5685
- index: state$1.contentBlockIndex,
5686
- delta: {
5687
- type: "signature_delta",
5688
- signature: delta.reasoning_opaque
5689
- }
5690
- }, {
5691
- type: "content_block_stop",
5692
- index: state$1.contentBlockIndex
5693
- });
5694
- state$1.contentBlockIndex++;
5695
- }
5696
- }
5697
- function handleThinkingText(delta, state$1, events$1) {
5698
- if (delta.reasoning_text && delta.reasoning_text.length > 0) {
5699
- if (state$1.contentBlockOpen) {
5700
- delta.content = delta.reasoning_text;
5701
- delta.reasoning_text = void 0;
5702
- return;
5703
- }
5704
- if (!state$1.thinkingBlockOpen) {
5705
- events$1.push({
5706
- type: "content_block_start",
5707
- index: state$1.contentBlockIndex,
5708
- content_block: {
5709
- type: "thinking",
5710
- thinking: ""
5711
- }
5712
- });
5713
- state$1.thinkingBlockOpen = true;
5714
- }
5715
- events$1.push({
5716
- type: "content_block_delta",
5717
- index: state$1.contentBlockIndex,
5718
- delta: {
5719
- type: "thinking_delta",
5720
- thinking: delta.reasoning_text
5721
- }
5722
- });
5723
- }
5724
- }
5725
- function closeThinkingBlockIfOpen(state$1, events$1) {
5726
- if (state$1.thinkingBlockOpen) {
5727
- events$1.push({
5728
- type: "content_block_delta",
5729
- index: state$1.contentBlockIndex,
5730
- delta: {
5731
- type: "signature_delta",
5732
- signature: ""
5733
- }
5734
- }, {
5735
- type: "content_block_stop",
5736
- index: state$1.contentBlockIndex
5737
- });
5738
- state$1.contentBlockIndex++;
5739
- state$1.thinkingBlockOpen = false;
6907
+ return headers;
6908
+ };
6909
+ const createMessages = async (payload, account, options) => {
6910
+ const ctx = account ?? accountFromState();
6911
+ if (!ctx.copilotToken) throw new Error("Copilot token not found");
6912
+ const headers = buildMessagesHeaders({
6913
+ ctx,
6914
+ enableVision: hasVisionInput(payload),
6915
+ initiator: options?.initiator ?? getMessagesInitiator(payload),
6916
+ options,
6917
+ payload
6918
+ });
6919
+ captureOutboundHeadersSnapshot(headers);
6920
+ const response = await copilotFetch(`${copilotBaseUrl(ctx)}/v1/messages`, {
6921
+ method: "POST",
6922
+ headers,
6923
+ body: JSON.stringify(payload)
6924
+ }, {
6925
+ requestId: options?.requestId,
6926
+ callSite: "messages",
6927
+ fetchImpl: options?.fetchImpl
6928
+ });
6929
+ logCopilotRateLimits(response.headers);
6930
+ if (!response.ok) {
6931
+ consola.error("Failed to create messages", response);
6932
+ throw new HTTPError("Failed to create messages", response);
5740
6933
  }
5741
- }
5742
-
5743
- //#endregion
5744
- //#region src/lib/subagent.ts
5745
- const subagentMarkerPrefix = "__SUBAGENT_MARKER__";
5746
-
6934
+ if (payload.stream) return events(response);
6935
+ return await response.json();
6936
+ };
5747
6937
  //#endregion
5748
6938
  //#region src/routes/messages/subagent-marker.ts
5749
6939
  const subagentStartContextPrefix = "SubagentStart hook additional context:";
@@ -5778,8 +6968,8 @@ const extractMarkerPayloadFromReminderLine = (line) => {
5778
6968
  if (!trimmedLine) return null;
5779
6969
  let markerLine = trimmedLine;
5780
6970
  if (markerLine.startsWith(subagentStartContextPrefix)) markerLine = markerLine.slice(38).trimStart();
5781
- if (!markerLine.startsWith(subagentMarkerPrefix)) return null;
5782
- return markerLine.slice(subagentMarkerPrefix.length).trimStart();
6971
+ if (!markerLine.startsWith("__SUBAGENT_MARKER__")) return null;
6972
+ return markerLine.slice(19).trimStart();
5783
6973
  };
5784
6974
  const inspectSubagentMarkerFromSystemReminder = (text) => {
5785
6975
  let sawInvalidMarker = false;
@@ -5848,32 +7038,136 @@ const extractBalancedJson = (text) => {
5848
7038
  }
5849
7039
  return null;
5850
7040
  };
5851
-
5852
7041
  //#endregion
5853
7042
  //#region src/routes/messages/handler.ts
5854
- const logger$5 = createHandlerLogger("messages-handler");
7043
+ const logger$3 = createHandlerLogger("messages-handler");
5855
7044
  const CHAT_COMPLETIONS_ENDPOINT = "/chat/completions";
5856
7045
  const RESPONSES_ENDPOINT$1 = "/responses";
5857
7046
  const MESSAGES_ENDPOINT = "/v1/messages";
7047
+ const getProviderConfigResolver = (c) => {
7048
+ return c.get("providerConfigResolver") ?? getProviderConfig;
7049
+ };
7050
+ const resolveProviderTargetModelAlias = (model, providerConfigResolver) => {
7051
+ const targetModel = resolveModelAlias(model);
7052
+ if (targetModel === model) return null;
7053
+ return resolveExistingProviderModelAlias(targetModel, providerConfigResolver);
7054
+ };
7055
+ function normalizeProviderAliasUsage(usage) {
7056
+ const tokensInput = usage.inputTokens === void 0 ? void 0 : Math.max(0, usage.inputTokens);
7057
+ const tokensCachedInput = usage.cacheReadInputTokens;
7058
+ const tokensTotal = usage.inputTokens === void 0 && usage.outputTokens === void 0 ? void 0 : (tokensInput ?? 0) + (usage.outputTokens ?? 0) + (usage.cacheCreationInputTokens ?? 0) + (tokensCachedInput ?? 0);
7059
+ return {
7060
+ tokensCachedInput,
7061
+ tokensInput,
7062
+ tokensOutput: usage.outputTokens,
7063
+ tokensTotal,
7064
+ usageJson: JSON.stringify({
7065
+ input_tokens: usage.inputTokens,
7066
+ output_tokens: usage.outputTokens,
7067
+ cache_creation_input_tokens: usage.cacheCreationInputTokens,
7068
+ cache_read_input_tokens: usage.cacheReadInputTokens
7069
+ })
7070
+ };
7071
+ }
7072
+ async function handleProviderAliasCompletion(c, options) {
7073
+ const { payload, provider, providerModel } = options;
7074
+ const requestId = randomUUID();
7075
+ const startedAtMs = Date.now();
7076
+ const method = c.req.raw.method;
7077
+ const path = new URL(c.req.url, "http://local").pathname;
7078
+ const { ip: clientIp, source: clientIpSource } = getClientIpInfo(c);
7079
+ const userAgent = c.req.header("user-agent") ?? void 0;
7080
+ const streamRequested = Boolean(payload.stream);
7081
+ const originalModel = payload.model;
7082
+ const rawUserId = payload.metadata?.user_id;
7083
+ const userId = typeof rawUserId === "string" ? rawUserId : void 0;
7084
+ const { safetyIdentifier, sessionId: promptCacheKey } = parseUserIdMetadata(userId);
7085
+ const isSubagent = inspectSubagentMarkerFromFirstUser(payload).kind === "valid";
7086
+ let requestRecorded = false;
7087
+ const insertProviderAliasLog = (record) => {
7088
+ if (requestRecorded) {
7089
+ logger$3.warn("provider alias request already recorded", { requestId });
7090
+ return;
7091
+ }
7092
+ requestRecorded = true;
7093
+ const finishedAtMs = Date.now();
7094
+ getRequestHistoryStore().insert({
7095
+ requestId,
7096
+ startedAtMs,
7097
+ finishedAtMs,
7098
+ durationMs: finishedAtMs - startedAtMs,
7099
+ method,
7100
+ path,
7101
+ clientIp,
7102
+ clientIpSource,
7103
+ userAgent,
7104
+ userId,
7105
+ safetyIdentifier: safetyIdentifier ?? void 0,
7106
+ promptCacheKey: promptCacheKey ?? void 0,
7107
+ isSubagent,
7108
+ clientModel: originalModel,
7109
+ upstreamEndpoint: `/providers/${provider}/messages`,
7110
+ stream: streamRequested,
7111
+ upstreamModel: providerModel,
7112
+ ...record
7113
+ });
7114
+ };
7115
+ const instrumentation = {
7116
+ onComplete: (usage) => {
7117
+ insertProviderAliasLog({
7118
+ ...normalizeProviderAliasUsage(usage),
7119
+ httpStatus: 200
7120
+ });
7121
+ },
7122
+ onError: (error) => {
7123
+ insertProviderAliasLog({
7124
+ httpStatus: error.httpStatus,
7125
+ errorName: error.errorName,
7126
+ errorStatus: error.errorStatus,
7127
+ errorMessage: error.errorMessage,
7128
+ upstreamErrorMessageRaw: error.upstreamErrorMessageRaw
7129
+ });
7130
+ }
7131
+ };
7132
+ payload.model = providerModel;
7133
+ try {
7134
+ return await handleProviderMessagesForProvider(c, {
7135
+ instrumentation,
7136
+ payload,
7137
+ provider
7138
+ });
7139
+ } catch (error) {
7140
+ const observableError = await extractErrorObservability(error);
7141
+ instrumentation.onError?.(observableError);
7142
+ throw error;
7143
+ }
7144
+ }
5858
7145
  async function handleCompletion(c) {
7146
+ const anthropicPayload = await c.req.json();
7147
+ const providerConfigResolver = getProviderConfigResolver(c);
7148
+ const providerTargetModelAlias = resolveExistingProviderModelAlias(anthropicPayload.model, providerConfigResolver) ?? resolveProviderTargetModelAlias(anthropicPayload.model, providerConfigResolver);
7149
+ if (providerTargetModelAlias) return await handleProviderAliasCompletion(c, {
7150
+ payload: anthropicPayload,
7151
+ provider: providerTargetModelAlias.provider,
7152
+ providerModel: providerTargetModelAlias.model
7153
+ });
5859
7154
  await checkRateLimit(state);
5860
7155
  const store = getRequestHistoryStore();
5861
7156
  const requestId = randomUUID();
5862
7157
  const startedAtMs = Date.now();
5863
7158
  const method = c.req.raw.method;
5864
- const path$2 = new URL(c.req.url, "http://local").pathname;
7159
+ const path = new URL(c.req.url, "http://local").pathname;
5865
7160
  const { ip: clientIp, source: clientIpSource } = getClientIpInfo(c);
5866
7161
  const userAgent = c.req.header("user-agent") ?? void 0;
5867
- const anthropicPayload = await c.req.json();
5868
7162
  sanitizeIdeTools(anthropicPayload);
5869
- debugJson(logger$5, "Anthropic request payload:", anthropicPayload);
7163
+ debugJson(logger$3, "Anthropic request payload:", anthropicPayload);
5870
7164
  const markerInspection = inspectSubagentMarkerFromFirstUser(anthropicPayload);
5871
7165
  const subagentMarker = markerInspection.kind === "valid" ? markerInspection.marker : null;
5872
7166
  const isSubagentRequest = subagentMarker !== null;
5873
7167
  const invalidSubagentMarkerSelectionReason = markerInspection.kind === "invalid" ? "subagent_marker_invalid_fallback" : void 0;
5874
- if (subagentMarker) debugJson(logger$5, "Detected Subagent marker:", subagentMarker);
7168
+ if (subagentMarker) debugJson(logger$3, "Detected Subagent marker:", subagentMarker);
5875
7169
  const sessionId = getRootSessionId(anthropicPayload, c);
5876
- logger$5.debug("Extracted session ID:", sessionId);
7170
+ logger$3.debug("Extracted session ID:", sessionId);
5877
7171
  const ownershipLookupSessionId = markerInspection.kind === "valid" ? normalizeStableSessionId(markerInspection.marker.session_id) : void 0;
5878
7172
  const ownershipWriteSessionId = markerInspection.kind === "none" ? sessionId : void 0;
5879
7173
  const anthropicBeta = c.req.header("anthropic-beta");
@@ -5881,14 +7175,14 @@ async function handleCompletion(c) {
5881
7175
  const isCompact = compactType !== 0;
5882
7176
  const originalRequestModel = anthropicPayload.model;
5883
7177
  if (anthropicBeta && isWarmupProbeRequest(anthropicPayload)) anthropicPayload.model = getSmallModel();
5884
- if (compactType !== 0) logger$5.debug("Compact request type:", compactType);
5885
- if (compactType === COMPACT_REQUEST && shouldCompactUseSmallModel()) anthropicPayload.model = getSmallModel();
7178
+ if (compactType !== 0) logger$3.debug("Compact request type:", compactType);
7179
+ if (compactType === 1 && shouldCompactUseSmallModel()) anthropicPayload.model = getSmallModel();
5886
7180
  if (compactType === 0) {
5887
7181
  stripToolReferenceTurnBoundary(anthropicPayload);
5888
7182
  mergeToolResultForClaude(anthropicPayload);
5889
7183
  }
5890
7184
  const upstreamRequestId = generateRequestIdFromPayload(anthropicPayload, sessionId);
5891
- logger$5.debug("Generated request ID:", upstreamRequestId);
7185
+ logger$3.debug("Generated request ID:", upstreamRequestId);
5892
7186
  const clientModel = anthropicPayload.model;
5893
7187
  const streamRequested = Boolean(anthropicPayload.stream);
5894
7188
  const rawUserId = anthropicPayload.metadata?.user_id;
@@ -5907,7 +7201,7 @@ async function handleCompletion(c) {
5907
7201
  requestId,
5908
7202
  startedAtMs,
5909
7203
  method,
5910
- path: path$2,
7204
+ path,
5911
7205
  streamRequested,
5912
7206
  clientModel,
5913
7207
  clientIp,
@@ -5942,11 +7236,13 @@ async function handleCompletion(c) {
5942
7236
  headerSessionId: c.req.header("x-session-id") ?? null,
5943
7237
  upstreamRequestId
5944
7238
  });
7239
+ const responsesItemOwnershipKeys = extractAnthropicResponsesItemOwnerKeys(anthropicPayload);
5945
7240
  const selection = await accountsManager.selectAccountForRequest(candidates, {
5946
7241
  requestId: affinityKey.requestId,
5947
7242
  affinityModelId,
5948
7243
  ownershipLookupSessionId,
5949
- ownershipWriteSessionId
7244
+ ownershipWriteSessionId,
7245
+ responsesItemOwnershipKeys
5950
7246
  });
5951
7247
  const selectionReason = invalidSubagentMarkerSelectionReason ?? selection.selectionReason;
5952
7248
  if (!selection.ok) return handleSelectionFailure({
@@ -5955,7 +7251,7 @@ async function handleCompletion(c) {
5955
7251
  requestId,
5956
7252
  startedAtMs,
5957
7253
  method,
5958
- path: path$2,
7254
+ path,
5959
7255
  streamRequested,
5960
7256
  clientModel,
5961
7257
  clientIp,
@@ -5969,6 +7265,7 @@ async function handleCompletion(c) {
5969
7265
  affinityKeyUsed: affinityKey.affinityKeyUsed,
5970
7266
  affinityKeySource: affinityKey.affinityKeySource,
5971
7267
  selectionReason,
7268
+ responsesItemOwnerLookupKeys: responsesItemOwnershipKeys,
5972
7269
  selection
5973
7270
  });
5974
7271
  const { account, reservation, selectedModel, endpoint, costUnits } = selection;
@@ -5982,7 +7279,7 @@ async function handleCompletion(c) {
5982
7279
  requestId,
5983
7280
  startedAtMs,
5984
7281
  method,
5985
- path: path$2,
7282
+ path,
5986
7283
  clientIp,
5987
7284
  clientIpSource,
5988
7285
  userAgent,
@@ -6005,7 +7302,8 @@ async function handleCompletion(c) {
6005
7302
  affinityCacheKey: selection.affinityCacheKey,
6006
7303
  affinityKeyUsed: affinityKey.affinityKeyUsed,
6007
7304
  affinityKeySource: affinityKey.affinityKeySource,
6008
- selectionReason
7305
+ selectionReason,
7306
+ responsesItemOwnerLookupKeys: responsesItemOwnershipKeys
6009
7307
  };
6010
7308
  if (endpoint === MESSAGES_ENDPOINT) return await handleWithMessagesApi({
6011
7309
  c,
@@ -6039,7 +7337,7 @@ async function handleCompletion(c) {
6039
7337
  }
6040
7338
  const handleWithChatCompletions = async (params) => {
6041
7339
  const { c, openAIPayload, subagentMarker, sessionId, selectedModel, instr, compactType } = params;
6042
- debugJson(logger$5, "Translated OpenAI request payload:", openAIPayload);
7340
+ debugJson(logger$3, "Translated OpenAI request payload:", openAIPayload);
6043
7341
  const ctx = toAccountContext(instr.account);
6044
7342
  const effectiveInitiator = resolveEffectiveInitiator(getChatInitiator(openAIPayload.messages), {
6045
7343
  isCompact: compactType !== 0,
@@ -6070,9 +7368,9 @@ const handleWithChatCompletions = async (params) => {
6070
7368
  response,
6071
7369
  instr
6072
7370
  });
6073
- logger$5.debug("Streaming response from Copilot");
7371
+ logger$3.debug("Streaming response from Copilot");
6074
7372
  const fallbackEnabled = isMessageStartInputTokensFallbackEnabled();
6075
- const estimatedInputTokens = fallbackEnabled ? await estimateInputTokens(openAIPayload, selectedModel, logger$5) : void 0;
7373
+ const estimatedInputTokens = fallbackEnabled ? await estimateInputTokens(openAIPayload, selectedModel, logger$3) : void 0;
6076
7374
  const historicalUsage = fallbackEnabled && instr.promptCacheKey && instr.safetyIdentifier ? instr.store.getLastCompletedUsageBySession({
6077
7375
  promptCacheKey: instr.promptCacheKey,
6078
7376
  safetyIdentifier: instr.safetyIdentifier,
@@ -6088,11 +7386,15 @@ const handleWithChatCompletions = async (params) => {
6088
7386
  };
6089
7387
  const handleWithResponsesApi = async (params) => {
6090
7388
  const { c, anthropicPayload, openAIPayload, subagentMarker, sessionId, selectedModel, instr, compactType } = params;
6091
- const responsesPayload = translateAnthropicMessagesToResponsesPayload(anthropicPayload, selectedModel.id);
7389
+ const responsesPayload = translateAnthropicMessagesToResponsesPayload(anthropicPayload, {
7390
+ modelOverride: selectedModel.id,
7391
+ subagentAgentId: subagentMarker?.agent_id
7392
+ });
6092
7393
  applyResponsesApiContextManagement(responsesPayload, selectedModel.capabilities.limits.max_prompt_tokens);
6093
7394
  compactInputByLatestCompaction(responsesPayload);
6094
- debugJson(logger$5, "Translated Responses payload:", responsesPayload);
7395
+ debugJson(logger$3, "Translated Responses payload:", responsesPayload);
6095
7396
  const { vision, initiator } = getResponsesRequestOptions(responsesPayload);
7397
+ const transport = getResponsesTransportForModel(selectedModel, { compactType }) ?? "http";
6096
7398
  const effectiveInitiator = resolveEffectiveInitiator(initiator, {
6097
7399
  isCompact: compactType !== 0,
6098
7400
  isSubagent: Boolean(subagentMarker)
@@ -6108,7 +7410,8 @@ const handleWithResponsesApi = async (params) => {
6108
7410
  subagentMarker,
6109
7411
  sessionId,
6110
7412
  compactType,
6111
- requestId: instr.requestId
7413
+ requestId: instr.requestId,
7414
+ transport
6112
7415
  }, ctx);
6113
7416
  instr.confirmAffinity?.();
6114
7417
  instr.confirmOwnership?.();
@@ -6119,10 +7422,10 @@ const handleWithResponsesApi = async (params) => {
6119
7422
  stream: Boolean(responsesPayload.stream)
6120
7423
  });
6121
7424
  }
6122
- if (responsesPayload.stream && isAsyncIterable$1(response)) {
6123
- logger$5.debug("Streaming response from Copilot (Responses API)");
7425
+ if (responsesPayload.stream && isAsyncIterable(response)) {
7426
+ logger$3.debug("Streaming response from Copilot (Responses API)");
6124
7427
  const fallbackEnabled = isMessageStartInputTokensFallbackEnabled();
6125
- const estimatedInputTokens = fallbackEnabled ? await estimateInputTokens(openAIPayload, selectedModel, logger$5) : void 0;
7428
+ const estimatedInputTokens = fallbackEnabled ? await estimateInputTokens(openAIPayload, selectedModel, logger$3) : void 0;
6126
7429
  const historicalUsage = fallbackEnabled && instr.promptCacheKey && instr.safetyIdentifier ? instr.store.getLastCompletedUsageBySession({
6127
7430
  promptCacheKey: instr.promptCacheKey,
6128
7431
  safetyIdentifier: instr.safetyIdentifier,
@@ -6142,13 +7445,16 @@ const handleWithResponsesApi = async (params) => {
6142
7445
  instr
6143
7446
  });
6144
7447
  };
7448
+ function stringifyOwnerKeys(keys) {
7449
+ return keys && keys.length > 0 ? JSON.stringify(keys) : void 0;
7450
+ }
6145
7451
  function insertRequestLog$1(instr, record) {
6146
- const { store, requestId, startedAtMs, method, path: path$2, clientIp, clientIpSource, userAgent, clientModel, account, upstreamEndpoint, upstreamModel, costUnits, premiumRemainingBefore, premiumUnlimitedBefore } = instr;
7452
+ const { store, requestId, startedAtMs, method, path, clientIp, clientIpSource, userAgent, clientModel, account, upstreamEndpoint, upstreamModel, costUnits, premiumRemainingBefore, premiumUnlimitedBefore } = instr;
6147
7453
  store.insert({
6148
7454
  requestId,
6149
7455
  startedAtMs,
6150
7456
  method,
6151
- path: path$2,
7457
+ path,
6152
7458
  clientIp,
6153
7459
  clientIpSource,
6154
7460
  userAgent,
@@ -6171,7 +7477,9 @@ function insertRequestLog$1(instr, record) {
6171
7477
  upstreamModel,
6172
7478
  premiumRemainingBefore,
6173
7479
  premiumUnlimitedBefore,
6174
- ...record
7480
+ ...record,
7481
+ responsesItemOwnerLookupKeysJson: stringifyOwnerKeys(instr.responsesItemOwnerLookupKeys),
7482
+ responsesItemOwnerRecordedKeysJson: stringifyOwnerKeys(instr.responsesItemOwnerRecordedKeys)
6175
7483
  });
6176
7484
  flushPendingCapture(requestId);
6177
7485
  }
@@ -6215,9 +7523,9 @@ async function handleChatCompletionsNonStreaming(params) {
6215
7523
  let upstreamErrorMessageRaw;
6216
7524
  const finishedAtMs = Date.now();
6217
7525
  try {
6218
- logger$5.debug("Non-streaming response from Copilot:", JSON.stringify(response));
7526
+ logger$3.debug("Non-streaming response from Copilot:", JSON.stringify(response));
6219
7527
  const anthropicResponse = translateToAnthropic(response);
6220
- debugJson(logger$5, "Translated Anthropic response:", anthropicResponse);
7528
+ debugJson(logger$3, "Translated Anthropic response:", anthropicResponse);
6221
7529
  return c.json(anthropicResponse);
6222
7530
  } catch (error) {
6223
7531
  const details = await extractErrorObservability(error);
@@ -6268,16 +7576,16 @@ async function streamChatCompletionsAndLog(params) {
6268
7576
  try {
6269
7577
  for await (const rawEvent of response) {
6270
7578
  if (ttfbMs === void 0) ttfbMs = Date.now() - instr.startedAtMs;
6271
- logger$5.debug("Copilot raw stream event:", JSON.stringify(rawEvent));
7579
+ logger$3.debug("Copilot raw stream event:", JSON.stringify(rawEvent));
6272
7580
  const { data: rawData } = rawEvent;
6273
7581
  const data = typeof rawData === "string" ? rawData : await rawData;
6274
7582
  if (data === "[DONE]") break;
6275
7583
  if (!data) continue;
6276
7584
  const chunk = JSON.parse(data);
6277
7585
  if (chunk.usage) lastUsage = normalizeChatCompletionsUsage(chunk.usage);
6278
- const events$1 = translateChunkToAnthropicEvents(chunk, streamState);
6279
- for (const event of events$1) {
6280
- logger$5.debug("Translated Anthropic event:", JSON.stringify(event));
7586
+ const events = translateChunkToAnthropicEvents(chunk, streamState);
7587
+ for (const event of events) {
7588
+ logger$3.debug("Translated Anthropic event:", JSON.stringify(event));
6281
7589
  await stream.writeSSE({
6282
7590
  event: event.type,
6283
7591
  data: JSON.stringify(event)
@@ -6290,7 +7598,7 @@ async function streamChatCompletionsAndLog(params) {
6290
7598
  errorStatus = details.errorStatus;
6291
7599
  errorMessage = details.errorMessage;
6292
7600
  upstreamErrorMessageRaw = details.upstreamErrorMessageRaw;
6293
- logger$5.warn("Streaming error:", error);
7601
+ logger$3.warn("Streaming error:", error);
6294
7602
  if (shouldMarkAccountFailed(details)) accountsManager.markAccountFailed(instr.account.id, "Unauthorized (401)");
6295
7603
  await writeAnthropicStreamError(stream, getUserVisibleErrorMessage(details));
6296
7604
  } finally {
@@ -6349,10 +7657,14 @@ async function handleResponsesNonStreaming(params) {
6349
7657
  const finishedAtMs = Date.now();
6350
7658
  try {
6351
7659
  usage = extractResponsesUsageFromResult(result);
6352
- logger$5.debug("Non-streaming Responses result:", JSON.stringify(result).slice(-400));
7660
+ const responseOwnerKeys = extractResponsesResultOwnerKeys(result);
7661
+ instr.responsesItemOwnerRecordedKeys = responseOwnerKeys;
7662
+ logger$3.debug("Non-streaming Responses result:", JSON.stringify(result).slice(-400));
6353
7663
  const anthropicResponse = translateResponsesResultToAnthropic(result);
6354
- debugJson(logger$5, "Translated Anthropic response:", anthropicResponse);
6355
- return c.json(anthropicResponse);
7664
+ debugJson(logger$3, "Translated Anthropic response:", anthropicResponse);
7665
+ const response = c.json(anthropicResponse);
7666
+ if (result.status === "completed") accountsManager.recordResponsesItemOwnership(responseOwnerKeys, instr.account.id);
7667
+ return response;
6356
7668
  } catch (error) {
6357
7669
  const details = await extractErrorObservability(error);
6358
7670
  httpStatus = details.httpStatus;
@@ -6383,7 +7695,7 @@ async function handleResponsesNonStreaming(params) {
6383
7695
  async function ensureResponsesStreamCompleted(params) {
6384
7696
  const { stream, streamState, setStreamError } = params;
6385
7697
  if (streamState.messageCompleted) return;
6386
- logger$5.warn("Responses stream ended without completion; sending error event");
7698
+ logger$3.warn("Responses stream ended without completion; sending error event");
6387
7699
  const msg = "Responses stream ended without completion";
6388
7700
  const errorEvent = buildErrorEvent(msg);
6389
7701
  setStreamError("StreamIncomplete", msg);
@@ -6400,22 +7712,66 @@ async function writeAnthropicStreamError(stream, message) {
6400
7712
  data: JSON.stringify(errorEvent)
6401
7713
  });
6402
7714
  } catch (streamError) {
6403
- logger$5.warn("Failed to write Anthropic stream error event:", streamError);
7715
+ logger$3.warn("Failed to write Anthropic stream error event:", streamError);
7716
+ }
7717
+ }
7718
+ function collectResponsesStreamOwnerKeys(event, responseOwnerKeys) {
7719
+ for (const key of extractResponsesStreamEventOwnerKeys(event)) responseOwnerKeys.add(key);
7720
+ }
7721
+ function createResponsesStreamStateWithUsage(params) {
7722
+ const streamState = createResponsesStreamState();
7723
+ streamState.estimatedInputTokens = params.estimatedInputTokens;
7724
+ streamState.historicalInputTokens = params.historicalUsage?.tokensInput;
7725
+ streamState.historicalOutputTokens = params.historicalUsage?.tokensOutput;
7726
+ streamState.historicalCachedInputTokens = params.historicalUsage?.tokensCachedInput;
7727
+ return streamState;
7728
+ }
7729
+ function recordStreamOwnerKeys(streamState, responseOwnerKeys, instr) {
7730
+ if (!streamState.messageCompleted) return;
7731
+ const ownerKeys = [...responseOwnerKeys];
7732
+ instr.responsesItemOwnerRecordedKeys = ownerKeys;
7733
+ if (streamState.responseStatus === "completed") accountsManager.recordResponsesItemOwnership(ownerKeys, instr.account.id);
7734
+ }
7735
+ function getResponsesStreamEventError(event) {
7736
+ if (event.type === "response.failed") {
7737
+ const message = event.response.error?.message ?? "Responses stream failed upstream.";
7738
+ return {
7739
+ errorName: "ResponsesStreamFailed",
7740
+ errorStatus: 502,
7741
+ errorMessage: message,
7742
+ upstreamErrorMessageRaw: message
7743
+ };
7744
+ }
7745
+ if (event.type === "error") {
7746
+ const message = event.message || "Responses stream returned an error.";
7747
+ return {
7748
+ errorName: "ResponsesStreamError",
7749
+ errorStatus: 502,
7750
+ errorMessage: message,
7751
+ upstreamErrorMessageRaw: message
7752
+ };
7753
+ }
7754
+ }
7755
+ async function writeTranslatedAnthropicStreamEvents(stream, events) {
7756
+ for (const event of events) {
7757
+ const eventData = JSON.stringify(event);
7758
+ logger$3.debug("Translated Anthropic event:", eventData);
7759
+ await stream.writeSSE({
7760
+ event: event.type,
7761
+ data: eventData
7762
+ });
6404
7763
  }
6405
7764
  }
6406
7765
  async function streamResponsesAndLog$1(params) {
6407
- const { stream, response, instr, estimatedInputTokens, historicalUsage } = params;
7766
+ const { stream, response, instr } = params;
6408
7767
  let ttfbMs;
6409
7768
  let lastUsage = {};
6410
7769
  let errorName;
6411
7770
  let errorStatus;
6412
7771
  let errorMessage;
6413
7772
  let upstreamErrorMessageRaw;
6414
- const streamState = createResponsesStreamState();
6415
- streamState.estimatedInputTokens = estimatedInputTokens;
6416
- streamState.historicalInputTokens = historicalUsage?.tokensInput;
6417
- streamState.historicalOutputTokens = historicalUsage?.tokensOutput;
6418
- streamState.historicalCachedInputTokens = historicalUsage?.tokensCachedInput;
7773
+ const streamState = createResponsesStreamStateWithUsage(params);
7774
+ const responseOwnerKeys = /* @__PURE__ */ new Set();
6419
7775
  try {
6420
7776
  for await (const chunk of response) {
6421
7777
  if (ttfbMs === void 0) ttfbMs = Date.now() - instr.startedAtMs;
@@ -6428,21 +7784,21 @@ async function streamResponsesAndLog$1(params) {
6428
7784
  }
6429
7785
  const data = chunk.data;
6430
7786
  if (!data) continue;
6431
- logger$5.debug("Responses raw stream event:", data);
7787
+ logger$3.debug("Responses raw stream event:", data);
6432
7788
  const parsed = JSON.parse(data);
7789
+ const streamEventError = getResponsesStreamEventError(parsed);
7790
+ if (streamEventError) {
7791
+ errorName = streamEventError.errorName;
7792
+ errorStatus = streamEventError.errorStatus;
7793
+ errorMessage = streamEventError.errorMessage;
7794
+ upstreamErrorMessageRaw = streamEventError.upstreamErrorMessageRaw;
7795
+ }
7796
+ collectResponsesStreamOwnerKeys(parsed, responseOwnerKeys);
6433
7797
  const u = extractResponsesUsageFromStreamEvent(parsed);
6434
7798
  if (u.usageJson) lastUsage = u;
6435
- const events$1 = translateResponsesStreamEvent(parsed, streamState);
6436
- for (const event of events$1) {
6437
- const eventData = JSON.stringify(event);
6438
- logger$5.debug("Translated Anthropic event:", eventData);
6439
- await stream.writeSSE({
6440
- event: event.type,
6441
- data: eventData
6442
- });
6443
- }
7799
+ await writeTranslatedAnthropicStreamEvents(stream, translateResponsesStreamEvent(parsed, streamState));
6444
7800
  if (streamState.messageCompleted) {
6445
- logger$5.debug("Message completed, ending stream");
7801
+ logger$3.debug("Message completed, ending stream");
6446
7802
  break;
6447
7803
  }
6448
7804
  }
@@ -6454,13 +7810,14 @@ async function streamResponsesAndLog$1(params) {
6454
7810
  errorMessage = message;
6455
7811
  }
6456
7812
  });
7813
+ recordStreamOwnerKeys(streamState, responseOwnerKeys, instr);
6457
7814
  } catch (error) {
6458
7815
  const details = await extractErrorObservability(error);
6459
7816
  errorName = details.errorName;
6460
7817
  errorStatus = details.errorStatus;
6461
7818
  errorMessage = details.errorMessage;
6462
7819
  upstreamErrorMessageRaw = details.upstreamErrorMessageRaw;
6463
- logger$5.warn("Streaming error:", error);
7820
+ logger$3.warn("Streaming error:", error);
6464
7821
  invalidateAffinityOnOwnershipMismatch(details.ownershipMismatch, instr);
6465
7822
  if (shouldMarkAccountFailed(details)) accountsManager.markAccountFailed(instr.account.id, "Unauthorized (401)");
6466
7823
  await writeAnthropicStreamError(stream, getUserVisibleErrorMessage(details));
@@ -6515,7 +7872,7 @@ async function handleMessagesNonStreaming(params) {
6515
7872
  let upstreamErrorMessageRaw;
6516
7873
  const finishedAtMs = Date.now();
6517
7874
  try {
6518
- logger$5.debug("Non-streaming Messages result:", JSON.stringify(response).slice(-400));
7875
+ logger$3.debug("Non-streaming Messages result:", JSON.stringify(response).slice(-400));
6519
7876
  return c.json(response);
6520
7877
  } catch (error) {
6521
7878
  const details = await extractErrorObservability(error);
@@ -6545,14 +7902,15 @@ async function handleMessagesNonStreaming(params) {
6545
7902
  }
6546
7903
  }
6547
7904
  const parseMessagesStreamUsage = (data) => {
6548
- if (!data) return null;
7905
+ if (!data || data === "[DONE]") return null;
6549
7906
  try {
6550
7907
  const parsed = JSON.parse(data);
7908
+ if (parsed.type === "error") throw new Error(parsed.error.message);
6551
7909
  if (parsed.type !== "message_delta" || !parsed.usage) return null;
6552
7910
  return normalizeMessagesUsage(parsed.usage);
6553
7911
  } catch (error) {
6554
- logger$5.warn("Failed to parse messages stream event", error);
6555
- return null;
7912
+ logger$3.warn("Failed to parse messages stream event", error);
7913
+ throw new Error("Failed to parse messages stream event", { cause: error });
6556
7914
  }
6557
7915
  };
6558
7916
  async function streamMessagesAndLog(params) {
@@ -6569,7 +7927,7 @@ async function streamMessagesAndLog(params) {
6569
7927
  const eventNameRaw = rawEvent.event;
6570
7928
  const eventName = typeof eventNameRaw === "string" && eventNameRaw.length > 0 ? eventNameRaw : "message";
6571
7929
  const data = rawEvent.data ?? "";
6572
- logger$5.debug("Messages raw stream event:", data);
7930
+ logger$3.debug("Messages raw stream event:", data);
6573
7931
  const usage = parseMessagesStreamUsage(data);
6574
7932
  if (usage) lastUsage = usage;
6575
7933
  await stream.writeSSE({
@@ -6583,7 +7941,7 @@ async function streamMessagesAndLog(params) {
6583
7941
  errorStatus = details.errorStatus;
6584
7942
  errorMessage = details.errorMessage;
6585
7943
  upstreamErrorMessageRaw = details.upstreamErrorMessageRaw;
6586
- logger$5.warn("Streaming error:", error);
7944
+ logger$3.warn("Streaming error:", error);
6587
7945
  if (shouldMarkAccountFailed(details)) accountsManager.markAccountFailed(instr.account.id, "Unauthorized (401)");
6588
7946
  await writeAnthropicStreamError(stream, getUserVisibleErrorMessage(details));
6589
7947
  } finally {
@@ -6609,7 +7967,7 @@ async function streamMessagesAndLog(params) {
6609
7967
  const handleWithMessagesApi = async (params) => {
6610
7968
  const { c, anthropicPayload, anthropicBetaHeader, subagentMarker, sessionId, instr, selectedModel, compactType } = params;
6611
7969
  prepareMessagesApiPayload(anthropicPayload, selectedModel);
6612
- debugJson(logger$5, "Translated Messages payload:", anthropicPayload);
7970
+ debugJson(logger$3, "Translated Messages payload:", anthropicPayload);
6613
7971
  const ctx = toAccountContext(instr.account);
6614
7972
  const effectiveInitiator = resolveEffectiveInitiator(getMessagesInitiator(anthropicPayload), {
6615
7973
  isCompact: compactType !== 0,
@@ -6636,8 +7994,8 @@ const handleWithMessagesApi = async (params) => {
6636
7994
  stream: Boolean(anthropicPayload.stream)
6637
7995
  });
6638
7996
  }
6639
- if (isAsyncIterable$1(response)) {
6640
- logger$5.debug("Streaming response from Copilot (Messages API)");
7997
+ if (isAsyncIterable(response)) {
7998
+ logger$3.debug("Streaming response from Copilot (Messages API)");
6641
7999
  return streamSSE(c, (stream) => streamMessagesAndLog({
6642
8000
  stream,
6643
8001
  response,
@@ -6651,8 +8009,7 @@ const handleWithMessagesApi = async (params) => {
6651
8009
  });
6652
8010
  };
6653
8011
  const isNonStreaming = (response) => Object.hasOwn(response, "choices");
6654
- const isAsyncIterable$1 = (value) => Boolean(value) && typeof value[Symbol.asyncIterator] === "function";
6655
-
8012
+ const isAsyncIterable = (value) => Boolean(value) && typeof value[Symbol.asyncIterator] === "function";
6656
8013
  //#endregion
6657
8014
  //#region src/routes/messages/route.ts
6658
8015
  const messageRoutes = new Hono();
@@ -6670,22 +8027,23 @@ messageRoutes.post("/count_tokens", async (c) => {
6670
8027
  return await forwardError(c, error);
6671
8028
  }
6672
8029
  });
6673
-
6674
8030
  //#endregion
6675
8031
  //#region src/routes/models/route.ts
6676
8032
  const modelRoutes = new Hono();
6677
8033
  modelRoutes.get("/", async (c) => {
6678
8034
  try {
6679
8035
  const blockedTargets = getAliasTargetSet();
6680
- const models = getAvailableModels().filter((model) => !blockedTargets.has(model.id.toLowerCase())).map((model) => ({
6681
- id: model.id,
6682
- object: "model",
6683
- type: "model",
6684
- created: 0,
6685
- created_at: (/* @__PURE__ */ new Date(0)).toISOString(),
6686
- owned_by: model.vendor,
6687
- display_name: model.name
6688
- }));
8036
+ const models = getAvailableModels().filter((model) => !blockedTargets.has(model.id.toLowerCase())).map((model) => {
8037
+ return {
8038
+ id: model.capabilities.limits?.max_context_window_tokens === 1e6 ? `${model.id}[1m]` : model.id,
8039
+ object: "model",
8040
+ type: "model",
8041
+ created: 0,
8042
+ created_at: (/* @__PURE__ */ new Date(0)).toISOString(),
8043
+ owned_by: model.vendor,
8044
+ display_name: model.name
8045
+ };
8046
+ });
6689
8047
  const aliasModels = Object.keys(getModelAliases()).map((alias) => ({
6690
8048
  id: alias,
6691
8049
  object: "model",
@@ -6707,195 +8065,6 @@ modelRoutes.get("/", async (c) => {
6707
8065
  return await forwardError(c, error);
6708
8066
  }
6709
8067
  });
6710
-
6711
- //#endregion
6712
- //#region src/routes/provider/messages/count-tokens-handler.ts
6713
- const logger$4 = createHandlerLogger("provider-count-tokens-handler");
6714
- const createFallbackModel = (modelId) => ({
6715
- capabilities: {
6716
- family: "provider",
6717
- limits: {},
6718
- object: "model_capabilities",
6719
- supports: {},
6720
- tokenizer: "o200k_base",
6721
- type: "chat"
6722
- },
6723
- id: modelId,
6724
- model_picker_enabled: false,
6725
- name: modelId,
6726
- object: "model",
6727
- preview: false,
6728
- vendor: "provider",
6729
- version: "unknown"
6730
- });
6731
- async function handleProviderCountTokens(c) {
6732
- const provider = c.req.param("provider");
6733
- try {
6734
- const anthropicPayload = await c.req.json();
6735
- const openAIPayload = translateToOpenAI(anthropicPayload);
6736
- const modelId = anthropicPayload.model.trim();
6737
- let selectedModel = getAvailableModels().find((model) => model.id === modelId);
6738
- if (!selectedModel && modelId) selectedModel = createFallbackModel(modelId);
6739
- if (!selectedModel) {
6740
- logger$4.warn("provider.count_tokens.model_not_found", {
6741
- provider,
6742
- model: anthropicPayload.model
6743
- });
6744
- return c.json({ input_tokens: 1 });
6745
- }
6746
- const tokenCount = await getTokenCount(openAIPayload, selectedModel);
6747
- const finalTokenCount = tokenCount.input + tokenCount.output;
6748
- logger$4.debug("provider.count_tokens.success", {
6749
- provider,
6750
- model: anthropicPayload.model,
6751
- input_tokens: finalTokenCount
6752
- });
6753
- return c.json({ input_tokens: finalTokenCount });
6754
- } catch (error) {
6755
- logger$4.error("provider.count_tokens.error", {
6756
- provider,
6757
- error
6758
- });
6759
- return c.json({ input_tokens: 1 });
6760
- }
6761
- }
6762
-
6763
- //#endregion
6764
- //#region src/services/providers/anthropic-proxy.ts
6765
- const FORWARDABLE_HEADERS = [
6766
- "anthropic-version",
6767
- "anthropic-beta",
6768
- "accept",
6769
- "user-agent"
6770
- ];
6771
- const STRIPPED_RESPONSE_HEADERS = [
6772
- "connection",
6773
- "content-encoding",
6774
- "content-length",
6775
- "keep-alive",
6776
- "proxy-authenticate",
6777
- "proxy-authorization",
6778
- "te",
6779
- "trailer",
6780
- "transfer-encoding",
6781
- "upgrade"
6782
- ];
6783
- function buildProviderUpstreamHeaders(providerConfig, requestHeaders) {
6784
- const authHeaders = {};
6785
- if (providerConfig.authType === "authorization") authHeaders.authorization = `Bearer ${providerConfig.apiKey}`;
6786
- else authHeaders["x-api-key"] = providerConfig.apiKey;
6787
- const headers = {
6788
- "content-type": "application/json",
6789
- accept: "application/json",
6790
- ...authHeaders
6791
- };
6792
- for (const headerName of FORWARDABLE_HEADERS) {
6793
- const headerValue = requestHeaders.get(headerName);
6794
- if (headerValue) headers[headerName] = headerValue;
6795
- }
6796
- return headers;
6797
- }
6798
- function createProviderProxyResponse(upstreamResponse) {
6799
- const headers = new Headers(upstreamResponse.headers);
6800
- for (const headerName of STRIPPED_RESPONSE_HEADERS) headers.delete(headerName);
6801
- return new Response(upstreamResponse.body, {
6802
- headers,
6803
- status: upstreamResponse.status,
6804
- statusText: upstreamResponse.statusText
6805
- });
6806
- }
6807
- async function forwardProviderMessages(providerConfig, payload, requestHeaders) {
6808
- return await fetch(`${providerConfig.baseUrl}/v1/messages`, {
6809
- method: "POST",
6810
- headers: buildProviderUpstreamHeaders(providerConfig, requestHeaders),
6811
- body: JSON.stringify(payload)
6812
- });
6813
- }
6814
- async function forwardProviderModels(providerConfig, requestHeaders) {
6815
- return await fetch(`${providerConfig.baseUrl}/v1/models`, {
6816
- method: "GET",
6817
- headers: buildProviderUpstreamHeaders(providerConfig, requestHeaders)
6818
- });
6819
- }
6820
-
6821
- //#endregion
6822
- //#region src/routes/provider/messages/handler.ts
6823
- const logger$3 = createHandlerLogger("provider-messages-handler");
6824
- async function handleProviderMessages(c) {
6825
- const provider = c.req.param("provider") ?? "";
6826
- const providerConfig = getProviderConfig(provider);
6827
- if (!providerConfig) return c.json({ error: {
6828
- message: `Provider '${provider}' not found or disabled`,
6829
- type: "invalid_request_error"
6830
- } }, 404);
6831
- try {
6832
- const payload = await c.req.json();
6833
- const modelConfig = providerConfig.models?.[payload.model];
6834
- payload.temperature ??= modelConfig?.temperature;
6835
- payload.top_p ??= modelConfig?.topP;
6836
- payload.top_k ??= modelConfig?.topK;
6837
- debugJson(logger$3, "provider.messages.request", {
6838
- payload,
6839
- provider
6840
- });
6841
- const upstreamResponse = await forwardProviderMessages(providerConfig, payload, c.req.raw.headers);
6842
- if (!upstreamResponse.ok) {
6843
- logger$3.error("Failed to create responses", upstreamResponse);
6844
- throw new HTTPError("Failed to create responses", upstreamResponse);
6845
- }
6846
- const contentType = upstreamResponse.headers.get("content-type") ?? "";
6847
- if (Boolean(payload.stream) && contentType.includes("text/event-stream")) {
6848
- logger$3.debug("provider.messages.streaming");
6849
- return streamSSE(c, async (stream) => {
6850
- for await (const chunk of events(upstreamResponse)) {
6851
- logger$3.debug("provider.messages.raw_stream_event:", chunk.data);
6852
- const eventName = chunk.event;
6853
- if (eventName === "ping") {
6854
- await stream.writeSSE({
6855
- event: "ping",
6856
- data: "{\"type\":\"ping\"}"
6857
- });
6858
- continue;
6859
- }
6860
- let data = chunk.data;
6861
- if (!data) continue;
6862
- if (chunk.data === "[DONE]") break;
6863
- try {
6864
- const parsed = JSON.parse(data);
6865
- if (parsed.type === "message_start") adjustInputTokens(providerConfig, parsed.message.usage);
6866
- else if (parsed.type === "message_delta") adjustInputTokens(providerConfig, parsed.usage);
6867
- data = JSON.stringify(parsed);
6868
- } catch (error) {
6869
- logger$3.error("provider.messages.streaming.adjust_tokens_error", {
6870
- error,
6871
- originalData: data
6872
- });
6873
- }
6874
- await stream.writeSSE({
6875
- event: eventName,
6876
- data
6877
- });
6878
- }
6879
- });
6880
- }
6881
- const jsonBody = await upstreamResponse.json();
6882
- adjustInputTokens(providerConfig, jsonBody.usage);
6883
- debugJson(logger$3, "provider.messages.no_stream result:", jsonBody);
6884
- return c.json(jsonBody);
6885
- } catch (error) {
6886
- logger$3.error("provider.messages.error", {
6887
- provider,
6888
- error
6889
- });
6890
- throw error;
6891
- }
6892
- }
6893
- const adjustInputTokens = (providerConfig, usage) => {
6894
- if (!providerConfig.adjustInputTokens || !usage) return;
6895
- usage.input_tokens = Math.max(0, (usage.input_tokens ?? 0) - (usage.cache_read_input_tokens ?? 0) - (usage.cache_creation_input_tokens ?? 0));
6896
- debugJson(logger$3, "provider.messages.adjusted_usage:", usage);
6897
- };
6898
-
6899
8068
  //#endregion
6900
8069
  //#region src/routes/provider/messages/route.ts
6901
8070
  const providerMessageRoutes = new Hono();
@@ -6913,20 +8082,23 @@ providerMessageRoutes.post("/count_tokens", async (c) => {
6913
8082
  return await forwardError(c, error);
6914
8083
  }
6915
8084
  });
6916
-
6917
8085
  //#endregion
6918
8086
  //#region src/routes/provider/models/route.ts
6919
8087
  const logger$2 = createHandlerLogger("provider-models-handler");
8088
+ const getProviderFetch = (c) => c.get("providerFetch") ?? fetch;
8089
+ const resolveProviderConfig = (c, provider) => {
8090
+ return (c.get("providerConfigResolver") ?? getProviderConfig)(provider);
8091
+ };
6920
8092
  const providerModelRoutes = new Hono();
6921
8093
  providerModelRoutes.get("/", async (c) => {
6922
8094
  const provider = c.req.param("provider") ?? "";
6923
8095
  try {
6924
- const providerConfig = getProviderConfig(provider);
8096
+ const providerConfig = resolveProviderConfig(c, provider);
6925
8097
  if (!providerConfig) return c.json({ error: {
6926
8098
  message: `Provider '${provider}' not found or disabled`,
6927
8099
  type: "invalid_request_error"
6928
8100
  } }, 404);
6929
- const upstreamResponse = await forwardProviderModels(providerConfig, c.req.raw.headers);
8101
+ const upstreamResponse = await forwardProviderModels(providerConfig, c.req.raw.headers, getProviderFetch(c));
6930
8102
  logger$2.debug("provider.models.response", {
6931
8103
  provider,
6932
8104
  statusCode: upstreamResponse.status
@@ -6940,7 +8112,6 @@ providerModelRoutes.get("/", async (c) => {
6940
8112
  return await forwardError(c, error);
6941
8113
  }
6942
8114
  });
6943
-
6944
8115
  //#endregion
6945
8116
  //#region src/routes/responses/stream-id-sync.ts
6946
8117
  const createStreamIdTracker = () => ({ outputItems: /* @__PURE__ */ new Map() });
@@ -6977,7 +8148,6 @@ const handleItemId = (parsed, tracker) => {
6977
8148
  }
6978
8149
  return JSON.stringify(parsed);
6979
8150
  };
6980
-
6981
8151
  //#endregion
6982
8152
  //#region src/routes/responses/handler.ts
6983
8153
  const logger$1 = createHandlerLogger("responses-handler");
@@ -7051,6 +8221,7 @@ const handleResponses = async (c) => {
7051
8221
  compactInputByLatestCompaction(upstreamPayload);
7052
8222
  const premiumRemainingBefore = account.premiumRemaining;
7053
8223
  const premiumUnlimitedBefore = account.unlimited;
8224
+ const transport = getResponsesTransportForModel(selectedModel) ?? "http";
7054
8225
  const { vision, initiator } = getResponsesRequestOptions(upstreamPayload);
7055
8226
  request.initiator = initiator;
7056
8227
  if (state.manualApprove) await awaitApproval();
@@ -7069,7 +8240,8 @@ const handleResponses = async (c) => {
7069
8240
  vision,
7070
8241
  initiator,
7071
8242
  premiumRemainingBefore,
7072
- premiumUnlimitedBefore
8243
+ premiumUnlimitedBefore,
8244
+ transport
7073
8245
  });
7074
8246
  return handleNonStreamingResponses({
7075
8247
  c,
@@ -7082,7 +8254,8 @@ const handleResponses = async (c) => {
7082
8254
  vision,
7083
8255
  initiator,
7084
8256
  premiumRemainingBefore,
7085
- premiumUnlimitedBefore
8257
+ premiumUnlimitedBefore,
8258
+ transport
7086
8259
  });
7087
8260
  };
7088
8261
  async function observeRequestError(accountId, error, affinity) {
@@ -7121,13 +8294,13 @@ function buildRequestContext(c) {
7121
8294
  const requestId = randomUUID();
7122
8295
  const startedAtMs = Date.now();
7123
8296
  const method = c.req.raw.method;
7124
- const path$2 = new URL(c.req.url, "http://local").pathname;
8297
+ const path = new URL(c.req.url, "http://local").pathname;
7125
8298
  const { ip: clientIp, source: clientIpSource } = getClientIpInfo(c);
7126
8299
  return {
7127
8300
  requestId,
7128
8301
  startedAtMs,
7129
8302
  method,
7130
- path: path$2,
8303
+ path,
7131
8304
  clientIp,
7132
8305
  clientIpSource,
7133
8306
  userAgent: c.req.header("user-agent") ?? void 0
@@ -7190,7 +8363,7 @@ function extractUsageFromChunkData(data) {
7190
8363
  }
7191
8364
  }
7192
8365
  async function handleStreamingResponses(params) {
7193
- const { c, store, request, payload, selection, clientModel, accountCtx, vision, initiator, premiumRemainingBefore, premiumUnlimitedBefore } = params;
8366
+ const { c, store, request, payload, selection, clientModel, accountCtx, vision, initiator, premiumRemainingBefore, premiumUnlimitedBefore, transport } = params;
7194
8367
  let response;
7195
8368
  try {
7196
8369
  response = await createResponses(payload, {
@@ -7198,7 +8371,8 @@ async function handleStreamingResponses(params) {
7198
8371
  initiator,
7199
8372
  upstreamRequestId: request.upstreamRequestId,
7200
8373
  sessionId: request.upstreamSessionId,
7201
- requestId: request.requestId
8374
+ requestId: request.requestId,
8375
+ transport
7202
8376
  }, accountCtx);
7203
8377
  selection.confirmAffinity?.();
7204
8378
  } catch (error) {
@@ -7212,7 +8386,7 @@ async function handleStreamingResponses(params) {
7212
8386
  error
7213
8387
  });
7214
8388
  }
7215
- if (isAsyncIterable(response)) {
8389
+ if (isAsyncIterable$1(response)) {
7216
8390
  logger$1.debug("Forwarding native Responses stream");
7217
8391
  return streamSSE(c, (stream) => streamResponsesAndLog({
7218
8392
  stream,
@@ -7386,7 +8560,7 @@ async function streamResponsesAndLog(params) {
7386
8560
  }
7387
8561
  }
7388
8562
  async function handleNonStreamingResponses(params) {
7389
- const { c, store, request, payload, selection, clientModel, accountCtx, vision, initiator, premiumRemainingBefore, premiumUnlimitedBefore } = params;
8563
+ const { c, store, request, payload, selection, clientModel, accountCtx, vision, initiator, premiumRemainingBefore, premiumUnlimitedBefore, transport } = params;
7390
8564
  const { account, reservation, selectedModel, endpoint, costUnits } = selection;
7391
8565
  let usage = {};
7392
8566
  let errorState = { httpStatus: 200 };
@@ -7397,9 +8571,10 @@ async function handleNonStreamingResponses(params) {
7397
8571
  initiator,
7398
8572
  upstreamRequestId: request.upstreamRequestId,
7399
8573
  sessionId: request.upstreamSessionId,
7400
- requestId: request.requestId
8574
+ requestId: request.requestId,
8575
+ transport
7401
8576
  }, accountCtx);
7402
- if (isAsyncIterable(response)) throw new Error("Upstream returned a stream unexpectedly");
8577
+ if (isAsyncIterable$1(response)) throw new Error("Upstream returned a stream unexpectedly");
7403
8578
  selection.confirmAffinity?.();
7404
8579
  finishedAtMs = Date.now();
7405
8580
  const result = response;
@@ -7459,7 +8634,6 @@ const removeUnsupportedTools = (payload) => {
7459
8634
  });
7460
8635
  if (dropped.length > 0) logger$1.debug("Removed unsupported tools:", dropped);
7461
8636
  };
7462
-
7463
8637
  //#endregion
7464
8638
  //#region src/routes/responses/route.ts
7465
8639
  const responsesRoutes = new Hono();
@@ -7470,7 +8644,6 @@ responsesRoutes.post("/", async (c) => {
7470
8644
  return await forwardError(c, error);
7471
8645
  }
7472
8646
  });
7473
-
7474
8647
  //#endregion
7475
8648
  //#region src/routes/token/route.ts
7476
8649
  const tokenRoute = new Hono();
@@ -7485,7 +8658,6 @@ tokenRoute.get("/", (c) => {
7485
8658
  }, 500);
7486
8659
  }
7487
8660
  });
7488
-
7489
8661
  //#endregion
7490
8662
  //#region src/routes/usage/route.ts
7491
8663
  const usageRoute = new Hono();
@@ -7522,7 +8694,6 @@ usageRoute.get("/:accountIndex", async (c) => {
7522
8694
  return c.json({ error: "Failed to fetch account usage" }, 500);
7523
8695
  }
7524
8696
  });
7525
-
7526
8697
  //#endregion
7527
8698
  //#region src/server.ts
7528
8699
  const server = new Hono();
@@ -7549,7 +8720,7 @@ server.route("/v1/responses", responsesRoutes);
7549
8720
  server.route("/v1/messages", messageRoutes);
7550
8721
  server.route("/:provider/v1/messages", providerMessageRoutes);
7551
8722
  server.route("/:provider/v1/models", providerModelRoutes);
7552
-
7553
8723
  //#endregion
7554
8724
  export { server };
7555
- //# sourceMappingURL=server-DR9ZR_MN.js.map
8725
+
8726
+ //# sourceMappingURL=server-DxQsi1x2.js.map