@nick3/copilot-api 1.3.5 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,8 @@
1
1
  import { PATHS } from "./paths-DoT4SZ8f.js";
2
2
  import { listAccountsFromRegistry } from "./accounts-registry-c7rs5Ed9.js";
3
- import { HTTPError, accountFromState, cacheModels, copilotBaseUrl, copilotHeaders, forwardError, generateRequestIdFromPayload, getCopilotUsage, getRootSessionId, getUUID, isNullish, prepareInteractionHeaders, sleep, state } from "./utils-BUJfM1V2.js";
4
- import "./get-copilot-token-BwP_PxV5.js";
5
- import { PROVIDER_TYPE_ANTHROPIC, accountsManager, getAliasTargetSet, getConfig, getExtraPromptForModel, getModelAliases, getModelAliasesInfo, getModelRefreshIntervalMs, getProviderConfig, getReasoningEffortForModel, getSmallModel, isForceAgentEnabled, isFreeModelLoadBalancingEnabled, isMessageStartInputTokensFallbackEnabled, isMessagesApiEnabled, isResponsesApiContextManagementModel, mergeConfigWithDefaults, shouldCompactUseSmallModel } from "./accounts-manager-DjGzZIcp.js";
3
+ import { HTTPError, accountFromState, cacheModels, copilotBaseUrl, copilotHeaders, forwardError, generateRequestIdFromPayload, getCopilotUsage, getRootSessionId, getUUID, isNullish, parseUserIdMetadata, prepareForCompact, prepareInteractionHeaders, sleep, state } from "./utils-D8j9lvS0.js";
4
+ import "./get-copilot-token-BA1FaCgQ.js";
5
+ import { PROVIDER_TYPE_ANTHROPIC, accountsManager, getAliasTargetSet, getConfig, getExtraPromptForModel, getModelAliases, getModelAliasesInfo, getModelRefreshIntervalMs, getProviderConfig, getReasoningEffortForModel, getSmallModel, isForceAgentEnabled, isFreeModelLoadBalancingEnabled, isMessageStartInputTokensFallbackEnabled, isMessagesApiEnabled, isResponsesApiContextManagementModel, mergeConfigWithDefaults, shouldCompactUseSmallModel } from "./accounts-manager-BQCAoBZp.js";
6
6
  import consola from "consola";
7
7
  import fs, { readFile } from "node:fs/promises";
8
8
  import * as path$1 from "node:path";
@@ -12,6 +12,7 @@ import { Hono } from "hono";
12
12
  import { cors } from "hono/cors";
13
13
  import { logger } from "hono/logger";
14
14
  import fs$1, { existsSync } from "node:fs";
15
+ import { AsyncLocalStorage } from "node:async_hooks";
15
16
  import { Database } from "bun:sqlite";
16
17
  import { fileURLToPath } from "node:url";
17
18
  import { streamSSE } from "hono/streaming";
@@ -104,6 +105,40 @@ function createAuthMiddleware(options = {}) {
104
105
  };
105
106
  }
106
107
 
108
+ //#endregion
109
+ //#region src/lib/request-context.ts
110
+ const TRACE_ID_MAX_LENGTH = 64;
111
+ const TRACE_ID_PATTERN = /^\w[\w.-]*$/;
112
+ const asyncLocalStorage = new AsyncLocalStorage();
113
+ const requestContext = {
114
+ getStore: () => asyncLocalStorage.getStore(),
115
+ run: (context, callback) => asyncLocalStorage.run(context, callback)
116
+ };
117
+ function generateTraceId() {
118
+ const timestamp = Date.now().toString(36);
119
+ const random = Math.random().toString(36).slice(2, 8);
120
+ return `${timestamp}-${random}`;
121
+ }
122
+ function resolveTraceId(traceId) {
123
+ const candidate = traceId?.trim();
124
+ if (!candidate || candidate.length > TRACE_ID_MAX_LENGTH || !TRACE_ID_PATTERN.test(candidate)) return generateTraceId();
125
+ return candidate;
126
+ }
127
+
128
+ //#endregion
129
+ //#region src/lib/trace.ts
130
+ const traceIdMiddleware = async (c, next) => {
131
+ const traceId = resolveTraceId(c.req.header("x-trace-id"));
132
+ c.header("x-trace-id", traceId);
133
+ const context = {
134
+ traceId,
135
+ startTime: Date.now()
136
+ };
137
+ await requestContext.run(context, async () => {
138
+ await next();
139
+ });
140
+ };
141
+
107
142
  //#endregion
108
143
  //#region src/lib/admin-db.ts
109
144
  const DEFAULT_DB_PATH = path.join(PATHS.APP_DIR, "admin.sqlite");
@@ -2148,12 +2183,14 @@ const createHandlerLogger = (name) => {
2148
2183
  cleanupOldLogs();
2149
2184
  lastCleanup = Date.now();
2150
2185
  }
2186
+ const traceId = requestContext.getStore()?.traceId;
2151
2187
  const date = logObj.date;
2152
2188
  const dateKey = date.toLocaleDateString("sv-SE");
2153
2189
  const timestamp = date.toLocaleString("sv-SE", { hour12: false });
2154
2190
  const filePath = path.join(LOG_DIR, `${sanitizedName}-${dateKey}.log`);
2155
2191
  const message = formatArgs(logObj.args);
2156
- const line = `[${timestamp}] [${logObj.type}] [${logObj.tag || name}]${message ? ` ${message}` : ""}`;
2192
+ const traceIdStr = traceId ? ` [${traceId}]` : "";
2193
+ const line = `[${timestamp}] [${logObj.type}] [${logObj.tag || name}]${traceIdStr}${message ? ` ${message}` : ""}`;
2157
2194
  appendLine(filePath, line);
2158
2195
  } });
2159
2196
  return instance;
@@ -2402,568 +2439,108 @@ const getTokenCount = async (payload, model) => {
2402
2439
  };
2403
2440
 
2404
2441
  //#endregion
2405
- //#region src/services/copilot/create-responses.ts
2406
- const createResponses = async (payload, { vision, initiator, upstreamRequestId, subagentMarker, sessionId }, account) => {
2442
+ //#region src/services/copilot/create-chat-completions.ts
2443
+ function isGpt5MiniFamily(modelId) {
2444
+ return modelId === "gpt-5-mini" || modelId.startsWith("gpt-5-mini-");
2445
+ }
2446
+ function applyDefaultReasoningEffort(payload) {
2447
+ if (!isGpt5MiniFamily(payload.model)) return payload;
2448
+ if (payload.reasoning_effort !== null && payload.reasoning_effort !== void 0) return payload;
2449
+ return {
2450
+ ...payload,
2451
+ reasoning_effort: getReasoningEffortForModel("gpt-5-mini")
2452
+ };
2453
+ }
2454
+ const getChatInitiator = (messages) => {
2455
+ if (isForceAgentEnabled()) return messages.some((msg) => ["assistant", "tool"].includes(msg.role)) ? "agent" : "user";
2456
+ const lastMessage = messages.at(-1);
2457
+ if (!lastMessage) return "user";
2458
+ return ["assistant", "tool"].includes(lastMessage.role) ? "agent" : "user";
2459
+ };
2460
+ const createChatCompletions = async (payload, account, options) => {
2407
2461
  const ctx = account ?? accountFromState();
2408
2462
  if (!ctx.copilotToken) throw new Error("Copilot token not found");
2463
+ const enableVision = payload.messages.some((x) => typeof x.content !== "string" && x.content?.some((x$1) => x$1.type === "image_url"));
2464
+ const initiator = options?.initiator ?? getChatInitiator(payload.messages);
2409
2465
  const headers = {
2410
- ...copilotHeaders(ctx, vision, upstreamRequestId),
2411
- "x-initiator": initiator
2466
+ ...copilotHeaders(ctx, enableVision, options?.upstreamRequestId),
2467
+ "x-initiator": options?.subagentMarker ? "agent" : initiator
2412
2468
  };
2413
- prepareInteractionHeaders(sessionId, Boolean(subagentMarker), headers);
2414
- payload.service_tier = null;
2415
- const response = await fetch(`${copilotBaseUrl(ctx)}/responses`, {
2469
+ prepareInteractionHeaders(options?.sessionId, Boolean(options?.subagentMarker), headers);
2470
+ const upstreamPayload = applyDefaultReasoningEffort(payload);
2471
+ prepareForCompact(headers, options?.isCompact);
2472
+ const response = await fetch(`${copilotBaseUrl(ctx)}/chat/completions`, {
2416
2473
  method: "POST",
2417
2474
  headers,
2418
- body: JSON.stringify(payload)
2475
+ body: JSON.stringify(upstreamPayload)
2419
2476
  });
2420
2477
  if (!response.ok) {
2421
- consola.error("Failed to create responses", response);
2422
- throw new HTTPError("Failed to create responses", response);
2478
+ consola.error("Failed to create chat completions", response);
2479
+ throw new HTTPError("Failed to create chat completions", response);
2423
2480
  }
2424
2481
  if (payload.stream) return events(response);
2425
2482
  return await response.json();
2426
2483
  };
2427
2484
 
2428
2485
  //#endregion
2429
- //#region src/routes/messages/responses-translation.ts
2430
- const MESSAGE_TYPE = "message";
2431
- const COMPACTION_SIGNATURE_PREFIX = "cm1#";
2432
- const COMPACTION_SIGNATURE_SEPARATOR = "@";
2433
- const THINKING_TEXT$1 = "Thinking...";
2434
- const translateAnthropicMessagesToResponsesPayload = (payload, modelOverride) => {
2435
- const model = modelOverride ?? payload.model;
2436
- const input = [];
2437
- const applyPhase = shouldApplyPhase(payload.model);
2438
- for (const message of payload.messages) input.push(...translateMessage(message, payload.model, applyPhase));
2439
- const translatedTools = convertAnthropicTools(payload.tools);
2440
- const toolChoice = convertAnthropicToolChoice(payload.tool_choice);
2441
- const { safetyIdentifier, promptCacheKey } = parseUserId(payload.metadata?.user_id);
2442
- return {
2443
- model,
2444
- input,
2445
- instructions: translateSystemPrompt(payload.system, model),
2446
- temperature: 1,
2447
- top_p: payload.top_p ?? null,
2448
- max_output_tokens: Math.max(payload.max_tokens, 12800),
2449
- tools: translatedTools,
2450
- tool_choice: toolChoice,
2451
- metadata: payload.metadata ? { ...payload.metadata } : null,
2452
- safety_identifier: safetyIdentifier,
2453
- prompt_cache_key: promptCacheKey,
2454
- stream: payload.stream ?? null,
2455
- store: false,
2456
- parallel_tool_calls: true,
2457
- reasoning: {
2458
- effort: getReasoningEffortForModel(model),
2459
- summary: "auto"
2460
- },
2461
- include: ["reasoning.encrypted_content"]
2462
- };
2463
- };
2464
- const encodeCompactionCarrierSignature = (compaction) => {
2465
- return `${COMPACTION_SIGNATURE_PREFIX}${compaction.encrypted_content}${COMPACTION_SIGNATURE_SEPARATOR}${compaction.id}`;
2466
- };
2467
- const decodeCompactionCarrierSignature = (signature) => {
2468
- if (signature.startsWith(COMPACTION_SIGNATURE_PREFIX)) {
2469
- const raw = signature.slice(4);
2470
- const separatorIndex = raw.indexOf(COMPACTION_SIGNATURE_SEPARATOR);
2471
- if (separatorIndex <= 0 || separatorIndex === raw.length - 1) return;
2472
- const encrypted_content = raw.slice(0, separatorIndex);
2473
- const id = raw.slice(separatorIndex + 1);
2474
- if (!encrypted_content) return;
2475
- return {
2476
- id,
2477
- encrypted_content
2478
- };
2479
- }
2480
- };
2481
- const translateMessage = (message, model, applyPhase) => {
2482
- if (message.role === "user") return translateUserMessage(message);
2483
- return translateAssistantMessage(message, model, applyPhase);
2484
- };
2485
- const translateUserMessage = (message) => {
2486
- if (typeof message.content === "string") return [createMessage("user", message.content)];
2487
- if (!Array.isArray(message.content)) return [];
2488
- const items = [];
2489
- const pendingContent = [];
2490
- for (const block of message.content) {
2491
- if (block.type === "tool_result") {
2492
- flushPendingContent(pendingContent, items, { role: "user" });
2493
- items.push(createFunctionCallOutput(block));
2494
- continue;
2495
- }
2496
- const converted = translateUserContentBlock(block);
2497
- if (converted) pendingContent.push(converted);
2486
+ //#region src/routes/chat-completions/handler.ts
2487
+ const logger$6 = createHandlerLogger("chat-completions-handler");
2488
+ const CHAT_COMPLETIONS_ENDPOINT$1 = "/chat/completions";
2489
+ async function handleCompletion$1(c) {
2490
+ await checkRateLimit(state);
2491
+ const store = getRequestHistoryStore();
2492
+ const request = buildRequestContext$1(c);
2493
+ const payload = await c.req.json();
2494
+ const clientModel = payload.model;
2495
+ const streamRequested = Boolean(payload.stream);
2496
+ const initiator = getChatInitiator(payload.messages);
2497
+ const userId = payload.user ?? void 0;
2498
+ const { safetyIdentifier, sessionId: promptCacheKey } = parseUserIdMetadata(userId);
2499
+ const normalizedSafetyIdentifier = safetyIdentifier ?? void 0;
2500
+ const normalizedPromptCacheKey = promptCacheKey ?? void 0;
2501
+ request.userId = userId;
2502
+ request.safetyIdentifier = normalizedSafetyIdentifier;
2503
+ request.promptCacheKey = normalizedPromptCacheKey;
2504
+ request.initiator = initiator;
2505
+ if (getAliasTargetSet().has(clientModel.toLowerCase())) {
2506
+ recordSelectionFailure$2(store, {
2507
+ request,
2508
+ clientModel,
2509
+ stream: streamRequested,
2510
+ reason: "MODEL_NOT_SUPPORTED"
2511
+ });
2512
+ return selectionFailureResponse$2(c, {
2513
+ clientModel,
2514
+ reason: "MODEL_NOT_SUPPORTED"
2515
+ });
2498
2516
  }
2499
- flushPendingContent(pendingContent, items, { role: "user" });
2500
- return items;
2501
- };
2502
- const translateAssistantMessage = (message, model, applyPhase) => {
2503
- const assistantPhase = resolveAssistantPhase(model, message.content, applyPhase);
2504
- if (typeof message.content === "string") return [createMessage("assistant", message.content, assistantPhase)];
2505
- if (!Array.isArray(message.content)) return [];
2506
- const items = [];
2507
- const pendingContent = [];
2508
- for (const block of message.content) {
2509
- if (block.type === "tool_use") {
2510
- flushPendingContent(pendingContent, items, {
2511
- role: "assistant",
2512
- phase: assistantPhase
2513
- });
2514
- items.push(createFunctionToolCall(block));
2515
- continue;
2516
- }
2517
- if (block.type === "thinking" && block.signature) {
2518
- const compactionContent = createCompactionContent(block);
2519
- if (compactionContent) {
2520
- flushPendingContent(pendingContent, items, {
2521
- role: "assistant",
2522
- phase: assistantPhase
2523
- });
2524
- items.push(compactionContent);
2525
- continue;
2526
- }
2527
- if (block.signature.includes("@")) {
2528
- flushPendingContent(pendingContent, items, {
2529
- role: "assistant",
2530
- phase: assistantPhase
2531
- });
2532
- items.push(createReasoningContent(block));
2533
- continue;
2534
- }
2535
- }
2536
- const converted = translateAssistantContentBlock(block);
2537
- if (converted) pendingContent.push(converted);
2517
+ logger$6.debug("Request payload:", JSON.stringify(payload).slice(-400));
2518
+ const selection = await accountsManager.selectAccountForRequest([{
2519
+ modelId: clientModel,
2520
+ endpoint: CHAT_COMPLETIONS_ENDPOINT$1
2521
+ }]);
2522
+ if (!selection.ok) {
2523
+ recordSelectionFailure$2(store, {
2524
+ request,
2525
+ clientModel,
2526
+ stream: streamRequested,
2527
+ reason: selection.reason
2528
+ });
2529
+ return selectionFailureResponse$2(c, {
2530
+ clientModel,
2531
+ reason: selection.reason
2532
+ });
2538
2533
  }
2539
- flushPendingContent(pendingContent, items, {
2540
- role: "assistant",
2541
- phase: assistantPhase
2542
- });
2543
- return items;
2544
- };
2545
- const translateUserContentBlock = (block) => {
2546
- switch (block.type) {
2547
- case "text": return createTextContent(block.text);
2548
- case "image": return createImageContent(block);
2549
- default: return;
2550
- }
2551
- };
2552
- const translateAssistantContentBlock = (block) => {
2553
- switch (block.type) {
2554
- case "text": return createOutPutTextContent(block.text);
2555
- default: return;
2556
- }
2557
- };
2558
- const flushPendingContent = (pendingContent, target, message) => {
2559
- if (pendingContent.length === 0) return;
2560
- const messageContent = [...pendingContent];
2561
- target.push(createMessage(message.role, messageContent, message.phase));
2562
- pendingContent.length = 0;
2563
- };
2564
- const createMessage = (role, content, phase) => ({
2565
- type: MESSAGE_TYPE,
2566
- role,
2567
- content,
2568
- ...role === "assistant" && phase ? { phase } : {}
2569
- });
2570
- const resolveAssistantPhase = (_model, content, applyPhase) => {
2571
- if (!applyPhase) return;
2572
- if (typeof content === "string") return "final_answer";
2573
- if (!Array.isArray(content)) return;
2574
- if (!content.some((block) => block.type === "text")) return;
2575
- return content.some((block) => block.type === "tool_use") ? "commentary" : "final_answer";
2576
- };
2577
- const shouldApplyPhase = (model) => {
2578
- return getExtraPromptForModel(model).includes("## Intermediary updates");
2579
- };
2580
- const createTextContent = (text) => ({
2581
- type: "input_text",
2582
- text
2583
- });
2584
- const createOutPutTextContent = (text) => ({
2585
- type: "output_text",
2586
- text
2587
- });
2588
- const createImageContent = (block) => ({
2589
- type: "input_image",
2590
- image_url: `data:${block.source.media_type};base64,${block.source.data}`,
2591
- detail: "auto"
2592
- });
2593
- const createReasoningContent = (block) => {
2594
- const { encryptedContent, id } = parseReasoningSignature(block.signature);
2595
- const thinking = block.thinking === THINKING_TEXT$1 ? "" : block.thinking;
2596
- return {
2597
- id,
2598
- type: "reasoning",
2599
- summary: thinking ? [{
2600
- type: "summary_text",
2601
- text: thinking
2602
- }] : [],
2603
- encrypted_content: encryptedContent
2604
- };
2605
- };
2606
- const createCompactionContent = (block) => {
2607
- const compaction = decodeCompactionCarrierSignature(block.signature);
2608
- if (!compaction) return;
2609
- return {
2610
- id: compaction.id,
2611
- type: "compaction",
2612
- encrypted_content: compaction.encrypted_content
2613
- };
2614
- };
2615
- const parseReasoningSignature = (signature) => {
2616
- const splitIndex = signature.lastIndexOf("@");
2617
- if (splitIndex <= 0 || splitIndex === signature.length - 1) return {
2618
- encryptedContent: signature,
2619
- id: ""
2620
- };
2621
- return {
2622
- encryptedContent: signature.slice(0, splitIndex),
2623
- id: signature.slice(splitIndex + 1)
2624
- };
2625
- };
2626
- const createFunctionToolCall = (block) => ({
2627
- type: "function_call",
2628
- call_id: block.id,
2629
- name: block.name,
2630
- arguments: JSON.stringify(block.input),
2631
- status: "completed"
2632
- });
2633
- const createFunctionCallOutput = (block) => ({
2634
- type: "function_call_output",
2635
- call_id: block.tool_use_id,
2636
- output: convertToolResultContent(block.content),
2637
- status: block.is_error ? "incomplete" : "completed"
2638
- });
2639
- const translateSystemPrompt = (system, model) => {
2640
- if (!system) return null;
2641
- const extraPrompt = getExtraPromptForModel(model);
2642
- if (typeof system === "string") return system + extraPrompt;
2643
- const text = system.map((block, index) => {
2644
- if (index === 0) return block.text + extraPrompt;
2645
- return block.text;
2646
- }).join(" ");
2647
- return text.length > 0 ? text : null;
2648
- };
2649
- const convertAnthropicTools = (tools) => {
2650
- if (!tools || tools.length === 0) return null;
2651
- return tools.map((tool) => ({
2652
- type: "function",
2653
- name: tool.name,
2654
- parameters: tool.input_schema,
2655
- strict: false,
2656
- ...tool.description ? { description: tool.description } : {}
2657
- }));
2658
- };
2659
- const convertAnthropicToolChoice = (choice) => {
2660
- if (!choice) return "auto";
2661
- switch (choice.type) {
2662
- case "auto": return "auto";
2663
- case "any": return "required";
2664
- case "tool": return choice.name ? {
2665
- type: "function",
2666
- name: choice.name
2667
- } : "auto";
2668
- case "none": return "none";
2669
- default: return "auto";
2670
- }
2671
- };
2672
- const translateResponsesResultToAnthropic = (response) => {
2673
- const contentBlocks = mapOutputToAnthropicContent(response.output);
2674
- const usage = mapResponsesUsage(response);
2675
- let anthropicContent = fallbackContentBlocks(response.output_text);
2676
- if (contentBlocks.length > 0) anthropicContent = contentBlocks;
2677
- const stopReason = mapResponsesStopReason(response);
2678
- return {
2679
- id: response.id,
2680
- type: "message",
2681
- role: "assistant",
2682
- content: anthropicContent,
2683
- model: response.model,
2684
- stop_reason: stopReason,
2685
- stop_sequence: null,
2686
- usage
2687
- };
2688
- };
2689
- const mapOutputToAnthropicContent = (output) => {
2690
- const contentBlocks = [];
2691
- for (const item of output) switch (item.type) {
2692
- case "reasoning": {
2693
- const thinkingText = extractReasoningText(item);
2694
- if (thinkingText.length > 0) contentBlocks.push({
2695
- type: "thinking",
2696
- thinking: thinkingText,
2697
- signature: (item.encrypted_content ?? "") + "@" + item.id
2698
- });
2699
- break;
2700
- }
2701
- case "function_call": {
2702
- const toolUseBlock = createToolUseContentBlock(item);
2703
- if (toolUseBlock) contentBlocks.push(toolUseBlock);
2704
- break;
2705
- }
2706
- case "message": {
2707
- const combinedText = combineMessageTextContent(item.content);
2708
- if (combinedText.length > 0) contentBlocks.push({
2709
- type: "text",
2710
- text: combinedText
2711
- });
2712
- break;
2713
- }
2714
- case "compaction": {
2715
- const compactionBlock = createCompactionThinkingBlock(item);
2716
- if (compactionBlock) contentBlocks.push(compactionBlock);
2717
- break;
2718
- }
2719
- default: {
2720
- const combinedText = combineMessageTextContent(item.content);
2721
- if (combinedText.length > 0) contentBlocks.push({
2722
- type: "text",
2723
- text: combinedText
2724
- });
2725
- }
2726
- }
2727
- return contentBlocks;
2728
- };
2729
- const combineMessageTextContent = (content) => {
2730
- if (!Array.isArray(content)) return "";
2731
- let aggregated = "";
2732
- for (const block of content) {
2733
- if (isResponseOutputText(block)) {
2734
- aggregated += block.text;
2735
- continue;
2736
- }
2737
- if (isResponseOutputRefusal(block)) {
2738
- aggregated += block.refusal;
2739
- continue;
2740
- }
2741
- if (typeof block.text === "string") {
2742
- aggregated += block.text;
2743
- continue;
2744
- }
2745
- if (typeof block.reasoning === "string") {
2746
- aggregated += block.reasoning;
2747
- continue;
2748
- }
2749
- }
2750
- return aggregated;
2751
- };
2752
- const extractReasoningText = (item) => {
2753
- const segments = [];
2754
- const collectFromBlocks = (blocks) => {
2755
- if (!Array.isArray(blocks)) return;
2756
- for (const block of blocks) if (typeof block.text === "string") {
2757
- segments.push(block.text);
2758
- continue;
2759
- }
2760
- };
2761
- if (!item.summary || item.summary.length === 0) return THINKING_TEXT$1;
2762
- collectFromBlocks(item.summary);
2763
- return segments.join("").trim();
2764
- };
2765
- const createToolUseContentBlock = (call) => {
2766
- const toolId = call.call_id;
2767
- if (!call.name || !toolId) return null;
2768
- const input = parseFunctionCallArguments(call.arguments);
2769
- return {
2770
- type: "tool_use",
2771
- id: toolId,
2772
- name: call.name,
2773
- input
2774
- };
2775
- };
2776
- const createCompactionThinkingBlock = (item) => {
2777
- if (!item.id || !item.encrypted_content) return null;
2778
- return {
2779
- type: "thinking",
2780
- thinking: THINKING_TEXT$1,
2781
- signature: encodeCompactionCarrierSignature({
2782
- id: item.id,
2783
- encrypted_content: item.encrypted_content
2784
- })
2785
- };
2786
- };
2787
- const parseFunctionCallArguments = (rawArguments) => {
2788
- if (typeof rawArguments !== "string" || rawArguments.trim().length === 0) return {};
2789
- try {
2790
- const parsed = JSON.parse(rawArguments);
2791
- if (Array.isArray(parsed)) return { arguments: parsed };
2792
- if (parsed && typeof parsed === "object") return parsed;
2793
- } catch (error) {
2794
- consola.warn("Failed to parse function call arguments", {
2795
- error,
2796
- rawArguments
2797
- });
2798
- }
2799
- return { raw_arguments: rawArguments };
2800
- };
2801
- const fallbackContentBlocks = (outputText) => {
2802
- if (!outputText) return [];
2803
- return [{
2804
- type: "text",
2805
- text: outputText
2806
- }];
2807
- };
2808
- const mapResponsesStopReason = (response) => {
2809
- const { status, incomplete_details: incompleteDetails } = response;
2810
- if (status === "completed") {
2811
- if (response.output.some((item) => item.type === "function_call")) return "tool_use";
2812
- return "end_turn";
2813
- }
2814
- if (status === "incomplete") {
2815
- if (incompleteDetails?.reason === "max_output_tokens") return "max_tokens";
2816
- if (incompleteDetails?.reason === "content_filter") return "end_turn";
2817
- }
2818
- return null;
2819
- };
2820
- const mapResponsesUsage = (response) => {
2821
- const inputTokens = response.usage?.input_tokens ?? 0;
2822
- const outputTokens = response.usage?.output_tokens ?? 0;
2823
- const inputCachedTokens = response.usage?.input_tokens_details?.cached_tokens;
2824
- return {
2825
- input_tokens: inputTokens - (inputCachedTokens ?? 0),
2826
- output_tokens: outputTokens,
2827
- ...response.usage?.input_tokens_details?.cached_tokens !== void 0 && { cache_read_input_tokens: response.usage.input_tokens_details.cached_tokens }
2828
- };
2829
- };
2830
- const isRecord = (value) => typeof value === "object" && value !== null;
2831
- const isResponseOutputText = (block) => isRecord(block) && "type" in block && block.type === "output_text";
2832
- const isResponseOutputRefusal = (block) => isRecord(block) && "type" in block && block.type === "refusal";
2833
- const parseUserId = (userId) => {
2834
- if (!userId || typeof userId !== "string") return {
2835
- safetyIdentifier: null,
2836
- promptCacheKey: null
2837
- };
2838
- const userMatch = userId.match(/user_([^_]+)_account/);
2839
- const safetyIdentifier = userMatch ? userMatch[1] : null;
2840
- const sessionMatch = userId.match(/_session_(.+)$/);
2841
- const promptCacheKey = sessionMatch ? sessionMatch[1] : null;
2842
- return {
2843
- safetyIdentifier,
2844
- promptCacheKey
2845
- };
2846
- };
2847
- const convertToolResultContent = (content) => {
2848
- if (typeof content === "string") return content;
2849
- if (Array.isArray(content)) {
2850
- const result = [];
2851
- for (const block of content) switch (block.type) {
2852
- case "text":
2853
- result.push(createTextContent(block.text));
2854
- break;
2855
- case "image":
2856
- result.push(createImageContent(block));
2857
- break;
2858
- default: break;
2859
- }
2860
- return result;
2861
- }
2862
- return "";
2863
- };
2864
-
2865
- //#endregion
2866
- //#region src/services/copilot/create-chat-completions.ts
2867
- function isGpt5MiniFamily(modelId) {
2868
- return modelId === "gpt-5-mini" || modelId.startsWith("gpt-5-mini-");
2869
- }
2870
- function applyDefaultReasoningEffort(payload) {
2871
- if (!isGpt5MiniFamily(payload.model)) return payload;
2872
- if (payload.reasoning_effort !== null && payload.reasoning_effort !== void 0) return payload;
2873
- return {
2874
- ...payload,
2875
- reasoning_effort: getReasoningEffortForModel("gpt-5-mini")
2876
- };
2877
- }
2878
- const getChatInitiator = (messages) => {
2879
- if (isForceAgentEnabled()) return messages.some((msg) => ["assistant", "tool"].includes(msg.role)) ? "agent" : "user";
2880
- const lastMessage = messages.at(-1);
2881
- if (!lastMessage) return "user";
2882
- return ["assistant", "tool"].includes(lastMessage.role) ? "agent" : "user";
2883
- };
2884
- const createChatCompletions = async (payload, account, options) => {
2885
- const ctx = account ?? accountFromState();
2886
- if (!ctx.copilotToken) throw new Error("Copilot token not found");
2887
- const enableVision = payload.messages.some((x) => typeof x.content !== "string" && x.content?.some((x$1) => x$1.type === "image_url"));
2888
- const initiator = options?.initiator ?? getChatInitiator(payload.messages);
2889
- const headers = {
2890
- ...copilotHeaders(ctx, enableVision, options?.upstreamRequestId),
2891
- "x-initiator": options?.subagentMarker ? "agent" : initiator
2892
- };
2893
- prepareInteractionHeaders(options?.sessionId, Boolean(options?.subagentMarker), headers);
2894
- const upstreamPayload = applyDefaultReasoningEffort(payload);
2895
- const response = await fetch(`${copilotBaseUrl(ctx)}/chat/completions`, {
2896
- method: "POST",
2897
- headers,
2898
- body: JSON.stringify(upstreamPayload)
2899
- });
2900
- if (!response.ok) {
2901
- consola.error("Failed to create chat completions", response);
2902
- throw new HTTPError("Failed to create chat completions", response);
2903
- }
2904
- if (payload.stream) return events(response);
2905
- return await response.json();
2906
- };
2907
-
2908
- //#endregion
2909
- //#region src/routes/chat-completions/handler.ts
2910
- const logger$6 = createHandlerLogger("chat-completions-handler");
2911
- const CHAT_COMPLETIONS_ENDPOINT$1 = "/chat/completions";
2912
- async function handleCompletion$1(c) {
2913
- await checkRateLimit(state);
2914
- const store = getRequestHistoryStore();
2915
- const request = buildRequestContext$1(c);
2916
- const payload = await c.req.json();
2917
- const clientModel = payload.model;
2918
- const streamRequested = Boolean(payload.stream);
2919
- const initiator = getChatInitiator(payload.messages);
2920
- const userId = payload.user ?? void 0;
2921
- const { safetyIdentifier, promptCacheKey } = parseUserId(userId);
2922
- const normalizedSafetyIdentifier = safetyIdentifier ?? void 0;
2923
- const normalizedPromptCacheKey = promptCacheKey ?? void 0;
2924
- request.userId = userId;
2925
- request.safetyIdentifier = normalizedSafetyIdentifier;
2926
- request.promptCacheKey = normalizedPromptCacheKey;
2927
- request.initiator = initiator;
2928
- if (getAliasTargetSet().has(clientModel.toLowerCase())) {
2929
- recordSelectionFailure$2(store, {
2930
- request,
2931
- clientModel,
2932
- stream: streamRequested,
2933
- reason: "MODEL_NOT_SUPPORTED"
2934
- });
2935
- return selectionFailureResponse$2(c, {
2936
- clientModel,
2937
- reason: "MODEL_NOT_SUPPORTED"
2938
- });
2939
- }
2940
- logger$6.debug("Request payload:", JSON.stringify(payload).slice(-400));
2941
- const selection = await accountsManager.selectAccountForRequest([{
2942
- modelId: clientModel,
2943
- endpoint: CHAT_COMPLETIONS_ENDPOINT$1
2944
- }]);
2945
- if (!selection.ok) {
2946
- recordSelectionFailure$2(store, {
2947
- request,
2948
- clientModel,
2949
- stream: streamRequested,
2950
- reason: selection.reason
2951
- });
2952
- return selectionFailureResponse$2(c, {
2953
- clientModel,
2954
- reason: selection.reason
2955
- });
2956
- }
2957
- const { account, selectedModel } = selection;
2958
- const upstreamPayload = {
2959
- ...payload,
2960
- model: selectedModel.id
2961
- };
2962
- const premiumRemainingBefore = account.premiumRemaining;
2963
- const premiumUnlimitedBefore = account.unlimited;
2964
- await logTokenCountForRequest({
2965
- payload: upstreamPayload,
2966
- selectedModel
2534
+ const { account, selectedModel } = selection;
2535
+ const upstreamPayload = {
2536
+ ...payload,
2537
+ model: selectedModel.id
2538
+ };
2539
+ const premiumRemainingBefore = account.premiumRemaining;
2540
+ const premiumUnlimitedBefore = account.unlimited;
2541
+ await logTokenCountForRequest({
2542
+ payload: upstreamPayload,
2543
+ selectedModel
2967
2544
  });
2968
2545
  if (state.manualApprove) await awaitApproval();
2969
2546
  const payloadWithMaxTokens = applyDefaultMaxTokens(upstreamPayload, selectedModel);
@@ -3311,600 +2888,1059 @@ async function handleNonStreamingRequest(params) {
3311
2888
  errorMessage
3312
2889
  });
3313
2890
  }
3314
- }
3315
- const isNonStreaming$1 = (response) => Object.hasOwn(response, "choices");
3316
-
3317
- //#endregion
3318
- //#region src/routes/chat-completions/route.ts
3319
- const completionRoutes = new Hono();
3320
- completionRoutes.post("/", async (c) => {
2891
+ }
2892
+ const isNonStreaming$1 = (response) => Object.hasOwn(response, "choices");
2893
+
2894
+ //#endregion
2895
+ //#region src/routes/chat-completions/route.ts
2896
+ const completionRoutes = new Hono();
2897
+ completionRoutes.post("/", async (c) => {
2898
+ try {
2899
+ return await handleCompletion$1(c);
2900
+ } catch (error) {
2901
+ return await forwardError(c, error);
2902
+ }
2903
+ });
2904
+
2905
+ //#endregion
2906
+ //#region src/services/copilot/create-embeddings.ts
2907
+ const createEmbeddings = async (payload, account) => {
2908
+ const ctx = account ?? accountFromState();
2909
+ if (!ctx.copilotToken) throw new Error("Copilot token not found");
2910
+ const response = await fetch(`${copilotBaseUrl(ctx)}/embeddings`, {
2911
+ method: "POST",
2912
+ headers: copilotHeaders(ctx),
2913
+ body: JSON.stringify(payload)
2914
+ });
2915
+ if (!response.ok) throw new HTTPError("Failed to create embeddings", response);
2916
+ return await response.json();
2917
+ };
2918
+
2919
+ //#endregion
2920
+ //#region src/routes/embeddings/route.ts
2921
+ const embeddingRoutes = new Hono();
2922
+ const EMBEDDINGS_ENDPOINT = "/embeddings";
2923
+ embeddingRoutes.post("/", async (c) => {
2924
+ try {
2925
+ const store = getRequestHistoryStore();
2926
+ const requestId = randomUUID();
2927
+ const startedAtMs = Date.now();
2928
+ const method = c.req.raw.method;
2929
+ const path$2 = new URL(c.req.url, "http://local").pathname;
2930
+ const { ip: clientIp, source: clientIpSource } = getClientIpInfo(c);
2931
+ const userAgent = c.req.header("user-agent") ?? void 0;
2932
+ const ctx = {
2933
+ requestId,
2934
+ startedAtMs,
2935
+ method,
2936
+ path: path$2,
2937
+ clientIp,
2938
+ clientIpSource,
2939
+ userAgent
2940
+ };
2941
+ const payload = await c.req.json();
2942
+ const clientModel = payload.model;
2943
+ if (getAliasTargetSet().has(clientModel.toLowerCase())) {
2944
+ recordSelectionFailure$1(store, {
2945
+ ctx,
2946
+ clientModel,
2947
+ reason: "MODEL_NOT_SUPPORTED"
2948
+ });
2949
+ return selectionFailureResponse$1(c, clientModel, "MODEL_NOT_SUPPORTED");
2950
+ }
2951
+ const selection = await accountsManager.selectAccountForRequest([{
2952
+ modelId: clientModel,
2953
+ endpoint: EMBEDDINGS_ENDPOINT
2954
+ }]);
2955
+ if (!selection.ok) {
2956
+ recordSelectionFailure$1(store, {
2957
+ ctx,
2958
+ clientModel,
2959
+ reason: selection.reason
2960
+ });
2961
+ return selectionFailureResponse$1(c, clientModel, selection.reason);
2962
+ }
2963
+ const upstreamPayload = {
2964
+ ...payload,
2965
+ model: selection.selectedModel.id
2966
+ };
2967
+ return await runEmbeddingsWithAccount({
2968
+ c,
2969
+ store,
2970
+ ctx,
2971
+ payload: upstreamPayload,
2972
+ clientModel,
2973
+ selection
2974
+ });
2975
+ } catch (error) {
2976
+ return await forwardError(c, error);
2977
+ }
2978
+ });
2979
+ function recordSelectionFailure$1(store, params) {
2980
+ const { ctx, clientModel, reason } = params;
2981
+ const finishedAtMs = Date.now();
2982
+ store.insert({
2983
+ requestId: ctx.requestId,
2984
+ startedAtMs: ctx.startedAtMs,
2985
+ finishedAtMs,
2986
+ durationMs: finishedAtMs - ctx.startedAtMs,
2987
+ method: ctx.method,
2988
+ path: ctx.path,
2989
+ upstreamEndpoint: EMBEDDINGS_ENDPOINT,
2990
+ stream: false,
2991
+ clientModel,
2992
+ clientIp: ctx.clientIp,
2993
+ clientIpSource: ctx.clientIpSource,
2994
+ userAgent: ctx.userAgent,
2995
+ httpStatus: reason === "MODEL_NOT_SUPPORTED" ? 400 : 429,
2996
+ selectionFailureReason: reason
2997
+ });
2998
+ }
2999
+ function selectionFailureResponse$1(c, clientModel, reason) {
3000
+ if (reason === "MODEL_NOT_SUPPORTED") return c.json({ error: {
3001
+ message: `Model "${clientModel}" is not available for any configured account.`,
3002
+ type: "invalid_request_error"
3003
+ } }, 400);
3004
+ return c.json({ error: {
3005
+ message: "All accounts have exhausted their quota. Please wait for quota refresh or add additional accounts.",
3006
+ type: "rate_limit_error"
3007
+ } }, 429);
3008
+ }
3009
+ async function runEmbeddingsWithAccount({ c, store, ctx, payload, clientModel, selection }) {
3010
+ const { account, reservation, selectedModel, endpoint, costUnits } = selection;
3011
+ const premiumRemainingBefore = account.premiumRemaining;
3012
+ const premiumUnlimitedBefore = account.unlimited;
3013
+ let httpStatus = 200;
3014
+ let usage = {};
3015
+ let errorName;
3016
+ let errorStatus;
3017
+ let errorMessage;
3018
+ let finishedAtMs;
3019
+ try {
3020
+ const accountCtx = toAccountContext(account);
3021
+ const response = await createEmbeddings(payload, accountCtx);
3022
+ usage = normalizeEmbeddingsUsage(response.usage);
3023
+ finishedAtMs = Date.now();
3024
+ return c.json(response);
3025
+ } catch (error) {
3026
+ finishedAtMs = Date.now();
3027
+ const details = extractErrorDetails(error);
3028
+ httpStatus = details.httpStatus;
3029
+ errorName = details.errorName;
3030
+ errorStatus = details.errorStatus;
3031
+ errorMessage = details.errorMessage;
3032
+ if (details.unauthorized) accountsManager.markAccountFailed(account.id, "Unauthorized (401)");
3033
+ throw error;
3034
+ } finally {
3035
+ const finishedAtMsFinal = finishedAtMs ?? Date.now();
3036
+ await accountsManager.finalizeQuota(account, reservation);
3037
+ const premiumRemainingAfter = account.premiumRemaining;
3038
+ const premiumUnlimitedAfter = account.unlimited;
3039
+ store.insert({
3040
+ requestId: ctx.requestId,
3041
+ startedAtMs: ctx.startedAtMs,
3042
+ finishedAtMs: finishedAtMsFinal,
3043
+ durationMs: finishedAtMsFinal - ctx.startedAtMs,
3044
+ method: ctx.method,
3045
+ path: ctx.path,
3046
+ upstreamEndpoint: endpoint,
3047
+ stream: false,
3048
+ accountId: account.id,
3049
+ accountType: account.accountType,
3050
+ costUnits,
3051
+ clientModel,
3052
+ upstreamModel: selectedModel.id,
3053
+ clientIp: ctx.clientIp,
3054
+ clientIpSource: ctx.clientIpSource,
3055
+ userAgent: ctx.userAgent,
3056
+ ...usage,
3057
+ premiumRemainingBefore,
3058
+ premiumRemainingAfter,
3059
+ premiumRemainingDiff: computeDiff(premiumRemainingBefore, premiumRemainingAfter),
3060
+ premiumUnlimitedBefore,
3061
+ premiumUnlimitedAfter,
3062
+ httpStatus,
3063
+ errorName,
3064
+ errorStatus,
3065
+ errorMessage
3066
+ });
3067
+ }
3068
+ }
3069
+
3070
+ //#endregion
3071
+ //#region src/lib/models.ts
3072
+ const findEndpointModel = (sdkModelId) => {
3073
+ const models = state.models?.data ?? [];
3074
+ const exactMatch = models.find((m) => m.id === sdkModelId);
3075
+ if (exactMatch) return exactMatch;
3076
+ const normalized = _normalizeSdkModelId(sdkModelId);
3077
+ if (!normalized) return;
3078
+ const modelName = `claude-${normalized.family}-${normalized.version}`;
3079
+ const model = models.find((m) => m.id === modelName);
3080
+ if (model) return model;
3081
+ };
3082
+ /**
3083
+ * Normalizes an SDK model ID to extract the model family and version.
3084
+ * this method from github copilot extension
3085
+ * Examples:
3086
+ * - "claude-opus-4-5-20251101" -> { family: "opus", version: "4.5" }
3087
+ * - "claude-3-5-sonnet-20241022" -> { family: "sonnet", version: "3.5" }
3088
+ * - "claude-sonnet-4-20250514" -> { family: "sonnet", version: "4" }
3089
+ * - "claude-haiku-3-5-20250514" -> { family: "haiku", version: "3.5" }
3090
+ * - "claude-haiku-4.5" -> { family: "haiku", version: "4.5" }
3091
+ */
3092
+ const _normalizeSdkModelId = (sdkModelId) => {
3093
+ const withoutDate = sdkModelId.toLowerCase().replace(/-\d{8}$/, "");
3094
+ const pattern1 = withoutDate.match(/^claude-(\w+)-(\d+)-(\d+)$/);
3095
+ if (pattern1) return {
3096
+ family: pattern1[1],
3097
+ version: `${pattern1[2]}.${pattern1[3]}`
3098
+ };
3099
+ const pattern2 = withoutDate.match(/^claude-(\d+)-(\d+)-(\w+)$/);
3100
+ if (pattern2) return {
3101
+ family: pattern2[3],
3102
+ version: `${pattern2[1]}.${pattern2[2]}`
3103
+ };
3104
+ const pattern3 = withoutDate.match(/^claude-(\w+)-(\d+)\.(\d+)$/);
3105
+ if (pattern3) return {
3106
+ family: pattern3[1],
3107
+ version: `${pattern3[2]}.${pattern3[3]}`
3108
+ };
3109
+ const pattern4 = withoutDate.match(/^claude-(\w+)-(\d+)$/);
3110
+ if (pattern4) return {
3111
+ family: pattern4[1],
3112
+ version: pattern4[2]
3113
+ };
3114
+ const pattern5 = withoutDate.match(/^claude-(\d+)-(\w+)$/);
3115
+ if (pattern5) return {
3116
+ family: pattern5[2],
3117
+ version: pattern5[1]
3118
+ };
3119
+ };
3120
+
3121
+ //#endregion
3122
+ //#region src/routes/messages/utils.ts
3123
+ function mapOpenAIStopReasonToAnthropic(finishReason) {
3124
+ if (finishReason === null) return null;
3125
+ return {
3126
+ stop: "end_turn",
3127
+ length: "max_tokens",
3128
+ tool_calls: "tool_use",
3129
+ content_filter: "end_turn"
3130
+ }[finishReason];
3131
+ }
3132
+ const mergeContentWithText = (toolResult, textBlock) => {
3133
+ if (typeof toolResult.content === "string") return {
3134
+ ...toolResult,
3135
+ content: `${toolResult.content}\n\n${textBlock.text}`
3136
+ };
3137
+ return {
3138
+ ...toolResult,
3139
+ content: [...toolResult.content, textBlock]
3140
+ };
3141
+ };
3142
+ const mergeContentWithTexts = (toolResult, textBlocks) => {
3143
+ if (typeof toolResult.content === "string") {
3144
+ const appendedTexts = textBlocks.map((tb) => tb.text).join("\n\n");
3145
+ return {
3146
+ ...toolResult,
3147
+ content: `${toolResult.content}\n\n${appendedTexts}`
3148
+ };
3149
+ }
3150
+ return {
3151
+ ...toolResult,
3152
+ content: [...toolResult.content, ...textBlocks]
3153
+ };
3154
+ };
3155
+ const mergeToolResult = (toolResults, textBlocks) => {
3156
+ if (toolResults.length === textBlocks.length) return toolResults.map((toolResult, index) => mergeContentWithText(toolResult, textBlocks[index]));
3157
+ const lastIndex = toolResults.length - 1;
3158
+ return toolResults.map((toolResult, index) => index === lastIndex ? mergeContentWithTexts(toolResult, textBlocks) : toolResult);
3159
+ };
3160
+ const mergeToolResultForClaude = (anthropicPayload) => {
3161
+ for (const msg of anthropicPayload.messages) {
3162
+ if (msg.role !== "user" || !Array.isArray(msg.content)) continue;
3163
+ const toolResults = [];
3164
+ const textBlocks = [];
3165
+ let valid = true;
3166
+ for (const block of msg.content) if (block.type === "tool_result") toolResults.push(block);
3167
+ else if (block.type === "text") textBlocks.push(block);
3168
+ else {
3169
+ valid = false;
3170
+ break;
3171
+ }
3172
+ if (!valid || toolResults.length === 0 || textBlocks.length === 0) continue;
3173
+ msg.content = mergeToolResult(toolResults, textBlocks);
3174
+ }
3175
+ };
3176
+ const estimateInputTokens = async (payload, selectedModel, logger$7) => {
3321
3177
  try {
3322
- return await handleCompletion$1(c);
3178
+ return (await getTokenCount(payload, selectedModel)).input;
3323
3179
  } catch (error) {
3324
- return await forwardError(c, error);
3180
+ logger$7.warn("Failed to estimate input tokens for message_start", error);
3181
+ return;
3325
3182
  }
3326
- });
3327
-
3328
- //#endregion
3329
- //#region src/services/copilot/create-embeddings.ts
3330
- const createEmbeddings = async (payload, account) => {
3331
- const ctx = account ?? accountFromState();
3332
- if (!ctx.copilotToken) throw new Error("Copilot token not found");
3333
- const response = await fetch(`${copilotBaseUrl(ctx)}/embeddings`, {
3334
- method: "POST",
3335
- headers: copilotHeaders(ctx),
3336
- body: JSON.stringify(payload)
3183
+ };
3184
+ const isWarmupProbeRequest = (payload) => {
3185
+ const lastMsg = payload.messages.at(-1);
3186
+ if (!lastMsg || lastMsg.role !== "user" || !Array.isArray(lastMsg.content)) return false;
3187
+ const lastBlock = lastMsg.content.at(-1);
3188
+ if (!lastBlock || lastBlock.type !== "text") return false;
3189
+ const text = lastBlock.text.trim().toLowerCase();
3190
+ if (!(lastBlock.cache_control?.type === "ephemeral")) return false;
3191
+ if (text === "warmup") return true;
3192
+ if (text === "hello") {
3193
+ const preludeBlocks = lastMsg.content.slice(0, -1);
3194
+ if (preludeBlocks.length === 0) return false;
3195
+ return preludeBlocks.every((block) => block.type === "text" && block.text.trimStart().toLowerCase().startsWith("<system-reminder"));
3196
+ }
3197
+ return false;
3198
+ };
3199
+ const handleSelectionFailure = (context) => {
3200
+ const { c, store, requestId, startedAtMs, method, path: path$2, streamRequested, clientModel, clientIp, clientIpSource, userAgent, userId, safetyIdentifier, promptCacheKey, initiator, selection } = context;
3201
+ const finishedAtMs = Date.now();
3202
+ store.insert({
3203
+ requestId,
3204
+ startedAtMs,
3205
+ finishedAtMs,
3206
+ durationMs: finishedAtMs - startedAtMs,
3207
+ method,
3208
+ path: path$2,
3209
+ stream: streamRequested,
3210
+ clientModel,
3211
+ clientIp,
3212
+ clientIpSource,
3213
+ userAgent,
3214
+ userId,
3215
+ safetyIdentifier,
3216
+ promptCacheKey,
3217
+ initiator,
3218
+ httpStatus: selection.reason === "MODEL_NOT_SUPPORTED" ? 400 : 429,
3219
+ selectionFailureReason: selection.reason
3220
+ });
3221
+ if (selection.reason === "MODEL_NOT_SUPPORTED") return c.json({ error: {
3222
+ message: `Model "${clientModel}" is not available for any configured account.`,
3223
+ type: "invalid_request_error"
3224
+ } }, 400);
3225
+ return c.json({ error: {
3226
+ message: "All accounts have exhausted their quota. Please wait for quota refresh or add additional accounts.",
3227
+ type: "rate_limit_error"
3228
+ } }, 429);
3229
+ };
3230
+ const maybeBlockOriginalModelName = (context) => {
3231
+ if (!getAliasTargetSet().has(context.clientModel.toLowerCase())) return null;
3232
+ return handleSelectionFailure({
3233
+ ...context,
3234
+ selection: {
3235
+ ok: false,
3236
+ reason: "MODEL_NOT_SUPPORTED"
3237
+ }
3337
3238
  });
3338
- if (!response.ok) throw new HTTPError("Failed to create embeddings", response);
3339
- return await response.json();
3340
3239
  };
3341
3240
 
3342
3241
  //#endregion
3343
- //#region src/routes/embeddings/route.ts
3344
- const embeddingRoutes = new Hono();
3345
- const EMBEDDINGS_ENDPOINT = "/embeddings";
3346
- embeddingRoutes.post("/", async (c) => {
3347
- try {
3348
- const store = getRequestHistoryStore();
3349
- const requestId = randomUUID();
3350
- const startedAtMs = Date.now();
3351
- const method = c.req.raw.method;
3352
- const path$2 = new URL(c.req.url, "http://local").pathname;
3353
- const { ip: clientIp, source: clientIpSource } = getClientIpInfo(c);
3354
- const userAgent = c.req.header("user-agent") ?? void 0;
3355
- const ctx = {
3356
- requestId,
3357
- startedAtMs,
3358
- method,
3359
- path: path$2,
3360
- clientIp,
3361
- clientIpSource,
3362
- userAgent
3363
- };
3364
- const payload = await c.req.json();
3365
- const clientModel = payload.model;
3366
- if (getAliasTargetSet().has(clientModel.toLowerCase())) {
3367
- recordSelectionFailure$1(store, {
3368
- ctx,
3369
- clientModel,
3370
- reason: "MODEL_NOT_SUPPORTED"
3371
- });
3372
- return selectionFailureResponse$1(c, clientModel, "MODEL_NOT_SUPPORTED");
3242
+ //#region src/routes/messages/non-stream-translation.ts
3243
+ const THINKING_TEXT = "Thinking...";
3244
+ function translateToOpenAI(payload) {
3245
+ const modelId = payload.model;
3246
+ const model = state.models?.data.find((m) => m.id === modelId);
3247
+ const thinkingBudget = getThinkingBudget(payload, model);
3248
+ return {
3249
+ model: modelId,
3250
+ messages: translateAnthropicMessagesToOpenAI(payload, modelId, thinkingBudget),
3251
+ max_tokens: payload.max_tokens,
3252
+ stop: payload.stop_sequences,
3253
+ stream: payload.stream,
3254
+ temperature: payload.temperature,
3255
+ top_p: payload.top_p,
3256
+ user: payload.metadata?.user_id,
3257
+ tools: translateAnthropicToolsToOpenAI(payload.tools),
3258
+ tool_choice: translateAnthropicToolChoiceToOpenAI(payload.tool_choice),
3259
+ thinking_budget: thinkingBudget
3260
+ };
3261
+ }
3262
+ function getThinkingBudget(payload, model) {
3263
+ const thinking = payload.thinking;
3264
+ if (model && thinking) {
3265
+ const maxThinkingBudget = Math.min(model.capabilities.supports.max_thinking_budget ?? 0, (model.capabilities.limits.max_output_tokens ?? 0) - 1);
3266
+ thinking.budget_tokens ??= maxThinkingBudget;
3267
+ if (maxThinkingBudget > 0) {
3268
+ const budgetTokens = Math.min(thinking.budget_tokens, maxThinkingBudget);
3269
+ return Math.max(budgetTokens, model.capabilities.supports.min_thinking_budget ?? 1024);
3373
3270
  }
3374
- const selection = await accountsManager.selectAccountForRequest([{
3375
- modelId: clientModel,
3376
- endpoint: EMBEDDINGS_ENDPOINT
3377
- }]);
3378
- if (!selection.ok) {
3379
- recordSelectionFailure$1(store, {
3380
- ctx,
3381
- clientModel,
3382
- reason: selection.reason
3271
+ }
3272
+ }
3273
+ function translateAnthropicMessagesToOpenAI(payload, modelId, _thinkingBudget) {
3274
+ const systemMessages = handleSystemPrompt(payload.system);
3275
+ const otherMessages = payload.messages.flatMap((message) => message.role === "user" ? handleUserMessage(message) : handleAssistantMessage(message, modelId));
3276
+ return [...systemMessages, ...otherMessages];
3277
+ }
3278
+ function handleSystemPrompt(system) {
3279
+ if (!system) return [];
3280
+ if (typeof system === "string") return [{
3281
+ role: "system",
3282
+ content: system
3283
+ }];
3284
+ else return [{
3285
+ role: "system",
3286
+ content: system.map((block) => {
3287
+ return block.text;
3288
+ }).join("\n\n")
3289
+ }];
3290
+ }
3291
+ function handleUserMessage(message) {
3292
+ const newMessages = [];
3293
+ if (Array.isArray(message.content)) {
3294
+ const toolResultBlocks = message.content.filter((block) => block.type === "tool_result");
3295
+ const otherBlocks = message.content.filter((block) => block.type !== "tool_result");
3296
+ for (const block of toolResultBlocks) newMessages.push({
3297
+ role: "tool",
3298
+ tool_call_id: block.tool_use_id,
3299
+ content: mapContent(block.content)
3300
+ });
3301
+ if (otherBlocks.length > 0) newMessages.push({
3302
+ role: "user",
3303
+ content: mapContent(otherBlocks)
3304
+ });
3305
+ } else newMessages.push({
3306
+ role: "user",
3307
+ content: mapContent(message.content)
3308
+ });
3309
+ return newMessages;
3310
+ }
3311
+ function handleAssistantMessage(message, modelId) {
3312
+ if (!Array.isArray(message.content)) return [{
3313
+ role: "assistant",
3314
+ content: mapContent(message.content)
3315
+ }];
3316
+ const toolUseBlocks = message.content.filter((block) => block.type === "tool_use");
3317
+ let thinkingBlocks = message.content.filter((block) => block.type === "thinking");
3318
+ if (modelId.startsWith("claude")) thinkingBlocks = thinkingBlocks.filter((b) => b.thinking && b.thinking !== THINKING_TEXT && b.signature && !b.signature.includes("@"));
3319
+ const thinkingContents = thinkingBlocks.filter((b) => b.thinking && b.thinking !== THINKING_TEXT).map((b) => b.thinking);
3320
+ const allThinkingContent = thinkingContents.length > 0 ? thinkingContents.join("\n\n") : void 0;
3321
+ const signature = thinkingBlocks.find((b) => b.signature)?.signature;
3322
+ return toolUseBlocks.length > 0 ? [{
3323
+ role: "assistant",
3324
+ content: mapContent(message.content),
3325
+ reasoning_text: allThinkingContent,
3326
+ reasoning_opaque: signature,
3327
+ tool_calls: toolUseBlocks.map((toolUse) => ({
3328
+ id: toolUse.id,
3329
+ type: "function",
3330
+ function: {
3331
+ name: toolUse.name,
3332
+ arguments: JSON.stringify(toolUse.input)
3333
+ }
3334
+ }))
3335
+ }] : [{
3336
+ role: "assistant",
3337
+ content: mapContent(message.content),
3338
+ reasoning_text: allThinkingContent,
3339
+ reasoning_opaque: signature
3340
+ }];
3341
+ }
3342
+ function mapContent(content) {
3343
+ if (typeof content === "string") return content;
3344
+ if (!Array.isArray(content)) return null;
3345
+ if (!content.some((block) => block.type === "image")) return content.filter((block) => block.type === "text").map((block) => block.text).join("\n\n");
3346
+ const contentParts = [];
3347
+ for (const block of content) switch (block.type) {
3348
+ case "text":
3349
+ contentParts.push({
3350
+ type: "text",
3351
+ text: block.text
3383
3352
  });
3384
- return selectionFailureResponse$1(c, clientModel, selection.reason);
3385
- }
3386
- const upstreamPayload = {
3387
- ...payload,
3388
- model: selection.selectedModel.id
3389
- };
3390
- return await runEmbeddingsWithAccount({
3391
- c,
3392
- store,
3393
- ctx,
3394
- payload: upstreamPayload,
3395
- clientModel,
3396
- selection
3397
- });
3398
- } catch (error) {
3399
- return await forwardError(c, error);
3353
+ break;
3354
+ case "image":
3355
+ contentParts.push({
3356
+ type: "image_url",
3357
+ image_url: { url: `data:${block.source.media_type};base64,${block.source.data}` }
3358
+ });
3359
+ break;
3400
3360
  }
3401
- });
3402
- function recordSelectionFailure$1(store, params) {
3403
- const { ctx, clientModel, reason } = params;
3404
- const finishedAtMs = Date.now();
3405
- store.insert({
3406
- requestId: ctx.requestId,
3407
- startedAtMs: ctx.startedAtMs,
3408
- finishedAtMs,
3409
- durationMs: finishedAtMs - ctx.startedAtMs,
3410
- method: ctx.method,
3411
- path: ctx.path,
3412
- upstreamEndpoint: EMBEDDINGS_ENDPOINT,
3413
- stream: false,
3414
- clientModel,
3415
- clientIp: ctx.clientIp,
3416
- clientIpSource: ctx.clientIpSource,
3417
- userAgent: ctx.userAgent,
3418
- httpStatus: reason === "MODEL_NOT_SUPPORTED" ? 400 : 429,
3419
- selectionFailureReason: reason
3420
- });
3361
+ return contentParts;
3421
3362
  }
3422
- function selectionFailureResponse$1(c, clientModel, reason) {
3423
- if (reason === "MODEL_NOT_SUPPORTED") return c.json({ error: {
3424
- message: `Model "${clientModel}" is not available for any configured account.`,
3425
- type: "invalid_request_error"
3426
- } }, 400);
3427
- return c.json({ error: {
3428
- message: "All accounts have exhausted their quota. Please wait for quota refresh or add additional accounts.",
3429
- type: "rate_limit_error"
3430
- } }, 429);
3363
+ function translateAnthropicToolsToOpenAI(anthropicTools) {
3364
+ if (!anthropicTools) return;
3365
+ return anthropicTools.map((tool) => ({
3366
+ type: "function",
3367
+ function: {
3368
+ name: tool.name,
3369
+ description: tool.description,
3370
+ parameters: normalizeToolSchema(tool.input_schema)
3371
+ }
3372
+ }));
3431
3373
  }
3432
- async function runEmbeddingsWithAccount({ c, store, ctx, payload, clientModel, selection }) {
3433
- const { account, reservation, selectedModel, endpoint, costUnits } = selection;
3434
- const premiumRemainingBefore = account.premiumRemaining;
3435
- const premiumUnlimitedBefore = account.unlimited;
3436
- let httpStatus = 200;
3437
- let usage = {};
3438
- let errorName;
3439
- let errorStatus;
3440
- let errorMessage;
3441
- let finishedAtMs;
3442
- try {
3443
- const accountCtx = toAccountContext(account);
3444
- const response = await createEmbeddings(payload, accountCtx);
3445
- usage = normalizeEmbeddingsUsage(response.usage);
3446
- finishedAtMs = Date.now();
3447
- return c.json(response);
3448
- } catch (error) {
3449
- finishedAtMs = Date.now();
3450
- const details = extractErrorDetails(error);
3451
- httpStatus = details.httpStatus;
3452
- errorName = details.errorName;
3453
- errorStatus = details.errorStatus;
3454
- errorMessage = details.errorMessage;
3455
- if (details.unauthorized) accountsManager.markAccountFailed(account.id, "Unauthorized (401)");
3456
- throw error;
3457
- } finally {
3458
- const finishedAtMsFinal = finishedAtMs ?? Date.now();
3459
- await accountsManager.finalizeQuota(account, reservation);
3460
- const premiumRemainingAfter = account.premiumRemaining;
3461
- const premiumUnlimitedAfter = account.unlimited;
3462
- store.insert({
3463
- requestId: ctx.requestId,
3464
- startedAtMs: ctx.startedAtMs,
3465
- finishedAtMs: finishedAtMsFinal,
3466
- durationMs: finishedAtMsFinal - ctx.startedAtMs,
3467
- method: ctx.method,
3468
- path: ctx.path,
3469
- upstreamEndpoint: endpoint,
3470
- stream: false,
3471
- accountId: account.id,
3472
- accountType: account.accountType,
3473
- costUnits,
3474
- clientModel,
3475
- upstreamModel: selectedModel.id,
3476
- clientIp: ctx.clientIp,
3477
- clientIpSource: ctx.clientIpSource,
3478
- userAgent: ctx.userAgent,
3479
- ...usage,
3480
- premiumRemainingBefore,
3481
- premiumRemainingAfter,
3482
- premiumRemainingDiff: computeDiff(premiumRemainingBefore, premiumRemainingAfter),
3483
- premiumUnlimitedBefore,
3484
- premiumUnlimitedAfter,
3485
- httpStatus,
3486
- errorName,
3487
- errorStatus,
3488
- errorMessage
3489
- });
3374
+ /**
3375
+ * Ensures `type: "object"` schema has a `properties` field.
3376
+ * OpenAI's API rejects object schemas without it.
3377
+ */
3378
+ const normalizeToolSchema = (schema) => {
3379
+ if (schema.type === "object" && !schema.properties) return {
3380
+ ...schema,
3381
+ properties: {}
3382
+ };
3383
+ return schema;
3384
+ };
3385
+ function translateAnthropicToolChoiceToOpenAI(anthropicToolChoice) {
3386
+ if (!anthropicToolChoice) return;
3387
+ switch (anthropicToolChoice.type) {
3388
+ case "auto": return "auto";
3389
+ case "any": return "required";
3390
+ case "tool":
3391
+ if (anthropicToolChoice.name) return {
3392
+ type: "function",
3393
+ function: { name: anthropicToolChoice.name }
3394
+ };
3395
+ return;
3396
+ case "none": return "none";
3397
+ default: return;
3398
+ }
3399
+ }
3400
+ function translateToAnthropic(response) {
3401
+ const assistantContentBlocks = [];
3402
+ let stopReason = response.choices[0]?.finish_reason ?? null;
3403
+ for (const choice of response.choices) {
3404
+ const textBlocks = getAnthropicTextBlocks(choice.message.content);
3405
+ const thinkBlocks = getAnthropicThinkBlocks(choice.message.reasoning_text, choice.message.reasoning_opaque);
3406
+ const toolUseBlocks = getAnthropicToolUseBlocks(choice.message.tool_calls);
3407
+ assistantContentBlocks.push(...thinkBlocks, ...textBlocks, ...toolUseBlocks);
3408
+ if (choice.finish_reason === "tool_calls" || stopReason === "stop") stopReason = choice.finish_reason;
3490
3409
  }
3410
+ return {
3411
+ id: response.id,
3412
+ type: "message",
3413
+ role: "assistant",
3414
+ model: response.model,
3415
+ content: assistantContentBlocks,
3416
+ stop_reason: mapOpenAIStopReasonToAnthropic(stopReason),
3417
+ stop_sequence: null,
3418
+ usage: {
3419
+ input_tokens: (response.usage?.prompt_tokens ?? 0) - (response.usage?.prompt_tokens_details?.cached_tokens ?? 0),
3420
+ output_tokens: response.usage?.completion_tokens ?? 0,
3421
+ ...response.usage?.prompt_tokens_details?.cached_tokens !== void 0 && { cache_read_input_tokens: response.usage.prompt_tokens_details.cached_tokens }
3422
+ }
3423
+ };
3424
+ }
3425
+ function getAnthropicTextBlocks(messageContent) {
3426
+ if (typeof messageContent === "string" && messageContent.length > 0) return [{
3427
+ type: "text",
3428
+ text: messageContent
3429
+ }];
3430
+ if (Array.isArray(messageContent)) return messageContent.filter((part) => part.type === "text").map((part) => ({
3431
+ type: "text",
3432
+ text: part.text
3433
+ }));
3434
+ return [];
3435
+ }
3436
+ function getAnthropicThinkBlocks(reasoningText, reasoningOpaque) {
3437
+ if (reasoningText && reasoningText.length > 0) return [{
3438
+ type: "thinking",
3439
+ thinking: reasoningText,
3440
+ signature: reasoningOpaque || ""
3441
+ }];
3442
+ if (reasoningOpaque && reasoningOpaque.length > 0) return [{
3443
+ type: "thinking",
3444
+ thinking: THINKING_TEXT,
3445
+ signature: reasoningOpaque
3446
+ }];
3447
+ return [];
3448
+ }
3449
+ function getAnthropicToolUseBlocks(toolCalls) {
3450
+ if (!toolCalls) return [];
3451
+ return toolCalls.map((toolCall) => ({
3452
+ type: "tool_use",
3453
+ id: toolCall.id,
3454
+ name: toolCall.function.name,
3455
+ input: JSON.parse(toolCall.function.arguments)
3456
+ }));
3491
3457
  }
3492
3458
 
3493
3459
  //#endregion
3494
- //#region src/lib/models.ts
3495
- const findEndpointModel = (sdkModelId) => {
3496
- const models = state.models?.data ?? [];
3497
- const exactMatch = models.find((m) => m.id === sdkModelId);
3498
- if (exactMatch) return exactMatch;
3499
- const normalized = _normalizeSdkModelId(sdkModelId);
3500
- if (!normalized) return;
3501
- const modelName = `claude-${normalized.family}-${normalized.version}`;
3502
- const model = models.find((m) => m.id === modelName);
3503
- if (model) return model;
3504
- };
3460
+ //#region src/routes/messages/count-tokens-handler.ts
3505
3461
  /**
3506
- * Normalizes an SDK model ID to extract the model family and version.
3507
- * this method from github copilot extension
3508
- * Examples:
3509
- * - "claude-opus-4-5-20251101" -> { family: "opus", version: "4.5" }
3510
- * - "claude-3-5-sonnet-20241022" -> { family: "sonnet", version: "3.5" }
3511
- * - "claude-sonnet-4-20250514" -> { family: "sonnet", version: "4" }
3512
- * - "claude-haiku-3-5-20250514" -> { family: "haiku", version: "3.5" }
3513
- * - "claude-haiku-4.5" -> { family: "haiku", version: "4.5" }
3462
+ * Handles token counting for Anthropic messages
3514
3463
  */
3515
- const _normalizeSdkModelId = (sdkModelId) => {
3516
- const withoutDate = sdkModelId.toLowerCase().replace(/-\d{8}$/, "");
3517
- const pattern1 = withoutDate.match(/^claude-(\w+)-(\d+)-(\d+)$/);
3518
- if (pattern1) return {
3519
- family: pattern1[1],
3520
- version: `${pattern1[2]}.${pattern1[3]}`
3521
- };
3522
- const pattern2 = withoutDate.match(/^claude-(\d+)-(\d+)-(\w+)$/);
3523
- if (pattern2) return {
3524
- family: pattern2[3],
3525
- version: `${pattern2[1]}.${pattern2[2]}`
3526
- };
3527
- const pattern3 = withoutDate.match(/^claude-(\w+)-(\d+)\.(\d+)$/);
3528
- if (pattern3) return {
3529
- family: pattern3[1],
3530
- version: `${pattern3[2]}.${pattern3[3]}`
3531
- };
3532
- const pattern4 = withoutDate.match(/^claude-(\w+)-(\d+)$/);
3533
- if (pattern4) return {
3534
- family: pattern4[1],
3535
- version: pattern4[2]
3536
- };
3537
- const pattern5 = withoutDate.match(/^claude-(\d+)-(\w+)$/);
3538
- if (pattern5) return {
3539
- family: pattern5[2],
3540
- version: pattern5[1]
3464
/**
 * Handles an Anthropic-style token counting request.
 *
 * Reads the JSON payload, translates it to the OpenAI shape, resolves the
 * endpoint model and responds with `{ input_tokens }`. Any failure (or an
 * unknown model) is answered with a default count of 1 instead of an error.
 *
 * @param c Request context exposing `req.header`, `req.json` and `json`.
 * @returns The JSON response produced by `c.json`.
 */
async function handleCountTokens(c) {
  try {
    const betaHeader = c.req.header("anthropic-beta");
    const anthropicPayload = await c.req.json();
    const openAIPayload = translateToOpenAI(anthropicPayload);
    const selectedModel = findEndpointModel(anthropicPayload.model);
    // Rewrite the payload model to the resolved id so the prefix checks below see it.
    anthropicPayload.model = selectedModel?.id ?? anthropicPayload.model;
    if (!selectedModel) {
      consola.warn("Model not found, returning default token count");
      return c.json({ input_tokens: 1 });
    }
    const tokenCount = await getTokenCount(openAIPayload, selectedModel);
    const tools = anthropicPayload.tools;
    if (tools && tools.length > 0 && betaHeader) {
      const singleTool = tools.length === 1;
      // No allowance when any tool is an MCP tool, or when the only tool is "Skill".
      const exempt = tools.some((tool) => tool.name.startsWith("mcp__") || (tool.name === "Skill" && singleTool));
      if (!exempt) {
        // Fixed per-family allowance (presumably the tool system prompt overhead — confirm).
        if (anthropicPayload.model.startsWith("claude")) tokenCount.input += 346;
        else if (anthropicPayload.model.startsWith("grok")) tokenCount.input += 120;
      }
    }
    const rawTotal = tokenCount.input + tokenCount.output;
    // Claude totals are scaled by 1.15 and rounded.
    const finalTokenCount = anthropicPayload.model.startsWith("claude") ? Math.round(rawTotal * 1.15) : rawTotal;
    consola.info("Token count:", finalTokenCount);
    return c.json({ input_tokens: finalTokenCount });
  } catch (error) {
    consola.error("Error counting tokens:", error);
    return c.json({ input_tokens: 1 });
  }
}
3496
+
3497
+ //#endregion
3498
+ //#region src/services/copilot/create-responses.ts
3499
/**
 * Sends a payload to the Copilot `/responses` endpoint.
 *
 * @param payload Request body; its `service_tier` is set to null before sending.
 * @param options Header-related options (vision, initiator, upstreamRequestId,
 *   subagentMarker, sessionId, isCompact).
 * @param account Optional account context; falls back to `accountFromState()`.
 * @returns The result of `events(response)` when `payload.stream` is truthy,
 *   otherwise the parsed JSON body.
 * @throws Error when the context has no Copilot token.
 * @throws HTTPError when the upstream response is not ok.
 */
const createResponses = async (payload, { vision, initiator, upstreamRequestId, subagentMarker, sessionId, isCompact }, account) => {
  const ctx = account ?? accountFromState();
  if (!ctx.copilotToken) throw new Error("Copilot token not found");

  const headers = {
    ...copilotHeaders(ctx, vision, upstreamRequestId),
    "x-initiator": initiator
  };
  prepareInteractionHeaders(sessionId, Boolean(subagentMarker), headers);
  prepareForCompact(headers, isCompact);

  // The outgoing request always carries an explicit null service tier.
  payload.service_tier = null;

  const endpoint = `${copilotBaseUrl(ctx)}/responses`;
  const response = await fetch(endpoint, {
    method: "POST",
    headers,
    body: JSON.stringify(payload)
  });

  if (response.ok) {
    return payload.stream ? events(response) : await response.json();
  }
  consola.error("Failed to create responses", response);
  throw new HTTPError("Failed to create responses", response);
};
3543
3521
 
3544
3522
  //#endregion
3545
- //#region src/routes/messages/utils.ts
3546
- function mapOpenAIStopReasonToAnthropic(finishReason) {
3547
- if (finishReason === null) return null;
3548
- return {
3549
- stop: "end_turn",
3550
- length: "max_tokens",
3551
- tool_calls: "tool_use",
3552
- content_filter: "end_turn"
3553
- }[finishReason];
3554
- }
3555
- const mergeContentWithText = (toolResult, textBlock) => {
3556
- if (typeof toolResult.content === "string") return {
3557
- ...toolResult,
3558
- content: `${toolResult.content}\n\n${textBlock.text}`
3559
- };
3523
+ //#region src/routes/messages/responses-translation.ts
3524
// `type` tag placed on message items built by createMessage.
const MESSAGE_TYPE = "message";
// Prefix marking a thinking-block signature that carries a compaction item.
const COMPACTION_SIGNATURE_PREFIX = "cm1#";
// Separator between the encrypted content and the item id inside a carrier signature.
const COMPACTION_SIGNATURE_SEPARATOR = "@";
// Placeholder thinking text used when a reasoning or compaction item has no summary text.
const THINKING_TEXT$1 = "Thinking...";
3528
/**
 * Translates an Anthropic Messages payload into a Responses API payload.
 *
 * The resolved model (`modelOverride` when given, else `payload.model`) is used
 * consistently for the request model, the system-prompt extras and the phase
 * decision, so the phase markers always match the instructions actually sent.
 *
 * @param payload Anthropic Messages request payload.
 * @param modelOverride Optional model id that replaces `payload.model`.
 * @returns The Responses API request payload.
 */
const translateAnthropicMessagesToResponsesPayload = (payload, modelOverride) => {
  const model = modelOverride ?? payload.model;
  const applyPhase = shouldApplyPhase(model);
  const input = [];
  for (const message of payload.messages) input.push(...translateMessage(message, model, applyPhase));
  const translatedTools = convertAnthropicTools(payload.tools);
  const toolChoice = convertAnthropicToolChoice(payload.tool_choice);
  const { safetyIdentifier, sessionId: promptCacheKey } = parseUserIdMetadata(payload.metadata?.user_id);
  return {
    model,
    input,
    instructions: translateSystemPrompt(payload.system, model),
    temperature: 1,
    top_p: payload.top_p ?? null,
    // Floor of 12800 output tokens; `?? 0` keeps a missing max_tokens from producing NaN.
    max_output_tokens: Math.max(payload.max_tokens ?? 0, 12800),
    tools: translatedTools,
    tool_choice: toolChoice,
    metadata: payload.metadata ? { ...payload.metadata } : null,
    safety_identifier: safetyIdentifier,
    prompt_cache_key: promptCacheKey,
    stream: payload.stream ?? null,
    store: false,
    parallel_tool_calls: true,
    reasoning: {
      effort: getReasoningEffortForModel(model),
      summary: "auto"
    },
    include: ["reasoning.encrypted_content"]
  };
};
3565
- const mergeContentWithTexts = (toolResult, textBlocks) => {
3566
- if (typeof toolResult.content === "string") {
3567
- const appendedTexts = textBlocks.map((tb) => tb.text).join("\n\n");
3558
/**
 * Packs a compaction item into a single signature string:
 * prefix, encrypted content, separator, then id.
 *
 * @param compaction Object with `encrypted_content` and `id`.
 * @returns The carrier signature string.
 */
const encodeCompactionCarrierSignature = (compaction) => {
  const { encrypted_content: encryptedContent, id } = compaction;
  const head = `${COMPACTION_SIGNATURE_PREFIX}${encryptedContent}`;
  return `${head}${COMPACTION_SIGNATURE_SEPARATOR}${id}`;
};
3561
/**
 * Unpacks a carrier signature produced by encodeCompactionCarrierSignature.
 *
 * @param signature Candidate signature string.
 * @returns `{ id, encrypted_content }`, or undefined when the value is not a
 *   string, lacks the compaction prefix, or has an empty part on either side
 *   of the first separator.
 */
const decodeCompactionCarrierSignature = (signature) => {
  if (typeof signature !== "string" || !signature.startsWith(COMPACTION_SIGNATURE_PREFIX)) return;
  // Strip the prefix by its real length so the two constants cannot drift apart.
  const raw = signature.slice(COMPACTION_SIGNATURE_PREFIX.length);
  const separatorIndex = raw.indexOf(COMPACTION_SIGNATURE_SEPARATOR);
  const idStart = separatorIndex + COMPACTION_SIGNATURE_SEPARATOR.length;
  // Both parts must be non-empty: separator not first, and something after it.
  if (separatorIndex <= 0 || idStart >= raw.length) return;
  return {
    id: raw.slice(idStart),
    encrypted_content: raw.slice(0, separatorIndex)
  };
};
3578
- const mergeToolResult = (toolResults, textBlocks) => {
3579
- if (toolResults.length === textBlocks.length) return toolResults.map((toolResult, index) => mergeContentWithText(toolResult, textBlocks[index]));
3580
- const lastIndex = toolResults.length - 1;
3581
- return toolResults.map((toolResult, index) => index === lastIndex ? mergeContentWithTexts(toolResult, textBlocks) : toolResult);
3575
/**
 * Dispatches a single Anthropic message to the role-specific translator.
 * Messages with role "user" go to translateUserMessage; everything else is
 * treated as an assistant message.
 */
const translateMessage = (message, model, applyPhase) => {
  const isUser = message.role === "user";
  return isUser ? translateUserMessage(message) : translateAssistantMessage(message, model, applyPhase);
};
3583
- const mergeToolResultForClaude = (anthropicPayload) => {
3584
- for (const msg of anthropicPayload.messages) {
3585
- if (msg.role !== "user" || !Array.isArray(msg.content)) continue;
3586
- const toolResults = [];
3587
- const textBlocks = [];
3588
- let valid = true;
3589
- for (const block of msg.content) if (block.type === "tool_result") toolResults.push(block);
3590
- else if (block.type === "text") textBlocks.push(block);
3591
- else {
3592
- valid = false;
3593
- break;
3579
/**
 * Translates a user message into Responses input items.
 *
 * String content becomes one user message. For block content, consecutive
 * convertible blocks are grouped into user messages, and each `tool_result`
 * block becomes its own function-call-output item, in original order.
 *
 * @param message Anthropic user message.
 * @returns Array of input items (empty when content is neither string nor array).
 */
const translateUserMessage = (message) => {
  const { content } = message;
  if (typeof content === "string") return [createMessage("user", content)];
  if (!Array.isArray(content)) return [];

  const items = [];
  const pendingContent = [];
  const flush = () => flushPendingContent(pendingContent, items, { role: "user" });

  for (const block of content) {
    if (block.type !== "tool_result") {
      const converted = translateUserContentBlock(block);
      if (converted) pendingContent.push(converted);
      continue;
    }
    // Emit buffered content first so item order matches block order.
    flush();
    items.push(createFunctionCallOutput(block));
  }
  flush();
  return items;
};
3653
- const maybeBlockOriginalModelName = (context) => {
3654
- if (!getAliasTargetSet().has(context.clientModel.toLowerCase())) return null;
3655
- return handleSelectionFailure({
3656
- ...context,
3657
- selection: {
3658
- ok: false,
3659
- reason: "MODEL_NOT_SUPPORTED"
3596
/**
 * Translates an assistant message into Responses input items.
 *
 * String content becomes one assistant message. For block content, `tool_use`
 * blocks become function calls, signed `thinking` blocks become compaction or
 * reasoning items, and the remaining convertible blocks are grouped into
 * assistant messages. Item order follows block order.
 *
 * @param message Anthropic assistant message.
 * @param model Model id forwarded to resolveAssistantPhase.
 * @param applyPhase Whether a phase should be attached to assistant messages.
 * @returns Array of input items (empty when content is neither string nor array).
 */
const translateAssistantMessage = (message, model, applyPhase) => {
  const assistantPhase = resolveAssistantPhase(model, message.content, applyPhase);
  if (typeof message.content === "string") return [createMessage("assistant", message.content, assistantPhase)];
  if (!Array.isArray(message.content)) return [];

  const items = [];
  const pendingContent = [];
  const flush = () => flushPendingContent(pendingContent, items, {
    role: "assistant",
    phase: assistantPhase
  });

  for (const block of message.content) {
    if (block.type === "tool_use") {
      flush();
      items.push(createFunctionToolCall(block));
      continue;
    }
    if (block.type === "thinking" && block.signature) {
      // A compaction carrier signature takes precedence over a reasoning signature.
      const compactionContent = createCompactionContent(block);
      if (compactionContent) {
        flush();
        items.push(compactionContent);
        continue;
      }
      if (block.signature.includes("@")) {
        flush();
        items.push(createReasoningContent(block));
        continue;
      }
    }
    const converted = translateAssistantContentBlock(block);
    if (converted) pendingContent.push(converted);
  }
  flush();
  return items;
};
3639
/**
 * Converts one user content block: text → input text, image → input image.
 * Any other block type yields undefined.
 */
const translateUserContentBlock = (block) => {
  if (block.type === "text") return createTextContent(block.text);
  if (block.type === "image") return createImageContent(block);
  return undefined;
};
3646
/**
 * Converts one assistant content block: text → output text.
 * Any other block type yields undefined.
 */
const translateAssistantContentBlock = (block) => {
  return block.type === "text" ? createOutPutTextContent(block.text) : undefined;
};
3652
/**
 * Moves buffered content parts into `target` as a single message and empties
 * the buffer in place. Does nothing when the buffer is empty.
 *
 * @param pendingContent Buffer of content parts (cleared by this call).
 * @param target Array receiving the new message item.
 * @param message Object with `role` and optional `phase` for the new message.
 */
const flushPendingContent = (pendingContent, target, message) => {
  if (pendingContent.length > 0) {
    const { role, phase } = message;
    // Snapshot the buffer: the message must not share the array that is cleared below.
    target.push(createMessage(role, pendingContent.slice(), phase));
    pendingContent.length = 0;
  }
};
3658
/**
 * Builds a message item. `phase` is attached only for assistant messages and
 * only when it is truthy.
 */
const createMessage = (role, content, phase) => {
  const item = {
    type: MESSAGE_TYPE,
    role,
    content
  };
  if (role === "assistant" && phase) item.phase = phase;
  return item;
};
3664
/**
 * Decides the phase label for an assistant message.
 *
 * @param _model Unused.
 * @param content Message content (string or array of blocks).
 * @param applyPhase When falsy, no phase is produced.
 * @returns "final_answer" for string content or text without tool use,
 *   "commentary" for text accompanied by tool use, otherwise undefined.
 */
const resolveAssistantPhase = (_model, content, applyPhase) => {
  if (!applyPhase) return undefined;
  if (typeof content === "string") return "final_answer";
  if (!Array.isArray(content)) return undefined;
  const hasBlockOfType = (type) => content.some((block) => block.type === type);
  if (!hasBlockOfType("text")) return undefined;
  return hasBlockOfType("tool_use") ? "commentary" : "final_answer";
};
3671
/**
 * Reports whether the extra prompt configured for `model` contains the
 * "## Intermediary updates" section.
 */
const shouldApplyPhase = (model) => {
  const extraPrompt = getExtraPromptForModel(model);
  return extraPrompt.includes("## Intermediary updates");
};
3674
/** Wraps text as an `input_text` content part. */
const createTextContent = (text) => {
  return { type: "input_text", text };
};
3678
/** Wraps text as an `output_text` content part. */
const createOutPutTextContent = (text) => {
  return { type: "output_text", text };
};
3682
/**
 * Converts an image block into an `input_image` part whose URL is a base64
 * data URL built from `block.source.media_type` and `block.source.data`.
 */
const createImageContent = (block) => {
  const { media_type: mediaType, data } = block.source;
  return {
    type: "input_image",
    image_url: `data:${mediaType};base64,${data}`,
    detail: "auto"
  };
};
3687
/**
 * Rebuilds a `reasoning` item from a signed thinking block.
 * The signature is split into encrypted content and id; the placeholder
 * thinking text is treated as "no summary".
 */
const createReasoningContent = (block) => {
  const parsed = parseReasoningSignature(block.signature);
  const text = block.thinking;
  const summary = [];
  if (text !== THINKING_TEXT$1 && text) {
    summary.push({
      type: "summary_text",
      text
    });
  }
  return {
    id: parsed.id,
    type: "reasoning",
    summary,
    encrypted_content: parsed.encryptedContent
  };
};
3700
/**
 * Rebuilds a `compaction` item from a thinking block whose signature is a
 * compaction carrier. Returns undefined when the signature does not decode.
 */
const createCompactionContent = (block) => {
  const decoded = decodeCompactionCarrierSignature(block.signature);
  if (decoded) {
    return {
      id: decoded.id,
      type: "compaction",
      encrypted_content: decoded.encrypted_content
    };
  }
  return undefined;
};
3709
/**
 * Splits a reasoning signature at its last "@" into encrypted content and id.
 * When there is no "@", or it is the first or last character, the whole
 * signature is returned as the encrypted content with an empty id.
 */
const parseReasoningSignature = (signature) => {
  const at = signature.lastIndexOf("@");
  const splittable = at > 0 && at !== signature.length - 1;
  if (splittable) {
    return {
      encryptedContent: signature.slice(0, at),
      id: signature.slice(at + 1)
    };
  }
  return {
    encryptedContent: signature,
    id: ""
  };
};
3720
/**
 * Converts a `tool_use` block into a completed `function_call` item, with the
 * tool input serialized as a JSON string.
 */
const createFunctionToolCall = (block) => {
  const { id, name, input } = block;
  return {
    type: "function_call",
    call_id: id,
    name,
    arguments: JSON.stringify(input),
    status: "completed"
  };
};
3727
/**
 * Converts a `tool_result` block into a `function_call_output` item.
 * Results flagged with `is_error` are marked "incomplete", others "completed".
 */
const createFunctionCallOutput = (block) => {
  const callId = block.tool_use_id;
  const output = convertToolResultContent(block.content);
  const status = block.is_error ? "incomplete" : "completed";
  return {
    type: "function_call_output",
    call_id: callId,
    output,
    status
  };
};
3733
/**
 * Builds the `instructions` string from an Anthropic system prompt.
 *
 * The model's extra prompt is appended to a string prompt, or to the first
 * block of a block-array prompt; block texts are joined with a single space.
 *
 * @returns The instructions text, or null when there is no system prompt or
 *   the joined text is empty.
 */
const translateSystemPrompt = (system, model) => {
  if (!system) return null;
  const extraPrompt = getExtraPromptForModel(model);
  if (typeof system === "string") return system + extraPrompt;
  const parts = system.map((block, index) => (index === 0 ? block.text + extraPrompt : block.text));
  const text = parts.join(" ");
  if (text.length === 0) return null;
  return text;
};
3743
/**
 * Converts Anthropic tool definitions into non-strict function tools.
 * `description` is included only when the source tool has a truthy one.
 *
 * @returns Array of function tools, or null when no tools are given.
 */
const convertAnthropicTools = (tools) => {
  if (!tools || tools.length === 0) return null;
  return tools.map((tool) => {
    const converted = {
      type: "function",
      name: tool.name,
      parameters: normalizeToolSchema(tool.input_schema),
      strict: false
    };
    if (tool.description) converted.description = tool.description;
    return converted;
  });
};
3753
/**
 * Maps an Anthropic `tool_choice` to the Responses API form.
 * "any" → "required", "none" → "none", a named "tool" → function selector;
 * everything else (including a missing choice or unnamed tool) → "auto".
 */
const convertAnthropicToolChoice = (choice) => {
  if (!choice) return "auto";
  const { type, name } = choice;
  if (type === "any") return "required";
  if (type === "none") return "none";
  if (type === "tool" && name) {
    return {
      type: "function",
      name
    };
  }
  return "auto";
};
3766
/**
 * Translates a non-streaming Responses API result into an Anthropic message.
 * Content comes from the output items; when they yield nothing, the
 * response's `output_text` is used as a single text block.
 */
const translateResponsesResultToAnthropic = (response) => {
  const mappedBlocks = mapOutputToAnthropicContent(response.output);
  const usage = mapResponsesUsage(response);
  const content = mappedBlocks.length > 0 ? mappedBlocks : fallbackContentBlocks(response.output_text);
  const stop_reason = mapResponsesStopReason(response);
  return {
    id: response.id,
    type: "message",
    role: "assistant",
    content,
    model: response.model,
    stop_reason,
    stop_sequence: null,
    usage
  };
};
3783
/**
 * Maps Responses output items to Anthropic content blocks.
 *
 * reasoning → thinking block (signature is encrypted content + "@" + id),
 * function_call → tool_use block, compaction → carrier thinking block;
 * message items and any unrecognised item type contribute their combined text.
 * Items that produce no text or no block are skipped.
 */
const mapOutputToAnthropicContent = (output) => {
  const contentBlocks = [];
  for (const item of output) {
    if (item.type === "reasoning") {
      const thinkingText = extractReasoningText(item);
      if (thinkingText.length > 0) {
        contentBlocks.push({
          type: "thinking",
          thinking: thinkingText,
          signature: `${item.encrypted_content ?? ""}@${item.id}`
        });
      }
    } else if (item.type === "function_call") {
      const toolUseBlock = createToolUseContentBlock(item);
      if (toolUseBlock) contentBlocks.push(toolUseBlock);
    } else if (item.type === "compaction") {
      const compactionBlock = createCompactionThinkingBlock(item);
      if (compactionBlock) contentBlocks.push(compactionBlock);
    } else {
      // "message" items and unknown item types share the same text extraction.
      const combinedText = combineMessageTextContent(item.content);
      if (combinedText.length > 0) {
        contentBlocks.push({
          type: "text",
          text: combinedText
        });
      }
    }
  }
  return contentBlocks;
};
3823
/**
 * Concatenates the textual parts of a message's content array.
 * Per block, the first match wins: output_text → `text`, refusal → `refusal`,
 * then any string `text`, then any string `reasoning`.
 *
 * @returns The concatenated text, or "" when content is not an array.
 */
const combineMessageTextContent = (content) => {
  if (!Array.isArray(content)) return "";
  let aggregated = "";
  for (const block of content) {
    if (isResponseOutputText(block)) aggregated += block.text;
    else if (isResponseOutputRefusal(block)) aggregated += block.refusal;
    else if (typeof block.text === "string") aggregated += block.text;
    else if (typeof block.reasoning === "string") aggregated += block.reasoning;
  }
  return aggregated;
};
3846
/**
 * Extracts the thinking text from a reasoning item's summary.
 *
 * String `text` fields of the summary entries are concatenated and trimmed.
 * Whenever that yields nothing (missing or empty summary, or entries with only
 * blank text) the placeholder text is returned, so the caller still emits a
 * thinking block carrying the item's signature instead of dropping it.
 *
 * @param item Reasoning output item.
 * @returns Non-empty thinking text.
 */
const extractReasoningText = (item) => {
  const summary = Array.isArray(item.summary) ? item.summary : [];
  const text = summary
    .map((block) => (typeof block?.text === "string" ? block.text : ""))
    .join("")
    .trim();
  return text.length > 0 ? text : THINKING_TEXT$1;
};
3859
/**
 * Converts a `function_call` output item into a `tool_use` block.
 * Returns null when the call has no name or no call id.
 */
const createToolUseContentBlock = (call) => {
  const { call_id: toolId, name } = call;
  if (!name || !toolId) return null;
  return {
    type: "tool_use",
    id: toolId,
    name,
    input: parseFunctionCallArguments(call.arguments)
  };
};
3870
/**
 * Wraps a compaction output item as a thinking block whose signature carries
 * the item's id and encrypted content. Returns null when either is missing.
 */
const createCompactionThinkingBlock = (item) => {
  const { id, encrypted_content } = item;
  if (!id || !encrypted_content) return null;
  const signature = encodeCompactionCarrierSignature({ id, encrypted_content });
  return {
    type: "thinking",
    thinking: THINKING_TEXT$1,
    signature
  };
};
3881
/**
 * Parses a function call's JSON argument string into a tool input object.
 *
 * @param rawArguments Raw arguments value from the function call.
 * @returns `{}` for non-string or blank input; the parsed object; an array
 *   wrapped as `{ arguments }`; or `{ raw_arguments }` when the string is not
 *   valid JSON or parses to a non-object value.
 */
const parseFunctionCallArguments = (rawArguments) => {
  const isBlank = typeof rawArguments !== "string" || rawArguments.trim().length === 0;
  if (isBlank) return {};

  let parsed;
  try {
    parsed = JSON.parse(rawArguments);
  } catch (error) {
    consola.warn("Failed to parse function call arguments", {
      error,
      rawArguments
    });
    return { raw_arguments: rawArguments };
  }

  if (Array.isArray(parsed)) return { arguments: parsed };
  if (parsed !== null && typeof parsed === "object") return parsed;
  // Primitives (numbers, strings, booleans, null) are passed through as raw text.
  return { raw_arguments: rawArguments };
};
3895
/**
 * Wraps a response's plain output text as a single text block,
 * or returns an empty array when there is no text.
 */
const fallbackContentBlocks = (outputText) => {
  return outputText ? [{ type: "text", text: outputText }] : [];
};
3902
/**
 * Derives the Anthropic stop reason from a Responses result.
 * completed → "tool_use" when any output item is a function call, else
 * "end_turn"; incomplete → "max_tokens" or "end_turn" depending on the
 * reason; anything else → null.
 */
const mapResponsesStopReason = (response) => {
  const { status, incomplete_details: incompleteDetails } = response;
  switch (status) {
    case "completed": {
      const calledTool = response.output.some((item) => item.type === "function_call");
      return calledTool ? "tool_use" : "end_turn";
    }
    case "incomplete": {
      const reason = incompleteDetails?.reason;
      if (reason === "max_output_tokens") return "max_tokens";
      if (reason === "content_filter") return "end_turn";
      return null;
    }
    default:
      return null;
  }
};
3914
/**
 * Converts Responses usage into Anthropic usage.
 * Cached tokens are subtracted from the input count and, when the cached
 * count is present, reported separately as `cache_read_input_tokens`.
 */
const mapResponsesUsage = (response) => {
  const usage = response.usage;
  const cachedTokens = usage?.input_tokens_details?.cached_tokens;
  const result = {
    input_tokens: (usage?.input_tokens ?? 0) - (cachedTokens ?? 0),
    output_tokens: usage?.output_tokens ?? 0
  };
  if (cachedTokens !== undefined) result.cache_read_input_tokens = cachedTokens;
  return result;
};
3924
/** True for any non-null value whose typeof is "object". */
const isRecord = (value) => value !== null && typeof value === "object";
/** True when the block is an object whose `type` is "output_text". */
const isResponseOutputText = (block) => {
  if (!isRecord(block)) return false;
  return "type" in block && block.type === "output_text";
};
/** True when the block is an object whose `type` is "refusal". */
const isResponseOutputRefusal = (block) => {
  if (!isRecord(block)) return false;
  return "type" in block && block.type === "refusal";
};
3927
/**
 * Converts tool-result content for a function-call output.
 * Strings pass through; arrays keep only text and image blocks, converted to
 * input parts; any other value becomes "".
 */
const convertToolResultContent = (content) => {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";
  const parts = [];
  for (const block of content) {
    if (block.type === "text") parts.push(createTextContent(block.text));
    else if (block.type === "image") parts.push(createImageContent(block));
  }
  return parts;
};
3908
3944
 
3909
3945
  //#endregion
3910
3946
  //#region src/routes/messages/responses-stream-translation.ts
@@ -4441,6 +4477,7 @@ const createMessages = async (payload, account, options) => {
4441
4477
  "x-initiator": options?.subagentMarker ? "agent" : initiator
4442
4478
  };
4443
4479
  prepareInteractionHeaders(options?.sessionId, Boolean(options?.subagentMarker), headers);
4480
+ prepareForCompact(headers, options?.isCompact);
4444
4481
  const anthropicBeta = buildAnthropicBetaHeader(options?.anthropicBetaHeader, payload.thinking);
4445
4482
  if (anthropicBeta) headers["anthropic-beta"] = anthropicBeta;
4446
4483
  const response = await fetch(`${copilotBaseUrl(ctx)}/v1/messages`, {
@@ -4794,7 +4831,7 @@ async function handleCompletion(c) {
4794
4831
  const streamRequested = Boolean(anthropicPayload.stream);
4795
4832
  const rawUserId = anthropicPayload.metadata?.user_id;
4796
4833
  const userId = typeof rawUserId === "string" ? rawUserId : void 0;
4797
- const { safetyIdentifier, promptCacheKey } = parseUserId(userId);
4834
+ const { safetyIdentifier, sessionId: promptCacheKey } = parseUserIdMetadata(userId);
4798
4835
  const normalizedSafetyIdentifier = safetyIdentifier ?? void 0;
4799
4836
  const normalizedPromptCacheKey = promptCacheKey ?? void 0;
4800
4837
  const blockedResponse = maybeBlockOriginalModelName({
@@ -4887,7 +4924,8 @@ async function handleCompletion(c) {
4887
4924
  subagentMarker,
4888
4925
  sessionId,
4889
4926
  instr,
4890
- selectedModel
4927
+ selectedModel,
4928
+ isCompact
4891
4929
  });
4892
4930
  if (endpoint === RESPONSES_ENDPOINT$1) return await handleWithResponsesApi({
4893
4931
  c,
@@ -4897,7 +4935,8 @@ async function handleCompletion(c) {
4897
4935
  subagentMarker,
4898
4936
  sessionId,
4899
4937
  selectedModel,
4900
- instr
4938
+ instr,
4939
+ isCompact
4901
4940
  });
4902
4941
  return await handleWithChatCompletions({
4903
4942
  c,
@@ -4906,11 +4945,12 @@ async function handleCompletion(c) {
4906
4945
  subagentMarker,
4907
4946
  sessionId,
4908
4947
  selectedModel,
4909
- instr
4948
+ instr,
4949
+ isCompact
4910
4950
  });
4911
4951
  }
4912
4952
  const handleWithChatCompletions = async (params) => {
4913
- const { c, openAIPayload, initiatorOverride, subagentMarker, sessionId, selectedModel, instr } = params;
4953
+ const { c, openAIPayload, initiatorOverride, subagentMarker, sessionId, selectedModel, instr, isCompact } = params;
4914
4954
  logger$5.debug("Translated OpenAI request payload:", JSON.stringify(openAIPayload));
4915
4955
  const ctx = toAccountContext(instr.account);
4916
4956
  const initiator = initiatorOverride ?? getChatInitiator(openAIPayload.messages);
@@ -4921,7 +4961,8 @@ const handleWithChatCompletions = async (params) => {
4921
4961
  upstreamRequestId: instr.upstreamRequestId,
4922
4962
  initiator,
4923
4963
  subagentMarker,
4924
- sessionId
4964
+ sessionId,
4965
+ isCompact
4925
4966
  });
4926
4967
  } catch (error) {
4927
4968
  return await handleChatCompletionsCreateError({
@@ -4952,7 +4993,7 @@ const handleWithChatCompletions = async (params) => {
4952
4993
  }));
4953
4994
  };
4954
4995
  const handleWithResponsesApi = async (params) => {
4955
- const { c, anthropicPayload, openAIPayload, initiatorOverride, subagentMarker, sessionId, selectedModel, instr } = params;
4996
+ const { c, anthropicPayload, openAIPayload, initiatorOverride, subagentMarker, sessionId, selectedModel, instr, isCompact } = params;
4956
4997
  const responsesPayload = translateAnthropicMessagesToResponsesPayload(anthropicPayload, selectedModel.id);
4957
4998
  applyResponsesApiContextManagement(responsesPayload, selectedModel.capabilities.limits.max_prompt_tokens);
4958
4999
  compactInputByLatestCompaction(responsesPayload);
@@ -4968,7 +5009,8 @@ const handleWithResponsesApi = async (params) => {
4968
5009
  initiator: resolvedInitiator,
4969
5010
  upstreamRequestId: instr.upstreamRequestId,
4970
5011
  subagentMarker,
4971
- sessionId
5012
+ sessionId,
5013
+ isCompact
4972
5014
  }, ctx);
4973
5015
  } catch (error) {
4974
5016
  return await handleResponsesCreateError({
@@ -5419,7 +5461,7 @@ async function streamMessagesAndLog(params) {
5419
5461
  }
5420
5462
  }
5421
5463
  const handleWithMessagesApi = async (params) => {
5422
- const { c, anthropicPayload, anthropicBetaHeader, initiatorOverride, subagentMarker, sessionId, instr, selectedModel } = params;
5464
+ const { c, anthropicPayload, anthropicBetaHeader, initiatorOverride, subagentMarker, sessionId, instr, selectedModel, isCompact } = params;
5423
5465
  for (const msg of anthropicPayload.messages) if (msg.role === "assistant" && Array.isArray(msg.content)) msg.content = msg.content.filter((block) => {
5424
5466
  if (block.type !== "thinking") return true;
5425
5467
  return block.thinking && block.thinking !== "Thinking..." && block.signature && !block.signature.includes("@");
@@ -5443,7 +5485,8 @@ const handleWithMessagesApi = async (params) => {
5443
5485
  upstreamRequestId: instr.upstreamRequestId,
5444
5486
  initiator,
5445
5487
  subagentMarker,
5446
- sessionId
5488
+ sessionId,
5489
+ isCompact
5447
5490
  });
5448
5491
  } catch (error) {
5449
5492
  return await handleMessagesCreateError({
@@ -5665,14 +5708,37 @@ async function handleProviderMessages(c) {
5665
5708
  provider
5666
5709
  }));
5667
5710
  const upstreamResponse = await forwardProviderMessages(providerConfig, payload, c.req.raw.headers);
5711
+ if (!upstreamResponse.ok) {
5712
+ logger$3.error("Failed to create responses", upstreamResponse);
5713
+ throw new HTTPError("Failed to create responses", upstreamResponse);
5714
+ }
5668
5715
  const contentType = upstreamResponse.headers.get("content-type") ?? "";
5669
5716
  if (Boolean(payload.stream) && contentType.includes("text/event-stream")) {
5670
5717
  logger$3.debug("provider.messages.streaming");
5671
5718
  return streamSSE(c, async (stream) => {
5672
- for await (const event of events(upstreamResponse)) {
5673
- const eventName = event.event;
5674
- const data = event.data ?? "";
5675
- logger$3.debug("provider.messages.raw_stream_event", data);
5719
+ for await (const chunk of events(upstreamResponse)) {
5720
+ logger$3.debug("provider.messages.raw_stream_event:", chunk.data);
5721
+ const eventName = chunk.event;
5722
+ if (eventName === "ping") {
5723
+ await stream.writeSSE({
5724
+ event: "ping",
5725
+ data: "{\"type\":\"ping\"}"
5726
+ });
5727
+ continue;
5728
+ }
5729
+ let data = chunk.data;
5730
+ if (!data) continue;
5731
+ try {
5732
+ const parsed = JSON.parse(data);
5733
+ if (parsed.type === "message_start") adjustInputTokens(providerConfig, parsed.message.usage);
5734
+ else if (parsed.type === "message_delta") adjustInputTokens(providerConfig, parsed.usage);
5735
+ data = JSON.stringify(parsed);
5736
+ } catch (error) {
5737
+ logger$3.error("provider.messages.streaming.adjust_tokens_error", {
5738
+ error,
5739
+ originalData: data
5740
+ });
5741
+ }
5676
5742
  await stream.writeSSE({
5677
5743
  event: eventName,
5678
5744
  data
@@ -5680,7 +5746,10 @@ async function handleProviderMessages(c) {
5680
5746
  }
5681
5747
  });
5682
5748
  }
5683
- return createProviderProxyResponse(upstreamResponse);
5749
+ const jsonBody = await upstreamResponse.json();
5750
+ adjustInputTokens(providerConfig, jsonBody.usage);
5751
+ logger$3.debug("provider.messages.no_stream result:", JSON.stringify(jsonBody));
5752
+ return c.json(jsonBody);
5684
5753
  } catch (error) {
5685
5754
  logger$3.error("provider.messages.error", {
5686
5755
  provider,
@@ -5689,6 +5758,11 @@ async function handleProviderMessages(c) {
5689
5758
  throw error;
5690
5759
  }
5691
5760
  }
5761
/**
 * When the provider config has `adjustInputTokens` set, rewrite
 * `usage.input_tokens` in place so that it excludes cache-read and
 * cache-creation tokens (floored at zero), then log the adjusted usage.
 *
 * Does nothing when the flag is falsy or `usage` is missing.
 *
 * @param {object} providerConfig - Config object; only `adjustInputTokens` is read.
 * @param {object | null | undefined} usage - Usage counters, mutated in place.
 * @returns {void}
 */
const adjustInputTokens = (providerConfig, usage) => {
	const shouldAdjust = Boolean(providerConfig.adjustInputTokens) && Boolean(usage);
	if (!shouldAdjust) return;
	const reportedInput = usage.input_tokens ?? 0;
	const cacheRead = usage.cache_read_input_tokens ?? 0;
	const cacheCreation = usage.cache_creation_input_tokens ?? 0;
	usage.input_tokens = Math.max(0, reportedInput - cacheRead - cacheCreation);
	logger$3.debug("provider.messages.adjusted_usage:", JSON.stringify(usage));
};
5692
5766
 
5693
5767
  //#endregion
5694
5768
  //#region src/routes/provider/messages/route.ts
@@ -5788,7 +5862,7 @@ const handleResponses = async (c) => {
5788
5862
  const streamRequested = Boolean(payload.stream);
5789
5863
  const { initiator: initialInitiator } = getResponsesRequestOptions(payload);
5790
5864
  const userId = payload.metadata?.user_id;
5791
- const { safetyIdentifier, promptCacheKey } = parseUserId(userId);
5865
+ const { safetyIdentifier, sessionId: promptCacheKey } = parseUserIdMetadata(userId);
5792
5866
  const normalizedSafetyIdentifier = safetyIdentifier ?? void 0;
5793
5867
  const normalizedPromptCacheKey = promptCacheKey ?? void 0;
5794
5868
  request.userId = userId;
@@ -6295,6 +6369,7 @@ usageRoute.get("/:accountIndex", async (c) => {
6295
6369
  //#endregion
6296
6370
  //#region src/server.ts
6297
6371
  const server = new Hono();
6372
+ server.use(traceIdMiddleware);
6298
6373
  server.use(logger());
6299
6374
  server.use(cors());
6300
6375
  server.use("*", createAuthMiddleware({
@@ -6320,4 +6395,4 @@ server.route("/:provider/v1/models", providerModelRoutes);
6320
6395
 
6321
6396
  //#endregion
6322
6397
  export { server };
6323
- //# sourceMappingURL=server-CM_0PrbK.js.map
6398
+ //# sourceMappingURL=server-D3sySKxC.js.map