copilot-api-plus 1.0.51 → 1.0.53

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -42,7 +42,7 @@
42
42
  | 🌐 **代理支持** | 支持 HTTP/HTTPS 代理,配置持久化 |
43
43
  | 🐳 **Docker 支持** | 提供完整的 Docker 部署方案 |
44
44
  | 🔑 **API Key 认证** | 可选的 API Key 鉴权,保护公开部署的服务 |
45
- | ✂️ **智能上下文压缩** | Prompt 超过模型 Token 限制时自动截断,保留系统消息和最近对话 |
45
+ | ✂️ **上下文透传** | 全量透传上下文至上游 API,由客户端(如 Claude Code)自行管理压缩 |
46
46
  | 🔍 **智能模型匹配** | 自动处理模型名格式差异(日期后缀、dash/dot 版本号等) |
47
47
  | 🔁 **Antigravity 端点容错** | 双端点自动切换,按模型族追踪速率限制,指数退避重试 |
48
48
 
@@ -648,14 +648,12 @@ curl http://localhost:4141/v1/messages \
648
648
 
649
649
  ## 🔧 技术细节
650
650
 
651
- ### 智能上下文压缩
651
+ ### 上下文管理
652
652
 
653
- Prompt Token 数量超过模型的上下文窗口限制时,代理层会自动截断消息以避免上游 API 返回 400 错误:
653
+ 代理层不做上下文截断,全量透传消息至上游 API。上下文压缩由客户端负责:
654
654
 
655
- - **保留系统/开发者消息**:system developer 角色的消息始终保留
656
- - **保留最近对话**:优先丢弃最早的消息,保留最近的上下文
657
- - **工具调用分组**:assistant 的 tool_calls 和对应的 tool result 消息作为一组,不会被拆散
658
- - **5% 安全余量**:实际限制为模型上下文窗口的 95%,避免边界情况
655
+ - **Claude Code**:通过 `/count_tokens` 端点获取当前 token 数,接近上限时自动触发 `/compact` 压缩
656
+ - **其他客户端**:如果上游 API 返回 400(token 超限),客户端自行处理重试
659
657
 
660
658
  ### 智能模型名匹配
661
659
 
@@ -690,12 +688,6 @@ Google Antigravity 模式内置了可靠性保障:
690
688
  - `out` — 输出 token 数
691
689
  - `cache_read` — 缓存命中的 token 数(仅在有缓存时显示)
692
690
 
693
- 触发上下文压缩时会额外输出一行:
694
-
695
- ```
696
- Truncated: 190385 -> 117537 tokens (-59 msgs)
697
- ```
698
-
699
691
  ### 网络重试
700
692
 
701
693
  对上游 API 的请求内置了瞬时网络错误重试(TLS 断开、连接重置等):
package/dist/main.js CHANGED
@@ -2608,6 +2608,194 @@ const awaitApproval = async () => {
2608
2608
  if (!await consola.prompt(`Accept incoming request?`, { type: "confirm" })) throw new HTTPError("Request rejected", Response.json({ message: "Request rejected" }, { status: 403 }));
2609
2609
  };
2610
2610
 
2611
+ //#endregion
2612
+ //#region src/lib/rate-limit.ts
2613
+ async function checkRateLimit(state$1) {
2614
+ if (state$1.rateLimitSeconds === void 0) return;
2615
+ const now = Date.now();
2616
+ if (!state$1.lastRequestTimestamp) {
2617
+ state$1.lastRequestTimestamp = now;
2618
+ return;
2619
+ }
2620
+ const elapsedSeconds = (now - state$1.lastRequestTimestamp) / 1e3;
2621
+ if (elapsedSeconds > state$1.rateLimitSeconds) {
2622
+ state$1.lastRequestTimestamp = now;
2623
+ return;
2624
+ }
2625
+ const waitTimeSeconds = Math.ceil(state$1.rateLimitSeconds - elapsedSeconds);
2626
+ if (!state$1.rateLimitWait) {
2627
+ consola.warn(`Rate limit exceeded. Need to wait ${waitTimeSeconds} more seconds.`);
2628
+ throw new HTTPError("Rate limit exceeded", Response.json({ message: "Rate limit exceeded" }, { status: 429 }));
2629
+ }
2630
+ const waitTimeMs = waitTimeSeconds * 1e3;
2631
+ consola.warn(`Rate limit reached. Waiting ${waitTimeSeconds} seconds before proceeding...`);
2632
+ await sleep(waitTimeMs);
2633
+ state$1.lastRequestTimestamp = now;
2634
+ consola.info("Rate limit wait completed, proceeding with request");
2635
+ }
2636
+
2637
+ //#endregion
2638
+ //#region src/services/copilot/create-chat-completions.ts
2639
+ const createChatCompletions = async (payload) => {
2640
+ if (!state.copilotToken) throw new Error("Copilot token not found");
2641
+ const enableVision = payload.messages.some((x) => typeof x.content !== "string" && x.content?.some((x$1) => x$1.type === "image_url"));
2642
+ const isAgentCall = payload.messages.some((msg) => ["assistant", "tool"].includes(msg.role));
2643
+ const buildHeaders = () => ({
2644
+ ...copilotHeaders(state, enableVision),
2645
+ "X-Initiator": isAgentCall ? "agent" : "user"
2646
+ });
2647
+ consola.debug("Sending request to Copilot:", {
2648
+ model: payload.model,
2649
+ endpoint: `${copilotBaseUrl(state)}/chat/completions`
2650
+ });
2651
+ const url = `${copilotBaseUrl(state)}/chat/completions`;
2652
+ const body = payload.stream ? {
2653
+ ...payload,
2654
+ stream_options: { include_usage: true }
2655
+ } : payload;
2656
+ const bodyString = JSON.stringify(body);
2657
+ const maxRetries = 2;
2658
+ let lastError;
2659
+ let response;
2660
+ for (let attempt = 0; attempt <= maxRetries; attempt++) try {
2661
+ response = await fetch(url, {
2662
+ method: "POST",
2663
+ headers: buildHeaders(),
2664
+ body: bodyString
2665
+ });
2666
+ break;
2667
+ } catch (error) {
2668
+ lastError = error;
2669
+ if (attempt < maxRetries) {
2670
+ const delay = 1e3 * (attempt + 1);
2671
+ consola.warn(`Network error on attempt ${attempt + 1}/${maxRetries + 1}, retrying in ${delay}ms:`, error instanceof Error ? error.message : error);
2672
+ await new Promise((r) => setTimeout(r, delay));
2673
+ }
2674
+ }
2675
+ if (!response) throw lastError;
2676
+ if (response.status === 401) {
2677
+ consola.warn("Copilot token expired, refreshing and retrying...");
2678
+ try {
2679
+ await refreshCopilotToken();
2680
+ response = await fetch(url, {
2681
+ method: "POST",
2682
+ headers: buildHeaders(),
2683
+ body: bodyString
2684
+ });
2685
+ } catch (refreshError) {
2686
+ consola.error("Failed to refresh token:", refreshError);
2687
+ }
2688
+ }
2689
+ if (!response.ok) {
2690
+ const errorBody = await response.text();
2691
+ consola.error("Failed to create chat completions", {
2692
+ status: response.status,
2693
+ statusText: response.statusText,
2694
+ body: errorBody
2695
+ });
2696
+ throw new HTTPError(`Failed to create chat completions: ${response.status} ${errorBody}`, response);
2697
+ }
2698
+ if (payload.stream) return events(response);
2699
+ return await response.json();
2700
+ };
2701
+
2702
+ //#endregion
2703
+ //#region src/routes/chat-completions/handler.ts
2704
+ /**
2705
+ * Set max_tokens from model limits if not already provided in the payload.
2706
+ */
2707
+ function applyMaxTokens(payload) {
2708
+ if (!isNullish(payload.max_tokens)) return payload;
2709
+ const selectedModel = findModel(payload.model);
2710
+ if (!selectedModel) return payload;
2711
+ const maxTokens = selectedModel.capabilities.limits.max_output_tokens;
2712
+ if (maxTokens) {
2713
+ consola.debug("Set max_tokens to:", maxTokens);
2714
+ return {
2715
+ ...payload,
2716
+ max_tokens: maxTokens
2717
+ };
2718
+ }
2719
+ return payload;
2720
+ }
2721
+ async function handleCompletion$1(c) {
2722
+ await checkRateLimit(state);
2723
+ const rawPayload = await c.req.json();
2724
+ consola.debug("Request payload:", JSON.stringify(rawPayload).slice(-400));
2725
+ const payload = applyMaxTokens(rawPayload);
2726
+ if (state.manualApprove) await awaitApproval();
2727
+ const response = await createChatCompletions(payload);
2728
+ if (isNonStreaming$1(response)) {
2729
+ consola.debug("Non-streaming response:", JSON.stringify(response));
2730
+ if (response.usage) setTokenUsage({
2731
+ inputTokens: response.usage.prompt_tokens,
2732
+ outputTokens: response.usage.completion_tokens,
2733
+ cacheReadTokens: response.usage.prompt_tokens_details?.cached_tokens
2734
+ });
2735
+ return c.json(response);
2736
+ }
2737
+ consola.debug("Streaming response");
2738
+ return streamSSE(c, async (stream) => {
2739
+ for await (const chunk of response) {
2740
+ consola.debug("Streaming chunk:", JSON.stringify(chunk));
2741
+ try {
2742
+ const sseChunk = chunk;
2743
+ if (sseChunk.data && sseChunk.data !== "[DONE]") {
2744
+ const parsed = JSON.parse(sseChunk.data);
2745
+ if (parsed.usage) {
2746
+ const usage = {
2747
+ inputTokens: parsed.usage.prompt_tokens ?? 0,
2748
+ outputTokens: parsed.usage.completion_tokens ?? 0,
2749
+ cacheReadTokens: parsed.usage.prompt_tokens_details?.cached_tokens
2750
+ };
2751
+ setTokenUsage(usage);
2752
+ }
2753
+ }
2754
+ } catch {}
2755
+ await stream.writeSSE(chunk);
2756
+ }
2757
+ signalStreamDone();
2758
+ });
2759
+ }
2760
+ const isNonStreaming$1 = (response) => Object.hasOwn(response, "choices");
2761
+
2762
+ //#endregion
2763
+ //#region src/routes/chat-completions/route.ts
2764
+ const completionRoutes = new Hono();
2765
+ completionRoutes.post("/", async (c) => {
2766
+ try {
2767
+ return await handleCompletion$1(c);
2768
+ } catch (error) {
2769
+ return await forwardError(c, error);
2770
+ }
2771
+ });
2772
+
2773
+ //#endregion
2774
+ //#region src/services/copilot/create-embeddings.ts
2775
+ const createEmbeddings = async (payload) => {
2776
+ if (!state.copilotToken) throw new Error("Copilot token not found");
2777
+ const response = await fetch(`${copilotBaseUrl(state)}/embeddings`, {
2778
+ method: "POST",
2779
+ headers: copilotHeaders(state),
2780
+ body: JSON.stringify(payload)
2781
+ });
2782
+ if (!response.ok) throw new HTTPError("Failed to create embeddings", response);
2783
+ return await response.json();
2784
+ };
2785
+
2786
+ //#endregion
2787
+ //#region src/routes/embeddings/route.ts
2788
+ const embeddingRoutes = new Hono();
2789
+ embeddingRoutes.post("/", async (c) => {
2790
+ try {
2791
+ const paylod = await c.req.json();
2792
+ const response = await createEmbeddings(paylod);
2793
+ return c.json(response);
2794
+ } catch (error) {
2795
+ return await forwardError(c, error);
2796
+ }
2797
+ });
2798
+
2611
2799
  //#endregion
2612
2800
  //#region src/lib/tokenizer.ts
2613
2801
  const ENCODING_MAP = {
@@ -2801,337 +2989,6 @@ const getTokenCount = async (payload, model) => {
2801
2989
  };
2802
2990
  };
2803
2991
 
2804
- //#endregion
2805
- //#region src/lib/context-compression.ts
2806
- /**
2807
- * Get the maximum prompt token limit for a model.
2808
- * Prefers max_prompt_tokens, falls back to max_context_window_tokens minus max_output_tokens.
2809
- */
2810
- const getMaxPromptTokens = (model) => {
2811
- const limits = model.capabilities.limits;
2812
- if (limits.max_prompt_tokens) return limits.max_prompt_tokens;
2813
- if (limits.max_context_window_tokens) {
2814
- const outputReserve = limits.max_output_tokens ?? 4096;
2815
- return limits.max_context_window_tokens - outputReserve;
2816
- }
2817
- };
2818
- /**
2819
- * Check if a message is a tool-related message (tool call or tool result).
2820
- * Tool messages must be kept together with their paired assistant message.
2821
- */
2822
- const isToolMessage = (message) => {
2823
- return message.role === "tool";
2824
- };
2825
- /**
2826
- * Check if an assistant message contains tool calls.
2827
- */
2828
- const hasToolCalls = (message) => {
2829
- return message.role === "assistant" && Array.isArray(message.tool_calls) && message.tool_calls.length > 0;
2830
- };
2831
- const groupMessages = (messages) => {
2832
- const groups = [];
2833
- let i = 0;
2834
- while (i < messages.length) {
2835
- const message = messages[i];
2836
- if (message.role === "system" || message.role === "developer") {
2837
- groups.push({
2838
- messages: [message],
2839
- isSystem: true,
2840
- isRecent: false
2841
- });
2842
- i++;
2843
- continue;
2844
- }
2845
- if (hasToolCalls(message)) {
2846
- const group = [message];
2847
- let j = i + 1;
2848
- while (j < messages.length && isToolMessage(messages[j])) {
2849
- group.push(messages[j]);
2850
- j++;
2851
- }
2852
- groups.push({
2853
- messages: group,
2854
- isSystem: false,
2855
- isRecent: false
2856
- });
2857
- i = j;
2858
- continue;
2859
- }
2860
- groups.push({
2861
- messages: [message],
2862
- isSystem: false,
2863
- isRecent: false
2864
- });
2865
- i++;
2866
- }
2867
- return groups;
2868
- };
2869
- /**
2870
- * Create a truncation notice message to inform the model that earlier context was removed.
2871
- */
2872
- const createTruncationNotice = () => ({
2873
- role: "user",
2874
- content: "[Note: Earlier conversation history was automatically truncated to fit within the model's context window. The most recent messages have been preserved.]"
2875
- });
2876
- /**
2877
- * Intelligently truncate messages to fit within the model's token limit.
2878
- *
2879
- * Strategy:
2880
- * 1. Always preserve system/developer messages (they contain critical instructions)
2881
- * 2. Always preserve the most recent messages (they contain the current task context)
2882
- * 3. Remove middle conversation messages, oldest first
2883
- * 4. Insert a truncation notice where messages were removed
2884
- * 5. Keep tool call/result pairs together (never split them)
2885
- *
2886
- * Safety margin: keeps 5% below the limit to account for token counting inaccuracies.
2887
- */
2888
- const truncateMessages = async (payload, model) => {
2889
- const maxPromptTokens = getMaxPromptTokens(model);
2890
- if (!maxPromptTokens) {
2891
- consola.debug("No token limit found for model, skipping truncation");
2892
- return payload;
2893
- }
2894
- const tokenCount = await getTokenCount(payload, model);
2895
- const safeLimit = Math.floor(maxPromptTokens * .95);
2896
- if (tokenCount.input <= safeLimit) return payload;
2897
- const groups = groupMessages(payload.messages);
2898
- const systemGroups = groups.filter((g) => g.isSystem);
2899
- const conversationGroups = groups.filter((g) => !g.isSystem);
2900
- if (conversationGroups.length === 0) {
2901
- consola.warn("No conversation messages to truncate, only system messages");
2902
- return payload;
2903
- }
2904
- let truncatedPayload = payload;
2905
- let dropCount = 0;
2906
- const maxDrop = Math.max(0, conversationGroups.length - 1);
2907
- while (dropCount <= maxDrop) {
2908
- const keptConversationGroups = conversationGroups.slice(dropCount);
2909
- const truncationNotice = dropCount > 0 ? [createTruncationNotice()] : [];
2910
- const newMessages = [
2911
- ...systemGroups.flatMap((g) => g.messages),
2912
- ...truncationNotice,
2913
- ...keptConversationGroups.flatMap((g) => g.messages)
2914
- ];
2915
- truncatedPayload = {
2916
- ...payload,
2917
- messages: newMessages
2918
- };
2919
- const newTokenCount = await getTokenCount(truncatedPayload, model);
2920
- if (newTokenCount.input <= safeLimit) {
2921
- if (dropCount > 0) {
2922
- const droppedMessages = conversationGroups.slice(0, dropCount).reduce((sum, g) => sum + g.messages.length, 0);
2923
- console.log(`Truncated: ${tokenCount.input} -> ${newTokenCount.input} tokens (-${droppedMessages} msgs)`);
2924
- }
2925
- return truncatedPayload;
2926
- }
2927
- dropCount++;
2928
- }
2929
- const finalTokenCount = await getTokenCount(truncatedPayload, model);
2930
- consola.warn(`Could not reduce tokens below limit even after maximum truncation. Current: ${finalTokenCount.input}, limit: ${maxPromptTokens}. System messages or the last message may be too large.`);
2931
- return truncatedPayload;
2932
- };
2933
-
2934
- //#endregion
2935
- //#region src/lib/rate-limit.ts
2936
- async function checkRateLimit(state$1) {
2937
- if (state$1.rateLimitSeconds === void 0) return;
2938
- const now = Date.now();
2939
- if (!state$1.lastRequestTimestamp) {
2940
- state$1.lastRequestTimestamp = now;
2941
- return;
2942
- }
2943
- const elapsedSeconds = (now - state$1.lastRequestTimestamp) / 1e3;
2944
- if (elapsedSeconds > state$1.rateLimitSeconds) {
2945
- state$1.lastRequestTimestamp = now;
2946
- return;
2947
- }
2948
- const waitTimeSeconds = Math.ceil(state$1.rateLimitSeconds - elapsedSeconds);
2949
- if (!state$1.rateLimitWait) {
2950
- consola.warn(`Rate limit exceeded. Need to wait ${waitTimeSeconds} more seconds.`);
2951
- throw new HTTPError("Rate limit exceeded", Response.json({ message: "Rate limit exceeded" }, { status: 429 }));
2952
- }
2953
- const waitTimeMs = waitTimeSeconds * 1e3;
2954
- consola.warn(`Rate limit reached. Waiting ${waitTimeSeconds} seconds before proceeding...`);
2955
- await sleep(waitTimeMs);
2956
- state$1.lastRequestTimestamp = now;
2957
- consola.info("Rate limit wait completed, proceeding with request");
2958
- }
2959
-
2960
- //#endregion
2961
- //#region src/services/copilot/create-chat-completions.ts
2962
- const createChatCompletions = async (payload) => {
2963
- if (!state.copilotToken) throw new Error("Copilot token not found");
2964
- const enableVision = payload.messages.some((x) => typeof x.content !== "string" && x.content?.some((x$1) => x$1.type === "image_url"));
2965
- const isAgentCall = payload.messages.some((msg) => ["assistant", "tool"].includes(msg.role));
2966
- const buildHeaders = () => ({
2967
- ...copilotHeaders(state, enableVision),
2968
- "X-Initiator": isAgentCall ? "agent" : "user"
2969
- });
2970
- consola.debug("Sending request to Copilot:", {
2971
- model: payload.model,
2972
- endpoint: `${copilotBaseUrl(state)}/chat/completions`
2973
- });
2974
- const url = `${copilotBaseUrl(state)}/chat/completions`;
2975
- const body = payload.stream ? {
2976
- ...payload,
2977
- stream_options: { include_usage: true }
2978
- } : payload;
2979
- const bodyString = JSON.stringify(body);
2980
- const maxRetries = 2;
2981
- let lastError;
2982
- let response;
2983
- for (let attempt = 0; attempt <= maxRetries; attempt++) try {
2984
- response = await fetch(url, {
2985
- method: "POST",
2986
- headers: buildHeaders(),
2987
- body: bodyString
2988
- });
2989
- break;
2990
- } catch (error) {
2991
- lastError = error;
2992
- if (attempt < maxRetries) {
2993
- const delay = 1e3 * (attempt + 1);
2994
- consola.warn(`Network error on attempt ${attempt + 1}/${maxRetries + 1}, retrying in ${delay}ms:`, error instanceof Error ? error.message : error);
2995
- await new Promise((r) => setTimeout(r, delay));
2996
- }
2997
- }
2998
- if (!response) throw lastError;
2999
- if (response.status === 401) {
3000
- consola.warn("Copilot token expired, refreshing and retrying...");
3001
- try {
3002
- await refreshCopilotToken();
3003
- response = await fetch(url, {
3004
- method: "POST",
3005
- headers: buildHeaders(),
3006
- body: bodyString
3007
- });
3008
- } catch (refreshError) {
3009
- consola.error("Failed to refresh token:", refreshError);
3010
- }
3011
- }
3012
- if (!response.ok) {
3013
- const errorBody = await response.text();
3014
- consola.error("Failed to create chat completions", {
3015
- status: response.status,
3016
- statusText: response.statusText,
3017
- body: errorBody
3018
- });
3019
- throw new HTTPError(`Failed to create chat completions: ${response.status} ${errorBody}`, response);
3020
- }
3021
- if (payload.stream) return events(response);
3022
- return await response.json();
3023
- };
3024
-
3025
- //#endregion
3026
- //#region src/routes/chat-completions/handler.ts
3027
- /**
3028
- * Calculate token count, log it, and auto-truncate if needed.
3029
- *
3030
- * Uses multi-strategy exact matching via findModel() to handle
3031
- * mismatches between requested and available model names.
3032
- */
3033
- async function processPayloadTokens(payload) {
3034
- const selectedModel = findModel(payload.model);
3035
- if (!selectedModel) {
3036
- consola.warn("No model selected, skipping token count calculation");
3037
- return payload;
3038
- }
3039
- try {
3040
- const tokenCount = await getTokenCount(payload, selectedModel);
3041
- consola.debug("Current token count:", tokenCount);
3042
- const truncated = await truncateMessages(payload, selectedModel);
3043
- if (isNullish(truncated.max_tokens)) {
3044
- const withMaxTokens = {
3045
- ...truncated,
3046
- max_tokens: selectedModel.capabilities.limits.max_output_tokens
3047
- };
3048
- consola.debug("Set max_tokens to:", JSON.stringify(withMaxTokens.max_tokens));
3049
- return withMaxTokens;
3050
- }
3051
- return truncated;
3052
- } catch (error) {
3053
- consola.warn("Failed to calculate token count:", error);
3054
- return payload;
3055
- }
3056
- }
3057
- async function handleCompletion$1(c) {
3058
- await checkRateLimit(state);
3059
- const rawPayload = await c.req.json();
3060
- consola.debug("Request payload:", JSON.stringify(rawPayload).slice(-400));
3061
- const payload = await processPayloadTokens(rawPayload);
3062
- if (state.manualApprove) await awaitApproval();
3063
- const response = await createChatCompletions(payload);
3064
- if (isNonStreaming$1(response)) {
3065
- consola.debug("Non-streaming response:", JSON.stringify(response));
3066
- if (response.usage) setTokenUsage({
3067
- inputTokens: response.usage.prompt_tokens,
3068
- outputTokens: response.usage.completion_tokens,
3069
- cacheReadTokens: response.usage.prompt_tokens_details?.cached_tokens
3070
- });
3071
- return c.json(response);
3072
- }
3073
- consola.debug("Streaming response");
3074
- return streamSSE(c, async (stream) => {
3075
- for await (const chunk of response) {
3076
- consola.debug("Streaming chunk:", JSON.stringify(chunk));
3077
- try {
3078
- const sseChunk = chunk;
3079
- if (sseChunk.data && sseChunk.data !== "[DONE]") {
3080
- const parsed = JSON.parse(sseChunk.data);
3081
- if (parsed.usage) {
3082
- const usage = {
3083
- inputTokens: parsed.usage.prompt_tokens ?? 0,
3084
- outputTokens: parsed.usage.completion_tokens ?? 0,
3085
- cacheReadTokens: parsed.usage.prompt_tokens_details?.cached_tokens
3086
- };
3087
- setTokenUsage(usage);
3088
- }
3089
- }
3090
- } catch {}
3091
- await stream.writeSSE(chunk);
3092
- }
3093
- signalStreamDone();
3094
- });
3095
- }
3096
- const isNonStreaming$1 = (response) => Object.hasOwn(response, "choices");
3097
-
3098
- //#endregion
3099
- //#region src/routes/chat-completions/route.ts
3100
- const completionRoutes = new Hono();
3101
- completionRoutes.post("/", async (c) => {
3102
- try {
3103
- return await handleCompletion$1(c);
3104
- } catch (error) {
3105
- return await forwardError(c, error);
3106
- }
3107
- });
3108
-
3109
- //#endregion
3110
- //#region src/services/copilot/create-embeddings.ts
3111
- const createEmbeddings = async (payload) => {
3112
- if (!state.copilotToken) throw new Error("Copilot token not found");
3113
- const response = await fetch(`${copilotBaseUrl(state)}/embeddings`, {
3114
- method: "POST",
3115
- headers: copilotHeaders(state),
3116
- body: JSON.stringify(payload)
3117
- });
3118
- if (!response.ok) throw new HTTPError("Failed to create embeddings", response);
3119
- return await response.json();
3120
- };
3121
-
3122
- //#endregion
3123
- //#region src/routes/embeddings/route.ts
3124
- const embeddingRoutes = new Hono();
3125
- embeddingRoutes.post("/", async (c) => {
3126
- try {
3127
- const paylod = await c.req.json();
3128
- const response = await createEmbeddings(paylod);
3129
- return c.json(response);
3130
- } catch (error) {
3131
- return await forwardError(c, error);
3132
- }
3133
- });
3134
-
3135
2992
  //#endregion
3136
2993
  //#region src/routes/messages/utils.ts
3137
2994
  function mapOpenAIStopReasonToAnthropic(finishReason) {
@@ -3373,7 +3230,7 @@ async function handleCountTokens(c) {
3373
3230
  let finalTokenCount = tokenCount.input + tokenCount.output;
3374
3231
  if (anthropicPayload.model.startsWith("claude")) finalTokenCount = Math.round(finalTokenCount * 1.15);
3375
3232
  else if (anthropicPayload.model.startsWith("grok")) finalTokenCount = Math.round(finalTokenCount * 1.03);
3376
- consola.debug("Token count:", finalTokenCount);
3233
+ console.log("Token count:", finalTokenCount);
3377
3234
  return c.json({ input_tokens: finalTokenCount });
3378
3235
  } catch (error) {
3379
3236
  consola.error("Error counting tokens:", error);
@@ -3507,30 +3364,10 @@ function translateChunkToAnthropicEvents(chunk, state$1) {
3507
3364
 
3508
3365
  //#endregion
3509
3366
  //#region src/routes/messages/handler.ts
3510
- /**
3511
- * Auto-truncate OpenAI payload if prompt tokens exceed model limit.
3512
- *
3513
- * Uses multi-strategy exact matching via findModel() to handle
3514
- * mismatches between Anthropic and Copilot model naming conventions.
3515
- */
3516
- async function autoTruncatePayload(payload) {
3517
- const selectedModel = findModel(payload.model);
3518
- if (!selectedModel) {
3519
- consola.warn("No model selected for Anthropic endpoint, skipping auto-truncation");
3520
- return payload;
3521
- }
3522
- try {
3523
- return await truncateMessages(payload, selectedModel);
3524
- } catch (error) {
3525
- consola.warn("Failed to auto-truncate context:", error);
3526
- return payload;
3527
- }
3528
- }
3529
3367
  async function handleCompletion(c) {
3530
3368
  await checkRateLimit(state);
3531
3369
  const anthropicPayload = await c.req.json();
3532
- const rawOpenAIPayload = translateToOpenAI(anthropicPayload);
3533
- const openAIPayload = await autoTruncatePayload(rawOpenAIPayload);
3370
+ const openAIPayload = translateToOpenAI(anthropicPayload);
3534
3371
  if (state.manualApprove) await awaitApproval();
3535
3372
  const response = await createChatCompletions(openAIPayload);
3536
3373
  if (isNonStreaming(response)) {