copilot-api-plus 1.0.50 → 1.0.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -1,9 +1,9 @@
1
1
  #!/usr/bin/env node
2
2
  import { PATHS, ensurePaths } from "./paths-CVYLp61D.js";
3
3
  import { state } from "./state-CcLGr8VN.js";
4
- import { GITHUB_API_BASE_URL, copilotBaseUrl, copilotHeaders, githubHeaders } from "./get-user-BzIEATcF.js";
5
- import { HTTPError, forwardError } from "./error-CvU5otz-.js";
6
- import { cacheModels, cacheVSCodeVersion, clearGithubToken, findModel, isNullish, setupCopilotToken, setupGitHubToken, sleep } from "./token-B777vbx8.js";
4
+ import { GITHUB_API_BASE_URL, copilotBaseUrl, copilotHeaders, githubHeaders } from "./get-user-DEDD9jIs.js";
5
+ import { HTTPError, forwardError } from "./error-SzJ4KHd8.js";
6
+ import { cacheModels, cacheVSCodeVersion, clearGithubToken, findModel, isNullish, refreshCopilotToken, setupCopilotToken, setupGitHubToken, sleep } from "./token-CpxbiiIw.js";
7
7
  import { clearAntigravityAuth, disableCurrentAccount, getAntigravityAuthPath, getApiKey, getCurrentProjectId, getValidAccessToken, rotateAccount } from "./auth-CWGl6kMf.js";
8
8
  import { clearZenAuth, getZenAuthPath } from "./auth-BrdL89xk.js";
9
9
  import { getAntigravityModels, getAntigravityUsage, isThinkingModel } from "./get-models-uEbEgq0L.js";
@@ -1197,12 +1197,24 @@ const apiKeyAuthMiddleware = async (c, next) => {
1197
1197
  * Global token usage store for passing usage info from handlers to logger.
1198
1198
  * Handlers call setTokenUsage() when usage is available,
1199
1199
  * logger reads and clears it after await next().
1200
+ *
1201
+ * For streaming responses, usage arrives after next() returns.
1202
+ * In that case the handler calls signalStreamDone() when the stream ends,
1203
+ * and the logger waits for it with a timeout.
1200
1204
  */
1201
1205
  let pendingTokenUsage;
1206
+ let streamDoneResolve;
1202
1207
  function setTokenUsage(usage) {
1203
1208
  pendingTokenUsage = usage;
1204
1209
  }
1205
1210
  /**
1211
+ * Notify the logger that a streaming response has finished sending.
1212
+ * Must be called at the end of streamSSE callbacks.
1213
+ */
1214
+ function signalStreamDone() {
1215
+ streamDoneResolve?.();
1216
+ }
1217
+ /**
1206
1218
  * Get timestamp string in format HH:mm:ss
1207
1219
  */
1208
1220
  function getTime() {
@@ -1252,9 +1264,16 @@ function modelLogger() {
1252
1264
  const modelPrefix = model ? `[${model}] ` : "";
1253
1265
  const startTime = getTime();
1254
1266
  pendingTokenUsage = void 0;
1267
+ const localStreamDone = new Promise((resolve) => {
1268
+ streamDoneResolve = resolve;
1269
+ });
1255
1270
  console.log(`${modelPrefix}${startTime} <-- ${method} ${fullPath}`);
1256
1271
  const start$1 = Date.now();
1257
1272
  await next();
1273
+ if (c.res.headers.get("content-type")?.includes("text/event-stream") && !pendingTokenUsage) {
1274
+ const timeout = new Promise((resolve) => setTimeout(resolve, 12e4));
1275
+ await Promise.race([localStreamDone, timeout]);
1276
+ }
1258
1277
  const duration = Date.now() - start$1;
1259
1278
  const endTime = getTime();
1260
1279
  const usage = pendingTokenUsage;
@@ -2589,6 +2608,194 @@ const awaitApproval = async () => {
2589
2608
  if (!await consola.prompt(`Accept incoming request?`, { type: "confirm" })) throw new HTTPError("Request rejected", Response.json({ message: "Request rejected" }, { status: 403 }));
2590
2609
  };
2591
2610
 
2611
+ //#endregion
2612
+ //#region src/lib/rate-limit.ts
2613
+ async function checkRateLimit(state$1) {
2614
+ if (state$1.rateLimitSeconds === void 0) return;
2615
+ const now = Date.now();
2616
+ if (!state$1.lastRequestTimestamp) {
2617
+ state$1.lastRequestTimestamp = now;
2618
+ return;
2619
+ }
2620
+ const elapsedSeconds = (now - state$1.lastRequestTimestamp) / 1e3;
2621
+ if (elapsedSeconds > state$1.rateLimitSeconds) {
2622
+ state$1.lastRequestTimestamp = now;
2623
+ return;
2624
+ }
2625
+ const waitTimeSeconds = Math.ceil(state$1.rateLimitSeconds - elapsedSeconds);
2626
+ if (!state$1.rateLimitWait) {
2627
+ consola.warn(`Rate limit exceeded. Need to wait ${waitTimeSeconds} more seconds.`);
2628
+ throw new HTTPError("Rate limit exceeded", Response.json({ message: "Rate limit exceeded" }, { status: 429 }));
2629
+ }
2630
+ const waitTimeMs = waitTimeSeconds * 1e3;
2631
+ consola.warn(`Rate limit reached. Waiting ${waitTimeSeconds} seconds before proceeding...`);
2632
+ await sleep(waitTimeMs);
2633
+ state$1.lastRequestTimestamp = now;
2634
+ consola.info("Rate limit wait completed, proceeding with request");
2635
+ }
2636
+
2637
+ //#endregion
2638
+ //#region src/services/copilot/create-chat-completions.ts
2639
+ const createChatCompletions = async (payload) => {
2640
+ if (!state.copilotToken) throw new Error("Copilot token not found");
2641
+ const enableVision = payload.messages.some((x) => typeof x.content !== "string" && x.content?.some((x$1) => x$1.type === "image_url"));
2642
+ const isAgentCall = payload.messages.some((msg) => ["assistant", "tool"].includes(msg.role));
2643
+ const buildHeaders = () => ({
2644
+ ...copilotHeaders(state, enableVision),
2645
+ "X-Initiator": isAgentCall ? "agent" : "user"
2646
+ });
2647
+ consola.debug("Sending request to Copilot:", {
2648
+ model: payload.model,
2649
+ endpoint: `${copilotBaseUrl(state)}/chat/completions`
2650
+ });
2651
+ const url = `${copilotBaseUrl(state)}/chat/completions`;
2652
+ const body = payload.stream ? {
2653
+ ...payload,
2654
+ stream_options: { include_usage: true }
2655
+ } : payload;
2656
+ const bodyString = JSON.stringify(body);
2657
+ const maxRetries = 2;
2658
+ let lastError;
2659
+ let response;
2660
+ for (let attempt = 0; attempt <= maxRetries; attempt++) try {
2661
+ response = await fetch(url, {
2662
+ method: "POST",
2663
+ headers: buildHeaders(),
2664
+ body: bodyString
2665
+ });
2666
+ break;
2667
+ } catch (error) {
2668
+ lastError = error;
2669
+ if (attempt < maxRetries) {
2670
+ const delay = 1e3 * (attempt + 1);
2671
+ consola.warn(`Network error on attempt ${attempt + 1}/${maxRetries + 1}, retrying in ${delay}ms:`, error instanceof Error ? error.message : error);
2672
+ await new Promise((r) => setTimeout(r, delay));
2673
+ }
2674
+ }
2675
+ if (!response) throw lastError;
2676
+ if (response.status === 401) {
2677
+ consola.warn("Copilot token expired, refreshing and retrying...");
2678
+ try {
2679
+ await refreshCopilotToken();
2680
+ response = await fetch(url, {
2681
+ method: "POST",
2682
+ headers: buildHeaders(),
2683
+ body: bodyString
2684
+ });
2685
+ } catch (refreshError) {
2686
+ consola.error("Failed to refresh token:", refreshError);
2687
+ }
2688
+ }
2689
+ if (!response.ok) {
2690
+ const errorBody = await response.text();
2691
+ consola.error("Failed to create chat completions", {
2692
+ status: response.status,
2693
+ statusText: response.statusText,
2694
+ body: errorBody
2695
+ });
2696
+ throw new HTTPError(`Failed to create chat completions: ${response.status} ${errorBody}`, response);
2697
+ }
2698
+ if (payload.stream) return events(response);
2699
+ return await response.json();
2700
+ };
2701
+
2702
+ //#endregion
2703
+ //#region src/routes/chat-completions/handler.ts
2704
+ /**
2705
+ * Set max_tokens from model limits if not already provided in the payload.
2706
+ */
2707
+ function applyMaxTokens(payload) {
2708
+ if (!isNullish(payload.max_tokens)) return payload;
2709
+ const selectedModel = findModel(payload.model);
2710
+ if (!selectedModel) return payload;
2711
+ const maxTokens = selectedModel.capabilities.limits.max_output_tokens;
2712
+ if (maxTokens) {
2713
+ consola.debug("Set max_tokens to:", maxTokens);
2714
+ return {
2715
+ ...payload,
2716
+ max_tokens: maxTokens
2717
+ };
2718
+ }
2719
+ return payload;
2720
+ }
2721
+ async function handleCompletion$1(c) {
2722
+ await checkRateLimit(state);
2723
+ const rawPayload = await c.req.json();
2724
+ consola.debug("Request payload:", JSON.stringify(rawPayload).slice(-400));
2725
+ const payload = applyMaxTokens(rawPayload);
2726
+ if (state.manualApprove) await awaitApproval();
2727
+ const response = await createChatCompletions(payload);
2728
+ if (isNonStreaming$1(response)) {
2729
+ consola.debug("Non-streaming response:", JSON.stringify(response));
2730
+ if (response.usage) setTokenUsage({
2731
+ inputTokens: response.usage.prompt_tokens,
2732
+ outputTokens: response.usage.completion_tokens,
2733
+ cacheReadTokens: response.usage.prompt_tokens_details?.cached_tokens
2734
+ });
2735
+ return c.json(response);
2736
+ }
2737
+ consola.debug("Streaming response");
2738
+ return streamSSE(c, async (stream) => {
2739
+ for await (const chunk of response) {
2740
+ consola.debug("Streaming chunk:", JSON.stringify(chunk));
2741
+ try {
2742
+ const sseChunk = chunk;
2743
+ if (sseChunk.data && sseChunk.data !== "[DONE]") {
2744
+ const parsed = JSON.parse(sseChunk.data);
2745
+ if (parsed.usage) {
2746
+ const usage = {
2747
+ inputTokens: parsed.usage.prompt_tokens ?? 0,
2748
+ outputTokens: parsed.usage.completion_tokens ?? 0,
2749
+ cacheReadTokens: parsed.usage.prompt_tokens_details?.cached_tokens
2750
+ };
2751
+ setTokenUsage(usage);
2752
+ }
2753
+ }
2754
+ } catch {}
2755
+ await stream.writeSSE(chunk);
2756
+ }
2757
+ signalStreamDone();
2758
+ });
2759
+ }
2760
+ const isNonStreaming$1 = (response) => Object.hasOwn(response, "choices");
2761
+
2762
+ //#endregion
2763
+ //#region src/routes/chat-completions/route.ts
2764
+ const completionRoutes = new Hono();
2765
+ completionRoutes.post("/", async (c) => {
2766
+ try {
2767
+ return await handleCompletion$1(c);
2768
+ } catch (error) {
2769
+ return await forwardError(c, error);
2770
+ }
2771
+ });
2772
+
2773
+ //#endregion
2774
+ //#region src/services/copilot/create-embeddings.ts
2775
+ const createEmbeddings = async (payload) => {
2776
+ if (!state.copilotToken) throw new Error("Copilot token not found");
2777
+ const response = await fetch(`${copilotBaseUrl(state)}/embeddings`, {
2778
+ method: "POST",
2779
+ headers: copilotHeaders(state),
2780
+ body: JSON.stringify(payload)
2781
+ });
2782
+ if (!response.ok) throw new HTTPError("Failed to create embeddings", response);
2783
+ return await response.json();
2784
+ };
2785
+
2786
+ //#endregion
2787
+ //#region src/routes/embeddings/route.ts
2788
+ const embeddingRoutes = new Hono();
2789
+ embeddingRoutes.post("/", async (c) => {
2790
+ try {
2791
+ const paylod = await c.req.json();
2792
+ const response = await createEmbeddings(paylod);
2793
+ return c.json(response);
2794
+ } catch (error) {
2795
+ return await forwardError(c, error);
2796
+ }
2797
+ });
2798
+
2592
2799
  //#endregion
2593
2800
  //#region src/lib/tokenizer.ts
2594
2801
  const ENCODING_MAP = {
@@ -2782,323 +2989,6 @@ const getTokenCount = async (payload, model) => {
2782
2989
  };
2783
2990
  };
2784
2991
 
2785
- //#endregion
2786
- //#region src/lib/context-compression.ts
2787
- /**
2788
- * Get the maximum prompt token limit for a model.
2789
- * Prefers max_prompt_tokens, falls back to max_context_window_tokens minus max_output_tokens.
2790
- */
2791
- const getMaxPromptTokens = (model) => {
2792
- const limits = model.capabilities.limits;
2793
- if (limits.max_prompt_tokens) return limits.max_prompt_tokens;
2794
- if (limits.max_context_window_tokens) {
2795
- const outputReserve = limits.max_output_tokens ?? 4096;
2796
- return limits.max_context_window_tokens - outputReserve;
2797
- }
2798
- };
2799
- /**
2800
- * Check if a message is a tool-related message (tool call or tool result).
2801
- * Tool messages must be kept together with their paired assistant message.
2802
- */
2803
- const isToolMessage = (message) => {
2804
- return message.role === "tool";
2805
- };
2806
- /**
2807
- * Check if an assistant message contains tool calls.
2808
- */
2809
- const hasToolCalls = (message) => {
2810
- return message.role === "assistant" && Array.isArray(message.tool_calls) && message.tool_calls.length > 0;
2811
- };
2812
- const groupMessages = (messages) => {
2813
- const groups = [];
2814
- let i = 0;
2815
- while (i < messages.length) {
2816
- const message = messages[i];
2817
- if (message.role === "system" || message.role === "developer") {
2818
- groups.push({
2819
- messages: [message],
2820
- isSystem: true,
2821
- isRecent: false
2822
- });
2823
- i++;
2824
- continue;
2825
- }
2826
- if (hasToolCalls(message)) {
2827
- const group = [message];
2828
- let j = i + 1;
2829
- while (j < messages.length && isToolMessage(messages[j])) {
2830
- group.push(messages[j]);
2831
- j++;
2832
- }
2833
- groups.push({
2834
- messages: group,
2835
- isSystem: false,
2836
- isRecent: false
2837
- });
2838
- i = j;
2839
- continue;
2840
- }
2841
- groups.push({
2842
- messages: [message],
2843
- isSystem: false,
2844
- isRecent: false
2845
- });
2846
- i++;
2847
- }
2848
- return groups;
2849
- };
2850
- /**
2851
- * Create a truncation notice message to inform the model that earlier context was removed.
2852
- */
2853
- const createTruncationNotice = () => ({
2854
- role: "user",
2855
- content: "[Note: Earlier conversation history was automatically truncated to fit within the model's context window. The most recent messages have been preserved.]"
2856
- });
2857
- /**
2858
- * Intelligently truncate messages to fit within the model's token limit.
2859
- *
2860
- * Strategy:
2861
- * 1. Always preserve system/developer messages (they contain critical instructions)
2862
- * 2. Always preserve the most recent messages (they contain the current task context)
2863
- * 3. Remove middle conversation messages, oldest first
2864
- * 4. Insert a truncation notice where messages were removed
2865
- * 5. Keep tool call/result pairs together (never split them)
2866
- *
2867
- * Safety margin: keeps 5% below the limit to account for token counting inaccuracies.
2868
- */
2869
- const truncateMessages = async (payload, model) => {
2870
- const maxPromptTokens = getMaxPromptTokens(model);
2871
- if (!maxPromptTokens) {
2872
- consola.debug("No token limit found for model, skipping truncation");
2873
- return payload;
2874
- }
2875
- const tokenCount = await getTokenCount(payload, model);
2876
- const safeLimit = Math.floor(maxPromptTokens * .95);
2877
- if (tokenCount.input <= safeLimit) return payload;
2878
- const groups = groupMessages(payload.messages);
2879
- const systemGroups = groups.filter((g) => g.isSystem);
2880
- const conversationGroups = groups.filter((g) => !g.isSystem);
2881
- if (conversationGroups.length === 0) {
2882
- consola.warn("No conversation messages to truncate, only system messages");
2883
- return payload;
2884
- }
2885
- let truncatedPayload = payload;
2886
- let dropCount = 0;
2887
- const maxDrop = Math.max(0, conversationGroups.length - 1);
2888
- while (dropCount <= maxDrop) {
2889
- const keptConversationGroups = conversationGroups.slice(dropCount);
2890
- const truncationNotice = dropCount > 0 ? [createTruncationNotice()] : [];
2891
- const newMessages = [
2892
- ...systemGroups.flatMap((g) => g.messages),
2893
- ...truncationNotice,
2894
- ...keptConversationGroups.flatMap((g) => g.messages)
2895
- ];
2896
- truncatedPayload = {
2897
- ...payload,
2898
- messages: newMessages
2899
- };
2900
- const newTokenCount = await getTokenCount(truncatedPayload, model);
2901
- if (newTokenCount.input <= safeLimit) {
2902
- if (dropCount > 0) {
2903
- const droppedMessages = conversationGroups.slice(0, dropCount).reduce((sum, g) => sum + g.messages.length, 0);
2904
- console.log(`Truncated: ${tokenCount.input} -> ${newTokenCount.input} tokens (-${droppedMessages} msgs)`);
2905
- }
2906
- return truncatedPayload;
2907
- }
2908
- dropCount++;
2909
- }
2910
- const finalTokenCount = await getTokenCount(truncatedPayload, model);
2911
- consola.warn(`Could not reduce tokens below limit even after maximum truncation. Current: ${finalTokenCount.input}, limit: ${maxPromptTokens}. System messages or the last message may be too large.`);
2912
- return truncatedPayload;
2913
- };
2914
-
2915
- //#endregion
2916
- //#region src/lib/rate-limit.ts
2917
- async function checkRateLimit(state$1) {
2918
- if (state$1.rateLimitSeconds === void 0) return;
2919
- const now = Date.now();
2920
- if (!state$1.lastRequestTimestamp) {
2921
- state$1.lastRequestTimestamp = now;
2922
- return;
2923
- }
2924
- const elapsedSeconds = (now - state$1.lastRequestTimestamp) / 1e3;
2925
- if (elapsedSeconds > state$1.rateLimitSeconds) {
2926
- state$1.lastRequestTimestamp = now;
2927
- return;
2928
- }
2929
- const waitTimeSeconds = Math.ceil(state$1.rateLimitSeconds - elapsedSeconds);
2930
- if (!state$1.rateLimitWait) {
2931
- consola.warn(`Rate limit exceeded. Need to wait ${waitTimeSeconds} more seconds.`);
2932
- throw new HTTPError("Rate limit exceeded", Response.json({ message: "Rate limit exceeded" }, { status: 429 }));
2933
- }
2934
- const waitTimeMs = waitTimeSeconds * 1e3;
2935
- consola.warn(`Rate limit reached. Waiting ${waitTimeSeconds} seconds before proceeding...`);
2936
- await sleep(waitTimeMs);
2937
- state$1.lastRequestTimestamp = now;
2938
- consola.info("Rate limit wait completed, proceeding with request");
2939
- }
2940
-
2941
- //#endregion
2942
- //#region src/services/copilot/create-chat-completions.ts
2943
- const createChatCompletions = async (payload) => {
2944
- if (!state.copilotToken) throw new Error("Copilot token not found");
2945
- const enableVision = payload.messages.some((x) => typeof x.content !== "string" && x.content?.some((x$1) => x$1.type === "image_url"));
2946
- const isAgentCall = payload.messages.some((msg) => ["assistant", "tool"].includes(msg.role));
2947
- const headers = {
2948
- ...copilotHeaders(state, enableVision),
2949
- "X-Initiator": isAgentCall ? "agent" : "user"
2950
- };
2951
- consola.debug("Sending request to Copilot:", {
2952
- model: payload.model,
2953
- endpoint: `${copilotBaseUrl(state)}/chat/completions`
2954
- });
2955
- const url = `${copilotBaseUrl(state)}/chat/completions`;
2956
- const body = payload.stream ? {
2957
- ...payload,
2958
- stream_options: { include_usage: true }
2959
- } : payload;
2960
- const fetchOptions = {
2961
- method: "POST",
2962
- headers,
2963
- body: JSON.stringify(body)
2964
- };
2965
- const maxRetries = 2;
2966
- let lastError;
2967
- let response;
2968
- for (let attempt = 0; attempt <= maxRetries; attempt++) try {
2969
- response = await fetch(url, fetchOptions);
2970
- break;
2971
- } catch (error) {
2972
- lastError = error;
2973
- if (attempt < maxRetries) {
2974
- const delay = 1e3 * (attempt + 1);
2975
- consola.warn(`Network error on attempt ${attempt + 1}/${maxRetries + 1}, retrying in ${delay}ms:`, error instanceof Error ? error.message : error);
2976
- await new Promise((r) => setTimeout(r, delay));
2977
- }
2978
- }
2979
- if (!response) throw lastError;
2980
- if (!response.ok) {
2981
- const errorBody = await response.text();
2982
- consola.error("Failed to create chat completions", {
2983
- status: response.status,
2984
- statusText: response.statusText,
2985
- body: errorBody
2986
- });
2987
- throw new HTTPError(`Failed to create chat completions: ${response.status} ${errorBody}`, response);
2988
- }
2989
- if (payload.stream) return events(response);
2990
- return await response.json();
2991
- };
2992
-
2993
- //#endregion
2994
- //#region src/routes/chat-completions/handler.ts
2995
- /**
2996
- * Calculate token count, log it, and auto-truncate if needed.
2997
- *
2998
- * Uses multi-strategy exact matching via findModel() to handle
2999
- * mismatches between requested and available model names.
3000
- */
3001
- async function processPayloadTokens(payload) {
3002
- const selectedModel = findModel(payload.model);
3003
- if (!selectedModel) {
3004
- consola.warn("No model selected, skipping token count calculation");
3005
- return payload;
3006
- }
3007
- try {
3008
- const tokenCount = await getTokenCount(payload, selectedModel);
3009
- consola.info("Current token count:", tokenCount);
3010
- const truncated = await truncateMessages(payload, selectedModel);
3011
- if (isNullish(truncated.max_tokens)) {
3012
- const withMaxTokens = {
3013
- ...truncated,
3014
- max_tokens: selectedModel.capabilities.limits.max_output_tokens
3015
- };
3016
- consola.debug("Set max_tokens to:", JSON.stringify(withMaxTokens.max_tokens));
3017
- return withMaxTokens;
3018
- }
3019
- return truncated;
3020
- } catch (error) {
3021
- consola.warn("Failed to calculate token count:", error);
3022
- return payload;
3023
- }
3024
- }
3025
- async function handleCompletion$1(c) {
3026
- await checkRateLimit(state);
3027
- const rawPayload = await c.req.json();
3028
- consola.debug("Request payload:", JSON.stringify(rawPayload).slice(-400));
3029
- const payload = await processPayloadTokens(rawPayload);
3030
- if (state.manualApprove) await awaitApproval();
3031
- const response = await createChatCompletions(payload);
3032
- if (isNonStreaming$1(response)) {
3033
- consola.debug("Non-streaming response:", JSON.stringify(response));
3034
- if (response.usage) setTokenUsage({
3035
- inputTokens: response.usage.prompt_tokens,
3036
- outputTokens: response.usage.completion_tokens,
3037
- cacheReadTokens: response.usage.prompt_tokens_details?.cached_tokens
3038
- });
3039
- return c.json(response);
3040
- }
3041
- consola.debug("Streaming response");
3042
- return streamSSE(c, async (stream) => {
3043
- for await (const chunk of response) {
3044
- consola.debug("Streaming chunk:", JSON.stringify(chunk));
3045
- try {
3046
- const sseChunk = chunk;
3047
- if (sseChunk.data && sseChunk.data !== "[DONE]") {
3048
- const parsed = JSON.parse(sseChunk.data);
3049
- if (parsed.usage) {
3050
- const usage = {
3051
- inputTokens: parsed.usage.prompt_tokens ?? 0,
3052
- outputTokens: parsed.usage.completion_tokens ?? 0,
3053
- cacheReadTokens: parsed.usage.prompt_tokens_details?.cached_tokens
3054
- };
3055
- setTokenUsage(usage);
3056
- }
3057
- }
3058
- } catch {}
3059
- await stream.writeSSE(chunk);
3060
- }
3061
- });
3062
- }
3063
- const isNonStreaming$1 = (response) => Object.hasOwn(response, "choices");
3064
-
3065
- //#endregion
3066
- //#region src/routes/chat-completions/route.ts
3067
- const completionRoutes = new Hono();
3068
- completionRoutes.post("/", async (c) => {
3069
- try {
3070
- return await handleCompletion$1(c);
3071
- } catch (error) {
3072
- return await forwardError(c, error);
3073
- }
3074
- });
3075
-
3076
- //#endregion
3077
- //#region src/services/copilot/create-embeddings.ts
3078
- const createEmbeddings = async (payload) => {
3079
- if (!state.copilotToken) throw new Error("Copilot token not found");
3080
- const response = await fetch(`${copilotBaseUrl(state)}/embeddings`, {
3081
- method: "POST",
3082
- headers: copilotHeaders(state),
3083
- body: JSON.stringify(payload)
3084
- });
3085
- if (!response.ok) throw new HTTPError("Failed to create embeddings", response);
3086
- return await response.json();
3087
- };
3088
-
3089
- //#endregion
3090
- //#region src/routes/embeddings/route.ts
3091
- const embeddingRoutes = new Hono();
3092
- embeddingRoutes.post("/", async (c) => {
3093
- try {
3094
- const paylod = await c.req.json();
3095
- const response = await createEmbeddings(paylod);
3096
- return c.json(response);
3097
- } catch (error) {
3098
- return await forwardError(c, error);
3099
- }
3100
- });
3101
-
3102
2992
  //#endregion
3103
2993
  //#region src/routes/messages/utils.ts
3104
2994
  function mapOpenAIStopReasonToAnthropic(finishReason) {
@@ -3340,7 +3230,7 @@ async function handleCountTokens(c) {
3340
3230
  let finalTokenCount = tokenCount.input + tokenCount.output;
3341
3231
  if (anthropicPayload.model.startsWith("claude")) finalTokenCount = Math.round(finalTokenCount * 1.15);
3342
3232
  else if (anthropicPayload.model.startsWith("grok")) finalTokenCount = Math.round(finalTokenCount * 1.03);
3343
- consola.info("Token count:", finalTokenCount);
3233
+ console.log("Token count:", finalTokenCount);
3344
3234
  return c.json({ input_tokens: finalTokenCount });
3345
3235
  } catch (error) {
3346
3236
  consola.error("Error counting tokens:", error);
@@ -3474,30 +3364,10 @@ function translateChunkToAnthropicEvents(chunk, state$1) {
3474
3364
 
3475
3365
  //#endregion
3476
3366
  //#region src/routes/messages/handler.ts
3477
- /**
3478
- * Auto-truncate OpenAI payload if prompt tokens exceed model limit.
3479
- *
3480
- * Uses multi-strategy exact matching via findModel() to handle
3481
- * mismatches between Anthropic and Copilot model naming conventions.
3482
- */
3483
- async function autoTruncatePayload(payload) {
3484
- const selectedModel = findModel(payload.model);
3485
- if (!selectedModel) {
3486
- consola.warn("No model selected for Anthropic endpoint, skipping auto-truncation");
3487
- return payload;
3488
- }
3489
- try {
3490
- return await truncateMessages(payload, selectedModel);
3491
- } catch (error) {
3492
- consola.warn("Failed to auto-truncate context:", error);
3493
- return payload;
3494
- }
3495
- }
3496
3367
  async function handleCompletion(c) {
3497
3368
  await checkRateLimit(state);
3498
3369
  const anthropicPayload = await c.req.json();
3499
- const rawOpenAIPayload = translateToOpenAI(anthropicPayload);
3500
- const openAIPayload = await autoTruncatePayload(rawOpenAIPayload);
3370
+ const openAIPayload = translateToOpenAI(anthropicPayload);
3501
3371
  if (state.manualApprove) await awaitApproval();
3502
3372
  const response = await createChatCompletions(openAIPayload);
3503
3373
  if (isNonStreaming(response)) {
@@ -3535,6 +3405,7 @@ async function handleCompletion(c) {
3535
3405
  data: JSON.stringify(event)
3536
3406
  });
3537
3407
  }
3408
+ signalStreamDone();
3538
3409
  });
3539
3410
  }
3540
3411
  const isNonStreaming = (response) => Object.hasOwn(response, "choices");
@@ -4107,7 +3978,7 @@ async function runServer(options$1) {
4107
3978
  state.githubToken = options$1.githubToken;
4108
3979
  consola.info("Using provided GitHub token");
4109
3980
  try {
4110
- const { getGitHubUser } = await import("./get-user-CsQCc3Qx.js");
3981
+ const { getGitHubUser } = await import("./get-user-HhhC3uQr.js");
4111
3982
  const user = await getGitHubUser();
4112
3983
  consola.info(`Logged in as ${user.login}`);
4113
3984
  } catch (error) {
@@ -4118,10 +3989,10 @@ async function runServer(options$1) {
4118
3989
  try {
4119
3990
  await setupCopilotToken();
4120
3991
  } catch (error) {
4121
- const { HTTPError: HTTPError$1 } = await import("./error-CsShqJjE.js");
3992
+ const { HTTPError: HTTPError$1 } = await import("./error-DNWWcl_s.js");
4122
3993
  if (error instanceof HTTPError$1 && error.response.status === 401) {
4123
3994
  consola.error("Failed to get Copilot token - GitHub token may be invalid or Copilot access revoked");
4124
- const { clearGithubToken: clearGithubToken$1 } = await import("./token-CCg0yU7a.js");
3995
+ const { clearGithubToken: clearGithubToken$1 } = await import("./token-DkNaoDp7.js");
4125
3996
  await clearGithubToken$1();
4126
3997
  consola.info("Please restart to re-authenticate");
4127
3998
  }