copilot-api-plus 1.0.51 → 1.0.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.js +190 -353
- package/dist/main.js.map +1 -1
- package/package.json +1 -1
package/dist/main.js
CHANGED
|
@@ -2608,6 +2608,194 @@ const awaitApproval = async () => {
|
|
|
2608
2608
|
if (!await consola.prompt(`Accept incoming request?`, { type: "confirm" })) throw new HTTPError("Request rejected", Response.json({ message: "Request rejected" }, { status: 403 }));
|
|
2609
2609
|
};
|
|
2610
2610
|
|
|
2611
|
+
//#endregion
|
|
2612
|
+
//#region src/lib/rate-limit.ts
|
|
2613
|
+
async function checkRateLimit(state$1) {
|
|
2614
|
+
if (state$1.rateLimitSeconds === void 0) return;
|
|
2615
|
+
const now = Date.now();
|
|
2616
|
+
if (!state$1.lastRequestTimestamp) {
|
|
2617
|
+
state$1.lastRequestTimestamp = now;
|
|
2618
|
+
return;
|
|
2619
|
+
}
|
|
2620
|
+
const elapsedSeconds = (now - state$1.lastRequestTimestamp) / 1e3;
|
|
2621
|
+
if (elapsedSeconds > state$1.rateLimitSeconds) {
|
|
2622
|
+
state$1.lastRequestTimestamp = now;
|
|
2623
|
+
return;
|
|
2624
|
+
}
|
|
2625
|
+
const waitTimeSeconds = Math.ceil(state$1.rateLimitSeconds - elapsedSeconds);
|
|
2626
|
+
if (!state$1.rateLimitWait) {
|
|
2627
|
+
consola.warn(`Rate limit exceeded. Need to wait ${waitTimeSeconds} more seconds.`);
|
|
2628
|
+
throw new HTTPError("Rate limit exceeded", Response.json({ message: "Rate limit exceeded" }, { status: 429 }));
|
|
2629
|
+
}
|
|
2630
|
+
const waitTimeMs = waitTimeSeconds * 1e3;
|
|
2631
|
+
consola.warn(`Rate limit reached. Waiting ${waitTimeSeconds} seconds before proceeding...`);
|
|
2632
|
+
await sleep(waitTimeMs);
|
|
2633
|
+
state$1.lastRequestTimestamp = now;
|
|
2634
|
+
consola.info("Rate limit wait completed, proceeding with request");
|
|
2635
|
+
}
|
|
2636
|
+
|
|
2637
|
+
//#endregion
|
|
2638
|
+
//#region src/services/copilot/create-chat-completions.ts
|
|
2639
|
+
const createChatCompletions = async (payload) => {
|
|
2640
|
+
if (!state.copilotToken) throw new Error("Copilot token not found");
|
|
2641
|
+
const enableVision = payload.messages.some((x) => typeof x.content !== "string" && x.content?.some((x$1) => x$1.type === "image_url"));
|
|
2642
|
+
const isAgentCall = payload.messages.some((msg) => ["assistant", "tool"].includes(msg.role));
|
|
2643
|
+
const buildHeaders = () => ({
|
|
2644
|
+
...copilotHeaders(state, enableVision),
|
|
2645
|
+
"X-Initiator": isAgentCall ? "agent" : "user"
|
|
2646
|
+
});
|
|
2647
|
+
consola.debug("Sending request to Copilot:", {
|
|
2648
|
+
model: payload.model,
|
|
2649
|
+
endpoint: `${copilotBaseUrl(state)}/chat/completions`
|
|
2650
|
+
});
|
|
2651
|
+
const url = `${copilotBaseUrl(state)}/chat/completions`;
|
|
2652
|
+
const body = payload.stream ? {
|
|
2653
|
+
...payload,
|
|
2654
|
+
stream_options: { include_usage: true }
|
|
2655
|
+
} : payload;
|
|
2656
|
+
const bodyString = JSON.stringify(body);
|
|
2657
|
+
const maxRetries = 2;
|
|
2658
|
+
let lastError;
|
|
2659
|
+
let response;
|
|
2660
|
+
for (let attempt = 0; attempt <= maxRetries; attempt++) try {
|
|
2661
|
+
response = await fetch(url, {
|
|
2662
|
+
method: "POST",
|
|
2663
|
+
headers: buildHeaders(),
|
|
2664
|
+
body: bodyString
|
|
2665
|
+
});
|
|
2666
|
+
break;
|
|
2667
|
+
} catch (error) {
|
|
2668
|
+
lastError = error;
|
|
2669
|
+
if (attempt < maxRetries) {
|
|
2670
|
+
const delay = 1e3 * (attempt + 1);
|
|
2671
|
+
consola.warn(`Network error on attempt ${attempt + 1}/${maxRetries + 1}, retrying in ${delay}ms:`, error instanceof Error ? error.message : error);
|
|
2672
|
+
await new Promise((r) => setTimeout(r, delay));
|
|
2673
|
+
}
|
|
2674
|
+
}
|
|
2675
|
+
if (!response) throw lastError;
|
|
2676
|
+
if (response.status === 401) {
|
|
2677
|
+
consola.warn("Copilot token expired, refreshing and retrying...");
|
|
2678
|
+
try {
|
|
2679
|
+
await refreshCopilotToken();
|
|
2680
|
+
response = await fetch(url, {
|
|
2681
|
+
method: "POST",
|
|
2682
|
+
headers: buildHeaders(),
|
|
2683
|
+
body: bodyString
|
|
2684
|
+
});
|
|
2685
|
+
} catch (refreshError) {
|
|
2686
|
+
consola.error("Failed to refresh token:", refreshError);
|
|
2687
|
+
}
|
|
2688
|
+
}
|
|
2689
|
+
if (!response.ok) {
|
|
2690
|
+
const errorBody = await response.text();
|
|
2691
|
+
consola.error("Failed to create chat completions", {
|
|
2692
|
+
status: response.status,
|
|
2693
|
+
statusText: response.statusText,
|
|
2694
|
+
body: errorBody
|
|
2695
|
+
});
|
|
2696
|
+
throw new HTTPError(`Failed to create chat completions: ${response.status} ${errorBody}`, response);
|
|
2697
|
+
}
|
|
2698
|
+
if (payload.stream) return events(response);
|
|
2699
|
+
return await response.json();
|
|
2700
|
+
};
|
|
2701
|
+
|
|
2702
|
+
//#endregion
|
|
2703
|
+
//#region src/routes/chat-completions/handler.ts
|
|
2704
|
+
/**
|
|
2705
|
+
* Set max_tokens from model limits if not already provided in the payload.
|
|
2706
|
+
*/
|
|
2707
|
+
function applyMaxTokens(payload) {
|
|
2708
|
+
if (!isNullish(payload.max_tokens)) return payload;
|
|
2709
|
+
const selectedModel = findModel(payload.model);
|
|
2710
|
+
if (!selectedModel) return payload;
|
|
2711
|
+
const maxTokens = selectedModel.capabilities.limits.max_output_tokens;
|
|
2712
|
+
if (maxTokens) {
|
|
2713
|
+
consola.debug("Set max_tokens to:", maxTokens);
|
|
2714
|
+
return {
|
|
2715
|
+
...payload,
|
|
2716
|
+
max_tokens: maxTokens
|
|
2717
|
+
};
|
|
2718
|
+
}
|
|
2719
|
+
return payload;
|
|
2720
|
+
}
|
|
2721
|
+
async function handleCompletion$1(c) {
|
|
2722
|
+
await checkRateLimit(state);
|
|
2723
|
+
const rawPayload = await c.req.json();
|
|
2724
|
+
consola.debug("Request payload:", JSON.stringify(rawPayload).slice(-400));
|
|
2725
|
+
const payload = applyMaxTokens(rawPayload);
|
|
2726
|
+
if (state.manualApprove) await awaitApproval();
|
|
2727
|
+
const response = await createChatCompletions(payload);
|
|
2728
|
+
if (isNonStreaming$1(response)) {
|
|
2729
|
+
consola.debug("Non-streaming response:", JSON.stringify(response));
|
|
2730
|
+
if (response.usage) setTokenUsage({
|
|
2731
|
+
inputTokens: response.usage.prompt_tokens,
|
|
2732
|
+
outputTokens: response.usage.completion_tokens,
|
|
2733
|
+
cacheReadTokens: response.usage.prompt_tokens_details?.cached_tokens
|
|
2734
|
+
});
|
|
2735
|
+
return c.json(response);
|
|
2736
|
+
}
|
|
2737
|
+
consola.debug("Streaming response");
|
|
2738
|
+
return streamSSE(c, async (stream) => {
|
|
2739
|
+
for await (const chunk of response) {
|
|
2740
|
+
consola.debug("Streaming chunk:", JSON.stringify(chunk));
|
|
2741
|
+
try {
|
|
2742
|
+
const sseChunk = chunk;
|
|
2743
|
+
if (sseChunk.data && sseChunk.data !== "[DONE]") {
|
|
2744
|
+
const parsed = JSON.parse(sseChunk.data);
|
|
2745
|
+
if (parsed.usage) {
|
|
2746
|
+
const usage = {
|
|
2747
|
+
inputTokens: parsed.usage.prompt_tokens ?? 0,
|
|
2748
|
+
outputTokens: parsed.usage.completion_tokens ?? 0,
|
|
2749
|
+
cacheReadTokens: parsed.usage.prompt_tokens_details?.cached_tokens
|
|
2750
|
+
};
|
|
2751
|
+
setTokenUsage(usage);
|
|
2752
|
+
}
|
|
2753
|
+
}
|
|
2754
|
+
} catch {}
|
|
2755
|
+
await stream.writeSSE(chunk);
|
|
2756
|
+
}
|
|
2757
|
+
signalStreamDone();
|
|
2758
|
+
});
|
|
2759
|
+
}
|
|
2760
|
+
const isNonStreaming$1 = (response) => Object.hasOwn(response, "choices");
|
|
2761
|
+
|
|
2762
|
+
//#endregion
|
|
2763
|
+
//#region src/routes/chat-completions/route.ts
|
|
2764
|
+
const completionRoutes = new Hono();
|
|
2765
|
+
completionRoutes.post("/", async (c) => {
|
|
2766
|
+
try {
|
|
2767
|
+
return await handleCompletion$1(c);
|
|
2768
|
+
} catch (error) {
|
|
2769
|
+
return await forwardError(c, error);
|
|
2770
|
+
}
|
|
2771
|
+
});
|
|
2772
|
+
|
|
2773
|
+
//#endregion
|
|
2774
|
+
//#region src/services/copilot/create-embeddings.ts
|
|
2775
|
+
const createEmbeddings = async (payload) => {
|
|
2776
|
+
if (!state.copilotToken) throw new Error("Copilot token not found");
|
|
2777
|
+
const response = await fetch(`${copilotBaseUrl(state)}/embeddings`, {
|
|
2778
|
+
method: "POST",
|
|
2779
|
+
headers: copilotHeaders(state),
|
|
2780
|
+
body: JSON.stringify(payload)
|
|
2781
|
+
});
|
|
2782
|
+
if (!response.ok) throw new HTTPError("Failed to create embeddings", response);
|
|
2783
|
+
return await response.json();
|
|
2784
|
+
};
|
|
2785
|
+
|
|
2786
|
+
//#endregion
|
|
2787
|
+
//#region src/routes/embeddings/route.ts
|
|
2788
|
+
const embeddingRoutes = new Hono();
|
|
2789
|
+
embeddingRoutes.post("/", async (c) => {
|
|
2790
|
+
try {
|
|
2791
|
+
const paylod = await c.req.json();
|
|
2792
|
+
const response = await createEmbeddings(paylod);
|
|
2793
|
+
return c.json(response);
|
|
2794
|
+
} catch (error) {
|
|
2795
|
+
return await forwardError(c, error);
|
|
2796
|
+
}
|
|
2797
|
+
});
|
|
2798
|
+
|
|
2611
2799
|
//#endregion
|
|
2612
2800
|
//#region src/lib/tokenizer.ts
|
|
2613
2801
|
const ENCODING_MAP = {
|
|
@@ -2801,337 +2989,6 @@ const getTokenCount = async (payload, model) => {
|
|
|
2801
2989
|
};
|
|
2802
2990
|
};
|
|
2803
2991
|
|
|
2804
|
-
//#endregion
|
|
2805
|
-
//#region src/lib/context-compression.ts
|
|
2806
|
-
/**
|
|
2807
|
-
* Get the maximum prompt token limit for a model.
|
|
2808
|
-
* Prefers max_prompt_tokens, falls back to max_context_window_tokens minus max_output_tokens.
|
|
2809
|
-
*/
|
|
2810
|
-
const getMaxPromptTokens = (model) => {
|
|
2811
|
-
const limits = model.capabilities.limits;
|
|
2812
|
-
if (limits.max_prompt_tokens) return limits.max_prompt_tokens;
|
|
2813
|
-
if (limits.max_context_window_tokens) {
|
|
2814
|
-
const outputReserve = limits.max_output_tokens ?? 4096;
|
|
2815
|
-
return limits.max_context_window_tokens - outputReserve;
|
|
2816
|
-
}
|
|
2817
|
-
};
|
|
2818
|
-
/**
|
|
2819
|
-
* Check if a message is a tool-related message (tool call or tool result).
|
|
2820
|
-
* Tool messages must be kept together with their paired assistant message.
|
|
2821
|
-
*/
|
|
2822
|
-
const isToolMessage = (message) => {
|
|
2823
|
-
return message.role === "tool";
|
|
2824
|
-
};
|
|
2825
|
-
/**
|
|
2826
|
-
* Check if an assistant message contains tool calls.
|
|
2827
|
-
*/
|
|
2828
|
-
const hasToolCalls = (message) => {
|
|
2829
|
-
return message.role === "assistant" && Array.isArray(message.tool_calls) && message.tool_calls.length > 0;
|
|
2830
|
-
};
|
|
2831
|
-
const groupMessages = (messages) => {
|
|
2832
|
-
const groups = [];
|
|
2833
|
-
let i = 0;
|
|
2834
|
-
while (i < messages.length) {
|
|
2835
|
-
const message = messages[i];
|
|
2836
|
-
if (message.role === "system" || message.role === "developer") {
|
|
2837
|
-
groups.push({
|
|
2838
|
-
messages: [message],
|
|
2839
|
-
isSystem: true,
|
|
2840
|
-
isRecent: false
|
|
2841
|
-
});
|
|
2842
|
-
i++;
|
|
2843
|
-
continue;
|
|
2844
|
-
}
|
|
2845
|
-
if (hasToolCalls(message)) {
|
|
2846
|
-
const group = [message];
|
|
2847
|
-
let j = i + 1;
|
|
2848
|
-
while (j < messages.length && isToolMessage(messages[j])) {
|
|
2849
|
-
group.push(messages[j]);
|
|
2850
|
-
j++;
|
|
2851
|
-
}
|
|
2852
|
-
groups.push({
|
|
2853
|
-
messages: group,
|
|
2854
|
-
isSystem: false,
|
|
2855
|
-
isRecent: false
|
|
2856
|
-
});
|
|
2857
|
-
i = j;
|
|
2858
|
-
continue;
|
|
2859
|
-
}
|
|
2860
|
-
groups.push({
|
|
2861
|
-
messages: [message],
|
|
2862
|
-
isSystem: false,
|
|
2863
|
-
isRecent: false
|
|
2864
|
-
});
|
|
2865
|
-
i++;
|
|
2866
|
-
}
|
|
2867
|
-
return groups;
|
|
2868
|
-
};
|
|
2869
|
-
/**
|
|
2870
|
-
* Create a truncation notice message to inform the model that earlier context was removed.
|
|
2871
|
-
*/
|
|
2872
|
-
const createTruncationNotice = () => ({
|
|
2873
|
-
role: "user",
|
|
2874
|
-
content: "[Note: Earlier conversation history was automatically truncated to fit within the model's context window. The most recent messages have been preserved.]"
|
|
2875
|
-
});
|
|
2876
|
-
/**
|
|
2877
|
-
* Intelligently truncate messages to fit within the model's token limit.
|
|
2878
|
-
*
|
|
2879
|
-
* Strategy:
|
|
2880
|
-
* 1. Always preserve system/developer messages (they contain critical instructions)
|
|
2881
|
-
* 2. Always preserve the most recent messages (they contain the current task context)
|
|
2882
|
-
* 3. Remove middle conversation messages, oldest first
|
|
2883
|
-
* 4. Insert a truncation notice where messages were removed
|
|
2884
|
-
* 5. Keep tool call/result pairs together (never split them)
|
|
2885
|
-
*
|
|
2886
|
-
* Safety margin: keeps 5% below the limit to account for token counting inaccuracies.
|
|
2887
|
-
*/
|
|
2888
|
-
const truncateMessages = async (payload, model) => {
|
|
2889
|
-
const maxPromptTokens = getMaxPromptTokens(model);
|
|
2890
|
-
if (!maxPromptTokens) {
|
|
2891
|
-
consola.debug("No token limit found for model, skipping truncation");
|
|
2892
|
-
return payload;
|
|
2893
|
-
}
|
|
2894
|
-
const tokenCount = await getTokenCount(payload, model);
|
|
2895
|
-
const safeLimit = Math.floor(maxPromptTokens * .95);
|
|
2896
|
-
if (tokenCount.input <= safeLimit) return payload;
|
|
2897
|
-
const groups = groupMessages(payload.messages);
|
|
2898
|
-
const systemGroups = groups.filter((g) => g.isSystem);
|
|
2899
|
-
const conversationGroups = groups.filter((g) => !g.isSystem);
|
|
2900
|
-
if (conversationGroups.length === 0) {
|
|
2901
|
-
consola.warn("No conversation messages to truncate, only system messages");
|
|
2902
|
-
return payload;
|
|
2903
|
-
}
|
|
2904
|
-
let truncatedPayload = payload;
|
|
2905
|
-
let dropCount = 0;
|
|
2906
|
-
const maxDrop = Math.max(0, conversationGroups.length - 1);
|
|
2907
|
-
while (dropCount <= maxDrop) {
|
|
2908
|
-
const keptConversationGroups = conversationGroups.slice(dropCount);
|
|
2909
|
-
const truncationNotice = dropCount > 0 ? [createTruncationNotice()] : [];
|
|
2910
|
-
const newMessages = [
|
|
2911
|
-
...systemGroups.flatMap((g) => g.messages),
|
|
2912
|
-
...truncationNotice,
|
|
2913
|
-
...keptConversationGroups.flatMap((g) => g.messages)
|
|
2914
|
-
];
|
|
2915
|
-
truncatedPayload = {
|
|
2916
|
-
...payload,
|
|
2917
|
-
messages: newMessages
|
|
2918
|
-
};
|
|
2919
|
-
const newTokenCount = await getTokenCount(truncatedPayload, model);
|
|
2920
|
-
if (newTokenCount.input <= safeLimit) {
|
|
2921
|
-
if (dropCount > 0) {
|
|
2922
|
-
const droppedMessages = conversationGroups.slice(0, dropCount).reduce((sum, g) => sum + g.messages.length, 0);
|
|
2923
|
-
console.log(`Truncated: ${tokenCount.input} -> ${newTokenCount.input} tokens (-${droppedMessages} msgs)`);
|
|
2924
|
-
}
|
|
2925
|
-
return truncatedPayload;
|
|
2926
|
-
}
|
|
2927
|
-
dropCount++;
|
|
2928
|
-
}
|
|
2929
|
-
const finalTokenCount = await getTokenCount(truncatedPayload, model);
|
|
2930
|
-
consola.warn(`Could not reduce tokens below limit even after maximum truncation. Current: ${finalTokenCount.input}, limit: ${maxPromptTokens}. System messages or the last message may be too large.`);
|
|
2931
|
-
return truncatedPayload;
|
|
2932
|
-
};
|
|
2933
|
-
|
|
2934
|
-
//#endregion
|
|
2935
|
-
//#region src/lib/rate-limit.ts
|
|
2936
|
-
async function checkRateLimit(state$1) {
|
|
2937
|
-
if (state$1.rateLimitSeconds === void 0) return;
|
|
2938
|
-
const now = Date.now();
|
|
2939
|
-
if (!state$1.lastRequestTimestamp) {
|
|
2940
|
-
state$1.lastRequestTimestamp = now;
|
|
2941
|
-
return;
|
|
2942
|
-
}
|
|
2943
|
-
const elapsedSeconds = (now - state$1.lastRequestTimestamp) / 1e3;
|
|
2944
|
-
if (elapsedSeconds > state$1.rateLimitSeconds) {
|
|
2945
|
-
state$1.lastRequestTimestamp = now;
|
|
2946
|
-
return;
|
|
2947
|
-
}
|
|
2948
|
-
const waitTimeSeconds = Math.ceil(state$1.rateLimitSeconds - elapsedSeconds);
|
|
2949
|
-
if (!state$1.rateLimitWait) {
|
|
2950
|
-
consola.warn(`Rate limit exceeded. Need to wait ${waitTimeSeconds} more seconds.`);
|
|
2951
|
-
throw new HTTPError("Rate limit exceeded", Response.json({ message: "Rate limit exceeded" }, { status: 429 }));
|
|
2952
|
-
}
|
|
2953
|
-
const waitTimeMs = waitTimeSeconds * 1e3;
|
|
2954
|
-
consola.warn(`Rate limit reached. Waiting ${waitTimeSeconds} seconds before proceeding...`);
|
|
2955
|
-
await sleep(waitTimeMs);
|
|
2956
|
-
state$1.lastRequestTimestamp = now;
|
|
2957
|
-
consola.info("Rate limit wait completed, proceeding with request");
|
|
2958
|
-
}
|
|
2959
|
-
|
|
2960
|
-
//#endregion
|
|
2961
|
-
//#region src/services/copilot/create-chat-completions.ts
|
|
2962
|
-
const createChatCompletions = async (payload) => {
|
|
2963
|
-
if (!state.copilotToken) throw new Error("Copilot token not found");
|
|
2964
|
-
const enableVision = payload.messages.some((x) => typeof x.content !== "string" && x.content?.some((x$1) => x$1.type === "image_url"));
|
|
2965
|
-
const isAgentCall = payload.messages.some((msg) => ["assistant", "tool"].includes(msg.role));
|
|
2966
|
-
const buildHeaders = () => ({
|
|
2967
|
-
...copilotHeaders(state, enableVision),
|
|
2968
|
-
"X-Initiator": isAgentCall ? "agent" : "user"
|
|
2969
|
-
});
|
|
2970
|
-
consola.debug("Sending request to Copilot:", {
|
|
2971
|
-
model: payload.model,
|
|
2972
|
-
endpoint: `${copilotBaseUrl(state)}/chat/completions`
|
|
2973
|
-
});
|
|
2974
|
-
const url = `${copilotBaseUrl(state)}/chat/completions`;
|
|
2975
|
-
const body = payload.stream ? {
|
|
2976
|
-
...payload,
|
|
2977
|
-
stream_options: { include_usage: true }
|
|
2978
|
-
} : payload;
|
|
2979
|
-
const bodyString = JSON.stringify(body);
|
|
2980
|
-
const maxRetries = 2;
|
|
2981
|
-
let lastError;
|
|
2982
|
-
let response;
|
|
2983
|
-
for (let attempt = 0; attempt <= maxRetries; attempt++) try {
|
|
2984
|
-
response = await fetch(url, {
|
|
2985
|
-
method: "POST",
|
|
2986
|
-
headers: buildHeaders(),
|
|
2987
|
-
body: bodyString
|
|
2988
|
-
});
|
|
2989
|
-
break;
|
|
2990
|
-
} catch (error) {
|
|
2991
|
-
lastError = error;
|
|
2992
|
-
if (attempt < maxRetries) {
|
|
2993
|
-
const delay = 1e3 * (attempt + 1);
|
|
2994
|
-
consola.warn(`Network error on attempt ${attempt + 1}/${maxRetries + 1}, retrying in ${delay}ms:`, error instanceof Error ? error.message : error);
|
|
2995
|
-
await new Promise((r) => setTimeout(r, delay));
|
|
2996
|
-
}
|
|
2997
|
-
}
|
|
2998
|
-
if (!response) throw lastError;
|
|
2999
|
-
if (response.status === 401) {
|
|
3000
|
-
consola.warn("Copilot token expired, refreshing and retrying...");
|
|
3001
|
-
try {
|
|
3002
|
-
await refreshCopilotToken();
|
|
3003
|
-
response = await fetch(url, {
|
|
3004
|
-
method: "POST",
|
|
3005
|
-
headers: buildHeaders(),
|
|
3006
|
-
body: bodyString
|
|
3007
|
-
});
|
|
3008
|
-
} catch (refreshError) {
|
|
3009
|
-
consola.error("Failed to refresh token:", refreshError);
|
|
3010
|
-
}
|
|
3011
|
-
}
|
|
3012
|
-
if (!response.ok) {
|
|
3013
|
-
const errorBody = await response.text();
|
|
3014
|
-
consola.error("Failed to create chat completions", {
|
|
3015
|
-
status: response.status,
|
|
3016
|
-
statusText: response.statusText,
|
|
3017
|
-
body: errorBody
|
|
3018
|
-
});
|
|
3019
|
-
throw new HTTPError(`Failed to create chat completions: ${response.status} ${errorBody}`, response);
|
|
3020
|
-
}
|
|
3021
|
-
if (payload.stream) return events(response);
|
|
3022
|
-
return await response.json();
|
|
3023
|
-
};
|
|
3024
|
-
|
|
3025
|
-
//#endregion
|
|
3026
|
-
//#region src/routes/chat-completions/handler.ts
|
|
3027
|
-
/**
|
|
3028
|
-
* Calculate token count, log it, and auto-truncate if needed.
|
|
3029
|
-
*
|
|
3030
|
-
* Uses multi-strategy exact matching via findModel() to handle
|
|
3031
|
-
* mismatches between requested and available model names.
|
|
3032
|
-
*/
|
|
3033
|
-
async function processPayloadTokens(payload) {
|
|
3034
|
-
const selectedModel = findModel(payload.model);
|
|
3035
|
-
if (!selectedModel) {
|
|
3036
|
-
consola.warn("No model selected, skipping token count calculation");
|
|
3037
|
-
return payload;
|
|
3038
|
-
}
|
|
3039
|
-
try {
|
|
3040
|
-
const tokenCount = await getTokenCount(payload, selectedModel);
|
|
3041
|
-
consola.debug("Current token count:", tokenCount);
|
|
3042
|
-
const truncated = await truncateMessages(payload, selectedModel);
|
|
3043
|
-
if (isNullish(truncated.max_tokens)) {
|
|
3044
|
-
const withMaxTokens = {
|
|
3045
|
-
...truncated,
|
|
3046
|
-
max_tokens: selectedModel.capabilities.limits.max_output_tokens
|
|
3047
|
-
};
|
|
3048
|
-
consola.debug("Set max_tokens to:", JSON.stringify(withMaxTokens.max_tokens));
|
|
3049
|
-
return withMaxTokens;
|
|
3050
|
-
}
|
|
3051
|
-
return truncated;
|
|
3052
|
-
} catch (error) {
|
|
3053
|
-
consola.warn("Failed to calculate token count:", error);
|
|
3054
|
-
return payload;
|
|
3055
|
-
}
|
|
3056
|
-
}
|
|
3057
|
-
async function handleCompletion$1(c) {
|
|
3058
|
-
await checkRateLimit(state);
|
|
3059
|
-
const rawPayload = await c.req.json();
|
|
3060
|
-
consola.debug("Request payload:", JSON.stringify(rawPayload).slice(-400));
|
|
3061
|
-
const payload = await processPayloadTokens(rawPayload);
|
|
3062
|
-
if (state.manualApprove) await awaitApproval();
|
|
3063
|
-
const response = await createChatCompletions(payload);
|
|
3064
|
-
if (isNonStreaming$1(response)) {
|
|
3065
|
-
consola.debug("Non-streaming response:", JSON.stringify(response));
|
|
3066
|
-
if (response.usage) setTokenUsage({
|
|
3067
|
-
inputTokens: response.usage.prompt_tokens,
|
|
3068
|
-
outputTokens: response.usage.completion_tokens,
|
|
3069
|
-
cacheReadTokens: response.usage.prompt_tokens_details?.cached_tokens
|
|
3070
|
-
});
|
|
3071
|
-
return c.json(response);
|
|
3072
|
-
}
|
|
3073
|
-
consola.debug("Streaming response");
|
|
3074
|
-
return streamSSE(c, async (stream) => {
|
|
3075
|
-
for await (const chunk of response) {
|
|
3076
|
-
consola.debug("Streaming chunk:", JSON.stringify(chunk));
|
|
3077
|
-
try {
|
|
3078
|
-
const sseChunk = chunk;
|
|
3079
|
-
if (sseChunk.data && sseChunk.data !== "[DONE]") {
|
|
3080
|
-
const parsed = JSON.parse(sseChunk.data);
|
|
3081
|
-
if (parsed.usage) {
|
|
3082
|
-
const usage = {
|
|
3083
|
-
inputTokens: parsed.usage.prompt_tokens ?? 0,
|
|
3084
|
-
outputTokens: parsed.usage.completion_tokens ?? 0,
|
|
3085
|
-
cacheReadTokens: parsed.usage.prompt_tokens_details?.cached_tokens
|
|
3086
|
-
};
|
|
3087
|
-
setTokenUsage(usage);
|
|
3088
|
-
}
|
|
3089
|
-
}
|
|
3090
|
-
} catch {}
|
|
3091
|
-
await stream.writeSSE(chunk);
|
|
3092
|
-
}
|
|
3093
|
-
signalStreamDone();
|
|
3094
|
-
});
|
|
3095
|
-
}
|
|
3096
|
-
const isNonStreaming$1 = (response) => Object.hasOwn(response, "choices");
|
|
3097
|
-
|
|
3098
|
-
//#endregion
|
|
3099
|
-
//#region src/routes/chat-completions/route.ts
|
|
3100
|
-
const completionRoutes = new Hono();
|
|
3101
|
-
completionRoutes.post("/", async (c) => {
|
|
3102
|
-
try {
|
|
3103
|
-
return await handleCompletion$1(c);
|
|
3104
|
-
} catch (error) {
|
|
3105
|
-
return await forwardError(c, error);
|
|
3106
|
-
}
|
|
3107
|
-
});
|
|
3108
|
-
|
|
3109
|
-
//#endregion
|
|
3110
|
-
//#region src/services/copilot/create-embeddings.ts
|
|
3111
|
-
const createEmbeddings = async (payload) => {
|
|
3112
|
-
if (!state.copilotToken) throw new Error("Copilot token not found");
|
|
3113
|
-
const response = await fetch(`${copilotBaseUrl(state)}/embeddings`, {
|
|
3114
|
-
method: "POST",
|
|
3115
|
-
headers: copilotHeaders(state),
|
|
3116
|
-
body: JSON.stringify(payload)
|
|
3117
|
-
});
|
|
3118
|
-
if (!response.ok) throw new HTTPError("Failed to create embeddings", response);
|
|
3119
|
-
return await response.json();
|
|
3120
|
-
};
|
|
3121
|
-
|
|
3122
|
-
//#endregion
|
|
3123
|
-
//#region src/routes/embeddings/route.ts
|
|
3124
|
-
const embeddingRoutes = new Hono();
|
|
3125
|
-
embeddingRoutes.post("/", async (c) => {
|
|
3126
|
-
try {
|
|
3127
|
-
const paylod = await c.req.json();
|
|
3128
|
-
const response = await createEmbeddings(paylod);
|
|
3129
|
-
return c.json(response);
|
|
3130
|
-
} catch (error) {
|
|
3131
|
-
return await forwardError(c, error);
|
|
3132
|
-
}
|
|
3133
|
-
});
|
|
3134
|
-
|
|
3135
2992
|
//#endregion
|
|
3136
2993
|
//#region src/routes/messages/utils.ts
|
|
3137
2994
|
function mapOpenAIStopReasonToAnthropic(finishReason) {
|
|
@@ -3373,7 +3230,7 @@ async function handleCountTokens(c) {
|
|
|
3373
3230
|
let finalTokenCount = tokenCount.input + tokenCount.output;
|
|
3374
3231
|
if (anthropicPayload.model.startsWith("claude")) finalTokenCount = Math.round(finalTokenCount * 1.15);
|
|
3375
3232
|
else if (anthropicPayload.model.startsWith("grok")) finalTokenCount = Math.round(finalTokenCount * 1.03);
|
|
3376
|
-
|
|
3233
|
+
console.log("Token count:", finalTokenCount);
|
|
3377
3234
|
return c.json({ input_tokens: finalTokenCount });
|
|
3378
3235
|
} catch (error) {
|
|
3379
3236
|
consola.error("Error counting tokens:", error);
|
|
@@ -3507,30 +3364,10 @@ function translateChunkToAnthropicEvents(chunk, state$1) {
|
|
|
3507
3364
|
|
|
3508
3365
|
//#endregion
|
|
3509
3366
|
//#region src/routes/messages/handler.ts
|
|
3510
|
-
/**
|
|
3511
|
-
* Auto-truncate OpenAI payload if prompt tokens exceed model limit.
|
|
3512
|
-
*
|
|
3513
|
-
* Uses multi-strategy exact matching via findModel() to handle
|
|
3514
|
-
* mismatches between Anthropic and Copilot model naming conventions.
|
|
3515
|
-
*/
|
|
3516
|
-
async function autoTruncatePayload(payload) {
|
|
3517
|
-
const selectedModel = findModel(payload.model);
|
|
3518
|
-
if (!selectedModel) {
|
|
3519
|
-
consola.warn("No model selected for Anthropic endpoint, skipping auto-truncation");
|
|
3520
|
-
return payload;
|
|
3521
|
-
}
|
|
3522
|
-
try {
|
|
3523
|
-
return await truncateMessages(payload, selectedModel);
|
|
3524
|
-
} catch (error) {
|
|
3525
|
-
consola.warn("Failed to auto-truncate context:", error);
|
|
3526
|
-
return payload;
|
|
3527
|
-
}
|
|
3528
|
-
}
|
|
3529
3367
|
async function handleCompletion(c) {
|
|
3530
3368
|
await checkRateLimit(state);
|
|
3531
3369
|
const anthropicPayload = await c.req.json();
|
|
3532
|
-
const rawOpenAIPayload = translateToOpenAI(anthropicPayload);
|
|
3533
|
-
const openAIPayload = await autoTruncatePayload(rawOpenAIPayload);
|
|
3370
|
+
const openAIPayload = translateToOpenAI(anthropicPayload);
|
|
3534
3371
|
if (state.manualApprove) await awaitApproval();
|
|
3535
3372
|
const response = await createChatCompletions(openAIPayload);
|
|
3536
3373
|
if (isNonStreaming(response)) {
|