copilot-api-plus 1.0.37 → 1.0.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -4,9 +4,9 @@ import { state } from "./state-CcLGr8VN.js";
4
4
  import { GITHUB_API_BASE_URL, copilotBaseUrl, copilotHeaders, githubHeaders } from "./get-user-BzIEATcF.js";
5
5
  import { HTTPError, forwardError } from "./error-CvU5otz-.js";
6
6
  import { cacheModels, cacheVSCodeVersion, clearGithubToken, isNullish, setupCopilotToken, setupGitHubToken, sleep } from "./token-ClgudjZm.js";
7
- import { clearAntigravityAuth, disableCurrentAccount, getAntigravityAuthPath, getApiKey, getValidAccessToken, rotateAccount } from "./auth-CM_ilreU.js";
7
+ import { clearAntigravityAuth, disableCurrentAccount, getAntigravityAuthPath, getApiKey, getCurrentProjectId, getValidAccessToken, rotateAccount } from "./auth-Dz36Lk4o.js";
8
8
  import { clearZenAuth, getZenAuthPath } from "./auth-T55-Bhoo.js";
9
- import { getAntigravityModels, getAntigravityUsage, isThinkingModel } from "./get-models-DMdiCNoU.js";
9
+ import { getAntigravityModels, getAntigravityUsage, isThinkingModel } from "./get-models-VsThqHZf.js";
10
10
  import { createRequire } from "node:module";
11
11
  import { defineCommand, runMain } from "citty";
12
12
  import consola from "consola";
@@ -540,7 +540,7 @@ function initProxyFromEnv() {
540
540
  * Add a new Antigravity account via OAuth
541
541
  */
542
542
  async function addAccount() {
543
- const { setupAntigravity, loadAntigravityAuth } = await import("./auth-B2lTFLSD.js");
543
+ const { setupAntigravity, loadAntigravityAuth } = await import("./auth-BgquW2Yd.js");
544
544
  const existingAuth = await loadAntigravityAuth();
545
545
  if (existingAuth && existingAuth.accounts.length > 0) {
546
546
  const enabledCount = existingAuth.accounts.filter((a) => a.enable).length;
@@ -552,7 +552,7 @@ async function addAccount() {
552
552
  * List all Antigravity accounts
553
553
  */
554
554
  async function listAccounts() {
555
- const { loadAntigravityAuth } = await import("./auth-B2lTFLSD.js");
555
+ const { loadAntigravityAuth } = await import("./auth-BgquW2Yd.js");
556
556
  const auth$1 = await loadAntigravityAuth();
557
557
  if (!auth$1 || auth$1.accounts.length === 0) {
558
558
  consola.info("No Antigravity accounts configured");
@@ -573,7 +573,7 @@ async function listAccounts() {
573
573
  * Remove an Antigravity account by index
574
574
  */
575
575
  async function removeAccount(index) {
576
- const { loadAntigravityAuth, saveAntigravityAuth } = await import("./auth-B2lTFLSD.js");
576
+ const { loadAntigravityAuth, saveAntigravityAuth } = await import("./auth-BgquW2Yd.js");
577
577
  const auth$1 = await loadAntigravityAuth();
578
578
  if (!auth$1 || auth$1.accounts.length === 0) {
579
579
  consola.error("No Antigravity accounts configured");
@@ -592,7 +592,7 @@ async function removeAccount(index) {
592
592
  * Clear all Antigravity accounts
593
593
  */
594
594
  async function clearAccounts() {
595
- const { clearAntigravityAuth: clearAntigravityAuth$1 } = await import("./auth-B2lTFLSD.js");
595
+ const { clearAntigravityAuth: clearAntigravityAuth$1 } = await import("./auth-BgquW2Yd.js");
596
596
  if (await consola.prompt("Are you sure you want to remove all Antigravity accounts?", {
597
597
  type: "confirm",
598
598
  initial: false
@@ -1403,9 +1403,9 @@ function processChunk(chunk, state$1) {
1403
1403
 
1404
1404
  //#endregion
1405
1405
  //#region src/services/antigravity/create-chat-completions.ts
1406
- const ANTIGRAVITY_API_HOST$1 = "daily-cloudcode-pa.sandbox.googleapis.com";
1407
- const ANTIGRAVITY_STREAM_URL$1 = `https://${ANTIGRAVITY_API_HOST$1}/v1internal:streamGenerateContent?alt=sse`;
1408
- const ANTIGRAVITY_NO_STREAM_URL$1 = `https://${ANTIGRAVITY_API_HOST$1}/v1internal:generateContent`;
1406
+ const ANTIGRAVITY_API_HOST = "daily-cloudcode-pa.sandbox.googleapis.com";
1407
+ const ANTIGRAVITY_STREAM_URL = `https://${ANTIGRAVITY_API_HOST}/v1internal:streamGenerateContent?alt=sse`;
1408
+ const ANTIGRAVITY_NO_STREAM_URL = `https://${ANTIGRAVITY_API_HOST}/v1internal:generateContent`;
1409
1409
  const ANTIGRAVITY_USER_AGENT$1 = "antigravity/1.11.3 windows/amd64";
1410
1410
  const GEMINI_API_HOST = "generativelanguage.googleapis.com";
1411
1411
  const getGeminiStreamUrl = (model, apiKey) => `https://${GEMINI_API_HOST}/v1beta/models/${model}:streamGenerateContent?alt=sse&key=${apiKey}`;
@@ -1625,14 +1625,14 @@ async function createWithApiKey(request, apiKey) {
1625
1625
  * Note: Both Gemini and Claude models use the same endpoint and Gemini-style format
1626
1626
  */
1627
1627
  async function createWithOAuth(request, accessToken) {
1628
- const endpoint = request.stream ? ANTIGRAVITY_STREAM_URL$1 : ANTIGRAVITY_NO_STREAM_URL$1;
1628
+ const endpoint = request.stream ? ANTIGRAVITY_STREAM_URL : ANTIGRAVITY_NO_STREAM_URL;
1629
1629
  const body = buildAntigravityRequestBody(request);
1630
1630
  consola.debug(`Antigravity request to ${endpoint} with model ${request.model}`);
1631
1631
  try {
1632
1632
  const response = await fetch(endpoint, {
1633
1633
  method: "POST",
1634
1634
  headers: {
1635
- Host: ANTIGRAVITY_API_HOST$1,
1635
+ Host: ANTIGRAVITY_API_HOST,
1636
1636
  "User-Agent": ANTIGRAVITY_USER_AGENT$1,
1637
1637
  Authorization: `Bearer ${accessToken}`,
1638
1638
  "Content-Type": "application/json",
@@ -2036,10 +2036,49 @@ function generateToolId() {
2036
2036
 
2037
2037
  //#endregion
2038
2038
  //#region src/services/antigravity/create-messages.ts
2039
- const ANTIGRAVITY_API_HOST = "daily-cloudcode-pa.sandbox.googleapis.com";
2040
- const ANTIGRAVITY_STREAM_URL = `https://${ANTIGRAVITY_API_HOST}/v1internal:streamGenerateContent?alt=sse`;
2041
- const ANTIGRAVITY_NO_STREAM_URL = `https://${ANTIGRAVITY_API_HOST}/v1internal:generateContent`;
2039
+ const ANTIGRAVITY_ENDPOINTS = ["daily-cloudcode-pa.sandbox.googleapis.com", "cloudcode-pa.googleapis.com"];
2040
+ let currentEndpointIndex = 0;
2041
+ function getStreamUrl(host) {
2042
+ return `https://${host}/v1internal:streamGenerateContent?alt=sse`;
2043
+ }
2044
+ function getNoStreamUrl(host) {
2045
+ return `https://${host}/v1internal:generateContent`;
2046
+ }
2047
+ function getCurrentHost() {
2048
+ return ANTIGRAVITY_ENDPOINTS[currentEndpointIndex];
2049
+ }
2050
+ function rotateEndpoint() {
2051
+ const oldIndex = currentEndpointIndex;
2052
+ currentEndpointIndex = (currentEndpointIndex + 1) % ANTIGRAVITY_ENDPOINTS.length;
2053
+ consola.info(`Rotating endpoint: ${ANTIGRAVITY_ENDPOINTS[oldIndex]} → ${ANTIGRAVITY_ENDPOINTS[currentEndpointIndex]}`);
2054
+ }
2042
2055
  const ANTIGRAVITY_USER_AGENT = "antigravity/1.11.3 windows/amd64";
2056
+ const rateLimitTracker = {};
2057
+ function getModelFamily(model) {
2058
+ if (model.includes("claude")) return "claude";
2059
+ if (model.includes("gemini")) return "gemini";
2060
+ return "other";
2061
+ }
2062
+ function trackRateLimit(model) {
2063
+ const family = getModelFamily(model);
2064
+ if (!rateLimitTracker[family]) rateLimitTracker[family] = {
2065
+ lastLimitTime: 0,
2066
+ consecutiveErrors: 0
2067
+ };
2068
+ rateLimitTracker[family].lastLimitTime = Date.now();
2069
+ rateLimitTracker[family].consecutiveErrors++;
2070
+ }
2071
+ function clearRateLimitTracker(model) {
2072
+ const family = getModelFamily(model);
2073
+ if (rateLimitTracker[family]) rateLimitTracker[family].consecutiveErrors = 0;
2074
+ }
2075
+ function getBackoffDelay(model, baseDelay) {
2076
+ const family = getModelFamily(model);
2077
+ const info = rateLimitTracker[family];
2078
+ if (!info) return baseDelay;
2079
+ const multiplier = Math.min(Math.pow(2, info.consecutiveErrors - 1), 60);
2080
+ return Math.min(baseDelay * multiplier, 3e4);
2081
+ }
2043
2082
  /**
2044
2083
  * Extract text from system content (can be string or array)
2045
2084
  */
@@ -2161,7 +2200,7 @@ function convertTools(tools) {
2161
2200
  * Build Antigravity request body
2162
2201
  * The Antigravity API expects a specific nested structure with request object
2163
2202
  */
2164
- function buildGeminiRequest(request) {
2203
+ function buildGeminiRequest(request, projectId) {
2165
2204
  const { contents, systemInstruction } = convertMessages(request.messages, request.system);
2166
2205
  const tools = convertTools(request.tools);
2167
2206
  const innerRequest = {
@@ -2179,12 +2218,14 @@ function buildGeminiRequest(request) {
2179
2218
  ...innerRequest.generationConfig,
2180
2219
  thinkingConfig: { includeThoughts: true }
2181
2220
  };
2182
- return {
2221
+ const result = {
2183
2222
  model: request.model,
2184
2223
  userAgent: "antigravity",
2185
2224
  requestId: `agent-${crypto.randomUUID()}`,
2186
2225
  request: innerRequest
2187
2226
  };
2227
+ if (projectId) result.project = projectId;
2228
+ return result;
2188
2229
  }
2189
2230
  /**
2190
2231
  * Create error response
@@ -2204,19 +2245,29 @@ function createErrorResponse(type, message, status) {
2204
2245
  /**
2205
2246
  * Create Anthropic-compatible message response using Antigravity
2206
2247
  * Note: Both Gemini and Claude models use the same endpoint and Gemini-style format
2248
+ *
2249
+ * Features:
2250
+ * - Endpoint fallback (daily → prod)
2251
+ * - Per-model-family rate limit tracking
2252
+ * - Exponential backoff for consecutive errors
2253
+ * - Smart retry for short delays (≤5s on same endpoint)
2207
2254
  */
2208
2255
  const MAX_RETRIES$3 = 5;
2256
+ const MAX_ENDPOINT_RETRIES = 2;
2209
2257
  async function executeAntigravityRequest(request) {
2210
- const endpoint = request.stream ? ANTIGRAVITY_STREAM_URL : ANTIGRAVITY_NO_STREAM_URL;
2211
- const body = buildGeminiRequest(request);
2258
+ const projectId = await getCurrentProjectId();
2259
+ const body = buildGeminiRequest(request, projectId);
2260
+ let endpointRetries = 0;
2212
2261
  for (let attempt = 0; attempt <= MAX_RETRIES$3; attempt++) {
2262
+ const host = getCurrentHost();
2263
+ const endpoint = request.stream ? getStreamUrl(host) : getNoStreamUrl(host);
2213
2264
  const accessToken = await getValidAccessToken();
2214
2265
  if (!accessToken) return createErrorResponse("authentication_error", "No valid Antigravity access token available.", 401);
2215
2266
  try {
2216
2267
  const response = await fetch(endpoint, {
2217
2268
  method: "POST",
2218
2269
  headers: {
2219
- Host: ANTIGRAVITY_API_HOST,
2270
+ Host: host,
2220
2271
  "User-Agent": ANTIGRAVITY_USER_AGENT,
2221
2272
  Authorization: `Bearer ${accessToken}`,
2222
2273
  "Content-Type": "application/json",
@@ -2224,17 +2275,33 @@ async function executeAntigravityRequest(request) {
2224
2275
  },
2225
2276
  body: JSON.stringify(body)
2226
2277
  });
2227
- if (response.ok) return request.stream ? transformStreamResponse(response, request.model) : await transformNonStreamResponse(response, request.model);
2228
- const errorResult = await handleApiError(response);
2278
+ if (response.ok) {
2279
+ clearRateLimitTracker(request.model);
2280
+ return request.stream ? transformStreamResponse(response, request.model) : await transformNonStreamResponse(response, request.model);
2281
+ }
2282
+ const errorResult = await handleApiError(response, request.model);
2229
2283
  if (errorResult.shouldRetry && attempt < MAX_RETRIES$3) {
2230
- consola.info(`Rate limited, retrying in ${errorResult.retryDelayMs}ms`);
2231
- await sleep(errorResult.retryDelayMs);
2284
+ trackRateLimit(request.model);
2285
+ const backoffDelay = getBackoffDelay(request.model, errorResult.retryDelayMs);
2286
+ if (backoffDelay <= 5e3 || endpointRetries >= MAX_ENDPOINT_RETRIES) {
2287
+ consola.info(`Rate limited, retrying in ${backoffDelay}ms (attempt ${attempt + 1}/${MAX_RETRIES$3})`);
2288
+ await sleep(backoffDelay);
2289
+ } else {
2290
+ rotateEndpoint();
2291
+ endpointRetries++;
2292
+ consola.info(`Switching endpoint, retrying in ${errorResult.retryDelayMs}ms`);
2293
+ await sleep(errorResult.retryDelayMs);
2294
+ }
2232
2295
  continue;
2233
2296
  }
2234
2297
  return errorResult.response;
2235
2298
  } catch (error) {
2236
2299
  consola.error("Antigravity request error:", error);
2237
2300
  if (attempt < MAX_RETRIES$3) {
2301
+ if (endpointRetries < MAX_ENDPOINT_RETRIES) {
2302
+ rotateEndpoint();
2303
+ endpointRetries++;
2304
+ }
2238
2305
  await sleep(500);
2239
2306
  continue;
2240
2307
  }
@@ -2248,10 +2315,15 @@ async function createAntigravityMessages(request) {
2248
2315
  }
2249
2316
  /**
2250
2317
  * Parse retry delay from error response
2318
+ * Supports multiple formats:
2319
+ * - RetryInfo.retryDelay: "3.5s"
2320
+ * - quotaResetDelay: "3000ms" or "3s"
2321
+ * - message: "Your quota will reset after 3s"
2251
2322
  */
2252
2323
  function parseRetryDelay$3(errorText) {
2253
2324
  try {
2254
- const details = JSON.parse(errorText).error?.details ?? [];
2325
+ const errorData = JSON.parse(errorText);
2326
+ const details = errorData.error?.details ?? [];
2255
2327
  for (const detail of details) {
2256
2328
  if (detail["@type"]?.includes("RetryInfo") && detail.retryDelay) {
2257
2329
  const match = /(\d+(?:\.\d+)?)s/.exec(detail.retryDelay);
@@ -2265,13 +2337,16 @@ function parseRetryDelay$3(errorText) {
2265
2337
  }
2266
2338
  }
2267
2339
  }
2340
+ const message = errorData.error?.message ?? "";
2341
+ const resetMatch = /quota will reset after (\d+(?:\.\d+)?)s/i.exec(message);
2342
+ if (resetMatch) return Math.ceil(Number.parseFloat(resetMatch[1]) * 1e3);
2268
2343
  } catch {}
2269
2344
  return 500;
2270
2345
  }
2271
2346
  /**
2272
2347
  * Handle API error response
2273
2348
  */
2274
- async function handleApiError(response) {
2349
+ async function handleApiError(response, _model) {
2275
2350
  const errorText = await response.text();
2276
2351
  consola.error(`Antigravity error: ${response.status} ${errorText}`);
2277
2352
  if (response.status === 403) await disableCurrentAccount();
@@ -2492,32 +2567,6 @@ const awaitApproval = async () => {
2492
2567
  if (!await consola.prompt(`Accept incoming request?`, { type: "confirm" })) throw new HTTPError("Request rejected", Response.json({ message: "Request rejected" }, { status: 403 }));
2493
2568
  };
2494
2569
 
2495
- //#endregion
2496
- //#region src/lib/rate-limit.ts
2497
- async function checkRateLimit(state$1) {
2498
- if (state$1.rateLimitSeconds === void 0) return;
2499
- const now = Date.now();
2500
- if (!state$1.lastRequestTimestamp) {
2501
- state$1.lastRequestTimestamp = now;
2502
- return;
2503
- }
2504
- const elapsedSeconds = (now - state$1.lastRequestTimestamp) / 1e3;
2505
- if (elapsedSeconds > state$1.rateLimitSeconds) {
2506
- state$1.lastRequestTimestamp = now;
2507
- return;
2508
- }
2509
- const waitTimeSeconds = Math.ceil(state$1.rateLimitSeconds - elapsedSeconds);
2510
- if (!state$1.rateLimitWait) {
2511
- consola.warn(`Rate limit exceeded. Need to wait ${waitTimeSeconds} more seconds.`);
2512
- throw new HTTPError("Rate limit exceeded", Response.json({ message: "Rate limit exceeded" }, { status: 429 }));
2513
- }
2514
- const waitTimeMs = waitTimeSeconds * 1e3;
2515
- consola.warn(`Rate limit reached. Waiting ${waitTimeSeconds} seconds before proceeding...`);
2516
- await sleep(waitTimeMs);
2517
- state$1.lastRequestTimestamp = now;
2518
- consola.info("Rate limit wait completed, proceeding with request");
2519
- }
2520
-
2521
2570
  //#endregion
2522
2571
  //#region src/lib/tokenizer.ts
2523
2572
  const ENCODING_MAP = {
@@ -2715,6 +2764,163 @@ const getTokenCount = async (payload, model) => {
2715
2764
  };
2716
2765
  };
2717
2766
 
2767
+ //#endregion
2768
+ //#region src/lib/context-compression.ts
2769
+ /**
2770
+ * Get the maximum prompt token limit for a model.
2771
+ * Prefers max_prompt_tokens; otherwise falls back to max_context_window_tokens minus max_output_tokens (4096 when unset). Returns undefined if neither limit is available.
2772
+ */
2773
+ const getMaxPromptTokens = (model) => {
2774
+ const limits = model.capabilities.limits;
2775
+ if (limits.max_prompt_tokens) return limits.max_prompt_tokens;
2776
+ if (limits.max_context_window_tokens) {
2777
+ const outputReserve = limits.max_output_tokens ?? 4096;
2778
+ return limits.max_context_window_tokens - outputReserve;
2779
+ }
2780
+ };
2781
+ /**
2782
+ * Check if a message is a tool result message, i.e. its role is "tool".
2783
+ * Tool messages must be kept together with their paired assistant message.
2784
+ */
2785
+ const isToolMessage = (message) => {
2786
+ return message.role === "tool";
2787
+ };
2788
+ /**
2789
+ * Check if an assistant message contains tool calls.
2790
+ */
2791
+ const hasToolCalls = (message) => {
2792
+ return message.role === "assistant" && Array.isArray(message.tool_calls) && message.tool_calls.length > 0;
2793
+ };
2794
+ const groupMessages = (messages) => {
2795
+ const groups = [];
2796
+ let i = 0;
2797
+ while (i < messages.length) {
2798
+ const message = messages[i];
2799
+ if (message.role === "system" || message.role === "developer") {
2800
+ groups.push({
2801
+ messages: [message],
2802
+ isSystem: true,
2803
+ isRecent: false
2804
+ });
2805
+ i++;
2806
+ continue;
2807
+ }
2808
+ if (hasToolCalls(message)) {
2809
+ const group = [message];
2810
+ let j = i + 1;
2811
+ while (j < messages.length && isToolMessage(messages[j])) {
2812
+ group.push(messages[j]);
2813
+ j++;
2814
+ }
2815
+ groups.push({
2816
+ messages: group,
2817
+ isSystem: false,
2818
+ isRecent: false
2819
+ });
2820
+ i = j;
2821
+ continue;
2822
+ }
2823
+ groups.push({
2824
+ messages: [message],
2825
+ isSystem: false,
2826
+ isRecent: false
2827
+ });
2828
+ i++;
2829
+ }
2830
+ return groups;
2831
+ };
2832
+ /**
2833
+ * Create a truncation notice message to inform the model that earlier context was removed.
2834
+ */
2835
+ const createTruncationNotice = () => ({
2836
+ role: "user",
2837
+ content: "[Note: Earlier conversation history was automatically truncated to fit within the model's context window. The most recent messages have been preserved.]"
2838
+ });
2839
+ /**
2840
+ * Intelligently truncate messages to fit within the model's token limit.
2841
+ *
2842
+ * Strategy:
2843
+ * 1. Always preserve system/developer messages (they contain critical instructions)
2844
+ * 2. Always preserve the most recent messages (they contain the current task context)
2845
+ * 3. Remove middle conversation messages, oldest first
2846
+ * 4. Insert a truncation notice where messages were removed
2847
+ * 5. Keep tool call/result pairs together (never split them)
2848
+ *
2849
+ * Safety margin: keeps 5% below the limit to account for token counting inaccuracies.
2850
+ */
2851
+ const truncateMessages = async (payload, model) => {
2852
+ const maxPromptTokens = getMaxPromptTokens(model);
2853
+ if (!maxPromptTokens) {
2854
+ consola.debug("No token limit found for model, skipping truncation");
2855
+ return payload;
2856
+ }
2857
+ const tokenCount = await getTokenCount(payload, model);
2858
+ const safeLimit = Math.floor(maxPromptTokens * .95);
2859
+ if (tokenCount.input <= safeLimit) return payload;
2860
+ consola.warn(`Prompt tokens (${tokenCount.input}) exceed safe limit (${safeLimit}/${maxPromptTokens}). Auto-truncating context...`);
2861
+ const groups = groupMessages(payload.messages);
2862
+ const systemGroups = groups.filter((g) => g.isSystem);
2863
+ const conversationGroups = groups.filter((g) => !g.isSystem);
2864
+ if (conversationGroups.length === 0) {
2865
+ consola.warn("No conversation messages to truncate, only system messages");
2866
+ return payload;
2867
+ }
2868
+ let truncatedPayload = payload;
2869
+ let dropCount = 0;
2870
+ const maxDrop = Math.max(0, conversationGroups.length - 1);
2871
+ while (dropCount <= maxDrop) {
2872
+ const keptConversationGroups = conversationGroups.slice(dropCount);
2873
+ const truncationNotice = dropCount > 0 ? [createTruncationNotice()] : [];
2874
+ const newMessages = [
2875
+ ...systemGroups.flatMap((g) => g.messages),
2876
+ ...truncationNotice,
2877
+ ...keptConversationGroups.flatMap((g) => g.messages)
2878
+ ];
2879
+ truncatedPayload = {
2880
+ ...payload,
2881
+ messages: newMessages
2882
+ };
2883
+ const newTokenCount = await getTokenCount(truncatedPayload, model);
2884
+ if (newTokenCount.input <= safeLimit) {
2885
+ if (dropCount > 0) {
2886
+ const droppedMessages = conversationGroups.slice(0, dropCount).reduce((sum, g) => sum + g.messages.length, 0);
2887
+ consola.info(`Truncated ${droppedMessages} messages (${dropCount} conversation groups). Tokens: ${tokenCount.input} -> ${newTokenCount.input} (limit: ${maxPromptTokens})`);
2888
+ }
2889
+ return truncatedPayload;
2890
+ }
2891
+ dropCount++;
2892
+ }
2893
+ const finalTokenCount = await getTokenCount(truncatedPayload, model);
2894
+ consola.warn(`Could not reduce tokens below limit even after maximum truncation. Current: ${finalTokenCount.input}, limit: ${maxPromptTokens}. System messages or the last message may be too large.`);
2895
+ return truncatedPayload;
2896
+ };
2897
+
2898
+ //#endregion
2899
+ //#region src/lib/rate-limit.ts
2900
+ async function checkRateLimit(state$1) {
2901
+ if (state$1.rateLimitSeconds === void 0) return;
2902
+ const now = Date.now();
2903
+ if (!state$1.lastRequestTimestamp) {
2904
+ state$1.lastRequestTimestamp = now;
2905
+ return;
2906
+ }
2907
+ const elapsedSeconds = (now - state$1.lastRequestTimestamp) / 1e3;
2908
+ if (elapsedSeconds > state$1.rateLimitSeconds) {
2909
+ state$1.lastRequestTimestamp = now;
2910
+ return;
2911
+ }
2912
+ const waitTimeSeconds = Math.ceil(state$1.rateLimitSeconds - elapsedSeconds);
2913
+ if (!state$1.rateLimitWait) {
2914
+ consola.warn(`Rate limit exceeded. Need to wait ${waitTimeSeconds} more seconds.`);
2915
+ throw new HTTPError("Rate limit exceeded", Response.json({ message: "Rate limit exceeded" }, { status: 429 }));
2916
+ }
2917
+ const waitTimeMs = waitTimeSeconds * 1e3;
2918
+ consola.warn(`Rate limit reached. Waiting ${waitTimeSeconds} seconds before proceeding...`);
2919
+ await sleep(waitTimeMs);
2920
+ state$1.lastRequestTimestamp = now;
2921
+ consola.info("Rate limit wait completed, proceeding with request");
2922
+ }
2923
+
2718
2924
  //#endregion
2719
2925
  //#region src/services/copilot/create-chat-completions.ts
2720
2926
  const createChatCompletions = async (payload) => {
@@ -2749,27 +2955,39 @@ const createChatCompletions = async (payload) => {
2749
2955
 
2750
2956
  //#endregion
2751
2957
  //#region src/routes/chat-completions/handler.ts
2752
- async function handleCompletion$1(c) {
2753
- await checkRateLimit(state);
2754
- let payload = await c.req.json();
2755
- consola.debug("Request payload:", JSON.stringify(payload).slice(-400));
2958
+ /**
2959
+ * Calculate token count, log it, auto-truncate if needed, and default max_tokens to the model's max_output_tokens when it is unset.
2960
+ */
2961
+ async function processPayloadTokens(payload) {
2756
2962
  const selectedModel = state.models?.data.find((model) => model.id === payload.model);
2963
+ if (!selectedModel) {
2964
+ consola.warn("No model selected, skipping token count calculation");
2965
+ return payload;
2966
+ }
2757
2967
  try {
2758
- if (selectedModel) {
2759
- const tokenCount = await getTokenCount(payload, selectedModel);
2760
- consola.info("Current token count:", tokenCount);
2761
- } else consola.warn("No model selected, skipping token count calculation");
2968
+ const tokenCount = await getTokenCount(payload, selectedModel);
2969
+ consola.info("Current token count:", tokenCount);
2970
+ const truncated = await truncateMessages(payload, selectedModel);
2971
+ if (isNullish(truncated.max_tokens)) {
2972
+ const withMaxTokens = {
2973
+ ...truncated,
2974
+ max_tokens: selectedModel.capabilities.limits.max_output_tokens
2975
+ };
2976
+ consola.debug("Set max_tokens to:", JSON.stringify(withMaxTokens.max_tokens));
2977
+ return withMaxTokens;
2978
+ }
2979
+ return truncated;
2762
2980
  } catch (error) {
2763
2981
  consola.warn("Failed to calculate token count:", error);
2982
+ return payload;
2764
2983
  }
2984
+ }
2985
+ async function handleCompletion$1(c) {
2986
+ await checkRateLimit(state);
2987
+ const rawPayload = await c.req.json();
2988
+ consola.debug("Request payload:", JSON.stringify(rawPayload).slice(-400));
2989
+ const payload = await processPayloadTokens(rawPayload);
2765
2990
  if (state.manualApprove) await awaitApproval();
2766
- if (isNullish(payload.max_tokens)) {
2767
- payload = {
2768
- ...payload,
2769
- max_tokens: selectedModel?.capabilities.limits.max_output_tokens
2770
- };
2771
- consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens));
2772
- }
2773
2991
  const response = await createChatCompletions(payload);
2774
2992
  if (isNonStreaming$1(response)) {
2775
2993
  consola.debug("Non-streaming response:", JSON.stringify(response));
@@ -3200,10 +3418,27 @@ function translateChunkToAnthropicEvents(chunk, state$1) {
3200
3418
 
3201
3419
  //#endregion
3202
3420
  //#region src/routes/messages/handler.ts
3421
+ /**
3422
+ * Auto-truncate OpenAI payload if prompt tokens exceed model limit.
3423
+ */
3424
+ async function autoTruncatePayload(payload) {
3425
+ const selectedModel = state.models?.data.find((model) => model.id === payload.model);
3426
+ if (!selectedModel) {
3427
+ consola.warn("No model selected for Anthropic endpoint, skipping auto-truncation");
3428
+ return payload;
3429
+ }
3430
+ try {
3431
+ return await truncateMessages(payload, selectedModel);
3432
+ } catch (error) {
3433
+ consola.warn("Failed to auto-truncate context:", error);
3434
+ return payload;
3435
+ }
3436
+ }
3203
3437
  async function handleCompletion(c) {
3204
3438
  await checkRateLimit(state);
3205
3439
  const anthropicPayload = await c.req.json();
3206
- const openAIPayload = translateToOpenAI(anthropicPayload);
3440
+ const rawOpenAIPayload = translateToOpenAI(anthropicPayload);
3441
+ const openAIPayload = await autoTruncatePayload(rawOpenAIPayload);
3207
3442
  if (state.manualApprove) await awaitApproval();
3208
3443
  const response = await createChatCompletions(openAIPayload);
3209
3444
  if (isNonStreaming(response)) {
@@ -3760,7 +3995,7 @@ async function runServer(options$1) {
3760
3995
  } else if (options$1.antigravity) {
3761
3996
  consola.info("Google Antigravity mode enabled");
3762
3997
  state.antigravityMode = true;
3763
- const { loadAntigravityAuth, setupAntigravity, getCurrentAccount, hasApiKey, getApiKey: getApiKey$1, setOAuthCredentials } = await import("./auth-B2lTFLSD.js");
3998
+ const { loadAntigravityAuth, setupAntigravity, getCurrentAccount, hasApiKey, getApiKey: getApiKey$1, setOAuthCredentials } = await import("./auth-BgquW2Yd.js");
3764
3999
  if (options$1.antigravityClientId && options$1.antigravityClientSecret) {
3765
4000
  setOAuthCredentials(options$1.antigravityClientId, options$1.antigravityClientSecret);
3766
4001
  consola.info("Using provided OAuth credentials from CLI");
@@ -3789,7 +4024,7 @@ async function runServer(options$1) {
3789
4024
  }
3790
4025
  if (!await getCurrentAccount() && !hasApiKey()) throw new Error("No enabled Antigravity accounts available");
3791
4026
  }
3792
- const { getAntigravityModels: getAntigravityModels$1 } = await import("./get-models-CmDpYUV-.js");
4027
+ const { getAntigravityModels: getAntigravityModels$1 } = await import("./get-models-D1vQG5Eb.js");
3793
4028
  const models = await getAntigravityModels$1();
3794
4029
  state.antigravityModels = models;
3795
4030
  consola.info(`Available Antigravity models: \n${models.data.map((model) => `- ${model.id}`).join("\n")}`);