copilot-api-plus 1.0.36 → 1.0.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -4,9 +4,9 @@ import { state } from "./state-CcLGr8VN.js";
4
4
  import { GITHUB_API_BASE_URL, copilotBaseUrl, copilotHeaders, githubHeaders } from "./get-user-BzIEATcF.js";
5
5
  import { HTTPError, forwardError } from "./error-CvU5otz-.js";
6
6
  import { cacheModels, cacheVSCodeVersion, clearGithubToken, isNullish, setupCopilotToken, setupGitHubToken, sleep } from "./token-ClgudjZm.js";
7
- import { clearAntigravityAuth, disableCurrentAccount, getAntigravityAuthPath, getApiKey, getValidAccessToken, rotateAccount } from "./auth-CM_ilreU.js";
7
+ import { clearAntigravityAuth, disableCurrentAccount, getAntigravityAuthPath, getApiKey, getCurrentProjectId, getValidAccessToken, rotateAccount } from "./auth-Dz36Lk4o.js";
8
8
  import { clearZenAuth, getZenAuthPath } from "./auth-T55-Bhoo.js";
9
- import { getAntigravityModels, getAntigravityUsage, isThinkingModel } from "./get-models-DMdiCNoU.js";
9
+ import { getAntigravityModels, getAntigravityUsage, isThinkingModel } from "./get-models-VsThqHZf.js";
10
10
  import { createRequire } from "node:module";
11
11
  import { defineCommand, runMain } from "citty";
12
12
  import consola from "consola";
@@ -540,7 +540,7 @@ function initProxyFromEnv() {
540
540
  * Add a new Antigravity account via OAuth
541
541
  */
542
542
  async function addAccount() {
543
- const { setupAntigravity, loadAntigravityAuth } = await import("./auth-B2lTFLSD.js");
543
+ const { setupAntigravity, loadAntigravityAuth } = await import("./auth-BgquW2Yd.js");
544
544
  const existingAuth = await loadAntigravityAuth();
545
545
  if (existingAuth && existingAuth.accounts.length > 0) {
546
546
  const enabledCount = existingAuth.accounts.filter((a) => a.enable).length;
@@ -552,7 +552,7 @@ async function addAccount() {
552
552
  * List all Antigravity accounts
553
553
  */
554
554
  async function listAccounts() {
555
- const { loadAntigravityAuth } = await import("./auth-B2lTFLSD.js");
555
+ const { loadAntigravityAuth } = await import("./auth-BgquW2Yd.js");
556
556
  const auth$1 = await loadAntigravityAuth();
557
557
  if (!auth$1 || auth$1.accounts.length === 0) {
558
558
  consola.info("No Antigravity accounts configured");
@@ -573,7 +573,7 @@ async function listAccounts() {
573
573
  * Remove an Antigravity account by index
574
574
  */
575
575
  async function removeAccount(index) {
576
- const { loadAntigravityAuth, saveAntigravityAuth } = await import("./auth-B2lTFLSD.js");
576
+ const { loadAntigravityAuth, saveAntigravityAuth } = await import("./auth-BgquW2Yd.js");
577
577
  const auth$1 = await loadAntigravityAuth();
578
578
  if (!auth$1 || auth$1.accounts.length === 0) {
579
579
  consola.error("No Antigravity accounts configured");
@@ -592,7 +592,7 @@ async function removeAccount(index) {
592
592
  * Clear all Antigravity accounts
593
593
  */
594
594
  async function clearAccounts() {
595
- const { clearAntigravityAuth: clearAntigravityAuth$1 } = await import("./auth-B2lTFLSD.js");
595
+ const { clearAntigravityAuth: clearAntigravityAuth$1 } = await import("./auth-BgquW2Yd.js");
596
596
  if (await consola.prompt("Are you sure you want to remove all Antigravity accounts?", {
597
597
  type: "confirm",
598
598
  initial: false
@@ -1403,9 +1403,9 @@ function processChunk(chunk, state$1) {
1403
1403
 
1404
1404
  //#endregion
1405
1405
  //#region src/services/antigravity/create-chat-completions.ts
1406
- const ANTIGRAVITY_API_HOST$1 = "daily-cloudcode-pa.sandbox.googleapis.com";
1407
- const ANTIGRAVITY_STREAM_URL$1 = `https://${ANTIGRAVITY_API_HOST$1}/v1internal:streamGenerateContent?alt=sse`;
1408
- const ANTIGRAVITY_NO_STREAM_URL$1 = `https://${ANTIGRAVITY_API_HOST$1}/v1internal:generateContent`;
1406
+ const ANTIGRAVITY_API_HOST = "daily-cloudcode-pa.sandbox.googleapis.com";
1407
+ const ANTIGRAVITY_STREAM_URL = `https://${ANTIGRAVITY_API_HOST}/v1internal:streamGenerateContent?alt=sse`;
1408
+ const ANTIGRAVITY_NO_STREAM_URL = `https://${ANTIGRAVITY_API_HOST}/v1internal:generateContent`;
1409
1409
  const ANTIGRAVITY_USER_AGENT$1 = "antigravity/1.11.3 windows/amd64";
1410
1410
  const GEMINI_API_HOST = "generativelanguage.googleapis.com";
1411
1411
  const getGeminiStreamUrl = (model, apiKey) => `https://${GEMINI_API_HOST}/v1beta/models/${model}:streamGenerateContent?alt=sse&key=${apiKey}`;
@@ -1625,14 +1625,14 @@ async function createWithApiKey(request, apiKey) {
1625
1625
  * Note: Both Gemini and Claude models use the same endpoint and Gemini-style format
1626
1626
  */
1627
1627
  async function createWithOAuth(request, accessToken) {
1628
- const endpoint = request.stream ? ANTIGRAVITY_STREAM_URL$1 : ANTIGRAVITY_NO_STREAM_URL$1;
1628
+ const endpoint = request.stream ? ANTIGRAVITY_STREAM_URL : ANTIGRAVITY_NO_STREAM_URL;
1629
1629
  const body = buildAntigravityRequestBody(request);
1630
1630
  consola.debug(`Antigravity request to ${endpoint} with model ${request.model}`);
1631
1631
  try {
1632
1632
  const response = await fetch(endpoint, {
1633
1633
  method: "POST",
1634
1634
  headers: {
1635
- Host: ANTIGRAVITY_API_HOST$1,
1635
+ Host: ANTIGRAVITY_API_HOST,
1636
1636
  "User-Agent": ANTIGRAVITY_USER_AGENT$1,
1637
1637
  Authorization: `Bearer ${accessToken}`,
1638
1638
  "Content-Type": "application/json",
@@ -1831,6 +1831,49 @@ app$1.post("/", async (c) => {
1831
1831
  });
1832
1832
  const antigravityChatCompletionsRoute = app$1;
1833
1833
 
1834
+ //#endregion
1835
+ //#region src/lib/request-queue.ts
1836
/**
 * Promise-based request queue that caps concurrency and spaces out task starts.
 *
 * At most `maxConcurrent` tasks hold a slot at any time, and two consecutive
 * task starts are kept at least `minDelayMs` milliseconds apart.
 */
var RequestQueue = class {
	/** Pending entries (`{ execute, resolve, reject }`), served first-in first-out. */
	queue = [];
	/** Number of tasks holding a slot (waiting for their start time or running). */
	activeCount = 0;
	maxConcurrent;
	minDelayMs;
	/** Start time (ms since epoch) reserved for the most recently dispatched task. */
	lastRequestTime = 0;
	/**
	 * @param {number} [maxConcurrent=2] Maximum number of tasks in flight.
	 * @param {number} [minDelayMs=300] Minimum gap between two task starts, in ms.
	 */
	constructor(maxConcurrent = 2, minDelayMs = 300) {
		this.maxConcurrent = maxConcurrent;
		this.minDelayMs = minDelayMs;
	}
	/**
	 * Schedule a task.
	 *
	 * @param {() => any} execute Task to run once a slot and a start time are available.
	 * @returns {Promise<any>} Settles with whatever `execute` returns or throws.
	 */
	async enqueue(execute) {
		return new Promise((resolve, reject) => {
			this.queue.push({
				execute,
				resolve,
				reject
			});
			this.processQueue();
		});
	}
	/**
	 * Dispatch the next pending task if a concurrency slot is free.
	 * Re-invokes itself when a task finishes so the queue keeps draining.
	 */
	async processQueue() {
		if (this.activeCount >= this.maxConcurrent || this.queue.length === 0) return;
		const request = this.queue.shift();
		if (!request) return;
		this.activeCount++;
		// Reserve the start slot synchronously, before any await. Recording the
		// timestamp only after the delay let two dispatches that overlapped while
		// waiting compute their delay from the same stale value and start together.
		const now = Date.now();
		const startAt = Math.max(now, this.lastRequestTime + this.minDelayMs);
		this.lastRequestTime = startAt;
		try {
			if (startAt > now) await new Promise((r) => setTimeout(r, startAt - now));
			const result = await request.execute();
			request.resolve(result);
		} catch (error) {
			request.reject(error);
		} finally {
			this.activeCount--;
			// Fire-and-forget: processQueue handles its own errors via the request promise.
			void this.processQueue();
		}
	}
};
const antigravityQueue = new RequestQueue(2, 500);
1876
+
1834
1877
  //#endregion
1835
1878
  //#region src/services/antigravity/anthropic-events.ts
1836
1879
  /**
@@ -1993,10 +2036,49 @@ function generateToolId() {
1993
2036
 
1994
2037
  //#endregion
1995
2038
  //#region src/services/antigravity/create-messages.ts
1996
- const ANTIGRAVITY_API_HOST = "daily-cloudcode-pa.sandbox.googleapis.com";
1997
- const ANTIGRAVITY_STREAM_URL = `https://${ANTIGRAVITY_API_HOST}/v1internal:streamGenerateContent?alt=sse`;
1998
- const ANTIGRAVITY_NO_STREAM_URL = `https://${ANTIGRAVITY_API_HOST}/v1internal:generateContent`;
2039
/** Antigravity API hosts, in the order they are rotated through. */
const ANTIGRAVITY_ENDPOINTS = ["daily-cloudcode-pa.sandbox.googleapis.com", "cloudcode-pa.googleapis.com"];
/** Index into ANTIGRAVITY_ENDPOINTS of the host currently in use. */
let currentEndpointIndex = 0;
/**
 * Build the streaming (SSE) generateContent URL for a host.
 */
function getStreamUrl(host) {
	return `https://${host}/v1internal:streamGenerateContent?alt=sse`;
}
/**
 * Build the non-streaming generateContent URL for a host.
 */
function getNoStreamUrl(host) {
	return `https://${host}/v1internal:generateContent`;
}
/**
 * Return the host currently selected for requests.
 */
function getCurrentHost() {
	const activeHost = ANTIGRAVITY_ENDPOINTS[currentEndpointIndex];
	return activeHost;
}
/**
 * Advance to the next host (wrapping around at the end of the list) and log the switch.
 */
function rotateEndpoint() {
	const previousHost = ANTIGRAVITY_ENDPOINTS[currentEndpointIndex];
	const nextIndex = (currentEndpointIndex + 1) % ANTIGRAVITY_ENDPOINTS.length;
	currentEndpointIndex = nextIndex;
	consola.info(`Rotating endpoint: ${previousHost} → ${ANTIGRAVITY_ENDPOINTS[nextIndex]}`);
}
1999
2055
  const ANTIGRAVITY_USER_AGENT = "antigravity/1.11.3 windows/amd64";
2056
/**
 * Rate-limit bookkeeping per model family.
 * Maps a family name to `{ lastLimitTime, consecutiveErrors }`.
 */
const rateLimitTracker = {};
/**
 * Classify a model id into a family by substring match.
 *
 * @param {string} model Model identifier.
 * @returns {"claude" | "gemini" | "other"} Family used as the tracker key.
 */
function getModelFamily(model) {
	if (model.includes("claude")) return "claude";
	if (model.includes("gemini")) return "gemini";
	return "other";
}
/**
 * Record one more rate-limit hit for the model's family.
 *
 * @param {string} model Model identifier.
 */
function trackRateLimit(model) {
	const family = getModelFamily(model);
	rateLimitTracker[family] ??= {
		lastLimitTime: 0,
		consecutiveErrors: 0
	};
	rateLimitTracker[family].lastLimitTime = Date.now();
	rateLimitTracker[family].consecutiveErrors++;
}
/**
 * Reset the consecutive-error counter for the model's family (called after a success).
 *
 * @param {string} model Model identifier.
 */
function clearRateLimitTracker(model) {
	const family = getModelFamily(model);
	if (rateLimitTracker[family]) rateLimitTracker[family].consecutiveErrors = 0;
}
/**
 * Compute the retry delay for a model, doubling per consecutive rate-limit hit.
 *
 * @param {string} model Model identifier.
 * @param {number} baseDelay Delay in ms for the first retry.
 * @returns {number} `baseDelay` times 2^(consecutiveErrors - 1), with the multiplier
 *   capped at 60 and the result capped at 30000 ms. Never below `baseDelay`
 *   (unless `baseDelay` itself exceeds the 30000 ms cap).
 */
function getBackoffDelay(model, baseDelay) {
	const info = rateLimitTracker[getModelFamily(model)];
	if (!info) return baseDelay;
	// Clamp the exponent at 0: once the counter has been cleared it is 0, and
	// 2 ** -1 would silently halve the delay instead of leaving it at the base.
	const exponent = Math.max(info.consecutiveErrors - 1, 0);
	const multiplier = Math.min(2 ** exponent, 60);
	return Math.min(baseDelay * multiplier, 3e4);
}
2000
2082
  /**
2001
2083
  * Extract text from system content (can be string or array)
2002
2084
  */
@@ -2118,7 +2200,7 @@ function convertTools(tools) {
2118
2200
  * Build Antigravity request body
2119
2201
  * The Antigravity API expects a specific nested structure with request object
2120
2202
  */
2121
- function buildGeminiRequest(request) {
2203
+ function buildGeminiRequest(request, projectId) {
2122
2204
  const { contents, systemInstruction } = convertMessages(request.messages, request.system);
2123
2205
  const tools = convertTools(request.tools);
2124
2206
  const innerRequest = {
@@ -2136,12 +2218,14 @@ function buildGeminiRequest(request) {
2136
2218
  ...innerRequest.generationConfig,
2137
2219
  thinkingConfig: { includeThoughts: true }
2138
2220
  };
2139
- return {
2221
+ const result = {
2140
2222
  model: request.model,
2141
2223
  userAgent: "antigravity",
2142
2224
  requestId: `agent-${crypto.randomUUID()}`,
2143
2225
  request: innerRequest
2144
2226
  };
2227
+ if (projectId) result.project = projectId;
2228
+ return result;
2145
2229
  }
2146
2230
  /**
2147
2231
  * Create error response
@@ -2161,20 +2245,29 @@ function createErrorResponse(type, message, status) {
2161
2245
  /**
2162
2246
  * Create Anthropic-compatible message response using Antigravity
2163
2247
  * Note: Both Gemini and Claude models use the same endpoint and Gemini-style format
2248
+ *
2249
+ * Features:
2250
+ * - Endpoint fallback (daily → prod)
2251
+ * - Per-model-family rate limit tracking
2252
+ * - Exponential backoff for consecutive errors
2253
+ * - Smart retry for short delays (≤5s on same endpoint)
2164
2254
  */
2165
2255
  const MAX_RETRIES$3 = 5;
2166
- async function createAntigravityMessages(request) {
2167
- const endpoint = request.stream ? ANTIGRAVITY_STREAM_URL : ANTIGRAVITY_NO_STREAM_URL;
2168
- const body = buildGeminiRequest(request);
2256
+ const MAX_ENDPOINT_RETRIES = 2;
2257
+ async function executeAntigravityRequest(request) {
2258
+ const projectId = await getCurrentProjectId();
2259
+ const body = buildGeminiRequest(request, projectId);
2260
+ let endpointRetries = 0;
2169
2261
  for (let attempt = 0; attempt <= MAX_RETRIES$3; attempt++) {
2262
+ const host = getCurrentHost();
2263
+ const endpoint = request.stream ? getStreamUrl(host) : getNoStreamUrl(host);
2170
2264
  const accessToken = await getValidAccessToken();
2171
- if (!accessToken) return createErrorResponse("authentication_error", "No valid Antigravity access token available. Please run login first.", 401);
2172
- consola.debug(`Antigravity request to ${endpoint} (attempt ${attempt + 1}/${MAX_RETRIES$3 + 1})`);
2265
+ if (!accessToken) return createErrorResponse("authentication_error", "No valid Antigravity access token available.", 401);
2173
2266
  try {
2174
2267
  const response = await fetch(endpoint, {
2175
2268
  method: "POST",
2176
2269
  headers: {
2177
- Host: ANTIGRAVITY_API_HOST,
2270
+ Host: host,
2178
2271
  "User-Agent": ANTIGRAVITY_USER_AGENT,
2179
2272
  Authorization: `Bearer ${accessToken}`,
2180
2273
  "Content-Type": "application/json",
@@ -2182,17 +2275,33 @@ async function createAntigravityMessages(request) {
2182
2275
  },
2183
2276
  body: JSON.stringify(body)
2184
2277
  });
2185
- if (response.ok) return request.stream ? transformStreamResponse(response, request.model) : await transformNonStreamResponse(response, request.model);
2186
- const errorResult = await handleApiError(response);
2278
+ if (response.ok) {
2279
+ clearRateLimitTracker(request.model);
2280
+ return request.stream ? transformStreamResponse(response, request.model) : await transformNonStreamResponse(response, request.model);
2281
+ }
2282
+ const errorResult = await handleApiError(response, request.model);
2187
2283
  if (errorResult.shouldRetry && attempt < MAX_RETRIES$3) {
2188
- consola.info(`Rate limited, retrying in ${errorResult.retryDelayMs}ms...`);
2189
- await sleep(errorResult.retryDelayMs);
2284
+ trackRateLimit(request.model);
2285
+ const backoffDelay = getBackoffDelay(request.model, errorResult.retryDelayMs);
2286
+ if (backoffDelay <= 5e3 || endpointRetries >= MAX_ENDPOINT_RETRIES) {
2287
+ consola.info(`Rate limited, retrying in ${backoffDelay}ms (attempt ${attempt + 1}/${MAX_RETRIES$3})`);
2288
+ await sleep(backoffDelay);
2289
+ } else {
2290
+ rotateEndpoint();
2291
+ endpointRetries++;
2292
+ consola.info(`Switching endpoint, retrying in ${errorResult.retryDelayMs}ms`);
2293
+ await sleep(errorResult.retryDelayMs);
2294
+ }
2190
2295
  continue;
2191
2296
  }
2192
2297
  return errorResult.response;
2193
2298
  } catch (error) {
2194
- consola.error("Antigravity messages request error:", error);
2299
+ consola.error("Antigravity request error:", error);
2195
2300
  if (attempt < MAX_RETRIES$3) {
2301
+ if (endpointRetries < MAX_ENDPOINT_RETRIES) {
2302
+ rotateEndpoint();
2303
+ endpointRetries++;
2304
+ }
2196
2305
  await sleep(500);
2197
2306
  continue;
2198
2307
  }
@@ -2201,12 +2310,20 @@ async function createAntigravityMessages(request) {
2201
2310
  }
2202
2311
  return createErrorResponse("api_error", "Max retries exceeded", 429);
2203
2312
  }
2313
/**
 * Public entry point for Antigravity message requests.
 * Hands the request to the shared `antigravityQueue` and resolves with
 * whatever `executeAntigravityRequest` produces for it.
 */
async function createAntigravityMessages(request) {
	const runRequest = () => executeAntigravityRequest(request);
	return antigravityQueue.enqueue(runRequest);
}
2204
2316
  /**
2205
2317
  * Parse retry delay from error response
2318
+ * Supports multiple formats:
2319
+ * - RetryInfo.retryDelay: "3.5s"
2320
+ * - quotaResetDelay: "3000ms" or "3s"
2321
+ * - message: "Your quota will reset after 3s"
2206
2322
  */
2207
2323
  function parseRetryDelay$3(errorText) {
2208
2324
  try {
2209
- const details = JSON.parse(errorText).error?.details ?? [];
2325
+ const errorData = JSON.parse(errorText);
2326
+ const details = errorData.error?.details ?? [];
2210
2327
  for (const detail of details) {
2211
2328
  if (detail["@type"]?.includes("RetryInfo") && detail.retryDelay) {
2212
2329
  const match = /(\d+(?:\.\d+)?)s/.exec(detail.retryDelay);
@@ -2220,13 +2337,16 @@ function parseRetryDelay$3(errorText) {
2220
2337
  }
2221
2338
  }
2222
2339
  }
2340
+ const message = errorData.error?.message ?? "";
2341
+ const resetMatch = /quota will reset after (\d+(?:\.\d+)?)s/i.exec(message);
2342
+ if (resetMatch) return Math.ceil(Number.parseFloat(resetMatch[1]) * 1e3);
2223
2343
  } catch {}
2224
2344
  return 500;
2225
2345
  }
2226
2346
  /**
2227
2347
  * Handle API error response
2228
2348
  */
2229
- async function handleApiError(response) {
2349
+ async function handleApiError(response, _model) {
2230
2350
  const errorText = await response.text();
2231
2351
  consola.error(`Antigravity error: ${response.status} ${errorText}`);
2232
2352
  if (response.status === 403) await disableCurrentAccount();
@@ -2447,32 +2567,6 @@ const awaitApproval = async () => {
2447
2567
  if (!await consola.prompt(`Accept incoming request?`, { type: "confirm" })) throw new HTTPError("Request rejected", Response.json({ message: "Request rejected" }, { status: 403 }));
2448
2568
  };
2449
2569
 
2450
- //#endregion
2451
- //#region src/lib/rate-limit.ts
2452
- async function checkRateLimit(state$1) {
2453
- if (state$1.rateLimitSeconds === void 0) return;
2454
- const now = Date.now();
2455
- if (!state$1.lastRequestTimestamp) {
2456
- state$1.lastRequestTimestamp = now;
2457
- return;
2458
- }
2459
- const elapsedSeconds = (now - state$1.lastRequestTimestamp) / 1e3;
2460
- if (elapsedSeconds > state$1.rateLimitSeconds) {
2461
- state$1.lastRequestTimestamp = now;
2462
- return;
2463
- }
2464
- const waitTimeSeconds = Math.ceil(state$1.rateLimitSeconds - elapsedSeconds);
2465
- if (!state$1.rateLimitWait) {
2466
- consola.warn(`Rate limit exceeded. Need to wait ${waitTimeSeconds} more seconds.`);
2467
- throw new HTTPError("Rate limit exceeded", Response.json({ message: "Rate limit exceeded" }, { status: 429 }));
2468
- }
2469
- const waitTimeMs = waitTimeSeconds * 1e3;
2470
- consola.warn(`Rate limit reached. Waiting ${waitTimeSeconds} seconds before proceeding...`);
2471
- await sleep(waitTimeMs);
2472
- state$1.lastRequestTimestamp = now;
2473
- consola.info("Rate limit wait completed, proceeding with request");
2474
- }
2475
-
2476
2570
  //#endregion
2477
2571
  //#region src/lib/tokenizer.ts
2478
2572
  const ENCODING_MAP = {
@@ -2670,6 +2764,163 @@ const getTokenCount = async (payload, model) => {
2670
2764
  };
2671
2765
  };
2672
2766
 
2767
+ //#endregion
2768
+ //#region src/lib/context-compression.ts
2769
/**
 * Get the maximum prompt token limit for a model.
 *
 * Prefers `max_prompt_tokens`; otherwise falls back to
 * `max_context_window_tokens` minus `max_output_tokens` (4096 when unset).
 *
 * @param {object} model Model entry; limits are read from `model.capabilities.limits`.
 * @returns {number | undefined} A positive token budget, or `undefined` when no
 *   usable limit can be derived (limits missing, or the output reserve is at
 *   least as large as the context window).
 */
const getMaxPromptTokens = (model) => {
	const limits = model?.capabilities?.limits;
	if (!limits) return undefined;
	if (limits.max_prompt_tokens) return limits.max_prompt_tokens;
	if (!limits.max_context_window_tokens) return undefined;
	const outputReserve = limits.max_output_tokens ?? 4096;
	const promptBudget = limits.max_context_window_tokens - outputReserve;
	// A zero or negative budget is not a real limit; reporting it would make
	// callers truncate every conversation down to its last message.
	return promptBudget > 0 ? promptBudget : undefined;
};
2781
/**
 * True when the message is a tool result (`role === "tool"`).
 */
const isToolMessage = (message) => message.role === "tool";
/**
 * True when the message is an assistant message carrying a non-empty `tool_calls` array.
 */
const hasToolCalls = (message) => {
	if (message.role !== "assistant") return false;
	return Array.isArray(message.tool_calls) && message.tool_calls.length > 0;
};
/**
 * Split a message list into ordered groups of the form
 * `{ messages, isSystem, isRecent }`:
 * - each system/developer message forms its own group with `isSystem: true`;
 * - an assistant message with tool calls is grouped with the run of tool
 *   messages that immediately follows it;
 * - every other message forms a single-message group.
 * `isRecent` is always set to false here.
 */
const groupMessages = (messages) => {
	const groups = [];
	const addGroup = (members, isSystem) => {
		groups.push({
			messages: members,
			isSystem,
			isRecent: false
		});
	};
	let cursor = 0;
	while (cursor < messages.length) {
		const current = messages[cursor];
		cursor++;
		if (current.role === "system" || current.role === "developer") {
			addGroup([current], true);
			continue;
		}
		const members = [current];
		// Pull the directly following tool results into the same group as their call.
		if (hasToolCalls(current)) while (cursor < messages.length && isToolMessage(messages[cursor])) {
			members.push(messages[cursor]);
			cursor++;
		}
		addGroup(members, false);
	}
	return groups;
};
2832
/**
 * Build a fresh user-role message telling the model that earlier
 * conversation history was dropped to fit the context window.
 */
const createTruncationNotice = () => {
	const notice = {
		role: "user",
		content: "[Note: Earlier conversation history was automatically truncated to fit within the model's context window. The most recent messages have been preserved.]"
	};
	return notice;
};
2839
/**
 * Shrink a chat payload until its prompt fits the model's token limit.
 *
 * How it works:
 * 1. The limit comes from getMaxPromptTokens; without one the payload is returned untouched.
 * 2. The working threshold is 95% of that limit, leaving headroom for token-count inaccuracy.
 * 3. Messages are grouped with groupMessages, so a tool call and its results move as one unit.
 * 4. System/developer groups are always kept and placed first.
 * 5. Conversation groups are dropped oldest-first, one more per attempt; the last group is never dropped.
 * 6. Once anything has been dropped, a truncation notice is inserted right after the system messages.
 *
 * Returns the original payload when no truncation is needed or possible, otherwise a
 * shallow copy with a new `messages` array. If even maximum truncation does not fit,
 * the most-truncated payload is returned and a warning is logged.
 */
const truncateMessages = async (payload, model) => {
	const maxPromptTokens = getMaxPromptTokens(model);
	if (!maxPromptTokens) {
		consola.debug("No token limit found for model, skipping truncation");
		return payload;
	}
	const tokenCount = await getTokenCount(payload, model);
	const safeLimit = Math.floor(maxPromptTokens * .95);
	if (tokenCount.input <= safeLimit) return payload;
	consola.warn(`Prompt tokens (${tokenCount.input}) exceed safe limit (${safeLimit}/${maxPromptTokens}). Auto-truncating context...`);
	// Partition the groups in a single pass: system messages are flattened up
	// front, conversation groups stay grouped so they can be dropped as units.
	const systemMessages = [];
	const conversationGroups = [];
	for (const group of groupMessages(payload.messages)) if (group.isSystem) systemMessages.push(...group.messages);
	else conversationGroups.push(group);
	if (conversationGroups.length === 0) {
		consola.warn("No conversation messages to truncate, only system messages");
		return payload;
	}
	const maxDrop = Math.max(0, conversationGroups.length - 1);
	let truncatedPayload = payload;
	for (let dropCount = 0; dropCount <= maxDrop; dropCount++) {
		const keptMessages = conversationGroups.slice(dropCount).flatMap((g) => g.messages);
		const messages = dropCount > 0 ? [
			...systemMessages,
			createTruncationNotice(),
			...keptMessages
		] : [...systemMessages, ...keptMessages];
		truncatedPayload = {
			...payload,
			messages
		};
		const newTokenCount = await getTokenCount(truncatedPayload, model);
		if (newTokenCount.input > safeLimit) continue;
		if (dropCount > 0) {
			let droppedMessages = 0;
			for (const dropped of conversationGroups.slice(0, dropCount)) droppedMessages += dropped.messages.length;
			consola.info(`Truncated ${droppedMessages} messages (${dropCount} conversation groups). Tokens: ${tokenCount.input} -> ${newTokenCount.input} (limit: ${maxPromptTokens})`);
		}
		return truncatedPayload;
	}
	const finalTokenCount = await getTokenCount(truncatedPayload, model);
	consola.warn(`Could not reduce tokens below limit even after maximum truncation. Current: ${finalTokenCount.input}, limit: ${maxPromptTokens}. System messages or the last message may be too large.`);
	return truncatedPayload;
};
2897
+
2898
+ //#endregion
2899
+ //#region src/lib/rate-limit.ts
2900
/**
 * Enforce the configured minimum interval between requests.
 *
 * Does nothing when `state$1.rateLimitSeconds` is undefined. Otherwise:
 * - first request, or interval already elapsed: stamp the time and continue;
 * - interval not elapsed and `state$1.rateLimitWait` is falsy: throw an
 *   HTTPError carrying a 429 JSON response;
 * - interval not elapsed and `state$1.rateLimitWait` is truthy: sleep for the
 *   remaining whole seconds, then continue.
 *
 * @param {object} state$1 Shared state; reads `rateLimitSeconds` and
 *   `rateLimitWait`, reads and writes `lastRequestTimestamp`.
 * @throws {HTTPError} When the limit is exceeded and waiting is disabled.
 */
async function checkRateLimit(state$1) {
	if (state$1.rateLimitSeconds === void 0) return;
	const now = Date.now();
	if (!state$1.lastRequestTimestamp) {
		state$1.lastRequestTimestamp = now;
		return;
	}
	const elapsedSeconds = (now - state$1.lastRequestTimestamp) / 1e3;
	if (elapsedSeconds > state$1.rateLimitSeconds) {
		state$1.lastRequestTimestamp = now;
		return;
	}
	const waitTimeSeconds = Math.ceil(state$1.rateLimitSeconds - elapsedSeconds);
	if (!state$1.rateLimitWait) {
		consola.warn(`Rate limit exceeded. Need to wait ${waitTimeSeconds} more seconds.`);
		throw new HTTPError("Rate limit exceeded", Response.json({ message: "Rate limit exceeded" }, { status: 429 }));
	}
	const waitTimeMs = waitTimeSeconds * 1e3;
	consola.warn(`Rate limit reached. Waiting ${waitTimeSeconds} seconds before proceeding...`);
	await sleep(waitTimeMs);
	// Stamp the moment the request actually proceeds. Reusing the pre-sleep
	// `now` would make the next request look older than it is by the wait time.
	state$1.lastRequestTimestamp = Date.now();
	consola.info("Rate limit wait completed, proceeding with request");
}
2923
+
2673
2924
  //#endregion
2674
2925
  //#region src/services/copilot/create-chat-completions.ts
2675
2926
  const createChatCompletions = async (payload) => {
@@ -2704,27 +2955,39 @@ const createChatCompletions = async (payload) => {
2704
2955
 
2705
2956
  //#endregion
2706
2957
  //#region src/routes/chat-completions/handler.ts
2707
- async function handleCompletion$1(c) {
2708
- await checkRateLimit(state);
2709
- let payload = await c.req.json();
2710
- consola.debug("Request payload:", JSON.stringify(payload).slice(-400));
2958
/**
 * Calculate and log the token count, auto-truncate the context if needed,
 * and default `max_tokens` to the model's output limit when the caller left it unset.
 *
 * @param {object} payload Chat-completions payload; `payload.model` selects the model.
 * @returns {Promise<object>} The payload unchanged when the model is unknown;
 *   otherwise the (possibly truncated) payload, with `max_tokens` filled in when it was nullish.
 */
async function processPayloadTokens(payload) {
	const selectedModel = state.models?.data.find((model) => model.id === payload.model);
	if (!selectedModel) {
		consola.warn("No model selected, skipping token count calculation");
		return payload;
	}
	let result = payload;
	try {
		const tokenCount = await getTokenCount(payload, selectedModel);
		consola.info("Current token count:", tokenCount);
		result = await truncateMessages(payload, selectedModel);
	} catch (error) {
		// Token counting and truncation are best-effort: fall back to the original messages.
		consola.warn("Failed to calculate token count:", error);
	}
	// The max_tokens default must not depend on the tokenizer succeeding. With
	// the default inside the try block, a counting failure also dropped it.
	if (!isNullish(result.max_tokens)) return result;
	const withMaxTokens = {
		...result,
		max_tokens: selectedModel.capabilities?.limits?.max_output_tokens
	};
	consola.debug("Set max_tokens to:", JSON.stringify(withMaxTokens.max_tokens));
	return withMaxTokens;
}
2985
+ async function handleCompletion$1(c) {
2986
+ await checkRateLimit(state);
2987
+ const rawPayload = await c.req.json();
2988
+ consola.debug("Request payload:", JSON.stringify(rawPayload).slice(-400));
2989
+ const payload = await processPayloadTokens(rawPayload);
2720
2990
  if (state.manualApprove) await awaitApproval();
2721
- if (isNullish(payload.max_tokens)) {
2722
- payload = {
2723
- ...payload,
2724
- max_tokens: selectedModel?.capabilities.limits.max_output_tokens
2725
- };
2726
- consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens));
2727
- }
2728
2991
  const response = await createChatCompletions(payload);
2729
2992
  if (isNonStreaming$1(response)) {
2730
2993
  consola.debug("Non-streaming response:", JSON.stringify(response));
@@ -3155,10 +3418,27 @@ function translateChunkToAnthropicEvents(chunk, state$1) {
3155
3418
 
3156
3419
  //#endregion
3157
3420
  //#region src/routes/messages/handler.ts
3421
/**
 * Best-effort context truncation for the Anthropic-compatible endpoint.
 * Looks up the payload's model in `state.models` and runs truncateMessages on it.
 * The payload is returned unchanged when the model is unknown or truncation throws.
 */
async function autoTruncatePayload(payload) {
	const knownModels = state.models?.data;
	const selectedModel = knownModels.find((candidate) => candidate.id === payload.model);
	if (selectedModel) try {
		const truncated = await truncateMessages(payload, selectedModel);
		return truncated;
	} catch (error) {
		consola.warn("Failed to auto-truncate context:", error);
		return payload;
	}
	consola.warn("No model selected for Anthropic endpoint, skipping auto-truncation");
	return payload;
}
3158
3437
  async function handleCompletion(c) {
3159
3438
  await checkRateLimit(state);
3160
3439
  const anthropicPayload = await c.req.json();
3161
- const openAIPayload = translateToOpenAI(anthropicPayload);
3440
+ const rawOpenAIPayload = translateToOpenAI(anthropicPayload);
3441
+ const openAIPayload = await autoTruncatePayload(rawOpenAIPayload);
3162
3442
  if (state.manualApprove) await awaitApproval();
3163
3443
  const response = await createChatCompletions(openAIPayload);
3164
3444
  if (isNonStreaming(response)) {
@@ -3715,7 +3995,7 @@ async function runServer(options$1) {
3715
3995
  } else if (options$1.antigravity) {
3716
3996
  consola.info("Google Antigravity mode enabled");
3717
3997
  state.antigravityMode = true;
3718
- const { loadAntigravityAuth, setupAntigravity, getCurrentAccount, hasApiKey, getApiKey: getApiKey$1, setOAuthCredentials } = await import("./auth-B2lTFLSD.js");
3998
+ const { loadAntigravityAuth, setupAntigravity, getCurrentAccount, hasApiKey, getApiKey: getApiKey$1, setOAuthCredentials } = await import("./auth-BgquW2Yd.js");
3719
3999
  if (options$1.antigravityClientId && options$1.antigravityClientSecret) {
3720
4000
  setOAuthCredentials(options$1.antigravityClientId, options$1.antigravityClientSecret);
3721
4001
  consola.info("Using provided OAuth credentials from CLI");
@@ -3744,7 +4024,7 @@ async function runServer(options$1) {
3744
4024
  }
3745
4025
  if (!await getCurrentAccount() && !hasApiKey()) throw new Error("No enabled Antigravity accounts available");
3746
4026
  }
3747
- const { getAntigravityModels: getAntigravityModels$1 } = await import("./get-models-CmDpYUV-.js");
4027
+ const { getAntigravityModels: getAntigravityModels$1 } = await import("./get-models-D1vQG5Eb.js");
3748
4028
  const models = await getAntigravityModels$1();
3749
4029
  state.antigravityModels = models;
3750
4030
  consola.info(`Available Antigravity models: \n${models.data.map((model) => `- ${model.id}`).join("\n")}`);