npm - copilot-api-plus - Versions diffs - 1.0.36 → 1.0.38 - Mend

copilot-api-plus 1.0.36 → 1.0.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/dist/auth-BgquW2Yd.js +4 -0
package/dist/{auth-CM_ilreU.js → auth-Dz36Lk4o.js} +20 -7
package/dist/auth-Dz36Lk4o.js.map +1 -0
package/dist/get-models-D1vQG5Eb.js +5 -0
package/dist/{get-models-DMdiCNoU.js → get-models-VsThqHZf.js} +2 -2
package/dist/{get-models-DMdiCNoU.js.map → get-models-VsThqHZf.js.map} +1 -1
package/dist/main.js +353 -73
package/dist/main.js.map +1 -1
package/package.json +1 -1
package/dist/auth-B2lTFLSD.js +0 -4
package/dist/auth-CM_ilreU.js.map +0 -1
package/dist/get-models-CmDpYUV-.js +0 -5

package/dist/main.js CHANGED

@@ -4,9 +4,9 @@ import { state } from "./state-CcLGr8VN.js";
 import { GITHUB_API_BASE_URL, copilotBaseUrl, copilotHeaders, githubHeaders } from "./get-user-BzIEATcF.js";
 import { HTTPError, forwardError } from "./error-CvU5otz-.js";
 import { cacheModels, cacheVSCodeVersion, clearGithubToken, isNullish, setupCopilotToken, setupGitHubToken, sleep } from "./token-ClgudjZm.js";
-import { clearAntigravityAuth, disableCurrentAccount, getAntigravityAuthPath, getApiKey, getValidAccessToken, rotateAccount } from "./auth-CM_ilreU.js";
+import { clearAntigravityAuth, disableCurrentAccount, getAntigravityAuthPath, getApiKey, getCurrentProjectId, getValidAccessToken, rotateAccount } from "./auth-Dz36Lk4o.js";
 import { clearZenAuth, getZenAuthPath } from "./auth-T55-Bhoo.js";
-import { getAntigravityModels, getAntigravityUsage, isThinkingModel } from "./get-models-DMdiCNoU.js";
+import { getAntigravityModels, getAntigravityUsage, isThinkingModel } from "./get-models-VsThqHZf.js";
 import { createRequire } from "node:module";
 import { defineCommand, runMain } from "citty";
 import consola from "consola";
@@ -540,7 +540,7 @@ function initProxyFromEnv() {
 * Add a new Antigravity account via OAuth
 */
 async function addAccount() {
-	const { setupAntigravity, loadAntigravityAuth } = await import("./auth-B2lTFLSD.js");
+	const { setupAntigravity, loadAntigravityAuth } = await import("./auth-BgquW2Yd.js");
 	const existingAuth = await loadAntigravityAuth();
 	if (existingAuth && existingAuth.accounts.length > 0) {
 		const enabledCount = existingAuth.accounts.filter((a) => a.enable).length;
@@ -552,7 +552,7 @@ async function addAccount() {
 * List all Antigravity accounts
 */
 async function listAccounts() {
-	const { loadAntigravityAuth } = await import("./auth-B2lTFLSD.js");
+	const { loadAntigravityAuth } = await import("./auth-BgquW2Yd.js");
 	const auth$1 = await loadAntigravityAuth();
 	if (!auth$1 || auth$1.accounts.length === 0) {
 		consola.info("No Antigravity accounts configured");
@@ -573,7 +573,7 @@ async function listAccounts() {
 * Remove an Antigravity account by index
 */
 async function removeAccount(index) {
-	const { loadAntigravityAuth, saveAntigravityAuth } = await import("./auth-B2lTFLSD.js");
+	const { loadAntigravityAuth, saveAntigravityAuth } = await import("./auth-BgquW2Yd.js");
 	const auth$1 = await loadAntigravityAuth();
 	if (!auth$1 || auth$1.accounts.length === 0) {
 		consola.error("No Antigravity accounts configured");
@@ -592,7 +592,7 @@ async function removeAccount(index) {
 * Clear all Antigravity accounts
 */
 async function clearAccounts() {
-	const { clearAntigravityAuth: clearAntigravityAuth$1 } = await import("./auth-B2lTFLSD.js");
+	const { clearAntigravityAuth: clearAntigravityAuth$1 } = await import("./auth-BgquW2Yd.js");
 	if (await consola.prompt("Are you sure you want to remove all Antigravity accounts?", {
 		type: "confirm",
 		initial: false
@@ -1403,9 +1403,9 @@ function processChunk(chunk, state$1) {
 //#endregion
 //#region src/services/antigravity/create-chat-completions.ts
-const ANTIGRAVITY_API_HOST$1 = "daily-cloudcode-pa.sandbox.googleapis.com";
-const ANTIGRAVITY_STREAM_URL$1 = `https://${ANTIGRAVITY_API_HOST$1}/v1internal:streamGenerateContent?alt=sse`;
-const ANTIGRAVITY_NO_STREAM_URL$1 = `https://${ANTIGRAVITY_API_HOST$1}/v1internal:generateContent`;
+const ANTIGRAVITY_API_HOST = "daily-cloudcode-pa.sandbox.googleapis.com";
+const ANTIGRAVITY_STREAM_URL = `https://${ANTIGRAVITY_API_HOST}/v1internal:streamGenerateContent?alt=sse`;
+const ANTIGRAVITY_NO_STREAM_URL = `https://${ANTIGRAVITY_API_HOST}/v1internal:generateContent`;
 const ANTIGRAVITY_USER_AGENT$1 = "antigravity/1.11.3 windows/amd64";
 const GEMINI_API_HOST = "generativelanguage.googleapis.com";
 const getGeminiStreamUrl = (model, apiKey) => `https://${GEMINI_API_HOST}/v1beta/models/${model}:streamGenerateContent?alt=sse&key=${apiKey}`;
@@ -1625,14 +1625,14 @@ async function createWithApiKey(request, apiKey) {
 * Note: Both Gemini and Claude models use the same endpoint and Gemini-style format
 */
 async function createWithOAuth(request, accessToken) {
-	const endpoint = request.stream ? ANTIGRAVITY_STREAM_URL$1 : ANTIGRAVITY_NO_STREAM_URL$1;
+	const endpoint = request.stream ? ANTIGRAVITY_STREAM_URL : ANTIGRAVITY_NO_STREAM_URL;
 	const body = buildAntigravityRequestBody(request);
 	consola.debug(`Antigravity request to ${endpoint} with model ${request.model}`);
 	try {
 		const response = await fetch(endpoint, {
 			method: "POST",
 			headers: {
-				Host: ANTIGRAVITY_API_HOST$1,
+				Host: ANTIGRAVITY_API_HOST,
 				"User-Agent": ANTIGRAVITY_USER_AGENT$1,
 				Authorization: `Bearer ${accessToken}`,
 				"Content-Type": "application/json",
@@ -1831,6 +1831,49 @@ app$1.post("/", async (c) => {
 });
 const antigravityChatCompletionsRoute = app$1;
+//#endregion
+//#region src/lib/request-queue.ts
+var RequestQueue = class {
+	queue = [];
+	activeCount = 0;
+	maxConcurrent;
+	minDelayMs;
+	lastRequestTime = 0;
+	constructor(maxConcurrent = 2, minDelayMs = 300) {
+		this.maxConcurrent = maxConcurrent;
+		this.minDelayMs = minDelayMs;
+	}
+	async enqueue(execute) {
+		return new Promise((resolve, reject) => {
+			this.queue.push({
+				execute,
+				resolve,
+				reject
+			});
+			this.processQueue();
+		});
+	}
+	async processQueue() {
+		if (this.activeCount >= this.maxConcurrent || this.queue.length === 0) return;
+		const request = this.queue.shift();
+		if (!request) return;
+		this.activeCount++;
+		const elapsed = Date.now() - this.lastRequestTime;
+		if (elapsed < this.minDelayMs) await new Promise((r) => setTimeout(r, this.minDelayMs - elapsed));
+		this.lastRequestTime = Date.now();
+		try {
+			const result = await request.execute();
+			request.resolve(result);
+		} catch (error) {
+			request.reject(error);
+		} finally {
+			this.activeCount--;
+			this.processQueue();
+		}
+	}
+};
+const antigravityQueue = new RequestQueue(2, 500);
 //#endregion
 //#region src/services/antigravity/anthropic-events.ts
 /**
@@ -1993,10 +2036,49 @@ function generateToolId() {
 //#endregion
 //#region src/services/antigravity/create-messages.ts
-const ANTIGRAVITY_API_HOST = "daily-cloudcode-pa.sandbox.googleapis.com";
-const ANTIGRAVITY_STREAM_URL = `https://${ANTIGRAVITY_API_HOST}/v1internal:streamGenerateContent?alt=sse`;
-const ANTIGRAVITY_NO_STREAM_URL = `https://${ANTIGRAVITY_API_HOST}/v1internal:generateContent`;
+const ANTIGRAVITY_ENDPOINTS = ["daily-cloudcode-pa.sandbox.googleapis.com", "cloudcode-pa.googleapis.com"];
+let currentEndpointIndex = 0;
+function getStreamUrl(host) {
+	return `https://${host}/v1internal:streamGenerateContent?alt=sse`;
+}
+function getNoStreamUrl(host) {
+	return `https://${host}/v1internal:generateContent`;
+}
+function getCurrentHost() {
+	return ANTIGRAVITY_ENDPOINTS[currentEndpointIndex];
+}
+function rotateEndpoint() {
+	const oldIndex = currentEndpointIndex;
+	currentEndpointIndex = (currentEndpointIndex + 1) % ANTIGRAVITY_ENDPOINTS.length;
+	consola.info(`Rotating endpoint: ${ANTIGRAVITY_ENDPOINTS[oldIndex]} → ${ANTIGRAVITY_ENDPOINTS[currentEndpointIndex]}`);
+}
 const ANTIGRAVITY_USER_AGENT = "antigravity/1.11.3 windows/amd64";
+const rateLimitTracker = {};
+function getModelFamily(model) {
+	if (model.includes("claude")) return "claude";
+	if (model.includes("gemini")) return "gemini";
+	return "other";
+}
+function trackRateLimit(model) {
+	const family = getModelFamily(model);
+	if (!rateLimitTracker[family]) rateLimitTracker[family] = {
+		lastLimitTime: 0,
+		consecutiveErrors: 0
+	};
+	rateLimitTracker[family].lastLimitTime = Date.now();
+	rateLimitTracker[family].consecutiveErrors++;
+}
+function clearRateLimitTracker(model) {
+	const family = getModelFamily(model);
+	if (rateLimitTracker[family]) rateLimitTracker[family].consecutiveErrors = 0;
+}
+function getBackoffDelay(model, baseDelay) {
+	const family = getModelFamily(model);
+	const info = rateLimitTracker[family];
+	if (!info) return baseDelay;
+	const multiplier = Math.min(Math.pow(2, info.consecutiveErrors - 1), 60);
+	return Math.min(baseDelay * multiplier, 3e4);
+}
 /**
 * Extract text from system content (can be string or array)
 */
@@ -2118,7 +2200,7 @@ function convertTools(tools) {
 * Build Antigravity request body
 * The Antigravity API expects a specific nested structure with request object
 */
-function buildGeminiRequest(request) {
+function buildGeminiRequest(request, projectId) {
 	const { contents, systemInstruction } = convertMessages(request.messages, request.system);
 	const tools = convertTools(request.tools);
 	const innerRequest = {
@@ -2136,12 +2218,14 @@ function buildGeminiRequest(request) {
 		...innerRequest.generationConfig,
 		thinkingConfig: { includeThoughts: true }
 	};
-	return {
+	const result = {
 		model: request.model,
 		userAgent: "antigravity",
 		requestId: `agent-${crypto.randomUUID()}`,
 		request: innerRequest
 	};
+	if (projectId) result.project = projectId;
+	return result;
 }
 /**
 * Create error response
@@ -2161,20 +2245,29 @@ function createErrorResponse(type, message, status) {
 /**
 * Create Anthropic-compatible message response using Antigravity
 * Note: Both Gemini and Claude models use the same endpoint and Gemini-style format
+*
+* Features:
+* - Endpoint fallback (daily → prod)
+* - Per-model-family rate limit tracking
+* - Exponential backoff for consecutive errors
+* - Smart retry for short delays (≤5s on same endpoint)
 */
 const MAX_RETRIES$3 = 5;
-async function createAntigravityMessages(request) {
-	const endpoint = request.stream ? ANTIGRAVITY_STREAM_URL : ANTIGRAVITY_NO_STREAM_URL;
-	const body = buildGeminiRequest(request);
+const MAX_ENDPOINT_RETRIES = 2;
+async function executeAntigravityRequest(request) {
+	const projectId = await getCurrentProjectId();
+	const body = buildGeminiRequest(request, projectId);
+	let endpointRetries = 0;
 	for (let attempt = 0; attempt <= MAX_RETRIES$3; attempt++) {
+		const host = getCurrentHost();
+		const endpoint = request.stream ? getStreamUrl(host) : getNoStreamUrl(host);
 		const accessToken = await getValidAccessToken();
-		if (!accessToken) return createErrorResponse("authentication_error", "No valid Antigravity access token available. Please run login first.", 401);
-		consola.debug(`Antigravity request to ${endpoint} (attempt ${attempt + 1}/${MAX_RETRIES$3 + 1})`);
+		if (!accessToken) return createErrorResponse("authentication_error", "No valid Antigravity access token available.", 401);
 		try {
 			const response = await fetch(endpoint, {
 				method: "POST",
 				headers: {
-					Host: ANTIGRAVITY_API_HOST,
+					Host: host,
 					"User-Agent": ANTIGRAVITY_USER_AGENT,
 					Authorization: `Bearer ${accessToken}`,
 					"Content-Type": "application/json",
@@ -2182,17 +2275,33 @@ async function createAntigravityMessages(request) {
 				},
 				body: JSON.stringify(body)
 			});
-			if (response.ok) return request.stream ? transformStreamResponse(response, request.model) : await transformNonStreamResponse(response, request.model);
-			const errorResult = await handleApiError(response);
+			if (response.ok) {
+				clearRateLimitTracker(request.model);
+				return request.stream ? transformStreamResponse(response, request.model) : await transformNonStreamResponse(response, request.model);
+			}
+			const errorResult = await handleApiError(response, request.model);
 			if (errorResult.shouldRetry && attempt < MAX_RETRIES$3) {
-				consola.info(`Rate limited, retrying in ${errorResult.retryDelayMs}ms...`);
-				await sleep(errorResult.retryDelayMs);
+				trackRateLimit(request.model);
+				const backoffDelay = getBackoffDelay(request.model, errorResult.retryDelayMs);
+				if (backoffDelay <= 5e3 || endpointRetries >= MAX_ENDPOINT_RETRIES) {
+					consola.info(`Rate limited, retrying in ${backoffDelay}ms (attempt ${attempt + 1}/${MAX_RETRIES$3})`);
+					await sleep(backoffDelay);
+				} else {
+					rotateEndpoint();
+					endpointRetries++;
+					consola.info(`Switching endpoint, retrying in ${errorResult.retryDelayMs}ms`);
+					await sleep(errorResult.retryDelayMs);
+				}
 				continue;
 			}
 			return errorResult.response;
 		} catch (error) {
-			consola.error("Antigravity messages request error:", error);
+			consola.error("Antigravity request error:", error);
 			if (attempt < MAX_RETRIES$3) {
+				if (endpointRetries < MAX_ENDPOINT_RETRIES) {
+					rotateEndpoint();
+					endpointRetries++;
+				}
 				await sleep(500);
 				continue;
 			}
@@ -2201,12 +2310,20 @@ async function createAntigravityMessages(request) {
 	}
 	return createErrorResponse("api_error", "Max retries exceeded", 429);
 }
+async function createAntigravityMessages(request) {
+	return antigravityQueue.enqueue(() => executeAntigravityRequest(request));
+}
 /**
 * Parse retry delay from error response
+* Supports multiple formats:
+* - RetryInfo.retryDelay: "3.5s"
+* - quotaResetDelay: "3000ms" or "3s"
+* - message: "Your quota will reset after 3s"
 */
 function parseRetryDelay$3(errorText) {
 	try {
-		const details = JSON.parse(errorText).error?.details ?? [];
+		const errorData = JSON.parse(errorText);
+		const details = errorData.error?.details ?? [];
 		for (const detail of details) {
 			if (detail["@type"]?.includes("RetryInfo") && detail.retryDelay) {
 				const match = /(\d+(?:\.\d+)?)s/.exec(detail.retryDelay);
@@ -2220,13 +2337,16 @@ function parseRetryDelay$3(errorText) {
 				}
 			}
 		}
+		const message = errorData.error?.message ?? "";
+		const resetMatch = /quota will reset after (\d+(?:\.\d+)?)s/i.exec(message);
+		if (resetMatch) return Math.ceil(Number.parseFloat(resetMatch[1]) * 1e3);
 	} catch {}
 	return 500;
 }
 /**
 * Handle API error response
 */
-async function handleApiError(response) {
+async function handleApiError(response, _model) {
 	const errorText = await response.text();
 	consola.error(`Antigravity error: ${response.status} ${errorText}`);
 	if (response.status === 403) await disableCurrentAccount();
@@ -2447,32 +2567,6 @@ const awaitApproval = async () => {
 	if (!await consola.prompt(`Accept incoming request?`, { type: "confirm" })) throw new HTTPError("Request rejected", Response.json({ message: "Request rejected" }, { status: 403 }));
 };
-//#endregion
-//#region src/lib/rate-limit.ts
-async function checkRateLimit(state$1) {
-	if (state$1.rateLimitSeconds === void 0) return;
-	const now = Date.now();
-	if (!state$1.lastRequestTimestamp) {
-		state$1.lastRequestTimestamp = now;
-		return;
-	}
-	const elapsedSeconds = (now - state$1.lastRequestTimestamp) / 1e3;
-	if (elapsedSeconds > state$1.rateLimitSeconds) {
-		state$1.lastRequestTimestamp = now;
-		return;
-	}
-	const waitTimeSeconds = Math.ceil(state$1.rateLimitSeconds - elapsedSeconds);
-	if (!state$1.rateLimitWait) {
-		consola.warn(`Rate limit exceeded. Need to wait ${waitTimeSeconds} more seconds.`);
-		throw new HTTPError("Rate limit exceeded", Response.json({ message: "Rate limit exceeded" }, { status: 429 }));
-	}
-	const waitTimeMs = waitTimeSeconds * 1e3;
-	consola.warn(`Rate limit reached. Waiting ${waitTimeSeconds} seconds before proceeding...`);
-	await sleep(waitTimeMs);
-	state$1.lastRequestTimestamp = now;
-	consola.info("Rate limit wait completed, proceeding with request");
-}
 //#endregion
 //#region src/lib/tokenizer.ts
 const ENCODING_MAP = {
@@ -2670,6 +2764,163 @@ const getTokenCount = async (payload, model) => {
 	};
 };
+//#endregion
+//#region src/lib/context-compression.ts
+/**
+* Get the maximum prompt token limit for a model.
+* Prefers max_prompt_tokens, falls back to max_context_window_tokens minus max_output_tokens.
+*/
+const getMaxPromptTokens = (model) => {
+	const limits = model.capabilities.limits;
+	if (limits.max_prompt_tokens) return limits.max_prompt_tokens;
+	if (limits.max_context_window_tokens) {
+		const outputReserve = limits.max_output_tokens ?? 4096;
+		return limits.max_context_window_tokens - outputReserve;
+	}
+};
+/**
+* Check if a message is a tool-related message (tool call or tool result).
+* Tool messages must be kept together with their paired assistant message.
+*/
+const isToolMessage = (message) => {
+	return message.role === "tool";
+};
+/**
+* Check if an assistant message contains tool calls.
+*/
+const hasToolCalls = (message) => {
+	return message.role === "assistant" && Array.isArray(message.tool_calls) && message.tool_calls.length > 0;
+};
+const groupMessages = (messages) => {
+	const groups = [];
+	let i = 0;
+	while (i < messages.length) {
+		const message = messages[i];
+		if (message.role === "system" || message.role === "developer") {
+			groups.push({
+				messages: [message],
+				isSystem: true,
+				isRecent: false
+			});
+			i++;
+			continue;
+		}
+		if (hasToolCalls(message)) {
+			const group = [message];
+			let j = i + 1;
+			while (j < messages.length && isToolMessage(messages[j])) {
+				group.push(messages[j]);
+				j++;
+			}
+			groups.push({
+				messages: group,
+				isSystem: false,
+				isRecent: false
+			});
+			i = j;
+			continue;
+		}
+		groups.push({
+			messages: [message],
+			isSystem: false,
+			isRecent: false
+		});
+		i++;
+	}
+	return groups;
+};
+/**
+* Create a truncation notice message to inform the model that earlier context was removed.
+*/
+const createTruncationNotice = () => ({
+	role: "user",
+	content: "[Note: Earlier conversation history was automatically truncated to fit within the model's context window. The most recent messages have been preserved.]"
+});
+/**
+* Intelligently truncate messages to fit within the model's token limit.
+*
+* Strategy:
+* 1. Always preserve system/developer messages (they contain critical instructions)
+* 2. Always preserve the most recent messages (they contain the current task context)
+* 3. Remove middle conversation messages, oldest first
+* 4. Insert a truncation notice where messages were removed
+* 5. Keep tool call/result pairs together (never split them)
+*
+* Safety margin: keeps 5% below the limit to account for token counting inaccuracies.
+*/
+const truncateMessages = async (payload, model) => {
+	const maxPromptTokens = getMaxPromptTokens(model);
+	if (!maxPromptTokens) {
+		consola.debug("No token limit found for model, skipping truncation");
+		return payload;
+	}
+	const tokenCount = await getTokenCount(payload, model);
+	const safeLimit = Math.floor(maxPromptTokens * .95);
+	if (tokenCount.input <= safeLimit) return payload;
+	consola.warn(`Prompt tokens (${tokenCount.input}) exceed safe limit (${safeLimit}/${maxPromptTokens}). Auto-truncating context...`);
+	const groups = groupMessages(payload.messages);
+	const systemGroups = groups.filter((g) => g.isSystem);
+	const conversationGroups = groups.filter((g) => !g.isSystem);
+	if (conversationGroups.length === 0) {
+		consola.warn("No conversation messages to truncate, only system messages");
+		return payload;
+	}
+	let truncatedPayload = payload;
+	let dropCount = 0;
+	const maxDrop = Math.max(0, conversationGroups.length - 1);
+	while (dropCount <= maxDrop) {
+		const keptConversationGroups = conversationGroups.slice(dropCount);
+		const truncationNotice = dropCount > 0 ? [createTruncationNotice()] : [];
+		const newMessages = [
+			...systemGroups.flatMap((g) => g.messages),
+			...truncationNotice,
+			...keptConversationGroups.flatMap((g) => g.messages)
+		];
+		truncatedPayload = {
+			...payload,
+			messages: newMessages
+		};
+		const newTokenCount = await getTokenCount(truncatedPayload, model);
+		if (newTokenCount.input <= safeLimit) {
+			if (dropCount > 0) {
+				const droppedMessages = conversationGroups.slice(0, dropCount).reduce((sum, g) => sum + g.messages.length, 0);
+				consola.info(`Truncated ${droppedMessages} messages (${dropCount} conversation groups). Tokens: ${tokenCount.input} -> ${newTokenCount.input} (limit: ${maxPromptTokens})`);
+			}
+			return truncatedPayload;
+		}
+		dropCount++;
+	}
+	const finalTokenCount = await getTokenCount(truncatedPayload, model);
+	consola.warn(`Could not reduce tokens below limit even after maximum truncation. Current: ${finalTokenCount.input}, limit: ${maxPromptTokens}. System messages or the last message may be too large.`);
+	return truncatedPayload;
+};
+//#endregion
+//#region src/lib/rate-limit.ts
+async function checkRateLimit(state$1) {
+	if (state$1.rateLimitSeconds === void 0) return;
+	const now = Date.now();
+	if (!state$1.lastRequestTimestamp) {
+		state$1.lastRequestTimestamp = now;
+		return;
+	}
+	const elapsedSeconds = (now - state$1.lastRequestTimestamp) / 1e3;
+	if (elapsedSeconds > state$1.rateLimitSeconds) {
+		state$1.lastRequestTimestamp = now;
+		return;
+	}
+	const waitTimeSeconds = Math.ceil(state$1.rateLimitSeconds - elapsedSeconds);
+	if (!state$1.rateLimitWait) {
+		consola.warn(`Rate limit exceeded. Need to wait ${waitTimeSeconds} more seconds.`);
+		throw new HTTPError("Rate limit exceeded", Response.json({ message: "Rate limit exceeded" }, { status: 429 }));
+	}
+	const waitTimeMs = waitTimeSeconds * 1e3;
+	consola.warn(`Rate limit reached. Waiting ${waitTimeSeconds} seconds before proceeding...`);
+	await sleep(waitTimeMs);
+	state$1.lastRequestTimestamp = now;
+	consola.info("Rate limit wait completed, proceeding with request");
+}
 //#endregion
 //#region src/services/copilot/create-chat-completions.ts
 const createChatCompletions = async (payload) => {
@@ -2704,27 +2955,39 @@ const createChatCompletions = async (payload) => {
 //#endregion
 //#region src/routes/chat-completions/handler.ts
-async function handleCompletion$1(c) {
-	await checkRateLimit(state);
-	let payload = await c.req.json();
-	consola.debug("Request payload:", JSON.stringify(payload).slice(-400));
+/**
+* Calculate token count, log it, and auto-truncate if needed.
+*/
+async function processPayloadTokens(payload) {
 	const selectedModel = state.models?.data.find((model) => model.id === payload.model);
+	if (!selectedModel) {
+		consola.warn("No model selected, skipping token count calculation");
+		return payload;
+	}
 	try {
-		if (selectedModel) {
-			const tokenCount = await getTokenCount(payload, selectedModel);
-			consola.info("Current token count:", tokenCount);
-		} else consola.warn("No model selected, skipping token count calculation");
+		const tokenCount = await getTokenCount(payload, selectedModel);
+		consola.info("Current token count:", tokenCount);
+		const truncated = await truncateMessages(payload, selectedModel);
+		if (isNullish(truncated.max_tokens)) {
+			const withMaxTokens = {
+				...truncated,
+				max_tokens: selectedModel.capabilities.limits.max_output_tokens
+			};
+			consola.debug("Set max_tokens to:", JSON.stringify(withMaxTokens.max_tokens));
+			return withMaxTokens;
+		}
+		return truncated;
 	} catch (error) {
 		consola.warn("Failed to calculate token count:", error);
+		return payload;
 	}
+}
+async function handleCompletion$1(c) {
+	await checkRateLimit(state);
+	const rawPayload = await c.req.json();
+	consola.debug("Request payload:", JSON.stringify(rawPayload).slice(-400));
+	const payload = await processPayloadTokens(rawPayload);
 	if (state.manualApprove) await awaitApproval();
-	if (isNullish(payload.max_tokens)) {
-		payload = {
-			...payload,
-			max_tokens: selectedModel?.capabilities.limits.max_output_tokens
-		};
-		consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens));
-	}
 	const response = await createChatCompletions(payload);
 	if (isNonStreaming$1(response)) {
 		consola.debug("Non-streaming response:", JSON.stringify(response));
@@ -3155,10 +3418,27 @@ function translateChunkToAnthropicEvents(chunk, state$1) {
 //#endregion
 //#region src/routes/messages/handler.ts
+/**
+* Auto-truncate OpenAI payload if prompt tokens exceed model limit.
+*/
+async function autoTruncatePayload(payload) {
+	const selectedModel = state.models?.data.find((model) => model.id === payload.model);
+	if (!selectedModel) {
+		consola.warn("No model selected for Anthropic endpoint, skipping auto-truncation");
+		return payload;
+	}
+	try {
+		return await truncateMessages(payload, selectedModel);
+	} catch (error) {
+		consola.warn("Failed to auto-truncate context:", error);
+		return payload;
+	}
+}
 async function handleCompletion(c) {
 	await checkRateLimit(state);
 	const anthropicPayload = await c.req.json();
-	const openAIPayload = translateToOpenAI(anthropicPayload);
+	const rawOpenAIPayload = translateToOpenAI(anthropicPayload);
+	const openAIPayload = await autoTruncatePayload(rawOpenAIPayload);
 	if (state.manualApprove) await awaitApproval();
 	const response = await createChatCompletions(openAIPayload);
 	if (isNonStreaming(response)) {
@@ -3715,7 +3995,7 @@ async function runServer(options$1) {
 	} else if (options$1.antigravity) {
 		consola.info("Google Antigravity mode enabled");
 		state.antigravityMode = true;
-		const { loadAntigravityAuth, setupAntigravity, getCurrentAccount, hasApiKey, getApiKey: getApiKey$1, setOAuthCredentials } = await import("./auth-B2lTFLSD.js");
+		const { loadAntigravityAuth, setupAntigravity, getCurrentAccount, hasApiKey, getApiKey: getApiKey$1, setOAuthCredentials } = await import("./auth-BgquW2Yd.js");
 		if (options$1.antigravityClientId && options$1.antigravityClientSecret) {
 			setOAuthCredentials(options$1.antigravityClientId, options$1.antigravityClientSecret);
 			consola.info("Using provided OAuth credentials from CLI");
@@ -3744,7 +4024,7 @@ async function runServer(options$1) {
 			}
 			if (!await getCurrentAccount() && !hasApiKey()) throw new Error("No enabled Antigravity accounts available");
 		}
-		const { getAntigravityModels: getAntigravityModels$1 } = await import("./get-models-CmDpYUV-.js");
+		const { getAntigravityModels: getAntigravityModels$1 } = await import("./get-models-D1vQG5Eb.js");
 		const models = await getAntigravityModels$1();
 		state.antigravityModels = models;
 		consola.info(`Available Antigravity models: \n${models.data.map((model) => `- ${model.id}`).join("\n")}`);