copilot-api-plus 1.0.37 → 1.0.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auth-BgquW2Yd.js +4 -0
- package/dist/{auth-CM_ilreU.js → auth-Dz36Lk4o.js} +20 -7
- package/dist/auth-Dz36Lk4o.js.map +1 -0
- package/dist/get-models-D1vQG5Eb.js +5 -0
- package/dist/{get-models-DMdiCNoU.js → get-models-VsThqHZf.js} +2 -2
- package/dist/{get-models-DMdiCNoU.js.map → get-models-VsThqHZf.js.map} +1 -1
- package/dist/main.js +304 -69
- package/dist/main.js.map +1 -1
- package/package.json +1 -1
- package/dist/auth-B2lTFLSD.js +0 -4
- package/dist/auth-CM_ilreU.js.map +0 -1
- package/dist/get-models-CmDpYUV-.js +0 -5
package/dist/main.js
CHANGED
|
@@ -4,9 +4,9 @@ import { state } from "./state-CcLGr8VN.js";
|
|
|
4
4
|
import { GITHUB_API_BASE_URL, copilotBaseUrl, copilotHeaders, githubHeaders } from "./get-user-BzIEATcF.js";
|
|
5
5
|
import { HTTPError, forwardError } from "./error-CvU5otz-.js";
|
|
6
6
|
import { cacheModels, cacheVSCodeVersion, clearGithubToken, isNullish, setupCopilotToken, setupGitHubToken, sleep } from "./token-ClgudjZm.js";
|
|
7
|
-
import { clearAntigravityAuth, disableCurrentAccount, getAntigravityAuthPath, getApiKey, getValidAccessToken, rotateAccount } from "./auth-
|
|
7
|
+
import { clearAntigravityAuth, disableCurrentAccount, getAntigravityAuthPath, getApiKey, getCurrentProjectId, getValidAccessToken, rotateAccount } from "./auth-Dz36Lk4o.js";
|
|
8
8
|
import { clearZenAuth, getZenAuthPath } from "./auth-T55-Bhoo.js";
|
|
9
|
-
import { getAntigravityModels, getAntigravityUsage, isThinkingModel } from "./get-models-
|
|
9
|
+
import { getAntigravityModels, getAntigravityUsage, isThinkingModel } from "./get-models-VsThqHZf.js";
|
|
10
10
|
import { createRequire } from "node:module";
|
|
11
11
|
import { defineCommand, runMain } from "citty";
|
|
12
12
|
import consola from "consola";
|
|
@@ -540,7 +540,7 @@ function initProxyFromEnv() {
|
|
|
540
540
|
* Add a new Antigravity account via OAuth
|
|
541
541
|
*/
|
|
542
542
|
async function addAccount() {
|
|
543
|
-
const { setupAntigravity, loadAntigravityAuth } = await import("./auth-
|
|
543
|
+
const { setupAntigravity, loadAntigravityAuth } = await import("./auth-BgquW2Yd.js");
|
|
544
544
|
const existingAuth = await loadAntigravityAuth();
|
|
545
545
|
if (existingAuth && existingAuth.accounts.length > 0) {
|
|
546
546
|
const enabledCount = existingAuth.accounts.filter((a) => a.enable).length;
|
|
@@ -552,7 +552,7 @@ async function addAccount() {
|
|
|
552
552
|
* List all Antigravity accounts
|
|
553
553
|
*/
|
|
554
554
|
async function listAccounts() {
|
|
555
|
-
const { loadAntigravityAuth } = await import("./auth-
|
|
555
|
+
const { loadAntigravityAuth } = await import("./auth-BgquW2Yd.js");
|
|
556
556
|
const auth$1 = await loadAntigravityAuth();
|
|
557
557
|
if (!auth$1 || auth$1.accounts.length === 0) {
|
|
558
558
|
consola.info("No Antigravity accounts configured");
|
|
@@ -573,7 +573,7 @@ async function listAccounts() {
|
|
|
573
573
|
* Remove an Antigravity account by index
|
|
574
574
|
*/
|
|
575
575
|
async function removeAccount(index) {
|
|
576
|
-
const { loadAntigravityAuth, saveAntigravityAuth } = await import("./auth-
|
|
576
|
+
const { loadAntigravityAuth, saveAntigravityAuth } = await import("./auth-BgquW2Yd.js");
|
|
577
577
|
const auth$1 = await loadAntigravityAuth();
|
|
578
578
|
if (!auth$1 || auth$1.accounts.length === 0) {
|
|
579
579
|
consola.error("No Antigravity accounts configured");
|
|
@@ -592,7 +592,7 @@ async function removeAccount(index) {
|
|
|
592
592
|
* Clear all Antigravity accounts
|
|
593
593
|
*/
|
|
594
594
|
async function clearAccounts() {
|
|
595
|
-
const { clearAntigravityAuth: clearAntigravityAuth$1 } = await import("./auth-
|
|
595
|
+
const { clearAntigravityAuth: clearAntigravityAuth$1 } = await import("./auth-BgquW2Yd.js");
|
|
596
596
|
if (await consola.prompt("Are you sure you want to remove all Antigravity accounts?", {
|
|
597
597
|
type: "confirm",
|
|
598
598
|
initial: false
|
|
@@ -1403,9 +1403,9 @@ function processChunk(chunk, state$1) {
|
|
|
1403
1403
|
|
|
1404
1404
|
//#endregion
|
|
1405
1405
|
//#region src/services/antigravity/create-chat-completions.ts
|
|
1406
|
-
const ANTIGRAVITY_API_HOST
|
|
1407
|
-
const ANTIGRAVITY_STREAM_URL
|
|
1408
|
-
const ANTIGRAVITY_NO_STREAM_URL
|
|
1406
|
+
const ANTIGRAVITY_API_HOST = "daily-cloudcode-pa.sandbox.googleapis.com";
|
|
1407
|
+
const ANTIGRAVITY_STREAM_URL = `https://${ANTIGRAVITY_API_HOST}/v1internal:streamGenerateContent?alt=sse`;
|
|
1408
|
+
const ANTIGRAVITY_NO_STREAM_URL = `https://${ANTIGRAVITY_API_HOST}/v1internal:generateContent`;
|
|
1409
1409
|
const ANTIGRAVITY_USER_AGENT$1 = "antigravity/1.11.3 windows/amd64";
|
|
1410
1410
|
const GEMINI_API_HOST = "generativelanguage.googleapis.com";
|
|
1411
1411
|
const getGeminiStreamUrl = (model, apiKey) => `https://${GEMINI_API_HOST}/v1beta/models/${model}:streamGenerateContent?alt=sse&key=${apiKey}`;
|
|
@@ -1625,14 +1625,14 @@ async function createWithApiKey(request, apiKey) {
|
|
|
1625
1625
|
* Note: Both Gemini and Claude models use the same endpoint and Gemini-style format
|
|
1626
1626
|
*/
|
|
1627
1627
|
async function createWithOAuth(request, accessToken) {
|
|
1628
|
-
const endpoint = request.stream ? ANTIGRAVITY_STREAM_URL
|
|
1628
|
+
const endpoint = request.stream ? ANTIGRAVITY_STREAM_URL : ANTIGRAVITY_NO_STREAM_URL;
|
|
1629
1629
|
const body = buildAntigravityRequestBody(request);
|
|
1630
1630
|
consola.debug(`Antigravity request to ${endpoint} with model ${request.model}`);
|
|
1631
1631
|
try {
|
|
1632
1632
|
const response = await fetch(endpoint, {
|
|
1633
1633
|
method: "POST",
|
|
1634
1634
|
headers: {
|
|
1635
|
-
Host: ANTIGRAVITY_API_HOST
|
|
1635
|
+
Host: ANTIGRAVITY_API_HOST,
|
|
1636
1636
|
"User-Agent": ANTIGRAVITY_USER_AGENT$1,
|
|
1637
1637
|
Authorization: `Bearer ${accessToken}`,
|
|
1638
1638
|
"Content-Type": "application/json",
|
|
@@ -2036,10 +2036,49 @@ function generateToolId() {
|
|
|
2036
2036
|
|
|
2037
2037
|
//#endregion
|
|
2038
2038
|
//#region src/services/antigravity/create-messages.ts
|
|
2039
|
-
const
|
|
2040
|
-
|
|
2041
|
-
|
|
2039
|
+
/** Candidate API hosts in fallback order; getCurrentHost() reads the active one and rotateEndpoint() advances to the next (wrapping around). */
const ANTIGRAVITY_ENDPOINTS = ["daily-cloudcode-pa.sandbox.googleapis.com", "cloudcode-pa.googleapis.com"];
|
|
2040
|
+
/** Index into ANTIGRAVITY_ENDPOINTS of the active host. Module-level, so a rotation affects every later request in this process. */
let currentEndpointIndex = 0;
|
|
2041
|
+
/**
 * Build the streaming (SSE) generateContent URL for an Antigravity host.
 *
 * @param {string} host - Host name without scheme or path.
 * @returns {string} Absolute HTTPS URL.
 */
function getStreamUrl(host) {
  const streamPath = "/v1internal:streamGenerateContent?alt=sse";
  return `https://${host}${streamPath}`;
}
|
|
2044
|
+
/**
 * Build the non-streaming generateContent URL for an Antigravity host.
 *
 * @param {string} host - Host name without scheme or path.
 * @returns {string} Absolute HTTPS URL.
 */
function getNoStreamUrl(host) {
  const generatePath = "/v1internal:generateContent";
  return `https://${host}${generatePath}`;
}
|
|
2047
|
+
/**
 * Look up the currently selected Antigravity host.
 *
 * @returns {string} The ANTIGRAVITY_ENDPOINTS entry at currentEndpointIndex.
 */
function getCurrentHost() {
  const activeHost = ANTIGRAVITY_ENDPOINTS[currentEndpointIndex];
  return activeHost;
}
|
|
2050
|
+
/**
 * Advance currentEndpointIndex to the next entry of ANTIGRAVITY_ENDPOINTS,
 * wrapping back to the first after the last, and log the switch.
 */
function rotateEndpoint() {
  const previousHost = ANTIGRAVITY_ENDPOINTS[currentEndpointIndex];
  const nextIndex = currentEndpointIndex + 1;
  currentEndpointIndex = nextIndex % ANTIGRAVITY_ENDPOINTS.length;
  const nextHost = ANTIGRAVITY_ENDPOINTS[currentEndpointIndex];
  consola.info(`Rotating endpoint: ${previousHost} → ${nextHost}`);
}
|
|
2042
2055
|
const ANTIGRAVITY_USER_AGENT = "antigravity/1.11.3 windows/amd64";
|
|
2056
|
+
const rateLimitTracker = {};
|
|
2057
|
+
/**
 * Classify a model id into the family used for rate-limit tracking.
 *
 * @param {string} model - Model id; matched by (case-sensitive) substring.
 * @returns {"claude" | "gemini" | "other"} "claude" takes precedence over "gemini".
 */
function getModelFamily(model) {
  for (const family of ["claude", "gemini"]) {
    if (model.includes(family)) return family;
  }
  return "other";
}
|
|
2062
|
+
/**
 * Record a rate-limit hit for the model's family: create the tracker entry on
 * first use, stamp the current time and bump the consecutive-error counter.
 *
 * @param {string} model - Model id; mapped to a family via getModelFamily().
 */
function trackRateLimit(model) {
  const family = getModelFamily(model);
  let entry = rateLimitTracker[family];
  if (!entry) {
    entry = { lastLimitTime: 0, consecutiveErrors: 0 };
    rateLimitTracker[family] = entry;
  }
  entry.lastLimitTime = Date.now();
  entry.consecutiveErrors += 1;
}
|
|
2071
|
+
/**
 * Reset the consecutive-error counter for the model's family.
 * Does nothing when the family has no tracker entry; lastLimitTime is left as is.
 *
 * @param {string} model - Model id; mapped to a family via getModelFamily().
 */
function clearRateLimitTracker(model) {
  const entry = rateLimitTracker[getModelFamily(model)];
  if (!entry) return;
  entry.consecutiveErrors = 0;
}
|
|
2075
|
+
/**
 * Compute how long to wait before retrying a rate-limited request.
 *
 * The base delay is doubled for each consecutive rate-limit error recorded
 * for the model's family (1x, 2x, 4x, ... capped at 60x), and the result is
 * capped at 30 000 ms.
 *
 * @param {string} model - Model id; mapped to a family via getModelFamily().
 * @param {number} baseDelay - Base delay in milliseconds.
 * @returns {number} Delay in milliseconds; `baseDelay` unchanged when the
 *   family has no tracker entry.
 */
function getBackoffDelay(model, baseDelay) {
  const info = rateLimitTracker[getModelFamily(model)];
  if (!info) return baseDelay;
  // consecutiveErrors is 0 right after clearRateLimitTracker(); without the
  // clamp the exponent would be -1 and the multiplier 0.5, i.e. a retry
  // sooner than the base delay.
  const exponent = Math.max(0, info.consecutiveErrors - 1);
  const multiplier = Math.min(2 ** exponent, 60);
  return Math.min(baseDelay * multiplier, 3e4);
}
|
|
2043
2082
|
/**
|
|
2044
2083
|
* Extract text from system content (can be string or array)
|
|
2045
2084
|
*/
|
|
@@ -2161,7 +2200,7 @@ function convertTools(tools) {
|
|
|
2161
2200
|
* Build Antigravity request body
|
|
2162
2201
|
* The Antigravity API expects a specific nested structure with request object
|
|
2163
2202
|
*/
|
|
2164
|
-
function buildGeminiRequest(request) {
|
|
2203
|
+
function buildGeminiRequest(request, projectId) {
|
|
2165
2204
|
const { contents, systemInstruction } = convertMessages(request.messages, request.system);
|
|
2166
2205
|
const tools = convertTools(request.tools);
|
|
2167
2206
|
const innerRequest = {
|
|
@@ -2179,12 +2218,14 @@ function buildGeminiRequest(request) {
|
|
|
2179
2218
|
...innerRequest.generationConfig,
|
|
2180
2219
|
thinkingConfig: { includeThoughts: true }
|
|
2181
2220
|
};
|
|
2182
|
-
|
|
2221
|
+
const result = {
|
|
2183
2222
|
model: request.model,
|
|
2184
2223
|
userAgent: "antigravity",
|
|
2185
2224
|
requestId: `agent-${crypto.randomUUID()}`,
|
|
2186
2225
|
request: innerRequest
|
|
2187
2226
|
};
|
|
2227
|
+
if (projectId) result.project = projectId;
|
|
2228
|
+
return result;
|
|
2188
2229
|
}
|
|
2189
2230
|
/**
|
|
2190
2231
|
* Create error response
|
|
@@ -2204,19 +2245,29 @@ function createErrorResponse(type, message, status) {
|
|
|
2204
2245
|
/**
|
|
2205
2246
|
* Create Anthropic-compatible message response using Antigravity
|
|
2206
2247
|
* Note: Both Gemini and Claude models use the same endpoint and Gemini-style format
|
|
2248
|
+
*
|
|
2249
|
+
* Features:
|
|
2250
|
+
* - Endpoint fallback (daily → prod)
|
|
2251
|
+
* - Per-model-family rate limit tracking
|
|
2252
|
+
* - Exponential backoff for consecutive errors
|
|
2253
|
+
* - Smart retry for short delays (≤5s on same endpoint)
|
|
2207
2254
|
*/
|
|
2208
2255
|
const MAX_RETRIES$3 = 5;
|
|
2256
|
+
const MAX_ENDPOINT_RETRIES = 2;
|
|
2209
2257
|
async function executeAntigravityRequest(request) {
|
|
2210
|
-
const
|
|
2211
|
-
const body = buildGeminiRequest(request);
|
|
2258
|
+
const projectId = await getCurrentProjectId();
|
|
2259
|
+
const body = buildGeminiRequest(request, projectId);
|
|
2260
|
+
let endpointRetries = 0;
|
|
2212
2261
|
for (let attempt = 0; attempt <= MAX_RETRIES$3; attempt++) {
|
|
2262
|
+
const host = getCurrentHost();
|
|
2263
|
+
const endpoint = request.stream ? getStreamUrl(host) : getNoStreamUrl(host);
|
|
2213
2264
|
const accessToken = await getValidAccessToken();
|
|
2214
2265
|
if (!accessToken) return createErrorResponse("authentication_error", "No valid Antigravity access token available.", 401);
|
|
2215
2266
|
try {
|
|
2216
2267
|
const response = await fetch(endpoint, {
|
|
2217
2268
|
method: "POST",
|
|
2218
2269
|
headers: {
|
|
2219
|
-
Host:
|
|
2270
|
+
Host: host,
|
|
2220
2271
|
"User-Agent": ANTIGRAVITY_USER_AGENT,
|
|
2221
2272
|
Authorization: `Bearer ${accessToken}`,
|
|
2222
2273
|
"Content-Type": "application/json",
|
|
@@ -2224,17 +2275,33 @@ async function executeAntigravityRequest(request) {
|
|
|
2224
2275
|
},
|
|
2225
2276
|
body: JSON.stringify(body)
|
|
2226
2277
|
});
|
|
2227
|
-
if (response.ok)
|
|
2228
|
-
|
|
2278
|
+
if (response.ok) {
|
|
2279
|
+
clearRateLimitTracker(request.model);
|
|
2280
|
+
return request.stream ? transformStreamResponse(response, request.model) : await transformNonStreamResponse(response, request.model);
|
|
2281
|
+
}
|
|
2282
|
+
const errorResult = await handleApiError(response, request.model);
|
|
2229
2283
|
if (errorResult.shouldRetry && attempt < MAX_RETRIES$3) {
|
|
2230
|
-
|
|
2231
|
-
|
|
2284
|
+
trackRateLimit(request.model);
|
|
2285
|
+
const backoffDelay = getBackoffDelay(request.model, errorResult.retryDelayMs);
|
|
2286
|
+
if (backoffDelay <= 5e3 || endpointRetries >= MAX_ENDPOINT_RETRIES) {
|
|
2287
|
+
consola.info(`Rate limited, retrying in ${backoffDelay}ms (attempt ${attempt + 1}/${MAX_RETRIES$3})`);
|
|
2288
|
+
await sleep(backoffDelay);
|
|
2289
|
+
} else {
|
|
2290
|
+
rotateEndpoint();
|
|
2291
|
+
endpointRetries++;
|
|
2292
|
+
consola.info(`Switching endpoint, retrying in ${errorResult.retryDelayMs}ms`);
|
|
2293
|
+
await sleep(errorResult.retryDelayMs);
|
|
2294
|
+
}
|
|
2232
2295
|
continue;
|
|
2233
2296
|
}
|
|
2234
2297
|
return errorResult.response;
|
|
2235
2298
|
} catch (error) {
|
|
2236
2299
|
consola.error("Antigravity request error:", error);
|
|
2237
2300
|
if (attempt < MAX_RETRIES$3) {
|
|
2301
|
+
if (endpointRetries < MAX_ENDPOINT_RETRIES) {
|
|
2302
|
+
rotateEndpoint();
|
|
2303
|
+
endpointRetries++;
|
|
2304
|
+
}
|
|
2238
2305
|
await sleep(500);
|
|
2239
2306
|
continue;
|
|
2240
2307
|
}
|
|
@@ -2248,10 +2315,15 @@ async function createAntigravityMessages(request) {
|
|
|
2248
2315
|
}
|
|
2249
2316
|
/**
|
|
2250
2317
|
* Parse retry delay from error response
|
|
2318
|
+
* Supports multiple formats:
|
|
2319
|
+
* - RetryInfo.retryDelay: "3.5s"
|
|
2320
|
+
* - quotaResetDelay: "3000ms" or "3s"
|
|
2321
|
+
* - message: "Your quota will reset after 3s"
|
|
2251
2322
|
*/
|
|
2252
2323
|
function parseRetryDelay$3(errorText) {
|
|
2253
2324
|
try {
|
|
2254
|
-
const
|
|
2325
|
+
const errorData = JSON.parse(errorText);
|
|
2326
|
+
const details = errorData.error?.details ?? [];
|
|
2255
2327
|
for (const detail of details) {
|
|
2256
2328
|
if (detail["@type"]?.includes("RetryInfo") && detail.retryDelay) {
|
|
2257
2329
|
const match = /(\d+(?:\.\d+)?)s/.exec(detail.retryDelay);
|
|
@@ -2265,13 +2337,16 @@ function parseRetryDelay$3(errorText) {
|
|
|
2265
2337
|
}
|
|
2266
2338
|
}
|
|
2267
2339
|
}
|
|
2340
|
+
const message = errorData.error?.message ?? "";
|
|
2341
|
+
const resetMatch = /quota will reset after (\d+(?:\.\d+)?)s/i.exec(message);
|
|
2342
|
+
if (resetMatch) return Math.ceil(Number.parseFloat(resetMatch[1]) * 1e3);
|
|
2268
2343
|
} catch {}
|
|
2269
2344
|
return 500;
|
|
2270
2345
|
}
|
|
2271
2346
|
/**
|
|
2272
2347
|
* Handle API error response
|
|
2273
2348
|
*/
|
|
2274
|
-
async function handleApiError(response) {
|
|
2349
|
+
async function handleApiError(response, _model) {
|
|
2275
2350
|
const errorText = await response.text();
|
|
2276
2351
|
consola.error(`Antigravity error: ${response.status} ${errorText}`);
|
|
2277
2352
|
if (response.status === 403) await disableCurrentAccount();
|
|
@@ -2492,32 +2567,6 @@ const awaitApproval = async () => {
|
|
|
2492
2567
|
if (!await consola.prompt(`Accept incoming request?`, { type: "confirm" })) throw new HTTPError("Request rejected", Response.json({ message: "Request rejected" }, { status: 403 }));
|
|
2493
2568
|
};
|
|
2494
2569
|
|
|
2495
|
-
//#endregion
|
|
2496
|
-
//#region src/lib/rate-limit.ts
|
|
2497
|
-
async function checkRateLimit(state$1) {
|
|
2498
|
-
if (state$1.rateLimitSeconds === void 0) return;
|
|
2499
|
-
const now = Date.now();
|
|
2500
|
-
if (!state$1.lastRequestTimestamp) {
|
|
2501
|
-
state$1.lastRequestTimestamp = now;
|
|
2502
|
-
return;
|
|
2503
|
-
}
|
|
2504
|
-
const elapsedSeconds = (now - state$1.lastRequestTimestamp) / 1e3;
|
|
2505
|
-
if (elapsedSeconds > state$1.rateLimitSeconds) {
|
|
2506
|
-
state$1.lastRequestTimestamp = now;
|
|
2507
|
-
return;
|
|
2508
|
-
}
|
|
2509
|
-
const waitTimeSeconds = Math.ceil(state$1.rateLimitSeconds - elapsedSeconds);
|
|
2510
|
-
if (!state$1.rateLimitWait) {
|
|
2511
|
-
consola.warn(`Rate limit exceeded. Need to wait ${waitTimeSeconds} more seconds.`);
|
|
2512
|
-
throw new HTTPError("Rate limit exceeded", Response.json({ message: "Rate limit exceeded" }, { status: 429 }));
|
|
2513
|
-
}
|
|
2514
|
-
const waitTimeMs = waitTimeSeconds * 1e3;
|
|
2515
|
-
consola.warn(`Rate limit reached. Waiting ${waitTimeSeconds} seconds before proceeding...`);
|
|
2516
|
-
await sleep(waitTimeMs);
|
|
2517
|
-
state$1.lastRequestTimestamp = now;
|
|
2518
|
-
consola.info("Rate limit wait completed, proceeding with request");
|
|
2519
|
-
}
|
|
2520
|
-
|
|
2521
2570
|
//#endregion
|
|
2522
2571
|
//#region src/lib/tokenizer.ts
|
|
2523
2572
|
const ENCODING_MAP = {
|
|
@@ -2715,6 +2764,163 @@ const getTokenCount = async (payload, model) => {
|
|
|
2715
2764
|
};
|
|
2716
2765
|
};
|
|
2717
2766
|
|
|
2767
|
+
//#endregion
|
|
2768
|
+
//#region src/lib/context-compression.ts
|
|
2769
|
+
/**
 * Resolve the maximum number of prompt tokens a model accepts.
 *
 * Prefers `limits.max_prompt_tokens`. Otherwise derives the budget as
 * `max_context_window_tokens - max_output_tokens`, reserving 4096 output
 * tokens when `max_output_tokens` is not set.
 *
 * @param {object} model - Model descriptor; only `model.capabilities.limits` is read.
 * @returns {number | undefined} Positive token budget, or `undefined` when the
 *   model exposes no usable limit (callers treat that as "skip truncation").
 */
const getMaxPromptTokens = (model) => {
  const limits = model.capabilities?.limits;
  if (!limits) return void 0;
  if (limits.max_prompt_tokens) return limits.max_prompt_tokens;
  if (!limits.max_context_window_tokens) return void 0;
  const outputReserve = limits.max_output_tokens ?? 4096;
  const promptBudget = limits.max_context_window_tokens - outputReserve;
  // A zero or negative budget is not a real limit; returning it would make
  // callers truncate every conversation down to its last message.
  return promptBudget > 0 ? promptBudget : void 0;
};
|
|
2781
|
+
/**
 * Tell whether a message is a tool result, i.e. has role "tool".
 * Such messages are grouped with the assistant message that precedes them.
 *
 * @param {{ role: string }} message
 * @returns {boolean}
 */
const isToolMessage = (message) => message.role === "tool";
|
|
2788
|
+
/**
 * Tell whether a message is an assistant message carrying at least one tool call.
 *
 * @param {{ role: string, tool_calls?: unknown }} message
 * @returns {boolean}
 */
const hasToolCalls = (message) => {
  if (message.role !== "assistant") return false;
  const calls = message.tool_calls;
  return Array.isArray(calls) && calls.length > 0;
};
|
|
2794
|
+
/**
 * Partition a message list into groups that must be kept or dropped together.
 *
 * - Each system/developer message becomes its own group with `isSystem: true`.
 * - An assistant message with tool calls is grouped with the run of tool
 *   messages that immediately follows it.
 * - Every other message becomes its own single-message group.
 *
 * @param {Array<object>} messages - Chat messages in conversation order.
 * @returns {Array<{ messages: object[], isSystem: boolean, isRecent: boolean }>}
 *   Groups in the original order; `isRecent` is always initialised to false.
 */
const groupMessages = (messages) => {
  const groups = [];
  const singleton = (message, isSystem) => ({
    messages: [message],
    isSystem,
    isRecent: false,
  });
  for (let index = 0; index < messages.length; ) {
    const current = messages[index];
    const isInstruction = current.role === "system" || current.role === "developer";
    if (isInstruction || !hasToolCalls(current)) {
      groups.push(singleton(current, isInstruction));
      index += 1;
      continue;
    }
    // Extend the group over the tool results that directly follow the call.
    let end = index + 1;
    while (end < messages.length && isToolMessage(messages[end])) end += 1;
    groups.push({
      messages: messages.slice(index, end),
      isSystem: false,
      isRecent: false,
    });
    index = end;
  }
  return groups;
};
|
|
2832
|
+
/**
 * Build the user-role placeholder message that tells the model earlier
 * history was dropped. Returns a fresh object on every call.
 *
 * @returns {{ role: "user", content: string }}
 */
const createTruncationNotice = () => {
  const notice = {
    role: "user",
    content: "[Note: Earlier conversation history was automatically truncated to fit within the model's context window. The most recent messages have been preserved.]",
  };
  return notice;
};
|
|
2839
|
+
/**
 * Truncate a chat payload so its prompt fits within the model's token limit.
 *
 * Strategy:
 * 1. System/developer messages are always kept (and placed first).
 * 2. Conversation groups are dropped oldest-first; the newest group is never dropped.
 * 3. A truncation notice is inserted where messages were removed.
 * 4. Tool call/result pairs stay together (see groupMessages).
 *
 * The target is 95% of the model's prompt limit, to leave room for token
 * counting inaccuracies.
 *
 * @param {object} payload - Chat-completions payload; only `messages` is rewritten.
 * @param {object} model - Model descriptor passed to getMaxPromptTokens/getTokenCount.
 * @returns {Promise<object>} The original payload when no truncation is needed
 *   or possible, otherwise a shallow copy with a shortened `messages` array.
 *   When even the maximum truncation is still over the limit, the most
 *   truncated copy is returned and a warning is logged.
 */
const truncateMessages = async (payload, model) => {
  const maxPromptTokens = getMaxPromptTokens(model);
  if (!maxPromptTokens) {
    consola.debug("No token limit found for model, skipping truncation");
    return payload;
  }
  const tokenCount = await getTokenCount(payload, model);
  const safeLimit = Math.floor(maxPromptTokens * .95);
  if (tokenCount.input <= safeLimit) return payload;
  consola.warn(`Prompt tokens (${tokenCount.input}) exceed safe limit (${safeLimit}/${maxPromptTokens}). Auto-truncating context...`);
  const groups = groupMessages(payload.messages);
  const systemMessages = groups.filter((g) => g.isSystem).flatMap((g) => g.messages);
  const conversationGroups = groups.filter((g) => !g.isSystem);
  if (conversationGroups.length === 0) {
    consola.warn("No conversation messages to truncate, only system messages");
    return payload;
  }
  let truncatedPayload = payload;
  let lastInputTokens = tokenCount.input;
  // Start at one dropped group: dropping none is the payload that was just
  // counted and found too large, so tokenizing it again is wasted work
  // (assumes the count does not depend on where system messages sit).
  for (let dropCount = 1; dropCount < conversationGroups.length; dropCount++) {
    truncatedPayload = {
      ...payload,
      messages: [
        ...systemMessages,
        createTruncationNotice(),
        ...conversationGroups.slice(dropCount).flatMap((g) => g.messages),
      ],
    };
    const newTokenCount = await getTokenCount(truncatedPayload, model);
    lastInputTokens = newTokenCount.input;
    if (lastInputTokens <= safeLimit) {
      const droppedMessages = conversationGroups.slice(0, dropCount).reduce((sum, g) => sum + g.messages.length, 0);
      consola.info(`Truncated ${droppedMessages} messages (${dropCount} conversation groups). Tokens: ${tokenCount.input} -> ${lastInputTokens} (limit: ${maxPromptTokens})`);
      return truncatedPayload;
    }
  }
  // lastInputTokens already holds the count of the most truncated payload.
  consola.warn(`Could not reduce tokens below limit even after maximum truncation. Current: ${lastInputTokens}, limit: ${maxPromptTokens}. System messages or the last message may be too large.`);
  return truncatedPayload;
};
|
|
2897
|
+
|
|
2898
|
+
//#endregion
|
|
2899
|
+
//#region src/lib/rate-limit.ts
|
|
2900
|
+
/**
 * Enforce the configured minimum interval between requests.
 *
 * - No-op when `state$1.rateLimitSeconds` is undefined.
 * - The first request, or one arriving after the interval has elapsed, just
 *   records its timestamp.
 * - Otherwise: rejects with a 429 HTTPError when `state$1.rateLimitWait` is
 *   falsy, or sleeps for the remaining time (rounded up to whole seconds).
 *
 * @param {object} state$1 - Shared state; reads `rateLimitSeconds` and
 *   `rateLimitWait`, reads and writes `lastRequestTimestamp` (ms since epoch).
 * @returns {Promise<void>}
 * @throws {HTTPError} When the limit is hit and waiting is disabled.
 */
async function checkRateLimit(state$1) {
  if (state$1.rateLimitSeconds === void 0) return;
  const now = Date.now();
  if (!state$1.lastRequestTimestamp) {
    state$1.lastRequestTimestamp = now;
    return;
  }
  const elapsedSeconds = (now - state$1.lastRequestTimestamp) / 1e3;
  if (elapsedSeconds > state$1.rateLimitSeconds) {
    state$1.lastRequestTimestamp = now;
    return;
  }
  const waitTimeSeconds = Math.ceil(state$1.rateLimitSeconds - elapsedSeconds);
  if (!state$1.rateLimitWait) {
    consola.warn(`Rate limit exceeded. Need to wait ${waitTimeSeconds} more seconds.`);
    throw new HTTPError("Rate limit exceeded", Response.json({ message: "Rate limit exceeded" }, { status: 429 }));
  }
  const waitTimeMs = waitTimeSeconds * 1e3;
  consola.warn(`Rate limit reached. Waiting ${waitTimeSeconds} seconds before proceeding...`);
  await sleep(waitTimeMs);
  // The request proceeds now, after the wait. Recording the pre-sleep `now`
  // would make the next request see an inflated elapsed time and slip through
  // the limiter early.
  state$1.lastRequestTimestamp = Date.now();
  consola.info("Rate limit wait completed, proceeding with request");
}
|
|
2923
|
+
|
|
2718
2924
|
//#endregion
|
|
2719
2925
|
//#region src/services/copilot/create-chat-completions.ts
|
|
2720
2926
|
const createChatCompletions = async (payload) => {
|
|
@@ -2749,27 +2955,39 @@ const createChatCompletions = async (payload) => {
|
|
|
2749
2955
|
|
|
2750
2956
|
//#endregion
|
|
2751
2957
|
//#region src/routes/chat-completions/handler.ts
|
|
2752
|
-
|
|
2753
|
-
|
|
2754
|
-
|
|
2755
|
-
|
|
2958
|
+
/**
 * Prepare a chat-completions payload: log its token count, auto-truncate the
 * conversation if it exceeds the model's prompt limit, and default
 * `max_tokens` to the model's `max_output_tokens` when the caller left it unset.
 *
 * Token counting and truncation are best-effort: if either fails, a warning
 * is logged and the untruncated payload is used. The `max_tokens` default is
 * applied regardless, so a tokenizer failure does not also drop it.
 *
 * @param {object} payload - Incoming payload; never mutated.
 * @returns {Promise<object>} The payload to send upstream. Returned unchanged
 *   when `payload.model` is not found in `state.models`.
 */
async function processPayloadTokens(payload) {
  const selectedModel = state.models?.data.find((model) => model.id === payload.model);
  if (!selectedModel) {
    consola.warn("No model selected, skipping token count calculation");
    return payload;
  }
  let processed = payload;
  try {
    const tokenCount = await getTokenCount(payload, selectedModel);
    consola.info("Current token count:", tokenCount);
    processed = await truncateMessages(payload, selectedModel);
  } catch (error) {
    consola.warn("Failed to calculate token count:", error);
  }
  if (!isNullish(processed.max_tokens)) return processed;
  const withMaxTokens = {
    ...processed,
    // Optional chaining keeps this step from throwing now that it runs
    // outside the try block.
    max_tokens: selectedModel.capabilities?.limits?.max_output_tokens,
  };
  consola.debug("Set max_tokens to:", JSON.stringify(withMaxTokens.max_tokens));
  return withMaxTokens;
}
|
|
2985
|
+
async function handleCompletion$1(c) {
|
|
2986
|
+
await checkRateLimit(state);
|
|
2987
|
+
const rawPayload = await c.req.json();
|
|
2988
|
+
consola.debug("Request payload:", JSON.stringify(rawPayload).slice(-400));
|
|
2989
|
+
const payload = await processPayloadTokens(rawPayload);
|
|
2765
2990
|
if (state.manualApprove) await awaitApproval();
|
|
2766
|
-
if (isNullish(payload.max_tokens)) {
|
|
2767
|
-
payload = {
|
|
2768
|
-
...payload,
|
|
2769
|
-
max_tokens: selectedModel?.capabilities.limits.max_output_tokens
|
|
2770
|
-
};
|
|
2771
|
-
consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens));
|
|
2772
|
-
}
|
|
2773
2991
|
const response = await createChatCompletions(payload);
|
|
2774
2992
|
if (isNonStreaming$1(response)) {
|
|
2775
2993
|
consola.debug("Non-streaming response:", JSON.stringify(response));
|
|
@@ -3200,10 +3418,27 @@ function translateChunkToAnthropicEvents(chunk, state$1) {
|
|
|
3200
3418
|
|
|
3201
3419
|
//#endregion
|
|
3202
3420
|
//#region src/routes/messages/handler.ts
|
|
3421
|
+
/**
 * Best-effort context truncation for the translated OpenAI payload.
 *
 * Looks up `payload.model` in `state.models` and delegates to
 * truncateMessages. The payload is returned unchanged when the model is
 * unknown or truncation throws; both cases log a warning.
 *
 * @param {object} payload - OpenAI-format chat payload.
 * @returns {Promise<object>} Truncated payload, or the input payload.
 */
async function autoTruncatePayload(payload) {
  const matchesRequestedModel = (candidate) => candidate.id === payload.model;
  const selectedModel = state.models?.data.find(matchesRequestedModel);
  if (selectedModel) {
    try {
      return await truncateMessages(payload, selectedModel);
    } catch (error) {
      consola.warn("Failed to auto-truncate context:", error);
      return payload;
    }
  }
  consola.warn("No model selected for Anthropic endpoint, skipping auto-truncation");
  return payload;
}
|
|
3203
3437
|
async function handleCompletion(c) {
|
|
3204
3438
|
await checkRateLimit(state);
|
|
3205
3439
|
const anthropicPayload = await c.req.json();
|
|
3206
|
-
const
|
|
3440
|
+
const rawOpenAIPayload = translateToOpenAI(anthropicPayload);
|
|
3441
|
+
const openAIPayload = await autoTruncatePayload(rawOpenAIPayload);
|
|
3207
3442
|
if (state.manualApprove) await awaitApproval();
|
|
3208
3443
|
const response = await createChatCompletions(openAIPayload);
|
|
3209
3444
|
if (isNonStreaming(response)) {
|
|
@@ -3760,7 +3995,7 @@ async function runServer(options$1) {
|
|
|
3760
3995
|
} else if (options$1.antigravity) {
|
|
3761
3996
|
consola.info("Google Antigravity mode enabled");
|
|
3762
3997
|
state.antigravityMode = true;
|
|
3763
|
-
const { loadAntigravityAuth, setupAntigravity, getCurrentAccount, hasApiKey, getApiKey: getApiKey$1, setOAuthCredentials } = await import("./auth-
|
|
3998
|
+
const { loadAntigravityAuth, setupAntigravity, getCurrentAccount, hasApiKey, getApiKey: getApiKey$1, setOAuthCredentials } = await import("./auth-BgquW2Yd.js");
|
|
3764
3999
|
if (options$1.antigravityClientId && options$1.antigravityClientSecret) {
|
|
3765
4000
|
setOAuthCredentials(options$1.antigravityClientId, options$1.antigravityClientSecret);
|
|
3766
4001
|
consola.info("Using provided OAuth credentials from CLI");
|
|
@@ -3789,7 +4024,7 @@ async function runServer(options$1) {
|
|
|
3789
4024
|
}
|
|
3790
4025
|
if (!await getCurrentAccount() && !hasApiKey()) throw new Error("No enabled Antigravity accounts available");
|
|
3791
4026
|
}
|
|
3792
|
-
const { getAntigravityModels: getAntigravityModels$1 } = await import("./get-models-
|
|
4027
|
+
const { getAntigravityModels: getAntigravityModels$1 } = await import("./get-models-D1vQG5Eb.js");
|
|
3793
4028
|
const models = await getAntigravityModels$1();
|
|
3794
4029
|
state.antigravityModels = models;
|
|
3795
4030
|
consola.info(`Available Antigravity models: \n${models.data.map((model) => `- ${model.id}`).join("\n")}`);
|