copilot-api-plus 1.0.36 → 1.0.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auth-BgquW2Yd.js +4 -0
- package/dist/{auth-CM_ilreU.js → auth-Dz36Lk4o.js} +20 -7
- package/dist/auth-Dz36Lk4o.js.map +1 -0
- package/dist/get-models-D1vQG5Eb.js +5 -0
- package/dist/{get-models-DMdiCNoU.js → get-models-VsThqHZf.js} +2 -2
- package/dist/{get-models-DMdiCNoU.js.map → get-models-VsThqHZf.js.map} +1 -1
- package/dist/main.js +353 -73
- package/dist/main.js.map +1 -1
- package/package.json +1 -1
- package/dist/auth-B2lTFLSD.js +0 -4
- package/dist/auth-CM_ilreU.js.map +0 -1
- package/dist/get-models-CmDpYUV-.js +0 -5
package/dist/main.js
CHANGED
|
@@ -4,9 +4,9 @@ import { state } from "./state-CcLGr8VN.js";
|
|
|
4
4
|
import { GITHUB_API_BASE_URL, copilotBaseUrl, copilotHeaders, githubHeaders } from "./get-user-BzIEATcF.js";
|
|
5
5
|
import { HTTPError, forwardError } from "./error-CvU5otz-.js";
|
|
6
6
|
import { cacheModels, cacheVSCodeVersion, clearGithubToken, isNullish, setupCopilotToken, setupGitHubToken, sleep } from "./token-ClgudjZm.js";
|
|
7
|
-
import { clearAntigravityAuth, disableCurrentAccount, getAntigravityAuthPath, getApiKey, getValidAccessToken, rotateAccount } from "./auth-
|
|
7
|
+
import { clearAntigravityAuth, disableCurrentAccount, getAntigravityAuthPath, getApiKey, getCurrentProjectId, getValidAccessToken, rotateAccount } from "./auth-Dz36Lk4o.js";
|
|
8
8
|
import { clearZenAuth, getZenAuthPath } from "./auth-T55-Bhoo.js";
|
|
9
|
-
import { getAntigravityModels, getAntigravityUsage, isThinkingModel } from "./get-models-
|
|
9
|
+
import { getAntigravityModels, getAntigravityUsage, isThinkingModel } from "./get-models-VsThqHZf.js";
|
|
10
10
|
import { createRequire } from "node:module";
|
|
11
11
|
import { defineCommand, runMain } from "citty";
|
|
12
12
|
import consola from "consola";
|
|
@@ -540,7 +540,7 @@ function initProxyFromEnv() {
|
|
|
540
540
|
* Add a new Antigravity account via OAuth
|
|
541
541
|
*/
|
|
542
542
|
async function addAccount() {
|
|
543
|
-
const { setupAntigravity, loadAntigravityAuth } = await import("./auth-
|
|
543
|
+
const { setupAntigravity, loadAntigravityAuth } = await import("./auth-BgquW2Yd.js");
|
|
544
544
|
const existingAuth = await loadAntigravityAuth();
|
|
545
545
|
if (existingAuth && existingAuth.accounts.length > 0) {
|
|
546
546
|
const enabledCount = existingAuth.accounts.filter((a) => a.enable).length;
|
|
@@ -552,7 +552,7 @@ async function addAccount() {
|
|
|
552
552
|
* List all Antigravity accounts
|
|
553
553
|
*/
|
|
554
554
|
async function listAccounts() {
|
|
555
|
-
const { loadAntigravityAuth } = await import("./auth-
|
|
555
|
+
const { loadAntigravityAuth } = await import("./auth-BgquW2Yd.js");
|
|
556
556
|
const auth$1 = await loadAntigravityAuth();
|
|
557
557
|
if (!auth$1 || auth$1.accounts.length === 0) {
|
|
558
558
|
consola.info("No Antigravity accounts configured");
|
|
@@ -573,7 +573,7 @@ async function listAccounts() {
|
|
|
573
573
|
* Remove an Antigravity account by index
|
|
574
574
|
*/
|
|
575
575
|
async function removeAccount(index) {
|
|
576
|
-
const { loadAntigravityAuth, saveAntigravityAuth } = await import("./auth-
|
|
576
|
+
const { loadAntigravityAuth, saveAntigravityAuth } = await import("./auth-BgquW2Yd.js");
|
|
577
577
|
const auth$1 = await loadAntigravityAuth();
|
|
578
578
|
if (!auth$1 || auth$1.accounts.length === 0) {
|
|
579
579
|
consola.error("No Antigravity accounts configured");
|
|
@@ -592,7 +592,7 @@ async function removeAccount(index) {
|
|
|
592
592
|
* Clear all Antigravity accounts
|
|
593
593
|
*/
|
|
594
594
|
async function clearAccounts() {
|
|
595
|
-
const { clearAntigravityAuth: clearAntigravityAuth$1 } = await import("./auth-
|
|
595
|
+
const { clearAntigravityAuth: clearAntigravityAuth$1 } = await import("./auth-BgquW2Yd.js");
|
|
596
596
|
if (await consola.prompt("Are you sure you want to remove all Antigravity accounts?", {
|
|
597
597
|
type: "confirm",
|
|
598
598
|
initial: false
|
|
@@ -1403,9 +1403,9 @@ function processChunk(chunk, state$1) {
|
|
|
1403
1403
|
|
|
1404
1404
|
//#endregion
|
|
1405
1405
|
//#region src/services/antigravity/create-chat-completions.ts
|
|
1406
|
-
const ANTIGRAVITY_API_HOST
|
|
1407
|
-
const ANTIGRAVITY_STREAM_URL
|
|
1408
|
-
const ANTIGRAVITY_NO_STREAM_URL
|
|
1406
|
+
const ANTIGRAVITY_API_HOST = "daily-cloudcode-pa.sandbox.googleapis.com";
|
|
1407
|
+
const ANTIGRAVITY_STREAM_URL = `https://${ANTIGRAVITY_API_HOST}/v1internal:streamGenerateContent?alt=sse`;
|
|
1408
|
+
const ANTIGRAVITY_NO_STREAM_URL = `https://${ANTIGRAVITY_API_HOST}/v1internal:generateContent`;
|
|
1409
1409
|
const ANTIGRAVITY_USER_AGENT$1 = "antigravity/1.11.3 windows/amd64";
|
|
1410
1410
|
const GEMINI_API_HOST = "generativelanguage.googleapis.com";
|
|
1411
1411
|
const getGeminiStreamUrl = (model, apiKey) => `https://${GEMINI_API_HOST}/v1beta/models/${model}:streamGenerateContent?alt=sse&key=${apiKey}`;
|
|
@@ -1625,14 +1625,14 @@ async function createWithApiKey(request, apiKey) {
|
|
|
1625
1625
|
* Note: Both Gemini and Claude models use the same endpoint and Gemini-style format
|
|
1626
1626
|
*/
|
|
1627
1627
|
async function createWithOAuth(request, accessToken) {
|
|
1628
|
-
const endpoint = request.stream ? ANTIGRAVITY_STREAM_URL
|
|
1628
|
+
const endpoint = request.stream ? ANTIGRAVITY_STREAM_URL : ANTIGRAVITY_NO_STREAM_URL;
|
|
1629
1629
|
const body = buildAntigravityRequestBody(request);
|
|
1630
1630
|
consola.debug(`Antigravity request to ${endpoint} with model ${request.model}`);
|
|
1631
1631
|
try {
|
|
1632
1632
|
const response = await fetch(endpoint, {
|
|
1633
1633
|
method: "POST",
|
|
1634
1634
|
headers: {
|
|
1635
|
-
Host: ANTIGRAVITY_API_HOST
|
|
1635
|
+
Host: ANTIGRAVITY_API_HOST,
|
|
1636
1636
|
"User-Agent": ANTIGRAVITY_USER_AGENT$1,
|
|
1637
1637
|
Authorization: `Bearer ${accessToken}`,
|
|
1638
1638
|
"Content-Type": "application/json",
|
|
@@ -1831,6 +1831,49 @@ app$1.post("/", async (c) => {
|
|
|
1831
1831
|
});
|
|
1832
1832
|
const antigravityChatCompletionsRoute = app$1;
|
|
1833
1833
|
|
|
1834
|
+
//#endregion
|
|
1835
|
+
//#region src/lib/request-queue.ts
|
|
1836
|
+
/**
 * FIFO queue that bounds how many tasks run at once and spaces out their starts.
 *
 * - At most `maxConcurrent` tasks are in flight at any time.
 * - Consecutive task starts are at least `minDelayMs` apart, including when
 *   several tasks are dispatched while an earlier one is still waiting for
 *   its start slot.
 */
var RequestQueue = class {
	queue = [];
	activeCount = 0;
	maxConcurrent;
	minDelayMs;
	/** Start time (ms since epoch) reserved by the most recently dispatched task. */
	lastRequestTime = 0;
	/**
	 * @param {number} maxConcurrent - maximum number of tasks in flight
	 * @param {number} minDelayMs - minimum spacing between two task starts
	 */
	constructor(maxConcurrent = 2, minDelayMs = 300) {
		this.maxConcurrent = maxConcurrent;
		this.minDelayMs = minDelayMs;
	}
	/**
	 * Schedule a task.
	 *
	 * @param {() => Promise<unknown>} execute - task to run once a slot is free
	 * @returns {Promise<unknown>} settles with the task's result or rejection
	 */
	enqueue(execute) {
		return new Promise((resolve, reject) => {
			this.queue.push({
				execute,
				resolve,
				reject
			});
			this.processQueue();
		});
	}
	/**
	 * Dispatch the next queued task if a concurrency slot is available.
	 * Never rejects: task failures are forwarded to the task's own promise.
	 */
	async processQueue() {
		if (this.activeCount >= this.maxConcurrent || this.queue.length === 0) return;
		const request = this.queue.shift();
		if (!request) return;
		this.activeCount++;
		const now = Date.now();
		// Reserve the start slot *before* waiting. Otherwise a second task
		// dispatched during the wait would measure against the same timestamp
		// and both would start together, defeating minDelayMs.
		const startAt = Math.max(now, this.lastRequestTime + this.minDelayMs);
		this.lastRequestTime = startAt;
		if (startAt > now) await new Promise((r) => setTimeout(r, startAt - now));
		try {
			const result = await request.execute();
			request.resolve(result);
		} catch (error) {
			request.reject(error);
		} finally {
			this.activeCount--;
			this.processQueue();
		}
	}
};
|
|
1875
|
+
const antigravityQueue = new RequestQueue(2, 500);
|
|
1876
|
+
|
|
1834
1877
|
//#endregion
|
|
1835
1878
|
//#region src/services/antigravity/anthropic-events.ts
|
|
1836
1879
|
/**
|
|
@@ -1993,10 +2036,49 @@ function generateToolId() {
|
|
|
1993
2036
|
|
|
1994
2037
|
//#endregion
|
|
1995
2038
|
//#region src/services/antigravity/create-messages.ts
|
|
1996
|
-
const
|
|
1997
|
-
|
|
1998
|
-
|
|
2039
|
+
/** Antigravity API hosts; requests use the entry at `currentEndpointIndex`. */
const ANTIGRAVITY_ENDPOINTS = [
	"daily-cloudcode-pa.sandbox.googleapis.com",
	"cloudcode-pa.googleapis.com"
];
/** Index into ANTIGRAVITY_ENDPOINTS of the host currently in use (module-wide state). */
let currentEndpointIndex = 0;
/** Build the streaming (SSE) generateContent URL for a host. */
function getStreamUrl(host) {
	const path = "/v1internal:streamGenerateContent?alt=sse";
	return `https://${host}${path}`;
}
/** Build the non-streaming generateContent URL for a host. */
function getNoStreamUrl(host) {
	const path = "/v1internal:generateContent";
	return `https://${host}${path}`;
}
/** Return the host currently selected for Antigravity requests. */
function getCurrentHost() {
	return ANTIGRAVITY_ENDPOINTS[currentEndpointIndex];
}
/** Advance to the next host, wrapping around at the end of the list, and log the switch. */
function rotateEndpoint() {
	const previousHost = ANTIGRAVITY_ENDPOINTS[currentEndpointIndex];
	currentEndpointIndex = (currentEndpointIndex + 1) % ANTIGRAVITY_ENDPOINTS.length;
	const nextHost = ANTIGRAVITY_ENDPOINTS[currentEndpointIndex];
	consola.info(`Rotating endpoint: ${previousHost} → ${nextHost}`);
}
|
|
1999
2055
|
const ANTIGRAVITY_USER_AGENT = "antigravity/1.11.3 windows/amd64";
|
|
2056
|
+
/**
 * Rate limit bookkeeping per model family.
 * Keys are the family names returned by getModelFamily; each value holds
 * `lastLimitTime` (ms since epoch) and `consecutiveErrors`.
 */
const rateLimitTracker = {};
/**
 * Classify a model id into a family by substring match.
 * @param {string} model - model identifier
 * @returns {"claude" | "gemini" | "other"}
 */
function getModelFamily(model) {
	if (model.includes("claude")) return "claude";
	if (model.includes("gemini")) return "gemini";
	return "other";
}
/**
 * Record one more rate-limit error for the model's family.
 * @param {string} model - model identifier
 */
function trackRateLimit(model) {
	const family = getModelFamily(model);
	rateLimitTracker[family] ??= {
		lastLimitTime: 0,
		consecutiveErrors: 0
	};
	const info = rateLimitTracker[family];
	info.lastLimitTime = Date.now();
	info.consecutiveErrors++;
}
/**
 * Reset the consecutive error count for the model's family (if it was ever tracked).
 * @param {string} model - model identifier
 */
function clearRateLimitTracker(model) {
	const info = rateLimitTracker[getModelFamily(model)];
	if (info) info.consecutiveErrors = 0;
}
/**
 * Compute an exponentially growing retry delay for the model's family.
 *
 * The delay doubles with each consecutive error (multiplier capped at 60) and
 * the result is capped at 30 seconds. A family that has never been rate
 * limited gets `baseDelay` back unchanged.
 *
 * @param {string} model - model identifier
 * @param {number} baseDelay - base delay in milliseconds
 * @returns {number} delay in milliseconds
 */
function getBackoffDelay(model, baseDelay) {
	const info = rateLimitTracker[getModelFamily(model)];
	if (!info) return baseDelay;
	// Clamp the exponent at 0: after a reset consecutiveErrors is 0 and an
	// exponent of -1 would *halve* the delay instead of leaving it alone.
	const exponent = Math.max(info.consecutiveErrors - 1, 0);
	const multiplier = Math.min(2 ** exponent, 60);
	return Math.min(baseDelay * multiplier, 3e4);
}
|
|
2000
2082
|
/**
|
|
2001
2083
|
* Extract text from system content (can be string or array)
|
|
2002
2084
|
*/
|
|
@@ -2118,7 +2200,7 @@ function convertTools(tools) {
|
|
|
2118
2200
|
* Build Antigravity request body
|
|
2119
2201
|
* The Antigravity API expects a specific nested structure with request object
|
|
2120
2202
|
*/
|
|
2121
|
-
function buildGeminiRequest(request) {
|
|
2203
|
+
function buildGeminiRequest(request, projectId) {
|
|
2122
2204
|
const { contents, systemInstruction } = convertMessages(request.messages, request.system);
|
|
2123
2205
|
const tools = convertTools(request.tools);
|
|
2124
2206
|
const innerRequest = {
|
|
@@ -2136,12 +2218,14 @@ function buildGeminiRequest(request) {
|
|
|
2136
2218
|
...innerRequest.generationConfig,
|
|
2137
2219
|
thinkingConfig: { includeThoughts: true }
|
|
2138
2220
|
};
|
|
2139
|
-
|
|
2221
|
+
const result = {
|
|
2140
2222
|
model: request.model,
|
|
2141
2223
|
userAgent: "antigravity",
|
|
2142
2224
|
requestId: `agent-${crypto.randomUUID()}`,
|
|
2143
2225
|
request: innerRequest
|
|
2144
2226
|
};
|
|
2227
|
+
if (projectId) result.project = projectId;
|
|
2228
|
+
return result;
|
|
2145
2229
|
}
|
|
2146
2230
|
/**
|
|
2147
2231
|
* Create error response
|
|
@@ -2161,20 +2245,29 @@ function createErrorResponse(type, message, status) {
|
|
|
2161
2245
|
/**
|
|
2162
2246
|
* Create Anthropic-compatible message response using Antigravity
|
|
2163
2247
|
* Note: Both Gemini and Claude models use the same endpoint and Gemini-style format
|
|
2248
|
+
*
|
|
2249
|
+
* Features:
|
|
2250
|
+
* - Endpoint fallback (daily → prod)
|
|
2251
|
+
* - Per-model-family rate limit tracking
|
|
2252
|
+
* - Exponential backoff for consecutive errors
|
|
2253
|
+
* - Smart retry for short delays (≤5s on same endpoint)
|
|
2164
2254
|
*/
|
|
2165
2255
|
const MAX_RETRIES$3 = 5;
|
|
2166
|
-
|
|
2167
|
-
|
|
2168
|
-
const
|
|
2256
|
+
const MAX_ENDPOINT_RETRIES = 2;
|
|
2257
|
+
async function executeAntigravityRequest(request) {
|
|
2258
|
+
const projectId = await getCurrentProjectId();
|
|
2259
|
+
const body = buildGeminiRequest(request, projectId);
|
|
2260
|
+
let endpointRetries = 0;
|
|
2169
2261
|
for (let attempt = 0; attempt <= MAX_RETRIES$3; attempt++) {
|
|
2262
|
+
const host = getCurrentHost();
|
|
2263
|
+
const endpoint = request.stream ? getStreamUrl(host) : getNoStreamUrl(host);
|
|
2170
2264
|
const accessToken = await getValidAccessToken();
|
|
2171
|
-
if (!accessToken) return createErrorResponse("authentication_error", "No valid Antigravity access token available.
|
|
2172
|
-
consola.debug(`Antigravity request to ${endpoint} (attempt ${attempt + 1}/${MAX_RETRIES$3 + 1})`);
|
|
2265
|
+
if (!accessToken) return createErrorResponse("authentication_error", "No valid Antigravity access token available.", 401);
|
|
2173
2266
|
try {
|
|
2174
2267
|
const response = await fetch(endpoint, {
|
|
2175
2268
|
method: "POST",
|
|
2176
2269
|
headers: {
|
|
2177
|
-
Host:
|
|
2270
|
+
Host: host,
|
|
2178
2271
|
"User-Agent": ANTIGRAVITY_USER_AGENT,
|
|
2179
2272
|
Authorization: `Bearer ${accessToken}`,
|
|
2180
2273
|
"Content-Type": "application/json",
|
|
@@ -2182,17 +2275,33 @@ async function createAntigravityMessages(request) {
|
|
|
2182
2275
|
},
|
|
2183
2276
|
body: JSON.stringify(body)
|
|
2184
2277
|
});
|
|
2185
|
-
if (response.ok)
|
|
2186
|
-
|
|
2278
|
+
if (response.ok) {
|
|
2279
|
+
clearRateLimitTracker(request.model);
|
|
2280
|
+
return request.stream ? transformStreamResponse(response, request.model) : await transformNonStreamResponse(response, request.model);
|
|
2281
|
+
}
|
|
2282
|
+
const errorResult = await handleApiError(response, request.model);
|
|
2187
2283
|
if (errorResult.shouldRetry && attempt < MAX_RETRIES$3) {
|
|
2188
|
-
|
|
2189
|
-
|
|
2284
|
+
trackRateLimit(request.model);
|
|
2285
|
+
const backoffDelay = getBackoffDelay(request.model, errorResult.retryDelayMs);
|
|
2286
|
+
if (backoffDelay <= 5e3 || endpointRetries >= MAX_ENDPOINT_RETRIES) {
|
|
2287
|
+
consola.info(`Rate limited, retrying in ${backoffDelay}ms (attempt ${attempt + 1}/${MAX_RETRIES$3})`);
|
|
2288
|
+
await sleep(backoffDelay);
|
|
2289
|
+
} else {
|
|
2290
|
+
rotateEndpoint();
|
|
2291
|
+
endpointRetries++;
|
|
2292
|
+
consola.info(`Switching endpoint, retrying in ${errorResult.retryDelayMs}ms`);
|
|
2293
|
+
await sleep(errorResult.retryDelayMs);
|
|
2294
|
+
}
|
|
2190
2295
|
continue;
|
|
2191
2296
|
}
|
|
2192
2297
|
return errorResult.response;
|
|
2193
2298
|
} catch (error) {
|
|
2194
|
-
consola.error("Antigravity
|
|
2299
|
+
consola.error("Antigravity request error:", error);
|
|
2195
2300
|
if (attempt < MAX_RETRIES$3) {
|
|
2301
|
+
if (endpointRetries < MAX_ENDPOINT_RETRIES) {
|
|
2302
|
+
rotateEndpoint();
|
|
2303
|
+
endpointRetries++;
|
|
2304
|
+
}
|
|
2196
2305
|
await sleep(500);
|
|
2197
2306
|
continue;
|
|
2198
2307
|
}
|
|
@@ -2201,12 +2310,20 @@ async function createAntigravityMessages(request) {
|
|
|
2201
2310
|
}
|
|
2202
2311
|
return createErrorResponse("api_error", "Max retries exceeded", 429);
|
|
2203
2312
|
}
|
|
2313
|
+
/**
 * Entry point for Antigravity message requests: the actual request is run
 * through the shared request queue rather than executed directly.
 */
async function createAntigravityMessages(request) {
	const runRequest = () => executeAntigravityRequest(request);
	return antigravityQueue.enqueue(runRequest);
}
|
|
2204
2316
|
/**
|
|
2205
2317
|
* Parse retry delay from error response
|
|
2318
|
+
* Supports multiple formats:
|
|
2319
|
+
* - RetryInfo.retryDelay: "3.5s"
|
|
2320
|
+
* - quotaResetDelay: "3000ms" or "3s"
|
|
2321
|
+
* - message: "Your quota will reset after 3s"
|
|
2206
2322
|
*/
|
|
2207
2323
|
function parseRetryDelay$3(errorText) {
|
|
2208
2324
|
try {
|
|
2209
|
-
const
|
|
2325
|
+
const errorData = JSON.parse(errorText);
|
|
2326
|
+
const details = errorData.error?.details ?? [];
|
|
2210
2327
|
for (const detail of details) {
|
|
2211
2328
|
if (detail["@type"]?.includes("RetryInfo") && detail.retryDelay) {
|
|
2212
2329
|
const match = /(\d+(?:\.\d+)?)s/.exec(detail.retryDelay);
|
|
@@ -2220,13 +2337,16 @@ function parseRetryDelay$3(errorText) {
|
|
|
2220
2337
|
}
|
|
2221
2338
|
}
|
|
2222
2339
|
}
|
|
2340
|
+
const message = errorData.error?.message ?? "";
|
|
2341
|
+
const resetMatch = /quota will reset after (\d+(?:\.\d+)?)s/i.exec(message);
|
|
2342
|
+
if (resetMatch) return Math.ceil(Number.parseFloat(resetMatch[1]) * 1e3);
|
|
2223
2343
|
} catch {}
|
|
2224
2344
|
return 500;
|
|
2225
2345
|
}
|
|
2226
2346
|
/**
|
|
2227
2347
|
* Handle API error response
|
|
2228
2348
|
*/
|
|
2229
|
-
async function handleApiError(response) {
|
|
2349
|
+
async function handleApiError(response, _model) {
|
|
2230
2350
|
const errorText = await response.text();
|
|
2231
2351
|
consola.error(`Antigravity error: ${response.status} ${errorText}`);
|
|
2232
2352
|
if (response.status === 403) await disableCurrentAccount();
|
|
@@ -2447,32 +2567,6 @@ const awaitApproval = async () => {
|
|
|
2447
2567
|
if (!await consola.prompt(`Accept incoming request?`, { type: "confirm" })) throw new HTTPError("Request rejected", Response.json({ message: "Request rejected" }, { status: 403 }));
|
|
2448
2568
|
};
|
|
2449
2569
|
|
|
2450
|
-
//#endregion
|
|
2451
|
-
//#region src/lib/rate-limit.ts
|
|
2452
|
-
async function checkRateLimit(state$1) {
|
|
2453
|
-
if (state$1.rateLimitSeconds === void 0) return;
|
|
2454
|
-
const now = Date.now();
|
|
2455
|
-
if (!state$1.lastRequestTimestamp) {
|
|
2456
|
-
state$1.lastRequestTimestamp = now;
|
|
2457
|
-
return;
|
|
2458
|
-
}
|
|
2459
|
-
const elapsedSeconds = (now - state$1.lastRequestTimestamp) / 1e3;
|
|
2460
|
-
if (elapsedSeconds > state$1.rateLimitSeconds) {
|
|
2461
|
-
state$1.lastRequestTimestamp = now;
|
|
2462
|
-
return;
|
|
2463
|
-
}
|
|
2464
|
-
const waitTimeSeconds = Math.ceil(state$1.rateLimitSeconds - elapsedSeconds);
|
|
2465
|
-
if (!state$1.rateLimitWait) {
|
|
2466
|
-
consola.warn(`Rate limit exceeded. Need to wait ${waitTimeSeconds} more seconds.`);
|
|
2467
|
-
throw new HTTPError("Rate limit exceeded", Response.json({ message: "Rate limit exceeded" }, { status: 429 }));
|
|
2468
|
-
}
|
|
2469
|
-
const waitTimeMs = waitTimeSeconds * 1e3;
|
|
2470
|
-
consola.warn(`Rate limit reached. Waiting ${waitTimeSeconds} seconds before proceeding...`);
|
|
2471
|
-
await sleep(waitTimeMs);
|
|
2472
|
-
state$1.lastRequestTimestamp = now;
|
|
2473
|
-
consola.info("Rate limit wait completed, proceeding with request");
|
|
2474
|
-
}
|
|
2475
|
-
|
|
2476
2570
|
//#endregion
|
|
2477
2571
|
//#region src/lib/tokenizer.ts
|
|
2478
2572
|
const ENCODING_MAP = {
|
|
@@ -2670,6 +2764,163 @@ const getTokenCount = async (payload, model) => {
|
|
|
2670
2764
|
};
|
|
2671
2765
|
};
|
|
2672
2766
|
|
|
2767
|
+
//#endregion
|
|
2768
|
+
//#region src/lib/context-compression.ts
|
|
2769
|
+
/**
 * Get the maximum prompt token limit for a model.
 *
 * Prefers `max_prompt_tokens`; otherwise falls back to
 * `max_context_window_tokens` minus the output reserve (`max_output_tokens`,
 * or 4096 when that is not set).
 *
 * @param model - model entry carrying `capabilities.limits`
 * @returns the prompt token budget, or `undefined` when the model exposes no
 *   usable limit (limits missing, or the computed budget is not positive)
 */
const getMaxPromptTokens = (model) => {
	const limits = model?.capabilities?.limits;
	if (!limits) return undefined;
	if (limits.max_prompt_tokens) return limits.max_prompt_tokens;
	if (!limits.max_context_window_tokens) return undefined;
	const outputReserve = limits.max_output_tokens ?? 4096;
	const promptBudget = limits.max_context_window_tokens - outputReserve;
	// A zero or negative budget is not a usable limit; report "unknown" so
	// callers skip truncation instead of trying to shrink the prompt to nothing.
	return promptBudget > 0 ? promptBudget : undefined;
};
|
|
2781
|
+
/**
 * Tell whether a message is a tool result, i.e. its role is "tool".
 * (Assistant messages that *issue* tool calls are not matched here.)
 */
const isToolMessage = (message) => message.role === "tool";
|
|
2788
|
+
/**
 * Tell whether a message is an assistant message carrying a non-empty
 * `tool_calls` array.
 */
const hasToolCalls = (message) => {
	if (message.role !== "assistant") return false;
	const calls = message.tool_calls;
	return Array.isArray(calls) && calls.length > 0;
};
|
|
2794
|
+
/**
 * Split a message list into groups that are kept or dropped as a unit.
 *
 * - Each system/developer message forms its own group, flagged `isSystem`.
 * - An assistant message with tool calls is grouped with the run of tool
 *   messages that immediately follows it.
 * - Every other message forms a single-message group.
 *
 * Each group has the shape `{ messages, isSystem, isRecent }`; `isRecent` is
 * always initialised to false here.
 */
const groupMessages = (messages) => {
	const makeGroup = (members, isSystem) => ({
		messages: members,
		isSystem,
		isRecent: false
	});
	const groups = [];
	let cursor = 0;
	while (cursor < messages.length) {
		const current = messages[cursor];
		cursor += 1;
		const isInstruction = current.role === "system" || current.role === "developer";
		if (isInstruction || !hasToolCalls(current)) {
			groups.push(makeGroup([current], isInstruction));
			continue;
		}
		// Assistant tool call: absorb the tool results that directly follow it.
		const members = [current];
		for (; cursor < messages.length && isToolMessage(messages[cursor]); cursor += 1) members.push(messages[cursor]);
		groups.push(makeGroup(members, false));
	}
	return groups;
};
|
|
2832
|
+
/**
 * Build the user-role placeholder message that tells the model earlier
 * conversation history was removed. A fresh object is returned on every call.
 */
const createTruncationNotice = () => {
	const notice = {
		role: "user",
		content: "[Note: Earlier conversation history was automatically truncated to fit within the model's context window. The most recent messages have been preserved.]"
	};
	return notice;
};
|
|
2839
|
+
/**
 * Truncate messages so the prompt fits within the model's token limit.
 *
 * Strategy:
 * 1. System/developer messages are always kept and placed first.
 * 2. Conversation groups are dropped oldest first; the newest group is never dropped.
 * 3. A truncation notice is inserted after the system messages whenever
 *    something was dropped.
 * 4. Tool call/result pairs are dropped together (see groupMessages).
 *
 * Safety margin: the target is 95% of the model limit, to absorb token
 * counting inaccuracies.
 *
 * @param payload - chat completion payload with a `messages` array
 * @param model - model entry used for the limit lookup and token counting
 * @returns the original payload when no truncation is needed or possible,
 *   otherwise a shallow copy with a shortened `messages` array. If even the
 *   maximum truncation does not fit, the most truncated candidate is returned.
 */
const truncateMessages = async (payload, model) => {
	const maxPromptTokens = getMaxPromptTokens(model);
	if (!maxPromptTokens) {
		consola.debug("No token limit found for model, skipping truncation");
		return payload;
	}
	const tokenCount = await getTokenCount(payload, model);
	const safeLimit = Math.floor(maxPromptTokens * .95);
	if (tokenCount.input <= safeLimit) return payload;
	consola.warn(`Prompt tokens (${tokenCount.input}) exceed safe limit (${safeLimit}/${maxPromptTokens}). Auto-truncating context...`);
	const groups = groupMessages(payload.messages);
	const systemMessages = groups.filter((g) => g.isSystem).flatMap((g) => g.messages);
	const conversationGroups = groups.filter((g) => !g.isSystem);
	if (conversationGroups.length === 0) {
		consola.warn("No conversation messages to truncate, only system messages");
		return payload;
	}
	let truncatedPayload = payload;
	let lastInputTokens = tokenCount.input;
	// Start at 1: dropping nothing is already known to exceed the limit, so
	// re-tokenizing the full prompt would be wasted work. The upper bound
	// keeps at least the newest conversation group.
	for (let dropCount = 1; dropCount < conversationGroups.length; dropCount++) {
		truncatedPayload = {
			...payload,
			messages: [
				...systemMessages,
				createTruncationNotice(),
				...conversationGroups.slice(dropCount).flatMap((g) => g.messages)
			]
		};
		const newTokenCount = await getTokenCount(truncatedPayload, model);
		lastInputTokens = newTokenCount.input;
		if (lastInputTokens <= safeLimit) {
			const droppedMessages = conversationGroups.slice(0, dropCount).reduce((sum, g) => sum + g.messages.length, 0);
			consola.info(`Truncated ${droppedMessages} messages (${dropCount} conversation groups). Tokens: ${tokenCount.input} -> ${newTokenCount.input} (limit: ${maxPromptTokens})`);
			return truncatedPayload;
		}
	}
	// Reuse the count of the last candidate instead of tokenizing it again.
	consola.warn(`Could not reduce tokens below limit even after maximum truncation. Current: ${lastInputTokens}, limit: ${maxPromptTokens}. System messages or the last message may be too large.`);
	return truncatedPayload;
};
|
|
2897
|
+
|
|
2898
|
+
//#endregion
|
|
2899
|
+
//#region src/lib/rate-limit.ts
|
|
2900
|
+
/**
 * Enforce the configured minimum interval between requests.
 *
 * Does nothing when `rateLimitSeconds` is not configured. Otherwise, if the
 * previous request was less than `rateLimitSeconds` ago, either waits out the
 * remainder (`rateLimitWait` set) or rejects with a 429 HTTPError.
 *
 * @param state$1 - object carrying `rateLimitSeconds`, `rateLimitWait` and
 *   `lastRequestTimestamp`; the timestamp is updated whenever a request is let through
 * @throws {HTTPError} 429 when the limit is hit and waiting is disabled
 */
async function checkRateLimit(state$1) {
	if (state$1.rateLimitSeconds === void 0) return;
	const now = Date.now();
	if (!state$1.lastRequestTimestamp) {
		state$1.lastRequestTimestamp = now;
		return;
	}
	const elapsedSeconds = (now - state$1.lastRequestTimestamp) / 1e3;
	// `>=`: once the full interval has elapsed there is nothing left to wait
	// for, so the request must not be rejected with "wait 0 more seconds".
	if (elapsedSeconds >= state$1.rateLimitSeconds) {
		state$1.lastRequestTimestamp = now;
		return;
	}
	const waitTimeSeconds = Math.ceil(state$1.rateLimitSeconds - elapsedSeconds);
	if (!state$1.rateLimitWait) {
		consola.warn(`Rate limit exceeded. Need to wait ${waitTimeSeconds} more seconds.`);
		throw new HTTPError("Rate limit exceeded", Response.json({ message: "Rate limit exceeded" }, { status: 429 }));
	}
	const waitTimeMs = waitTimeSeconds * 1e3;
	consola.warn(`Rate limit reached. Waiting ${waitTimeSeconds} seconds before proceeding...`);
	await sleep(waitTimeMs);
	// Record when the request actually proceeds. Storing the pre-sleep `now`
	// would let the next request through before the interval has passed.
	state$1.lastRequestTimestamp = Date.now();
	consola.info("Rate limit wait completed, proceeding with request");
}
|
|
2923
|
+
|
|
2673
2924
|
//#endregion
|
|
2674
2925
|
//#region src/services/copilot/create-chat-completions.ts
|
|
2675
2926
|
const createChatCompletions = async (payload) => {
|
|
@@ -2704,27 +2955,39 @@ const createChatCompletions = async (payload) => {
|
|
|
2704
2955
|
|
|
2705
2956
|
//#endregion
|
|
2706
2957
|
//#region src/routes/chat-completions/handler.ts
|
|
2707
|
-
|
|
2708
|
-
|
|
2709
|
-
|
|
2710
|
-
|
|
2958
|
+
/**
 * Log the prompt token count, auto-truncate the context if needed, and
 * default `max_tokens` to the model's `max_output_tokens`.
 *
 * Token counting and truncation are best-effort: if either fails, the
 * payload is used untruncated, but the `max_tokens` default is still applied.
 *
 * @param payload - incoming chat completion payload
 * @returns the payload itself when the model is unknown; otherwise the
 *   (possibly truncated) payload, copied with `max_tokens` filled in when the
 *   caller did not set it
 */
async function processPayloadTokens(payload) {
	const selectedModel = state.models?.data.find((model) => model.id === payload.model);
	if (!selectedModel) {
		consola.warn("No model selected, skipping token count calculation");
		return payload;
	}
	let processed = payload;
	try {
		const tokenCount = await getTokenCount(payload, selectedModel);
		consola.info("Current token count:", tokenCount);
		processed = await truncateMessages(payload, selectedModel);
	} catch (error) {
		consola.warn("Failed to calculate token count:", error);
	}
	if (!isNullish(processed.max_tokens)) return processed;
	// Applied outside the try block on purpose: a tokenizer failure must not
	// prevent the max_tokens default from being set.
	const withMaxTokens = {
		...processed,
		max_tokens: selectedModel.capabilities?.limits?.max_output_tokens
	};
	consola.debug("Set max_tokens to:", JSON.stringify(withMaxTokens.max_tokens));
	return withMaxTokens;
}
|
|
2985
|
+
async function handleCompletion$1(c) {
|
|
2986
|
+
await checkRateLimit(state);
|
|
2987
|
+
const rawPayload = await c.req.json();
|
|
2988
|
+
consola.debug("Request payload:", JSON.stringify(rawPayload).slice(-400));
|
|
2989
|
+
const payload = await processPayloadTokens(rawPayload);
|
|
2720
2990
|
if (state.manualApprove) await awaitApproval();
|
|
2721
|
-
if (isNullish(payload.max_tokens)) {
|
|
2722
|
-
payload = {
|
|
2723
|
-
...payload,
|
|
2724
|
-
max_tokens: selectedModel?.capabilities.limits.max_output_tokens
|
|
2725
|
-
};
|
|
2726
|
-
consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens));
|
|
2727
|
-
}
|
|
2728
2991
|
const response = await createChatCompletions(payload);
|
|
2729
2992
|
if (isNonStreaming$1(response)) {
|
|
2730
2993
|
consola.debug("Non-streaming response:", JSON.stringify(response));
|
|
@@ -3155,10 +3418,27 @@ function translateChunkToAnthropicEvents(chunk, state$1) {
|
|
|
3155
3418
|
|
|
3156
3419
|
//#endregion
|
|
3157
3420
|
//#region src/routes/messages/handler.ts
|
|
3421
|
+
/**
 * Shrink an OpenAI-format payload via truncateMessages when its model is known.
 * Falls back to the untouched payload if the model cannot be found in
 * `state.models` or if truncation throws.
 */
async function autoTruncatePayload(payload) {
	const selectedModel = state.models?.data.find((candidate) => candidate.id === payload.model);
	if (selectedModel) {
		try {
			return await truncateMessages(payload, selectedModel);
		} catch (error) {
			consola.warn("Failed to auto-truncate context:", error);
			return payload;
		}
	}
	consola.warn("No model selected for Anthropic endpoint, skipping auto-truncation");
	return payload;
}
|
|
3158
3437
|
async function handleCompletion(c) {
|
|
3159
3438
|
await checkRateLimit(state);
|
|
3160
3439
|
const anthropicPayload = await c.req.json();
|
|
3161
|
-
const
|
|
3440
|
+
const rawOpenAIPayload = translateToOpenAI(anthropicPayload);
|
|
3441
|
+
const openAIPayload = await autoTruncatePayload(rawOpenAIPayload);
|
|
3162
3442
|
if (state.manualApprove) await awaitApproval();
|
|
3163
3443
|
const response = await createChatCompletions(openAIPayload);
|
|
3164
3444
|
if (isNonStreaming(response)) {
|
|
@@ -3715,7 +3995,7 @@ async function runServer(options$1) {
|
|
|
3715
3995
|
} else if (options$1.antigravity) {
|
|
3716
3996
|
consola.info("Google Antigravity mode enabled");
|
|
3717
3997
|
state.antigravityMode = true;
|
|
3718
|
-
const { loadAntigravityAuth, setupAntigravity, getCurrentAccount, hasApiKey, getApiKey: getApiKey$1, setOAuthCredentials } = await import("./auth-
|
|
3998
|
+
const { loadAntigravityAuth, setupAntigravity, getCurrentAccount, hasApiKey, getApiKey: getApiKey$1, setOAuthCredentials } = await import("./auth-BgquW2Yd.js");
|
|
3719
3999
|
if (options$1.antigravityClientId && options$1.antigravityClientSecret) {
|
|
3720
4000
|
setOAuthCredentials(options$1.antigravityClientId, options$1.antigravityClientSecret);
|
|
3721
4001
|
consola.info("Using provided OAuth credentials from CLI");
|
|
@@ -3744,7 +4024,7 @@ async function runServer(options$1) {
|
|
|
3744
4024
|
}
|
|
3745
4025
|
if (!await getCurrentAccount() && !hasApiKey()) throw new Error("No enabled Antigravity accounts available");
|
|
3746
4026
|
}
|
|
3747
|
-
const { getAntigravityModels: getAntigravityModels$1 } = await import("./get-models-
|
|
4027
|
+
const { getAntigravityModels: getAntigravityModels$1 } = await import("./get-models-D1vQG5Eb.js");
|
|
3748
4028
|
const models = await getAntigravityModels$1();
|
|
3749
4029
|
state.antigravityModels = models;
|
|
3750
4030
|
consola.info(`Available Antigravity models: \n${models.data.map((model) => `- ${model.id}`).join("\n")}`);
|