@hsupu/copilot-api 0.7.6 → 0.7.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.js +66 -17
- package/dist/main.js.map +1 -1
- package/package.json +1 -1
package/dist/main.js
CHANGED
|
@@ -2097,20 +2097,50 @@ const getTokenCount = async (payload, model) => {
|
|
|
2097
2097
|
//#region src/lib/auto-compact.ts
|
|
2098
2098
|
const DEFAULT_CONFIG = {
|
|
2099
2099
|
targetTokens: 12e4,
|
|
2100
|
-
safetyMarginPercent: 2
|
|
2100
|
+
safetyMarginPercent: 2,
|
|
2101
|
+
maxRequestBodyBytes: 500 * 1024
|
|
2101
2102
|
};
|
|
2102
2103
|
/**
|
|
2103
|
-
*
|
|
2104
|
+
* Dynamic byte limit that adjusts based on 413 errors.
|
|
2105
|
+
* Starts at 500KB and can be adjusted when 413 errors are encountered.
|
|
2106
|
+
*/
|
|
2107
|
+
let dynamicByteLimitOverride = null;
|
|
2108
|
+
/**
|
|
2109
|
+
* Called when a 413 error is encountered with a specific payload size.
|
|
2110
|
+
* Adjusts the dynamic byte limit to 90% of the failing size.
|
|
2111
|
+
*/
|
|
2112
|
+
function onRequestTooLarge(failingBytes) {
|
|
2113
|
+
const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
|
|
2114
|
+
dynamicByteLimitOverride = newLimit;
|
|
2115
|
+
consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed, new limit: ${Math.round(newLimit / 1024)}KB`);
|
|
2116
|
+
}
|
|
2117
|
+
/**
|
|
2118
|
+
* Check if payload needs compaction based on model limits OR request body size.
|
|
2104
2119
|
* Uses a safety margin to account for token counting differences.
|
|
2105
2120
|
*/
|
|
2106
|
-
async function checkNeedsCompaction(payload, model,
|
|
2121
|
+
async function checkNeedsCompaction(payload, model, config = {}) {
|
|
2122
|
+
const cfg = {
|
|
2123
|
+
...DEFAULT_CONFIG,
|
|
2124
|
+
...config
|
|
2125
|
+
};
|
|
2107
2126
|
const currentTokens = (await getTokenCount(payload, model)).input;
|
|
2108
2127
|
const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
|
|
2109
|
-
const
|
|
2128
|
+
const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
|
|
2129
|
+
const currentBytes = JSON.stringify(payload).length;
|
|
2130
|
+
const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
|
|
2131
|
+
const exceedsTokens = currentTokens > tokenLimit;
|
|
2132
|
+
const exceedsBytes = currentBytes > byteLimit;
|
|
2133
|
+
let reason;
|
|
2134
|
+
if (exceedsTokens && exceedsBytes) reason = "both";
|
|
2135
|
+
else if (exceedsTokens) reason = "tokens";
|
|
2136
|
+
else if (exceedsBytes) reason = "bytes";
|
|
2110
2137
|
return {
|
|
2111
|
-
needed:
|
|
2138
|
+
needed: exceedsTokens || exceedsBytes,
|
|
2112
2139
|
currentTokens,
|
|
2113
|
-
|
|
2140
|
+
tokenLimit,
|
|
2141
|
+
currentBytes,
|
|
2142
|
+
byteLimit,
|
|
2143
|
+
reason
|
|
2114
2144
|
};
|
|
2115
2145
|
}
|
|
2116
2146
|
/**
|
|
@@ -2217,7 +2247,7 @@ function createTruncationMarker(removedCount) {
|
|
|
2217
2247
|
};
|
|
2218
2248
|
}
|
|
2219
2249
|
/**
|
|
2220
|
-
* Perform auto-compaction on a payload that exceeds token limits.
|
|
2250
|
+
* Perform auto-compaction on a payload that exceeds token or size limits.
|
|
2221
2251
|
* This uses simple truncation - no LLM calls required.
|
|
2222
2252
|
* Uses iterative approach with decreasing target tokens until under limit.
|
|
2223
2253
|
*/
|
|
@@ -2228,21 +2258,29 @@ async function autoCompact(payload, model, config = {}) {
|
|
|
2228
2258
|
};
|
|
2229
2259
|
const originalTokens = (await getTokenCount(payload, model)).input;
|
|
2230
2260
|
const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
|
|
2231
|
-
const
|
|
2232
|
-
|
|
2261
|
+
const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
|
|
2262
|
+
const originalBytes = JSON.stringify(payload).length;
|
|
2263
|
+
const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
|
|
2264
|
+
if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
|
|
2233
2265
|
payload,
|
|
2234
2266
|
wasCompacted: false,
|
|
2235
2267
|
originalTokens,
|
|
2236
2268
|
compactedTokens: originalTokens,
|
|
2237
2269
|
removedMessageCount: 0
|
|
2238
2270
|
};
|
|
2239
|
-
|
|
2271
|
+
const exceedsTokens = originalTokens > tokenLimit;
|
|
2272
|
+
const exceedsBytes = originalBytes > byteLimit;
|
|
2273
|
+
let reason;
|
|
2274
|
+
if (exceedsTokens && exceedsBytes) reason = "tokens and size";
|
|
2275
|
+
else if (exceedsBytes) reason = "size";
|
|
2276
|
+
else reason = "tokens";
|
|
2277
|
+
consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB), truncating...`);
|
|
2240
2278
|
const { systemMessages, remainingMessages } = extractSystemMessages(payload.messages);
|
|
2241
2279
|
const systemTokens = estimateSystemTokens(systemMessages);
|
|
2242
2280
|
consola.debug(`Auto-compact: ${systemMessages.length} system messages (~${systemTokens} tokens)`);
|
|
2243
2281
|
const MAX_ITERATIONS = 5;
|
|
2244
2282
|
const MIN_TARGET = 2e4;
|
|
2245
|
-
let currentTarget = Math.min(cfg.targetTokens,
|
|
2283
|
+
let currentTarget = Math.min(cfg.targetTokens, tokenLimit);
|
|
2246
2284
|
let lastResult = null;
|
|
2247
2285
|
for (let iteration = 0; iteration < MAX_ITERATIONS; iteration++) {
|
|
2248
2286
|
const result = await tryCompactWithTarget({
|
|
@@ -2252,16 +2290,21 @@ async function autoCompact(payload, model, config = {}) {
|
|
|
2252
2290
|
remainingMessages,
|
|
2253
2291
|
systemTokens,
|
|
2254
2292
|
targetTokens: currentTarget,
|
|
2255
|
-
limit,
|
|
2293
|
+
limit: tokenLimit,
|
|
2256
2294
|
originalTokens
|
|
2257
2295
|
});
|
|
2258
2296
|
if (!result.wasCompacted) return result;
|
|
2259
2297
|
lastResult = result;
|
|
2260
|
-
|
|
2261
|
-
|
|
2298
|
+
const resultBytes = JSON.stringify(result.payload).length;
|
|
2299
|
+
const underTokenLimit = result.compactedTokens <= tokenLimit;
|
|
2300
|
+
const underByteLimit = resultBytes <= byteLimit;
|
|
2301
|
+
if (underTokenLimit && underByteLimit) {
|
|
2302
|
+
consola.info(`Auto-compact: ${originalTokens} → ${result.compactedTokens} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(resultBytes / 1024)}KB (removed ${result.removedMessageCount} messages)`);
|
|
2262
2303
|
return result;
|
|
2263
2304
|
}
|
|
2264
|
-
|
|
2305
|
+
const tokenStatus = underTokenLimit ? "OK" : `${result.compactedTokens} > ${tokenLimit}`;
|
|
2306
|
+
const byteStatus = underByteLimit ? "OK" : `${Math.round(resultBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB`;
|
|
2307
|
+
consola.warn(`Auto-compact: Still over limit (tokens: ${tokenStatus}, size: ${byteStatus}), trying more aggressive truncation`);
|
|
2265
2308
|
currentTarget = Math.floor(currentTarget * .7);
|
|
2266
2309
|
if (currentTarget < MIN_TARGET) {
|
|
2267
2310
|
consola.error("Auto-compact: Cannot reduce further, target too low");
|
|
@@ -2433,12 +2476,16 @@ async function buildFinalPayload(payload, model) {
|
|
|
2433
2476
|
}
|
|
2434
2477
|
try {
|
|
2435
2478
|
const check = await checkNeedsCompaction(payload, model);
|
|
2436
|
-
consola.debug(`Auto-compact check: ${check.currentTokens} tokens, limit ${check.
|
|
2479
|
+
consola.debug(`Auto-compact check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
|
|
2437
2480
|
if (!check.needed) return {
|
|
2438
2481
|
finalPayload: payload,
|
|
2439
2482
|
compactResult: null
|
|
2440
2483
|
};
|
|
2441
|
-
|
|
2484
|
+
let reasonText;
|
|
2485
|
+
if (check.reason === "both") reasonText = "tokens and size";
|
|
2486
|
+
else if (check.reason === "bytes") reasonText = "size";
|
|
2487
|
+
else reasonText = "tokens";
|
|
2488
|
+
consola.info(`Auto-compact triggered: exceeds ${reasonText} limit`);
|
|
2442
2489
|
const compactResult = await autoCompact(payload, model);
|
|
2443
2490
|
return {
|
|
2444
2491
|
finalPayload: compactResult.payload,
|
|
@@ -2454,11 +2501,13 @@ async function buildFinalPayload(payload, model) {
|
|
|
2454
2501
|
}
|
|
2455
2502
|
/**
|
|
2456
2503
|
* Log helpful debugging information when a 413 error occurs.
|
|
2504
|
+
* Also adjusts the dynamic byte limit for future requests.
|
|
2457
2505
|
*/
|
|
2458
2506
|
async function logPayloadSizeInfo(payload, model) {
|
|
2459
2507
|
const messageCount = payload.messages.length;
|
|
2460
2508
|
const bodySize = JSON.stringify(payload).length;
|
|
2461
2509
|
const bodySizeKB = Math.round(bodySize / 1024);
|
|
2510
|
+
onRequestTooLarge(bodySize);
|
|
2462
2511
|
let imageCount = 0;
|
|
2463
2512
|
let largeMessages = 0;
|
|
2464
2513
|
let totalImageSize = 0;
|