@hsupu/copilot-api 0.7.6 → 0.7.7

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -2097,20 +2097,50 @@ const getTokenCount = async (payload, model) => {
2097
2097
  //#region src/lib/auto-compact.ts
2098
2098
  const DEFAULT_CONFIG = {
2099
2099
  targetTokens: 12e4,
2100
- safetyMarginPercent: 2
2100
+ safetyMarginPercent: 2,
2101
+ maxRequestBodyBytes: 500 * 1024
2101
2102
  };
2102
2103
  /**
2103
- * Check if payload needs compaction based on model limits.
2104
+ * Dynamic byte limit that adjusts based on 413 errors.
2105
+ * Starts at 500KB and can be adjusted when 413 errors are encountered.
2106
+ */
2107
+ let dynamicByteLimitOverride = null;
2108
+ /**
2109
+ * Called when a 413 error is encountered with a specific payload size.
2110
+ * Adjusts the dynamic byte limit to 90% of the failing size.
2111
+ */
2112
+ function onRequestTooLarge(failingBytes) {
2113
+ const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
2114
+ dynamicByteLimitOverride = newLimit;
2115
+ consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed, new limit: ${Math.round(newLimit / 1024)}KB`);
2116
+ }
2117
+ /**
2118
+ * Check if payload needs compaction based on model limits OR request body size.
2104
2119
  * Uses a safety margin to account for token counting differences.
2105
2120
  */
2106
- async function checkNeedsCompaction(payload, model, safetyMarginPercent = 2) {
2121
+ async function checkNeedsCompaction(payload, model, config = {}) {
2122
+ const cfg = {
2123
+ ...DEFAULT_CONFIG,
2124
+ ...config
2125
+ };
2107
2126
  const currentTokens = (await getTokenCount(payload, model)).input;
2108
2127
  const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
2109
- const limit = Math.floor(rawLimit * (1 - safetyMarginPercent / 100));
2128
+ const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
2129
+ const currentBytes = JSON.stringify(payload).length;
2130
+ const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
2131
+ const exceedsTokens = currentTokens > tokenLimit;
2132
+ const exceedsBytes = currentBytes > byteLimit;
2133
+ let reason;
2134
+ if (exceedsTokens && exceedsBytes) reason = "both";
2135
+ else if (exceedsTokens) reason = "tokens";
2136
+ else if (exceedsBytes) reason = "bytes";
2110
2137
  return {
2111
- needed: currentTokens > limit,
2138
+ needed: exceedsTokens || exceedsBytes,
2112
2139
  currentTokens,
2113
- limit
2140
+ tokenLimit,
2141
+ currentBytes,
2142
+ byteLimit,
2143
+ reason
2114
2144
  };
2115
2145
  }
2116
2146
  /**
@@ -2217,7 +2247,7 @@ function createTruncationMarker(removedCount) {
2217
2247
  };
2218
2248
  }
2219
2249
  /**
2220
- * Perform auto-compaction on a payload that exceeds token limits.
2250
+ * Perform auto-compaction on a payload that exceeds token or size limits.
2221
2251
  * This uses simple truncation - no LLM calls required.
2222
2252
  * Uses iterative approach with decreasing target tokens until under limit.
2223
2253
  */
@@ -2228,21 +2258,29 @@ async function autoCompact(payload, model, config = {}) {
2228
2258
  };
2229
2259
  const originalTokens = (await getTokenCount(payload, model)).input;
2230
2260
  const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
2231
- const limit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
2232
- if (originalTokens <= limit) return {
2261
+ const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
2262
+ const originalBytes = JSON.stringify(payload).length;
2263
+ const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
2264
+ if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
2233
2265
  payload,
2234
2266
  wasCompacted: false,
2235
2267
  originalTokens,
2236
2268
  compactedTokens: originalTokens,
2237
2269
  removedMessageCount: 0
2238
2270
  };
2239
- consola.info(`Auto-compact: ${originalTokens} tokens exceeds limit of ${limit}, truncating...`);
2271
+ const exceedsTokens = originalTokens > tokenLimit;
2272
+ const exceedsBytes = originalBytes > byteLimit;
2273
+ let reason;
2274
+ if (exceedsTokens && exceedsBytes) reason = "tokens and size";
2275
+ else if (exceedsBytes) reason = "size";
2276
+ else reason = "tokens";
2277
+ consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB), truncating...`);
2240
2278
  const { systemMessages, remainingMessages } = extractSystemMessages(payload.messages);
2241
2279
  const systemTokens = estimateSystemTokens(systemMessages);
2242
2280
  consola.debug(`Auto-compact: ${systemMessages.length} system messages (~${systemTokens} tokens)`);
2243
2281
  const MAX_ITERATIONS = 5;
2244
2282
  const MIN_TARGET = 2e4;
2245
- let currentTarget = Math.min(cfg.targetTokens, limit);
2283
+ let currentTarget = Math.min(cfg.targetTokens, tokenLimit);
2246
2284
  let lastResult = null;
2247
2285
  for (let iteration = 0; iteration < MAX_ITERATIONS; iteration++) {
2248
2286
  const result = await tryCompactWithTarget({
@@ -2252,16 +2290,21 @@ async function autoCompact(payload, model, config = {}) {
2252
2290
  remainingMessages,
2253
2291
  systemTokens,
2254
2292
  targetTokens: currentTarget,
2255
- limit,
2293
+ limit: tokenLimit,
2256
2294
  originalTokens
2257
2295
  });
2258
2296
  if (!result.wasCompacted) return result;
2259
2297
  lastResult = result;
2260
- if (result.compactedTokens <= limit) {
2261
- consola.info(`Auto-compact: ${originalTokens} ${result.compactedTokens} tokens (removed ${result.removedMessageCount} messages)`);
2298
+ const resultBytes = JSON.stringify(result.payload).length;
2299
+ const underTokenLimit = result.compactedTokens <= tokenLimit;
2300
+ const underByteLimit = resultBytes <= byteLimit;
2301
+ if (underTokenLimit && underByteLimit) {
2302
+ consola.info(`Auto-compact: ${originalTokens} → ${result.compactedTokens} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(resultBytes / 1024)}KB (removed ${result.removedMessageCount} messages)`);
2262
2303
  return result;
2263
2304
  }
2264
- consola.warn(`Auto-compact: Still over limit (${result.compactedTokens} > ${limit}), trying more aggressive truncation`);
2305
+ const tokenStatus = underTokenLimit ? "OK" : `${result.compactedTokens} > ${tokenLimit}`;
2306
+ const byteStatus = underByteLimit ? "OK" : `${Math.round(resultBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB`;
2307
+ consola.warn(`Auto-compact: Still over limit (tokens: ${tokenStatus}, size: ${byteStatus}), trying more aggressive truncation`);
2265
2308
  currentTarget = Math.floor(currentTarget * .7);
2266
2309
  if (currentTarget < MIN_TARGET) {
2267
2310
  consola.error("Auto-compact: Cannot reduce further, target too low");
@@ -2433,12 +2476,16 @@ async function buildFinalPayload(payload, model) {
2433
2476
  }
2434
2477
  try {
2435
2478
  const check = await checkNeedsCompaction(payload, model);
2436
- consola.debug(`Auto-compact check: ${check.currentTokens} tokens, limit ${check.limit}, needed: ${check.needed}`);
2479
+ consola.debug(`Auto-compact check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
2437
2480
  if (!check.needed) return {
2438
2481
  finalPayload: payload,
2439
2482
  compactResult: null
2440
2483
  };
2441
- consola.info(`Auto-compact triggered: ${check.currentTokens} tokens > ${check.limit} limit`);
2484
+ let reasonText;
2485
+ if (check.reason === "both") reasonText = "tokens and size";
2486
+ else if (check.reason === "bytes") reasonText = "size";
2487
+ else reasonText = "tokens";
2488
+ consola.info(`Auto-compact triggered: exceeds ${reasonText} limit`);
2442
2489
  const compactResult = await autoCompact(payload, model);
2443
2490
  return {
2444
2491
  finalPayload: compactResult.payload,
@@ -2454,11 +2501,13 @@ async function buildFinalPayload(payload, model) {
2454
2501
  }
2455
2502
  /**
2456
2503
  * Log helpful debugging information when a 413 error occurs.
2504
+ * Also adjusts the dynamic byte limit for future requests.
2457
2505
  */
2458
2506
  async function logPayloadSizeInfo(payload, model) {
2459
2507
  const messageCount = payload.messages.length;
2460
2508
  const bodySize = JSON.stringify(payload).length;
2461
2509
  const bodySizeKB = Math.round(bodySize / 1024);
2510
+ onRequestTooLarge(bodySize);
2462
2511
  let imageCount = 0;
2463
2512
  let largeMessages = 0;
2464
2513
  let totalImageSize = 0;