@hsupu/copilot-api 0.7.8 → 0.7.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -46,7 +46,7 @@ const state = {
  accountType: "individual",
  manualApprove: false,
  showToken: false,
- autoCompact: false
+ autoCompact: true
  };

  //#endregion
@@ -821,7 +821,7 @@ const patchClaude = defineCommand({
  //#endregion
  //#region package.json
  var name = "@hsupu/copilot-api";
- var version = "0.7.8";
+ var version = "0.7.9";
  var description = "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!";
  var keywords = [
  "proxy",
@@ -2176,188 +2176,157 @@ const getTokenCount = async (payload, model) => {
  //#endregion
  //#region src/lib/auto-compact.ts
  const DEFAULT_CONFIG = {
- targetTokens: 12e4,
  safetyMarginPercent: 2,
  maxRequestBodyBytes: 500 * 1024
  };
+ /** Dynamic byte limit that adjusts based on 413 errors */
+ let dynamicByteLimit = null;
  /**
- * Dynamic byte limit that adjusts based on 413 errors.
- * Starts at 500KB and can be adjusted when 413 errors are encountered.
- */
- let dynamicByteLimitOverride = null;
- /**
- * Called when a 413 error is encountered with a specific payload size.
- * Adjusts the dynamic byte limit to 90% of the failing size.
+ * Called when a 413 error occurs. Adjusts the byte limit to 90% of the failing size.
  */
  function onRequestTooLarge(failingBytes) {
  const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
- dynamicByteLimitOverride = newLimit;
- consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed, new limit: ${Math.round(newLimit / 1024)}KB`);
+ dynamicByteLimit = newLimit;
+ consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed ${Math.round(newLimit / 1024)}KB`);
  }
- /**
- * Check if payload needs compaction based on model limits OR request body size.
- * Uses a safety margin to account for token counting differences.
- */
- async function checkNeedsCompaction(payload, model, config = {}) {
- const cfg = {
- ...DEFAULT_CONFIG,
- ...config
- };
- const currentTokens = (await getTokenCount(payload, model)).input;
- const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
- const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
- const currentBytes = JSON.stringify(payload).length;
- const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
- const exceedsTokens = currentTokens > tokenLimit;
- const exceedsBytes = currentBytes > byteLimit;
- let reason;
- if (exceedsTokens && exceedsBytes) reason = "both";
- else if (exceedsTokens) reason = "tokens";
- else if (exceedsBytes) reason = "bytes";
+ function calculateLimits(model, config) {
+ const rawTokenLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
+ const tokenLimit = Math.floor(rawTokenLimit * (1 - config.safetyMarginPercent / 100));
+ const byteLimit = dynamicByteLimit ?? config.maxRequestBodyBytes;
  return {
- needed: exceedsTokens || exceedsBytes,
- currentTokens,
  tokenLimit,
- currentBytes,
- byteLimit,
- reason
+ byteLimit
  };
  }
- /**
- * Calculate approximate token count for a single message.
- * This is a fast estimation for splitting decisions.
- */
- function estimateMessageTokens(message) {
- let text = "";
- if (typeof message.content === "string") text = message.content;
- else if (Array.isArray(message.content)) {
- for (const part of message.content) if (part.type === "text") text += part.text;
- else if ("image_url" in part) text += part.image_url.url;
+ /** Estimate tokens for a single message (fast approximation) */
+ function estimateMessageTokens(msg) {
+ let charCount = 0;
+ if (typeof msg.content === "string") charCount = msg.content.length;
+ else if (Array.isArray(msg.content)) {
+ for (const part of msg.content) if (part.type === "text") charCount += part.text.length;
+ else if ("image_url" in part) charCount += Math.min(part.image_url.url.length, 1e4);
  }
- if (message.tool_calls) text += JSON.stringify(message.tool_calls);
- return Math.ceil(text.length / 4) + 10;
+ if (msg.tool_calls) charCount += JSON.stringify(msg.tool_calls).length;
+ return Math.ceil(charCount / 4) + 10;
  }
- /**
- * Extract system messages from the beginning of the message list.
- */
+ /** Get byte size of a message */
+ function getMessageBytes(msg) {
+ return JSON.stringify(msg).length;
+ }
+ /** Extract system/developer messages from the beginning */
  function extractSystemMessages(messages) {
- const systemMessages = [];
- let i = 0;
- while (i < messages.length) {
- const msg = messages[i];
- if (msg.role === "system" || msg.role === "developer") {
- systemMessages.push(msg);
- i++;
- } else break;
+ let splitIndex = 0;
+ while (splitIndex < messages.length) {
+ const role = messages[splitIndex].role;
+ if (role !== "system" && role !== "developer") break;
+ splitIndex++;
  }
  return {
- systemMessages,
- remainingMessages: messages.slice(i)
+ systemMessages: messages.slice(0, splitIndex),
+ conversationMessages: messages.slice(splitIndex)
  };
  }
- /**
- * Extract tool_use ids from assistant messages with tool_calls.
- */
- function getToolUseIds(message) {
- if (message.role === "assistant" && message.tool_calls) return message.tool_calls.map((tc) => tc.id);
+ /** Get tool_use IDs from an assistant message */
+ function getToolCallIds(msg) {
+ if (msg.role === "assistant" && msg.tool_calls) return msg.tool_calls.map((tc) => tc.id);
  return [];
  }
- /**
- * Estimate the byte size of a message (for binary search).
- */
- function estimateMessageBytes(message) {
- return JSON.stringify(message).length;
+ /** Filter orphaned tool_result messages */
+ function filterOrphanedToolResults(messages) {
+ const toolUseIds = /* @__PURE__ */ new Set();
+ for (const msg of messages) for (const id of getToolCallIds(msg)) toolUseIds.add(id);
+ let removedCount = 0;
+ const filtered = messages.filter((msg) => {
+ if (msg.role === "tool" && msg.tool_call_id && !toolUseIds.has(msg.tool_call_id)) {
+ removedCount++;
+ return false;
+ }
+ return true;
+ });
+ if (removedCount > 0) consola.debug(`Auto-compact: Filtered ${removedCount} orphaned tool_result`);
+ return filtered;
+ }
+ /** Ensure messages start with a user message */
+ function ensureStartsWithUser(messages) {
+ let startIndex = 0;
+ while (startIndex < messages.length && messages[startIndex].role !== "user") startIndex++;
+ if (startIndex > 0) consola.debug(`Auto-compact: Skipped ${startIndex} leading non-user messages`);
+ return messages.slice(startIndex);
  }
  /**
- * Find the optimal preserve index using binary search.
- * This finds the point where we keep as many messages as possible
- * while staying under both token and byte limits.
+ * Find the optimal index from which to preserve messages.
+ * Uses binary search with pre-calculated cumulative sums.
+ * Returns the smallest index where the preserved portion fits within limits.
  */
- function findOptimalPreserveIndex(messages, systemMessages, targetTokens, targetBytes) {
+ function findOptimalPreserveIndex(params) {
+ const { messages, systemBytes, systemTokens, payloadOverhead, tokenLimit, byteLimit } = params;
  if (messages.length === 0) return 0;
- const systemTokens = systemMessages.reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
- const systemBytes = systemMessages.reduce((sum, msg) => sum + estimateMessageBytes(msg), 0);
- const markerOverhead = 200;
- const availableTokens = targetTokens - systemTokens - markerOverhead / 4;
- const availableBytes = targetBytes - systemBytes - markerOverhead;
- const cumulativeTokens = Array.from({ length: messages.length + 1 }, () => 0);
- const cumulativeBytes = Array.from({ length: messages.length + 1 }, () => 0);
- for (let i = messages.length - 1; i >= 0; i--) {
+ const markerBytes = 200;
+ const availableTokens = tokenLimit - systemTokens - 50;
+ const availableBytes = byteLimit - payloadOverhead - systemBytes - markerBytes;
+ if (availableTokens <= 0 || availableBytes <= 0) return messages.length;
+ const n = messages.length;
+ const cumTokens = Array.from({ length: n + 1 }, () => 0);
+ const cumBytes = Array.from({ length: n + 1 }, () => 0);
+ for (let i = n - 1; i >= 0; i--) {
  const msg = messages[i];
- cumulativeTokens[i] = cumulativeTokens[i + 1] + estimateMessageTokens(msg);
- cumulativeBytes[i] = cumulativeBytes[i + 1] + estimateMessageBytes(msg);
+ cumTokens[i] = cumTokens[i + 1] + estimateMessageTokens(msg);
+ cumBytes[i] = cumBytes[i + 1] + getMessageBytes(msg) + 1;
  }
  let left = 0;
- let right = messages.length;
+ let right = n;
  while (left < right) {
- const mid = Math.floor((left + right) / 2);
- const tokensFromMid = cumulativeTokens[mid];
- const bytesFromMid = cumulativeBytes[mid];
- if (tokensFromMid <= availableTokens && bytesFromMid <= availableBytes) right = mid;
+ const mid = left + right >>> 1;
+ if (cumTokens[mid] <= availableTokens && cumBytes[mid] <= availableBytes) right = mid;
  else left = mid + 1;
  }
  return left;
  }
  /**
- * Filter out orphaned tool_result messages that don't have a matching tool_use
- * in the preserved message list. This prevents API errors when truncation
- * separates tool_use/tool_result pairs.
+ * Check if payload needs compaction based on model limits or byte size.
  */
- function filterOrphanedToolResults(messages) {
- const availableToolUseIds = /* @__PURE__ */ new Set();
- for (const msg of messages) for (const id of getToolUseIds(msg)) availableToolUseIds.add(id);
- const filteredMessages = [];
- let removedCount = 0;
- for (const msg of messages) {
- if (msg.role === "tool" && msg.tool_call_id && !availableToolUseIds.has(msg.tool_call_id)) {
- removedCount++;
- continue;
- }
- filteredMessages.push(msg);
- }
- if (removedCount > 0) consola.info(`Auto-compact: Removed ${removedCount} orphaned tool_result message(s) without matching tool_use`);
- return filteredMessages;
- }
- /**
- * Ensure the message list starts with a user message.
- * If it starts with assistant or tool messages, skip them until we find a user message.
- * This is required because OpenAI API expects conversations to start with user messages
- * (after system messages).
- */
- function ensureStartsWithUser(messages) {
- let startIndex = 0;
- while (startIndex < messages.length) {
- if (messages[startIndex].role === "user") break;
- startIndex++;
- }
- if (startIndex > 0) consola.info(`Auto-compact: Skipped ${startIndex} leading non-user message(s) to ensure valid sequence`);
- return messages.slice(startIndex);
+ async function checkNeedsCompaction(payload, model, config = {}) {
+ const cfg = {
+ ...DEFAULT_CONFIG,
+ ...config
+ };
+ const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
+ const currentTokens = (await getTokenCount(payload, model)).input;
+ const currentBytes = JSON.stringify(payload).length;
+ const exceedsTokens = currentTokens > tokenLimit;
+ const exceedsBytes = currentBytes > byteLimit;
+ let reason;
+ if (exceedsTokens && exceedsBytes) reason = "both";
+ else if (exceedsTokens) reason = "tokens";
+ else if (exceedsBytes) reason = "bytes";
+ return {
+ needed: exceedsTokens || exceedsBytes,
+ currentTokens,
+ tokenLimit,
+ currentBytes,
+ byteLimit,
+ reason
+ };
  }
- /**
- * Create a truncation marker message.
- */
+ /** Create a truncation marker message */
  function createTruncationMarker(removedCount) {
  return {
  role: "user",
- content: `[CONTEXT TRUNCATED: ${removedCount} earlier messages were removed to fit context limits. The conversation continues below.]`
+ content: `[CONTEXT TRUNCATED: ${removedCount} earlier messages removed to fit context limits]`
  };
  }
  /**
- * Perform auto-compaction on a payload that exceeds token or size limits.
- * This uses simple truncation with binary search - no LLM calls required.
- * The algorithm finds the optimal truncation point to maximize preserved messages
- * while staying under both token and byte limits.
+ * Perform auto-compaction on a payload that exceeds limits.
+ * Uses binary search to find the optimal truncation point.
  */
  async function autoCompact(payload, model, config = {}) {
  const cfg = {
  ...DEFAULT_CONFIG,
  ...config
  };
- const originalTokens = (await getTokenCount(payload, model)).input;
- const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
- const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
+ const { tokenLimit, byteLimit } = calculateLimits(model, cfg);
  const originalBytes = JSON.stringify(payload).length;
- const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
+ const originalTokens = (await getTokenCount(payload, model)).input;
  if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
  payload,
  wasCompacted: false,
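The rewritten findOptimalPreserveIndex in the hunk above precomputes suffix sums: cumTokens[i] and cumBytes[i] cover messages i..n-1, so both are non-increasing in i, which makes "the suffix starting at mid fits" a monotone predicate and the binary search valid. A minimal standalone sketch of that idea, with a simplified message shape and the same chars/4 token heuristic (not the package's exact code):

// Standalone sketch of the suffix-sum + binary-search technique above.
interface SketchMsg { content: string }

function sketchTokens(m: SketchMsg): number {
  return Math.ceil(m.content.length / 4) + 10; // rough chars/4 heuristic
}

function sketchPreserveIndex(msgs: SketchMsg[], maxTokens: number, maxBytes: number): number {
  const n = msgs.length;
  const cumTokens = new Array<number>(n + 1).fill(0);
  const cumBytes = new Array<number>(n + 1).fill(0);
  // cum*[i] = cost of the suffix msgs[i..n-1]; non-increasing in i.
  for (let i = n - 1; i >= 0; i--) {
    cumTokens[i] = cumTokens[i + 1] + sketchTokens(msgs[i]);
    cumBytes[i] = cumBytes[i + 1] + JSON.stringify(msgs[i]).length + 1; // +1: array comma
  }
  let left = 0;
  let right = n; // n means "preserve nothing"
  while (left < right) {
    const mid = (left + right) >>> 1;
    // Monotonicity lets binary search find the leftmost fitting index,
    // i.e. the cut that preserves the most messages.
    if (cumTokens[mid] <= maxTokens && cumBytes[mid] <= maxBytes) right = mid;
    else left = mid + 1;
  }
  return left;
}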
@@ -2371,12 +2340,23 @@ async function autoCompact(payload, model, config = {}) {
  if (exceedsTokens && exceedsBytes) reason = "tokens and size";
  else if (exceedsBytes) reason = "size";
  else reason = "tokens";
- consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB), truncating...`);
- const { systemMessages, remainingMessages } = extractSystemMessages(payload.messages);
- consola.debug(`Auto-compact: ${systemMessages.length} system messages, ${remainingMessages.length} conversation messages`);
- const preserveIndex = findOptimalPreserveIndex(remainingMessages, systemMessages, tokenLimit, byteLimit);
+ consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB)`);
+ const { systemMessages, conversationMessages } = extractSystemMessages(payload.messages);
+ const messagesJson = JSON.stringify(payload.messages);
+ const payloadOverhead = originalBytes - messagesJson.length;
+ const systemBytes = systemMessages.reduce((sum, m) => sum + getMessageBytes(m) + 1, 0);
+ const systemTokens = systemMessages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
+ consola.debug(`Auto-compact: overhead=${Math.round(payloadOverhead / 1024)}KB, system=${systemMessages.length} msgs (${Math.round(systemBytes / 1024)}KB)`);
+ const preserveIndex = findOptimalPreserveIndex({
+ messages: conversationMessages,
+ systemBytes,
+ systemTokens,
+ payloadOverhead,
+ tokenLimit,
+ byteLimit
+ });
  if (preserveIndex === 0) {
- consola.warn("Auto-compact: Cannot truncate without losing all conversation history");
+ consola.warn("Auto-compact: Cannot truncate, system messages too large");
  return {
  payload,
  wasCompacted: false,
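The payloadOverhead term introduced above closes a gap in the old byte check: the request body contains more than messages (model name, tool definitions, sampling options, and so on), and that envelope previously consumed byte budget unaccounted for. A hedged illustration of the arithmetic (field values are made up):

// Illustration only: overhead = whole body minus the messages array.
const examplePayload = {
  model: "example-model", // made-up values, for arithmetic only
  temperature: 0.2,
  messages: [{ role: "user", content: "hi" }],
};
const originalBytes = JSON.stringify(examplePayload).length;          // whole body
const messagesBytes = JSON.stringify(examplePayload.messages).length; // just messages
const payloadOverhead = originalBytes - messagesBytes;                // envelope cost
// findOptimalPreserveIndex subtracts this from availableBytes, so a
// large tools definition now shrinks the room left for conversation.
console.log({ originalBytes, messagesBytes, payloadOverhead });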
@@ -2385,8 +2365,8 @@ async function autoCompact(payload, model, config = {}) {
  removedMessageCount: 0
  };
  }
- if (preserveIndex >= remainingMessages.length) {
- consola.warn("Auto-compact: Would need to remove all messages, cannot compact");
+ if (preserveIndex >= conversationMessages.length) {
+ consola.warn("Auto-compact: Would need to remove all messages");
  return {
  payload,
  wasCompacted: false,
@@ -2395,13 +2375,12 @@ async function autoCompact(payload, model, config = {}) {
  removedMessageCount: 0
  };
  }
- const removedMessages = remainingMessages.slice(0, preserveIndex);
- let preservedMessages = remainingMessages.slice(preserveIndex);
- preservedMessages = filterOrphanedToolResults(preservedMessages);
- preservedMessages = ensureStartsWithUser(preservedMessages);
- preservedMessages = filterOrphanedToolResults(preservedMessages);
- if (preservedMessages.length === 0) {
- consola.warn("Auto-compact: All messages were filtered out after cleanup, cannot compact");
+ let preserved = conversationMessages.slice(preserveIndex);
+ preserved = filterOrphanedToolResults(preserved);
+ preserved = ensureStartsWithUser(preserved);
+ preserved = filterOrphanedToolResults(preserved);
+ if (preserved.length === 0) {
+ consola.warn("Auto-compact: All messages filtered out after cleanup");
  return {
  payload,
  wasCompacted: false,
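The three-step cleanup order above is deliberate: the first filterOrphanedToolResults drops tool results whose tool_use fell before the cut, ensureStartsWithUser then trims to a user-first sequence, and the filter runs a second time as a safety net because that trim can itself remove an assistant message whose tool_calls matched surviving results. A small input/output sketch of what the first pass catches (simplified shapes, not the package's types):

// What filterOrphanedToolResults removes right after a cut (sketch).
type SketchM =
  | { role: "assistant"; tool_calls?: { id: string }[] }
  | { role: "tool"; tool_call_id: string }
  | { role: "user"; content: string };

const afterCut: SketchM[] = [
  { role: "tool", tool_call_id: "call_0" }, // orphan: its tool_use fell before the cut
  { role: "assistant", tool_calls: [{ id: "call_1" }] },
  { role: "tool", tool_call_id: "call_1" }, // kept: matching tool_use survives
  { role: "user", content: "continue" },
];
// Filtering drops only call_0's result; an unmatched tool message
// would otherwise be rejected by the upstream API.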
@@ -2410,29 +2389,30 @@ async function autoCompact(payload, model, config = {}) {
  removedMessageCount: 0
  };
  }
- consola.debug(`Auto-compact: Removing ${removedMessages.length} messages, keeping ${preservedMessages.length}`);
- const truncationMarker = createTruncationMarker(removedMessages.length);
+ const removedCount = conversationMessages.length - preserved.length;
+ const marker = createTruncationMarker(removedCount);
  const newPayload = {
  ...payload,
  messages: [
  ...systemMessages,
- truncationMarker,
- ...preservedMessages
+ marker,
+ ...preserved
  ]
  };
- const newTokenCount = await getTokenCount(newPayload, model);
  const newBytes = JSON.stringify(newPayload).length;
- consola.info(`Auto-compact: ${originalTokens} → ${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(newBytes / 1024)}KB (removed ${removedMessages.length} messages)`);
+ const newTokenCount = await getTokenCount(newPayload, model);
+ consola.info(`Auto-compact: ${originalTokens} → ${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(newBytes / 1024)}KB (removed ${removedCount} messages)`);
+ if (newBytes > byteLimit) consola.warn(`Auto-compact: Result still over byte limit (${Math.round(newBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB)`);
  return {
  payload: newPayload,
  wasCompacted: true,
  originalTokens,
  compactedTokens: newTokenCount.input,
- removedMessageCount: removedMessages.length
+ removedMessageCount: removedCount
  };
  }
  /**
- * Create a marker to append to responses indicating auto-compaction occurred.
+ * Create a marker to prepend to responses indicating auto-compaction occurred.
  */
  function createCompactionMarker(result) {
  if (!result.wasCompacted) return "";
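Taken together, dynamicByteLimit, onRequestTooLarge and the compaction entry points form a feedback loop: a 413 response lowers the shared byte limit to 90% of the failing size (floored at 100KB), so the next compaction pass works under a tighter budget. A sketch of how a request handler might wire this up; sendUpstream is a hypothetical transport helper and the flow is an assumption, not code from the package:

// Hypothetical retry wiring around the functions above (sketch only).
async function sendWithAutoCompact(payload: any, model: any) {
  let body = payload;
  const check = await checkNeedsCompaction(body, model);
  if (check.needed) body = (await autoCompact(body, model)).payload;
  let res = await sendUpstream(body); // assumed helper, not in this diff
  if (res.status === 413) {
    onRequestTooLarge(JSON.stringify(body).length); // limit := 90% of failing size
    body = (await autoCompact(body, model)).payload; // recompact under the new limit
    res = await sendUpstream(body);
  }
  return res;
}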
@@ -4983,7 +4963,7 @@ async function runServer(options) {
  consecutiveSuccessesForRecovery: options.consecutiveSuccesses
  });
  else consola.info("Rate limiting disabled");
- if (options.autoCompact) consola.info("Auto-compact enabled: will compress context when exceeding token limits");
+ if (!options.autoCompact) consola.info("Auto-compact disabled");
  initHistory(options.history, options.historyLimit);
  if (options.history) {
  const limitText = options.historyLimit === 0 ? "unlimited" : `max ${options.historyLimit}`;
@@ -5125,10 +5105,10 @@ const start = defineCommand({
  default: "1000",
  description: "Maximum number of history entries to keep in memory (0 = unlimited)"
  },
- "auto-compact": {
+ "no-auto-compact": {
  type: "boolean",
  default: false,
- description: "Automatically compress conversation history when exceeding model token limits"
+ description: "Disable automatic conversation history compression when exceeding limits"
  }
  },
  run({ args }) {
@@ -5149,7 +5129,7 @@ const start = defineCommand({
  proxyEnv: args["proxy-env"],
  history: !args["no-history"],
  historyLimit: Number.parseInt(args["history-limit"], 10),
- autoCompact: args["auto-compact"]
+ autoCompact: !args["no-auto-compact"]
  });
  }
  });
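Net effect of 0.7.9: auto-compact flips from opt-in to opt-out. The state default becomes true, the old --auto-compact flag is replaced by --no-auto-compact (still defaulting to false), and run() negates it, so the feature is on unless explicitly disabled. The negation is the whole mechanism (invocation lines below assume the CLI binary is named copilot-api):

// Flag semantics after the change (sketch):
//   copilot-api start                    → args["no-auto-compact"] = false → autoCompact = true
//   copilot-api start --no-auto-compact  → args["no-auto-compact"] = true  → autoCompact = false
const autoCompact = !args["no-auto-compact"];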