@hsupu/copilot-api 0.7.6 → 0.7.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -246,8 +246,8 @@ async function getVSCodeVersion() {
246
246
  }
247
247
  });
248
248
  if (!response.ok) return FALLBACK;
249
- const version = (await response.json()).tag_name;
250
- if (version && /^\d+\.\d+\.\d+$/.test(version)) return version;
249
+ const version$1 = (await response.json()).tag_name;
250
+ if (version$1 && /^\d+\.\d+\.\d+$/.test(version$1)) return version$1;
251
251
  return FALLBACK;
252
252
  } catch {
253
253
  return FALLBACK;
@@ -434,13 +434,13 @@ const checkUsage = defineCommand({
434
434
  const premiumUsed = premiumTotal - premium.remaining;
435
435
  const premiumPercentUsed = premiumTotal > 0 ? premiumUsed / premiumTotal * 100 : 0;
436
436
  const premiumPercentRemaining = premium.percent_remaining;
437
- function summarizeQuota(name, snap) {
438
- if (!snap) return `${name}: N/A`;
437
+ function summarizeQuota(name$1, snap) {
438
+ if (!snap) return `${name$1}: N/A`;
439
439
  const total = snap.entitlement;
440
440
  const used = total - snap.remaining;
441
441
  const percentUsed = total > 0 ? used / total * 100 : 0;
442
442
  const percentRemaining = snap.percent_remaining;
443
- return `${name}: ${used}/${total} used (${percentUsed.toFixed(1)}% used, ${percentRemaining.toFixed(1)}% remaining)`;
443
+ return `${name$1}: ${used}/${total} used (${percentUsed.toFixed(1)}% used, ${percentRemaining.toFixed(1)}% remaining)`;
444
444
  }
445
445
  const premiumLine = `Premium: ${premiumUsed}/${premiumTotal} used (${premiumPercentUsed.toFixed(1)}% used, ${premiumPercentRemaining.toFixed(1)}% remaining)`;
446
446
  const chatLine = summarizeQuota("Chat", usage.quota_snapshots.chat);
@@ -481,9 +481,9 @@ async function checkTokenExists() {
481
481
  }
482
482
  }
483
483
  async function getDebugInfo() {
484
- const [version, tokenExists] = await Promise.all([getPackageVersion(), checkTokenExists()]);
484
+ const [version$1, tokenExists] = await Promise.all([getPackageVersion(), checkTokenExists()]);
485
485
  return {
486
- version,
486
+ version: version$1,
487
487
  runtime: getRuntimeInfo(),
488
488
  paths: {
489
489
  APP_DIR: PATHS.APP_DIR,
@@ -571,8 +571,8 @@ const PATTERNS = {
571
571
  /**
572
572
  * Parse semver version string to comparable parts
573
573
  */
574
- function parseVersion(version) {
575
- return version.split(".").map((n) => Number.parseInt(n, 10) || 0);
574
+ function parseVersion(version$1) {
575
+ return version$1.split(".").map((n) => Number.parseInt(n, 10) || 0);
576
576
  }
577
577
  /**
578
578
  * Compare two semver versions
@@ -590,9 +590,9 @@ function compareVersions(a, b) {
590
590
  }
591
591
  return 0;
592
592
  }
593
- function getPatternTypeForVersion(version) {
594
- if (compareVersions(version, SUPPORTED_VERSIONS.v2a.min) >= 0 && compareVersions(version, SUPPORTED_VERSIONS.v2a.max) <= 0) return "func";
595
- if (compareVersions(version, SUPPORTED_VERSIONS.v2b.min) >= 0 && compareVersions(version, SUPPORTED_VERSIONS.v2b.max) <= 0) return "variable";
593
+ function getPatternTypeForVersion(version$1) {
594
+ if (compareVersions(version$1, SUPPORTED_VERSIONS.v2a.min) >= 0 && compareVersions(version$1, SUPPORTED_VERSIONS.v2a.max) <= 0) return "func";
595
+ if (compareVersions(version$1, SUPPORTED_VERSIONS.v2b.min) >= 0 && compareVersions(version$1, SUPPORTED_VERSIONS.v2b.max) <= 0) return "variable";
596
596
  return null;
597
597
  }
598
598
  /**
@@ -624,8 +624,8 @@ function findInVoltaTools(voltaHome) {
624
624
  if (existsSync(packagesPath)) paths.push(packagesPath);
625
625
  const toolsDir = join(voltaHome, "tools", "image", "node");
626
626
  if (existsSync(toolsDir)) try {
627
- for (const version of readdirSync(toolsDir)) {
628
- const claudePath = join(toolsDir, version, "lib", "node_modules", "@anthropic-ai", "claude-code", "cli.js");
627
+ for (const version$1 of readdirSync(toolsDir)) {
628
+ const claudePath = join(toolsDir, version$1, "lib", "node_modules", "@anthropic-ai", "claude-code", "cli.js");
629
629
  if (existsSync(claudePath)) paths.push(claudePath);
630
630
  }
631
631
  } catch {}
@@ -668,23 +668,23 @@ function getCurrentLimit(content) {
668
668
  * Check if Claude Code version is supported for patching
669
669
  */
670
670
  function checkVersionSupport(cliPath) {
671
- const version = getClaudeCodeVersion(cliPath);
672
- if (!version) return {
671
+ const version$1 = getClaudeCodeVersion(cliPath);
672
+ if (!version$1) return {
673
673
  supported: false,
674
674
  version: null,
675
675
  patternType: null,
676
676
  error: "Could not detect Claude Code version"
677
677
  };
678
- const patternType = getPatternTypeForVersion(version);
678
+ const patternType = getPatternTypeForVersion(version$1);
679
679
  if (!patternType) return {
680
680
  supported: false,
681
- version,
681
+ version: version$1,
682
682
  patternType: null,
683
- error: `Version ${version} is not supported. Supported: ${getSupportedRangeString()}`
683
+ error: `Version ${version$1} is not supported. Supported: ${getSupportedRangeString()}`
684
684
  };
685
685
  return {
686
686
  supported: true,
687
- version,
687
+ version: version$1,
688
688
  patternType
689
689
  };
690
690
  }
@@ -735,8 +735,8 @@ function restoreClaudeCode(cliPath) {
735
735
  return true;
736
736
  }
737
737
  function showStatus(cliPath, currentLimit) {
738
- const version = getClaudeCodeVersion(cliPath);
739
- if (version) consola.info(`Claude Code version: ${version}`);
738
+ const version$1 = getClaudeCodeVersion(cliPath);
739
+ if (version$1) consola.info(`Claude Code version: ${version$1}`);
740
740
  if (currentLimit === null) {
741
741
  consola.warn("Could not detect current limit - CLI may have been updated");
742
742
  consola.info("Look for the BS9 variable or HR function pattern in cli.js");
@@ -818,6 +818,86 @@ const patchClaude = defineCommand({
818
818
  }
819
819
  });
820
820
 
821
+ //#endregion
822
+ //#region package.json
823
+ var name = "@hsupu/copilot-api";
824
+ var version = "0.7.8";
825
+ var description = "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!";
826
+ var keywords = [
827
+ "proxy",
828
+ "github-copilot",
829
+ "openai-compatible",
830
+ "anthropic-compatible"
831
+ ];
832
+ var homepage = "https://github.com/puxu-msft/copilot-api-js";
833
+ var bugs = "https://github.com/puxu-msft/copilot-api-js/issues";
834
+ var repository = {
835
+ "type": "git",
836
+ "url": "git+https://github.com/puxu-msft/copilot-api-js.git"
837
+ };
838
+ var author = "hsupu";
839
+ var type = "module";
840
+ var bin = { "copilot-api": "dist/main.js" };
841
+ var files = ["dist"];
842
+ var scripts = {
843
+ "build": "npx tsdown",
844
+ "dev": "bun run --watch ./src/main.ts",
845
+ "knip": "knip-bun",
846
+ "lint": "eslint --cache",
847
+ "lint:all": "eslint --cache .",
848
+ "prepack": "npm run build",
849
+ "prepare": "npm run build && (command -v bun >/dev/null 2>&1 && simple-git-hooks || true)",
850
+ "release": "bumpp && npm publish --access public",
851
+ "start": "NODE_ENV=production bun run ./src/main.ts",
852
+ "typecheck": "tsc"
853
+ };
854
+ var simple_git_hooks = { "pre-commit": "bun x lint-staged" };
855
+ var lint_staged = { "*": "bun run lint --fix" };
856
+ var dependencies = {
857
+ "citty": "^0.1.6",
858
+ "clipboardy": "^5.0.0",
859
+ "consola": "^3.4.2",
860
+ "fetch-event-stream": "^0.1.5",
861
+ "gpt-tokenizer": "^3.0.1",
862
+ "hono": "^4.9.9",
863
+ "picocolors": "^1.1.1",
864
+ "proxy-from-env": "^1.1.0",
865
+ "srvx": "^0.8.9",
866
+ "tiny-invariant": "^1.3.3",
867
+ "undici": "^7.16.0"
868
+ };
869
+ var devDependencies = {
870
+ "@echristian/eslint-config": "^0.0.54",
871
+ "@types/bun": "^1.2.23",
872
+ "@types/proxy-from-env": "^1.0.4",
873
+ "bumpp": "^10.2.3",
874
+ "eslint": "^9.37.0",
875
+ "knip": "^5.64.1",
876
+ "lint-staged": "^16.2.3",
877
+ "prettier-plugin-packagejson": "^2.5.19",
878
+ "simple-git-hooks": "^2.13.1",
879
+ "tsdown": "^0.15.6",
880
+ "typescript": "^5.9.3"
881
+ };
882
+ var package_default = {
883
+ name,
884
+ version,
885
+ description,
886
+ keywords,
887
+ homepage,
888
+ bugs,
889
+ repository,
890
+ author,
891
+ type,
892
+ bin,
893
+ files,
894
+ scripts,
895
+ "simple-git-hooks": simple_git_hooks,
896
+ "lint-staged": lint_staged,
897
+ dependencies,
898
+ devDependencies
899
+ };
900
+
821
901
  //#endregion
822
902
  //#region src/lib/adaptive-rate-limiter.ts
823
903
  const DEFAULT_CONFIG$1 = {
@@ -1566,8 +1646,8 @@ var ConsoleRenderer = class {
1566
1646
  /**
1567
1647
  * Get log prefix based on log type
1568
1648
  */
1569
- getLogPrefix(type) {
1570
- switch (type) {
1649
+ getLogPrefix(type$1) {
1650
+ switch (type$1) {
1571
1651
  case "error":
1572
1652
  case "fatal": return pc.red("✖");
1573
1653
  case "warn": return pc.yellow("⚠");
@@ -2097,20 +2177,50 @@ const getTokenCount = async (payload, model) => {
2097
2177
  //#region src/lib/auto-compact.ts
2098
2178
  const DEFAULT_CONFIG = {
2099
2179
  targetTokens: 12e4,
2100
- safetyMarginPercent: 2
2180
+ safetyMarginPercent: 2,
2181
+ maxRequestBodyBytes: 500 * 1024
2101
2182
  };
2102
2183
  /**
2103
- * Check if payload needs compaction based on model limits.
2184
+ * Dynamic byte limit that adjusts based on 413 errors.
2185
+ * Starts at 500KB and can be adjusted when 413 errors are encountered.
2186
+ */
2187
+ let dynamicByteLimitOverride = null;
2188
+ /**
2189
+ * Called when a 413 error is encountered with a specific payload size.
2190
+ * Adjusts the dynamic byte limit to 90% of the failing size.
2191
+ */
2192
+ function onRequestTooLarge(failingBytes) {
2193
+ const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
2194
+ dynamicByteLimitOverride = newLimit;
2195
+ consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed, new limit: ${Math.round(newLimit / 1024)}KB`);
2196
+ }
2197
+ /**
2198
+ * Check if payload needs compaction based on model limits OR request body size.
2104
2199
  * Uses a safety margin to account for token counting differences.
2105
2200
  */
2106
- async function checkNeedsCompaction(payload, model, safetyMarginPercent = 2) {
2201
+ async function checkNeedsCompaction(payload, model, config = {}) {
2202
+ const cfg = {
2203
+ ...DEFAULT_CONFIG,
2204
+ ...config
2205
+ };
2107
2206
  const currentTokens = (await getTokenCount(payload, model)).input;
2108
2207
  const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
2109
- const limit = Math.floor(rawLimit * (1 - safetyMarginPercent / 100));
2208
+ const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
2209
+ const currentBytes = JSON.stringify(payload).length;
2210
+ const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
2211
+ const exceedsTokens = currentTokens > tokenLimit;
2212
+ const exceedsBytes = currentBytes > byteLimit;
2213
+ let reason;
2214
+ if (exceedsTokens && exceedsBytes) reason = "both";
2215
+ else if (exceedsTokens) reason = "tokens";
2216
+ else if (exceedsBytes) reason = "bytes";
2110
2217
  return {
2111
- needed: currentTokens > limit,
2218
+ needed: exceedsTokens || exceedsBytes,
2112
2219
  currentTokens,
2113
- limit
2220
+ tokenLimit,
2221
+ currentBytes,
2222
+ byteLimit,
2223
+ reason
2114
2224
  };
2115
2225
  }
2116
2226
  /**
@@ -2153,18 +2263,40 @@ function getToolUseIds(message) {
2153
2263
  return [];
2154
2264
  }
2155
2265
  /**
2156
- * Find messages to keep from the end to stay under target tokens.
2157
- * Returns the starting index of messages to preserve.
2266
+ * Estimate the byte size of a message (for binary search).
2158
2267
  */
2159
- function findPreserveIndex(messages, targetTokens, systemTokens) {
2160
- const availableTokens = targetTokens - systemTokens - 500;
2161
- let accumulatedTokens = 0;
2268
+ function estimateMessageBytes(message) {
2269
+ return JSON.stringify(message).length;
2270
+ }
2271
+ /**
2272
+ * Find the optimal preserve index using binary search.
2273
+ * This finds the point where we keep as many messages as possible
2274
+ * while staying under both token and byte limits.
2275
+ */
2276
+ function findOptimalPreserveIndex(messages, systemMessages, targetTokens, targetBytes) {
2277
+ if (messages.length === 0) return 0;
2278
+ const systemTokens = systemMessages.reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
2279
+ const systemBytes = systemMessages.reduce((sum, msg) => sum + estimateMessageBytes(msg), 0);
2280
+ const markerOverhead = 200;
2281
+ const availableTokens = targetTokens - systemTokens - markerOverhead / 4;
2282
+ const availableBytes = targetBytes - systemBytes - markerOverhead;
2283
+ const cumulativeTokens = Array.from({ length: messages.length + 1 }, () => 0);
2284
+ const cumulativeBytes = Array.from({ length: messages.length + 1 }, () => 0);
2162
2285
  for (let i = messages.length - 1; i >= 0; i--) {
2163
- const msgTokens = estimateMessageTokens(messages[i]);
2164
- if (accumulatedTokens + msgTokens > availableTokens) return i + 1;
2165
- accumulatedTokens += msgTokens;
2286
+ const msg = messages[i];
2287
+ cumulativeTokens[i] = cumulativeTokens[i + 1] + estimateMessageTokens(msg);
2288
+ cumulativeBytes[i] = cumulativeBytes[i + 1] + estimateMessageBytes(msg);
2166
2289
  }
2167
- return 0;
2290
+ let left = 0;
2291
+ let right = messages.length;
2292
+ while (left < right) {
2293
+ const mid = Math.floor((left + right) / 2);
2294
+ const tokensFromMid = cumulativeTokens[mid];
2295
+ const bytesFromMid = cumulativeBytes[mid];
2296
+ if (tokensFromMid <= availableTokens && bytesFromMid <= availableBytes) right = mid;
2297
+ else left = mid + 1;
2298
+ }
2299
+ return left;
2168
2300
  }
2169
2301
  /**
2170
2302
  * Filter out orphaned tool_result messages that don't have a matching tool_use
@@ -2202,12 +2334,6 @@ function ensureStartsWithUser(messages) {
2202
2334
  return messages.slice(startIndex);
2203
2335
  }
2204
2336
  /**
2205
- * Calculate estimated tokens for system messages.
2206
- */
2207
- function estimateSystemTokens(systemMessages) {
2208
- return systemMessages.reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
2209
- }
2210
- /**
2211
2337
  * Create a truncation marker message.
2212
2338
  */
2213
2339
  function createTruncationMarker(removedCount) {
@@ -2217,9 +2343,10 @@ function createTruncationMarker(removedCount) {
2217
2343
  };
2218
2344
  }
2219
2345
  /**
2220
- * Perform auto-compaction on a payload that exceeds token limits.
2221
- * This uses simple truncation - no LLM calls required.
2222
- * Uses iterative approach with decreasing target tokens until under limit.
2346
+ * Perform auto-compaction on a payload that exceeds token or size limits.
2347
+ * This uses simple truncation with binary search - no LLM calls required.
2348
+ * The algorithm finds the optimal truncation point to maximize preserved messages
2349
+ * while staying under both token and byte limits.
2223
2350
  */
2224
2351
  async function autoCompact(payload, model, config = {}) {
2225
2352
  const cfg = {
@@ -2228,63 +2355,38 @@ async function autoCompact(payload, model, config = {}) {
2228
2355
  };
2229
2356
  const originalTokens = (await getTokenCount(payload, model)).input;
2230
2357
  const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
2231
- const limit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
2232
- if (originalTokens <= limit) return {
2358
+ const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
2359
+ const originalBytes = JSON.stringify(payload).length;
2360
+ const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
2361
+ if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
2233
2362
  payload,
2234
2363
  wasCompacted: false,
2235
2364
  originalTokens,
2236
2365
  compactedTokens: originalTokens,
2237
2366
  removedMessageCount: 0
2238
2367
  };
2239
- consola.info(`Auto-compact: ${originalTokens} tokens exceeds limit of ${limit}, truncating...`);
2368
+ const exceedsTokens = originalTokens > tokenLimit;
2369
+ const exceedsBytes = originalBytes > byteLimit;
2370
+ let reason;
2371
+ if (exceedsTokens && exceedsBytes) reason = "tokens and size";
2372
+ else if (exceedsBytes) reason = "size";
2373
+ else reason = "tokens";
2374
+ consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB), truncating...`);
2240
2375
  const { systemMessages, remainingMessages } = extractSystemMessages(payload.messages);
2241
- const systemTokens = estimateSystemTokens(systemMessages);
2242
- consola.debug(`Auto-compact: ${systemMessages.length} system messages (~${systemTokens} tokens)`);
2243
- const MAX_ITERATIONS = 5;
2244
- const MIN_TARGET = 2e4;
2245
- let currentTarget = Math.min(cfg.targetTokens, limit);
2246
- let lastResult = null;
2247
- for (let iteration = 0; iteration < MAX_ITERATIONS; iteration++) {
2248
- const result = await tryCompactWithTarget({
2376
+ consola.debug(`Auto-compact: ${systemMessages.length} system messages, ${remainingMessages.length} conversation messages`);
2377
+ const preserveIndex = findOptimalPreserveIndex(remainingMessages, systemMessages, tokenLimit, byteLimit);
2378
+ if (preserveIndex === 0) {
2379
+ consola.warn("Auto-compact: Cannot truncate without losing all conversation history");
2380
+ return {
2249
2381
  payload,
2250
- model,
2251
- systemMessages,
2252
- remainingMessages,
2253
- systemTokens,
2254
- targetTokens: currentTarget,
2255
- limit,
2256
- originalTokens
2257
- });
2258
- if (!result.wasCompacted) return result;
2259
- lastResult = result;
2260
- if (result.compactedTokens <= limit) {
2261
- consola.info(`Auto-compact: ${originalTokens} → ${result.compactedTokens} tokens (removed ${result.removedMessageCount} messages)`);
2262
- return result;
2263
- }
2264
- consola.warn(`Auto-compact: Still over limit (${result.compactedTokens} > ${limit}), trying more aggressive truncation`);
2265
- currentTarget = Math.floor(currentTarget * .7);
2266
- if (currentTarget < MIN_TARGET) {
2267
- consola.error("Auto-compact: Cannot reduce further, target too low");
2268
- return result;
2269
- }
2382
+ wasCompacted: false,
2383
+ originalTokens,
2384
+ compactedTokens: originalTokens,
2385
+ removedMessageCount: 0
2386
+ };
2270
2387
  }
2271
- consola.error(`Auto-compact: Exhausted ${MAX_ITERATIONS} iterations, returning best effort`);
2272
- return lastResult ?? {
2273
- payload,
2274
- wasCompacted: false,
2275
- originalTokens,
2276
- compactedTokens: originalTokens,
2277
- removedMessageCount: 0
2278
- };
2279
- }
2280
- /**
2281
- * Helper to attempt compaction with a specific target token count.
2282
- */
2283
- async function tryCompactWithTarget(opts) {
2284
- const { payload, model, systemMessages, remainingMessages, systemTokens, targetTokens, originalTokens } = opts;
2285
- const preserveIndex = findPreserveIndex(remainingMessages, targetTokens, systemTokens);
2286
- if (preserveIndex === 0) {
2287
- consola.warn("Auto-compact: Cannot truncate further without losing all conversation history");
2388
+ if (preserveIndex >= remainingMessages.length) {
2389
+ consola.warn("Auto-compact: Would need to remove all messages, cannot compact");
2288
2390
  return {
2289
2391
  payload,
2290
2392
  wasCompacted: false,
@@ -2319,6 +2421,8 @@ async function tryCompactWithTarget(opts) {
2319
2421
  ]
2320
2422
  };
2321
2423
  const newTokenCount = await getTokenCount(newPayload, model);
2424
+ const newBytes = JSON.stringify(newPayload).length;
2425
+ consola.info(`Auto-compact: ${originalTokens} → ${newTokenCount.input} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(newBytes / 1024)}KB (removed ${removedMessages.length} messages)`);
2322
2426
  return {
2323
2427
  payload: newPayload,
2324
2428
  wasCompacted: true,
@@ -2433,12 +2537,16 @@ async function buildFinalPayload(payload, model) {
2433
2537
  }
2434
2538
  try {
2435
2539
  const check = await checkNeedsCompaction(payload, model);
2436
- consola.debug(`Auto-compact check: ${check.currentTokens} tokens, limit ${check.limit}, needed: ${check.needed}`);
2540
+ consola.debug(`Auto-compact check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
2437
2541
  if (!check.needed) return {
2438
2542
  finalPayload: payload,
2439
2543
  compactResult: null
2440
2544
  };
2441
- consola.info(`Auto-compact triggered: ${check.currentTokens} tokens > ${check.limit} limit`);
2545
+ let reasonText;
2546
+ if (check.reason === "both") reasonText = "tokens and size";
2547
+ else if (check.reason === "bytes") reasonText = "size";
2548
+ else reasonText = "tokens";
2549
+ consola.info(`Auto-compact triggered: exceeds ${reasonText} limit`);
2442
2550
  const compactResult = await autoCompact(payload, model);
2443
2551
  return {
2444
2552
  finalPayload: compactResult.payload,
@@ -2454,11 +2562,13 @@ async function buildFinalPayload(payload, model) {
2454
2562
  }
2455
2563
  /**
2456
2564
  * Log helpful debugging information when a 413 error occurs.
2565
+ * Also adjusts the dynamic byte limit for future requests.
2457
2566
  */
2458
2567
  async function logPayloadSizeInfo(payload, model) {
2459
2568
  const messageCount = payload.messages.length;
2460
2569
  const bodySize = JSON.stringify(payload).length;
2461
2570
  const bodySizeKB = Math.round(bodySize / 1024);
2571
+ onRequestTooLarge(bodySize);
2462
2572
  let imageCount = 0;
2463
2573
  let largeMessages = 0;
2464
2574
  let totalImageSize = 0;
@@ -2584,7 +2694,7 @@ function handleNonStreamingResponse$1(c, originalResponse, ctx) {
2584
2694
  ...choice$1,
2585
2695
  message: {
2586
2696
  ...choice$1.message,
2587
- content: (choice$1.message.content ?? "") + marker
2697
+ content: marker + (choice$1.message.content ?? "")
2588
2698
  }
2589
2699
  } : choice$1)
2590
2700
  };
@@ -2645,18 +2755,13 @@ async function handleStreamingResponse$1(opts) {
2645
2755
  const { stream, response, payload, ctx } = opts;
2646
2756
  const acc = createStreamAccumulator();
2647
2757
  try {
2648
- for await (const chunk of response) {
2649
- consola.debug("Streaming chunk:", JSON.stringify(chunk));
2650
- parseStreamChunk(chunk, acc);
2651
- await stream.writeSSE(chunk);
2652
- }
2653
2758
  if (ctx.compactResult?.wasCompacted) {
2654
2759
  const marker = createCompactionMarker(ctx.compactResult);
2655
2760
  const markerChunk = {
2656
2761
  id: `compact-marker-${Date.now()}`,
2657
2762
  object: "chat.completion.chunk",
2658
2763
  created: Math.floor(Date.now() / 1e3),
2659
- model: acc.model || payload.model,
2764
+ model: payload.model,
2660
2765
  choices: [{
2661
2766
  index: 0,
2662
2767
  delta: { content: marker },
@@ -2670,6 +2775,11 @@ async function handleStreamingResponse$1(opts) {
2670
2775
  });
2671
2776
  acc.content += marker;
2672
2777
  }
2778
+ for await (const chunk of response) {
2779
+ consola.debug("Streaming chunk:", JSON.stringify(chunk));
2780
+ parseStreamChunk(chunk, acc);
2781
+ await stream.writeSSE(chunk);
2782
+ }
2673
2783
  recordStreamSuccess(acc, payload.model, ctx);
2674
2784
  completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
2675
2785
  } catch (error) {
@@ -4460,7 +4570,7 @@ function handleNonStreamingResponse(opts) {
4460
4570
  consola.debug("Translated Anthropic response:", JSON.stringify(anthropicResponse));
4461
4571
  if (ctx.compactResult?.wasCompacted) {
4462
4572
  const marker = createCompactionMarker(ctx.compactResult);
4463
- anthropicResponse = appendMarkerToAnthropicResponse(anthropicResponse, marker);
4573
+ anthropicResponse = prependMarkerToAnthropicResponse(anthropicResponse, marker);
4464
4574
  }
4465
4575
  recordResponse(ctx.historyId, {
4466
4576
  success: true,
@@ -4492,16 +4602,16 @@ function handleNonStreamingResponse(opts) {
4492
4602
  });
4493
4603
  return c.json(anthropicResponse);
4494
4604
  }
4495
- function appendMarkerToAnthropicResponse(response, marker) {
4605
+ function prependMarkerToAnthropicResponse(response, marker) {
4496
4606
  const content = [...response.content];
4497
- const lastTextIndex = content.findLastIndex((block) => block.type === "text");
4498
- if (lastTextIndex !== -1) {
4499
- const textBlock = content[lastTextIndex];
4500
- if (textBlock.type === "text") content[lastTextIndex] = {
4607
+ const firstTextIndex = content.findIndex((block) => block.type === "text");
4608
+ if (firstTextIndex !== -1) {
4609
+ const textBlock = content[firstTextIndex];
4610
+ if (textBlock.type === "text") content[firstTextIndex] = {
4501
4611
  ...textBlock,
4502
- text: textBlock.text + marker
4612
+ text: marker + textBlock.text
4503
4613
  };
4504
- } else content.push({
4614
+ } else content.unshift({
4505
4615
  type: "text",
4506
4616
  text: marker
4507
4617
  });
@@ -4531,6 +4641,11 @@ async function handleStreamingResponse(opts) {
4531
4641
  };
4532
4642
  const acc = createAnthropicStreamAccumulator();
4533
4643
  try {
4644
+ if (ctx.compactResult?.wasCompacted) {
4645
+ const marker = createCompactionMarker(ctx.compactResult);
4646
+ await sendCompactionMarkerEvent(stream, streamState, marker);
4647
+ acc.content += marker;
4648
+ }
4534
4649
  await processStreamChunks({
4535
4650
  stream,
4536
4651
  response,
@@ -4538,11 +4653,6 @@ async function handleStreamingResponse(opts) {
4538
4653
  streamState,
4539
4654
  acc
4540
4655
  });
4541
- if (ctx.compactResult?.wasCompacted) {
4542
- const marker = createCompactionMarker(ctx.compactResult);
4543
- await sendCompactionMarkerEvent(stream, streamState, marker);
4544
- acc.content += marker;
4545
- }
4546
4656
  recordStreamingResponse(acc, anthropicPayload.model, ctx);
4547
4657
  completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
4548
4658
  } catch (error) {
@@ -4855,6 +4965,7 @@ function formatModelInfo(model) {
4855
4965
  return ` - ${model.id.padEnd(28)} context: ${contextK.padStart(5)}, output: ${outputK.padStart(4)}${featureStr}`;
4856
4966
  }
4857
4967
  async function runServer(options) {
4968
+ consola.info(`copilot-api v${package_default.version}`);
4858
4969
  if (options.proxyEnv) initProxyFromEnv();
4859
4970
  if (options.verbose) {
4860
4971
  consola.level = 5;