@hsupu/copilot-api 0.7.4 → 0.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -3,8 +3,9 @@ import { defineCommand, runMain } from "citty";
3
3
  import consola from "consola";
4
4
  import fs from "node:fs/promises";
5
5
  import os from "node:os";
6
- import path from "node:path";
6
+ import path, { dirname, join } from "node:path";
7
7
  import { randomUUID } from "node:crypto";
8
+ import { existsSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
8
9
  import clipboard from "clipboardy";
9
10
  import { serve } from "srvx";
10
11
  import invariant from "tiny-invariant";
@@ -44,7 +45,6 @@ async function ensureFile(filePath) {
44
45
  const state = {
45
46
  accountType: "individual",
46
47
  manualApprove: false,
47
- rateLimitWait: false,
48
48
  showToken: false,
49
49
  autoCompact: false
50
50
  };
@@ -136,7 +136,17 @@ function formatRequestTooLargeError() {
136
136
  }
137
137
  };
138
138
  }
139
- async function forwardError(c, error) {
139
/**
 * Build the Anthropic-compatible error payload returned for a rate-limit (HTTP 429) failure.
 *
 * @param {string | null | undefined} copilotMessage - Message reported upstream; a generic
 *   fallback is substituted only when it is null or undefined (an empty string is kept).
 * @returns {{ type: "error", error: { type: "rate_limit_error", message: string } }}
 */
function formatRateLimitError(copilotMessage) {
	const fallbackMessage = "You have exceeded your rate limit. Please try again later.";
	const message = copilotMessage ?? fallbackMessage;
	return {
		type: "error",
		error: {
			type: "rate_limit_error",
			message
		}
	};
}
149
+ function forwardError(c, error) {
140
150
  consola.error("Error occurred:", error);
141
151
  if (error instanceof HTTPError) {
142
152
  if (error.status === 413) {
@@ -160,6 +170,11 @@ async function forwardError(c, error) {
160
170
  return c.json(formattedError, 400);
161
171
  }
162
172
  }
173
+ if (error.status === 429 || copilotError.error?.code === "rate_limited") {
174
+ const formattedError = formatRateLimitError(copilotError.error?.message);
175
+ consola.debug("Returning formatted rate limit error:", formattedError);
176
+ return c.json(formattedError, 429);
177
+ }
163
178
  return c.json({ error: {
164
179
  message: error.responseText,
165
180
  type: "error"
@@ -290,6 +305,24 @@ async function pollAccessToken(deviceCode) {
290
305
  //#region src/lib/token.ts
291
306
  const readGithubToken = () => fs.readFile(PATHS.GITHUB_TOKEN_PATH, "utf8");
292
307
  const writeGithubToken = (token) => fs.writeFile(PATHS.GITHUB_TOKEN_PATH, token);
308
/**
 * Refresh the Copilot token, retrying with exponential backoff.
 *
 * The pause between attempts doubles each time (1s, 2s, 4s, ...) and is capped at 30s.
 * No pause is taken after the final attempt: there is nothing left to retry, so the
 * caller is told about the failure immediately.
 *
 * @param {number} [maxRetries=3] - Total number of attempts to make.
 * @returns {Promise<string | null>} The new token, or null when every attempt failed.
 */
async function refreshCopilotTokenWithRetry(maxRetries = 3) {
	let lastError = null;
	for (let attempt = 0; attempt < maxRetries; attempt++) {
		try {
			const { token } = await getCopilotToken();
			return token;
		} catch (error) {
			lastError = error;
			const attemptLabel = `${attempt + 1}/${maxRetries}`;
			if (attempt + 1 >= maxRetries) {
				// Last attempt: do not sleep or announce a retry that will never happen.
				consola.warn(`Token refresh attempt ${attemptLabel} failed`);
				break;
			}
			const delay = Math.min(1e3 * 2 ** attempt, 3e4);
			consola.warn(`Token refresh attempt ${attemptLabel} failed, retrying in ${delay}ms`);
			await new Promise((resolve) => setTimeout(resolve, delay));
		}
	}
	consola.error("All token refresh attempts failed:", lastError);
	return null;
}
293
326
  const setupCopilotToken = async () => {
294
327
  const { token, refresh_in } = await getCopilotToken();
295
328
  state.copilotToken = token;
@@ -298,14 +331,12 @@ const setupCopilotToken = async () => {
298
331
  const refreshInterval = (refresh_in - 60) * 1e3;
299
332
  setInterval(async () => {
300
333
  consola.debug("Refreshing Copilot token");
301
- try {
302
- const { token: token$1 } = await getCopilotToken();
303
- state.copilotToken = token$1;
334
+ const newToken = await refreshCopilotTokenWithRetry();
335
+ if (newToken) {
336
+ state.copilotToken = newToken;
304
337
  consola.debug("Copilot token refreshed");
305
- if (state.showToken) consola.info("Refreshed Copilot token:", token$1);
306
- } catch (error) {
307
- consola.error("Failed to refresh Copilot token (will retry on next interval):", error);
308
- }
338
+ if (state.showToken) consola.info("Refreshed Copilot token:", newToken);
339
+ } else consola.error("Failed to refresh Copilot token after retries, using existing token");
309
340
  }, refreshInterval);
310
341
  };
311
342
  async function setupGitHubToken(options) {
@@ -520,6 +551,567 @@ const logout = defineCommand({
520
551
  }
521
552
  });
522
553
 
554
+ //#endregion
555
+ //#region src/patch-claude.ts
556
/**
 * Inclusive Claude Code version ranges that the patcher knows how to handle.
 * `v2a` versions are patched through the `HR` function pattern, `v2b` versions
 * through the `BS9` variable pattern (see getPatternTypeForVersion).
 */
const SUPPORTED_VERSIONS = {
	v2a: { min: "2.0.0", max: "2.1.10" },
	v2b: { min: "2.1.11", max: "2.1.12" }
};
/**
 * Regular expressions locating the context-limit definition inside cli.js.
 * - funcOriginal: the unmodified `HR` function (returns 200000).
 * - funcPatched: the `HR` function with any numeric limit.
 * - variable: the `BS9` variable; capture group 1 is the limit.
 */
const PATTERNS = {
	funcOriginal: /function HR\(A\)\{if\(A\.includes\("\[1m\]"\)\)return 1e6;return 200000\}/,
	funcPatched: /function HR\(A\)\{if\(A\.includes\("\[1m\]"\)\)return 1e6;return \d+\}/,
	variable: /var BS9=(\d+)/
};
571
/**
 * Parse a semver-like version string into its numeric release components.
 *
 * A leading "v" and any prerelease ("-beta.1") or build ("+sha") suffix are ignored,
 * so the suffix can never leak extra numeric parts into the comparison
 * (previously "2.1.0-beta.1" parsed as [2, 1, 0, 1] and sorted above "2.1.0").
 * Non-numeric components become 0.
 *
 * @param {string} version - e.g. "2.1.12", "v2.0.5" or "2.1.0-beta.1".
 * @returns {number[]} Numeric components in order, e.g. [2, 1, 12].
 */
function parseVersion(version) {
	const release = version.trim().replace(/^v/i, "").split(/[-+]/, 1)[0];
	return release.split(".").map((part) => Number.parseInt(part, 10) || 0);
}
577
/**
 * Order two version strings component by component.
 * Missing trailing components count as 0, so "2.1" equals "2.1.0".
 *
 * @param {string} a
 * @param {string} b
 * @returns {-1 | 0 | 1} -1 when a < b, 0 when equal, 1 when a > b.
 */
function compareVersions(a, b) {
	const left = parseVersion(a);
	const right = parseVersion(b);
	const width = Math.max(left.length, right.length);
	for (let index = 0; index < width; index += 1) {
		const leftPart = left[index] || 0;
		const rightPart = right[index] || 0;
		if (leftPart !== rightPart) return leftPart < rightPart ? -1 : 1;
	}
	return 0;
}
593
/**
 * Map a Claude Code version to the patch strategy it needs.
 *
 * @param {string} version
 * @returns {"func" | "variable" | null} "func" inside the v2a range, "variable" inside
 *   the v2b range, null when the version is in neither supported range.
 */
function getPatternTypeForVersion(version) {
	const isWithin = ({ min, max }) => compareVersions(version, min) >= 0 && compareVersions(version, max) <= 0;
	if (isWithin(SUPPORTED_VERSIONS.v2a)) return "func";
	if (isWithin(SUPPORTED_VERSIONS.v2b)) return "variable";
	return null;
}
598
/**
 * Render the supported version ranges for use in error messages.
 *
 * @returns {string} "<v2a.min>-<v2a.max>, <v2b.min>-<v2b.max>"
 */
function getSupportedRangeString() {
	const { v2a, v2b } = SUPPORTED_VERSIONS;
	return [v2a, v2b].map(({ min, max }) => `${min}-${max}`).join(", ");
}
604
/**
 * Read the Claude Code version from the package.json sitting next to cli.js.
 *
 * @param {string} cliPath - Path to Claude Code's cli.js.
 * @returns {string | null} The `version` field when it is a string; null when the
 *   manifest is missing, unreadable, not valid JSON, or has no string version.
 */
function getClaudeCodeVersion(cliPath) {
	try {
		const manifestPath = join(dirname(cliPath), "package.json");
		if (!existsSync(manifestPath)) return null;
		const manifest = JSON.parse(readFileSync(manifestPath, "utf8"));
		const hasStringVersion = typeof manifest === "object" && manifest !== null && "version" in manifest && typeof manifest.version === "string";
		return hasStringVersion ? manifest.version : null;
	} catch {
		// Any failure while locating or parsing the manifest means "version unknown".
		return null;
	}
}
618
/**
 * Collect every Claude Code cli.js that exists under a Volta home directory.
 *
 * Two layouts are probed: the dedicated package image, then each entry of
 * `tools/image/node/<entry>/lib/node_modules`.
 *
 * @param {string} voltaHome - Volta home directory.
 * @returns {string[]} Existing cli.js paths, package image first.
 */
function findInVoltaTools(voltaHome) {
	const cliSuffix = ["@anthropic-ai", "claude-code", "cli.js"];
	const found = [];
	const packageCli = join(voltaHome, "tools", "image", "packages", "@anthropic-ai", "claude-code", "lib", "node_modules", ...cliSuffix);
	if (existsSync(packageCli)) found.push(packageCli);
	const nodeImagesDir = join(voltaHome, "tools", "image", "node");
	if (!existsSync(nodeImagesDir)) return found;
	try {
		for (const entry of readdirSync(nodeImagesDir)) {
			const candidate = join(nodeImagesDir, entry, "lib", "node_modules", ...cliSuffix);
			if (existsSync(candidate)) found.push(candidate);
		}
	} catch {
		// Best effort: an unreadable directory simply contributes no further candidates.
	}
	return found;
}
634
/**
 * Locate Claude Code's cli.js by probing common global install locations.
 *
 * Probe order: Volta, the npm prefix (Unix `lib/node_modules` layout, then the
 * Windows `node_modules` layout), well-known global node_modules roots, bun global.
 *
 * The home directory comes from $HOME and falls back to os.homedir(), so the search
 * also works where HOME is unset. When no home directory can be determined, the
 * home-relative locations are skipped instead of being resolved against the
 * current working directory.
 *
 * @returns {string | null} The first candidate that exists, or null when none does.
 */
function findClaudeCodePath() {
	const cliSuffix = ["@anthropic-ai", "claude-code", "cli.js"];
	const candidates = [];
	let home = process.env.HOME || "";
	if (!home) {
		try {
			home = os.homedir() || "";
		} catch {
			home = "";
		}
	}
	const voltaHome = process.env.VOLTA_HOME || (home ? join(home, ".volta") : "");
	if (voltaHome && existsSync(voltaHome)) candidates.push(...findInVoltaTools(voltaHome));
	const npmPrefix = process.env.npm_config_prefix;
	if (npmPrefix) {
		candidates.push(join(npmPrefix, "lib", "node_modules", ...cliSuffix));
		// Global installs on Windows live directly under {prefix}/node_modules (no lib folder).
		candidates.push(join(npmPrefix, "node_modules", ...cliSuffix));
	}
	const globalRoots = ["/usr/local/lib/node_modules", "/usr/lib/node_modules"];
	if (home) globalRoots.unshift(join(home, ".npm-global", "lib", "node_modules"));
	for (const root of globalRoots) candidates.push(join(root, ...cliSuffix));
	if (home) {
		const bunGlobal = join(home, ".bun", "install", "global");
		if (existsSync(bunGlobal)) candidates.push(join(bunGlobal, "node_modules", ...cliSuffix));
	}
	return candidates.find((candidate) => existsSync(candidate)) ?? null;
}
654
/**
 * Extract the context limit currently present in cli.js source text.
 *
 * The `BS9` variable form is checked first, then the `HR` function form.
 *
 * @param {string} content - Full text of cli.js.
 * @returns {number | null} The limit, or null when neither pattern is present.
 */
function getCurrentLimit(content) {
	const variableHit = content.match(PATTERNS.variable);
	if (variableHit) return Number.parseInt(variableHit[1], 10);
	const functionHit = content.match(PATTERNS.funcPatched);
	if (!functionHit) return null;
	const trailingReturn = /return (\d+)\}$/.exec(functionHit[0]);
	return trailingReturn ? Number.parseInt(trailingReturn[1], 10) : null;
}
667
/**
 * Decide whether the Claude Code installation at cliPath can be patched.
 *
 * @param {string} cliPath - Path to Claude Code's cli.js.
 * @returns {{ supported: boolean, version: string | null, patternType: "func" | "variable" | null, error?: string }}
 *   `error` is present only when `supported` is false.
 */
function checkVersionSupport(cliPath) {
	const version = getClaudeCodeVersion(cliPath);
	const unsupported = (error) => ({
		supported: false,
		version: version || null,
		patternType: null,
		error
	});
	if (!version) return {
		supported: false,
		version: null,
		patternType: null,
		error: "Could not detect Claude Code version"
	};
	const patternType = getPatternTypeForVersion(version);
	if (patternType) return {
		supported: true,
		version,
		patternType
	};
	return unsupported(`Version ${version} is not supported. Supported: ${getSupportedRangeString()}`);
}
691
/**
 * Patch Claude Code's cli.js so it uses a different context limit.
 *
 * The file is rewritten only when the expected pattern is actually present.
 * Previously a missing pattern left the content unchanged but was still written
 * back and reported as a successful patch.
 *
 * @param {string} cliPath - Path to Claude Code's cli.js.
 * @param {number} newLimit - Context window limit to write.
 * @returns {boolean} true when the file already has, or now has, the requested limit;
 *   false when the version is unsupported or the pattern is not found.
 */
function patchClaudeCode(cliPath, newLimit) {
	const content = readFileSync(cliPath, "utf8");
	const versionCheck = checkVersionSupport(cliPath);
	if (!versionCheck.supported) {
		consola.error(versionCheck.error);
		return false;
	}
	consola.info(`Claude Code version: ${versionCheck.version}`);
	if (getCurrentLimit(content) === newLimit) {
		consola.info(`Already patched with limit ${newLimit}`);
		return true;
	}
	let pattern;
	let replacement;
	if (versionCheck.patternType === "variable") {
		pattern = PATTERNS.variable;
		replacement = `var BS9=${newLimit}`;
	} else {
		pattern = PATTERNS.funcOriginal.test(content) ? PATTERNS.funcOriginal : PATTERNS.funcPatched;
		replacement = `function HR(A){if(A.includes("[1m]"))return 1e6;return ${newLimit}}`;
	}
	// Nothing to replace: report failure rather than rewriting an unchanged file.
	if (!pattern.test(content)) return false;
	// A replacer function keeps the replacement text literal (no "$" substitution patterns).
	writeFileSync(cliPath, content.replace(pattern, () => replacement));
	return true;
}
716
/**
 * Restore Claude Code's cli.js to the original 200000 context limit.
 *
 * The file is rewritten only when the expected pattern is actually present.
 * Previously a missing pattern was still reported as a successful restore.
 *
 * @param {string} cliPath - Path to Claude Code's cli.js.
 * @returns {boolean} true when the file already is, or now is, at the original limit;
 *   false when the version is unsupported or the pattern is not found.
 */
function restoreClaudeCode(cliPath) {
	const content = readFileSync(cliPath, "utf8");
	const versionCheck = checkVersionSupport(cliPath);
	if (!versionCheck.supported) {
		consola.error(versionCheck.error);
		return false;
	}
	consola.info(`Claude Code version: ${versionCheck.version}`);
	if (getCurrentLimit(content) === 2e5) {
		consola.info("Already at original 200000 limit");
		return true;
	}
	const usesVariable = versionCheck.patternType === "variable";
	const pattern = usesVariable ? PATTERNS.variable : PATTERNS.funcPatched;
	const replacement = usesVariable ? "var BS9=200000" : "function HR(A){if(A.includes(\"[1m]\"))return 1e6;return 200000}";
	// Nothing to replace: report failure rather than rewriting an unchanged file.
	if (!pattern.test(content)) return false;
	writeFileSync(cliPath, content.replace(pattern, () => replacement));
	return true;
}
737
/**
 * Log the detected Claude Code version (when known) and the current patch status.
 *
 * @param {string} cliPath - Path to Claude Code's cli.js.
 * @param {number | null} currentLimit - Limit found in cli.js, or null when undetected.
 */
function showStatus(cliPath, currentLimit) {
	const detectedVersion = getClaudeCodeVersion(cliPath);
	if (detectedVersion) consola.info(`Claude Code version: ${detectedVersion}`);
	if (currentLimit === null) {
		consola.warn("Could not detect current limit - CLI may have been updated");
		consola.info("Look for the BS9 variable or HR function pattern in cli.js");
		return;
	}
	const statusLine = currentLimit === 2e5 ? "Status: Original (200k context window)" : `Status: Patched (${currentLimit} context window)`;
	consola.info(statusLine);
}
746
/**
 * `patch-claude` command: inspect, patch, or restore the context window limit
 * inside Claude Code's cli.js.
 *
 * Flags: --limit/-l (tokens, default "128000"), --restore/-r, --path/-p, --status/-s.
 * The process exits with code 1 when cli.js cannot be found, the limit is invalid,
 * or the patch/restore helper reports failure.
 */
const patchClaude = defineCommand({
	meta: {
		name: "patch-claude",
		description: "Patch Claude Code's context window limit to match Copilot's limits"
	},
	args: {
		limit: {
			alias: "l",
			type: "string",
			default: "128000",
			description: "Context window limit in tokens (default: 128000 for Copilot)"
		},
		restore: {
			alias: "r",
			type: "boolean",
			default: false,
			description: "Restore original 200k limit"
		},
		path: {
			alias: "p",
			type: "string",
			description: "Path to Claude Code cli.js (auto-detected if not specified)"
		},
		status: {
			alias: "s",
			type: "boolean",
			default: false,
			description: "Show current patch status without modifying"
		}
	},
	run({ args }) {
		// An explicit --path wins over auto-detection.
		const cliPath = args.path || findClaudeCodePath();
		if (!cliPath) {
			consola.error("Could not find Claude Code installation");
			consola.info("Searched in: volta, npm global, bun global");
			consola.info("Use --path to specify the path to cli.js manually");
			process.exit(1);
		}
		if (!existsSync(cliPath)) {
			consola.error(`File not found: ${cliPath}`);
			process.exit(1);
		}
		consola.info(`Claude Code path: ${cliPath}`);
		const currentLimit = getCurrentLimit(readFileSync(cliPath, "utf8"));
		if (args.status) {
			showStatus(cliPath, currentLimit);
			return;
		}
		if (args.restore) {
			const restored = restoreClaudeCode(cliPath);
			if (!restored) {
				consola.error("Failed to restore - pattern not found");
				consola.info("Claude Code may have been updated to a new version");
				process.exit(1);
			} else consola.success("Restored to original 200k limit");
			return;
		}
		const limit = Number.parseInt(args.limit, 10);
		if (Number.isNaN(limit) || limit < 1e3) {
			consola.error("Invalid limit value. Must be a number >= 1000");
			process.exit(1);
		}
		const patched = patchClaudeCode(cliPath, limit);
		if (!patched) {
			consola.error("Failed to patch - pattern not found");
			consola.info("Claude Code may have been updated to a new version");
			consola.info("Check the cli.js for the HR function pattern");
			process.exit(1);
		} else {
			consola.success(`Patched context window: 200000 → ${limit}`);
			consola.info("Note: You may need to re-run this after Claude Code updates");
		}
	}
});
820
+
821
+ //#endregion
822
+ //#region src/lib/adaptive-rate-limiter.ts
823
/**
 * Default settings for AdaptiveRateLimiter.
 * Intervals are in seconds, except `recoveryTimeoutMinutes`; `gradualRecoverySteps`
 * lists the per-request interval (seconds) for each step of gradual recovery.
 */
const DEFAULT_CONFIG$1 = {
	baseRetryIntervalSeconds: 10,
	maxRetryIntervalSeconds: 120,
	requestIntervalSeconds: 10,
	recoveryTimeoutMinutes: 10,
	consecutiveSuccessesForRecovery: 5,
	gradualRecoverySteps: [5, 2, 1, 0]
};
836
/**
 * Adaptive rate limiter that switches between three modes based on API responses:
 *
 * - "normal": requests run immediately.
 * - "rate-limited": requests are queued and run one at a time, spaced by
 *   `requestIntervalSeconds`; a request answered with 429 is retried with
 *   exponential backoff (or the server-provided retry hint).
 * - "recovering": requests run directly again, but spaced by the current entry of
 *   `gradualRecoverySteps`; each success advances one step until "normal" is reached.
 *
 * Mode transitions are guarded so that draining the queue can only start recovery
 * from "rate-limited", a 429 seen anywhere always returns the limiter to
 * "rate-limited", and a late success from the recovering path cannot override a
 * concurrent return to "rate-limited".
 */
var AdaptiveRateLimiter = class {
	config;
	mode = "normal";
	queue = [];
	processing = false;
	rateLimitedAt = null;
	consecutiveSuccesses = 0;
	lastRequestTime = 0;
	/** Current step in gradual recovery (index into gradualRecoverySteps) */
	recoveryStepIndex = 0;
	/**
	 * @param {object} [config] - Partial overrides for DEFAULT_CONFIG$1.
	 */
	constructor(config = {}) {
		this.config = {
			...DEFAULT_CONFIG$1,
			...config
		};
	}
	/**
	 * Execute a request with adaptive rate limiting.
	 * Resolves with `{ result, queueWaitMs }` once the request succeeds; 429 errors are
	 * retried automatically, any other error is rethrown/rejected.
	 *
	 * @param {() => Promise<unknown>} fn - The request to run (may be called several times).
	 */
	async execute(fn) {
		if (this.mode === "normal") return this.executeInNormalMode(fn);
		if (this.mode === "recovering") return this.executeInRecoveringMode(fn);
		return this.enqueue(fn);
	}
	/**
	 * Classify an error as a rate-limit error: HTTP status 429, or a JSON
	 * `responseText` whose `error.code` is "rate_limited". In both cases the
	 * retry hint from the body (if any) is returned as `retryAfter`.
	 *
	 * @returns {{ isRateLimit: boolean, retryAfter?: number }}
	 */
	isRateLimitError(error) {
		if (!error || typeof error !== "object") return { isRateLimit: false };
		if ("status" in error && error.status === 429) return {
			isRateLimit: true,
			retryAfter: this.extractRetryAfter(error)
		};
		if ("responseText" in error && typeof error.responseText === "string") try {
			const parsed = JSON.parse(error.responseText);
			const body = parsed && typeof parsed === "object" ? parsed.error : void 0;
			if (body && typeof body === "object" && body.code === "rate_limited") return {
				isRateLimit: true,
				retryAfter: this.extractRetryAfter(error)
			};
		} catch {
			// responseText is not JSON: not a body-coded rate limit.
		}
		return { isRateLimit: false };
	}
	/**
	 * Extract a numeric `retry_after` (top level or under `error`) from the JSON
	 * `responseText` of an error.
	 *
	 * @returns {number | undefined}
	 */
	extractRetryAfter(error) {
		if (!error || typeof error !== "object") return void 0;
		if (!("responseText" in error) || typeof error.responseText !== "string") return void 0;
		try {
			const parsed = JSON.parse(error.responseText);
			if (!parsed || typeof parsed !== "object") return void 0;
			if (typeof parsed.retry_after === "number") return parsed.retry_after;
			const body = parsed.error;
			if (body && typeof body === "object" && typeof body.retry_after === "number") return body.retry_after;
		} catch {
			// responseText is not JSON: no retry hint available.
		}
		return void 0;
	}
	/**
	 * Execute in normal mode - full speed. A 429 switches to rate-limited mode and
	 * re-queues the request.
	 */
	async executeInNormalMode(fn) {
		try {
			return {
				result: await fn(),
				queueWaitMs: 0
			};
		} catch (error) {
			const { isRateLimit, retryAfter } = this.isRateLimitError(error);
			if (isRateLimit) {
				this.enterRateLimitedMode();
				return this.enqueue(fn, retryAfter);
			}
			throw error;
		}
	}
	/**
	 * Execute in recovering mode - gradual speedup. Waits for the current recovery
	 * interval since the last request, then runs the request; success advances one
	 * recovery step, a 429 returns to rate-limited mode and re-queues the request.
	 */
	async executeInRecoveringMode(fn) {
		const startTime = Date.now();
		const currentInterval = this.config.gradualRecoverySteps[this.recoveryStepIndex] ?? 0;
		if (currentInterval > 0) {
			const elapsedMs = Date.now() - this.lastRequestTime;
			const requiredMs = currentInterval * 1e3;
			if (this.lastRequestTime > 0 && elapsedMs < requiredMs) await this.sleep(requiredMs - elapsedMs);
		}
		this.lastRequestTime = Date.now();
		try {
			const result = await fn();
			// Only advance recovery if nothing moved us out of recovering mode while
			// this request was in flight (e.g. a concurrent request hit a 429).
			if (this.mode === "recovering") {
				this.recoveryStepIndex++;
				if (this.recoveryStepIndex >= this.config.gradualRecoverySteps.length) this.completeRecovery();
				else {
					const nextInterval = this.config.gradualRecoverySteps[this.recoveryStepIndex] ?? 0;
					consola.info(`[RateLimiter] Recovery step ${this.recoveryStepIndex}/${this.config.gradualRecoverySteps.length} (next interval: ${nextInterval}s)`);
				}
			}
			return {
				result,
				queueWaitMs: Date.now() - startTime
			};
		} catch (error) {
			const { isRateLimit, retryAfter } = this.isRateLimitError(error);
			if (isRateLimit) {
				consola.warn("[RateLimiter] Hit rate limit during recovery, returning to rate-limited mode");
				this.enterRateLimitedMode();
				return this.enqueue(fn, retryAfter);
			}
			throw error;
		}
	}
	/**
	 * Enter rate-limited mode (no-op when already rate-limited).
	 */
	enterRateLimitedMode() {
		if (this.mode === "rate-limited") return;
		this.mode = "rate-limited";
		this.rateLimitedAt = Date.now();
		this.consecutiveSuccesses = 0;
		consola.warn(`[RateLimiter] Entering rate-limited mode. Requests will be queued with exponential backoff (base: ${this.config.baseRetryIntervalSeconds}s).`);
	}
	/**
	 * Check if we should try to recover: enough consecutive successes, or the
	 * recovery timeout has elapsed since the last rate limit.
	 */
	shouldAttemptRecovery() {
		if (this.consecutiveSuccesses >= this.config.consecutiveSuccessesForRecovery) {
			consola.info(`[RateLimiter] ${this.consecutiveSuccesses} consecutive successes. Starting gradual recovery.`);
			return true;
		}
		if (this.rateLimitedAt) {
			const elapsed = Date.now() - this.rateLimitedAt;
			const timeout = this.config.recoveryTimeoutMinutes * 60 * 1e3;
			if (elapsed >= timeout) {
				consola.info(`[RateLimiter] ${this.config.recoveryTimeoutMinutes} minutes elapsed. Starting gradual recovery.`);
				return true;
			}
		}
		return false;
	}
	/**
	 * Start gradual recovery mode
	 */
	startGradualRecovery() {
		this.mode = "recovering";
		this.recoveryStepIndex = 0;
		this.rateLimitedAt = null;
		this.consecutiveSuccesses = 0;
		const firstInterval = this.config.gradualRecoverySteps[0] ?? 0;
		consola.info(`[RateLimiter] Starting gradual recovery (${this.config.gradualRecoverySteps.length} steps, first interval: ${firstInterval}s)`);
	}
	/**
	 * Complete recovery to normal mode
	 */
	completeRecovery() {
		this.mode = "normal";
		this.recoveryStepIndex = 0;
		consola.success("[RateLimiter] Recovery complete. Full speed enabled.");
	}
	/**
	 * Enqueue a request for later execution and make sure the queue is being drained.
	 *
	 * @param {() => Promise<unknown>} fn
	 * @param {number} [retryAfterSeconds] - Retry hint from the response that caused queuing.
	 * @returns {Promise<{ result: unknown, queueWaitMs: number }>}
	 */
	enqueue(fn, retryAfterSeconds) {
		return new Promise((resolve, reject) => {
			const request = {
				execute: fn,
				resolve,
				reject,
				retryCount: 0,
				retryAfterSeconds,
				enqueuedAt: Date.now()
			};
			this.queue.push(request);
			if (this.queue.length > 1) {
				const position = this.queue.length;
				const estimatedWait = (position - 1) * this.config.requestIntervalSeconds;
				consola.info(`[RateLimiter] Request queued (position ${position}, ~${estimatedWait}s wait)`);
			}
			// The drain loop settles each request itself; this catch only guards
			// against an unexpected failure of the loop becoming an unhandled rejection.
			this.processQueue().catch((error) => consola.error("[RateLimiter] Queue processing failed:", error));
		});
	}
	/**
	 * Calculate retry interval (seconds): the server hint when positive, otherwise
	 * exponential backoff capped at `maxRetryIntervalSeconds`.
	 */
	calculateRetryInterval(request) {
		if (request.retryAfterSeconds !== void 0 && request.retryAfterSeconds > 0) return request.retryAfterSeconds;
		const backoff = this.config.baseRetryIntervalSeconds * Math.pow(2, request.retryCount);
		return Math.min(backoff, this.config.maxRetryIntervalSeconds);
	}
	/**
	 * Drain the queue one request at a time. Only one drain loop runs at once.
	 * A request answered with 429 stays at the head of the queue and is retried.
	 */
	async processQueue() {
		if (this.processing) return;
		this.processing = true;
		try {
			while (this.queue.length > 0) {
				const request = this.queue[0];
				// Recovery may only start from rate-limited mode; otherwise a backlog
				// drained in normal/recovering mode would restart recovery from step 0.
				if (this.mode === "rate-limited" && this.shouldAttemptRecovery()) this.startGradualRecovery();
				const elapsedMs = Date.now() - this.lastRequestTime;
				const intervalSeconds = request.retryCount > 0 ? this.calculateRetryInterval(request) : this.config.requestIntervalSeconds;
				const requiredMs = intervalSeconds * 1e3;
				if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
					const waitMs = requiredMs - elapsedMs;
					const waitSec = Math.ceil(waitMs / 1e3);
					consola.info(`[RateLimiter] Waiting ${waitSec}s before next request...`);
					await this.sleep(waitMs);
				}
				this.lastRequestTime = Date.now();
				try {
					const result = await request.execute();
					this.queue.shift();
					this.consecutiveSuccesses++;
					request.retryAfterSeconds = void 0;
					request.resolve({
						result,
						queueWaitMs: Date.now() - request.enqueuedAt
					});
					if (this.mode === "rate-limited") consola.info(`[RateLimiter] Request succeeded (${this.consecutiveSuccesses}/${this.config.consecutiveSuccessesForRecovery} for recovery)`);
				} catch (error) {
					const { isRateLimit, retryAfter } = this.isRateLimitError(error);
					if (!isRateLimit) {
						this.queue.shift();
						request.reject(error);
						continue;
					}
					request.retryCount++;
					request.retryAfterSeconds = retryAfter;
					this.consecutiveSuccesses = 0;
					if (this.mode !== "rate-limited") {
						// A 429 while recovering/normal means the limit is still active:
						// new requests must go through the queue again.
						consola.warn("[RateLimiter] Hit rate limit while draining queue, returning to rate-limited mode");
						this.mode = "rate-limited";
						this.recoveryStepIndex = 0;
					}
					this.rateLimitedAt = Date.now();
					const nextInterval = this.calculateRetryInterval(request);
					const source = retryAfter ? "server Retry-After" : "exponential backoff";
					consola.warn(`[RateLimiter] Request failed with 429 (retry #${request.retryCount}). Retrying in ${nextInterval}s (${source})...`);
				}
			}
		} finally {
			// Always release the drain lock so a later enqueue can restart the loop.
			this.processing = false;
		}
	}
	/** Resolve after `ms` milliseconds. */
	sleep(ms) {
		return new Promise((resolve) => setTimeout(resolve, ms));
	}
	/**
	 * Get current status for debugging/monitoring
	 */
	getStatus() {
		return {
			mode: this.mode,
			queueLength: this.queue.length,
			consecutiveSuccesses: this.consecutiveSuccesses,
			rateLimitedAt: this.rateLimitedAt
		};
	}
};
1088
/** Limiter shared by the module; null until initAdaptiveRateLimiter has been called. */
let rateLimiterInstance = null;
/**
 * Create the shared adaptive rate limiter and log its effective settings.
 *
 * @param {object} [config] - Partial limiter configuration; null/undefined fields
 *   are reported with their DEFAULT_CONFIG$1 value.
 */
function initAdaptiveRateLimiter(config = {}) {
	rateLimiterInstance = new AdaptiveRateLimiter(config);
	const effective = (key) => config[key] ?? DEFAULT_CONFIG$1[key];
	const backoffRange = `${effective("baseRetryIntervalSeconds")}s-${effective("maxRetryIntervalSeconds")}s`;
	const interval = `${effective("requestIntervalSeconds")}s`;
	const recovery = `${effective("recoveryTimeoutMinutes")}min or ${effective("consecutiveSuccessesForRecovery")} successes`;
	const gradual = `[${effective("gradualRecoverySteps").join("s, ")}s]`;
	consola.info(`[RateLimiter] Initialized (backoff: ${backoffRange}, interval: ${interval}, recovery: ${recovery}, gradual: ${gradual})`);
}
1102
/**
 * Run a request through the shared adaptive rate limiter.
 * When no limiter has been initialized the request runs immediately.
 *
 * @param {() => Promise<unknown>} fn - The request to run.
 * @returns {Promise<{ result: unknown, queueWaitMs: number }>} The request's result
 *   together with the time spent waiting (0 when no limiter is active).
 */
async function executeWithAdaptiveRateLimit(fn) {
	const limiter = rateLimiterInstance;
	if (limiter) return limiter.execute(fn);
	const result = await fn();
	return {
		result,
		queueWaitMs: 0
	};
}
1114
+
523
1115
  //#endregion
524
1116
  //#region src/lib/history.ts
525
1117
  function generateId$1() {
@@ -771,44 +1363,74 @@ function exportHistory(format = "json") {
771
1363
 
772
1364
  //#endregion
773
1365
  //#region src/lib/proxy.ts
1366
/**
 * Dispatcher that picks a proxy per request origin via getProxyForUrl and
 * falls back to a direct connection (the inherited Agent) when no proxy applies
 * or when routing throws. One ProxyAgent is created and cached per proxy URL.
 */
var ProxyDispatcher = class extends Agent {
	proxies = /* @__PURE__ */ new Map();
	/**
	 * Route the request through the matching proxy agent, or dispatch it directly.
	 */
	dispatch(options, handler) {
		try {
			const origin = this.getOriginUrl(options.origin);
			const proxyUrl = this.getProxyUrl(origin);
			if (proxyUrl) {
				const agent = this.getOrCreateProxyAgent(proxyUrl);
				consola.debug(`HTTP proxy route: ${origin.hostname} via ${this.formatProxyLabel(proxyUrl)}`);
				return agent.dispatch(options, handler);
			}
			consola.debug(`HTTP proxy bypass: ${origin.hostname}`);
			return super.dispatch(options, handler);
		} catch {
			// Any failure while resolving or using the proxy falls back to a direct dispatch.
			return super.dispatch(options, handler);
		}
	}
	/** Normalize an origin given as string or URL into a URL object. */
	getOriginUrl(origin) {
		if (typeof origin === "string") return new URL(origin);
		return origin;
	}
	/** Proxy URL for the origin, or undefined when the lookup yields nothing. */
	getProxyUrl(origin) {
		const raw = getProxyForUrl(origin.toString());
		if (!raw || !(raw.length > 0)) return void 0;
		return raw;
	}
	/** Return the cached agent for the proxy URL, creating it on first use. */
	getOrCreateProxyAgent(proxyUrl) {
		const cached = this.proxies.get(proxyUrl);
		if (cached) return cached;
		const created = new ProxyAgent(proxyUrl);
		this.proxies.set(proxyUrl, created);
		return created;
	}
	/** Log label for a proxy: protocol and host only; the raw string when unparsable. */
	formatProxyLabel(proxyUrl) {
		try {
			const { protocol, host } = new URL(proxyUrl);
			return `${protocol}//${host}`;
		} catch {
			return proxyUrl;
		}
	}
	/** Close the direct agent first, then every cached proxy agent. */
	async close() {
		await super.close();
		const proxyAgents = [...this.proxies.values()];
		await Promise.all(proxyAgents.map((agent) => agent.close()));
		this.proxies.clear();
	}
	/**
	 * Destroy every cached proxy agent, then the direct agent, forwarding the
	 * error and/or callback in whichever call form was used.
	 */
	destroy(errOrCallback, callback) {
		const callbackOnly = typeof errOrCallback === "function";
		for (const agent of this.proxies.values()) {
			if (callbackOnly) agent.destroy(errOrCallback);
			else if (callback) agent.destroy(errOrCallback ?? null, callback);
			else agent.destroy(errOrCallback ?? null).catch(() => {});
		}
		this.proxies.clear();
		if (callbackOnly) {
			super.destroy(errOrCallback);
			return;
		}
		if (callback) {
			super.destroy(errOrCallback ?? null, callback);
			return;
		}
		return super.destroy(errOrCallback ?? null);
	}
};
774
1429
  function initProxyFromEnv() {
775
1430
  if (typeof Bun !== "undefined") return;
776
1431
  try {
777
- const direct = new Agent();
778
- const proxies = /* @__PURE__ */ new Map();
779
- setGlobalDispatcher({
780
- dispatch(options, handler) {
781
- try {
782
- const origin = typeof options.origin === "string" ? new URL(options.origin) : options.origin;
783
- const raw = getProxyForUrl(origin.toString());
784
- const proxyUrl = raw && raw.length > 0 ? raw : void 0;
785
- if (!proxyUrl) {
786
- consola.debug(`HTTP proxy bypass: ${origin.hostname}`);
787
- return direct.dispatch(options, handler);
788
- }
789
- let agent = proxies.get(proxyUrl);
790
- if (!agent) {
791
- agent = new ProxyAgent(proxyUrl);
792
- proxies.set(proxyUrl, agent);
793
- }
794
- let label = proxyUrl;
795
- try {
796
- const u = new URL(proxyUrl);
797
- label = `${u.protocol}//${u.host}`;
798
- } catch {}
799
- consola.debug(`HTTP proxy route: ${origin.hostname} via ${label}`);
800
- return agent.dispatch(options, handler);
801
- } catch {
802
- return direct.dispatch(options, handler);
803
- }
804
- },
805
- close() {
806
- return direct.close();
807
- },
808
- destroy() {
809
- return direct.destroy();
810
- }
811
- });
1432
+ const dispatcher = new ProxyDispatcher();
1433
+ setGlobalDispatcher(dispatcher);
812
1434
  consola.debug("HTTP proxy configured from environment (per-URL)");
813
1435
  } catch (err) {
814
1436
  consola.debug("Proxy setup skipped:", err);
@@ -894,23 +1516,66 @@ function formatTokens(input, output) {
894
1516
  * Console renderer that shows request lifecycle with apt-get style footer
895
1517
  *
896
1518
  * Log format:
897
- * - Start: [....] METHOD /path model-name
898
- * - Streaming: [<-->] METHOD /path model-name streaming...
899
- * - Complete: [ OK ] METHOD /path 200 1.2s 1.5K/500 model-name
1519
+ * - Start: [....] HH:MM:SS METHOD /path model-name (debug only, dim)
1520
+ * - Streaming: [<-->] HH:MM:SS METHOD /path model-name streaming... (dim)
1521
+ * - Complete: [ OK ] HH:MM:SS METHOD /path model-name 200 1.2s 1.5K/500 (colored)
1522
+ * - Error: [FAIL] HH:MM:SS METHOD /path model-name 500 1.2s: error message (red)
1523
+ *
1524
+ * Color scheme for completed requests:
1525
+ * - Prefix: green (success) / red (error)
1526
+ * - Time: dim
1527
+ * - Method: cyan
1528
+ * - Path: white
1529
+ * - Model: magenta
1530
+ * - Status: green (success) / red (error)
1531
+ * - Duration: yellow
1532
+ * - Tokens: blue
900
1533
  *
901
1534
  * Features:
902
- * - /history API requests are displayed in gray (dim)
903
- * - Sticky footer shows active request count, updated in-place on the last line
904
- * - Footer disappears when all requests complete
1535
+ * - Start lines only shown in debug mode (--verbose)
1536
+ * - Streaming lines are dim (less important)
1537
+ * - /history API requests are always dim
1538
+ * - Sticky footer shows active request count
1539
+ * - Intercepts consola output to properly handle footer
905
1540
  */
906
1541
  var ConsoleRenderer = class {
907
1542
  activeRequests = /* @__PURE__ */ new Map();
908
1543
  showActive;
909
1544
  footerVisible = false;
910
1545
  isTTY;
1546
+ originalReporters = [];
911
1547
  constructor(options) {
912
1548
  this.showActive = options?.showActive ?? true;
913
1549
  this.isTTY = process.stdout.isTTY;
1550
+ this.installConsolaReporter();
1551
+ }
1552
+ /**
1553
+ * Install a custom consola reporter that coordinates with footer
1554
+ */
1555
+ installConsolaReporter() {
1556
+ this.originalReporters = [...consola.options.reporters];
1557
+ consola.setReporters([{ log: (logObj) => {
1558
+ this.clearFooterForLog();
1559
+ const message = logObj.args.map((arg) => typeof arg === "string" ? arg : JSON.stringify(arg)).join(" ");
1560
+ const prefix = this.getLogPrefix(logObj.type);
1561
+ if (prefix) process.stdout.write(`${prefix} ${message}\n`);
1562
+ else process.stdout.write(`${message}\n`);
1563
+ this.renderFooter();
1564
+ } }]);
1565
+ }
1566
+ /**
1567
+ * Get log prefix based on log type
1568
+ */
1569
+ getLogPrefix(type) {
1570
+ switch (type) {
1571
+ case "error":
1572
+ case "fatal": return pc.red("✖");
1573
+ case "warn": return pc.yellow("⚠");
1574
+ case "info": return pc.cyan("ℹ");
1575
+ case "success": return pc.green("✔");
1576
+ case "debug": return pc.gray("●");
1577
+ default: return "";
1578
+ }
914
1579
  }
915
1580
  /**
916
1581
  * Get footer text based on active request count
@@ -946,25 +1611,52 @@ var ConsoleRenderer = class {
946
1611
  }
947
1612
  }
948
1613
  /**
1614
+ * Format a complete log line with colored parts
1615
+ */
1616
+ formatLogLine(parts) {
1617
+ const { prefix, time, method, path: path$1, model, status, duration, tokens, queueWait, extra, isError, isDim } = parts;
1618
+ if (isDim) {
1619
+ const modelPart = model ? ` ${model}` : "";
1620
+ const extraPart = extra ? ` ${extra}` : "";
1621
+ return pc.dim(`${prefix} ${time} ${method} ${path$1}${modelPart}${extraPart}`);
1622
+ }
1623
+ const coloredPrefix = isError ? pc.red(prefix) : pc.green(prefix);
1624
+ const coloredTime = pc.dim(time);
1625
+ const coloredMethod = pc.cyan(method);
1626
+ const coloredPath = pc.white(path$1);
1627
+ const coloredModel = model ? pc.magenta(` ${model}`) : "";
1628
+ let result = `${coloredPrefix} ${coloredTime} ${coloredMethod} ${coloredPath}${coloredModel}`;
1629
+ if (status !== void 0) {
1630
+ const coloredStatus = isError ? pc.red(String(status)) : pc.green(String(status));
1631
+ result += ` ${coloredStatus}`;
1632
+ }
1633
+ if (duration) result += ` ${pc.yellow(duration)}`;
1634
+ if (queueWait) result += ` ${pc.dim(`(queued ${queueWait})`)}`;
1635
+ if (tokens) result += ` ${pc.blue(tokens)}`;
1636
+ if (extra) result += isError ? pc.red(extra) : extra;
1637
+ return result;
1638
+ }
1639
+ /**
949
1640
  * Print a log line with proper footer handling
950
- * 1. Clear footer if visible
951
- * 2. Print log with newline
952
- * 3. Re-render footer on new line (no newline after footer)
953
1641
  */
954
- printLog(message, isGray = false) {
1642
+ printLog(message) {
955
1643
  this.clearFooterForLog();
956
- if (isGray) consola.log(pc.dim(message));
957
- else consola.log(message);
1644
+ process.stdout.write(message + "\n");
958
1645
  this.renderFooter();
959
1646
  }
960
1647
  onRequestStart(request) {
961
1648
  this.activeRequests.set(request.id, request);
962
- if (this.showActive) {
963
- const time = formatTime();
964
- const modelInfo = request.model ? ` ${request.model}` : "";
965
- const queueInfo = request.queuePosition !== void 0 && request.queuePosition > 0 ? ` [q#${request.queuePosition}]` : "";
966
- const message = `${time} [....] ${request.method} ${request.path}${modelInfo}${queueInfo}`;
967
- this.printLog(message, request.isHistoryAccess);
1649
+ if (this.showActive && consola.level >= 5) {
1650
+ const message = this.formatLogLine({
1651
+ prefix: "[....]",
1652
+ time: formatTime(),
1653
+ method: request.method,
1654
+ path: request.path,
1655
+ model: request.model,
1656
+ extra: request.queuePosition !== void 0 && request.queuePosition > 0 ? `[q#${request.queuePosition}]` : void 0,
1657
+ isDim: true
1658
+ });
1659
+ this.printLog(message);
968
1660
  }
969
1661
  }
970
1662
  onRequestUpdate(id, update) {
@@ -972,28 +1664,39 @@ var ConsoleRenderer = class {
972
1664
  if (!request) return;
973
1665
  Object.assign(request, update);
974
1666
  if (this.showActive && update.status === "streaming") {
975
- const time = formatTime();
976
- const modelInfo = request.model ? ` ${request.model}` : "";
977
- const message = `${time} [<-->] ${request.method} ${request.path}${modelInfo} streaming...`;
978
- this.printLog(message, request.isHistoryAccess);
1667
+ const message = this.formatLogLine({
1668
+ prefix: "[<-->]",
1669
+ time: formatTime(),
1670
+ method: request.method,
1671
+ path: request.path,
1672
+ model: request.model,
1673
+ extra: "streaming...",
1674
+ isDim: true
1675
+ });
1676
+ this.printLog(message);
979
1677
  }
980
1678
  }
981
1679
  onRequestComplete(request) {
982
1680
  this.activeRequests.delete(request.id);
983
- const time = formatTime();
984
1681
  const status = request.statusCode ?? 0;
985
- const duration = formatDuration(request.durationMs ?? 0);
986
- const tokens = request.model ? formatTokens(request.inputTokens, request.outputTokens) : "";
987
- const modelInfo = request.model ? ` ${request.model}` : "";
988
1682
  const isError = request.status === "error" || status >= 400;
989
- const prefix = isError ? "[FAIL]" : "[ OK ]";
990
- const tokensPart = tokens ? ` ${tokens}` : "";
991
- let content = `${time} ${prefix} ${request.method} ${request.path} ${status} ${duration}${tokensPart}${modelInfo}`;
992
- if (isError) {
993
- const errorInfo = request.error ? `: ${request.error}` : "";
994
- content += errorInfo;
995
- }
996
- this.printLog(content, request.isHistoryAccess);
1683
+ const tokens = request.model ? formatTokens(request.inputTokens, request.outputTokens) : void 0;
1684
+ const queueWait = request.queueWaitMs && request.queueWaitMs > 100 ? formatDuration(request.queueWaitMs) : void 0;
1685
+ const message = this.formatLogLine({
1686
+ prefix: isError ? "[FAIL]" : "[ OK ]",
1687
+ time: formatTime(),
1688
+ method: request.method,
1689
+ path: request.path,
1690
+ model: request.model,
1691
+ status,
1692
+ duration: formatDuration(request.durationMs ?? 0),
1693
+ queueWait,
1694
+ tokens,
1695
+ extra: isError && request.error ? `: ${request.error}` : void 0,
1696
+ isError,
1697
+ isDim: request.isHistoryAccess
1698
+ });
1699
+ this.printLog(message);
997
1700
  }
998
1701
  destroy() {
999
1702
  if (this.footerVisible && this.isTTY) {
@@ -1001,6 +1704,7 @@ var ConsoleRenderer = class {
1001
1704
  this.footerVisible = false;
1002
1705
  }
1003
1706
  this.activeRequests.clear();
1707
+ if (this.originalReporters.length > 0) consola.setReporters(this.originalReporters);
1004
1708
  }
1005
1709
  };
1006
1710
 
@@ -1392,14 +2096,14 @@ const getTokenCount = async (payload, model) => {
1392
2096
  //#endregion
1393
2097
  //#region src/lib/auto-compact.ts
1394
2098
  const DEFAULT_CONFIG = {
1395
- targetTokens: 1e5,
1396
- safetyMarginPercent: 10
2099
+ targetTokens: 12e4,
2100
+ safetyMarginPercent: 2
1397
2101
  };
1398
2102
  /**
1399
2103
  * Check if payload needs compaction based on model limits.
1400
2104
  * Uses a safety margin to account for token counting differences.
1401
2105
  */
1402
- async function checkNeedsCompaction(payload, model, safetyMarginPercent = 10) {
2106
+ async function checkNeedsCompaction(payload, model, safetyMarginPercent = 2) {
1403
2107
  const currentTokens = (await getTokenCount(payload, model)).input;
1404
2108
  const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
1405
2109
  const limit = Math.floor(rawLimit * (1 - safetyMarginPercent / 100));
@@ -1442,6 +2146,13 @@ function extractSystemMessages(messages) {
1442
2146
  };
1443
2147
  }
1444
2148
  /**
2149
+ * Extract tool_use ids from assistant messages with tool_calls.
2150
+ */
2151
+ function getToolUseIds(message) {
2152
+ if (message.role === "assistant" && message.tool_calls) return message.tool_calls.map((tc) => tc.id);
2153
+ return [];
2154
+ }
2155
+ /**
1445
2156
  * Find messages to keep from the end to stay under target tokens.
1446
2157
  * Returns the starting index of messages to preserve.
1447
2158
  */
@@ -1456,6 +2167,41 @@ function findPreserveIndex(messages, targetTokens, systemTokens) {
1456
2167
  return 0;
1457
2168
  }
1458
2169
  /**
2170
+ * Filter out orphaned tool_result messages that don't have a matching tool_use
2171
+ * in the preserved message list. This prevents API errors when truncation
2172
+ * separates tool_use/tool_result pairs.
2173
+ */
2174
+ function filterOrphanedToolResults(messages) {
2175
+ const availableToolUseIds = /* @__PURE__ */ new Set();
2176
+ for (const msg of messages) for (const id of getToolUseIds(msg)) availableToolUseIds.add(id);
2177
+ const filteredMessages = [];
2178
+ let removedCount = 0;
2179
+ for (const msg of messages) {
2180
+ if (msg.role === "tool" && msg.tool_call_id && !availableToolUseIds.has(msg.tool_call_id)) {
2181
+ removedCount++;
2182
+ continue;
2183
+ }
2184
+ filteredMessages.push(msg);
2185
+ }
2186
+ if (removedCount > 0) consola.info(`Auto-compact: Removed ${removedCount} orphaned tool_result message(s) without matching tool_use`);
2187
+ return filteredMessages;
2188
+ }
2189
+ /**
2190
+ * Ensure the message list starts with a user message.
2191
+ * If it starts with assistant or tool messages, skip them until we find a user message.
2192
+ * This is required because OpenAI API expects conversations to start with user messages
2193
+ * (after system messages).
2194
+ */
2195
+ function ensureStartsWithUser(messages) {
2196
+ let startIndex = 0;
2197
+ while (startIndex < messages.length) {
2198
+ if (messages[startIndex].role === "user") break;
2199
+ startIndex++;
2200
+ }
2201
+ if (startIndex > 0) consola.info(`Auto-compact: Skipped ${startIndex} leading non-user message(s) to ensure valid sequence`);
2202
+ return messages.slice(startIndex);
2203
+ }
2204
+ /**
1459
2205
  * Calculate estimated tokens for system messages.
1460
2206
  */
1461
2207
  function estimateSystemTokens(systemMessages) {
@@ -1473,6 +2219,7 @@ function createTruncationMarker(removedCount) {
1473
2219
  /**
1474
2220
  * Perform auto-compaction on a payload that exceeds token limits.
1475
2221
  * This uses simple truncation - no LLM calls required.
2222
+ * Uses iterative approach with decreasing target tokens until under limit.
1476
2223
  */
1477
2224
  async function autoCompact(payload, model, config = {}) {
1478
2225
  const cfg = {
@@ -1493,8 +2240,49 @@ async function autoCompact(payload, model, config = {}) {
1493
2240
  const { systemMessages, remainingMessages } = extractSystemMessages(payload.messages);
1494
2241
  const systemTokens = estimateSystemTokens(systemMessages);
1495
2242
  consola.debug(`Auto-compact: ${systemMessages.length} system messages (~${systemTokens} tokens)`);
1496
- const effectiveTarget = Math.min(cfg.targetTokens, limit);
1497
- const preserveIndex = findPreserveIndex(remainingMessages, effectiveTarget, systemTokens);
2243
+ const MAX_ITERATIONS = 5;
2244
+ const MIN_TARGET = 2e4;
2245
+ let currentTarget = Math.min(cfg.targetTokens, limit);
2246
+ let lastResult = null;
2247
+ for (let iteration = 0; iteration < MAX_ITERATIONS; iteration++) {
2248
+ const result = await tryCompactWithTarget({
2249
+ payload,
2250
+ model,
2251
+ systemMessages,
2252
+ remainingMessages,
2253
+ systemTokens,
2254
+ targetTokens: currentTarget,
2255
+ limit,
2256
+ originalTokens
2257
+ });
2258
+ if (!result.wasCompacted) return result;
2259
+ lastResult = result;
2260
+ if (result.compactedTokens <= limit) {
2261
+ consola.info(`Auto-compact: ${originalTokens} → ${result.compactedTokens} tokens (removed ${result.removedMessageCount} messages)`);
2262
+ return result;
2263
+ }
2264
+ consola.warn(`Auto-compact: Still over limit (${result.compactedTokens} > ${limit}), trying more aggressive truncation`);
2265
+ currentTarget = Math.floor(currentTarget * .7);
2266
+ if (currentTarget < MIN_TARGET) {
2267
+ consola.error("Auto-compact: Cannot reduce further, target too low");
2268
+ return result;
2269
+ }
2270
+ }
2271
+ consola.error(`Auto-compact: Exhausted ${MAX_ITERATIONS} iterations, returning best effort`);
2272
+ return lastResult ?? {
2273
+ payload,
2274
+ wasCompacted: false,
2275
+ originalTokens,
2276
+ compactedTokens: originalTokens,
2277
+ removedMessageCount: 0
2278
+ };
2279
+ }
2280
+ /**
2281
+ * Helper to attempt compaction with a specific target token count.
2282
+ */
2283
+ async function tryCompactWithTarget(opts) {
2284
+ const { payload, model, systemMessages, remainingMessages, systemTokens, targetTokens, originalTokens } = opts;
2285
+ const preserveIndex = findPreserveIndex(remainingMessages, targetTokens, systemTokens);
1498
2286
  if (preserveIndex === 0) {
1499
2287
  consola.warn("Auto-compact: Cannot truncate further without losing all conversation history");
1500
2288
  return {
@@ -1506,8 +2294,21 @@ async function autoCompact(payload, model, config = {}) {
1506
2294
  };
1507
2295
  }
1508
2296
  const removedMessages = remainingMessages.slice(0, preserveIndex);
1509
- const preservedMessages = remainingMessages.slice(preserveIndex);
1510
- consola.info(`Auto-compact: Removing ${removedMessages.length} messages, keeping ${preservedMessages.length}`);
2297
+ let preservedMessages = remainingMessages.slice(preserveIndex);
2298
+ preservedMessages = filterOrphanedToolResults(preservedMessages);
2299
+ preservedMessages = ensureStartsWithUser(preservedMessages);
2300
+ preservedMessages = filterOrphanedToolResults(preservedMessages);
2301
+ if (preservedMessages.length === 0) {
2302
+ consola.warn("Auto-compact: All messages were filtered out after cleanup, cannot compact");
2303
+ return {
2304
+ payload,
2305
+ wasCompacted: false,
2306
+ originalTokens,
2307
+ compactedTokens: originalTokens,
2308
+ removedMessageCount: 0
2309
+ };
2310
+ }
2311
+ consola.debug(`Auto-compact: Removing ${removedMessages.length} messages, keeping ${preservedMessages.length}`);
1511
2312
  const truncationMarker = createTruncationMarker(removedMessages.length);
1512
2313
  const newPayload = {
1513
2314
  ...payload,
@@ -1518,136 +2319,192 @@ async function autoCompact(payload, model, config = {}) {
1518
2319
  ]
1519
2320
  };
1520
2321
  const newTokenCount = await getTokenCount(newPayload, model);
1521
- consola.info(`Auto-compact: Reduced from ${originalTokens} to ${newTokenCount.input} tokens`);
1522
- if (newTokenCount.input > limit) {
1523
- consola.warn(`Auto-compact: Still over limit (${newTokenCount.input} > ${limit}), trying more aggressive truncation`);
1524
- const aggressiveTarget = Math.floor(effectiveTarget * .7);
1525
- if (aggressiveTarget < 2e4) {
1526
- consola.error("Auto-compact: Cannot reduce further, target too low");
1527
- return {
1528
- payload: newPayload,
1529
- wasCompacted: true,
1530
- originalTokens,
1531
- compactedTokens: newTokenCount.input,
1532
- removedMessageCount: removedMessages.length
1533
- };
1534
- }
1535
- return autoCompact(payload, model, {
1536
- ...cfg,
1537
- targetTokens: aggressiveTarget
1538
- });
2322
+ return {
2323
+ payload: newPayload,
2324
+ wasCompacted: true,
2325
+ originalTokens,
2326
+ compactedTokens: newTokenCount.input,
2327
+ removedMessageCount: removedMessages.length
2328
+ };
2329
+ }
2330
+ /**
2331
+ * Create a marker to append to responses indicating auto-compaction occurred.
2332
+ */
2333
+ function createCompactionMarker(result) {
2334
+ if (!result.wasCompacted) return "";
2335
+ const reduction = result.originalTokens - result.compactedTokens;
2336
+ const percentage = Math.round(reduction / result.originalTokens * 100);
2337
+ return `\n\n---\n[Auto-compacted: ${result.removedMessageCount} messages removed, ${result.originalTokens} → ${result.compactedTokens} tokens (${percentage}% reduction)]`;
2338
+ }
2339
+
2340
+ //#endregion
2341
+ //#region src/services/copilot/create-chat-completions.ts
2342
+ const createChatCompletions = async (payload) => {
2343
+ if (!state.copilotToken) throw new Error("Copilot token not found");
2344
+ const enableVision = payload.messages.some((x) => typeof x.content !== "string" && x.content?.some((x$1) => x$1.type === "image_url"));
2345
+ const isAgentCall = payload.messages.some((msg) => ["assistant", "tool"].includes(msg.role));
2346
+ const headers = {
2347
+ ...copilotHeaders(state, enableVision),
2348
+ "X-Initiator": isAgentCall ? "agent" : "user"
2349
+ };
2350
+ const response = await fetch(`${copilotBaseUrl(state)}/chat/completions`, {
2351
+ method: "POST",
2352
+ headers,
2353
+ body: JSON.stringify(payload)
2354
+ });
2355
+ if (!response.ok) {
2356
+ consola.error("Failed to create chat completions", response);
2357
+ throw await HTTPError.fromResponse("Failed to create chat completions", response);
2358
+ }
2359
+ if (payload.stream) return events(response);
2360
+ return await response.json();
2361
+ };
2362
+
2363
+ //#endregion
2364
+ //#region src/routes/shared.ts
2365
+ /** Helper to update tracker model */
2366
+ function updateTrackerModel(trackingId, model) {
2367
+ if (!trackingId) return;
2368
+ const request = requestTracker.getRequest(trackingId);
2369
+ if (request) request.model = model;
2370
+ }
2371
+ /** Helper to update tracker status */
2372
+ function updateTrackerStatus(trackingId, status) {
2373
+ if (!trackingId) return;
2374
+ requestTracker.updateRequest(trackingId, { status });
2375
+ }
2376
+ /** Record error response to history */
2377
+ function recordErrorResponse(ctx, model, error) {
2378
+ recordResponse(ctx.historyId, {
2379
+ success: false,
2380
+ model,
2381
+ usage: {
2382
+ input_tokens: 0,
2383
+ output_tokens: 0
2384
+ },
2385
+ error: error instanceof Error ? error.message : "Unknown error",
2386
+ content: null
2387
+ }, Date.now() - ctx.startTime);
2388
+ }
2389
+ /** Complete TUI tracking */
2390
+ function completeTracking(trackingId, inputTokens, outputTokens, queueWaitMs) {
2391
+ if (!trackingId) return;
2392
+ requestTracker.updateRequest(trackingId, {
2393
+ inputTokens,
2394
+ outputTokens,
2395
+ queueWaitMs
2396
+ });
2397
+ requestTracker.completeRequest(trackingId, 200, {
2398
+ inputTokens,
2399
+ outputTokens
2400
+ });
2401
+ }
2402
+ /** Fail TUI tracking */
2403
+ function failTracking(trackingId, error) {
2404
+ if (!trackingId) return;
2405
+ requestTracker.failRequest(trackingId, error instanceof Error ? error.message : "Stream error");
2406
+ }
2407
+ /** Record streaming error to history (works with any accumulator type) */
2408
+ function recordStreamError(opts) {
2409
+ const { acc, fallbackModel, ctx, error } = opts;
2410
+ recordResponse(ctx.historyId, {
2411
+ success: false,
2412
+ model: acc.model || fallbackModel,
2413
+ usage: {
2414
+ input_tokens: 0,
2415
+ output_tokens: 0
2416
+ },
2417
+ error: error instanceof Error ? error.message : "Stream error",
2418
+ content: null
2419
+ }, Date.now() - ctx.startTime);
2420
+ }
2421
+ /** Type guard for non-streaming responses */
2422
+ function isNonStreaming(response) {
2423
+ return Object.hasOwn(response, "choices");
2424
+ }
2425
+ /** Build final payload with auto-compact if needed */
2426
+ async function buildFinalPayload(payload, model) {
2427
+ if (!state.autoCompact || !model) {
2428
+ if (state.autoCompact && !model) consola.warn(`Auto-compact: Model '${payload.model}' not found in cached models, skipping`);
2429
+ return {
2430
+ finalPayload: payload,
2431
+ compactResult: null
2432
+ };
2433
+ }
2434
+ try {
2435
+ const check = await checkNeedsCompaction(payload, model);
2436
+ consola.debug(`Auto-compact check: ${check.currentTokens} tokens, limit ${check.limit}, needed: ${check.needed}`);
2437
+ if (!check.needed) return {
2438
+ finalPayload: payload,
2439
+ compactResult: null
2440
+ };
2441
+ consola.info(`Auto-compact triggered: ${check.currentTokens} tokens > ${check.limit} limit`);
2442
+ const compactResult = await autoCompact(payload, model);
2443
+ return {
2444
+ finalPayload: compactResult.payload,
2445
+ compactResult
2446
+ };
2447
+ } catch (error) {
2448
+ consola.warn("Auto-compact failed, proceeding with original payload:", error instanceof Error ? error.message : error);
2449
+ return {
2450
+ finalPayload: payload,
2451
+ compactResult: null
2452
+ };
1539
2453
  }
1540
- return {
1541
- payload: newPayload,
1542
- wasCompacted: true,
1543
- originalTokens,
1544
- compactedTokens: newTokenCount.input,
1545
- removedMessageCount: removedMessages.length
1546
- };
1547
2454
  }
1548
2455
  /**
1549
- * Create a marker to append to responses indicating auto-compaction occurred.
2456
+ * Log helpful debugging information when a 413 error occurs.
1550
2457
  */
1551
- function createCompactionMarker(result) {
1552
- if (!result.wasCompacted) return "";
1553
- const reduction = result.originalTokens - result.compactedTokens;
1554
- const percentage = Math.round(reduction / result.originalTokens * 100);
1555
- return `\n\n---\n[Auto-compacted: ${result.removedMessageCount} messages removed, ${result.originalTokens} → ${result.compactedTokens} tokens (${percentage}% reduction)]`;
1556
- }
1557
-
1558
- //#endregion
1559
- //#region src/lib/queue.ts
1560
- var RequestQueue = class {
1561
- queue = [];
1562
- processing = false;
1563
- lastRequestTime = 0;
1564
- async enqueue(execute, rateLimitSeconds) {
1565
- return new Promise((resolve, reject) => {
1566
- this.queue.push({
1567
- execute,
1568
- resolve,
1569
- reject
1570
- });
1571
- if (this.queue.length > 1) {
1572
- const position = this.queue.length;
1573
- const waitTime = Math.ceil((position - 1) * rateLimitSeconds);
1574
- (waitTime > 10 ? consola.warn : consola.info)(`Rate limit: request queued (position ${position}, ~${waitTime}s wait)`);
1575
- }
1576
- this.processQueue(rateLimitSeconds);
1577
- });
1578
- }
1579
- async processQueue(rateLimitSeconds) {
1580
- if (this.processing) return;
1581
- this.processing = true;
1582
- while (this.queue.length > 0) {
1583
- const elapsedMs = Date.now() - this.lastRequestTime;
1584
- const requiredMs = rateLimitSeconds * 1e3;
1585
- if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
1586
- const waitMs = requiredMs - elapsedMs;
1587
- const waitSec = Math.ceil(waitMs / 1e3);
1588
- (waitSec > 10 ? consola.warn : consola.info)(`Rate limit: waiting ${waitSec}s before next request...`);
1589
- await new Promise((resolve) => setTimeout(resolve, waitMs));
1590
- }
1591
- const request = this.queue.shift();
1592
- if (!request) break;
1593
- this.lastRequestTime = Date.now();
1594
- try {
1595
- const result = await request.execute();
1596
- request.resolve(result);
1597
- } catch (error) {
1598
- request.reject(error);
2458
+ async function logPayloadSizeInfo(payload, model) {
2459
+ const messageCount = payload.messages.length;
2460
+ const bodySize = JSON.stringify(payload).length;
2461
+ const bodySizeKB = Math.round(bodySize / 1024);
2462
+ let imageCount = 0;
2463
+ let largeMessages = 0;
2464
+ let totalImageSize = 0;
2465
+ for (const msg of payload.messages) {
2466
+ if (Array.isArray(msg.content)) {
2467
+ for (const part of msg.content) if (part.type === "image_url") {
2468
+ imageCount++;
2469
+ if (part.image_url.url.startsWith("data:")) totalImageSize += part.image_url.url.length;
1599
2470
  }
1600
2471
  }
1601
- this.processing = false;
2472
+ if ((typeof msg.content === "string" ? msg.content.length : JSON.stringify(msg.content).length) > 5e4) largeMessages++;
1602
2473
  }
1603
- get length() {
1604
- return this.queue.length;
2474
+ consola.info("");
2475
+ consola.info("╭─────────────────────────────────────────────────────────╮");
2476
+ consola.info("│ 413 Request Entity Too Large │");
2477
+ consola.info("╰─────────────────────────────────────────────────────────╯");
2478
+ consola.info("");
2479
+ consola.info(` Request body size: ${bodySizeKB} KB (${bodySize.toLocaleString()} bytes)`);
2480
+ consola.info(` Message count: ${messageCount}`);
2481
+ if (model) try {
2482
+ const tokenCount = await getTokenCount(payload, model);
2483
+ const limit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
2484
+ consola.info(` Estimated tokens: ${tokenCount.input.toLocaleString()} / ${limit.toLocaleString()}`);
2485
+ } catch {}
2486
+ if (imageCount > 0) {
2487
+ const imageSizeKB = Math.round(totalImageSize / 1024);
2488
+ consola.info(` Images: ${imageCount} (${imageSizeKB} KB base64 data)`);
1605
2489
  }
1606
- };
1607
- const requestQueue = new RequestQueue();
1608
- /**
1609
- * Execute a request with rate limiting via queue.
1610
- * Requests are queued and processed sequentially at the configured rate.
1611
- */
1612
- async function executeWithRateLimit(state$1, execute) {
1613
- if (state$1.rateLimitSeconds === void 0) return execute();
1614
- return requestQueue.enqueue(execute, state$1.rateLimitSeconds);
2490
+ if (largeMessages > 0) consola.info(` Large messages (>50KB): ${largeMessages}`);
2491
+ consola.info("");
2492
+ consola.info(" Suggestions:");
2493
+ if (!state.autoCompact) consola.info(" • Enable --auto-compact to automatically truncate history");
2494
+ if (imageCount > 0) consola.info(" • Remove or resize large images in the conversation");
2495
+ consola.info(" • Start a new conversation with /clear or /reset");
2496
+ consola.info(" • Reduce conversation history by deleting old messages");
2497
+ consola.info("");
1615
2498
  }
1616
2499
 
1617
- //#endregion
1618
- //#region src/services/copilot/create-chat-completions.ts
1619
- const createChatCompletions = async (payload) => {
1620
- if (!state.copilotToken) throw new Error("Copilot token not found");
1621
- const enableVision = payload.messages.some((x) => typeof x.content !== "string" && x.content?.some((x$1) => x$1.type === "image_url"));
1622
- const isAgentCall = payload.messages.some((msg) => ["assistant", "tool"].includes(msg.role));
1623
- const headers = {
1624
- ...copilotHeaders(state, enableVision),
1625
- "X-Initiator": isAgentCall ? "agent" : "user"
1626
- };
1627
- const response = await fetch(`${copilotBaseUrl(state)}/chat/completions`, {
1628
- method: "POST",
1629
- headers,
1630
- body: JSON.stringify(payload)
1631
- });
1632
- if (!response.ok) {
1633
- consola.error("Failed to create chat completions", response);
1634
- throw await HTTPError.fromResponse("Failed to create chat completions", response);
1635
- }
1636
- if (payload.stream) return events(response);
1637
- return await response.json();
1638
- };
1639
-
1640
2500
  //#endregion
1641
2501
  //#region src/routes/chat-completions/handler.ts
1642
- function getModelMaxOutputTokens(model) {
1643
- return model?.capabilities?.limits?.max_output_tokens;
1644
- }
1645
2502
  async function handleCompletion$1(c) {
1646
2503
  const originalPayload = await c.req.json();
1647
2504
  consola.debug("Request payload:", JSON.stringify(originalPayload).slice(-400));
1648
2505
  const trackingId = c.get("trackingId");
1649
2506
  const startTime = (trackingId ? requestTracker.getRequest(trackingId) : void 0)?.startTime ?? Date.now();
1650
- updateTrackerModel$1(trackingId, originalPayload.model);
2507
+ updateTrackerModel(trackingId, originalPayload.model);
1651
2508
  const ctx = {
1652
2509
  historyId: recordRequest("openai", {
1653
2510
  model: originalPayload.model,
@@ -1665,19 +2522,33 @@ async function handleCompletion$1(c) {
1665
2522
  };
1666
2523
  const selectedModel = state.models?.data.find((model) => model.id === originalPayload.model);
1667
2524
  await logTokenCount(originalPayload, selectedModel);
1668
- const { finalPayload, compactResult } = await buildFinalPayload$1(originalPayload, selectedModel);
2525
+ const { finalPayload, compactResult } = await buildFinalPayload(originalPayload, selectedModel);
1669
2526
  if (compactResult) ctx.compactResult = compactResult;
1670
2527
  const payload = isNullish(finalPayload.max_tokens) ? {
1671
2528
  ...finalPayload,
1672
- max_tokens: getModelMaxOutputTokens(selectedModel)
2529
+ max_tokens: selectedModel?.capabilities?.limits?.max_output_tokens
1673
2530
  } : finalPayload;
1674
2531
  if (isNullish(originalPayload.max_tokens)) consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens));
1675
2532
  if (state.manualApprove) await awaitApproval();
2533
+ return executeRequest({
2534
+ c,
2535
+ payload,
2536
+ selectedModel,
2537
+ ctx,
2538
+ trackingId
2539
+ });
2540
+ }
2541
+ /**
2542
+ * Execute the API call with enhanced error handling for 413 errors.
2543
+ */
2544
+ async function executeRequest(opts) {
2545
+ const { c, payload, selectedModel, ctx, trackingId } = opts;
1676
2546
  try {
1677
- const response = await executeWithRateLimit(state, () => createChatCompletions(payload));
1678
- if (isNonStreaming$1(response)) return handleNonStreamingResponse$1(c, response, ctx);
2547
+ const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(payload));
2548
+ ctx.queueWaitMs = queueWaitMs;
2549
+ if (isNonStreaming(response)) return handleNonStreamingResponse$1(c, response, ctx);
1679
2550
  consola.debug("Streaming response");
1680
- updateTrackerStatus$1(trackingId, "streaming");
2551
+ updateTrackerStatus(trackingId, "streaming");
1681
2552
  return streamSSE(c, async (stream) => {
1682
2553
  await handleStreamingResponse$1({
1683
2554
  stream,
@@ -1687,39 +2558,11 @@ async function handleCompletion$1(c) {
1687
2558
  });
1688
2559
  });
1689
2560
  } catch (error) {
1690
- recordErrorResponse$1(ctx, payload.model, error);
2561
+ if (error instanceof HTTPError && error.status === 413) await logPayloadSizeInfo(payload, selectedModel);
2562
+ recordErrorResponse(ctx, payload.model, error);
1691
2563
  throw error;
1692
2564
  }
1693
2565
  }
1694
- async function buildFinalPayload$1(payload, model) {
1695
- if (!state.autoCompact || !model) {
1696
- if (state.autoCompact && !model) consola.warn(`Auto-compact: Model '${payload.model}' not found in cached models, skipping`);
1697
- return {
1698
- finalPayload: payload,
1699
- compactResult: null
1700
- };
1701
- }
1702
- try {
1703
- const check = await checkNeedsCompaction(payload, model);
1704
- consola.debug(`Auto-compact check: ${check.currentTokens} tokens, limit ${check.limit}, needed: ${check.needed}`);
1705
- if (!check.needed) return {
1706
- finalPayload: payload,
1707
- compactResult: null
1708
- };
1709
- consola.info(`Auto-compact triggered: ${check.currentTokens} tokens > ${check.limit} limit`);
1710
- const compactResult = await autoCompact(payload, model);
1711
- return {
1712
- finalPayload: compactResult.payload,
1713
- compactResult
1714
- };
1715
- } catch (error) {
1716
- consola.warn("Auto-compact failed, proceeding with original payload:", error);
1717
- return {
1718
- finalPayload: payload,
1719
- compactResult: null
1720
- };
1721
- }
1722
- }
1723
2566
  async function logTokenCount(payload, selectedModel) {
1724
2567
  try {
1725
2568
  if (selectedModel) {
@@ -1730,27 +2573,6 @@ async function logTokenCount(payload, selectedModel) {
1730
2573
  consola.debug("Failed to calculate token count:", error);
1731
2574
  }
1732
2575
  }
1733
- function updateTrackerModel$1(trackingId, model) {
1734
- if (!trackingId) return;
1735
- const request = requestTracker.getRequest(trackingId);
1736
- if (request) request.model = model;
1737
- }
1738
- function updateTrackerStatus$1(trackingId, status) {
1739
- if (!trackingId) return;
1740
- requestTracker.updateRequest(trackingId, { status });
1741
- }
1742
- function recordErrorResponse$1(ctx, model, error) {
1743
- recordResponse(ctx.historyId, {
1744
- success: false,
1745
- model,
1746
- usage: {
1747
- input_tokens: 0,
1748
- output_tokens: 0
1749
- },
1750
- error: error instanceof Error ? error.message : "Unknown error",
1751
- content: null
1752
- }, Date.now() - ctx.startTime);
1753
- }
1754
2576
  function handleNonStreamingResponse$1(c, originalResponse, ctx) {
1755
2577
  consola.debug("Non-streaming response:", JSON.stringify(originalResponse));
1756
2578
  let response = originalResponse;
@@ -1782,7 +2604,8 @@ function handleNonStreamingResponse$1(c, originalResponse, ctx) {
1782
2604
  }, Date.now() - ctx.startTime);
1783
2605
  if (ctx.trackingId && usage) requestTracker.updateRequest(ctx.trackingId, {
1784
2606
  inputTokens: usage.prompt_tokens,
1785
- outputTokens: usage.completion_tokens
2607
+ outputTokens: usage.completion_tokens,
2608
+ queueWaitMs: ctx.queueWaitMs
1786
2609
  });
1787
2610
  return c.json(response);
1788
2611
  }
@@ -1848,7 +2671,7 @@ async function handleStreamingResponse$1(opts) {
1848
2671
  acc.content += marker;
1849
2672
  }
1850
2673
  recordStreamSuccess(acc, payload.model, ctx);
1851
- completeTracking$1(ctx.trackingId, acc.inputTokens, acc.outputTokens);
2674
+ completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
1852
2675
  } catch (error) {
1853
2676
  recordStreamError({
1854
2677
  acc,
@@ -1856,7 +2679,7 @@ async function handleStreamingResponse$1(opts) {
1856
2679
  ctx,
1857
2680
  error
1858
2681
  });
1859
- failTracking$1(ctx.trackingId, error);
2682
+ failTracking(ctx.trackingId, error);
1860
2683
  throw error;
1861
2684
  }
1862
2685
  }
@@ -1864,42 +2687,31 @@ function parseStreamChunk(chunk, acc) {
1864
2687
  if (!chunk.data || chunk.data === "[DONE]") return;
1865
2688
  try {
1866
2689
  const parsed = JSON.parse(chunk.data);
1867
- accumulateModel(parsed, acc);
1868
- accumulateUsage(parsed, acc);
1869
- accumulateChoice(parsed.choices[0], acc);
1870
- } catch {}
1871
- }
1872
- function accumulateModel(parsed, acc) {
1873
- if (parsed.model && !acc.model) acc.model = parsed.model;
1874
- }
1875
- function accumulateUsage(parsed, acc) {
1876
- if (parsed.usage) {
1877
- acc.inputTokens = parsed.usage.prompt_tokens;
1878
- acc.outputTokens = parsed.usage.completion_tokens;
1879
- }
1880
- }
1881
- function accumulateChoice(choice, acc) {
1882
- if (!choice) return;
1883
- if (choice.delta.content) acc.content += choice.delta.content;
1884
- if (choice.delta.tool_calls) accumulateToolCalls(choice.delta.tool_calls, acc);
1885
- if (choice.finish_reason) acc.finishReason = choice.finish_reason;
1886
- }
1887
- function accumulateToolCalls(toolCalls, acc) {
1888
- if (!toolCalls) return;
1889
- for (const tc of toolCalls) {
1890
- const idx = tc.index;
1891
- if (!acc.toolCallMap.has(idx)) acc.toolCallMap.set(idx, {
1892
- id: tc.id ?? "",
1893
- name: tc.function?.name ?? "",
1894
- arguments: ""
1895
- });
1896
- const item = acc.toolCallMap.get(idx);
1897
- if (item) {
1898
- if (tc.id) item.id = tc.id;
1899
- if (tc.function?.name) item.name = tc.function.name;
1900
- if (tc.function?.arguments) item.arguments += tc.function.arguments;
2690
+ if (parsed.model && !acc.model) acc.model = parsed.model;
2691
+ if (parsed.usage) {
2692
+ acc.inputTokens = parsed.usage.prompt_tokens;
2693
+ acc.outputTokens = parsed.usage.completion_tokens;
1901
2694
  }
1902
- }
2695
+ const choice = parsed.choices[0];
2696
+ if (choice) {
2697
+ if (choice.delta.content) acc.content += choice.delta.content;
2698
+ if (choice.delta.tool_calls) for (const tc of choice.delta.tool_calls) {
2699
+ const idx = tc.index;
2700
+ if (!acc.toolCallMap.has(idx)) acc.toolCallMap.set(idx, {
2701
+ id: tc.id ?? "",
2702
+ name: tc.function?.name ?? "",
2703
+ arguments: ""
2704
+ });
2705
+ const item = acc.toolCallMap.get(idx);
2706
+ if (item) {
2707
+ if (tc.id) item.id = tc.id;
2708
+ if (tc.function?.name) item.name = tc.function.name;
2709
+ if (tc.function?.arguments) item.arguments += tc.function.arguments;
2710
+ }
2711
+ }
2712
+ if (choice.finish_reason) acc.finishReason = choice.finish_reason;
2713
+ }
2714
+ } catch {}
1903
2715
  }
1904
2716
  function recordStreamSuccess(acc, fallbackModel, ctx) {
1905
2717
  for (const tc of acc.toolCallMap.values()) if (tc.id && tc.name) acc.toolCalls.push(tc);
@@ -1931,35 +2743,6 @@ function recordStreamSuccess(acc, fallbackModel, ctx) {
1931
2743
  })) : void 0
1932
2744
  }, Date.now() - ctx.startTime);
1933
2745
  }
1934
- function recordStreamError(opts) {
1935
- const { acc, fallbackModel, ctx, error } = opts;
1936
- recordResponse(ctx.historyId, {
1937
- success: false,
1938
- model: acc.model || fallbackModel,
1939
- usage: {
1940
- input_tokens: 0,
1941
- output_tokens: 0
1942
- },
1943
- error: error instanceof Error ? error.message : "Stream error",
1944
- content: null
1945
- }, Date.now() - ctx.startTime);
1946
- }
1947
- function completeTracking$1(trackingId, inputTokens, outputTokens) {
1948
- if (!trackingId) return;
1949
- requestTracker.updateRequest(trackingId, {
1950
- inputTokens,
1951
- outputTokens
1952
- });
1953
- requestTracker.completeRequest(trackingId, 200, {
1954
- inputTokens,
1955
- outputTokens
1956
- });
1957
- }
1958
- function failTracking$1(trackingId, error) {
1959
- if (!trackingId) return;
1960
- requestTracker.failRequest(trackingId, error instanceof Error ? error.message : "Stream error");
1961
- }
1962
- const isNonStreaming$1 = (response) => Object.hasOwn(response, "choices");
1963
2746
  function convertOpenAIMessages(messages) {
1964
2747
  return messages.map((msg) => {
1965
2748
  const result = {
@@ -1987,7 +2770,7 @@ completionRoutes.post("/", async (c) => {
1987
2770
  try {
1988
2771
  return await handleCompletion$1(c);
1989
2772
  } catch (error) {
1990
- return await forwardError(c, error);
2773
+ return forwardError(c, error);
1991
2774
  }
1992
2775
  });
1993
2776
 
@@ -2013,7 +2796,7 @@ embeddingRoutes.post("/", async (c) => {
2013
2796
  const response = await createEmbeddings(payload);
2014
2797
  return c.json(response);
2015
2798
  } catch (error) {
2016
- return await forwardError(c, error);
2799
+ return forwardError(c, error);
2017
2800
  }
2018
2801
  });
2019
2802
 
@@ -3160,6 +3943,15 @@ function mapOpenAIStopReasonToAnthropic(finishReason) {
3160
3943
  //#endregion
3161
3944
  //#region src/routes/messages/non-stream-translation.ts
3162
3945
  const OPENAI_TOOL_NAME_LIMIT = 64;
3946
+ /**
3947
+ * Ensure all tool_use blocks have corresponding tool_result responses.
3948
+ * This handles edge cases where conversation history may be incomplete:
3949
+ * - Session interruptions where tool execution was cut off
3950
+ * - Previous request failures
3951
+ * - Client sending truncated history
3952
+ *
3953
+ * Adding placeholder responses prevents API errors and maintains protocol compliance.
3954
+ */
3163
3955
  function fixMessageSequence(messages) {
3164
3956
  const fixedMessages = [];
3165
3957
  for (let i = 0; i < messages.length; i++) {
@@ -3318,7 +4110,7 @@ function getTruncatedToolName(originalName, toolNameMapping) {
3318
4110
  for (let i = 0; i < originalName.length; i++) {
3319
4111
  const char = originalName.codePointAt(i) ?? 0;
3320
4112
  hash = (hash << 5) - hash + char;
3321
- hash = hash & hash;
4113
+ hash = Math.trunc(hash);
3322
4114
  }
3323
4115
  const hashSuffix = Math.abs(hash).toString(36).slice(0, 8);
3324
4116
  const truncatedName = originalName.slice(0, OPENAI_TOOL_NAME_LIMIT - 9) + "_" + hashSuffix;
@@ -3636,7 +4428,8 @@ async function handleCompletion(c) {
3636
4428
  if (compactResult) ctx.compactResult = compactResult;
3637
4429
  if (state.manualApprove) await awaitApproval();
3638
4430
  try {
3639
- const response = await executeWithRateLimit(state, () => createChatCompletions(openAIPayload));
4431
+ const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(openAIPayload));
4432
+ ctx.queueWaitMs = queueWaitMs;
3640
4433
  if (isNonStreaming(response)) return handleNonStreamingResponse({
3641
4434
  c,
3642
4435
  response,
@@ -3655,60 +4448,11 @@ async function handleCompletion(c) {
3655
4448
  });
3656
4449
  });
3657
4450
  } catch (error) {
4451
+ if (error instanceof HTTPError && error.status === 413) await logPayloadSizeInfo(openAIPayload, selectedModel);
3658
4452
  recordErrorResponse(ctx, anthropicPayload.model, error);
3659
4453
  throw error;
3660
4454
  }
3661
4455
  }
3662
- function updateTrackerModel(trackingId, model) {
3663
- if (!trackingId) return;
3664
- const request = requestTracker.getRequest(trackingId);
3665
- if (request) request.model = model;
3666
- }
3667
- async function buildFinalPayload(payload, model) {
3668
- if (!state.autoCompact || !model) {
3669
- if (state.autoCompact && !model) consola.warn(`Auto-compact: Model '${payload.model}' not found in cached models, skipping`);
3670
- return {
3671
- finalPayload: payload,
3672
- compactResult: null
3673
- };
3674
- }
3675
- try {
3676
- const check = await checkNeedsCompaction(payload, model);
3677
- consola.debug(`Auto-compact check: ${check.currentTokens} tokens, limit ${check.limit}, needed: ${check.needed}`);
3678
- if (!check.needed) return {
3679
- finalPayload: payload,
3680
- compactResult: null
3681
- };
3682
- consola.info(`Auto-compact triggered: ${check.currentTokens} tokens > ${check.limit} limit`);
3683
- const compactResult = await autoCompact(payload, model);
3684
- return {
3685
- finalPayload: compactResult.payload,
3686
- compactResult
3687
- };
3688
- } catch (error) {
3689
- consola.warn("Auto-compact failed, proceeding with original payload:", error);
3690
- return {
3691
- finalPayload: payload,
3692
- compactResult: null
3693
- };
3694
- }
3695
- }
3696
- function updateTrackerStatus(trackingId, status) {
3697
- if (!trackingId) return;
3698
- requestTracker.updateRequest(trackingId, { status });
3699
- }
3700
- function recordErrorResponse(ctx, model, error) {
3701
- recordResponse(ctx.historyId, {
3702
- success: false,
3703
- model,
3704
- usage: {
3705
- input_tokens: 0,
3706
- output_tokens: 0
3707
- },
3708
- error: error instanceof Error ? error.message : "Unknown error",
3709
- content: null
3710
- }, Date.now() - ctx.startTime);
3711
- }
3712
4456
  function handleNonStreamingResponse(opts) {
3713
4457
  const { c, response, toolNameMapping, ctx } = opts;
3714
4458
  consola.debug("Non-streaming response from Copilot:", JSON.stringify(response).slice(-400));
@@ -3743,7 +4487,8 @@ function handleNonStreamingResponse(opts) {
3743
4487
  }, Date.now() - ctx.startTime);
3744
4488
  if (ctx.trackingId) requestTracker.updateRequest(ctx.trackingId, {
3745
4489
  inputTokens: anthropicResponse.usage.input_tokens,
3746
- outputTokens: anthropicResponse.usage.output_tokens
4490
+ outputTokens: anthropicResponse.usage.output_tokens,
4491
+ queueWaitMs: ctx.queueWaitMs
3747
4492
  });
3748
4493
  return c.json(anthropicResponse);
3749
4494
  }
@@ -3799,10 +4544,10 @@ async function handleStreamingResponse(opts) {
3799
4544
  acc.content += marker;
3800
4545
  }
3801
4546
  recordStreamingResponse(acc, anthropicPayload.model, ctx);
3802
- completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens);
4547
+ completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
3803
4548
  } catch (error) {
3804
4549
  consola.error("Stream error:", error);
3805
- recordStreamingError({
4550
+ recordStreamError({
3806
4551
  acc,
3807
4552
  fallbackModel: anthropicPayload.model,
3808
4553
  ctx,
@@ -3942,34 +4687,6 @@ function recordStreamingResponse(acc, fallbackModel, ctx) {
3942
4687
  toolCalls: acc.toolCalls.length > 0 ? acc.toolCalls : void 0
3943
4688
  }, Date.now() - ctx.startTime);
3944
4689
  }
3945
- function recordStreamingError(opts) {
3946
- const { acc, fallbackModel, ctx, error } = opts;
3947
- recordResponse(ctx.historyId, {
3948
- success: false,
3949
- model: acc.model || fallbackModel,
3950
- usage: {
3951
- input_tokens: 0,
3952
- output_tokens: 0
3953
- },
3954
- error: error instanceof Error ? error.message : "Stream error",
3955
- content: null
3956
- }, Date.now() - ctx.startTime);
3957
- }
3958
- function completeTracking(trackingId, inputTokens, outputTokens) {
3959
- if (!trackingId) return;
3960
- requestTracker.updateRequest(trackingId, {
3961
- inputTokens,
3962
- outputTokens
3963
- });
3964
- requestTracker.completeRequest(trackingId, 200, {
3965
- inputTokens,
3966
- outputTokens
3967
- });
3968
- }
3969
- function failTracking(trackingId, error) {
3970
- if (!trackingId) return;
3971
- requestTracker.failRequest(trackingId, error instanceof Error ? error.message : "Stream error");
3972
- }
3973
4690
  function convertAnthropicMessages(messages) {
3974
4691
  return messages.map((msg) => {
3975
4692
  if (typeof msg.content === "string") return {
@@ -4017,7 +4734,6 @@ function extractToolCallsFromContent(content) {
4017
4734
  });
4018
4735
  return tools.length > 0 ? tools : void 0;
4019
4736
  }
4020
- const isNonStreaming = (response) => Object.hasOwn(response, "choices");
4021
4737
 
4022
4738
  //#endregion
4023
4739
  //#region src/routes/messages/route.ts
@@ -4026,14 +4742,14 @@ messageRoutes.post("/", async (c) => {
4026
4742
  try {
4027
4743
  return await handleCompletion(c);
4028
4744
  } catch (error) {
4029
- return await forwardError(c, error);
4745
+ return forwardError(c, error);
4030
4746
  }
4031
4747
  });
4032
4748
  messageRoutes.post("/count_tokens", async (c) => {
4033
4749
  try {
4034
4750
  return await handleCountTokens(c);
4035
4751
  } catch (error) {
4036
- return await forwardError(c, error);
4752
+ return forwardError(c, error);
4037
4753
  }
4038
4754
  });
4039
4755
 
@@ -4072,18 +4788,18 @@ modelRoutes.get("/", async (c) => {
4072
4788
  has_more: false
4073
4789
  });
4074
4790
  } catch (error) {
4075
- return await forwardError(c, error);
4791
+ return forwardError(c, error);
4076
4792
  }
4077
4793
  });
4078
4794
 
4079
4795
  //#endregion
4080
4796
  //#region src/routes/token/route.ts
4081
4797
  const tokenRoute = new Hono();
4082
- tokenRoute.get("/", async (c) => {
4798
+ tokenRoute.get("/", (c) => {
4083
4799
  try {
4084
4800
  return c.json({ token: state.copilotToken });
4085
4801
  } catch (error) {
4086
- return await forwardError(c, error);
4802
+ return forwardError(c, error);
4087
4803
  }
4088
4804
  });
4089
4805
 
@@ -4095,7 +4811,7 @@ usageRoute.get("/", async (c) => {
4095
4811
  const usage = await getCopilotUsage();
4096
4812
  return c.json(usage);
4097
4813
  } catch (error) {
4098
- return await forwardError(c, error);
4814
+ return forwardError(c, error);
4099
4815
  }
4100
4816
  });
4101
4817
 
@@ -4147,10 +4863,15 @@ async function runServer(options) {
4147
4863
  state.accountType = options.accountType;
4148
4864
  if (options.accountType !== "individual") consola.info(`Using ${options.accountType} plan GitHub account`);
4149
4865
  state.manualApprove = options.manual;
4150
- state.rateLimitSeconds = options.rateLimit;
4151
- state.rateLimitWait = options.rateLimitWait;
4152
4866
  state.showToken = options.showToken;
4153
4867
  state.autoCompact = options.autoCompact;
4868
+ if (options.rateLimit) initAdaptiveRateLimiter({
4869
+ baseRetryIntervalSeconds: options.retryInterval,
4870
+ requestIntervalSeconds: options.requestInterval,
4871
+ recoveryTimeoutMinutes: options.recoveryTimeout,
4872
+ consecutiveSuccessesForRecovery: options.consecutiveSuccesses
4873
+ });
4874
+ else consola.info("Rate limiting disabled");
4154
4875
  if (options.autoCompact) consola.info("Auto-compact enabled: will compress context when exceeding token limits");
4155
4876
  initHistory(options.history, options.historyLimit);
4156
4877
  if (options.history) {
@@ -4237,16 +4958,30 @@ const start = defineCommand({
4237
4958
  default: false,
4238
4959
  description: "Enable manual request approval"
4239
4960
  },
4240
- "rate-limit": {
4241
- alias: "r",
4242
- type: "string",
4243
- description: "Rate limit in seconds between requests"
4244
- },
4245
- wait: {
4246
- alias: "w",
4961
+ "no-rate-limit": {
4247
4962
  type: "boolean",
4248
4963
  default: false,
4249
- description: "Wait instead of error when rate limit is hit. Has no effect if rate limit is not set"
4964
+ description: "Disable adaptive rate limiting"
4965
+ },
4966
+ "retry-interval": {
4967
+ type: "string",
4968
+ default: "10",
4969
+ description: "Seconds to wait before retrying after rate limit error (default: 10)"
4970
+ },
4971
+ "request-interval": {
4972
+ type: "string",
4973
+ default: "10",
4974
+ description: "Seconds between requests in rate-limited mode (default: 10)"
4975
+ },
4976
+ "recovery-timeout": {
4977
+ type: "string",
4978
+ default: "10",
4979
+ description: "Minutes before attempting to recover from rate-limited mode (default: 10)"
4980
+ },
4981
+ "consecutive-successes": {
4982
+ type: "string",
4983
+ default: "5",
4984
+ description: "Number of consecutive successes needed to recover from rate-limited mode (default: 5)"
4250
4985
  },
4251
4986
  "github-token": {
4252
4987
  alias: "g",
@@ -4269,10 +5004,10 @@ const start = defineCommand({
4269
5004
  default: false,
4270
5005
  description: "Initialize proxy from environment variables"
4271
5006
  },
4272
- history: {
5007
+ "no-history": {
4273
5008
  type: "boolean",
4274
5009
  default: false,
4275
- description: "Enable request history recording and Web UI at /history"
5010
+ description: "Disable request history recording and Web UI"
4276
5011
  },
4277
5012
  "history-limit": {
4278
5013
  type: "string",
@@ -4286,21 +5021,22 @@ const start = defineCommand({
4286
5021
  }
4287
5022
  },
4288
5023
  run({ args }) {
4289
- const rateLimitRaw = args["rate-limit"];
4290
- const rateLimit = rateLimitRaw === void 0 ? void 0 : Number.parseInt(rateLimitRaw, 10);
4291
5024
  return runServer({
4292
5025
  port: Number.parseInt(args.port, 10),
4293
5026
  host: args.host,
4294
5027
  verbose: args.verbose,
4295
5028
  accountType: args["account-type"],
4296
5029
  manual: args.manual,
4297
- rateLimit,
4298
- rateLimitWait: args.wait,
5030
+ rateLimit: !args["no-rate-limit"],
5031
+ retryInterval: Number.parseInt(args["retry-interval"], 10),
5032
+ requestInterval: Number.parseInt(args["request-interval"], 10),
5033
+ recoveryTimeout: Number.parseInt(args["recovery-timeout"], 10),
5034
+ consecutiveSuccesses: Number.parseInt(args["consecutive-successes"], 10),
4299
5035
  githubToken: args["github-token"],
4300
5036
  claudeCode: args["claude-code"],
4301
5037
  showToken: args["show-token"],
4302
5038
  proxyEnv: args["proxy-env"],
4303
- history: args.history,
5039
+ history: !args["no-history"],
4304
5040
  historyLimit: Number.parseInt(args["history-limit"], 10),
4305
5041
  autoCompact: args["auto-compact"]
4306
5042
  });
@@ -4320,7 +5056,8 @@ const main = defineCommand({
4320
5056
  logout,
4321
5057
  start,
4322
5058
  "check-usage": checkUsage,
4323
- debug
5059
+ debug,
5060
+ "patch-claude": patchClaude
4324
5061
  }
4325
5062
  });
4326
5063
  await runMain(main);