@hsupu/copilot-api 0.7.5 → 0.7.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -3,7 +3,7 @@ import { defineCommand, runMain } from "citty";
3
3
  import consola from "consola";
4
4
  import fs from "node:fs/promises";
5
5
  import os from "node:os";
6
- import path, { join } from "node:path";
6
+ import path, { dirname, join } from "node:path";
7
7
  import { randomUUID } from "node:crypto";
8
8
  import { existsSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
9
9
  import clipboard from "clipboardy";
@@ -45,7 +45,6 @@ async function ensureFile(filePath) {
45
45
  const state = {
46
46
  accountType: "individual",
47
47
  manualApprove: false,
48
- rateLimitWait: false,
49
48
  showToken: false,
50
49
  autoCompact: false
51
50
  };
@@ -137,6 +136,16 @@ function formatRequestTooLargeError() {
137
136
  }
138
137
  };
139
138
  }
139
/**
 * Build an Anthropic-compatible error body for a rate-limit (429) response.
 *
 * The upstream message is used only when it is a non-blank string; anything
 * else (undefined, null, "", whitespace, a non-string value) falls back to a
 * generic explanation so the client never receives an empty or malformed
 * `message` field.
 *
 * @param {unknown} [copilotMessage] - Message reported by the upstream API, if any.
 * @returns {{ type: "error", error: { type: "rate_limit_error", message: string } }}
 */
function formatRateLimitError(copilotMessage) {
  const fallbackMessage = "You have exceeded your rate limit. Please try again later.";
  const hasUsableMessage = typeof copilotMessage === "string" && copilotMessage.trim() !== "";
  return {
    type: "error",
    error: {
      type: "rate_limit_error",
      message: hasUsableMessage ? copilotMessage : fallbackMessage,
    },
  };
}
140
149
  function forwardError(c, error) {
141
150
  consola.error("Error occurred:", error);
142
151
  if (error instanceof HTTPError) {
@@ -161,6 +170,11 @@ function forwardError(c, error) {
161
170
  return c.json(formattedError, 400);
162
171
  }
163
172
  }
173
+ if (error.status === 429 || copilotError.error?.code === "rate_limited") {
174
+ const formattedError = formatRateLimitError(copilotError.error?.message);
175
+ consola.debug("Returning formatted rate limit error:", formattedError);
176
+ return c.json(formattedError, 429);
177
+ }
164
178
  return c.json({ error: {
165
179
  message: error.responseText,
166
180
  type: "error"
@@ -539,16 +553,77 @@ const logout = defineCommand({
539
553
 
540
554
  //#endregion
541
555
  //#region src/patch-claude.ts
542
- const ORIGINAL_PATTERN = /function HR\(A\)\{if\(A\.includes\("\[1m\]"\)\)return 1e6;return 200000\}/;
543
- const PATCHED_PATTERN = /function HR\(A\)\{if\(A\.includes\("\[1m\]"\)\)return 1e6;return \d+\}/;
556
/**
 * Inclusive Claude Code version ranges that the patcher knows how to handle.
 * Each range is mapped to a pattern type by getPatternTypeForVersion.
 */
const SUPPORTED_VERSIONS = {
  v2a: { min: "2.0.0", max: "2.1.10" },
  v2b: { min: "2.1.11", max: "2.1.12" },
};

/**
 * Regular expressions used to locate the context-limit code inside cli.js.
 * - funcOriginal: the HR function with the stock 200000 limit
 * - funcPatched:  the HR function with any numeric limit
 * - variable:     the BS9 variable assignment (captures the numeric limit)
 */
const PATTERNS = {
  funcOriginal: /function HR\(A\)\{if\(A\.includes\("\[1m\]"\)\)return 1e6;return 200000\}/,
  funcPatched: /function HR\(A\)\{if\(A\.includes\("\[1m\]"\)\)return 1e6;return \d+\}/,
  variable: /var BS9=(\d+)/,
};
571
/**
 * Split a dotted version string into its numeric components.
 * A component that does not start with digits is treated as 0.
 *
 * @param {string} version - e.g. "2.1.10"
 * @returns {number[]} e.g. [2, 1, 10]
 */
function parseVersion(version) {
  const numbers = [];
  for (const segment of version.split(".")) {
    const value = Number.parseInt(segment, 10);
    numbers.push(value || 0);
  }
  return numbers;
}
577
/**
 * Order two dotted version strings component by component.
 * Missing trailing components count as 0, so "2.1" equals "2.1.0".
 *
 * @param {string} a
 * @param {string} b
 * @returns {-1 | 0 | 1} -1 if a < b, 0 if a == b, 1 if a > b
 */
function compareVersions(a, b) {
  const left = parseVersion(a);
  const right = parseVersion(b);
  const width = Math.max(left.length, right.length);
  let index = 0;
  while (index < width) {
    const leftPart = left[index] || 0;
    const rightPart = right[index] || 0;
    if (leftPart !== rightPart) return leftPart < rightPart ? -1 : 1;
    index += 1;
  }
  return 0;
}
593
/**
 * Map a Claude Code version to the patch pattern type used for it.
 *
 * @param {string} version
 * @returns {"func" | "variable" | null} "func" for the v2a range, "variable" for
 *   the v2b range, null when the version is in neither range.
 */
function getPatternTypeForVersion(version) {
  // Both range bounds are inclusive.
  const isWithin = ({ min, max }) => compareVersions(version, min) >= 0 && compareVersions(version, max) <= 0;
  if (isWithin(SUPPORTED_VERSIONS.v2a)) return "func";
  if (isWithin(SUPPORTED_VERSIONS.v2b)) return "variable";
  return null;
}
598
/**
 * Describe the supported version ranges for use in error messages.
 *
 * @returns {string} "<min>-<max>, <min>-<max>" for the v2a and v2b ranges, in that order.
 */
function getSupportedRangeString() {
  const { v2a, v2b } = SUPPORTED_VERSIONS;
  return [v2a, v2b].map(({ min, max }) => `${min}-${max}`).join(", ");
}
604
/**
 * Read the Claude Code version from the package.json that sits next to cli.js.
 *
 * @param {string} cliPath - Path to Claude Code's cli.js.
 * @returns {string | null} The `version` string, or null when the manifest is
 *   missing, unreadable, not valid JSON, or has no string `version`.
 */
function getClaudeCodeVersion(cliPath) {
  try {
    const manifestPath = join(dirname(cliPath), "package.json");
    if (!existsSync(manifestPath)) return null;
    const manifest = JSON.parse(readFileSync(manifestPath, "utf8"));
    const version = manifest?.version;
    return typeof version === "string" ? version : null;
  } catch {
    // Best effort: any read/parse failure simply means "version unknown".
    return null;
  }
}
544
618
  /**
545
619
  * Search volta tools directory for Claude Code
546
620
  */
547
621
  function findInVoltaTools(voltaHome) {
548
622
  const paths = [];
623
+ const packagesPath = join(voltaHome, "tools", "image", "packages", "@anthropic-ai", "claude-code", "lib", "node_modules", "@anthropic-ai", "claude-code", "cli.js");
624
+ if (existsSync(packagesPath)) paths.push(packagesPath);
549
625
  const toolsDir = join(voltaHome, "tools", "image", "node");
550
- if (!existsSync(toolsDir)) return paths;
551
- try {
626
+ if (existsSync(toolsDir)) try {
552
627
  for (const version of readdirSync(toolsDir)) {
553
628
  const claudePath = join(toolsDir, version, "lib", "node_modules", "@anthropic-ai", "claude-code", "cli.js");
554
629
  if (existsSync(claudePath)) paths.push(claudePath);
@@ -580,25 +655,61 @@ function findClaudeCodePath() {
580
655
  * Get current context limit from Claude Code
581
656
  */
582
657
  function getCurrentLimit(content) {
583
- const match = content.match(PATCHED_PATTERN);
584
- if (!match) return null;
585
- const limitMatch = match[0].match(/return (\d+)\}$/);
586
- return limitMatch ? Number.parseInt(limitMatch[1], 10) : null;
658
+ const varMatch = content.match(PATTERNS.variable);
659
+ if (varMatch) return Number.parseInt(varMatch[1], 10);
660
+ const funcMatch = content.match(PATTERNS.funcPatched);
661
+ if (funcMatch) {
662
+ const limitMatch = funcMatch[0].match(/return (\d+)\}$/);
663
+ return limitMatch ? Number.parseInt(limitMatch[1], 10) : null;
664
+ }
665
+ return null;
666
+ }
667
/**
 * Decide whether the Claude Code installation at cliPath can be patched.
 *
 * @param {string} cliPath - Path to Claude Code's cli.js.
 * @returns {{ supported: boolean, version: string | null, patternType: string | null, error?: string }}
 *   `supported: true` carries the detected version and pattern type;
 *   `supported: false` carries an `error` message explaining why.
 */
function checkVersionSupport(cliPath) {
  const version = getClaudeCodeVersion(cliPath);
  if (!version) {
    return {
      supported: false,
      version: null,
      patternType: null,
      error: "Could not detect Claude Code version",
    };
  }

  const patternType = getPatternTypeForVersion(version);
  if (patternType) {
    return { supported: true, version, patternType };
  }

  return {
    supported: false,
    version,
    patternType: null,
    error: `Version ${version} is not supported. Supported: ${getSupportedRangeString()}`,
  };
}
588
691
  /**
589
692
  * Patch Claude Code to use a different context limit
590
693
  */
591
694
  function patchClaudeCode(cliPath, newLimit) {
592
695
  const content = readFileSync(cliPath, "utf8");
696
+ const versionCheck = checkVersionSupport(cliPath);
697
+ if (!versionCheck.supported) {
698
+ consola.error(versionCheck.error);
699
+ return false;
700
+ }
701
+ consola.info(`Claude Code version: ${versionCheck.version}`);
593
702
  if (getCurrentLimit(content) === newLimit) {
594
703
  consola.info(`Already patched with limit ${newLimit}`);
595
704
  return true;
596
705
  }
597
- const replacement = `function HR(A){if(A.includes("[1m]"))return 1e6;return ${newLimit}}`;
598
706
  let newContent;
599
- if (ORIGINAL_PATTERN.test(content)) newContent = content.replace(ORIGINAL_PATTERN, replacement);
600
- else if (PATCHED_PATTERN.test(content)) newContent = content.replace(PATCHED_PATTERN, replacement);
601
- else return false;
707
+ if (versionCheck.patternType === "variable") newContent = content.replace(PATTERNS.variable, `var BS9=${newLimit}`);
708
+ else {
709
+ const replacement = `function HR(A){if(A.includes("[1m]"))return 1e6;return ${newLimit}}`;
710
+ const pattern = PATTERNS.funcOriginal.test(content) ? PATTERNS.funcOriginal : PATTERNS.funcPatched;
711
+ newContent = content.replace(pattern, replacement);
712
+ }
602
713
  writeFileSync(cliPath, newContent);
603
714
  return true;
604
715
  }
@@ -607,19 +718,28 @@ function patchClaudeCode(cliPath, newLimit) {
607
718
  */
608
719
  function restoreClaudeCode(cliPath) {
609
720
  const content = readFileSync(cliPath, "utf8");
721
+ const versionCheck = checkVersionSupport(cliPath);
722
+ if (!versionCheck.supported) {
723
+ consola.error(versionCheck.error);
724
+ return false;
725
+ }
726
+ consola.info(`Claude Code version: ${versionCheck.version}`);
610
727
  if (getCurrentLimit(content) === 2e5) {
611
728
  consola.info("Already at original 200000 limit");
612
729
  return true;
613
730
  }
614
- if (!PATCHED_PATTERN.test(content)) return false;
615
- const newContent = content.replace(PATCHED_PATTERN, "function HR(A){if(A.includes(\"[1m]\"))return 1e6;return 200000}");
731
+ let newContent;
732
+ if (versionCheck.patternType === "variable") newContent = content.replace(PATTERNS.variable, "var BS9=200000");
733
+ else newContent = content.replace(PATTERNS.funcPatched, "function HR(A){if(A.includes(\"[1m]\"))return 1e6;return 200000}");
616
734
  writeFileSync(cliPath, newContent);
617
735
  return true;
618
736
  }
619
/**
 * Log the detected Claude Code version (when available) and the patch status.
 *
 * @param {string} cliPath - Path to Claude Code's cli.js, used for version lookup.
 * @param {number | null} currentLimit - Limit read from cli.js, or null if it
 *   could not be detected.
 */
function showStatus(cliPath, currentLimit) {
  const version = getClaudeCodeVersion(cliPath);
  if (version) consola.info(`Claude Code version: ${version}`);

  if (currentLimit === null) {
    consola.warn("Could not detect current limit - CLI may have been updated");
    consola.info("Look for the BS9 variable or HR function pattern in cli.js");
    return;
  }

  const statusLine = currentLimit === 2e5
    ? "Status: Original (200k context window)"
    : `Status: Patched (${currentLimit} context window)`;
  consola.info(statusLine);
}
@@ -669,7 +789,7 @@ const patchClaude = defineCommand({
669
789
  const content = readFileSync(cliPath, "utf8");
670
790
  const currentLimit = getCurrentLimit(content);
671
791
  if (args.status) {
672
- showStatus(currentLimit);
792
+ showStatus(cliPath, currentLimit);
673
793
  return;
674
794
  }
675
795
  if (args.restore) {
@@ -698,6 +818,300 @@ const patchClaude = defineCommand({
698
818
  }
699
819
  });
700
820
 
821
+ //#endregion
822
+ //#region src/lib/adaptive-rate-limiter.ts
823
/** Default tuning values; any key can be overridden through the constructor. */
const DEFAULT_CONFIG$1 = {
  baseRetryIntervalSeconds: 10,
  maxRetryIntervalSeconds: 120,
  requestIntervalSeconds: 10,
  recoveryTimeoutMinutes: 10,
  consecutiveSuccessesForRecovery: 5,
  gradualRecoverySteps: [5, 2, 1, 0],
};

/**
 * Adaptive rate limiter that switches between normal, rate-limited, and recovering modes
 * based on API responses.
 *
 * - "normal": requests run immediately.
 * - "rate-limited": requests are queued and run one at a time with a minimum
 *   interval; 429 failures are retried with Retry-After or exponential backoff.
 * - "recovering": requests run directly again, spaced by the intervals listed
 *   in `gradualRecoverySteps`, until the last step completes.
 */
var AdaptiveRateLimiter = class {
  config;
  mode = "normal";
  queue = [];
  processing = false;
  rateLimitedAt = null;
  consecutiveSuccesses = 0;
  lastRequestTime = 0;
  /** Current step in gradual recovery (index into gradualRecoverySteps) */
  recoveryStepIndex = 0;

  /**
   * @param {object} [config] - Partial overrides for DEFAULT_CONFIG$1.
   */
  constructor(config = {}) {
    this.config = {
      ...DEFAULT_CONFIG$1,
      ...config,
    };
  }

  /**
   * Execute a request with adaptive rate limiting.
   * The request is retried automatically on rate-limit errors; any other
   * error rejects the returned promise.
   *
   * @param {() => Promise<unknown>} fn - The request to run.
   * @returns {Promise<{ result: unknown, queueWaitMs: number }>}
   */
  async execute(fn) {
    if (this.mode === "normal") return this.executeInNormalMode(fn);
    if (this.mode === "recovering") return this.executeInRecoveringMode(fn);
    return this.enqueue(fn);
  }

  /**
   * Check if an error is a rate limit error and extract Retry-After if available.
   * An error counts as a rate limit when its `status` is 429 or when its JSON
   * `responseText` carries `error.code === "rate_limited"`.
   *
   * @param {unknown} error
   * @returns {{ isRateLimit: boolean, retryAfter?: number }}
   */
  isRateLimitError(error) {
    if (!error || typeof error !== "object") return { isRateLimit: false };
    if ("status" in error && error.status === 429) {
      return {
        isRateLimit: true,
        retryAfter: this.extractRetryAfter(error),
      };
    }
    if ("responseText" in error && typeof error.responseText === "string") {
      try {
        const parsed = JSON.parse(error.responseText);
        if (parsed && typeof parsed === "object" && parsed.error && typeof parsed.error === "object" && parsed.error.code === "rate_limited") {
          // Honor a server-provided retry hint on this path as well.
          return {
            isRateLimit: true,
            retryAfter: this.extractRetryAfter(error),
          };
        }
      } catch {
        // Body is not JSON, so it cannot carry the rate_limited code.
      }
    }
    return { isRateLimit: false };
  }

  /**
   * Extract a numeric `retry_after` (top level or under `error`) from the
   * JSON `responseText` of an error.
   *
   * @param {unknown} error
   * @returns {number | undefined} Seconds to wait, or undefined when absent.
   */
  extractRetryAfter(error) {
    if (!error || typeof error !== "object") return void 0;
    if (!("responseText" in error) || typeof error.responseText !== "string") return void 0;
    try {
      const parsed = JSON.parse(error.responseText);
      if (!parsed || typeof parsed !== "object") return void 0;
      if (typeof parsed.retry_after === "number") return parsed.retry_after;
      if (parsed.error && typeof parsed.error === "object" && typeof parsed.error.retry_after === "number") return parsed.error.retry_after;
    } catch {
      // Body is not JSON, so there is no retry hint to read.
    }
    return void 0;
  }

  /**
   * Execute in normal mode - full speed.
   * On a rate-limit error the limiter switches to rate-limited mode and the
   * request is queued for a delayed retry.
   */
  async executeInNormalMode(fn) {
    try {
      return {
        result: await fn(),
        queueWaitMs: 0,
      };
    } catch (error) {
      const { isRateLimit, retryAfter } = this.isRateLimitError(error);
      if (!isRateLimit) throw error;
      this.enterRateLimitedMode();
      // Normal mode does not track request times; record the failed attempt
      // so the queued retry actually waits instead of firing immediately.
      this.lastRequestTime = Date.now();
      return this.enqueue(fn, retryAfter, 1);
    }
  }

  /**
   * Execute in recovering mode - gradual speedup.
   * Each success advances one recovery step; a rate-limit error drops back
   * to rate-limited mode and queues the request for a delayed retry.
   */
  async executeInRecoveringMode(fn) {
    const startTime = Date.now();
    const currentInterval = this.config.gradualRecoverySteps[this.recoveryStepIndex] ?? 0;
    if (currentInterval > 0) {
      const elapsedMs = Date.now() - this.lastRequestTime;
      const requiredMs = currentInterval * 1e3;
      if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
        await this.sleep(requiredMs - elapsedMs);
      }
    }
    this.lastRequestTime = Date.now();
    try {
      const result = await fn();
      this.recoveryStepIndex++;
      if (this.recoveryStepIndex >= this.config.gradualRecoverySteps.length) this.completeRecovery();
      else {
        const nextInterval = this.config.gradualRecoverySteps[this.recoveryStepIndex] ?? 0;
        consola.info(`[RateLimiter] Recovery step ${this.recoveryStepIndex}/${this.config.gradualRecoverySteps.length} (next interval: ${nextInterval}s)`);
      }
      return {
        result,
        queueWaitMs: Date.now() - startTime,
      };
    } catch (error) {
      const { isRateLimit, retryAfter } = this.isRateLimitError(error);
      if (!isRateLimit) throw error;
      consola.warn("[RateLimiter] Hit rate limit during recovery, returning to rate-limited mode");
      this.enterRateLimitedMode();
      // The request has already failed once, so it retries with backoff.
      return this.enqueue(fn, retryAfter, 1);
    }
  }

  /**
   * Enter rate-limited mode (no-op when already rate-limited).
   */
  enterRateLimitedMode() {
    if (this.mode === "rate-limited") return;
    this.mode = "rate-limited";
    this.rateLimitedAt = Date.now();
    this.consecutiveSuccesses = 0;
    consola.warn(`[RateLimiter] Entering rate-limited mode. Requests will be queued with exponential backoff (base: ${this.config.baseRetryIntervalSeconds}s).`);
  }

  /**
   * Check if we should try to recover to normal mode: either enough
   * consecutive successes, or enough time since the last rate-limit hit.
   */
  shouldAttemptRecovery() {
    if (this.consecutiveSuccesses >= this.config.consecutiveSuccessesForRecovery) {
      consola.info(`[RateLimiter] ${this.consecutiveSuccesses} consecutive successes. Starting gradual recovery.`);
      return true;
    }
    if (this.rateLimitedAt) {
      const elapsed = Date.now() - this.rateLimitedAt;
      const timeout = this.config.recoveryTimeoutMinutes * 60 * 1e3;
      if (elapsed >= timeout) {
        consola.info(`[RateLimiter] ${this.config.recoveryTimeoutMinutes} minutes elapsed. Starting gradual recovery.`);
        return true;
      }
    }
    return false;
  }

  /**
   * Start gradual recovery mode
   */
  startGradualRecovery() {
    this.mode = "recovering";
    this.recoveryStepIndex = 0;
    this.rateLimitedAt = null;
    this.consecutiveSuccesses = 0;
    const firstInterval = this.config.gradualRecoverySteps[0] ?? 0;
    consola.info(`[RateLimiter] Starting gradual recovery (${this.config.gradualRecoverySteps.length} steps, first interval: ${firstInterval}s)`);
  }

  /**
   * Complete recovery to normal mode
   */
  completeRecovery() {
    this.mode = "normal";
    this.recoveryStepIndex = 0;
    consola.success("[RateLimiter] Recovery complete. Full speed enabled.");
  }

  /**
   * Enqueue a request for later execution.
   *
   * @param {() => Promise<unknown>} fn - The request to run.
   * @param {number} [retryAfterSeconds] - Server-provided wait, if any.
   * @param {number} [initialRetryCount=0] - Number of attempts that already
   *   failed with a rate limit before the request was queued.
   * @returns {Promise<{ result: unknown, queueWaitMs: number }>}
   */
  enqueue(fn, retryAfterSeconds, initialRetryCount = 0) {
    return new Promise((resolve, reject) => {
      const request = {
        execute: fn,
        resolve,
        reject,
        retryCount: initialRetryCount,
        retryAfterSeconds,
        enqueuedAt: Date.now(),
      };
      this.queue.push(request);
      if (this.queue.length > 1) {
        const position = this.queue.length;
        const estimatedWait = (position - 1) * this.config.requestIntervalSeconds;
        consola.info(`[RateLimiter] Request queued (position ${position}, ~${estimatedWait}s wait)`);
      }
      // Fire and forget: each request settles through its own resolve/reject.
      void this.processQueue();
    });
  }

  /**
   * Calculate retry interval in seconds: the server's Retry-After when
   * present and positive, otherwise capped exponential backoff.
   */
  calculateRetryInterval(request) {
    if (request.retryAfterSeconds !== void 0 && request.retryAfterSeconds > 0) return request.retryAfterSeconds;
    const backoff = this.config.baseRetryIntervalSeconds * Math.pow(2, request.retryCount);
    return Math.min(backoff, this.config.maxRetryIntervalSeconds);
  }

  /**
   * Process the queue one request at a time. Only one loop runs at once;
   * concurrent calls return immediately while it is active.
   */
  async processQueue() {
    if (this.processing) return;
    this.processing = true;
    try {
      while (this.queue.length > 0) {
        const request = this.queue[0];
        if (this.shouldAttemptRecovery()) this.startGradualRecovery();
        const elapsedMs = Date.now() - this.lastRequestTime;
        // Requests that already hit a rate limit wait for Retry-After/backoff;
        // fresh requests only observe the regular request interval.
        const requiredMs = (request.retryCount > 0 ? this.calculateRetryInterval(request) : this.config.requestIntervalSeconds) * 1e3;
        if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
          const waitMs = requiredMs - elapsedMs;
          const waitSec = Math.ceil(waitMs / 1e3);
          consola.info(`[RateLimiter] Waiting ${waitSec}s before next request...`);
          await this.sleep(waitMs);
        }
        this.lastRequestTime = Date.now();
        try {
          const result = await request.execute();
          this.queue.shift();
          this.consecutiveSuccesses++;
          request.retryAfterSeconds = void 0;
          const queueWaitMs = Date.now() - request.enqueuedAt;
          request.resolve({
            result,
            queueWaitMs,
          });
          if (this.mode === "rate-limited") consola.info(`[RateLimiter] Request succeeded (${this.consecutiveSuccesses}/${this.config.consecutiveSuccessesForRecovery} for recovery)`);
        } catch (error) {
          const { isRateLimit, retryAfter } = this.isRateLimitError(error);
          if (isRateLimit) {
            // Keep the request at the head of the queue and retry it later.
            request.retryCount++;
            request.retryAfterSeconds = retryAfter;
            this.consecutiveSuccesses = 0;
            this.rateLimitedAt = Date.now();
            const nextInterval = this.calculateRetryInterval(request);
            const source = retryAfter ? "server Retry-After" : "exponential backoff";
            consola.warn(`[RateLimiter] Request failed with 429 (retry #${request.retryCount}). Retrying in ${nextInterval}s (${source})...`);
          } else {
            this.queue.shift();
            request.reject(error);
          }
        }
      }
    } finally {
      // Always release the flag so a later enqueue can restart processing.
      this.processing = false;
    }
  }

  /** Resolve after `ms` milliseconds. */
  sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Get current status for debugging/monitoring
   */
  getStatus() {
    return {
      mode: this.mode,
      queueLength: this.queue.length,
      consecutiveSuccesses: this.consecutiveSuccesses,
      rateLimitedAt: this.rateLimitedAt,
    };
  }
};
1088
/** Module-wide limiter; stays null until initAdaptiveRateLimiter is called. */
let rateLimiterInstance = null;

/**
 * Create the module-wide adaptive rate limiter and log its effective settings.
 *
 * @param {object} [config] - Partial overrides; missing (null/undefined) keys
 *   are reported using the values from DEFAULT_CONFIG$1.
 */
function initAdaptiveRateLimiter(config = {}) {
  rateLimiterInstance = new AdaptiveRateLimiter(config);

  // Resolve a setting for the log line: explicit value first, default otherwise.
  const setting = (key) => config[key] ?? DEFAULT_CONFIG$1[key];
  const gradual = setting("gradualRecoverySteps").join("s, ");

  consola.info(`[RateLimiter] Initialized (backoff: ${setting("baseRetryIntervalSeconds")}s-${setting("maxRetryIntervalSeconds")}s, interval: ${setting("requestIntervalSeconds")}s, recovery: ${setting("recoveryTimeoutMinutes")}min or ${setting("consecutiveSuccessesForRecovery")} successes, gradual: [${gradual}s])`);
}
1102
/**
 * Run a request through the module-wide adaptive rate limiter.
 * When no limiter has been initialized the request runs immediately.
 *
 * @param {() => Promise<unknown>} fn - The request to run.
 * @returns {Promise<{ result: unknown, queueWaitMs: number }>} The request's
 *   result together with the time spent waiting in the limiter.
 */
async function executeWithAdaptiveRateLimit(fn) {
  if (rateLimiterInstance) return rateLimiterInstance.execute(fn);
  const result = await fn();
  return { result, queueWaitMs: 0 };
}
1114
+
701
1115
  //#endregion
702
1116
  //#region src/lib/history.ts
703
1117
  function generateId$1() {
@@ -1101,16 +1515,27 @@ function formatTokens(input, output) {
1101
1515
  /**
1102
1516
  * Console renderer that shows request lifecycle with apt-get style footer
1103
1517
  *
1104
- * Log format (status prefix first, then timestamp):
1105
- * - Start: [....] HH:MM:SS METHOD /path model-name
1106
- * - Streaming: [<-->] HH:MM:SS METHOD /path model-name streaming...
1107
- * - Complete: [ OK ] HH:MM:SS METHOD /path 200 1.2s 1.5K/500 model-name
1108
- * - Error: [FAIL] HH:MM:SS METHOD /path 500 1.2s model-name: error message
1518
+ * Log format:
1519
+ * - Start: [....] HH:MM:SS METHOD /path model-name (debug only, dim)
1520
+ * - Streaming: [<-->] HH:MM:SS METHOD /path model-name streaming... (dim)
1521
+ * - Complete: [ OK ] HH:MM:SS METHOD /path model-name 200 1.2s 1.5K/500 (colored)
1522
+ * - Error: [FAIL] HH:MM:SS METHOD /path model-name 500 1.2s: error message (red)
1523
+ *
1524
+ * Color scheme for completed requests:
1525
+ * - Prefix: green (success) / red (error)
1526
+ * - Time: dim
1527
+ * - Method: cyan
1528
+ * - Path: white
1529
+ * - Model: magenta
1530
+ * - Status: green (success) / red (error)
1531
+ * - Duration: yellow
1532
+ * - Tokens: blue
1109
1533
  *
1110
1534
  * Features:
1111
- * - /history API requests are displayed in gray (dim)
1112
- * - Sticky footer shows active request count, updated in-place on the last line
1113
- * - Footer disappears when all requests complete
1535
+ * - Start lines only shown in debug mode (--verbose)
1536
+ * - Streaming lines are dim (less important)
1537
+ * - /history API requests are always dim
1538
+ * - Sticky footer shows active request count
1114
1539
  * - Intercepts consola output to properly handle footer
1115
1540
  */
1116
1541
  var ConsoleRenderer = class {
@@ -1186,25 +1611,52 @@ var ConsoleRenderer = class {
1186
1611
  }
1187
1612
  }
1188
1613
  /**
1614
+ * Format a complete log line with colored parts
1615
+ */
1616
+ formatLogLine(parts) {
1617
+ const { prefix, time, method, path: path$1, model, status, duration, tokens, queueWait, extra, isError, isDim } = parts;
1618
+ if (isDim) {
1619
+ const modelPart = model ? ` ${model}` : "";
1620
+ const extraPart = extra ? ` ${extra}` : "";
1621
+ return pc.dim(`${prefix} ${time} ${method} ${path$1}${modelPart}${extraPart}`);
1622
+ }
1623
+ const coloredPrefix = isError ? pc.red(prefix) : pc.green(prefix);
1624
+ const coloredTime = pc.dim(time);
1625
+ const coloredMethod = pc.cyan(method);
1626
+ const coloredPath = pc.white(path$1);
1627
+ const coloredModel = model ? pc.magenta(` ${model}`) : "";
1628
+ let result = `${coloredPrefix} ${coloredTime} ${coloredMethod} ${coloredPath}${coloredModel}`;
1629
+ if (status !== void 0) {
1630
+ const coloredStatus = isError ? pc.red(String(status)) : pc.green(String(status));
1631
+ result += ` ${coloredStatus}`;
1632
+ }
1633
+ if (duration) result += ` ${pc.yellow(duration)}`;
1634
+ if (queueWait) result += ` ${pc.dim(`(queued ${queueWait})`)}`;
1635
+ if (tokens) result += ` ${pc.blue(tokens)}`;
1636
+ if (extra) result += isError ? pc.red(extra) : extra;
1637
+ return result;
1638
+ }
1639
+ /**
1189
1640
  * Print a log line with proper footer handling
1190
- * 1. Clear footer if visible
1191
- * 2. Print log with newline
1192
- * 3. Re-render footer on new line (no newline after footer)
1193
1641
  */
1194
- printLog(message, isGray = false) {
1642
+ printLog(message) {
1195
1643
  this.clearFooterForLog();
1196
- if (isGray) process.stdout.write(pc.dim(message) + "\n");
1197
- else process.stdout.write(message + "\n");
1644
+ process.stdout.write(message + "\n");
1198
1645
  this.renderFooter();
1199
1646
  }
1200
1647
  onRequestStart(request) {
1201
1648
  this.activeRequests.set(request.id, request);
1202
- if (this.showActive) {
1203
- const time = formatTime();
1204
- const modelInfo = request.model ? ` ${request.model}` : "";
1205
- const queueInfo = request.queuePosition !== void 0 && request.queuePosition > 0 ? ` [q#${request.queuePosition}]` : "";
1206
- const message = `[....] ${time} ${request.method} ${request.path}${modelInfo}${queueInfo}`;
1207
- this.printLog(message, request.isHistoryAccess);
1649
+ if (this.showActive && consola.level >= 5) {
1650
+ const message = this.formatLogLine({
1651
+ prefix: "[....]",
1652
+ time: formatTime(),
1653
+ method: request.method,
1654
+ path: request.path,
1655
+ model: request.model,
1656
+ extra: request.queuePosition !== void 0 && request.queuePosition > 0 ? `[q#${request.queuePosition}]` : void 0,
1657
+ isDim: true
1658
+ });
1659
+ this.printLog(message);
1208
1660
  }
1209
1661
  }
1210
1662
  onRequestUpdate(id, update) {
@@ -1212,28 +1664,39 @@ var ConsoleRenderer = class {
1212
1664
  if (!request) return;
1213
1665
  Object.assign(request, update);
1214
1666
  if (this.showActive && update.status === "streaming") {
1215
- const time = formatTime();
1216
- const modelInfo = request.model ? ` ${request.model}` : "";
1217
- const message = `[<-->] ${time} ${request.method} ${request.path}${modelInfo} streaming...`;
1218
- this.printLog(message, request.isHistoryAccess);
1667
+ const message = this.formatLogLine({
1668
+ prefix: "[<-->]",
1669
+ time: formatTime(),
1670
+ method: request.method,
1671
+ path: request.path,
1672
+ model: request.model,
1673
+ extra: "streaming...",
1674
+ isDim: true
1675
+ });
1676
+ this.printLog(message);
1219
1677
  }
1220
1678
  }
1221
1679
  onRequestComplete(request) {
1222
1680
  this.activeRequests.delete(request.id);
1223
- const time = formatTime();
1224
1681
  const status = request.statusCode ?? 0;
1225
- const duration = formatDuration(request.durationMs ?? 0);
1226
- const tokens = request.model ? formatTokens(request.inputTokens, request.outputTokens) : "";
1227
- const modelInfo = request.model ? ` ${request.model}` : "";
1228
1682
  const isError = request.status === "error" || status >= 400;
1229
- const prefix = isError ? "[FAIL]" : "[ OK ]";
1230
- const tokensPart = tokens ? ` ${tokens}` : "";
1231
- let content = `${prefix} ${time} ${request.method} ${request.path} ${status} ${duration}${tokensPart}${modelInfo}`;
1232
- if (isError) {
1233
- const errorInfo = request.error ? `: ${request.error}` : "";
1234
- content += errorInfo;
1235
- }
1236
- this.printLog(content, request.isHistoryAccess);
1683
+ const tokens = request.model ? formatTokens(request.inputTokens, request.outputTokens) : void 0;
1684
+ const queueWait = request.queueWaitMs && request.queueWaitMs > 100 ? formatDuration(request.queueWaitMs) : void 0;
1685
+ const message = this.formatLogLine({
1686
+ prefix: isError ? "[FAIL]" : "[ OK ]",
1687
+ time: formatTime(),
1688
+ method: request.method,
1689
+ path: request.path,
1690
+ model: request.model,
1691
+ status,
1692
+ duration: formatDuration(request.durationMs ?? 0),
1693
+ queueWait,
1694
+ tokens,
1695
+ extra: isError && request.error ? `: ${request.error}` : void 0,
1696
+ isError,
1697
+ isDim: request.isHistoryAccess
1698
+ });
1699
+ this.printLog(message);
1237
1700
  }
1238
1701
  destroy() {
1239
1702
  if (this.footerVisible && this.isTTY) {
@@ -1634,20 +2097,50 @@ const getTokenCount = async (payload, model) => {
1634
2097
  //#region src/lib/auto-compact.ts
1635
2098
  const DEFAULT_CONFIG = {
1636
2099
  targetTokens: 12e4,
1637
- safetyMarginPercent: 2
2100
+ safetyMarginPercent: 2,
2101
+ maxRequestBodyBytes: 500 * 1024
1638
2102
  };
1639
2103
  /**
1640
- * Check if payload needs compaction based on model limits.
2104
+ * Dynamic byte limit that adjusts based on 413 errors.
2105
+ * Starts at 500KB and can be adjusted when 413 errors are encountered.
2106
+ */
2107
+ let dynamicByteLimitOverride = null;
2108
+ /**
2109
+ * Called when a 413 error is encountered with a specific payload size.
2110
+ * Adjusts the dynamic byte limit to 90% of the failing size.
2111
+ */
2112
+ function onRequestTooLarge(failingBytes) {
2113
+ const newLimit = Math.max(Math.floor(failingBytes * .9), 100 * 1024);
2114
+ dynamicByteLimitOverride = newLimit;
2115
+ consola.info(`[Auto-compact] Adjusted byte limit: ${Math.round(failingBytes / 1024)}KB failed, new limit: ${Math.round(newLimit / 1024)}KB`);
2116
+ }
2117
/**
 * Check if payload needs compaction based on model limits OR request body size.
 * Uses a safety margin to account for token counting differences.
 *
 * The body size is measured in UTF-8 bytes of the serialized payload (what is
 * actually sent over the wire), not in string length, so payloads containing
 * non-ASCII text are not under-counted.
 *
 * @param {object} payload - Request payload to measure.
 * @param {object} model - Model descriptor; `capabilities.limits.max_prompt_tokens`
 *   is used when present, otherwise 128000.
 * @param {object} [config] - Partial overrides for DEFAULT_CONFIG.
 * @returns {Promise<{ needed: boolean, currentTokens: number, tokenLimit: number,
 *   currentBytes: number, byteLimit: number, reason: "both" | "tokens" | "bytes" | undefined }>}
 */
async function checkNeedsCompaction(payload, model, config = {}) {
  const cfg = {
    ...DEFAULT_CONFIG,
    ...config,
  };
  const currentTokens = (await getTokenCount(payload, model)).input;
  const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
  const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
  // String length counts UTF-16 code units; the limit is expressed in bytes.
  const currentBytes = Buffer.byteLength(JSON.stringify(payload), "utf8");
  // A limit learned from an earlier 413 takes precedence over the configured one.
  const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
  const exceedsTokens = currentTokens > tokenLimit;
  const exceedsBytes = currentBytes > byteLimit;
  let reason;
  if (exceedsTokens && exceedsBytes) reason = "both";
  else if (exceedsTokens) reason = "tokens";
  else if (exceedsBytes) reason = "bytes";
  return {
    needed: exceedsTokens || exceedsBytes,
    currentTokens,
    tokenLimit,
    currentBytes,
    byteLimit,
    reason,
  };
}
1653
2146
  /**
@@ -1754,7 +2247,7 @@ function createTruncationMarker(removedCount) {
1754
2247
  };
1755
2248
  }
1756
2249
  /**
1757
- * Perform auto-compaction on a payload that exceeds token limits.
2250
+ * Perform auto-compaction on a payload that exceeds token or size limits.
1758
2251
  * This uses simple truncation - no LLM calls required.
1759
2252
  * Uses iterative approach with decreasing target tokens until under limit.
1760
2253
  */
@@ -1765,21 +2258,29 @@ async function autoCompact(payload, model, config = {}) {
1765
2258
  };
1766
2259
  const originalTokens = (await getTokenCount(payload, model)).input;
1767
2260
  const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
1768
- const limit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
1769
- if (originalTokens <= limit) return {
2261
+ const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
2262
+ const originalBytes = JSON.stringify(payload).length;
2263
+ const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
2264
+ if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
1770
2265
  payload,
1771
2266
  wasCompacted: false,
1772
2267
  originalTokens,
1773
2268
  compactedTokens: originalTokens,
1774
2269
  removedMessageCount: 0
1775
2270
  };
1776
- consola.info(`Auto-compact: ${originalTokens} tokens exceeds limit of ${limit}, truncating...`);
2271
+ const exceedsTokens = originalTokens > tokenLimit;
2272
+ const exceedsBytes = originalBytes > byteLimit;
2273
+ let reason;
2274
+ if (exceedsTokens && exceedsBytes) reason = "tokens and size";
2275
+ else if (exceedsBytes) reason = "size";
2276
+ else reason = "tokens";
2277
+ consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB), truncating...`);
1777
2278
  const { systemMessages, remainingMessages } = extractSystemMessages(payload.messages);
1778
2279
  const systemTokens = estimateSystemTokens(systemMessages);
1779
2280
  consola.debug(`Auto-compact: ${systemMessages.length} system messages (~${systemTokens} tokens)`);
1780
2281
  const MAX_ITERATIONS = 5;
1781
2282
  const MIN_TARGET = 2e4;
1782
- let currentTarget = Math.min(cfg.targetTokens, limit);
2283
+ let currentTarget = Math.min(cfg.targetTokens, tokenLimit);
1783
2284
  let lastResult = null;
1784
2285
  for (let iteration = 0; iteration < MAX_ITERATIONS; iteration++) {
1785
2286
  const result = await tryCompactWithTarget({
@@ -1789,16 +2290,21 @@ async function autoCompact(payload, model, config = {}) {
1789
2290
  remainingMessages,
1790
2291
  systemTokens,
1791
2292
  targetTokens: currentTarget,
1792
- limit,
2293
+ limit: tokenLimit,
1793
2294
  originalTokens
1794
2295
  });
1795
2296
  if (!result.wasCompacted) return result;
1796
2297
  lastResult = result;
1797
- if (result.compactedTokens <= limit) {
1798
- consola.info(`Auto-compact: ${originalTokens} ${result.compactedTokens} tokens (removed ${result.removedMessageCount} messages)`);
2298
+ const resultBytes = JSON.stringify(result.payload).length;
2299
+ const underTokenLimit = result.compactedTokens <= tokenLimit;
2300
+ const underByteLimit = resultBytes <= byteLimit;
2301
+ if (underTokenLimit && underByteLimit) {
2302
+ consola.info(`Auto-compact: ${originalTokens} → ${result.compactedTokens} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(resultBytes / 1024)}KB (removed ${result.removedMessageCount} messages)`);
1799
2303
  return result;
1800
2304
  }
1801
- consola.warn(`Auto-compact: Still over limit (${result.compactedTokens} > ${limit}), trying more aggressive truncation`);
2305
+ const tokenStatus = underTokenLimit ? "OK" : `${result.compactedTokens} > ${tokenLimit}`;
2306
+ const byteStatus = underByteLimit ? "OK" : `${Math.round(resultBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB`;
2307
+ consola.warn(`Auto-compact: Still over limit (tokens: ${tokenStatus}, size: ${byteStatus}), trying more aggressive truncation`);
1802
2308
  currentTarget = Math.floor(currentTarget * .7);
1803
2309
  if (currentTarget < MIN_TARGET) {
1804
2310
  consola.error("Auto-compact: Cannot reduce further, target too low");
@@ -1874,66 +2380,6 @@ function createCompactionMarker(result) {
1874
2380
  return `\n\n---\n[Auto-compacted: ${result.removedMessageCount} messages removed, ${result.originalTokens} → ${result.compactedTokens} tokens (${percentage}% reduction)]`;
1875
2381
  }
1876
2382
 
1877
- //#endregion
1878
- //#region src/lib/queue.ts
1879
- var RequestQueue = class {
1880
- queue = [];
1881
- processing = false;
1882
- lastRequestTime = 0;
1883
- async enqueue(execute, rateLimitSeconds) {
1884
- return new Promise((resolve, reject) => {
1885
- const request = {
1886
- execute,
1887
- resolve,
1888
- reject
1889
- };
1890
- this.queue.push(request);
1891
- if (this.queue.length > 1) {
1892
- const position = this.queue.length;
1893
- const waitTime = Math.ceil((position - 1) * rateLimitSeconds);
1894
- (waitTime > 10 ? consola.warn : consola.info)(`Rate limit: request queued (position ${position}, ~${waitTime}s wait)`);
1895
- }
1896
- this.processQueue(rateLimitSeconds);
1897
- });
1898
- }
1899
- async processQueue(rateLimitSeconds) {
1900
- if (this.processing) return;
1901
- this.processing = true;
1902
- while (this.queue.length > 0) {
1903
- const elapsedMs = Date.now() - this.lastRequestTime;
1904
- const requiredMs = rateLimitSeconds * 1e3;
1905
- if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
1906
- const waitMs = requiredMs - elapsedMs;
1907
- const waitSec = Math.ceil(waitMs / 1e3);
1908
- (waitSec > 10 ? consola.warn : consola.info)(`Rate limit: waiting ${waitSec}s before next request...`);
1909
- await new Promise((resolve) => setTimeout(resolve, waitMs));
1910
- }
1911
- const request = this.queue.shift();
1912
- if (!request) break;
1913
- this.lastRequestTime = Date.now();
1914
- try {
1915
- const result = await request.execute();
1916
- request.resolve(result);
1917
- } catch (error) {
1918
- request.reject(error);
1919
- }
1920
- }
1921
- this.processing = false;
1922
- }
1923
- get length() {
1924
- return this.queue.length;
1925
- }
1926
- };
1927
- const requestQueue = new RequestQueue();
1928
- /**
1929
- * Execute a request with rate limiting via queue.
1930
- * Requests are queued and processed sequentially at the configured rate.
1931
- */
1932
- async function executeWithRateLimit(state$1, execute) {
1933
- if (state$1.rateLimitSeconds === void 0) return execute();
1934
- return requestQueue.enqueue(execute, state$1.rateLimitSeconds);
1935
- }
1936
-
1937
2383
  //#endregion
1938
2384
  //#region src/services/copilot/create-chat-completions.ts
1939
2385
  const createChatCompletions = async (payload) => {
@@ -1984,11 +2430,12 @@ function recordErrorResponse(ctx, model, error) {
1984
2430
  }, Date.now() - ctx.startTime);
1985
2431
  }
1986
2432
  /** Complete TUI tracking */
1987
- function completeTracking(trackingId, inputTokens, outputTokens) {
2433
+ function completeTracking(trackingId, inputTokens, outputTokens, queueWaitMs) {
1988
2434
  if (!trackingId) return;
1989
2435
  requestTracker.updateRequest(trackingId, {
1990
2436
  inputTokens,
1991
- outputTokens
2437
+ outputTokens,
2438
+ queueWaitMs
1992
2439
  });
1993
2440
  requestTracker.completeRequest(trackingId, 200, {
1994
2441
  inputTokens,
@@ -2029,12 +2476,16 @@ async function buildFinalPayload(payload, model) {
2029
2476
  }
2030
2477
  try {
2031
2478
  const check = await checkNeedsCompaction(payload, model);
2032
- consola.debug(`Auto-compact check: ${check.currentTokens} tokens, limit ${check.limit}, needed: ${check.needed}`);
2479
+ consola.debug(`Auto-compact check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
2033
2480
  if (!check.needed) return {
2034
2481
  finalPayload: payload,
2035
2482
  compactResult: null
2036
2483
  };
2037
- consola.info(`Auto-compact triggered: ${check.currentTokens} tokens > ${check.limit} limit`);
2484
+ let reasonText;
2485
+ if (check.reason === "both") reasonText = "tokens and size";
2486
+ else if (check.reason === "bytes") reasonText = "size";
2487
+ else reasonText = "tokens";
2488
+ consola.info(`Auto-compact triggered: exceeds ${reasonText} limit`);
2038
2489
  const compactResult = await autoCompact(payload, model);
2039
2490
  return {
2040
2491
  finalPayload: compactResult.payload,
@@ -2050,11 +2501,13 @@ async function buildFinalPayload(payload, model) {
2050
2501
  }
2051
2502
  /**
2052
2503
  * Log helpful debugging information when a 413 error occurs.
2504
+ * Also adjusts the dynamic byte limit for future requests.
2053
2505
  */
2054
2506
  async function logPayloadSizeInfo(payload, model) {
2055
2507
  const messageCount = payload.messages.length;
2056
2508
  const bodySize = JSON.stringify(payload).length;
2057
2509
  const bodySizeKB = Math.round(bodySize / 1024);
2510
+ onRequestTooLarge(bodySize);
2058
2511
  let imageCount = 0;
2059
2512
  let largeMessages = 0;
2060
2513
  let totalImageSize = 0;
@@ -2140,7 +2593,8 @@ async function handleCompletion$1(c) {
2140
2593
  async function executeRequest(opts) {
2141
2594
  const { c, payload, selectedModel, ctx, trackingId } = opts;
2142
2595
  try {
2143
- const response = await executeWithRateLimit(state, () => createChatCompletions(payload));
2596
+ const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(payload));
2597
+ ctx.queueWaitMs = queueWaitMs;
2144
2598
  if (isNonStreaming(response)) return handleNonStreamingResponse$1(c, response, ctx);
2145
2599
  consola.debug("Streaming response");
2146
2600
  updateTrackerStatus(trackingId, "streaming");
@@ -2199,7 +2653,8 @@ function handleNonStreamingResponse$1(c, originalResponse, ctx) {
2199
2653
  }, Date.now() - ctx.startTime);
2200
2654
  if (ctx.trackingId && usage) requestTracker.updateRequest(ctx.trackingId, {
2201
2655
  inputTokens: usage.prompt_tokens,
2202
- outputTokens: usage.completion_tokens
2656
+ outputTokens: usage.completion_tokens,
2657
+ queueWaitMs: ctx.queueWaitMs
2203
2658
  });
2204
2659
  return c.json(response);
2205
2660
  }
@@ -2265,7 +2720,7 @@ async function handleStreamingResponse$1(opts) {
2265
2720
  acc.content += marker;
2266
2721
  }
2267
2722
  recordStreamSuccess(acc, payload.model, ctx);
2268
- completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens);
2723
+ completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
2269
2724
  } catch (error) {
2270
2725
  recordStreamError({
2271
2726
  acc,
@@ -2364,7 +2819,7 @@ completionRoutes.post("/", async (c) => {
2364
2819
  try {
2365
2820
  return await handleCompletion$1(c);
2366
2821
  } catch (error) {
2367
- return await forwardError(c, error);
2822
+ return forwardError(c, error);
2368
2823
  }
2369
2824
  });
2370
2825
 
@@ -2390,7 +2845,7 @@ embeddingRoutes.post("/", async (c) => {
2390
2845
  const response = await createEmbeddings(payload);
2391
2846
  return c.json(response);
2392
2847
  } catch (error) {
2393
- return await forwardError(c, error);
2848
+ return forwardError(c, error);
2394
2849
  }
2395
2850
  });
2396
2851
 
@@ -4022,7 +4477,8 @@ async function handleCompletion(c) {
4022
4477
  if (compactResult) ctx.compactResult = compactResult;
4023
4478
  if (state.manualApprove) await awaitApproval();
4024
4479
  try {
4025
- const response = await executeWithRateLimit(state, () => createChatCompletions(openAIPayload));
4480
+ const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(openAIPayload));
4481
+ ctx.queueWaitMs = queueWaitMs;
4026
4482
  if (isNonStreaming(response)) return handleNonStreamingResponse({
4027
4483
  c,
4028
4484
  response,
@@ -4080,7 +4536,8 @@ function handleNonStreamingResponse(opts) {
4080
4536
  }, Date.now() - ctx.startTime);
4081
4537
  if (ctx.trackingId) requestTracker.updateRequest(ctx.trackingId, {
4082
4538
  inputTokens: anthropicResponse.usage.input_tokens,
4083
- outputTokens: anthropicResponse.usage.output_tokens
4539
+ outputTokens: anthropicResponse.usage.output_tokens,
4540
+ queueWaitMs: ctx.queueWaitMs
4084
4541
  });
4085
4542
  return c.json(anthropicResponse);
4086
4543
  }
@@ -4136,7 +4593,7 @@ async function handleStreamingResponse(opts) {
4136
4593
  acc.content += marker;
4137
4594
  }
4138
4595
  recordStreamingResponse(acc, anthropicPayload.model, ctx);
4139
- completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens);
4596
+ completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
4140
4597
  } catch (error) {
4141
4598
  consola.error("Stream error:", error);
4142
4599
  recordStreamError({
@@ -4334,14 +4791,14 @@ messageRoutes.post("/", async (c) => {
4334
4791
  try {
4335
4792
  return await handleCompletion(c);
4336
4793
  } catch (error) {
4337
- return await forwardError(c, error);
4794
+ return forwardError(c, error);
4338
4795
  }
4339
4796
  });
4340
4797
  messageRoutes.post("/count_tokens", async (c) => {
4341
4798
  try {
4342
4799
  return await handleCountTokens(c);
4343
4800
  } catch (error) {
4344
- return await forwardError(c, error);
4801
+ return forwardError(c, error);
4345
4802
  }
4346
4803
  });
4347
4804
 
@@ -4380,18 +4837,18 @@ modelRoutes.get("/", async (c) => {
4380
4837
  has_more: false
4381
4838
  });
4382
4839
  } catch (error) {
4383
- return await forwardError(c, error);
4840
+ return forwardError(c, error);
4384
4841
  }
4385
4842
  });
4386
4843
 
4387
4844
  //#endregion
4388
4845
  //#region src/routes/token/route.ts
4389
4846
  const tokenRoute = new Hono();
4390
- tokenRoute.get("/", async (c) => {
4847
+ tokenRoute.get("/", (c) => {
4391
4848
  try {
4392
4849
  return c.json({ token: state.copilotToken });
4393
4850
  } catch (error) {
4394
- return await forwardError(c, error);
4851
+ return forwardError(c, error);
4395
4852
  }
4396
4853
  });
4397
4854
 
@@ -4403,7 +4860,7 @@ usageRoute.get("/", async (c) => {
4403
4860
  const usage = await getCopilotUsage();
4404
4861
  return c.json(usage);
4405
4862
  } catch (error) {
4406
- return await forwardError(c, error);
4863
+ return forwardError(c, error);
4407
4864
  }
4408
4865
  });
4409
4866
 
@@ -4455,10 +4912,15 @@ async function runServer(options) {
4455
4912
  state.accountType = options.accountType;
4456
4913
  if (options.accountType !== "individual") consola.info(`Using ${options.accountType} plan GitHub account`);
4457
4914
  state.manualApprove = options.manual;
4458
- state.rateLimitSeconds = options.rateLimit;
4459
- state.rateLimitWait = options.rateLimitWait;
4460
4915
  state.showToken = options.showToken;
4461
4916
  state.autoCompact = options.autoCompact;
4917
+ if (options.rateLimit) initAdaptiveRateLimiter({
4918
+ baseRetryIntervalSeconds: options.retryInterval,
4919
+ requestIntervalSeconds: options.requestInterval,
4920
+ recoveryTimeoutMinutes: options.recoveryTimeout,
4921
+ consecutiveSuccessesForRecovery: options.consecutiveSuccesses
4922
+ });
4923
+ else consola.info("Rate limiting disabled");
4462
4924
  if (options.autoCompact) consola.info("Auto-compact enabled: will compress context when exceeding token limits");
4463
4925
  initHistory(options.history, options.historyLimit);
4464
4926
  if (options.history) {
@@ -4545,16 +5007,30 @@ const start = defineCommand({
4545
5007
  default: false,
4546
5008
  description: "Enable manual request approval"
4547
5009
  },
4548
- "rate-limit": {
4549
- alias: "r",
4550
- type: "string",
4551
- description: "Rate limit in seconds between requests"
4552
- },
4553
- wait: {
4554
- alias: "w",
5010
+ "no-rate-limit": {
4555
5011
  type: "boolean",
4556
5012
  default: false,
4557
- description: "Wait instead of error when rate limit is hit. Has no effect if rate limit is not set"
5013
+ description: "Disable adaptive rate limiting"
5014
+ },
5015
+ "retry-interval": {
5016
+ type: "string",
5017
+ default: "10",
5018
+ description: "Seconds to wait before retrying after rate limit error (default: 10)"
5019
+ },
5020
+ "request-interval": {
5021
+ type: "string",
5022
+ default: "10",
5023
+ description: "Seconds between requests in rate-limited mode (default: 10)"
5024
+ },
5025
+ "recovery-timeout": {
5026
+ type: "string",
5027
+ default: "10",
5028
+ description: "Minutes before attempting to recover from rate-limited mode (default: 10)"
5029
+ },
5030
+ "consecutive-successes": {
5031
+ type: "string",
5032
+ default: "5",
5033
+ description: "Number of consecutive successes needed to recover from rate-limited mode (default: 5)"
4558
5034
  },
4559
5035
  "github-token": {
4560
5036
  alias: "g",
@@ -4577,10 +5053,10 @@ const start = defineCommand({
4577
5053
  default: false,
4578
5054
  description: "Initialize proxy from environment variables"
4579
5055
  },
4580
- history: {
5056
+ "no-history": {
4581
5057
  type: "boolean",
4582
5058
  default: false,
4583
- description: "Enable request history recording and Web UI at /history"
5059
+ description: "Disable request history recording and Web UI"
4584
5060
  },
4585
5061
  "history-limit": {
4586
5062
  type: "string",
@@ -4594,21 +5070,22 @@ const start = defineCommand({
4594
5070
  }
4595
5071
  },
4596
5072
  run({ args }) {
4597
- const rateLimitRaw = args["rate-limit"];
4598
- const rateLimit = rateLimitRaw === void 0 ? void 0 : Number.parseInt(rateLimitRaw, 10);
4599
5073
  return runServer({
4600
5074
  port: Number.parseInt(args.port, 10),
4601
5075
  host: args.host,
4602
5076
  verbose: args.verbose,
4603
5077
  accountType: args["account-type"],
4604
5078
  manual: args.manual,
4605
- rateLimit,
4606
- rateLimitWait: args.wait,
5079
+ rateLimit: !args["no-rate-limit"],
5080
+ retryInterval: Number.parseInt(args["retry-interval"], 10),
5081
+ requestInterval: Number.parseInt(args["request-interval"], 10),
5082
+ recoveryTimeout: Number.parseInt(args["recovery-timeout"], 10),
5083
+ consecutiveSuccesses: Number.parseInt(args["consecutive-successes"], 10),
4607
5084
  githubToken: args["github-token"],
4608
5085
  claudeCode: args["claude-code"],
4609
5086
  showToken: args["show-token"],
4610
5087
  proxyEnv: args["proxy-env"],
4611
- history: args.history,
5088
+ history: !args["no-history"],
4612
5089
  historyLimit: Number.parseInt(args["history-limit"], 10),
4613
5090
  autoCompact: args["auto-compact"]
4614
5091
  });