@hsupu/copilot-api 0.7.5 → 0.7.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.js +642 -165
- package/dist/main.js.map +1 -1
- package/package.json +1 -1
package/dist/main.js
CHANGED
|
@@ -3,7 +3,7 @@ import { defineCommand, runMain } from "citty";
|
|
|
3
3
|
import consola from "consola";
|
|
4
4
|
import fs from "node:fs/promises";
|
|
5
5
|
import os from "node:os";
|
|
6
|
-
import path, { join } from "node:path";
|
|
6
|
+
import path, { dirname, join } from "node:path";
|
|
7
7
|
import { randomUUID } from "node:crypto";
|
|
8
8
|
import { existsSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
|
|
9
9
|
import clipboard from "clipboardy";
|
|
@@ -45,7 +45,6 @@ async function ensureFile(filePath) {
|
|
|
45
45
|
/**
 * Shared settings object with its initial values.
 * The object is not frozen, so properties can still be changed or added.
 */
const state = {
  accountType: "individual",
  manualApprove: false,
  showToken: false,
  autoCompact: false,
};
|
|
@@ -137,6 +136,16 @@ function formatRequestTooLargeError() {
|
|
|
137
136
|
}
|
|
138
137
|
};
|
|
139
138
|
}
|
|
139
|
+
/**
 * Build the Anthropic-compatible error body used for rate limit (429) responses.
 *
 * @param {string | null | undefined} copilotMessage - Upstream message to surface;
 *   a generic message is substituted only when this is null or undefined.
 * @returns {{ type: string, error: { type: string, message: string } }}
 */
function formatRateLimitError(copilotMessage) {
  const fallbackMessage = "You have exceeded your rate limit. Please try again later.";
  const error = {
    type: "rate_limit_error",
    message: copilotMessage ?? fallbackMessage,
  };
  return { type: "error", error };
}
|
|
140
149
|
function forwardError(c, error) {
|
|
141
150
|
consola.error("Error occurred:", error);
|
|
142
151
|
if (error instanceof HTTPError) {
|
|
@@ -161,6 +170,11 @@ function forwardError(c, error) {
|
|
|
161
170
|
return c.json(formattedError, 400);
|
|
162
171
|
}
|
|
163
172
|
}
|
|
173
|
+
if (error.status === 429 || copilotError.error?.code === "rate_limited") {
|
|
174
|
+
const formattedError = formatRateLimitError(copilotError.error?.message);
|
|
175
|
+
consola.debug("Returning formatted rate limit error:", formattedError);
|
|
176
|
+
return c.json(formattedError, 429);
|
|
177
|
+
}
|
|
164
178
|
return c.json({ error: {
|
|
165
179
|
message: error.responseText,
|
|
166
180
|
type: "error"
|
|
@@ -539,16 +553,77 @@ const logout = defineCommand({
|
|
|
539
553
|
|
|
540
554
|
//#endregion
|
|
541
555
|
//#region src/patch-claude.ts
|
|
542
|
-
const
|
|
543
|
-
|
|
556
|
+
/**
 * Inclusive Claude Code version ranges that can be patched.
 * `v2a` versions are patched through the `HR` function pattern,
 * `v2b` versions through the `BS9` variable pattern.
 */
const SUPPORTED_VERSIONS = {
  v2a: { min: "2.0.0", max: "2.1.10" },
  v2b: { min: "2.1.11", max: "2.1.12" },
};

/** Regular expressions locating the context limit inside Claude Code's cli.js. */
const PATTERNS = {
  // The HR function with the stock 200000 limit.
  funcOriginal: /function HR\(A\)\{if\(A\.includes\("\[1m\]"\)\)return 1e6;return 200000\}/,
  // The HR function with any numeric limit (matches the stock form as well).
  funcPatched: /function HR\(A\)\{if\(A\.includes\("\[1m\]"\)\)return 1e6;return \d+\}/,
  // The BS9 variable assignment; capture group 1 is the limit.
  variable: /var BS9=(\d+)/,
};
|
|
571
|
+
/**
 * Split a dotted version string into numeric parts.
 *
 * Each dot-separated segment is read with a base-10 parseInt; a segment that
 * does not start with digits becomes 0.
 *
 * @param {string} version - e.g. "2.1.11"
 * @returns {number[]} One number per segment.
 */
function parseVersion(version) {
  const parts = [];
  for (const segment of version.split(".")) {
    const value = Number.parseInt(segment, 10);
    parts.push(value || 0);
  }
  return parts;
}
|
|
577
|
+
/**
 * Compare two dotted version strings part by part.
 * Missing trailing parts are treated as 0, so "2.1" equals "2.1.0".
 *
 * @param {string} a
 * @param {string} b
 * @returns {-1 | 0 | 1} -1 if a < b, 0 if a == b, 1 if a > b
 */
function compareVersions(a, b) {
  const left = parseVersion(a);
  const right = parseVersion(b);
  const width = Math.max(left.length, right.length);
  for (let index = 0; index < width; index += 1) {
    const delta = (left[index] || 0) - (right[index] || 0);
    if (delta !== 0) return delta < 0 ? -1 : 1;
  }
  return 0;
}
|
|
593
|
+
/**
 * Map a Claude Code version to the patch strategy used for it.
 *
 * @param {string} version
 * @returns {"func" | "variable" | null} "func" inside the v2a range, "variable"
 *   inside the v2b range (both inclusive), otherwise null.
 */
function getPatternTypeForVersion(version) {
  const isWithin = ({ min, max }) =>
    compareVersions(version, min) >= 0 && compareVersions(version, max) <= 0;
  if (isWithin(SUPPORTED_VERSIONS.v2a)) return "func";
  if (isWithin(SUPPORTED_VERSIONS.v2b)) return "variable";
  return null;
}
|
|
598
|
+
/**
 * Describe the supported version ranges for use in error messages.
 *
 * @returns {string} The v2a and v2b ranges as "min-max", separated by ", ".
 */
function getSupportedRangeString() {
  const { v2a, v2b } = SUPPORTED_VERSIONS;
  return [v2a, v2b].map(({ min, max }) => `${min}-${max}`).join(", ");
}
|
|
604
|
+
/**
 * Read the Claude Code version from the package.json that sits next to cli.js.
 *
 * @param {string} cliPath - Path to Claude Code's cli.js.
 * @returns {string | null} The `version` string, or null when package.json is
 *   missing, unreadable, not valid JSON, or has no string `version` field.
 */
function getClaudeCodeVersion(cliPath) {
  try {
    const manifestPath = join(dirname(cliPath), "package.json");
    if (!existsSync(manifestPath)) return null;
    const manifest = JSON.parse(readFileSync(manifestPath, "utf8"));
    const isRecord = typeof manifest === "object" && manifest !== null;
    if (!isRecord || !("version" in manifest)) return null;
    return typeof manifest.version === "string" ? manifest.version : null;
  } catch {
    // Any failure is reported as "version unknown" rather than thrown.
    return null;
  }
}
|
|
544
618
|
/**
|
|
545
619
|
* Search volta tools directory for Claude Code
|
|
546
620
|
*/
|
|
547
621
|
function findInVoltaTools(voltaHome) {
|
|
548
622
|
const paths = [];
|
|
623
|
+
const packagesPath = join(voltaHome, "tools", "image", "packages", "@anthropic-ai", "claude-code", "lib", "node_modules", "@anthropic-ai", "claude-code", "cli.js");
|
|
624
|
+
if (existsSync(packagesPath)) paths.push(packagesPath);
|
|
549
625
|
const toolsDir = join(voltaHome, "tools", "image", "node");
|
|
550
|
-
if (
|
|
551
|
-
try {
|
|
626
|
+
if (existsSync(toolsDir)) try {
|
|
552
627
|
for (const version of readdirSync(toolsDir)) {
|
|
553
628
|
const claudePath = join(toolsDir, version, "lib", "node_modules", "@anthropic-ai", "claude-code", "cli.js");
|
|
554
629
|
if (existsSync(claudePath)) paths.push(claudePath);
|
|
@@ -580,25 +655,61 @@ function findClaudeCodePath() {
|
|
|
580
655
|
* Get current context limit from Claude Code
|
|
581
656
|
*/
|
|
582
657
|
/**
 * Extract the context limit currently present in cli.js content.
 *
 * The BS9 variable pattern is tried first, then the HR function pattern.
 *
 * @param {string} content - Full text of cli.js.
 * @returns {number | null} The limit, or null when neither pattern is found.
 */
function getCurrentLimit(content) {
  const variableHit = content.match(PATTERNS.variable);
  if (variableHit) return Number.parseInt(variableHit[1], 10);

  const functionHit = content.match(PATTERNS.funcPatched);
  if (!functionHit) return null;

  // The limit is the number in the function's trailing `return N}`.
  const tail = functionHit[0].match(/return (\d+)\}$/);
  if (!tail) return null;
  return Number.parseInt(tail[1], 10);
}
|
|
667
|
+
/**
 * Check whether the Claude Code installation at `cliPath` can be patched.
 *
 * @param {string} cliPath - Path to Claude Code's cli.js.
 * @returns {{ supported: boolean, version: string | null, patternType: string | null, error?: string }}
 *   On success: `supported: true` with the detected version and pattern type.
 *   On failure: `supported: false`, `patternType: null` and an `error` message.
 */
function checkVersionSupport(cliPath) {
  const reject = (detectedVersion, error) => ({
    supported: false,
    version: detectedVersion,
    patternType: null,
    error,
  });

  const version = getClaudeCodeVersion(cliPath);
  if (!version) return reject(null, "Could not detect Claude Code version");

  const patternType = getPatternTypeForVersion(version);
  if (patternType) return { supported: true, version, patternType };

  return reject(version, `Version ${version} is not supported. Supported: ${getSupportedRangeString()}`);
}
|
|
588
691
|
/**
|
|
589
692
|
* Patch Claude Code to use a different context limit
|
|
590
693
|
*/
|
|
591
694
|
/**
 * Patch Claude Code's cli.js so it uses `newLimit` as its context limit.
 *
 * @param {string} cliPath - Path to Claude Code's cli.js.
 * @param {number} newLimit - Context limit to write into the file.
 * @returns {boolean} true when the file already has this limit or was patched;
 *   false when the version is unsupported or the expected pattern is not
 *   present in the file (in which case the file is left untouched).
 */
function patchClaudeCode(cliPath, newLimit) {
  const content = readFileSync(cliPath, "utf8");
  const versionCheck = checkVersionSupport(cliPath);
  if (!versionCheck.supported) {
    consola.error(versionCheck.error);
    return false;
  }
  consola.info(`Claude Code version: ${versionCheck.version}`);
  if (getCurrentLimit(content) === newLimit) {
    consola.info(`Already patched with limit ${newLimit}`);
    return true;
  }

  let pattern;
  let replacement;
  if (versionCheck.patternType === "variable") {
    pattern = PATTERNS.variable;
    replacement = `var BS9=${newLimit}`;
  } else {
    pattern = PATTERNS.funcOriginal.test(content) ? PATTERNS.funcOriginal : PATTERNS.funcPatched;
    replacement = `function HR(A){if(A.includes("[1m]"))return 1e6;return ${newLimit}}`;
  }

  // Without this guard `replace` would be a no-op and an unpatched file would
  // be rewritten and reported as a successful patch.
  if (!pattern.test(content)) {
    consola.error(`Could not find the ${versionCheck.patternType} pattern in cli.js (version ${versionCheck.version}) - nothing was patched`);
    return false;
  }

  // A replacer function keeps the replacement text literal (no `$` expansion).
  writeFileSync(cliPath, content.replace(pattern, () => replacement));
  return true;
}
|
|
@@ -607,19 +718,28 @@ function patchClaudeCode(cliPath, newLimit) {
|
|
|
607
718
|
*/
|
|
608
719
|
/**
 * Restore Claude Code's cli.js to the original 200000 context limit.
 *
 * @param {string} cliPath - Path to Claude Code's cli.js.
 * @returns {boolean} true when the file is already at 200000 or was restored;
 *   false when the version is unsupported or the expected pattern is not
 *   present in the file (in which case the file is left untouched).
 */
function restoreClaudeCode(cliPath) {
  const content = readFileSync(cliPath, "utf8");
  const versionCheck = checkVersionSupport(cliPath);
  if (!versionCheck.supported) {
    consola.error(versionCheck.error);
    return false;
  }
  consola.info(`Claude Code version: ${versionCheck.version}`);
  if (getCurrentLimit(content) === 2e5) {
    consola.info("Already at original 200000 limit");
    return true;
  }

  const isVariable = versionCheck.patternType === "variable";
  const pattern = isVariable ? PATTERNS.variable : PATTERNS.funcPatched;
  const replacement = isVariable
    ? "var BS9=200000"
    : "function HR(A){if(A.includes(\"[1m]\"))return 1e6;return 200000}";

  // Without this guard `replace` would be a no-op and the unchanged file would
  // be rewritten and reported as successfully restored.
  if (!pattern.test(content)) {
    consola.error(`Could not find the ${versionCheck.patternType} pattern in cli.js (version ${versionCheck.version}) - nothing was restored`);
    return false;
  }

  // A replacer function keeps the replacement text literal (no `$` expansion).
  writeFileSync(cliPath, content.replace(pattern, () => replacement));
  return true;
}
|
|
619
|
-
function showStatus(currentLimit) {
|
|
737
|
+
/**
 * Log the detected Claude Code version (when known) and the patch status.
 *
 * @param {string} cliPath - Path to Claude Code's cli.js.
 * @param {number | null} currentLimit - Limit found in cli.js, or null when
 *   no limit could be detected.
 */
function showStatus(cliPath, currentLimit) {
  const version = getClaudeCodeVersion(cliPath);
  if (version) consola.info(`Claude Code version: ${version}`);

  if (currentLimit === null) {
    consola.warn("Could not detect current limit - CLI may have been updated");
    consola.info("Look for the BS9 variable or HR function pattern in cli.js");
    return;
  }

  const summary = currentLimit === 2e5
    ? "Status: Original (200k context window)"
    : `Status: Patched (${currentLimit} context window)`;
  consola.info(summary);
}
|
|
@@ -669,7 +789,7 @@ const patchClaude = defineCommand({
|
|
|
669
789
|
const content = readFileSync(cliPath, "utf8");
|
|
670
790
|
const currentLimit = getCurrentLimit(content);
|
|
671
791
|
if (args.status) {
|
|
672
|
-
showStatus(currentLimit);
|
|
792
|
+
showStatus(cliPath, currentLimit);
|
|
673
793
|
return;
|
|
674
794
|
}
|
|
675
795
|
if (args.restore) {
|
|
@@ -698,6 +818,300 @@ const patchClaude = defineCommand({
|
|
|
698
818
|
}
|
|
699
819
|
});
|
|
700
820
|
|
|
821
|
+
//#endregion
|
|
822
|
+
//#region src/lib/adaptive-rate-limiter.ts
|
|
823
|
+
/** Default settings for the adaptive rate limiter. */
const DEFAULT_CONFIG$1 = {
  // First backoff interval; doubled for every retry of the same request.
  baseRetryIntervalSeconds: 10,
  // Upper bound for the exponential backoff interval.
  maxRetryIntervalSeconds: 120,
  // Spacing between queued requests that are not retries.
  requestIntervalSeconds: 10,
  // Time since the last rate-limit hit after which recovery is attempted.
  recoveryTimeoutMinutes: 10,
  // Number of consecutive queued successes after which recovery is attempted.
  consecutiveSuccessesForRecovery: 5,
  // Request spacing (seconds) for each successive step of gradual recovery.
  gradualRecoverySteps: [5, 2, 1, 0],
};
|
|
836
|
+
/**
 * Adaptive rate limiter that switches between normal, rate-limited, and recovering modes
 * based on API responses.
 *
 * - normal: requests run immediately.
 * - rate-limited: requests are queued and run one at a time, spaced by
 *   `requestIntervalSeconds`; a request that fails with a rate-limit error
 *   stays at the head of the queue and is retried after the server's
 *   Retry-After value or an exponential backoff interval.
 * - recovering: requests run directly, spaced by the successive
 *   `gradualRecoverySteps` intervals; once every step has succeeded the
 *   limiter returns to normal mode.
 */
var AdaptiveRateLimiter = class {
  /** Defaults overlaid with the overrides passed to the constructor. */
  config;
  mode = "normal";
  /** Pending requests; processQueue() always works on the first entry. */
  queue = [];
  /** True while a processQueue() loop is running. */
  processing = false;
  /** Timestamp (ms) of the most recent rate-limit hit, null once recovery starts. */
  rateLimitedAt = null;
  consecutiveSuccesses = 0;
  /** Baseline timestamp (ms) for spacing requests; 0 means "not set yet". */
  lastRequestTime = 0;
  /** Current step in gradual recovery (index into gradualRecoverySteps) */
  recoveryStepIndex = 0;

  constructor(config = {}) {
    this.config = {
      ...DEFAULT_CONFIG$1,
      ...config,
    };
  }

  /**
   * Execute a request with adaptive rate limiting.
   * Resolves with `{ result, queueWaitMs }` once the request succeeds;
   * rate-limit failures are retried automatically, other errors are rethrown.
   */
  async execute(fn) {
    if (this.mode === "normal") return this.executeInNormalMode(fn);
    if (this.mode === "recovering") return this.executeInRecoveringMode(fn);
    return this.enqueue(fn);
  }

  /**
   * Classify an error as a rate-limit error.
   * An error counts when `status` is 429 or when its JSON `responseText`
   * carries `error.code === "rate_limited"`. In both cases the Retry-After
   * value is extracted when the body provides one.
   */
  isRateLimitError(error) {
    if (!error || typeof error !== "object") return { isRateLimit: false };
    if ("status" in error && error.status === 429) {
      return { isRateLimit: true, retryAfter: this.extractRetryAfter(error) };
    }
    if ("responseText" in error && typeof error.responseText === "string") {
      try {
        const parsed = JSON.parse(error.responseText);
        const inner = parsed && typeof parsed === "object" ? parsed.error : void 0;
        if (inner && typeof inner === "object" && inner.code === "rate_limited") {
          return { isRateLimit: true, retryAfter: this.extractRetryAfter(error) };
        }
      } catch {
        // Body is not JSON, so it cannot carry the rate_limited code.
      }
    }
    return { isRateLimit: false };
  }

  /**
   * Extract a numeric `retry_after` (top level or under `error`) from the
   * JSON `responseText` of an error. Returns undefined when absent.
   */
  extractRetryAfter(error) {
    if (!error || typeof error !== "object") return void 0;
    if (!("responseText" in error) || typeof error.responseText !== "string") return void 0;
    try {
      const parsed = JSON.parse(error.responseText);
      if (!parsed || typeof parsed !== "object") return void 0;
      if (typeof parsed.retry_after === "number") return parsed.retry_after;
      const inner = parsed.error;
      if (inner && typeof inner === "object" && typeof inner.retry_after === "number") return inner.retry_after;
    } catch {
      // Body is not JSON, so there is no retry_after to read.
    }
    return void 0;
  }

  /**
   * Execute in normal mode - full speed.
   * A rate-limit failure switches to rate-limited mode and queues the request.
   */
  async executeInNormalMode(fn) {
    try {
      return {
        result: await fn(),
        queueWaitMs: 0,
      };
    } catch (error) {
      const { isRateLimit, retryAfter } = this.isRateLimitError(error);
      if (!isRateLimit) throw error;
      this.enterRateLimitedMode();
      // Record when the 429 arrived so the queued retry is spaced from it
      // instead of being fired again immediately.
      this.lastRequestTime = Date.now();
      return this.enqueue(fn, retryAfter);
    }
  }

  /**
   * Execute in recovering mode - gradual speedup.
   * Waits for the current recovery step's interval, runs the request, and
   * advances to the next step on success.
   */
  async executeInRecoveringMode(fn) {
    const startTime = Date.now();
    const currentInterval = this.config.gradualRecoverySteps[this.recoveryStepIndex] ?? 0;
    if (currentInterval > 0) {
      const elapsedMs = Date.now() - this.lastRequestTime;
      const requiredMs = currentInterval * 1e3;
      if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
        await this.sleep(requiredMs - elapsedMs);
      }
    }
    this.lastRequestTime = Date.now();
    try {
      const result = await fn();
      this.recoveryStepIndex++;
      if (this.recoveryStepIndex >= this.config.gradualRecoverySteps.length) {
        this.completeRecovery();
      } else {
        const nextInterval = this.config.gradualRecoverySteps[this.recoveryStepIndex] ?? 0;
        consola.info(`[RateLimiter] Recovery step ${this.recoveryStepIndex}/${this.config.gradualRecoverySteps.length} (next interval: ${nextInterval}s)`);
      }
      return {
        result,
        queueWaitMs: Date.now() - startTime,
      };
    } catch (error) {
      const { isRateLimit, retryAfter } = this.isRateLimitError(error);
      if (!isRateLimit) throw error;
      consola.warn("[RateLimiter] Hit rate limit during recovery, returning to rate-limited mode");
      this.enterRateLimitedMode();
      // Space the queued retry from the moment the 429 arrived.
      this.lastRequestTime = Date.now();
      return this.enqueue(fn, retryAfter);
    }
  }

  /**
   * Enter rate-limited mode (no-op when already in it).
   */
  enterRateLimitedMode() {
    if (this.mode === "rate-limited") return;
    this.mode = "rate-limited";
    this.rateLimitedAt = Date.now();
    this.consecutiveSuccesses = 0;
    consola.warn(`[RateLimiter] Entering rate-limited mode. Requests will be queued with exponential backoff (base: ${this.config.baseRetryIntervalSeconds}s).`);
  }

  /**
   * Check if we should try to recover to normal mode: either enough
   * consecutive queued successes, or enough time since the last rate-limit hit.
   */
  shouldAttemptRecovery() {
    if (this.consecutiveSuccesses >= this.config.consecutiveSuccessesForRecovery) {
      consola.info(`[RateLimiter] ${this.consecutiveSuccesses} consecutive successes. Starting gradual recovery.`);
      return true;
    }
    if (this.rateLimitedAt) {
      const elapsed = Date.now() - this.rateLimitedAt;
      const timeout = this.config.recoveryTimeoutMinutes * 60 * 1e3;
      if (elapsed >= timeout) {
        consola.info(`[RateLimiter] ${this.config.recoveryTimeoutMinutes} minutes elapsed. Starting gradual recovery.`);
        return true;
      }
    }
    return false;
  }

  /**
   * Start gradual recovery mode
   */
  startGradualRecovery() {
    this.mode = "recovering";
    this.recoveryStepIndex = 0;
    this.rateLimitedAt = null;
    this.consecutiveSuccesses = 0;
    const firstInterval = this.config.gradualRecoverySteps[0] ?? 0;
    consola.info(`[RateLimiter] Starting gradual recovery (${this.config.gradualRecoverySteps.length} steps, first interval: ${firstInterval}s)`);
  }

  /**
   * Complete recovery to normal mode
   */
  completeRecovery() {
    this.mode = "normal";
    this.recoveryStepIndex = 0;
    consola.success("[RateLimiter] Recovery complete. Full speed enabled.");
  }

  /**
   * Enqueue a request for later execution.
   *
   * @param fn - The request to run.
   * @param retryAfterSeconds - Retry-After value from the failure that caused
   *   the request to be queued, if any.
   * @returns A promise settled by processQueue() with `{ result, queueWaitMs }`.
   */
  enqueue(fn, retryAfterSeconds) {
    return new Promise((resolve, reject) => {
      this.queue.push({
        execute: fn,
        resolve,
        reject,
        retryCount: 0,
        retryAfterSeconds,
        enqueuedAt: Date.now(),
      });
      if (this.queue.length > 1) {
        const position = this.queue.length;
        const estimatedWait = (position - 1) * this.config.requestIntervalSeconds;
        consola.info(`[RateLimiter] Request queued (position ${position}, ~${estimatedWait}s wait)`);
      }
      // Deliberately not awaited: the request's own promise is settled from
      // inside the processing loop.
      void this.processQueue();
    });
  }

  /**
   * Calculate retry interval (seconds): a positive server Retry-After wins,
   * otherwise exponential backoff capped at maxRetryIntervalSeconds.
   */
  calculateRetryInterval(request) {
    if (request.retryAfterSeconds !== void 0 && request.retryAfterSeconds > 0) return request.retryAfterSeconds;
    const backoff = this.config.baseRetryIntervalSeconds * Math.pow(2, request.retryCount);
    return Math.min(backoff, this.config.maxRetryIntervalSeconds);
  }

  /**
   * Process the queue one request at a time until it is empty.
   * Only one loop runs at a time; concurrent calls return immediately.
   */
  async processQueue() {
    if (this.processing) return;
    this.processing = true;
    try {
      while (this.queue.length > 0) {
        const request = this.queue[0];
        if (this.shouldAttemptRecovery()) this.startGradualRecovery();

        // A request that already failed (retryCount > 0) or was queued with a
        // Retry-After hint waits for the retry interval; a fresh request only
        // waits for the regular request spacing.
        const isRetry = request.retryCount > 0 || request.retryAfterSeconds !== void 0;
        const intervalSeconds = isRetry ? this.calculateRetryInterval(request) : this.config.requestIntervalSeconds;
        const requiredMs = intervalSeconds * 1e3;
        const elapsedMs = Date.now() - this.lastRequestTime;
        if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
          const waitMs = requiredMs - elapsedMs;
          consola.info(`[RateLimiter] Waiting ${Math.ceil(waitMs / 1e3)}s before next request...`);
          await this.sleep(waitMs);
        }

        this.lastRequestTime = Date.now();
        try {
          const result = await request.execute();
          this.queue.shift();
          this.consecutiveSuccesses++;
          request.retryAfterSeconds = void 0;
          request.resolve({
            result,
            queueWaitMs: Date.now() - request.enqueuedAt,
          });
          if (this.mode === "rate-limited") consola.info(`[RateLimiter] Request succeeded (${this.consecutiveSuccesses}/${this.config.consecutiveSuccessesForRecovery} for recovery)`);
        } catch (error) {
          const { isRateLimit, retryAfter } = this.isRateLimitError(error);
          if (isRateLimit) {
            // Keep the request at the head of the queue and retry it later.
            request.retryCount++;
            request.retryAfterSeconds = retryAfter;
            this.consecutiveSuccesses = 0;
            this.rateLimitedAt = Date.now();
            const nextInterval = this.calculateRetryInterval(request);
            const source = retryAfter ? "server Retry-After" : "exponential backoff";
            consola.warn(`[RateLimiter] Request failed with 429 (retry #${request.retryCount}). Retrying in ${nextInterval}s (${source})...`);
          } else {
            this.queue.shift();
            request.reject(error);
          }
        }
      }
    } finally {
      this.processing = false;
    }
  }

  /** Resolve after `ms` milliseconds. */
  sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Get current status for debugging/monitoring
   */
  getStatus() {
    return {
      mode: this.mode,
      queueLength: this.queue.length,
      consecutiveSuccesses: this.consecutiveSuccesses,
      rateLimitedAt: this.rateLimitedAt,
    };
  }
};
|
|
1088
|
+
/** Module-level limiter; null until initAdaptiveRateLimiter() has been called. */
let rateLimiterInstance = null;

/**
 * Initialize the adaptive rate limiter with configuration and log the
 * effective settings.
 *
 * @param {object} [config] - Overrides for the limiter defaults.
 */
function initAdaptiveRateLimiter(config = {}) {
  rateLimiterInstance = new AdaptiveRateLimiter(config);
  const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG$1.baseRetryIntervalSeconds;
  const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG$1.maxRetryIntervalSeconds;
  const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG$1.requestIntervalSeconds;
  const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG$1.recoveryTimeoutMinutes;
  const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG$1.consecutiveSuccessesForRecovery;
  const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG$1.gradualRecoverySteps;
  // Suffix each step individually so an empty list renders as "[]" rather than "[s]".
  const stepList = steps.map((step) => `${step}s`).join(", ");
  consola.info(`[RateLimiter] Initialized (backoff: ${baseRetry}s-${maxRetry}s, interval: ${interval}s, recovery: ${recovery}min or ${successes} successes, gradual: [${stepList}])`);
}

/**
 * Execute a request with adaptive rate limiting.
 * If the rate limiter is not initialized, executes immediately.
 *
 * @param {() => Promise<unknown>} fn - The request to run.
 * @returns {Promise<{ result: unknown, queueWaitMs: number }>} The result along
 *   with the queue wait time (0 when no limiter is initialized).
 */
async function executeWithAdaptiveRateLimit(fn) {
  if (!rateLimiterInstance) {
    return {
      result: await fn(),
      queueWaitMs: 0,
    };
  }
  return rateLimiterInstance.execute(fn);
}
|
|
1114
|
+
|
|
701
1115
|
//#endregion
|
|
702
1116
|
//#region src/lib/history.ts
|
|
703
1117
|
function generateId$1() {
|
|
@@ -1101,16 +1515,27 @@ function formatTokens(input, output) {
|
|
|
1101
1515
|
/**
|
|
1102
1516
|
* Console renderer that shows request lifecycle with apt-get style footer
|
|
1103
1517
|
*
|
|
1104
|
-
* Log format
|
|
1105
|
-
* - Start: [....] HH:MM:SS METHOD /path model-name
|
|
1106
|
-
* - Streaming: [<-->] HH:MM:SS METHOD /path model-name streaming...
|
|
1107
|
-
* - Complete: [ OK ] HH:MM:SS METHOD /path 200 1.2s 1.5K/500
|
|
1108
|
-
* - Error: [FAIL] HH:MM:SS METHOD /path 500 1.2s
|
|
1518
|
+
* Log format:
|
|
1519
|
+
* - Start: [....] HH:MM:SS METHOD /path model-name (debug only, dim)
|
|
1520
|
+
* - Streaming: [<-->] HH:MM:SS METHOD /path model-name streaming... (dim)
|
|
1521
|
+
* - Complete: [ OK ] HH:MM:SS METHOD /path model-name 200 1.2s 1.5K/500 (colored)
|
|
1522
|
+
* - Error: [FAIL] HH:MM:SS METHOD /path model-name 500 1.2s: error message (red)
|
|
1523
|
+
*
|
|
1524
|
+
* Color scheme for completed requests:
|
|
1525
|
+
* - Prefix: green (success) / red (error)
|
|
1526
|
+
* - Time: dim
|
|
1527
|
+
* - Method: cyan
|
|
1528
|
+
* - Path: white
|
|
1529
|
+
* - Model: magenta
|
|
1530
|
+
* - Status: green (success) / red (error)
|
|
1531
|
+
* - Duration: yellow
|
|
1532
|
+
* - Tokens: blue
|
|
1109
1533
|
*
|
|
1110
1534
|
* Features:
|
|
1111
|
-
* -
|
|
1112
|
-
* -
|
|
1113
|
-
* -
|
|
1535
|
+
* - Start lines only shown in debug mode (--verbose)
|
|
1536
|
+
* - Streaming lines are dim (less important)
|
|
1537
|
+
* - /history API requests are always dim
|
|
1538
|
+
* - Sticky footer shows active request count
|
|
1114
1539
|
* - Intercepts consola output to properly handle footer
|
|
1115
1540
|
*/
|
|
1116
1541
|
var ConsoleRenderer = class {
|
|
@@ -1186,25 +1611,52 @@ var ConsoleRenderer = class {
|
|
|
1186
1611
|
}
|
|
1187
1612
|
}
|
|
1188
1613
|
/**
|
|
1614
|
+
* Format a complete log line with colored parts
|
|
1615
|
+
*/
|
|
1616
|
+
formatLogLine(parts) {
|
|
1617
|
+
const { prefix, time, method, path: path$1, model, status, duration, tokens, queueWait, extra, isError, isDim } = parts;
|
|
1618
|
+
if (isDim) {
|
|
1619
|
+
const modelPart = model ? ` ${model}` : "";
|
|
1620
|
+
const extraPart = extra ? ` ${extra}` : "";
|
|
1621
|
+
return pc.dim(`${prefix} ${time} ${method} ${path$1}${modelPart}${extraPart}`);
|
|
1622
|
+
}
|
|
1623
|
+
const coloredPrefix = isError ? pc.red(prefix) : pc.green(prefix);
|
|
1624
|
+
const coloredTime = pc.dim(time);
|
|
1625
|
+
const coloredMethod = pc.cyan(method);
|
|
1626
|
+
const coloredPath = pc.white(path$1);
|
|
1627
|
+
const coloredModel = model ? pc.magenta(` ${model}`) : "";
|
|
1628
|
+
let result = `${coloredPrefix} ${coloredTime} ${coloredMethod} ${coloredPath}${coloredModel}`;
|
|
1629
|
+
if (status !== void 0) {
|
|
1630
|
+
const coloredStatus = isError ? pc.red(String(status)) : pc.green(String(status));
|
|
1631
|
+
result += ` ${coloredStatus}`;
|
|
1632
|
+
}
|
|
1633
|
+
if (duration) result += ` ${pc.yellow(duration)}`;
|
|
1634
|
+
if (queueWait) result += ` ${pc.dim(`(queued ${queueWait})`)}`;
|
|
1635
|
+
if (tokens) result += ` ${pc.blue(tokens)}`;
|
|
1636
|
+
if (extra) result += isError ? pc.red(extra) : extra;
|
|
1637
|
+
return result;
|
|
1638
|
+
}
|
|
1639
|
+
/**
|
|
1189
1640
|
* Print a log line with proper footer handling
|
|
1190
|
-
* 1. Clear footer if visible
|
|
1191
|
-
* 2. Print log with newline
|
|
1192
|
-
* 3. Re-render footer on new line (no newline after footer)
|
|
1193
1641
|
*/
|
|
1194
|
-
printLog(message
|
|
1642
|
+
printLog(message) {
|
|
1195
1643
|
this.clearFooterForLog();
|
|
1196
|
-
|
|
1197
|
-
else process.stdout.write(message + "\n");
|
|
1644
|
+
process.stdout.write(message + "\n");
|
|
1198
1645
|
this.renderFooter();
|
|
1199
1646
|
}
|
|
1200
1647
|
onRequestStart(request) {
|
|
1201
1648
|
this.activeRequests.set(request.id, request);
|
|
1202
|
-
if (this.showActive) {
|
|
1203
|
-
const
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1649
|
+
if (this.showActive && consola.level >= 5) {
|
|
1650
|
+
const message = this.formatLogLine({
|
|
1651
|
+
prefix: "[....]",
|
|
1652
|
+
time: formatTime(),
|
|
1653
|
+
method: request.method,
|
|
1654
|
+
path: request.path,
|
|
1655
|
+
model: request.model,
|
|
1656
|
+
extra: request.queuePosition !== void 0 && request.queuePosition > 0 ? `[q#${request.queuePosition}]` : void 0,
|
|
1657
|
+
isDim: true
|
|
1658
|
+
});
|
|
1659
|
+
this.printLog(message);
|
|
1208
1660
|
}
|
|
1209
1661
|
}
|
|
1210
1662
|
onRequestUpdate(id, update) {
|
|
@@ -1212,28 +1664,39 @@ var ConsoleRenderer = class {
|
|
|
1212
1664
|
if (!request) return;
|
|
1213
1665
|
Object.assign(request, update);
|
|
1214
1666
|
if (this.showActive && update.status === "streaming") {
|
|
1215
|
-
const
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1667
|
+
const message = this.formatLogLine({
|
|
1668
|
+
prefix: "[<-->]",
|
|
1669
|
+
time: formatTime(),
|
|
1670
|
+
method: request.method,
|
|
1671
|
+
path: request.path,
|
|
1672
|
+
model: request.model,
|
|
1673
|
+
extra: "streaming...",
|
|
1674
|
+
isDim: true
|
|
1675
|
+
});
|
|
1676
|
+
this.printLog(message);
|
|
1219
1677
|
}
|
|
1220
1678
|
}
|
|
1221
1679
|
onRequestComplete(request) {
|
|
1222
1680
|
this.activeRequests.delete(request.id);
|
|
1223
|
-
const time = formatTime();
|
|
1224
1681
|
const status = request.statusCode ?? 0;
|
|
1225
|
-
const duration = formatDuration(request.durationMs ?? 0);
|
|
1226
|
-
const tokens = request.model ? formatTokens(request.inputTokens, request.outputTokens) : "";
|
|
1227
|
-
const modelInfo = request.model ? ` ${request.model}` : "";
|
|
1228
1682
|
const isError = request.status === "error" || status >= 400;
|
|
1229
|
-
const
|
|
1230
|
-
const
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1683
|
+
const tokens = request.model ? formatTokens(request.inputTokens, request.outputTokens) : void 0;
|
|
1684
|
+
const queueWait = request.queueWaitMs && request.queueWaitMs > 100 ? formatDuration(request.queueWaitMs) : void 0;
|
|
1685
|
+
const message = this.formatLogLine({
|
|
1686
|
+
prefix: isError ? "[FAIL]" : "[ OK ]",
|
|
1687
|
+
time: formatTime(),
|
|
1688
|
+
method: request.method,
|
|
1689
|
+
path: request.path,
|
|
1690
|
+
model: request.model,
|
|
1691
|
+
status,
|
|
1692
|
+
duration: formatDuration(request.durationMs ?? 0),
|
|
1693
|
+
queueWait,
|
|
1694
|
+
tokens,
|
|
1695
|
+
extra: isError && request.error ? `: ${request.error}` : void 0,
|
|
1696
|
+
isError,
|
|
1697
|
+
isDim: request.isHistoryAccess
|
|
1698
|
+
});
|
|
1699
|
+
this.printLog(message);
|
|
1237
1700
|
}
|
|
1238
1701
|
destroy() {
|
|
1239
1702
|
if (this.footerVisible && this.isTTY) {
|
|
@@ -1634,20 +2097,50 @@ const getTokenCount = async (payload, model) => {
|
|
|
1634
2097
|
//#region src/lib/auto-compact.ts
|
|
1635
2098
|
/** Default settings for auto-compaction. */
const DEFAULT_CONFIG = {
  targetTokens: 12e4,
  // Percentage subtracted from the model's prompt token limit as a safety margin.
  safetyMarginPercent: 2,
  // Request body size limit (500 KB) used when no dynamic override is set.
  maxRequestBodyBytes: 500 * 1024,
};
|
|
1639
2103
|
/**
 * Dynamic byte limit that adjusts based on 413 errors.
 * Null until a 413 has been reported through onRequestTooLarge().
 */
let dynamicByteLimitOverride = null;

/**
 * Called when a 413 error is encountered with a specific payload size.
 * Sets the dynamic byte limit to 90% of the failing size (rounded down),
 * but never below 100 KB, and logs the adjustment.
 *
 * @param {number} failingBytes - Size of the payload that was rejected.
 */
function onRequestTooLarge(failingBytes) {
  const minimumLimit = 100 * 1024;
  const reducedLimit = Math.floor(failingBytes * .9);
  dynamicByteLimitOverride = Math.max(reducedLimit, minimumLimit);

  const toKb = (bytes) => Math.round(bytes / 1024);
  consola.info(`[Auto-compact] Adjusted byte limit: ${toKb(failingBytes)}KB failed, new limit: ${toKb(dynamicByteLimitOverride)}KB`);
}
|
|
2117
|
+
/**
|
|
2118
|
+
* Check if payload needs compaction based on model limits OR request body size.
|
|
1641
2119
|
* Uses a safety margin to account for token counting differences.
|
|
1642
2120
|
*/
|
|
1643
|
-
async function checkNeedsCompaction(payload, model,
|
|
2121
|
+
async function checkNeedsCompaction(payload, model, config = {}) {
|
|
2122
|
+
const cfg = {
|
|
2123
|
+
...DEFAULT_CONFIG,
|
|
2124
|
+
...config
|
|
2125
|
+
};
|
|
1644
2126
|
const currentTokens = (await getTokenCount(payload, model)).input;
|
|
1645
2127
|
const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
|
|
1646
|
-
const
|
|
2128
|
+
const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
|
|
2129
|
+
const currentBytes = JSON.stringify(payload).length;
|
|
2130
|
+
const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
|
|
2131
|
+
const exceedsTokens = currentTokens > tokenLimit;
|
|
2132
|
+
const exceedsBytes = currentBytes > byteLimit;
|
|
2133
|
+
let reason;
|
|
2134
|
+
if (exceedsTokens && exceedsBytes) reason = "both";
|
|
2135
|
+
else if (exceedsTokens) reason = "tokens";
|
|
2136
|
+
else if (exceedsBytes) reason = "bytes";
|
|
1647
2137
|
return {
|
|
1648
|
-
needed:
|
|
2138
|
+
needed: exceedsTokens || exceedsBytes,
|
|
1649
2139
|
currentTokens,
|
|
1650
|
-
|
|
2140
|
+
tokenLimit,
|
|
2141
|
+
currentBytes,
|
|
2142
|
+
byteLimit,
|
|
2143
|
+
reason
|
|
1651
2144
|
};
|
|
1652
2145
|
}
|
|
1653
2146
|
/**
|
|
@@ -1754,7 +2247,7 @@ function createTruncationMarker(removedCount) {
|
|
|
1754
2247
|
};
|
|
1755
2248
|
}
|
|
1756
2249
|
/**
|
|
1757
|
-
* Perform auto-compaction on a payload that exceeds token limits.
|
|
2250
|
+
* Perform auto-compaction on a payload that exceeds token or size limits.
|
|
1758
2251
|
* This uses simple truncation - no LLM calls required.
|
|
1759
2252
|
* Uses iterative approach with decreasing target tokens until under limit.
|
|
1760
2253
|
*/
|
|
@@ -1765,21 +2258,29 @@ async function autoCompact(payload, model, config = {}) {
|
|
|
1765
2258
|
};
|
|
1766
2259
|
const originalTokens = (await getTokenCount(payload, model)).input;
|
|
1767
2260
|
const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
|
|
1768
|
-
const
|
|
1769
|
-
|
|
2261
|
+
const tokenLimit = Math.floor(rawLimit * (1 - cfg.safetyMarginPercent / 100));
|
|
2262
|
+
const originalBytes = JSON.stringify(payload).length;
|
|
2263
|
+
const byteLimit = dynamicByteLimitOverride ?? cfg.maxRequestBodyBytes;
|
|
2264
|
+
if (originalTokens <= tokenLimit && originalBytes <= byteLimit) return {
|
|
1770
2265
|
payload,
|
|
1771
2266
|
wasCompacted: false,
|
|
1772
2267
|
originalTokens,
|
|
1773
2268
|
compactedTokens: originalTokens,
|
|
1774
2269
|
removedMessageCount: 0
|
|
1775
2270
|
};
|
|
1776
|
-
|
|
2271
|
+
const exceedsTokens = originalTokens > tokenLimit;
|
|
2272
|
+
const exceedsBytes = originalBytes > byteLimit;
|
|
2273
|
+
let reason;
|
|
2274
|
+
if (exceedsTokens && exceedsBytes) reason = "tokens and size";
|
|
2275
|
+
else if (exceedsBytes) reason = "size";
|
|
2276
|
+
else reason = "tokens";
|
|
2277
|
+
consola.info(`Auto-compact: Exceeds ${reason} limit (${originalTokens} tokens, ${Math.round(originalBytes / 1024)}KB), truncating...`);
|
|
1777
2278
|
const { systemMessages, remainingMessages } = extractSystemMessages(payload.messages);
|
|
1778
2279
|
const systemTokens = estimateSystemTokens(systemMessages);
|
|
1779
2280
|
consola.debug(`Auto-compact: ${systemMessages.length} system messages (~${systemTokens} tokens)`);
|
|
1780
2281
|
const MAX_ITERATIONS = 5;
|
|
1781
2282
|
const MIN_TARGET = 2e4;
|
|
1782
|
-
let currentTarget = Math.min(cfg.targetTokens,
|
|
2283
|
+
let currentTarget = Math.min(cfg.targetTokens, tokenLimit);
|
|
1783
2284
|
let lastResult = null;
|
|
1784
2285
|
for (let iteration = 0; iteration < MAX_ITERATIONS; iteration++) {
|
|
1785
2286
|
const result = await tryCompactWithTarget({
|
|
@@ -1789,16 +2290,21 @@ async function autoCompact(payload, model, config = {}) {
|
|
|
1789
2290
|
remainingMessages,
|
|
1790
2291
|
systemTokens,
|
|
1791
2292
|
targetTokens: currentTarget,
|
|
1792
|
-
limit,
|
|
2293
|
+
limit: tokenLimit,
|
|
1793
2294
|
originalTokens
|
|
1794
2295
|
});
|
|
1795
2296
|
if (!result.wasCompacted) return result;
|
|
1796
2297
|
lastResult = result;
|
|
1797
|
-
|
|
1798
|
-
|
|
2298
|
+
const resultBytes = JSON.stringify(result.payload).length;
|
|
2299
|
+
const underTokenLimit = result.compactedTokens <= tokenLimit;
|
|
2300
|
+
const underByteLimit = resultBytes <= byteLimit;
|
|
2301
|
+
if (underTokenLimit && underByteLimit) {
|
|
2302
|
+
consola.info(`Auto-compact: ${originalTokens} → ${result.compactedTokens} tokens, ${Math.round(originalBytes / 1024)}KB → ${Math.round(resultBytes / 1024)}KB (removed ${result.removedMessageCount} messages)`);
|
|
1799
2303
|
return result;
|
|
1800
2304
|
}
|
|
1801
|
-
|
|
2305
|
+
const tokenStatus = underTokenLimit ? "OK" : `${result.compactedTokens} > ${tokenLimit}`;
|
|
2306
|
+
const byteStatus = underByteLimit ? "OK" : `${Math.round(resultBytes / 1024)}KB > ${Math.round(byteLimit / 1024)}KB`;
|
|
2307
|
+
consola.warn(`Auto-compact: Still over limit (tokens: ${tokenStatus}, size: ${byteStatus}), trying more aggressive truncation`);
|
|
1802
2308
|
currentTarget = Math.floor(currentTarget * .7);
|
|
1803
2309
|
if (currentTarget < MIN_TARGET) {
|
|
1804
2310
|
consola.error("Auto-compact: Cannot reduce further, target too low");
|
|
@@ -1874,66 +2380,6 @@ function createCompactionMarker(result) {
|
|
|
1874
2380
|
return `\n\n---\n[Auto-compacted: ${result.removedMessageCount} messages removed, ${result.originalTokens} → ${result.compactedTokens} tokens (${percentage}% reduction)]`;
|
|
1875
2381
|
}
|
|
1876
2382
|
|
|
1877
|
-
//#endregion
|
|
1878
|
-
//#region src/lib/queue.ts
|
|
1879
|
-
var RequestQueue = class {
|
|
1880
|
-
queue = [];
|
|
1881
|
-
processing = false;
|
|
1882
|
-
lastRequestTime = 0;
|
|
1883
|
-
async enqueue(execute, rateLimitSeconds) {
|
|
1884
|
-
return new Promise((resolve, reject) => {
|
|
1885
|
-
const request = {
|
|
1886
|
-
execute,
|
|
1887
|
-
resolve,
|
|
1888
|
-
reject
|
|
1889
|
-
};
|
|
1890
|
-
this.queue.push(request);
|
|
1891
|
-
if (this.queue.length > 1) {
|
|
1892
|
-
const position = this.queue.length;
|
|
1893
|
-
const waitTime = Math.ceil((position - 1) * rateLimitSeconds);
|
|
1894
|
-
(waitTime > 10 ? consola.warn : consola.info)(`Rate limit: request queued (position ${position}, ~${waitTime}s wait)`);
|
|
1895
|
-
}
|
|
1896
|
-
this.processQueue(rateLimitSeconds);
|
|
1897
|
-
});
|
|
1898
|
-
}
|
|
1899
|
-
async processQueue(rateLimitSeconds) {
|
|
1900
|
-
if (this.processing) return;
|
|
1901
|
-
this.processing = true;
|
|
1902
|
-
while (this.queue.length > 0) {
|
|
1903
|
-
const elapsedMs = Date.now() - this.lastRequestTime;
|
|
1904
|
-
const requiredMs = rateLimitSeconds * 1e3;
|
|
1905
|
-
if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
|
|
1906
|
-
const waitMs = requiredMs - elapsedMs;
|
|
1907
|
-
const waitSec = Math.ceil(waitMs / 1e3);
|
|
1908
|
-
(waitSec > 10 ? consola.warn : consola.info)(`Rate limit: waiting ${waitSec}s before next request...`);
|
|
1909
|
-
await new Promise((resolve) => setTimeout(resolve, waitMs));
|
|
1910
|
-
}
|
|
1911
|
-
const request = this.queue.shift();
|
|
1912
|
-
if (!request) break;
|
|
1913
|
-
this.lastRequestTime = Date.now();
|
|
1914
|
-
try {
|
|
1915
|
-
const result = await request.execute();
|
|
1916
|
-
request.resolve(result);
|
|
1917
|
-
} catch (error) {
|
|
1918
|
-
request.reject(error);
|
|
1919
|
-
}
|
|
1920
|
-
}
|
|
1921
|
-
this.processing = false;
|
|
1922
|
-
}
|
|
1923
|
-
get length() {
|
|
1924
|
-
return this.queue.length;
|
|
1925
|
-
}
|
|
1926
|
-
};
|
|
1927
|
-
const requestQueue = new RequestQueue();
|
|
1928
|
-
/**
|
|
1929
|
-
* Execute a request with rate limiting via queue.
|
|
1930
|
-
* Requests are queued and processed sequentially at the configured rate.
|
|
1931
|
-
*/
|
|
1932
|
-
async function executeWithRateLimit(state$1, execute) {
|
|
1933
|
-
if (state$1.rateLimitSeconds === void 0) return execute();
|
|
1934
|
-
return requestQueue.enqueue(execute, state$1.rateLimitSeconds);
|
|
1935
|
-
}
|
|
1936
|
-
|
|
1937
2383
|
//#endregion
|
|
1938
2384
|
//#region src/services/copilot/create-chat-completions.ts
|
|
1939
2385
|
const createChatCompletions = async (payload) => {
|
|
@@ -1984,11 +2430,12 @@ function recordErrorResponse(ctx, model, error) {
|
|
|
1984
2430
|
}, Date.now() - ctx.startTime);
|
|
1985
2431
|
}
|
|
1986
2432
|
/** Complete TUI tracking */
|
|
1987
|
-
function completeTracking(trackingId, inputTokens, outputTokens) {
|
|
2433
|
+
function completeTracking(trackingId, inputTokens, outputTokens, queueWaitMs) {
|
|
1988
2434
|
if (!trackingId) return;
|
|
1989
2435
|
requestTracker.updateRequest(trackingId, {
|
|
1990
2436
|
inputTokens,
|
|
1991
|
-
outputTokens
|
|
2437
|
+
outputTokens,
|
|
2438
|
+
queueWaitMs
|
|
1992
2439
|
});
|
|
1993
2440
|
requestTracker.completeRequest(trackingId, 200, {
|
|
1994
2441
|
inputTokens,
|
|
@@ -2029,12 +2476,16 @@ async function buildFinalPayload(payload, model) {
|
|
|
2029
2476
|
}
|
|
2030
2477
|
try {
|
|
2031
2478
|
const check = await checkNeedsCompaction(payload, model);
|
|
2032
|
-
consola.debug(`Auto-compact check: ${check.currentTokens} tokens, limit ${check.
|
|
2479
|
+
consola.debug(`Auto-compact check: ${check.currentTokens} tokens (limit ${check.tokenLimit}), ${Math.round(check.currentBytes / 1024)}KB (limit ${Math.round(check.byteLimit / 1024)}KB), needed: ${check.needed}${check.reason ? ` (${check.reason})` : ""}`);
|
|
2033
2480
|
if (!check.needed) return {
|
|
2034
2481
|
finalPayload: payload,
|
|
2035
2482
|
compactResult: null
|
|
2036
2483
|
};
|
|
2037
|
-
|
|
2484
|
+
let reasonText;
|
|
2485
|
+
if (check.reason === "both") reasonText = "tokens and size";
|
|
2486
|
+
else if (check.reason === "bytes") reasonText = "size";
|
|
2487
|
+
else reasonText = "tokens";
|
|
2488
|
+
consola.info(`Auto-compact triggered: exceeds ${reasonText} limit`);
|
|
2038
2489
|
const compactResult = await autoCompact(payload, model);
|
|
2039
2490
|
return {
|
|
2040
2491
|
finalPayload: compactResult.payload,
|
|
@@ -2050,11 +2501,13 @@ async function buildFinalPayload(payload, model) {
|
|
|
2050
2501
|
}
|
|
2051
2502
|
/**
|
|
2052
2503
|
* Log helpful debugging information when a 413 error occurs.
|
|
2504
|
+
* Also adjusts the dynamic byte limit for future requests.
|
|
2053
2505
|
*/
|
|
2054
2506
|
async function logPayloadSizeInfo(payload, model) {
|
|
2055
2507
|
const messageCount = payload.messages.length;
|
|
2056
2508
|
const bodySize = JSON.stringify(payload).length;
|
|
2057
2509
|
const bodySizeKB = Math.round(bodySize / 1024);
|
|
2510
|
+
onRequestTooLarge(bodySize);
|
|
2058
2511
|
let imageCount = 0;
|
|
2059
2512
|
let largeMessages = 0;
|
|
2060
2513
|
let totalImageSize = 0;
|
|
@@ -2140,7 +2593,8 @@ async function handleCompletion$1(c) {
|
|
|
2140
2593
|
async function executeRequest(opts) {
|
|
2141
2594
|
const { c, payload, selectedModel, ctx, trackingId } = opts;
|
|
2142
2595
|
try {
|
|
2143
|
-
const response = await
|
|
2596
|
+
const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(payload));
|
|
2597
|
+
ctx.queueWaitMs = queueWaitMs;
|
|
2144
2598
|
if (isNonStreaming(response)) return handleNonStreamingResponse$1(c, response, ctx);
|
|
2145
2599
|
consola.debug("Streaming response");
|
|
2146
2600
|
updateTrackerStatus(trackingId, "streaming");
|
|
@@ -2199,7 +2653,8 @@ function handleNonStreamingResponse$1(c, originalResponse, ctx) {
|
|
|
2199
2653
|
}, Date.now() - ctx.startTime);
|
|
2200
2654
|
if (ctx.trackingId && usage) requestTracker.updateRequest(ctx.trackingId, {
|
|
2201
2655
|
inputTokens: usage.prompt_tokens,
|
|
2202
|
-
outputTokens: usage.completion_tokens
|
|
2656
|
+
outputTokens: usage.completion_tokens,
|
|
2657
|
+
queueWaitMs: ctx.queueWaitMs
|
|
2203
2658
|
});
|
|
2204
2659
|
return c.json(response);
|
|
2205
2660
|
}
|
|
@@ -2265,7 +2720,7 @@ async function handleStreamingResponse$1(opts) {
|
|
|
2265
2720
|
acc.content += marker;
|
|
2266
2721
|
}
|
|
2267
2722
|
recordStreamSuccess(acc, payload.model, ctx);
|
|
2268
|
-
completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens);
|
|
2723
|
+
completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
|
|
2269
2724
|
} catch (error) {
|
|
2270
2725
|
recordStreamError({
|
|
2271
2726
|
acc,
|
|
@@ -2364,7 +2819,7 @@ completionRoutes.post("/", async (c) => {
|
|
|
2364
2819
|
try {
|
|
2365
2820
|
return await handleCompletion$1(c);
|
|
2366
2821
|
} catch (error) {
|
|
2367
|
-
return
|
|
2822
|
+
return forwardError(c, error);
|
|
2368
2823
|
}
|
|
2369
2824
|
});
|
|
2370
2825
|
|
|
@@ -2390,7 +2845,7 @@ embeddingRoutes.post("/", async (c) => {
|
|
|
2390
2845
|
const response = await createEmbeddings(payload);
|
|
2391
2846
|
return c.json(response);
|
|
2392
2847
|
} catch (error) {
|
|
2393
|
-
return
|
|
2848
|
+
return forwardError(c, error);
|
|
2394
2849
|
}
|
|
2395
2850
|
});
|
|
2396
2851
|
|
|
@@ -4022,7 +4477,8 @@ async function handleCompletion(c) {
|
|
|
4022
4477
|
if (compactResult) ctx.compactResult = compactResult;
|
|
4023
4478
|
if (state.manualApprove) await awaitApproval();
|
|
4024
4479
|
try {
|
|
4025
|
-
const response = await
|
|
4480
|
+
const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(openAIPayload));
|
|
4481
|
+
ctx.queueWaitMs = queueWaitMs;
|
|
4026
4482
|
if (isNonStreaming(response)) return handleNonStreamingResponse({
|
|
4027
4483
|
c,
|
|
4028
4484
|
response,
|
|
@@ -4080,7 +4536,8 @@ function handleNonStreamingResponse(opts) {
|
|
|
4080
4536
|
}, Date.now() - ctx.startTime);
|
|
4081
4537
|
if (ctx.trackingId) requestTracker.updateRequest(ctx.trackingId, {
|
|
4082
4538
|
inputTokens: anthropicResponse.usage.input_tokens,
|
|
4083
|
-
outputTokens: anthropicResponse.usage.output_tokens
|
|
4539
|
+
outputTokens: anthropicResponse.usage.output_tokens,
|
|
4540
|
+
queueWaitMs: ctx.queueWaitMs
|
|
4084
4541
|
});
|
|
4085
4542
|
return c.json(anthropicResponse);
|
|
4086
4543
|
}
|
|
@@ -4136,7 +4593,7 @@ async function handleStreamingResponse(opts) {
|
|
|
4136
4593
|
acc.content += marker;
|
|
4137
4594
|
}
|
|
4138
4595
|
recordStreamingResponse(acc, anthropicPayload.model, ctx);
|
|
4139
|
-
completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens);
|
|
4596
|
+
completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
|
|
4140
4597
|
} catch (error) {
|
|
4141
4598
|
consola.error("Stream error:", error);
|
|
4142
4599
|
recordStreamError({
|
|
@@ -4334,14 +4791,14 @@ messageRoutes.post("/", async (c) => {
|
|
|
4334
4791
|
try {
|
|
4335
4792
|
return await handleCompletion(c);
|
|
4336
4793
|
} catch (error) {
|
|
4337
|
-
return
|
|
4794
|
+
return forwardError(c, error);
|
|
4338
4795
|
}
|
|
4339
4796
|
});
|
|
4340
4797
|
messageRoutes.post("/count_tokens", async (c) => {
|
|
4341
4798
|
try {
|
|
4342
4799
|
return await handleCountTokens(c);
|
|
4343
4800
|
} catch (error) {
|
|
4344
|
-
return
|
|
4801
|
+
return forwardError(c, error);
|
|
4345
4802
|
}
|
|
4346
4803
|
});
|
|
4347
4804
|
|
|
@@ -4380,18 +4837,18 @@ modelRoutes.get("/", async (c) => {
|
|
|
4380
4837
|
has_more: false
|
|
4381
4838
|
});
|
|
4382
4839
|
} catch (error) {
|
|
4383
|
-
return
|
|
4840
|
+
return forwardError(c, error);
|
|
4384
4841
|
}
|
|
4385
4842
|
});
|
|
4386
4843
|
|
|
4387
4844
|
//#endregion
|
|
4388
4845
|
//#region src/routes/token/route.ts
|
|
4389
4846
|
const tokenRoute = new Hono();
|
|
4390
|
-
tokenRoute.get("/",
|
|
4847
|
+
tokenRoute.get("/", (c) => {
|
|
4391
4848
|
try {
|
|
4392
4849
|
return c.json({ token: state.copilotToken });
|
|
4393
4850
|
} catch (error) {
|
|
4394
|
-
return
|
|
4851
|
+
return forwardError(c, error);
|
|
4395
4852
|
}
|
|
4396
4853
|
});
|
|
4397
4854
|
|
|
@@ -4403,7 +4860,7 @@ usageRoute.get("/", async (c) => {
|
|
|
4403
4860
|
const usage = await getCopilotUsage();
|
|
4404
4861
|
return c.json(usage);
|
|
4405
4862
|
} catch (error) {
|
|
4406
|
-
return
|
|
4863
|
+
return forwardError(c, error);
|
|
4407
4864
|
}
|
|
4408
4865
|
});
|
|
4409
4866
|
|
|
@@ -4455,10 +4912,15 @@ async function runServer(options) {
|
|
|
4455
4912
|
state.accountType = options.accountType;
|
|
4456
4913
|
if (options.accountType !== "individual") consola.info(`Using ${options.accountType} plan GitHub account`);
|
|
4457
4914
|
state.manualApprove = options.manual;
|
|
4458
|
-
state.rateLimitSeconds = options.rateLimit;
|
|
4459
|
-
state.rateLimitWait = options.rateLimitWait;
|
|
4460
4915
|
state.showToken = options.showToken;
|
|
4461
4916
|
state.autoCompact = options.autoCompact;
|
|
4917
|
+
if (options.rateLimit) initAdaptiveRateLimiter({
|
|
4918
|
+
baseRetryIntervalSeconds: options.retryInterval,
|
|
4919
|
+
requestIntervalSeconds: options.requestInterval,
|
|
4920
|
+
recoveryTimeoutMinutes: options.recoveryTimeout,
|
|
4921
|
+
consecutiveSuccessesForRecovery: options.consecutiveSuccesses
|
|
4922
|
+
});
|
|
4923
|
+
else consola.info("Rate limiting disabled");
|
|
4462
4924
|
if (options.autoCompact) consola.info("Auto-compact enabled: will compress context when exceeding token limits");
|
|
4463
4925
|
initHistory(options.history, options.historyLimit);
|
|
4464
4926
|
if (options.history) {
|
|
@@ -4545,16 +5007,30 @@ const start = defineCommand({
|
|
|
4545
5007
|
default: false,
|
|
4546
5008
|
description: "Enable manual request approval"
|
|
4547
5009
|
},
|
|
4548
|
-
"rate-limit": {
|
|
4549
|
-
alias: "r",
|
|
4550
|
-
type: "string",
|
|
4551
|
-
description: "Rate limit in seconds between requests"
|
|
4552
|
-
},
|
|
4553
|
-
wait: {
|
|
4554
|
-
alias: "w",
|
|
5010
|
+
"no-rate-limit": {
|
|
4555
5011
|
type: "boolean",
|
|
4556
5012
|
default: false,
|
|
4557
|
-
description: "
|
|
5013
|
+
description: "Disable adaptive rate limiting"
|
|
5014
|
+
},
|
|
5015
|
+
"retry-interval": {
|
|
5016
|
+
type: "string",
|
|
5017
|
+
default: "10",
|
|
5018
|
+
description: "Seconds to wait before retrying after rate limit error (default: 10)"
|
|
5019
|
+
},
|
|
5020
|
+
"request-interval": {
|
|
5021
|
+
type: "string",
|
|
5022
|
+
default: "10",
|
|
5023
|
+
description: "Seconds between requests in rate-limited mode (default: 10)"
|
|
5024
|
+
},
|
|
5025
|
+
"recovery-timeout": {
|
|
5026
|
+
type: "string",
|
|
5027
|
+
default: "10",
|
|
5028
|
+
description: "Minutes before attempting to recover from rate-limited mode (default: 10)"
|
|
5029
|
+
},
|
|
5030
|
+
"consecutive-successes": {
|
|
5031
|
+
type: "string",
|
|
5032
|
+
default: "5",
|
|
5033
|
+
description: "Number of consecutive successes needed to recover from rate-limited mode (default: 5)"
|
|
4558
5034
|
},
|
|
4559
5035
|
"github-token": {
|
|
4560
5036
|
alias: "g",
|
|
@@ -4577,10 +5053,10 @@ const start = defineCommand({
|
|
|
4577
5053
|
default: false,
|
|
4578
5054
|
description: "Initialize proxy from environment variables"
|
|
4579
5055
|
},
|
|
4580
|
-
history: {
|
|
5056
|
+
"no-history": {
|
|
4581
5057
|
type: "boolean",
|
|
4582
5058
|
default: false,
|
|
4583
|
-
description: "
|
|
5059
|
+
description: "Disable request history recording and Web UI"
|
|
4584
5060
|
},
|
|
4585
5061
|
"history-limit": {
|
|
4586
5062
|
type: "string",
|
|
@@ -4594,21 +5070,22 @@ const start = defineCommand({
|
|
|
4594
5070
|
}
|
|
4595
5071
|
},
|
|
4596
5072
|
run({ args }) {
|
|
4597
|
-
const rateLimitRaw = args["rate-limit"];
|
|
4598
|
-
const rateLimit = rateLimitRaw === void 0 ? void 0 : Number.parseInt(rateLimitRaw, 10);
|
|
4599
5073
|
return runServer({
|
|
4600
5074
|
port: Number.parseInt(args.port, 10),
|
|
4601
5075
|
host: args.host,
|
|
4602
5076
|
verbose: args.verbose,
|
|
4603
5077
|
accountType: args["account-type"],
|
|
4604
5078
|
manual: args.manual,
|
|
4605
|
-
rateLimit,
|
|
4606
|
-
|
|
5079
|
+
rateLimit: !args["no-rate-limit"],
|
|
5080
|
+
retryInterval: Number.parseInt(args["retry-interval"], 10),
|
|
5081
|
+
requestInterval: Number.parseInt(args["request-interval"], 10),
|
|
5082
|
+
recoveryTimeout: Number.parseInt(args["recovery-timeout"], 10),
|
|
5083
|
+
consecutiveSuccesses: Number.parseInt(args["consecutive-successes"], 10),
|
|
4607
5084
|
githubToken: args["github-token"],
|
|
4608
5085
|
claudeCode: args["claude-code"],
|
|
4609
5086
|
showToken: args["show-token"],
|
|
4610
5087
|
proxyEnv: args["proxy-env"],
|
|
4611
|
-
history: args
|
|
5088
|
+
history: !args["no-history"],
|
|
4612
5089
|
historyLimit: Number.parseInt(args["history-limit"], 10),
|
|
4613
5090
|
autoCompact: args["auto-compact"]
|
|
4614
5091
|
});
|