@hsupu/copilot-api 0.7.5 → 0.7.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.js +576 -148
- package/dist/main.js.map +1 -1
- package/package.json +1 -1
package/dist/main.js
CHANGED
|
@@ -3,7 +3,7 @@ import { defineCommand, runMain } from "citty";
|
|
|
3
3
|
import consola from "consola";
|
|
4
4
|
import fs from "node:fs/promises";
|
|
5
5
|
import os from "node:os";
|
|
6
|
-
import path, { join } from "node:path";
|
|
6
|
+
import path, { dirname, join } from "node:path";
|
|
7
7
|
import { randomUUID } from "node:crypto";
|
|
8
8
|
import { existsSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
|
|
9
9
|
import clipboard from "clipboardy";
|
|
@@ -45,7 +45,6 @@ async function ensureFile(filePath) {
|
|
|
45
45
|
const state = {
|
|
46
46
|
accountType: "individual",
|
|
47
47
|
manualApprove: false,
|
|
48
|
-
rateLimitWait: false,
|
|
49
48
|
showToken: false,
|
|
50
49
|
autoCompact: false
|
|
51
50
|
};
|
|
@@ -137,6 +136,16 @@ function formatRequestTooLargeError() {
|
|
|
137
136
|
}
|
|
138
137
|
};
|
|
139
138
|
}
|
|
139
|
+
/** Format Anthropic-compatible error for rate limit exceeded (429) */
|
|
140
|
+
function formatRateLimitError(copilotMessage) {
|
|
141
|
+
return {
|
|
142
|
+
type: "error",
|
|
143
|
+
error: {
|
|
144
|
+
type: "rate_limit_error",
|
|
145
|
+
message: copilotMessage ?? "You have exceeded your rate limit. Please try again later."
|
|
146
|
+
}
|
|
147
|
+
};
|
|
148
|
+
}
|
|
140
149
|
function forwardError(c, error) {
|
|
141
150
|
consola.error("Error occurred:", error);
|
|
142
151
|
if (error instanceof HTTPError) {
|
|
@@ -161,6 +170,11 @@ function forwardError(c, error) {
|
|
|
161
170
|
return c.json(formattedError, 400);
|
|
162
171
|
}
|
|
163
172
|
}
|
|
173
|
+
if (error.status === 429 || copilotError.error?.code === "rate_limited") {
|
|
174
|
+
const formattedError = formatRateLimitError(copilotError.error?.message);
|
|
175
|
+
consola.debug("Returning formatted rate limit error:", formattedError);
|
|
176
|
+
return c.json(formattedError, 429);
|
|
177
|
+
}
|
|
164
178
|
return c.json({ error: {
|
|
165
179
|
message: error.responseText,
|
|
166
180
|
type: "error"
|
|
@@ -539,16 +553,77 @@ const logout = defineCommand({
|
|
|
539
553
|
|
|
540
554
|
//#endregion
|
|
541
555
|
//#region src/patch-claude.ts
|
|
542
|
-
const
|
|
543
|
-
|
|
556
|
+
const SUPPORTED_VERSIONS = {
|
|
557
|
+
v2a: {
|
|
558
|
+
min: "2.0.0",
|
|
559
|
+
max: "2.1.10"
|
|
560
|
+
},
|
|
561
|
+
v2b: {
|
|
562
|
+
min: "2.1.11",
|
|
563
|
+
max: "2.1.12"
|
|
564
|
+
}
|
|
565
|
+
};
|
|
566
|
+
const PATTERNS = {
|
|
567
|
+
funcOriginal: /function HR\(A\)\{if\(A\.includes\("\[1m\]"\)\)return 1e6;return 200000\}/,
|
|
568
|
+
funcPatched: /function HR\(A\)\{if\(A\.includes\("\[1m\]"\)\)return 1e6;return \d+\}/,
|
|
569
|
+
variable: /var BS9=(\d+)/
|
|
570
|
+
};
|
|
571
|
+
/**
|
|
572
|
+
* Parse semver version string to comparable parts
|
|
573
|
+
*/
|
|
574
|
+
function parseVersion(version) {
|
|
575
|
+
return version.split(".").map((n) => Number.parseInt(n, 10) || 0);
|
|
576
|
+
}
|
|
577
|
+
/**
|
|
578
|
+
* Compare two semver versions
|
|
579
|
+
* Returns: -1 if a < b, 0 if a == b, 1 if a > b
|
|
580
|
+
*/
|
|
581
|
+
function compareVersions(a, b) {
|
|
582
|
+
const partsA = parseVersion(a);
|
|
583
|
+
const partsB = parseVersion(b);
|
|
584
|
+
const len = Math.max(partsA.length, partsB.length);
|
|
585
|
+
for (let i = 0; i < len; i++) {
|
|
586
|
+
const numA = partsA[i] || 0;
|
|
587
|
+
const numB = partsB[i] || 0;
|
|
588
|
+
if (numA < numB) return -1;
|
|
589
|
+
if (numA > numB) return 1;
|
|
590
|
+
}
|
|
591
|
+
return 0;
|
|
592
|
+
}
|
|
593
|
+
function getPatternTypeForVersion(version) {
|
|
594
|
+
if (compareVersions(version, SUPPORTED_VERSIONS.v2a.min) >= 0 && compareVersions(version, SUPPORTED_VERSIONS.v2a.max) <= 0) return "func";
|
|
595
|
+
if (compareVersions(version, SUPPORTED_VERSIONS.v2b.min) >= 0 && compareVersions(version, SUPPORTED_VERSIONS.v2b.max) <= 0) return "variable";
|
|
596
|
+
return null;
|
|
597
|
+
}
|
|
598
|
+
/**
|
|
599
|
+
* Get supported version range string for error messages
|
|
600
|
+
*/
|
|
601
|
+
function getSupportedRangeString() {
|
|
602
|
+
return `${SUPPORTED_VERSIONS.v2a.min}-${SUPPORTED_VERSIONS.v2a.max}, ${SUPPORTED_VERSIONS.v2b.min}-${SUPPORTED_VERSIONS.v2b.max}`;
|
|
603
|
+
}
|
|
604
|
+
/**
|
|
605
|
+
* Get Claude Code version from package.json
|
|
606
|
+
*/
|
|
607
|
+
function getClaudeCodeVersion(cliPath) {
|
|
608
|
+
try {
|
|
609
|
+
const packageJsonPath = join(dirname(cliPath), "package.json");
|
|
610
|
+
if (!existsSync(packageJsonPath)) return null;
|
|
611
|
+
const packageJson = JSON.parse(readFileSync(packageJsonPath, "utf8"));
|
|
612
|
+
if (typeof packageJson === "object" && packageJson !== null && "version" in packageJson && typeof packageJson.version === "string") return packageJson.version;
|
|
613
|
+
return null;
|
|
614
|
+
} catch {
|
|
615
|
+
return null;
|
|
616
|
+
}
|
|
617
|
+
}
|
|
544
618
|
/**
|
|
545
619
|
* Search volta tools directory for Claude Code
|
|
546
620
|
*/
|
|
547
621
|
function findInVoltaTools(voltaHome) {
|
|
548
622
|
const paths = [];
|
|
623
|
+
const packagesPath = join(voltaHome, "tools", "image", "packages", "@anthropic-ai", "claude-code", "lib", "node_modules", "@anthropic-ai", "claude-code", "cli.js");
|
|
624
|
+
if (existsSync(packagesPath)) paths.push(packagesPath);
|
|
549
625
|
const toolsDir = join(voltaHome, "tools", "image", "node");
|
|
550
|
-
if (
|
|
551
|
-
try {
|
|
626
|
+
if (existsSync(toolsDir)) try {
|
|
552
627
|
for (const version of readdirSync(toolsDir)) {
|
|
553
628
|
const claudePath = join(toolsDir, version, "lib", "node_modules", "@anthropic-ai", "claude-code", "cli.js");
|
|
554
629
|
if (existsSync(claudePath)) paths.push(claudePath);
|
|
@@ -580,25 +655,61 @@ function findClaudeCodePath() {
|
|
|
580
655
|
* Get current context limit from Claude Code
|
|
581
656
|
*/
|
|
582
657
|
function getCurrentLimit(content) {
|
|
583
|
-
const
|
|
584
|
-
if (
|
|
585
|
-
const
|
|
586
|
-
|
|
658
|
+
const varMatch = content.match(PATTERNS.variable);
|
|
659
|
+
if (varMatch) return Number.parseInt(varMatch[1], 10);
|
|
660
|
+
const funcMatch = content.match(PATTERNS.funcPatched);
|
|
661
|
+
if (funcMatch) {
|
|
662
|
+
const limitMatch = funcMatch[0].match(/return (\d+)\}$/);
|
|
663
|
+
return limitMatch ? Number.parseInt(limitMatch[1], 10) : null;
|
|
664
|
+
}
|
|
665
|
+
return null;
|
|
666
|
+
}
|
|
667
|
+
/**
|
|
668
|
+
* Check if Claude Code version is supported for patching
|
|
669
|
+
*/
|
|
670
|
+
function checkVersionSupport(cliPath) {
|
|
671
|
+
const version = getClaudeCodeVersion(cliPath);
|
|
672
|
+
if (!version) return {
|
|
673
|
+
supported: false,
|
|
674
|
+
version: null,
|
|
675
|
+
patternType: null,
|
|
676
|
+
error: "Could not detect Claude Code version"
|
|
677
|
+
};
|
|
678
|
+
const patternType = getPatternTypeForVersion(version);
|
|
679
|
+
if (!patternType) return {
|
|
680
|
+
supported: false,
|
|
681
|
+
version,
|
|
682
|
+
patternType: null,
|
|
683
|
+
error: `Version ${version} is not supported. Supported: ${getSupportedRangeString()}`
|
|
684
|
+
};
|
|
685
|
+
return {
|
|
686
|
+
supported: true,
|
|
687
|
+
version,
|
|
688
|
+
patternType
|
|
689
|
+
};
|
|
587
690
|
}
|
|
588
691
|
/**
|
|
589
692
|
* Patch Claude Code to use a different context limit
|
|
590
693
|
*/
|
|
591
694
|
function patchClaudeCode(cliPath, newLimit) {
|
|
592
695
|
const content = readFileSync(cliPath, "utf8");
|
|
696
|
+
const versionCheck = checkVersionSupport(cliPath);
|
|
697
|
+
if (!versionCheck.supported) {
|
|
698
|
+
consola.error(versionCheck.error);
|
|
699
|
+
return false;
|
|
700
|
+
}
|
|
701
|
+
consola.info(`Claude Code version: ${versionCheck.version}`);
|
|
593
702
|
if (getCurrentLimit(content) === newLimit) {
|
|
594
703
|
consola.info(`Already patched with limit ${newLimit}`);
|
|
595
704
|
return true;
|
|
596
705
|
}
|
|
597
|
-
const replacement = `function HR(A){if(A.includes("[1m]"))return 1e6;return ${newLimit}}`;
|
|
598
706
|
let newContent;
|
|
599
|
-
if (
|
|
600
|
-
else
|
|
601
|
-
|
|
707
|
+
if (versionCheck.patternType === "variable") newContent = content.replace(PATTERNS.variable, `var BS9=${newLimit}`);
|
|
708
|
+
else {
|
|
709
|
+
const replacement = `function HR(A){if(A.includes("[1m]"))return 1e6;return ${newLimit}}`;
|
|
710
|
+
const pattern = PATTERNS.funcOriginal.test(content) ? PATTERNS.funcOriginal : PATTERNS.funcPatched;
|
|
711
|
+
newContent = content.replace(pattern, replacement);
|
|
712
|
+
}
|
|
602
713
|
writeFileSync(cliPath, newContent);
|
|
603
714
|
return true;
|
|
604
715
|
}
|
|
@@ -607,19 +718,28 @@ function patchClaudeCode(cliPath, newLimit) {
|
|
|
607
718
|
*/
|
|
608
719
|
function restoreClaudeCode(cliPath) {
|
|
609
720
|
const content = readFileSync(cliPath, "utf8");
|
|
721
|
+
const versionCheck = checkVersionSupport(cliPath);
|
|
722
|
+
if (!versionCheck.supported) {
|
|
723
|
+
consola.error(versionCheck.error);
|
|
724
|
+
return false;
|
|
725
|
+
}
|
|
726
|
+
consola.info(`Claude Code version: ${versionCheck.version}`);
|
|
610
727
|
if (getCurrentLimit(content) === 2e5) {
|
|
611
728
|
consola.info("Already at original 200000 limit");
|
|
612
729
|
return true;
|
|
613
730
|
}
|
|
614
|
-
|
|
615
|
-
|
|
731
|
+
let newContent;
|
|
732
|
+
if (versionCheck.patternType === "variable") newContent = content.replace(PATTERNS.variable, "var BS9=200000");
|
|
733
|
+
else newContent = content.replace(PATTERNS.funcPatched, "function HR(A){if(A.includes(\"[1m]\"))return 1e6;return 200000}");
|
|
616
734
|
writeFileSync(cliPath, newContent);
|
|
617
735
|
return true;
|
|
618
736
|
}
|
|
619
|
-
function showStatus(currentLimit) {
|
|
737
|
+
function showStatus(cliPath, currentLimit) {
|
|
738
|
+
const version = getClaudeCodeVersion(cliPath);
|
|
739
|
+
if (version) consola.info(`Claude Code version: ${version}`);
|
|
620
740
|
if (currentLimit === null) {
|
|
621
741
|
consola.warn("Could not detect current limit - CLI may have been updated");
|
|
622
|
-
consola.info("Look for the HR function pattern in cli.js");
|
|
742
|
+
consola.info("Look for the BS9 variable or HR function pattern in cli.js");
|
|
623
743
|
} else if (currentLimit === 2e5) consola.info("Status: Original (200k context window)");
|
|
624
744
|
else consola.info(`Status: Patched (${currentLimit} context window)`);
|
|
625
745
|
}
|
|
@@ -669,7 +789,7 @@ const patchClaude = defineCommand({
|
|
|
669
789
|
const content = readFileSync(cliPath, "utf8");
|
|
670
790
|
const currentLimit = getCurrentLimit(content);
|
|
671
791
|
if (args.status) {
|
|
672
|
-
showStatus(currentLimit);
|
|
792
|
+
showStatus(cliPath, currentLimit);
|
|
673
793
|
return;
|
|
674
794
|
}
|
|
675
795
|
if (args.restore) {
|
|
@@ -698,6 +818,300 @@ const patchClaude = defineCommand({
|
|
|
698
818
|
}
|
|
699
819
|
});
|
|
700
820
|
|
|
821
|
+
//#endregion
|
|
822
|
+
//#region src/lib/adaptive-rate-limiter.ts
|
|
823
|
+
const DEFAULT_CONFIG$1 = {
|
|
824
|
+
baseRetryIntervalSeconds: 10,
|
|
825
|
+
maxRetryIntervalSeconds: 120,
|
|
826
|
+
requestIntervalSeconds: 10,
|
|
827
|
+
recoveryTimeoutMinutes: 10,
|
|
828
|
+
consecutiveSuccessesForRecovery: 5,
|
|
829
|
+
gradualRecoverySteps: [
|
|
830
|
+
5,
|
|
831
|
+
2,
|
|
832
|
+
1,
|
|
833
|
+
0
|
|
834
|
+
]
|
|
835
|
+
};
|
|
836
|
+
/**
|
|
837
|
+
* Adaptive rate limiter that switches between normal, rate-limited, and recovering modes
|
|
838
|
+
* based on API responses.
|
|
839
|
+
*/
|
|
840
|
+
var AdaptiveRateLimiter = class {
|
|
841
|
+
config;
|
|
842
|
+
mode = "normal";
|
|
843
|
+
queue = [];
|
|
844
|
+
processing = false;
|
|
845
|
+
rateLimitedAt = null;
|
|
846
|
+
consecutiveSuccesses = 0;
|
|
847
|
+
lastRequestTime = 0;
|
|
848
|
+
/** Current step in gradual recovery (index into gradualRecoverySteps) */
|
|
849
|
+
recoveryStepIndex = 0;
|
|
850
|
+
constructor(config = {}) {
|
|
851
|
+
this.config = {
|
|
852
|
+
...DEFAULT_CONFIG$1,
|
|
853
|
+
...config
|
|
854
|
+
};
|
|
855
|
+
}
|
|
856
|
+
/**
|
|
857
|
+
* Execute a request with adaptive rate limiting.
|
|
858
|
+
* Returns a promise that resolves when the request succeeds.
|
|
859
|
+
* The request will be retried automatically on 429 errors.
|
|
860
|
+
*/
|
|
861
|
+
async execute(fn) {
|
|
862
|
+
if (this.mode === "normal") return this.executeInNormalMode(fn);
|
|
863
|
+
if (this.mode === "recovering") return this.executeInRecoveringMode(fn);
|
|
864
|
+
return this.enqueue(fn);
|
|
865
|
+
}
|
|
866
|
+
/**
|
|
867
|
+
* Check if an error is a rate limit error (429) and extract Retry-After if available
|
|
868
|
+
*/
|
|
869
|
+
isRateLimitError(error) {
|
|
870
|
+
if (error && typeof error === "object") {
|
|
871
|
+
if ("status" in error && error.status === 429) return {
|
|
872
|
+
isRateLimit: true,
|
|
873
|
+
retryAfter: this.extractRetryAfter(error)
|
|
874
|
+
};
|
|
875
|
+
if ("responseText" in error && typeof error.responseText === "string") try {
|
|
876
|
+
const parsed = JSON.parse(error.responseText);
|
|
877
|
+
if (parsed && typeof parsed === "object" && "error" in parsed && parsed.error && typeof parsed.error === "object" && "code" in parsed.error && parsed.error.code === "rate_limited") return { isRateLimit: true };
|
|
878
|
+
} catch {}
|
|
879
|
+
}
|
|
880
|
+
return { isRateLimit: false };
|
|
881
|
+
}
|
|
882
|
+
/**
|
|
883
|
+
* Extract Retry-After value from error response
|
|
884
|
+
*/
|
|
885
|
+
extractRetryAfter(error) {
|
|
886
|
+
if (!error || typeof error !== "object") return void 0;
|
|
887
|
+
if ("responseText" in error && typeof error.responseText === "string") try {
|
|
888
|
+
const parsed = JSON.parse(error.responseText);
|
|
889
|
+
if (parsed && typeof parsed === "object" && "retry_after" in parsed && typeof parsed.retry_after === "number") return parsed.retry_after;
|
|
890
|
+
if (parsed && typeof parsed === "object" && "error" in parsed && parsed.error && typeof parsed.error === "object" && "retry_after" in parsed.error && typeof parsed.error.retry_after === "number") return parsed.error.retry_after;
|
|
891
|
+
} catch {}
|
|
892
|
+
}
|
|
893
|
+
/**
|
|
894
|
+
* Execute in normal mode - full speed
|
|
895
|
+
*/
|
|
896
|
+
async executeInNormalMode(fn) {
|
|
897
|
+
try {
|
|
898
|
+
return {
|
|
899
|
+
result: await fn(),
|
|
900
|
+
queueWaitMs: 0
|
|
901
|
+
};
|
|
902
|
+
} catch (error) {
|
|
903
|
+
const { isRateLimit, retryAfter } = this.isRateLimitError(error);
|
|
904
|
+
if (isRateLimit) {
|
|
905
|
+
this.enterRateLimitedMode();
|
|
906
|
+
return this.enqueue(fn, retryAfter);
|
|
907
|
+
}
|
|
908
|
+
throw error;
|
|
909
|
+
}
|
|
910
|
+
}
|
|
911
|
+
/**
|
|
912
|
+
* Execute in recovering mode - gradual speedup
|
|
913
|
+
*/
|
|
914
|
+
async executeInRecoveringMode(fn) {
|
|
915
|
+
const startTime = Date.now();
|
|
916
|
+
const currentInterval = this.config.gradualRecoverySteps[this.recoveryStepIndex] ?? 0;
|
|
917
|
+
if (currentInterval > 0) {
|
|
918
|
+
const elapsedMs = Date.now() - this.lastRequestTime;
|
|
919
|
+
const requiredMs = currentInterval * 1e3;
|
|
920
|
+
if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
|
|
921
|
+
const waitMs = requiredMs - elapsedMs;
|
|
922
|
+
await this.sleep(waitMs);
|
|
923
|
+
}
|
|
924
|
+
}
|
|
925
|
+
this.lastRequestTime = Date.now();
|
|
926
|
+
try {
|
|
927
|
+
const result = await fn();
|
|
928
|
+
this.recoveryStepIndex++;
|
|
929
|
+
if (this.recoveryStepIndex >= this.config.gradualRecoverySteps.length) this.completeRecovery();
|
|
930
|
+
else {
|
|
931
|
+
const nextInterval = this.config.gradualRecoverySteps[this.recoveryStepIndex] ?? 0;
|
|
932
|
+
consola.info(`[RateLimiter] Recovery step ${this.recoveryStepIndex}/${this.config.gradualRecoverySteps.length} (next interval: ${nextInterval}s)`);
|
|
933
|
+
}
|
|
934
|
+
const queueWaitMs = Date.now() - startTime;
|
|
935
|
+
return {
|
|
936
|
+
result,
|
|
937
|
+
queueWaitMs
|
|
938
|
+
};
|
|
939
|
+
} catch (error) {
|
|
940
|
+
const { isRateLimit, retryAfter } = this.isRateLimitError(error);
|
|
941
|
+
if (isRateLimit) {
|
|
942
|
+
consola.warn("[RateLimiter] Hit rate limit during recovery, returning to rate-limited mode");
|
|
943
|
+
this.enterRateLimitedMode();
|
|
944
|
+
return this.enqueue(fn, retryAfter);
|
|
945
|
+
}
|
|
946
|
+
throw error;
|
|
947
|
+
}
|
|
948
|
+
}
|
|
949
|
+
/**
|
|
950
|
+
* Enter rate-limited mode
|
|
951
|
+
*/
|
|
952
|
+
enterRateLimitedMode() {
|
|
953
|
+
if (this.mode === "rate-limited") return;
|
|
954
|
+
this.mode = "rate-limited";
|
|
955
|
+
this.rateLimitedAt = Date.now();
|
|
956
|
+
this.consecutiveSuccesses = 0;
|
|
957
|
+
consola.warn(`[RateLimiter] Entering rate-limited mode. Requests will be queued with exponential backoff (base: ${this.config.baseRetryIntervalSeconds}s).`);
|
|
958
|
+
}
|
|
959
|
+
/**
|
|
960
|
+
* Check if we should try to recover to normal mode
|
|
961
|
+
*/
|
|
962
|
+
shouldAttemptRecovery() {
|
|
963
|
+
if (this.consecutiveSuccesses >= this.config.consecutiveSuccessesForRecovery) {
|
|
964
|
+
consola.info(`[RateLimiter] ${this.consecutiveSuccesses} consecutive successes. Starting gradual recovery.`);
|
|
965
|
+
return true;
|
|
966
|
+
}
|
|
967
|
+
if (this.rateLimitedAt) {
|
|
968
|
+
const elapsed = Date.now() - this.rateLimitedAt;
|
|
969
|
+
const timeout = this.config.recoveryTimeoutMinutes * 60 * 1e3;
|
|
970
|
+
if (elapsed >= timeout) {
|
|
971
|
+
consola.info(`[RateLimiter] ${this.config.recoveryTimeoutMinutes} minutes elapsed. Starting gradual recovery.`);
|
|
972
|
+
return true;
|
|
973
|
+
}
|
|
974
|
+
}
|
|
975
|
+
return false;
|
|
976
|
+
}
|
|
977
|
+
/**
|
|
978
|
+
* Start gradual recovery mode
|
|
979
|
+
*/
|
|
980
|
+
startGradualRecovery() {
|
|
981
|
+
this.mode = "recovering";
|
|
982
|
+
this.recoveryStepIndex = 0;
|
|
983
|
+
this.rateLimitedAt = null;
|
|
984
|
+
this.consecutiveSuccesses = 0;
|
|
985
|
+
const firstInterval = this.config.gradualRecoverySteps[0] ?? 0;
|
|
986
|
+
consola.info(`[RateLimiter] Starting gradual recovery (${this.config.gradualRecoverySteps.length} steps, first interval: ${firstInterval}s)`);
|
|
987
|
+
}
|
|
988
|
+
/**
|
|
989
|
+
* Complete recovery to normal mode
|
|
990
|
+
*/
|
|
991
|
+
completeRecovery() {
|
|
992
|
+
this.mode = "normal";
|
|
993
|
+
this.recoveryStepIndex = 0;
|
|
994
|
+
consola.success("[RateLimiter] Recovery complete. Full speed enabled.");
|
|
995
|
+
}
|
|
996
|
+
/**
|
|
997
|
+
* Enqueue a request for later execution
|
|
998
|
+
*/
|
|
999
|
+
enqueue(fn, retryAfterSeconds) {
|
|
1000
|
+
return new Promise((resolve, reject) => {
|
|
1001
|
+
const request = {
|
|
1002
|
+
execute: fn,
|
|
1003
|
+
resolve,
|
|
1004
|
+
reject,
|
|
1005
|
+
retryCount: 0,
|
|
1006
|
+
retryAfterSeconds,
|
|
1007
|
+
enqueuedAt: Date.now()
|
|
1008
|
+
};
|
|
1009
|
+
this.queue.push(request);
|
|
1010
|
+
if (this.queue.length > 1) {
|
|
1011
|
+
const position = this.queue.length;
|
|
1012
|
+
const estimatedWait = (position - 1) * this.config.requestIntervalSeconds;
|
|
1013
|
+
consola.info(`[RateLimiter] Request queued (position ${position}, ~${estimatedWait}s wait)`);
|
|
1014
|
+
}
|
|
1015
|
+
this.processQueue();
|
|
1016
|
+
});
|
|
1017
|
+
}
|
|
1018
|
+
/**
|
|
1019
|
+
* Calculate retry interval with exponential backoff
|
|
1020
|
+
*/
|
|
1021
|
+
calculateRetryInterval(request) {
|
|
1022
|
+
if (request.retryAfterSeconds !== void 0 && request.retryAfterSeconds > 0) return request.retryAfterSeconds;
|
|
1023
|
+
const backoff = this.config.baseRetryIntervalSeconds * Math.pow(2, request.retryCount);
|
|
1024
|
+
return Math.min(backoff, this.config.maxRetryIntervalSeconds);
|
|
1025
|
+
}
|
|
1026
|
+
/**
|
|
1027
|
+
* Process the queue
|
|
1028
|
+
*/
|
|
1029
|
+
async processQueue() {
|
|
1030
|
+
if (this.processing) return;
|
|
1031
|
+
this.processing = true;
|
|
1032
|
+
while (this.queue.length > 0) {
|
|
1033
|
+
const request = this.queue[0];
|
|
1034
|
+
if (this.shouldAttemptRecovery()) this.startGradualRecovery();
|
|
1035
|
+
const elapsedMs = Date.now() - this.lastRequestTime;
|
|
1036
|
+
const requiredMs = (request.retryCount > 0 ? this.calculateRetryInterval(request) : this.config.requestIntervalSeconds) * 1e3;
|
|
1037
|
+
if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
|
|
1038
|
+
const waitMs = requiredMs - elapsedMs;
|
|
1039
|
+
const waitSec = Math.ceil(waitMs / 1e3);
|
|
1040
|
+
consola.info(`[RateLimiter] Waiting ${waitSec}s before next request...`);
|
|
1041
|
+
await this.sleep(waitMs);
|
|
1042
|
+
}
|
|
1043
|
+
this.lastRequestTime = Date.now();
|
|
1044
|
+
try {
|
|
1045
|
+
const result = await request.execute();
|
|
1046
|
+
this.queue.shift();
|
|
1047
|
+
this.consecutiveSuccesses++;
|
|
1048
|
+
request.retryAfterSeconds = void 0;
|
|
1049
|
+
const queueWaitMs = Date.now() - request.enqueuedAt;
|
|
1050
|
+
request.resolve({
|
|
1051
|
+
result,
|
|
1052
|
+
queueWaitMs
|
|
1053
|
+
});
|
|
1054
|
+
if (this.mode === "rate-limited") consola.info(`[RateLimiter] Request succeeded (${this.consecutiveSuccesses}/${this.config.consecutiveSuccessesForRecovery} for recovery)`);
|
|
1055
|
+
} catch (error) {
|
|
1056
|
+
const { isRateLimit, retryAfter } = this.isRateLimitError(error);
|
|
1057
|
+
if (isRateLimit) {
|
|
1058
|
+
request.retryCount++;
|
|
1059
|
+
request.retryAfterSeconds = retryAfter;
|
|
1060
|
+
this.consecutiveSuccesses = 0;
|
|
1061
|
+
this.rateLimitedAt = Date.now();
|
|
1062
|
+
const nextInterval = this.calculateRetryInterval(request);
|
|
1063
|
+
const source = retryAfter ? "server Retry-After" : "exponential backoff";
|
|
1064
|
+
consola.warn(`[RateLimiter] Request failed with 429 (retry #${request.retryCount}). Retrying in ${nextInterval}s (${source})...`);
|
|
1065
|
+
} else {
|
|
1066
|
+
this.queue.shift();
|
|
1067
|
+
request.reject(error);
|
|
1068
|
+
}
|
|
1069
|
+
}
|
|
1070
|
+
}
|
|
1071
|
+
this.processing = false;
|
|
1072
|
+
}
|
|
1073
|
+
sleep(ms) {
|
|
1074
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
1075
|
+
}
|
|
1076
|
+
/**
|
|
1077
|
+
* Get current status for debugging/monitoring
|
|
1078
|
+
*/
|
|
1079
|
+
getStatus() {
|
|
1080
|
+
return {
|
|
1081
|
+
mode: this.mode,
|
|
1082
|
+
queueLength: this.queue.length,
|
|
1083
|
+
consecutiveSuccesses: this.consecutiveSuccesses,
|
|
1084
|
+
rateLimitedAt: this.rateLimitedAt
|
|
1085
|
+
};
|
|
1086
|
+
}
|
|
1087
|
+
};
|
|
1088
|
+
let rateLimiterInstance = null;
|
|
1089
|
+
/**
|
|
1090
|
+
* Initialize the adaptive rate limiter with configuration
|
|
1091
|
+
*/
|
|
1092
|
+
function initAdaptiveRateLimiter(config = {}) {
|
|
1093
|
+
rateLimiterInstance = new AdaptiveRateLimiter(config);
|
|
1094
|
+
const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG$1.baseRetryIntervalSeconds;
|
|
1095
|
+
const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG$1.maxRetryIntervalSeconds;
|
|
1096
|
+
const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG$1.requestIntervalSeconds;
|
|
1097
|
+
const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG$1.recoveryTimeoutMinutes;
|
|
1098
|
+
const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG$1.consecutiveSuccessesForRecovery;
|
|
1099
|
+
const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG$1.gradualRecoverySteps;
|
|
1100
|
+
consola.info(`[RateLimiter] Initialized (backoff: ${baseRetry}s-${maxRetry}s, interval: ${interval}s, recovery: ${recovery}min or ${successes} successes, gradual: [${steps.join("s, ")}s])`);
|
|
1101
|
+
}
|
|
1102
|
+
/**
|
|
1103
|
+
* Execute a request with adaptive rate limiting.
|
|
1104
|
+
* If rate limiter is not initialized, executes immediately.
|
|
1105
|
+
* Returns the result along with queue wait time.
|
|
1106
|
+
*/
|
|
1107
|
+
async function executeWithAdaptiveRateLimit(fn) {
|
|
1108
|
+
if (!rateLimiterInstance) return {
|
|
1109
|
+
result: await fn(),
|
|
1110
|
+
queueWaitMs: 0
|
|
1111
|
+
};
|
|
1112
|
+
return rateLimiterInstance.execute(fn);
|
|
1113
|
+
}
|
|
1114
|
+
|
|
701
1115
|
//#endregion
|
|
702
1116
|
//#region src/lib/history.ts
|
|
703
1117
|
function generateId$1() {
|
|
@@ -1101,16 +1515,27 @@ function formatTokens(input, output) {
|
|
|
1101
1515
|
/**
|
|
1102
1516
|
* Console renderer that shows request lifecycle with apt-get style footer
|
|
1103
1517
|
*
|
|
1104
|
-
* Log format
|
|
1105
|
-
* - Start: [....] HH:MM:SS METHOD /path model-name
|
|
1106
|
-
* - Streaming: [<-->] HH:MM:SS METHOD /path model-name streaming...
|
|
1107
|
-
* - Complete: [ OK ] HH:MM:SS METHOD /path 200 1.2s 1.5K/500
|
|
1108
|
-
* - Error: [FAIL] HH:MM:SS METHOD /path 500 1.2s
|
|
1518
|
+
* Log format:
|
|
1519
|
+
* - Start: [....] HH:MM:SS METHOD /path model-name (debug only, dim)
|
|
1520
|
+
* - Streaming: [<-->] HH:MM:SS METHOD /path model-name streaming... (dim)
|
|
1521
|
+
* - Complete: [ OK ] HH:MM:SS METHOD /path model-name 200 1.2s 1.5K/500 (colored)
|
|
1522
|
+
* - Error: [FAIL] HH:MM:SS METHOD /path model-name 500 1.2s: error message (red)
|
|
1523
|
+
*
|
|
1524
|
+
* Color scheme for completed requests:
|
|
1525
|
+
* - Prefix: green (success) / red (error)
|
|
1526
|
+
* - Time: dim
|
|
1527
|
+
* - Method: cyan
|
|
1528
|
+
* - Path: white
|
|
1529
|
+
* - Model: magenta
|
|
1530
|
+
* - Status: green (success) / red (error)
|
|
1531
|
+
* - Duration: yellow
|
|
1532
|
+
* - Tokens: blue
|
|
1109
1533
|
*
|
|
1110
1534
|
* Features:
|
|
1111
|
-
* -
|
|
1112
|
-
* -
|
|
1113
|
-
* -
|
|
1535
|
+
* - Start lines only shown in debug mode (--verbose)
|
|
1536
|
+
* - Streaming lines are dim (less important)
|
|
1537
|
+
* - /history API requests are always dim
|
|
1538
|
+
* - Sticky footer shows active request count
|
|
1114
1539
|
* - Intercepts consola output to properly handle footer
|
|
1115
1540
|
*/
|
|
1116
1541
|
var ConsoleRenderer = class {
|
|
@@ -1186,25 +1611,52 @@ var ConsoleRenderer = class {
|
|
|
1186
1611
|
}
|
|
1187
1612
|
}
|
|
1188
1613
|
/**
|
|
1614
|
+
* Format a complete log line with colored parts
|
|
1615
|
+
*/
|
|
1616
|
+
formatLogLine(parts) {
|
|
1617
|
+
const { prefix, time, method, path: path$1, model, status, duration, tokens, queueWait, extra, isError, isDim } = parts;
|
|
1618
|
+
if (isDim) {
|
|
1619
|
+
const modelPart = model ? ` ${model}` : "";
|
|
1620
|
+
const extraPart = extra ? ` ${extra}` : "";
|
|
1621
|
+
return pc.dim(`${prefix} ${time} ${method} ${path$1}${modelPart}${extraPart}`);
|
|
1622
|
+
}
|
|
1623
|
+
const coloredPrefix = isError ? pc.red(prefix) : pc.green(prefix);
|
|
1624
|
+
const coloredTime = pc.dim(time);
|
|
1625
|
+
const coloredMethod = pc.cyan(method);
|
|
1626
|
+
const coloredPath = pc.white(path$1);
|
|
1627
|
+
const coloredModel = model ? pc.magenta(` ${model}`) : "";
|
|
1628
|
+
let result = `${coloredPrefix} ${coloredTime} ${coloredMethod} ${coloredPath}${coloredModel}`;
|
|
1629
|
+
if (status !== void 0) {
|
|
1630
|
+
const coloredStatus = isError ? pc.red(String(status)) : pc.green(String(status));
|
|
1631
|
+
result += ` ${coloredStatus}`;
|
|
1632
|
+
}
|
|
1633
|
+
if (duration) result += ` ${pc.yellow(duration)}`;
|
|
1634
|
+
if (queueWait) result += ` ${pc.dim(`(queued ${queueWait})`)}`;
|
|
1635
|
+
if (tokens) result += ` ${pc.blue(tokens)}`;
|
|
1636
|
+
if (extra) result += isError ? pc.red(extra) : extra;
|
|
1637
|
+
return result;
|
|
1638
|
+
}
|
|
1639
|
+
/**
|
|
1189
1640
|
* Print a log line with proper footer handling
|
|
1190
|
-
* 1. Clear footer if visible
|
|
1191
|
-
* 2. Print log with newline
|
|
1192
|
-
* 3. Re-render footer on new line (no newline after footer)
|
|
1193
1641
|
*/
|
|
1194
|
-
printLog(message
|
|
1642
|
+
printLog(message) {
|
|
1195
1643
|
this.clearFooterForLog();
|
|
1196
|
-
|
|
1197
|
-
else process.stdout.write(message + "\n");
|
|
1644
|
+
process.stdout.write(message + "\n");
|
|
1198
1645
|
this.renderFooter();
|
|
1199
1646
|
}
|
|
1200
1647
|
onRequestStart(request) {
|
|
1201
1648
|
this.activeRequests.set(request.id, request);
|
|
1202
|
-
if (this.showActive) {
|
|
1203
|
-
const
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1649
|
+
if (this.showActive && consola.level >= 5) {
|
|
1650
|
+
const message = this.formatLogLine({
|
|
1651
|
+
prefix: "[....]",
|
|
1652
|
+
time: formatTime(),
|
|
1653
|
+
method: request.method,
|
|
1654
|
+
path: request.path,
|
|
1655
|
+
model: request.model,
|
|
1656
|
+
extra: request.queuePosition !== void 0 && request.queuePosition > 0 ? `[q#${request.queuePosition}]` : void 0,
|
|
1657
|
+
isDim: true
|
|
1658
|
+
});
|
|
1659
|
+
this.printLog(message);
|
|
1208
1660
|
}
|
|
1209
1661
|
}
|
|
1210
1662
|
onRequestUpdate(id, update) {
|
|
@@ -1212,28 +1664,39 @@ var ConsoleRenderer = class {
|
|
|
1212
1664
|
if (!request) return;
|
|
1213
1665
|
Object.assign(request, update);
|
|
1214
1666
|
if (this.showActive && update.status === "streaming") {
|
|
1215
|
-
const
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1667
|
+
const message = this.formatLogLine({
|
|
1668
|
+
prefix: "[<-->]",
|
|
1669
|
+
time: formatTime(),
|
|
1670
|
+
method: request.method,
|
|
1671
|
+
path: request.path,
|
|
1672
|
+
model: request.model,
|
|
1673
|
+
extra: "streaming...",
|
|
1674
|
+
isDim: true
|
|
1675
|
+
});
|
|
1676
|
+
this.printLog(message);
|
|
1219
1677
|
}
|
|
1220
1678
|
}
|
|
1221
1679
|
onRequestComplete(request) {
|
|
1222
1680
|
this.activeRequests.delete(request.id);
|
|
1223
|
-
const time = formatTime();
|
|
1224
1681
|
const status = request.statusCode ?? 0;
|
|
1225
|
-
const duration = formatDuration(request.durationMs ?? 0);
|
|
1226
|
-
const tokens = request.model ? formatTokens(request.inputTokens, request.outputTokens) : "";
|
|
1227
|
-
const modelInfo = request.model ? ` ${request.model}` : "";
|
|
1228
1682
|
const isError = request.status === "error" || status >= 400;
|
|
1229
|
-
const
|
|
1230
|
-
const
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1683
|
+
const tokens = request.model ? formatTokens(request.inputTokens, request.outputTokens) : void 0;
|
|
1684
|
+
const queueWait = request.queueWaitMs && request.queueWaitMs > 100 ? formatDuration(request.queueWaitMs) : void 0;
|
|
1685
|
+
const message = this.formatLogLine({
|
|
1686
|
+
prefix: isError ? "[FAIL]" : "[ OK ]",
|
|
1687
|
+
time: formatTime(),
|
|
1688
|
+
method: request.method,
|
|
1689
|
+
path: request.path,
|
|
1690
|
+
model: request.model,
|
|
1691
|
+
status,
|
|
1692
|
+
duration: formatDuration(request.durationMs ?? 0),
|
|
1693
|
+
queueWait,
|
|
1694
|
+
tokens,
|
|
1695
|
+
extra: isError && request.error ? `: ${request.error}` : void 0,
|
|
1696
|
+
isError,
|
|
1697
|
+
isDim: request.isHistoryAccess
|
|
1698
|
+
});
|
|
1699
|
+
this.printLog(message);
|
|
1237
1700
|
}
|
|
1238
1701
|
destroy() {
|
|
1239
1702
|
if (this.footerVisible && this.isTTY) {
|
|
@@ -1874,66 +2337,6 @@ function createCompactionMarker(result) {
|
|
|
1874
2337
|
return `\n\n---\n[Auto-compacted: ${result.removedMessageCount} messages removed, ${result.originalTokens} → ${result.compactedTokens} tokens (${percentage}% reduction)]`;
|
|
1875
2338
|
}
|
|
1876
2339
|
|
|
1877
|
-
//#endregion
|
|
1878
|
-
//#region src/lib/queue.ts
|
|
1879
|
-
var RequestQueue = class {
|
|
1880
|
-
queue = [];
|
|
1881
|
-
processing = false;
|
|
1882
|
-
lastRequestTime = 0;
|
|
1883
|
-
async enqueue(execute, rateLimitSeconds) {
|
|
1884
|
-
return new Promise((resolve, reject) => {
|
|
1885
|
-
const request = {
|
|
1886
|
-
execute,
|
|
1887
|
-
resolve,
|
|
1888
|
-
reject
|
|
1889
|
-
};
|
|
1890
|
-
this.queue.push(request);
|
|
1891
|
-
if (this.queue.length > 1) {
|
|
1892
|
-
const position = this.queue.length;
|
|
1893
|
-
const waitTime = Math.ceil((position - 1) * rateLimitSeconds);
|
|
1894
|
-
(waitTime > 10 ? consola.warn : consola.info)(`Rate limit: request queued (position ${position}, ~${waitTime}s wait)`);
|
|
1895
|
-
}
|
|
1896
|
-
this.processQueue(rateLimitSeconds);
|
|
1897
|
-
});
|
|
1898
|
-
}
|
|
1899
|
-
async processQueue(rateLimitSeconds) {
|
|
1900
|
-
if (this.processing) return;
|
|
1901
|
-
this.processing = true;
|
|
1902
|
-
while (this.queue.length > 0) {
|
|
1903
|
-
const elapsedMs = Date.now() - this.lastRequestTime;
|
|
1904
|
-
const requiredMs = rateLimitSeconds * 1e3;
|
|
1905
|
-
if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
|
|
1906
|
-
const waitMs = requiredMs - elapsedMs;
|
|
1907
|
-
const waitSec = Math.ceil(waitMs / 1e3);
|
|
1908
|
-
(waitSec > 10 ? consola.warn : consola.info)(`Rate limit: waiting ${waitSec}s before next request...`);
|
|
1909
|
-
await new Promise((resolve) => setTimeout(resolve, waitMs));
|
|
1910
|
-
}
|
|
1911
|
-
const request = this.queue.shift();
|
|
1912
|
-
if (!request) break;
|
|
1913
|
-
this.lastRequestTime = Date.now();
|
|
1914
|
-
try {
|
|
1915
|
-
const result = await request.execute();
|
|
1916
|
-
request.resolve(result);
|
|
1917
|
-
} catch (error) {
|
|
1918
|
-
request.reject(error);
|
|
1919
|
-
}
|
|
1920
|
-
}
|
|
1921
|
-
this.processing = false;
|
|
1922
|
-
}
|
|
1923
|
-
get length() {
|
|
1924
|
-
return this.queue.length;
|
|
1925
|
-
}
|
|
1926
|
-
};
|
|
1927
|
-
const requestQueue = new RequestQueue();
|
|
1928
|
-
/**
|
|
1929
|
-
* Execute a request with rate limiting via queue.
|
|
1930
|
-
* Requests are queued and processed sequentially at the configured rate.
|
|
1931
|
-
*/
|
|
1932
|
-
async function executeWithRateLimit(state$1, execute) {
|
|
1933
|
-
if (state$1.rateLimitSeconds === void 0) return execute();
|
|
1934
|
-
return requestQueue.enqueue(execute, state$1.rateLimitSeconds);
|
|
1935
|
-
}
|
|
1936
|
-
|
|
1937
2340
|
//#endregion
|
|
1938
2341
|
//#region src/services/copilot/create-chat-completions.ts
|
|
1939
2342
|
const createChatCompletions = async (payload) => {
|
|
@@ -1984,11 +2387,12 @@ function recordErrorResponse(ctx, model, error) {
|
|
|
1984
2387
|
}, Date.now() - ctx.startTime);
|
|
1985
2388
|
}
|
|
1986
2389
|
/** Complete TUI tracking */
|
|
1987
|
-
function completeTracking(trackingId, inputTokens, outputTokens) {
|
|
2390
|
+
function completeTracking(trackingId, inputTokens, outputTokens, queueWaitMs) {
|
|
1988
2391
|
if (!trackingId) return;
|
|
1989
2392
|
requestTracker.updateRequest(trackingId, {
|
|
1990
2393
|
inputTokens,
|
|
1991
|
-
outputTokens
|
|
2394
|
+
outputTokens,
|
|
2395
|
+
queueWaitMs
|
|
1992
2396
|
});
|
|
1993
2397
|
requestTracker.completeRequest(trackingId, 200, {
|
|
1994
2398
|
inputTokens,
|
|
@@ -2140,7 +2544,8 @@ async function handleCompletion$1(c) {
|
|
|
2140
2544
|
async function executeRequest(opts) {
|
|
2141
2545
|
const { c, payload, selectedModel, ctx, trackingId } = opts;
|
|
2142
2546
|
try {
|
|
2143
|
-
const response = await
|
|
2547
|
+
const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(payload));
|
|
2548
|
+
ctx.queueWaitMs = queueWaitMs;
|
|
2144
2549
|
if (isNonStreaming(response)) return handleNonStreamingResponse$1(c, response, ctx);
|
|
2145
2550
|
consola.debug("Streaming response");
|
|
2146
2551
|
updateTrackerStatus(trackingId, "streaming");
|
|
@@ -2199,7 +2604,8 @@ function handleNonStreamingResponse$1(c, originalResponse, ctx) {
|
|
|
2199
2604
|
}, Date.now() - ctx.startTime);
|
|
2200
2605
|
if (ctx.trackingId && usage) requestTracker.updateRequest(ctx.trackingId, {
|
|
2201
2606
|
inputTokens: usage.prompt_tokens,
|
|
2202
|
-
outputTokens: usage.completion_tokens
|
|
2607
|
+
outputTokens: usage.completion_tokens,
|
|
2608
|
+
queueWaitMs: ctx.queueWaitMs
|
|
2203
2609
|
});
|
|
2204
2610
|
return c.json(response);
|
|
2205
2611
|
}
|
|
@@ -2265,7 +2671,7 @@ async function handleStreamingResponse$1(opts) {
|
|
|
2265
2671
|
acc.content += marker;
|
|
2266
2672
|
}
|
|
2267
2673
|
recordStreamSuccess(acc, payload.model, ctx);
|
|
2268
|
-
completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens);
|
|
2674
|
+
completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
|
|
2269
2675
|
} catch (error) {
|
|
2270
2676
|
recordStreamError({
|
|
2271
2677
|
acc,
|
|
@@ -2364,7 +2770,7 @@ completionRoutes.post("/", async (c) => {
|
|
|
2364
2770
|
try {
|
|
2365
2771
|
return await handleCompletion$1(c);
|
|
2366
2772
|
} catch (error) {
|
|
2367
|
-
return
|
|
2773
|
+
return forwardError(c, error);
|
|
2368
2774
|
}
|
|
2369
2775
|
});
|
|
2370
2776
|
|
|
@@ -2390,7 +2796,7 @@ embeddingRoutes.post("/", async (c) => {
|
|
|
2390
2796
|
const response = await createEmbeddings(payload);
|
|
2391
2797
|
return c.json(response);
|
|
2392
2798
|
} catch (error) {
|
|
2393
|
-
return
|
|
2799
|
+
return forwardError(c, error);
|
|
2394
2800
|
}
|
|
2395
2801
|
});
|
|
2396
2802
|
|
|
@@ -4022,7 +4428,8 @@ async function handleCompletion(c) {
|
|
|
4022
4428
|
if (compactResult) ctx.compactResult = compactResult;
|
|
4023
4429
|
if (state.manualApprove) await awaitApproval();
|
|
4024
4430
|
try {
|
|
4025
|
-
const response = await
|
|
4431
|
+
const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(openAIPayload));
|
|
4432
|
+
ctx.queueWaitMs = queueWaitMs;
|
|
4026
4433
|
if (isNonStreaming(response)) return handleNonStreamingResponse({
|
|
4027
4434
|
c,
|
|
4028
4435
|
response,
|
|
@@ -4080,7 +4487,8 @@ function handleNonStreamingResponse(opts) {
|
|
|
4080
4487
|
}, Date.now() - ctx.startTime);
|
|
4081
4488
|
if (ctx.trackingId) requestTracker.updateRequest(ctx.trackingId, {
|
|
4082
4489
|
inputTokens: anthropicResponse.usage.input_tokens,
|
|
4083
|
-
outputTokens: anthropicResponse.usage.output_tokens
|
|
4490
|
+
outputTokens: anthropicResponse.usage.output_tokens,
|
|
4491
|
+
queueWaitMs: ctx.queueWaitMs
|
|
4084
4492
|
});
|
|
4085
4493
|
return c.json(anthropicResponse);
|
|
4086
4494
|
}
|
|
@@ -4136,7 +4544,7 @@ async function handleStreamingResponse(opts) {
|
|
|
4136
4544
|
acc.content += marker;
|
|
4137
4545
|
}
|
|
4138
4546
|
recordStreamingResponse(acc, anthropicPayload.model, ctx);
|
|
4139
|
-
completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens);
|
|
4547
|
+
completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
|
|
4140
4548
|
} catch (error) {
|
|
4141
4549
|
consola.error("Stream error:", error);
|
|
4142
4550
|
recordStreamError({
|
|
@@ -4334,14 +4742,14 @@ messageRoutes.post("/", async (c) => {
|
|
|
4334
4742
|
try {
|
|
4335
4743
|
return await handleCompletion(c);
|
|
4336
4744
|
} catch (error) {
|
|
4337
|
-
return
|
|
4745
|
+
return forwardError(c, error);
|
|
4338
4746
|
}
|
|
4339
4747
|
});
|
|
4340
4748
|
messageRoutes.post("/count_tokens", async (c) => {
|
|
4341
4749
|
try {
|
|
4342
4750
|
return await handleCountTokens(c);
|
|
4343
4751
|
} catch (error) {
|
|
4344
|
-
return
|
|
4752
|
+
return forwardError(c, error);
|
|
4345
4753
|
}
|
|
4346
4754
|
});
|
|
4347
4755
|
|
|
@@ -4380,18 +4788,18 @@ modelRoutes.get("/", async (c) => {
|
|
|
4380
4788
|
has_more: false
|
|
4381
4789
|
});
|
|
4382
4790
|
} catch (error) {
|
|
4383
|
-
return
|
|
4791
|
+
return forwardError(c, error);
|
|
4384
4792
|
}
|
|
4385
4793
|
});
|
|
4386
4794
|
|
|
4387
4795
|
//#endregion
|
|
4388
4796
|
//#region src/routes/token/route.ts
|
|
4389
4797
|
const tokenRoute = new Hono();
|
|
4390
|
-
tokenRoute.get("/",
|
|
4798
|
+
tokenRoute.get("/", (c) => {
|
|
4391
4799
|
try {
|
|
4392
4800
|
return c.json({ token: state.copilotToken });
|
|
4393
4801
|
} catch (error) {
|
|
4394
|
-
return
|
|
4802
|
+
return forwardError(c, error);
|
|
4395
4803
|
}
|
|
4396
4804
|
});
|
|
4397
4805
|
|
|
@@ -4403,7 +4811,7 @@ usageRoute.get("/", async (c) => {
|
|
|
4403
4811
|
const usage = await getCopilotUsage();
|
|
4404
4812
|
return c.json(usage);
|
|
4405
4813
|
} catch (error) {
|
|
4406
|
-
return
|
|
4814
|
+
return forwardError(c, error);
|
|
4407
4815
|
}
|
|
4408
4816
|
});
|
|
4409
4817
|
|
|
@@ -4455,10 +4863,15 @@ async function runServer(options) {
|
|
|
4455
4863
|
state.accountType = options.accountType;
|
|
4456
4864
|
if (options.accountType !== "individual") consola.info(`Using ${options.accountType} plan GitHub account`);
|
|
4457
4865
|
state.manualApprove = options.manual;
|
|
4458
|
-
state.rateLimitSeconds = options.rateLimit;
|
|
4459
|
-
state.rateLimitWait = options.rateLimitWait;
|
|
4460
4866
|
state.showToken = options.showToken;
|
|
4461
4867
|
state.autoCompact = options.autoCompact;
|
|
4868
|
+
if (options.rateLimit) initAdaptiveRateLimiter({
|
|
4869
|
+
baseRetryIntervalSeconds: options.retryInterval,
|
|
4870
|
+
requestIntervalSeconds: options.requestInterval,
|
|
4871
|
+
recoveryTimeoutMinutes: options.recoveryTimeout,
|
|
4872
|
+
consecutiveSuccessesForRecovery: options.consecutiveSuccesses
|
|
4873
|
+
});
|
|
4874
|
+
else consola.info("Rate limiting disabled");
|
|
4462
4875
|
if (options.autoCompact) consola.info("Auto-compact enabled: will compress context when exceeding token limits");
|
|
4463
4876
|
initHistory(options.history, options.historyLimit);
|
|
4464
4877
|
if (options.history) {
|
|
@@ -4545,16 +4958,30 @@ const start = defineCommand({
|
|
|
4545
4958
|
default: false,
|
|
4546
4959
|
description: "Enable manual request approval"
|
|
4547
4960
|
},
|
|
4548
|
-
"rate-limit": {
|
|
4549
|
-
alias: "r",
|
|
4550
|
-
type: "string",
|
|
4551
|
-
description: "Rate limit in seconds between requests"
|
|
4552
|
-
},
|
|
4553
|
-
wait: {
|
|
4554
|
-
alias: "w",
|
|
4961
|
+
"no-rate-limit": {
|
|
4555
4962
|
type: "boolean",
|
|
4556
4963
|
default: false,
|
|
4557
|
-
description: "
|
|
4964
|
+
description: "Disable adaptive rate limiting"
|
|
4965
|
+
},
|
|
4966
|
+
"retry-interval": {
|
|
4967
|
+
type: "string",
|
|
4968
|
+
default: "10",
|
|
4969
|
+
description: "Seconds to wait before retrying after rate limit error (default: 10)"
|
|
4970
|
+
},
|
|
4971
|
+
"request-interval": {
|
|
4972
|
+
type: "string",
|
|
4973
|
+
default: "10",
|
|
4974
|
+
description: "Seconds between requests in rate-limited mode (default: 10)"
|
|
4975
|
+
},
|
|
4976
|
+
"recovery-timeout": {
|
|
4977
|
+
type: "string",
|
|
4978
|
+
default: "10",
|
|
4979
|
+
description: "Minutes before attempting to recover from rate-limited mode (default: 10)"
|
|
4980
|
+
},
|
|
4981
|
+
"consecutive-successes": {
|
|
4982
|
+
type: "string",
|
|
4983
|
+
default: "5",
|
|
4984
|
+
description: "Number of consecutive successes needed to recover from rate-limited mode (default: 5)"
|
|
4558
4985
|
},
|
|
4559
4986
|
"github-token": {
|
|
4560
4987
|
alias: "g",
|
|
@@ -4577,10 +5004,10 @@ const start = defineCommand({
|
|
|
4577
5004
|
default: false,
|
|
4578
5005
|
description: "Initialize proxy from environment variables"
|
|
4579
5006
|
},
|
|
4580
|
-
history: {
|
|
5007
|
+
"no-history": {
|
|
4581
5008
|
type: "boolean",
|
|
4582
5009
|
default: false,
|
|
4583
|
-
description: "
|
|
5010
|
+
description: "Disable request history recording and Web UI"
|
|
4584
5011
|
},
|
|
4585
5012
|
"history-limit": {
|
|
4586
5013
|
type: "string",
|
|
@@ -4594,21 +5021,22 @@ const start = defineCommand({
|
|
|
4594
5021
|
}
|
|
4595
5022
|
},
|
|
4596
5023
|
run({ args }) {
|
|
4597
|
-
const rateLimitRaw = args["rate-limit"];
|
|
4598
|
-
const rateLimit = rateLimitRaw === void 0 ? void 0 : Number.parseInt(rateLimitRaw, 10);
|
|
4599
5024
|
return runServer({
|
|
4600
5025
|
port: Number.parseInt(args.port, 10),
|
|
4601
5026
|
host: args.host,
|
|
4602
5027
|
verbose: args.verbose,
|
|
4603
5028
|
accountType: args["account-type"],
|
|
4604
5029
|
manual: args.manual,
|
|
4605
|
-
rateLimit,
|
|
4606
|
-
|
|
5030
|
+
rateLimit: !args["no-rate-limit"],
|
|
5031
|
+
retryInterval: Number.parseInt(args["retry-interval"], 10),
|
|
5032
|
+
requestInterval: Number.parseInt(args["request-interval"], 10),
|
|
5033
|
+
recoveryTimeout: Number.parseInt(args["recovery-timeout"], 10),
|
|
5034
|
+
consecutiveSuccesses: Number.parseInt(args["consecutive-successes"], 10),
|
|
4607
5035
|
githubToken: args["github-token"],
|
|
4608
5036
|
claudeCode: args["claude-code"],
|
|
4609
5037
|
showToken: args["show-token"],
|
|
4610
5038
|
proxyEnv: args["proxy-env"],
|
|
4611
|
-
history: args
|
|
5039
|
+
history: !args["no-history"],
|
|
4612
5040
|
historyLimit: Number.parseInt(args["history-limit"], 10),
|
|
4613
5041
|
autoCompact: args["auto-compact"]
|
|
4614
5042
|
});
|