@hsupu/copilot-api 0.7.4 → 0.7.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -0
- package/dist/main.js +1169 -432
- package/dist/main.js.map +1 -1
- package/package.json +1 -1
package/dist/main.js
CHANGED
|
@@ -3,8 +3,9 @@ import { defineCommand, runMain } from "citty";
|
|
|
3
3
|
import consola from "consola";
|
|
4
4
|
import fs from "node:fs/promises";
|
|
5
5
|
import os from "node:os";
|
|
6
|
-
import path from "node:path";
|
|
6
|
+
import path, { dirname, join } from "node:path";
|
|
7
7
|
import { randomUUID } from "node:crypto";
|
|
8
|
+
import { existsSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
|
|
8
9
|
import clipboard from "clipboardy";
|
|
9
10
|
import { serve } from "srvx";
|
|
10
11
|
import invariant from "tiny-invariant";
|
|
@@ -44,7 +45,6 @@ async function ensureFile(filePath) {
|
|
|
44
45
|
// Mutable runtime state shared across the server.
// Additional fields (e.g. `copilotToken`) are attached at runtime —
// see setupCopilotToken, which assigns state.copilotToken.
const state = {
	accountType: "individual",
	manualApprove: false,
	showToken: false,
	autoCompact: false
};
|
|
@@ -136,7 +136,17 @@ function formatRequestTooLargeError() {
|
|
|
136
136
|
}
|
|
137
137
|
};
|
|
138
138
|
}
|
|
139
|
-
|
|
139
|
+
/**
 * Build an Anthropic-compatible error payload for rate limit exceeded (429).
 * Falls back to a generic message when the Copilot response gave none.
 */
function formatRateLimitError(copilotMessage) {
	const message = copilotMessage ?? "You have exceeded your rate limit. Please try again later.";
	const error = {
		type: "rate_limit_error",
		message
	};
	return {
		type: "error",
		error
	};
}
|
|
149
|
+
function forwardError(c, error) {
|
|
140
150
|
consola.error("Error occurred:", error);
|
|
141
151
|
if (error instanceof HTTPError) {
|
|
142
152
|
if (error.status === 413) {
|
|
@@ -160,6 +170,11 @@ async function forwardError(c, error) {
|
|
|
160
170
|
return c.json(formattedError, 400);
|
|
161
171
|
}
|
|
162
172
|
}
|
|
173
|
+
if (error.status === 429 || copilotError.error?.code === "rate_limited") {
|
|
174
|
+
const formattedError = formatRateLimitError(copilotError.error?.message);
|
|
175
|
+
consola.debug("Returning formatted rate limit error:", formattedError);
|
|
176
|
+
return c.json(formattedError, 429);
|
|
177
|
+
}
|
|
163
178
|
return c.json({ error: {
|
|
164
179
|
message: error.responseText,
|
|
165
180
|
type: "error"
|
|
@@ -290,6 +305,24 @@ async function pollAccessToken(deviceCode) {
|
|
|
290
305
|
//#region src/lib/token.ts
|
|
291
306
|
const readGithubToken = () => fs.readFile(PATHS.GITHUB_TOKEN_PATH, "utf8");
|
|
292
307
|
const writeGithubToken = (token) => fs.writeFile(PATHS.GITHUB_TOKEN_PATH, token);
|
|
308
|
+
/**
 * Refresh the Copilot token with exponential backoff retry.
 * Delays are 1s, 2s, 4s, ... capped at 30s between attempts.
 * Returns the new token on success, or null if all retries fail.
 */
async function refreshCopilotTokenWithRetry(maxRetries = 3) {
	let lastError = null;
	for (let attempt = 0; attempt < maxRetries; attempt++) {
		try {
			const { token } = await getCopilotToken();
			return token;
		} catch (error) {
			lastError = error;
			// Only sleep (and promise a retry) when another attempt remains;
			// the original slept after the final attempt too, delaying the
			// failure report and logging a misleading "retrying" message.
			if (attempt < maxRetries - 1) {
				const delay = Math.min(1e3 * 2 ** attempt, 3e4);
				consola.warn(`Token refresh attempt ${attempt + 1}/${maxRetries} failed, retrying in ${delay}ms`);
				await new Promise((resolve) => setTimeout(resolve, delay));
			}
		}
	}
	consola.error("All token refresh attempts failed:", lastError);
	return null;
}
|
|
293
326
|
const setupCopilotToken = async () => {
|
|
294
327
|
const { token, refresh_in } = await getCopilotToken();
|
|
295
328
|
state.copilotToken = token;
|
|
@@ -298,14 +331,12 @@ const setupCopilotToken = async () => {
|
|
|
298
331
|
const refreshInterval = (refresh_in - 60) * 1e3;
|
|
299
332
|
setInterval(async () => {
|
|
300
333
|
consola.debug("Refreshing Copilot token");
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
state.copilotToken =
|
|
334
|
+
const newToken = await refreshCopilotTokenWithRetry();
|
|
335
|
+
if (newToken) {
|
|
336
|
+
state.copilotToken = newToken;
|
|
304
337
|
consola.debug("Copilot token refreshed");
|
|
305
|
-
if (state.showToken) consola.info("Refreshed Copilot token:",
|
|
306
|
-
}
|
|
307
|
-
consola.error("Failed to refresh Copilot token (will retry on next interval):", error);
|
|
308
|
-
}
|
|
338
|
+
if (state.showToken) consola.info("Refreshed Copilot token:", newToken);
|
|
339
|
+
} else consola.error("Failed to refresh Copilot token after retries, using existing token");
|
|
309
340
|
}, refreshInterval);
|
|
310
341
|
};
|
|
311
342
|
async function setupGitHubToken(options) {
|
|
@@ -520,6 +551,567 @@ const logout = defineCommand({
|
|
|
520
551
|
}
|
|
521
552
|
});
|
|
522
553
|
|
|
554
|
+
//#endregion
|
|
555
|
+
//#region src/patch-claude.ts
|
|
556
|
+
// Claude Code versions this patcher knows how to handle, split by which
// minified pattern carries the context-limit constant (see PATTERNS):
// v2a versions use the HR() function pattern, v2b the BS9 variable pattern.
const SUPPORTED_VERSIONS = {
	v2a: {
		min: "2.0.0",
		max: "2.1.10"
	},
	v2b: {
		min: "2.1.11",
		max: "2.1.12"
	}
};
|
|
566
|
+
// Regexes that locate the context-window limit inside Claude Code's minified cli.js.
// funcOriginal matches the unpatched HR() helper (hard-coded 200000);
// funcPatched matches HR() with any numeric limit; `variable` captures the
// BS9 numeric constant used by newer versions.
const PATTERNS = {
	funcOriginal: /function HR\(A\)\{if\(A\.includes\("\[1m\]"\)\)return 1e6;return 200000\}/,
	funcPatched: /function HR\(A\)\{if\(A\.includes\("\[1m\]"\)\)return 1e6;return \d+\}/,
	variable: /var BS9=(\d+)/
};
|
|
571
|
+
/**
 * Split a semver version string into numeric parts for comparison.
 * Segments that do not parse as integers become 0.
 */
function parseVersion(version) {
	const parts = [];
	for (const segment of version.split(".")) {
		const value = Number.parseInt(segment, 10);
		parts.push(value || 0);
	}
	return parts;
}
/**
 * Compare two semver versions.
 * Returns: -1 if a < b, 0 if a == b, 1 if a > b.
 * Missing segments are treated as 0 (so "2.1" == "2.1.0").
 */
function compareVersions(a, b) {
	const left = parseVersion(a);
	const right = parseVersion(b);
	const length = Math.max(left.length, right.length);
	for (let index = 0; index < length; index++) {
		const difference = (left[index] || 0) - (right[index] || 0);
		if (difference < 0) return -1;
		if (difference > 0) return 1;
	}
	return 0;
}
|
|
593
|
+
/**
 * Map a Claude Code version onto the patch pattern it uses:
 * "func" for the v2a range, "variable" for the v2b range,
 * or null when the version is unsupported.
 */
function getPatternTypeForVersion(version) {
	const withinRange = (range) => compareVersions(version, range.min) >= 0 && compareVersions(version, range.max) <= 0;
	if (withinRange(SUPPORTED_VERSIONS.v2a)) return "func";
	if (withinRange(SUPPORTED_VERSIONS.v2b)) return "variable";
	return null;
}
|
|
598
|
+
/**
 * Get supported version range string for error messages.
 */
function getSupportedRangeString() {
	const { v2a, v2b } = SUPPORTED_VERSIONS;
	return `${v2a.min}-${v2a.max}, ${v2b.min}-${v2b.max}`;
}
|
|
604
|
+
/**
 * Read the Claude Code version from the package.json that sits
 * next to cli.js. Returns the version string, or null when the
 * manifest is missing, unreadable, malformed, or lacks a string version.
 */
function getClaudeCodeVersion(cliPath) {
	try {
		const manifestPath = join(dirname(cliPath), "package.json");
		if (!existsSync(manifestPath)) return null;
		const manifest = JSON.parse(readFileSync(manifestPath, "utf8"));
		const isRecord = typeof manifest === "object" && manifest !== null;
		if (isRecord && "version" in manifest && typeof manifest.version === "string") {
			return manifest.version;
		}
		return null;
	} catch {
		// I/O or JSON parse failure — treat as "version unknown".
		return null;
	}
}
|
|
618
|
+
/**
 * Search a volta installation for Claude Code cli.js files.
 * Checks both the global-packages image and every installed node image.
 * Returns all existing candidate paths (possibly empty).
 */
function findInVoltaTools(voltaHome) {
	const found = [];
	const packagesPath = join(voltaHome, "tools", "image", "packages", "@anthropic-ai", "claude-code", "lib", "node_modules", "@anthropic-ai", "claude-code", "cli.js");
	if (existsSync(packagesPath)) found.push(packagesPath);
	const nodeImagesDir = join(voltaHome, "tools", "image", "node");
	if (existsSync(nodeImagesDir)) {
		try {
			for (const nodeVersion of readdirSync(nodeImagesDir)) {
				const candidate = join(nodeImagesDir, nodeVersion, "lib", "node_modules", "@anthropic-ai", "claude-code", "cli.js");
				if (existsSync(candidate)) found.push(candidate);
			}
		} catch {
			// Unreadable node-images directory: return whatever was found so far.
		}
	}
	return found;
}
|
|
634
|
+
/**
 * Locate the Claude Code cli.js by probing common install locations:
 * volta, the npm prefix, well-known global node_modules dirs, and bun.
 * Returns the first existing path, or null when none is found.
 */
function findClaudeCodePath() {
	const home = process.env.HOME || "";
	const candidates = [];
	const voltaHome = process.env.VOLTA_HOME || join(home, ".volta");
	if (existsSync(voltaHome)) candidates.push(...findInVoltaTools(voltaHome));
	const npmPrefix = process.env.npm_config_prefix;
	if (npmPrefix) candidates.push(join(npmPrefix, "lib", "node_modules", "@anthropic-ai", "claude-code", "cli.js"));
	const globalBases = [
		join(home, ".npm-global", "lib", "node_modules"),
		"/usr/local/lib/node_modules",
		"/usr/lib/node_modules"
	];
	for (const base of globalBases) {
		candidates.push(join(base, "@anthropic-ai", "claude-code", "cli.js"));
	}
	const bunGlobal = join(home, ".bun", "install", "global");
	if (existsSync(bunGlobal)) candidates.push(join(bunGlobal, "node_modules", "@anthropic-ai", "claude-code", "cli.js"));
	return candidates.find((candidate) => existsSync(candidate)) ?? null;
}
|
|
654
|
+
/**
 * Extract the current context-window limit from cli.js source text.
 * Tries the BS9 variable pattern first, then the (patched) HR function
 * pattern. Returns the numeric limit, or null when neither matches.
 */
function getCurrentLimit(content) {
	const variableMatch = content.match(PATTERNS.variable);
	if (variableMatch) return Number.parseInt(variableMatch[1], 10);
	const functionMatch = content.match(PATTERNS.funcPatched);
	if (!functionMatch) return null;
	const limitMatch = functionMatch[0].match(/return (\d+)\}$/);
	return limitMatch ? Number.parseInt(limitMatch[1], 10) : null;
}
|
|
667
|
+
/**
 * Check if the Claude Code version at cliPath is supported for patching.
 * Returns { supported, version, patternType } plus an `error` message
 * when the version is undetectable or out of the supported ranges.
 */
function checkVersionSupport(cliPath) {
	const unsupported = (version, error) => ({
		supported: false,
		version,
		patternType: null,
		error
	});
	const version = getClaudeCodeVersion(cliPath);
	if (!version) return unsupported(null, "Could not detect Claude Code version");
	const patternType = getPatternTypeForVersion(version);
	if (!patternType) return unsupported(version, `Version ${version} is not supported. Supported: ${getSupportedRangeString()}`);
	return {
		supported: true,
		version,
		patternType
	};
}
|
|
691
|
+
/**
 * Patch Claude Code to use a different context limit.
 * Returns true on success (or when already patched at newLimit),
 * false when the version is unsupported or the expected pattern
 * cannot be found in cli.js.
 */
function patchClaudeCode(cliPath, newLimit) {
	const content = readFileSync(cliPath, "utf8");
	const versionCheck = checkVersionSupport(cliPath);
	if (!versionCheck.supported) {
		consola.error(versionCheck.error);
		return false;
	}
	consola.info(`Claude Code version: ${versionCheck.version}`);
	if (getCurrentLimit(content) === newLimit) {
		consola.info(`Already patched with limit ${newLimit}`);
		return true;
	}
	let newContent;
	if (versionCheck.patternType === "variable") newContent = content.replace(PATTERNS.variable, `var BS9=${newLimit}`);
	else {
		const replacement = `function HR(A){if(A.includes("[1m]"))return 1e6;return ${newLimit}}`;
		// Prefer the pristine pattern; fall back to re-patching an already-patched build.
		const pattern = PATTERNS.funcOriginal.test(content) ? PATTERNS.funcOriginal : PATTERNS.funcPatched;
		newContent = content.replace(pattern, replacement);
	}
	// Bug fix: if the pattern did not match, replace() was a no-op. The original
	// still rewrote the file and returned true, making the caller's
	// "pattern not found" branch unreachable. Report failure instead.
	if (newContent === content) return false;
	writeFileSync(cliPath, newContent);
	return true;
}
|
|
716
|
+
/**
 * Restore Claude Code to the original 200k context limit.
 * Returns true on success (or when already at 200000), false when the
 * version is unsupported or the expected pattern cannot be found.
 */
function restoreClaudeCode(cliPath) {
	const content = readFileSync(cliPath, "utf8");
	const versionCheck = checkVersionSupport(cliPath);
	if (!versionCheck.supported) {
		consola.error(versionCheck.error);
		return false;
	}
	consola.info(`Claude Code version: ${versionCheck.version}`);
	if (getCurrentLimit(content) === 2e5) {
		consola.info("Already at original 200000 limit");
		return true;
	}
	let newContent;
	if (versionCheck.patternType === "variable") newContent = content.replace(PATTERNS.variable, "var BS9=200000");
	else newContent = content.replace(PATTERNS.funcPatched, "function HR(A){if(A.includes(\"[1m]\"))return 1e6;return 200000}");
	// Bug fix: a no-op replace means the pattern was not found. The original
	// still rewrote the file and returned true; report failure instead so the
	// caller's "pattern not found" path can fire.
	if (newContent === content) return false;
	writeFileSync(cliPath, newContent);
	return true;
}
|
|
737
|
+
/** Print the detected Claude Code version and the current patch status. */
function showStatus(cliPath, currentLimit) {
	const version = getClaudeCodeVersion(cliPath);
	if (version) consola.info(`Claude Code version: ${version}`);
	if (currentLimit === null) {
		consola.warn("Could not detect current limit - CLI may have been updated");
		consola.info("Look for the BS9 variable or HR function pattern in cli.js");
		return;
	}
	const statusLine = currentLimit === 2e5
		? "Status: Original (200k context window)"
		: `Status: Patched (${currentLimit} context window)`;
	consola.info(statusLine);
}
|
|
746
|
+
// CLI command: patch (or restore/inspect) Claude Code's context-window limit.
const patchClaude = defineCommand({
	meta: {
		name: "patch-claude",
		description: "Patch Claude Code's context window limit to match Copilot's limits"
	},
	args: {
		limit: {
			alias: "l",
			type: "string",
			default: "128000",
			description: "Context window limit in tokens (default: 128000 for Copilot)"
		},
		restore: {
			alias: "r",
			type: "boolean",
			default: false,
			description: "Restore original 200k limit"
		},
		path: {
			alias: "p",
			type: "string",
			description: "Path to Claude Code cli.js (auto-detected if not specified)"
		},
		status: {
			alias: "s",
			type: "boolean",
			default: false,
			description: "Show current patch status without modifying"
		}
	},
	run({ args }) {
		// Resolve cli.js location: explicit --path wins, else probe known installs.
		const cliPath = args.path || findClaudeCodePath();
		if (!cliPath) {
			consola.error("Could not find Claude Code installation");
			consola.info("Searched in: volta, npm global, bun global");
			consola.info("Use --path to specify the path to cli.js manually");
			process.exit(1);
		}
		if (!existsSync(cliPath)) {
			consola.error(`File not found: ${cliPath}`);
			process.exit(1);
		}
		consola.info(`Claude Code path: ${cliPath}`);
		const content = readFileSync(cliPath, "utf8");
		const currentLimit = getCurrentLimit(content);
		// --status: report only, never modify the file.
		if (args.status) {
			showStatus(cliPath, currentLimit);
			return;
		}
		// --restore: put the original 200k limit back.
		if (args.restore) {
			if (restoreClaudeCode(cliPath)) consola.success("Restored to original 200k limit");
			else {
				consola.error("Failed to restore - pattern not found");
				consola.info("Claude Code may have been updated to a new version");
				process.exit(1);
			}
			return;
		}
		// Default action: apply the requested limit (must be a sane token count).
		const limit = Number.parseInt(args.limit, 10);
		if (Number.isNaN(limit) || limit < 1e3) {
			consola.error("Invalid limit value. Must be a number >= 1000");
			process.exit(1);
		}
		if (patchClaudeCode(cliPath, limit)) {
			consola.success(`Patched context window: 200000 → ${limit}`);
			consola.info("Note: You may need to re-run this after Claude Code updates");
		} else {
			consola.error("Failed to patch - pattern not found");
			consola.info("Claude Code may have been updated to a new version");
			consola.info("Check the cli.js for the HR function pattern");
			process.exit(1);
		}
	}
});
|
|
820
|
+
|
|
821
|
+
//#endregion
|
|
822
|
+
//#region src/lib/adaptive-rate-limiter.ts
|
|
823
|
+
// Defaults for the adaptive rate limiter; each key's unit is in its name.
const DEFAULT_CONFIG$1 = {
	baseRetryIntervalSeconds: 10,
	maxRetryIntervalSeconds: 120,
	requestIntervalSeconds: 10,
	recoveryTimeoutMinutes: 10,
	consecutiveSuccessesForRecovery: 5,
	// Per-request intervals (seconds) stepped through during gradual recovery;
	// the final 0 means full speed (see AdaptiveRateLimiter.executeInRecoveringMode).
	gradualRecoverySteps: [
		5,
		2,
		1,
		0
	]
};
|
|
836
|
+
/**
 * Adaptive rate limiter that switches between normal, rate-limited, and recovering modes
 * based on API responses.
 */
var AdaptiveRateLimiter = class {
	config;
	// One of "normal", "rate-limited", "recovering" (see execute()).
	mode = "normal";
	// FIFO of pending requests while rate-limited; head is retried in place on 429.
	queue = [];
	// True while processQueue() is draining — guards against concurrent drains.
	processing = false;
	// Timestamp (ms) of the most recent 429, or null when not rate-limited.
	rateLimitedAt = null;
	consecutiveSuccesses = 0;
	// Timestamp (ms) when the last request started; 0 until the first request.
	lastRequestTime = 0;
	/** Current step in gradual recovery (index into gradualRecoverySteps) */
	recoveryStepIndex = 0;
	constructor(config = {}) {
		// Caller overrides are shallow-merged over DEFAULT_CONFIG$1.
		this.config = {
			...DEFAULT_CONFIG$1,
			...config
		};
	}
	/**
	 * Execute a request with adaptive rate limiting.
	 * Returns a promise that resolves when the request succeeds.
	 * The request will be retried automatically on 429 errors.
	 */
	async execute(fn) {
		if (this.mode === "normal") return this.executeInNormalMode(fn);
		if (this.mode === "recovering") return this.executeInRecoveringMode(fn);
		return this.enqueue(fn);
	}
	/**
	 * Check if an error is a rate limit error (429) and extract Retry-After if available.
	 * Also recognizes a `rate_limited` error code inside a JSON responseText body.
	 */
	isRateLimitError(error) {
		if (error && typeof error === "object") {
			if ("status" in error && error.status === 429) return {
				isRateLimit: true,
				retryAfter: this.extractRetryAfter(error)
			};
			if ("responseText" in error && typeof error.responseText === "string") try {
				const parsed = JSON.parse(error.responseText);
				if (parsed && typeof parsed === "object" && "error" in parsed && parsed.error && typeof parsed.error === "object" && "code" in parsed.error && parsed.error.code === "rate_limited") return { isRateLimit: true };
			} catch {}
		}
		return { isRateLimit: false };
	}
	/**
	 * Extract Retry-After value from error response.
	 * Checks top-level `retry_after` then `error.retry_after` in the JSON body;
	 * returns undefined when neither is present or the body is not JSON.
	 */
	extractRetryAfter(error) {
		if (!error || typeof error !== "object") return void 0;
		if ("responseText" in error && typeof error.responseText === "string") try {
			const parsed = JSON.parse(error.responseText);
			if (parsed && typeof parsed === "object" && "retry_after" in parsed && typeof parsed.retry_after === "number") return parsed.retry_after;
			if (parsed && typeof parsed === "object" && "error" in parsed && parsed.error && typeof parsed.error === "object" && "retry_after" in parsed.error && typeof parsed.error.retry_after === "number") return parsed.error.retry_after;
		} catch {}
	}
	/**
	 * Execute in normal mode - full speed.
	 * A 429 flips the limiter into rate-limited mode and re-queues this request.
	 */
	async executeInNormalMode(fn) {
		try {
			return {
				result: await fn(),
				queueWaitMs: 0
			};
		} catch (error) {
			const { isRateLimit, retryAfter } = this.isRateLimitError(error);
			if (isRateLimit) {
				this.enterRateLimitedMode();
				return this.enqueue(fn, retryAfter);
			}
			throw error;
		}
	}
	/**
	 * Execute in recovering mode - gradual speedup.
	 * Paces requests by the current gradualRecoverySteps interval and advances
	 * a step per success; a 429 drops back to rate-limited mode.
	 */
	async executeInRecoveringMode(fn) {
		const startTime = Date.now();
		const currentInterval = this.config.gradualRecoverySteps[this.recoveryStepIndex] ?? 0;
		if (currentInterval > 0) {
			const elapsedMs = Date.now() - this.lastRequestTime;
			const requiredMs = currentInterval * 1e3;
			if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
				const waitMs = requiredMs - elapsedMs;
				await this.sleep(waitMs);
			}
		}
		this.lastRequestTime = Date.now();
		try {
			const result = await fn();
			this.recoveryStepIndex++;
			if (this.recoveryStepIndex >= this.config.gradualRecoverySteps.length) this.completeRecovery();
			else {
				const nextInterval = this.config.gradualRecoverySteps[this.recoveryStepIndex] ?? 0;
				consola.info(`[RateLimiter] Recovery step ${this.recoveryStepIndex}/${this.config.gradualRecoverySteps.length} (next interval: ${nextInterval}s)`);
			}
			const queueWaitMs = Date.now() - startTime;
			return {
				result,
				queueWaitMs
			};
		} catch (error) {
			const { isRateLimit, retryAfter } = this.isRateLimitError(error);
			if (isRateLimit) {
				consola.warn("[RateLimiter] Hit rate limit during recovery, returning to rate-limited mode");
				this.enterRateLimitedMode();
				return this.enqueue(fn, retryAfter);
			}
			throw error;
		}
	}
	/**
	 * Enter rate-limited mode (idempotent).
	 */
	enterRateLimitedMode() {
		if (this.mode === "rate-limited") return;
		this.mode = "rate-limited";
		this.rateLimitedAt = Date.now();
		this.consecutiveSuccesses = 0;
		consola.warn(`[RateLimiter] Entering rate-limited mode. Requests will be queued with exponential backoff (base: ${this.config.baseRetryIntervalSeconds}s).`);
	}
	/**
	 * Check if we should try to recover to normal mode:
	 * either enough consecutive successes, or the recovery timeout elapsed.
	 */
	shouldAttemptRecovery() {
		if (this.consecutiveSuccesses >= this.config.consecutiveSuccessesForRecovery) {
			consola.info(`[RateLimiter] ${this.consecutiveSuccesses} consecutive successes. Starting gradual recovery.`);
			return true;
		}
		if (this.rateLimitedAt) {
			const elapsed = Date.now() - this.rateLimitedAt;
			const timeout = this.config.recoveryTimeoutMinutes * 60 * 1e3;
			if (elapsed >= timeout) {
				consola.info(`[RateLimiter] ${this.config.recoveryTimeoutMinutes} minutes elapsed. Starting gradual recovery.`);
				return true;
			}
		}
		return false;
	}
	/**
	 * Start gradual recovery mode (resets counters and step index).
	 */
	startGradualRecovery() {
		this.mode = "recovering";
		this.recoveryStepIndex = 0;
		this.rateLimitedAt = null;
		this.consecutiveSuccesses = 0;
		const firstInterval = this.config.gradualRecoverySteps[0] ?? 0;
		consola.info(`[RateLimiter] Starting gradual recovery (${this.config.gradualRecoverySteps.length} steps, first interval: ${firstInterval}s)`);
	}
	/**
	 * Complete recovery to normal mode.
	 */
	completeRecovery() {
		this.mode = "normal";
		this.recoveryStepIndex = 0;
		consola.success("[RateLimiter] Recovery complete. Full speed enabled.");
	}
	/**
	 * Enqueue a request for later execution.
	 * The returned promise settles when processQueue() eventually runs it.
	 */
	enqueue(fn, retryAfterSeconds) {
		return new Promise((resolve, reject) => {
			const request = {
				execute: fn,
				resolve,
				reject,
				retryCount: 0,
				retryAfterSeconds,
				enqueuedAt: Date.now()
			};
			this.queue.push(request);
			if (this.queue.length > 1) {
				const position = this.queue.length;
				const estimatedWait = (position - 1) * this.config.requestIntervalSeconds;
				consola.info(`[RateLimiter] Request queued (position ${position}, ~${estimatedWait}s wait)`);
			}
			// Fire-and-forget: the drain loop resolves/rejects the promise above.
			this.processQueue();
		});
	}
	/**
	 * Calculate retry interval with exponential backoff.
	 * A positive server-provided Retry-After takes precedence; otherwise
	 * base * 2^retryCount, capped at maxRetryIntervalSeconds.
	 */
	calculateRetryInterval(request) {
		if (request.retryAfterSeconds !== void 0 && request.retryAfterSeconds > 0) return request.retryAfterSeconds;
		const backoff = this.config.baseRetryIntervalSeconds * Math.pow(2, request.retryCount);
		return Math.min(backoff, this.config.maxRetryIntervalSeconds);
	}
	/**
	 * Process the queue: serially executes queued requests, paces them,
	 * and retries 429s in place (the failed request stays at the head).
	 */
	async processQueue() {
		if (this.processing) return;
		this.processing = true;
		while (this.queue.length > 0) {
			const request = this.queue[0];
			if (this.shouldAttemptRecovery()) this.startGradualRecovery();
			const elapsedMs = Date.now() - this.lastRequestTime;
			const requiredMs = (request.retryCount > 0 ? this.calculateRetryInterval(request) : this.config.requestIntervalSeconds) * 1e3;
			if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
				const waitMs = requiredMs - elapsedMs;
				const waitSec = Math.ceil(waitMs / 1e3);
				consola.info(`[RateLimiter] Waiting ${waitSec}s before next request...`);
				await this.sleep(waitMs);
			}
			this.lastRequestTime = Date.now();
			try {
				const result = await request.execute();
				this.queue.shift();
				this.consecutiveSuccesses++;
				request.retryAfterSeconds = void 0;
				const queueWaitMs = Date.now() - request.enqueuedAt;
				request.resolve({
					result,
					queueWaitMs
				});
				if (this.mode === "rate-limited") consola.info(`[RateLimiter] Request succeeded (${this.consecutiveSuccesses}/${this.config.consecutiveSuccessesForRecovery} for recovery)`);
			} catch (error) {
				const { isRateLimit, retryAfter } = this.isRateLimitError(error);
				if (isRateLimit) {
					// Keep the request queued (not shifted) and retry it after backoff.
					request.retryCount++;
					request.retryAfterSeconds = retryAfter;
					this.consecutiveSuccesses = 0;
					this.rateLimitedAt = Date.now();
					const nextInterval = this.calculateRetryInterval(request);
					const source = retryAfter ? "server Retry-After" : "exponential backoff";
					consola.warn(`[RateLimiter] Request failed with 429 (retry #${request.retryCount}). Retrying in ${nextInterval}s (${source})...`);
				} else {
					// Non-rate-limit failure: surface it to the caller and move on.
					this.queue.shift();
					request.reject(error);
				}
			}
		}
		this.processing = false;
	}
	// Promise-based delay helper.
	sleep(ms) {
		return new Promise((resolve) => setTimeout(resolve, ms));
	}
	/**
	 * Get current status for debugging/monitoring
	 */
	getStatus() {
		return {
			mode: this.mode,
			queueLength: this.queue.length,
			consecutiveSuccesses: this.consecutiveSuccesses,
			rateLimitedAt: this.rateLimitedAt
		};
	}
};
|
|
1088
|
+
// Module-level singleton; null until initAdaptiveRateLimiter() is called.
let rateLimiterInstance = null;
/**
 * Initialize the adaptive rate limiter with configuration
 */
function initAdaptiveRateLimiter(config = {}) {
	rateLimiterInstance = new AdaptiveRateLimiter(config);
	// Resolve the effective values (override or default) purely for the log line below.
	const baseRetry = config.baseRetryIntervalSeconds ?? DEFAULT_CONFIG$1.baseRetryIntervalSeconds;
	const maxRetry = config.maxRetryIntervalSeconds ?? DEFAULT_CONFIG$1.maxRetryIntervalSeconds;
	const interval = config.requestIntervalSeconds ?? DEFAULT_CONFIG$1.requestIntervalSeconds;
	const recovery = config.recoveryTimeoutMinutes ?? DEFAULT_CONFIG$1.recoveryTimeoutMinutes;
	const successes = config.consecutiveSuccessesForRecovery ?? DEFAULT_CONFIG$1.consecutiveSuccessesForRecovery;
	const steps = config.gradualRecoverySteps ?? DEFAULT_CONFIG$1.gradualRecoverySteps;
	consola.info(`[RateLimiter] Initialized (backoff: ${baseRetry}s-${maxRetry}s, interval: ${interval}s, recovery: ${recovery}min or ${successes} successes, gradual: [${steps.join("s, ")}s])`);
}
|
|
1102
|
+
/**
 * Execute a request with adaptive rate limiting.
 * If rate limiter is not initialized, executes immediately.
 * Returns the result along with queue wait time.
 */
async function executeWithAdaptiveRateLimit(fn) {
	if (rateLimiterInstance) return rateLimiterInstance.execute(fn);
	const result = await fn();
	return {
		result,
		queueWaitMs: 0
	};
}
|
|
1114
|
+
|
|
523
1115
|
//#endregion
|
|
524
1116
|
//#region src/lib/history.ts
|
|
525
1117
|
function generateId$1() {
|
|
@@ -771,44 +1363,74 @@ function exportHistory(format = "json") {
|
|
|
771
1363
|
|
|
772
1364
|
//#endregion
|
|
773
1365
|
//#region src/lib/proxy.ts
|
|
1366
|
+
/**
 * Custom dispatcher that routes requests through proxies based on environment variables.
 * Extends Agent to properly inherit the Dispatcher interface.
 */
var ProxyDispatcher = class extends Agent {
	// Cache of proxy agents keyed by proxy URL so each proxy gets one agent.
	proxies = /* @__PURE__ */ new Map();
	dispatch(options, handler) {
		try {
			const origin = this.getOriginUrl(options.origin);
			const proxyUrl = this.getProxyUrl(origin);
			if (!proxyUrl) {
				consola.debug(`HTTP proxy bypass: ${origin.hostname}`);
				return super.dispatch(options, handler);
			}
			const agent = this.getOrCreateProxyAgent(proxyUrl);
			consola.debug(`HTTP proxy route: ${origin.hostname} via ${this.formatProxyLabel(proxyUrl)}`);
			return agent.dispatch(options, handler);
		} catch {
			// Any failure while resolving the proxy falls back to a direct dispatch.
			return super.dispatch(options, handler);
		}
	}
	// Normalize the dispatch origin (string or URL) to a URL instance.
	getOriginUrl(origin) {
		return typeof origin === "string" ? new URL(origin) : origin;
	}
	// Resolve the proxy URL for an origin via getProxyForUrl; undefined means "no proxy".
	getProxyUrl(origin) {
		const raw = getProxyForUrl(origin.toString());
		return raw && raw.length > 0 ? raw : void 0;
	}
	// Return the cached agent for this proxy URL, creating it on first use.
	getOrCreateProxyAgent(proxyUrl) {
		let agent = this.proxies.get(proxyUrl);
		if (!agent) {
			agent = new ProxyAgent(proxyUrl);
			this.proxies.set(proxyUrl, agent);
		}
		return agent;
	}
	// Log only protocol and host of the proxy URL (avoids leaking credentials in the URL).
	formatProxyLabel(proxyUrl) {
		try {
			const u = new URL(proxyUrl);
			return `${u.protocol}//${u.host}`;
		} catch {
			return proxyUrl;
		}
	}
	async close() {
		await super.close();
		await Promise.all([...this.proxies.values()].map((p) => p.close()));
		this.proxies.clear();
	}
	// Supports the three destroy call shapes: (callback), (err, callback), and (err) -> Promise.
	destroy(errOrCallback, callback) {
		for (const agent of this.proxies.values()) if (typeof errOrCallback === "function") agent.destroy(errOrCallback);
		else if (callback) agent.destroy(errOrCallback ?? null, callback);
		else agent.destroy(errOrCallback ?? null).catch(() => {});
		this.proxies.clear();
		if (typeof errOrCallback === "function") {
			super.destroy(errOrCallback);
			return;
		} else if (callback) {
			super.destroy(errOrCallback ?? null, callback);
			return;
		} else return super.destroy(errOrCallback ?? null);
	}
};
|
|
774
1429
|
function initProxyFromEnv() {
|
|
775
1430
|
if (typeof Bun !== "undefined") return;
|
|
776
1431
|
try {
|
|
777
|
-
const
|
|
778
|
-
|
|
779
|
-
setGlobalDispatcher({
|
|
780
|
-
dispatch(options, handler) {
|
|
781
|
-
try {
|
|
782
|
-
const origin = typeof options.origin === "string" ? new URL(options.origin) : options.origin;
|
|
783
|
-
const raw = getProxyForUrl(origin.toString());
|
|
784
|
-
const proxyUrl = raw && raw.length > 0 ? raw : void 0;
|
|
785
|
-
if (!proxyUrl) {
|
|
786
|
-
consola.debug(`HTTP proxy bypass: ${origin.hostname}`);
|
|
787
|
-
return direct.dispatch(options, handler);
|
|
788
|
-
}
|
|
789
|
-
let agent = proxies.get(proxyUrl);
|
|
790
|
-
if (!agent) {
|
|
791
|
-
agent = new ProxyAgent(proxyUrl);
|
|
792
|
-
proxies.set(proxyUrl, agent);
|
|
793
|
-
}
|
|
794
|
-
let label = proxyUrl;
|
|
795
|
-
try {
|
|
796
|
-
const u = new URL(proxyUrl);
|
|
797
|
-
label = `${u.protocol}//${u.host}`;
|
|
798
|
-
} catch {}
|
|
799
|
-
consola.debug(`HTTP proxy route: ${origin.hostname} via ${label}`);
|
|
800
|
-
return agent.dispatch(options, handler);
|
|
801
|
-
} catch {
|
|
802
|
-
return direct.dispatch(options, handler);
|
|
803
|
-
}
|
|
804
|
-
},
|
|
805
|
-
close() {
|
|
806
|
-
return direct.close();
|
|
807
|
-
},
|
|
808
|
-
destroy() {
|
|
809
|
-
return direct.destroy();
|
|
810
|
-
}
|
|
811
|
-
});
|
|
1432
|
+
const dispatcher = new ProxyDispatcher();
|
|
1433
|
+
setGlobalDispatcher(dispatcher);
|
|
812
1434
|
consola.debug("HTTP proxy configured from environment (per-URL)");
|
|
813
1435
|
} catch (err) {
|
|
814
1436
|
consola.debug("Proxy setup skipped:", err);
|
|
@@ -894,23 +1516,66 @@ function formatTokens(input, output) {
|
|
|
894
1516
|
* Console renderer that shows request lifecycle with apt-get style footer
|
|
895
1517
|
*
|
|
896
1518
|
* Log format:
|
|
897
|
-
* - Start: [....] METHOD /path model-name
|
|
898
|
-
* - Streaming: [<-->] METHOD /path model-name streaming...
|
|
899
|
-
* - Complete: [ OK ] METHOD /path 200 1.2s 1.5K/500
|
|
1519
|
+
* - Start: [....] HH:MM:SS METHOD /path model-name (debug only, dim)
|
|
1520
|
+
* - Streaming: [<-->] HH:MM:SS METHOD /path model-name streaming... (dim)
|
|
1521
|
+
* - Complete: [ OK ] HH:MM:SS METHOD /path model-name 200 1.2s 1.5K/500 (colored)
|
|
1522
|
+
* - Error: [FAIL] HH:MM:SS METHOD /path model-name 500 1.2s: error message (red)
|
|
1523
|
+
*
|
|
1524
|
+
* Color scheme for completed requests:
|
|
1525
|
+
* - Prefix: green (success) / red (error)
|
|
1526
|
+
* - Time: dim
|
|
1527
|
+
* - Method: cyan
|
|
1528
|
+
* - Path: white
|
|
1529
|
+
* - Model: magenta
|
|
1530
|
+
* - Status: green (success) / red (error)
|
|
1531
|
+
* - Duration: yellow
|
|
1532
|
+
* - Tokens: blue
|
|
900
1533
|
*
|
|
901
1534
|
* Features:
|
|
902
|
-
* -
|
|
903
|
-
* -
|
|
904
|
-
* -
|
|
1535
|
+
* - Start lines only shown in debug mode (--verbose)
|
|
1536
|
+
* - Streaming lines are dim (less important)
|
|
1537
|
+
* - /history API requests are always dim
|
|
1538
|
+
* - Sticky footer shows active request count
|
|
1539
|
+
* - Intercepts consola output to properly handle footer
|
|
905
1540
|
*/
|
|
906
1541
|
var ConsoleRenderer = class {
|
|
907
1542
|
activeRequests = /* @__PURE__ */ new Map();
|
|
908
1543
|
showActive;
|
|
909
1544
|
footerVisible = false;
|
|
910
1545
|
isTTY;
|
|
1546
|
+
originalReporters = [];
|
|
911
1547
|
constructor(options) {
|
|
912
1548
|
this.showActive = options?.showActive ?? true;
|
|
913
1549
|
this.isTTY = process.stdout.isTTY;
|
|
1550
|
+
this.installConsolaReporter();
|
|
1551
|
+
}
|
|
1552
|
+
/**
|
|
1553
|
+
* Install a custom consola reporter that coordinates with footer
|
|
1554
|
+
*/
|
|
1555
|
+
installConsolaReporter() {
|
|
1556
|
+
this.originalReporters = [...consola.options.reporters];
|
|
1557
|
+
consola.setReporters([{ log: (logObj) => {
|
|
1558
|
+
this.clearFooterForLog();
|
|
1559
|
+
const message = logObj.args.map((arg) => typeof arg === "string" ? arg : JSON.stringify(arg)).join(" ");
|
|
1560
|
+
const prefix = this.getLogPrefix(logObj.type);
|
|
1561
|
+
if (prefix) process.stdout.write(`${prefix} ${message}\n`);
|
|
1562
|
+
else process.stdout.write(`${message}\n`);
|
|
1563
|
+
this.renderFooter();
|
|
1564
|
+
} }]);
|
|
1565
|
+
}
|
|
1566
|
+
/**
|
|
1567
|
+
* Get log prefix based on log type
|
|
1568
|
+
*/
|
|
1569
|
+
getLogPrefix(type) {
|
|
1570
|
+
switch (type) {
|
|
1571
|
+
case "error":
|
|
1572
|
+
case "fatal": return pc.red("✖");
|
|
1573
|
+
case "warn": return pc.yellow("⚠");
|
|
1574
|
+
case "info": return pc.cyan("ℹ");
|
|
1575
|
+
case "success": return pc.green("✔");
|
|
1576
|
+
case "debug": return pc.gray("●");
|
|
1577
|
+
default: return "";
|
|
1578
|
+
}
|
|
914
1579
|
}
|
|
915
1580
|
/**
|
|
916
1581
|
* Get footer text based on active request count
|
|
@@ -946,25 +1611,52 @@ var ConsoleRenderer = class {
|
|
|
946
1611
|
}
|
|
947
1612
|
}
|
|
948
1613
|
/**
|
|
1614
|
+
* Format a complete log line with colored parts
|
|
1615
|
+
*/
|
|
1616
|
+
formatLogLine(parts) {
|
|
1617
|
+
const { prefix, time, method, path: path$1, model, status, duration, tokens, queueWait, extra, isError, isDim } = parts;
|
|
1618
|
+
if (isDim) {
|
|
1619
|
+
const modelPart = model ? ` ${model}` : "";
|
|
1620
|
+
const extraPart = extra ? ` ${extra}` : "";
|
|
1621
|
+
return pc.dim(`${prefix} ${time} ${method} ${path$1}${modelPart}${extraPart}`);
|
|
1622
|
+
}
|
|
1623
|
+
const coloredPrefix = isError ? pc.red(prefix) : pc.green(prefix);
|
|
1624
|
+
const coloredTime = pc.dim(time);
|
|
1625
|
+
const coloredMethod = pc.cyan(method);
|
|
1626
|
+
const coloredPath = pc.white(path$1);
|
|
1627
|
+
const coloredModel = model ? pc.magenta(` ${model}`) : "";
|
|
1628
|
+
let result = `${coloredPrefix} ${coloredTime} ${coloredMethod} ${coloredPath}${coloredModel}`;
|
|
1629
|
+
if (status !== void 0) {
|
|
1630
|
+
const coloredStatus = isError ? pc.red(String(status)) : pc.green(String(status));
|
|
1631
|
+
result += ` ${coloredStatus}`;
|
|
1632
|
+
}
|
|
1633
|
+
if (duration) result += ` ${pc.yellow(duration)}`;
|
|
1634
|
+
if (queueWait) result += ` ${pc.dim(`(queued ${queueWait})`)}`;
|
|
1635
|
+
if (tokens) result += ` ${pc.blue(tokens)}`;
|
|
1636
|
+
if (extra) result += isError ? pc.red(extra) : extra;
|
|
1637
|
+
return result;
|
|
1638
|
+
}
|
|
1639
|
+
/**
|
|
949
1640
|
* Print a log line with proper footer handling
|
|
950
|
-
* 1. Clear footer if visible
|
|
951
|
-
* 2. Print log with newline
|
|
952
|
-
* 3. Re-render footer on new line (no newline after footer)
|
|
953
1641
|
*/
|
|
954
|
-
printLog(message
|
|
1642
|
+
printLog(message) {
|
|
955
1643
|
this.clearFooterForLog();
|
|
956
|
-
|
|
957
|
-
else consola.log(message);
|
|
1644
|
+
process.stdout.write(message + "\n");
|
|
958
1645
|
this.renderFooter();
|
|
959
1646
|
}
|
|
960
1647
|
onRequestStart(request) {
|
|
961
1648
|
this.activeRequests.set(request.id, request);
|
|
962
|
-
if (this.showActive) {
|
|
963
|
-
const
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
1649
|
+
if (this.showActive && consola.level >= 5) {
|
|
1650
|
+
const message = this.formatLogLine({
|
|
1651
|
+
prefix: "[....]",
|
|
1652
|
+
time: formatTime(),
|
|
1653
|
+
method: request.method,
|
|
1654
|
+
path: request.path,
|
|
1655
|
+
model: request.model,
|
|
1656
|
+
extra: request.queuePosition !== void 0 && request.queuePosition > 0 ? `[q#${request.queuePosition}]` : void 0,
|
|
1657
|
+
isDim: true
|
|
1658
|
+
});
|
|
1659
|
+
this.printLog(message);
|
|
968
1660
|
}
|
|
969
1661
|
}
|
|
970
1662
|
onRequestUpdate(id, update) {
|
|
@@ -972,28 +1664,39 @@ var ConsoleRenderer = class {
|
|
|
972
1664
|
if (!request) return;
|
|
973
1665
|
Object.assign(request, update);
|
|
974
1666
|
if (this.showActive && update.status === "streaming") {
|
|
975
|
-
const
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
1667
|
+
const message = this.formatLogLine({
|
|
1668
|
+
prefix: "[<-->]",
|
|
1669
|
+
time: formatTime(),
|
|
1670
|
+
method: request.method,
|
|
1671
|
+
path: request.path,
|
|
1672
|
+
model: request.model,
|
|
1673
|
+
extra: "streaming...",
|
|
1674
|
+
isDim: true
|
|
1675
|
+
});
|
|
1676
|
+
this.printLog(message);
|
|
979
1677
|
}
|
|
980
1678
|
}
|
|
981
1679
|
onRequestComplete(request) {
|
|
982
1680
|
this.activeRequests.delete(request.id);
|
|
983
|
-
const time = formatTime();
|
|
984
1681
|
const status = request.statusCode ?? 0;
|
|
985
|
-
const duration = formatDuration(request.durationMs ?? 0);
|
|
986
|
-
const tokens = request.model ? formatTokens(request.inputTokens, request.outputTokens) : "";
|
|
987
|
-
const modelInfo = request.model ? ` ${request.model}` : "";
|
|
988
1682
|
const isError = request.status === "error" || status >= 400;
|
|
989
|
-
const
|
|
990
|
-
const
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
1683
|
+
const tokens = request.model ? formatTokens(request.inputTokens, request.outputTokens) : void 0;
|
|
1684
|
+
const queueWait = request.queueWaitMs && request.queueWaitMs > 100 ? formatDuration(request.queueWaitMs) : void 0;
|
|
1685
|
+
const message = this.formatLogLine({
|
|
1686
|
+
prefix: isError ? "[FAIL]" : "[ OK ]",
|
|
1687
|
+
time: formatTime(),
|
|
1688
|
+
method: request.method,
|
|
1689
|
+
path: request.path,
|
|
1690
|
+
model: request.model,
|
|
1691
|
+
status,
|
|
1692
|
+
duration: formatDuration(request.durationMs ?? 0),
|
|
1693
|
+
queueWait,
|
|
1694
|
+
tokens,
|
|
1695
|
+
extra: isError && request.error ? `: ${request.error}` : void 0,
|
|
1696
|
+
isError,
|
|
1697
|
+
isDim: request.isHistoryAccess
|
|
1698
|
+
});
|
|
1699
|
+
this.printLog(message);
|
|
997
1700
|
}
|
|
998
1701
|
destroy() {
|
|
999
1702
|
if (this.footerVisible && this.isTTY) {
|
|
@@ -1001,6 +1704,7 @@ var ConsoleRenderer = class {
|
|
|
1001
1704
|
this.footerVisible = false;
|
|
1002
1705
|
}
|
|
1003
1706
|
this.activeRequests.clear();
|
|
1707
|
+
if (this.originalReporters.length > 0) consola.setReporters(this.originalReporters);
|
|
1004
1708
|
}
|
|
1005
1709
|
};
|
|
1006
1710
|
|
|
@@ -1392,14 +2096,14 @@ const getTokenCount = async (payload, model) => {
|
|
|
1392
2096
|
//#endregion
|
|
1393
2097
|
//#region src/lib/auto-compact.ts
|
|
1394
2098
|
const DEFAULT_CONFIG = {
|
|
1395
|
-
targetTokens:
|
|
1396
|
-
safetyMarginPercent:
|
|
2099
|
+
targetTokens: 12e4,
|
|
2100
|
+
safetyMarginPercent: 2
|
|
1397
2101
|
};
|
|
1398
2102
|
/**
|
|
1399
2103
|
* Check if payload needs compaction based on model limits.
|
|
1400
2104
|
* Uses a safety margin to account for token counting differences.
|
|
1401
2105
|
*/
|
|
1402
|
-
async function checkNeedsCompaction(payload, model, safetyMarginPercent =
|
|
2106
|
+
async function checkNeedsCompaction(payload, model, safetyMarginPercent = 2) {
|
|
1403
2107
|
const currentTokens = (await getTokenCount(payload, model)).input;
|
|
1404
2108
|
const rawLimit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
|
|
1405
2109
|
const limit = Math.floor(rawLimit * (1 - safetyMarginPercent / 100));
|
|
@@ -1442,6 +2146,13 @@ function extractSystemMessages(messages) {
|
|
|
1442
2146
|
};
|
|
1443
2147
|
}
|
|
1444
2148
|
/**
|
|
2149
|
+
* Extract tool_use ids from assistant messages with tool_calls.
|
|
2150
|
+
*/
|
|
2151
|
+
function getToolUseIds(message) {
|
|
2152
|
+
if (message.role === "assistant" && message.tool_calls) return message.tool_calls.map((tc) => tc.id);
|
|
2153
|
+
return [];
|
|
2154
|
+
}
|
|
2155
|
+
/**
|
|
1445
2156
|
* Find messages to keep from the end to stay under target tokens.
|
|
1446
2157
|
* Returns the starting index of messages to preserve.
|
|
1447
2158
|
*/
|
|
@@ -1456,6 +2167,41 @@ function findPreserveIndex(messages, targetTokens, systemTokens) {
|
|
|
1456
2167
|
return 0;
|
|
1457
2168
|
}
|
|
1458
2169
|
/**
|
|
2170
|
+
* Filter out orphaned tool_result messages that don't have a matching tool_use
|
|
2171
|
+
* in the preserved message list. This prevents API errors when truncation
|
|
2172
|
+
* separates tool_use/tool_result pairs.
|
|
2173
|
+
*/
|
|
2174
|
+
function filterOrphanedToolResults(messages) {
|
|
2175
|
+
const availableToolUseIds = /* @__PURE__ */ new Set();
|
|
2176
|
+
for (const msg of messages) for (const id of getToolUseIds(msg)) availableToolUseIds.add(id);
|
|
2177
|
+
const filteredMessages = [];
|
|
2178
|
+
let removedCount = 0;
|
|
2179
|
+
for (const msg of messages) {
|
|
2180
|
+
if (msg.role === "tool" && msg.tool_call_id && !availableToolUseIds.has(msg.tool_call_id)) {
|
|
2181
|
+
removedCount++;
|
|
2182
|
+
continue;
|
|
2183
|
+
}
|
|
2184
|
+
filteredMessages.push(msg);
|
|
2185
|
+
}
|
|
2186
|
+
if (removedCount > 0) consola.info(`Auto-compact: Removed ${removedCount} orphaned tool_result message(s) without matching tool_use`);
|
|
2187
|
+
return filteredMessages;
|
|
2188
|
+
}
|
|
2189
|
+
/**
|
|
2190
|
+
* Ensure the message list starts with a user message.
|
|
2191
|
+
* If it starts with assistant or tool messages, skip them until we find a user message.
|
|
2192
|
+
* This is required because OpenAI API expects conversations to start with user messages
|
|
2193
|
+
* (after system messages).
|
|
2194
|
+
*/
|
|
2195
|
+
function ensureStartsWithUser(messages) {
|
|
2196
|
+
let startIndex = 0;
|
|
2197
|
+
while (startIndex < messages.length) {
|
|
2198
|
+
if (messages[startIndex].role === "user") break;
|
|
2199
|
+
startIndex++;
|
|
2200
|
+
}
|
|
2201
|
+
if (startIndex > 0) consola.info(`Auto-compact: Skipped ${startIndex} leading non-user message(s) to ensure valid sequence`);
|
|
2202
|
+
return messages.slice(startIndex);
|
|
2203
|
+
}
|
|
2204
|
+
/**
|
|
1459
2205
|
* Calculate estimated tokens for system messages.
|
|
1460
2206
|
*/
|
|
1461
2207
|
function estimateSystemTokens(systemMessages) {
|
|
@@ -1473,6 +2219,7 @@ function createTruncationMarker(removedCount) {
|
|
|
1473
2219
|
/**
|
|
1474
2220
|
* Perform auto-compaction on a payload that exceeds token limits.
|
|
1475
2221
|
* This uses simple truncation - no LLM calls required.
|
|
2222
|
+
* Uses iterative approach with decreasing target tokens until under limit.
|
|
1476
2223
|
*/
|
|
1477
2224
|
async function autoCompact(payload, model, config = {}) {
|
|
1478
2225
|
const cfg = {
|
|
@@ -1493,8 +2240,49 @@ async function autoCompact(payload, model, config = {}) {
|
|
|
1493
2240
|
const { systemMessages, remainingMessages } = extractSystemMessages(payload.messages);
|
|
1494
2241
|
const systemTokens = estimateSystemTokens(systemMessages);
|
|
1495
2242
|
consola.debug(`Auto-compact: ${systemMessages.length} system messages (~${systemTokens} tokens)`);
|
|
1496
|
-
const
|
|
1497
|
-
const
|
|
2243
|
+
const MAX_ITERATIONS = 5;
|
|
2244
|
+
const MIN_TARGET = 2e4;
|
|
2245
|
+
let currentTarget = Math.min(cfg.targetTokens, limit);
|
|
2246
|
+
let lastResult = null;
|
|
2247
|
+
for (let iteration = 0; iteration < MAX_ITERATIONS; iteration++) {
|
|
2248
|
+
const result = await tryCompactWithTarget({
|
|
2249
|
+
payload,
|
|
2250
|
+
model,
|
|
2251
|
+
systemMessages,
|
|
2252
|
+
remainingMessages,
|
|
2253
|
+
systemTokens,
|
|
2254
|
+
targetTokens: currentTarget,
|
|
2255
|
+
limit,
|
|
2256
|
+
originalTokens
|
|
2257
|
+
});
|
|
2258
|
+
if (!result.wasCompacted) return result;
|
|
2259
|
+
lastResult = result;
|
|
2260
|
+
if (result.compactedTokens <= limit) {
|
|
2261
|
+
consola.info(`Auto-compact: ${originalTokens} → ${result.compactedTokens} tokens (removed ${result.removedMessageCount} messages)`);
|
|
2262
|
+
return result;
|
|
2263
|
+
}
|
|
2264
|
+
consola.warn(`Auto-compact: Still over limit (${result.compactedTokens} > ${limit}), trying more aggressive truncation`);
|
|
2265
|
+
currentTarget = Math.floor(currentTarget * .7);
|
|
2266
|
+
if (currentTarget < MIN_TARGET) {
|
|
2267
|
+
consola.error("Auto-compact: Cannot reduce further, target too low");
|
|
2268
|
+
return result;
|
|
2269
|
+
}
|
|
2270
|
+
}
|
|
2271
|
+
consola.error(`Auto-compact: Exhausted ${MAX_ITERATIONS} iterations, returning best effort`);
|
|
2272
|
+
return lastResult ?? {
|
|
2273
|
+
payload,
|
|
2274
|
+
wasCompacted: false,
|
|
2275
|
+
originalTokens,
|
|
2276
|
+
compactedTokens: originalTokens,
|
|
2277
|
+
removedMessageCount: 0
|
|
2278
|
+
};
|
|
2279
|
+
}
|
|
2280
|
+
/**
|
|
2281
|
+
* Helper to attempt compaction with a specific target token count.
|
|
2282
|
+
*/
|
|
2283
|
+
async function tryCompactWithTarget(opts) {
|
|
2284
|
+
const { payload, model, systemMessages, remainingMessages, systemTokens, targetTokens, originalTokens } = opts;
|
|
2285
|
+
const preserveIndex = findPreserveIndex(remainingMessages, targetTokens, systemTokens);
|
|
1498
2286
|
if (preserveIndex === 0) {
|
|
1499
2287
|
consola.warn("Auto-compact: Cannot truncate further without losing all conversation history");
|
|
1500
2288
|
return {
|
|
@@ -1506,8 +2294,21 @@ async function autoCompact(payload, model, config = {}) {
|
|
|
1506
2294
|
};
|
|
1507
2295
|
}
|
|
1508
2296
|
const removedMessages = remainingMessages.slice(0, preserveIndex);
|
|
1509
|
-
|
|
1510
|
-
|
|
2297
|
+
let preservedMessages = remainingMessages.slice(preserveIndex);
|
|
2298
|
+
preservedMessages = filterOrphanedToolResults(preservedMessages);
|
|
2299
|
+
preservedMessages = ensureStartsWithUser(preservedMessages);
|
|
2300
|
+
preservedMessages = filterOrphanedToolResults(preservedMessages);
|
|
2301
|
+
if (preservedMessages.length === 0) {
|
|
2302
|
+
consola.warn("Auto-compact: All messages were filtered out after cleanup, cannot compact");
|
|
2303
|
+
return {
|
|
2304
|
+
payload,
|
|
2305
|
+
wasCompacted: false,
|
|
2306
|
+
originalTokens,
|
|
2307
|
+
compactedTokens: originalTokens,
|
|
2308
|
+
removedMessageCount: 0
|
|
2309
|
+
};
|
|
2310
|
+
}
|
|
2311
|
+
consola.debug(`Auto-compact: Removing ${removedMessages.length} messages, keeping ${preservedMessages.length}`);
|
|
1511
2312
|
const truncationMarker = createTruncationMarker(removedMessages.length);
|
|
1512
2313
|
const newPayload = {
|
|
1513
2314
|
...payload,
|
|
@@ -1518,136 +2319,192 @@ async function autoCompact(payload, model, config = {}) {
|
|
|
1518
2319
|
]
|
|
1519
2320
|
};
|
|
1520
2321
|
const newTokenCount = await getTokenCount(newPayload, model);
|
|
1521
|
-
|
|
1522
|
-
|
|
1523
|
-
|
|
1524
|
-
|
|
1525
|
-
|
|
1526
|
-
|
|
1527
|
-
|
|
1528
|
-
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
|
|
1532
|
-
|
|
1533
|
-
|
|
1534
|
-
|
|
1535
|
-
|
|
1536
|
-
|
|
1537
|
-
|
|
1538
|
-
|
|
2322
|
+
return {
|
|
2323
|
+
payload: newPayload,
|
|
2324
|
+
wasCompacted: true,
|
|
2325
|
+
originalTokens,
|
|
2326
|
+
compactedTokens: newTokenCount.input,
|
|
2327
|
+
removedMessageCount: removedMessages.length
|
|
2328
|
+
};
|
|
2329
|
+
}
|
|
2330
|
+
/**
|
|
2331
|
+
* Create a marker to append to responses indicating auto-compaction occurred.
|
|
2332
|
+
*/
|
|
2333
|
+
function createCompactionMarker(result) {
|
|
2334
|
+
if (!result.wasCompacted) return "";
|
|
2335
|
+
const reduction = result.originalTokens - result.compactedTokens;
|
|
2336
|
+
const percentage = Math.round(reduction / result.originalTokens * 100);
|
|
2337
|
+
return `\n\n---\n[Auto-compacted: ${result.removedMessageCount} messages removed, ${result.originalTokens} → ${result.compactedTokens} tokens (${percentage}% reduction)]`;
|
|
2338
|
+
}
|
|
2339
|
+
|
|
2340
|
+
//#endregion
|
|
2341
|
+
//#region src/services/copilot/create-chat-completions.ts
|
|
2342
|
+
const createChatCompletions = async (payload) => {
|
|
2343
|
+
if (!state.copilotToken) throw new Error("Copilot token not found");
|
|
2344
|
+
const enableVision = payload.messages.some((x) => typeof x.content !== "string" && x.content?.some((x$1) => x$1.type === "image_url"));
|
|
2345
|
+
const isAgentCall = payload.messages.some((msg) => ["assistant", "tool"].includes(msg.role));
|
|
2346
|
+
const headers = {
|
|
2347
|
+
...copilotHeaders(state, enableVision),
|
|
2348
|
+
"X-Initiator": isAgentCall ? "agent" : "user"
|
|
2349
|
+
};
|
|
2350
|
+
const response = await fetch(`${copilotBaseUrl(state)}/chat/completions`, {
|
|
2351
|
+
method: "POST",
|
|
2352
|
+
headers,
|
|
2353
|
+
body: JSON.stringify(payload)
|
|
2354
|
+
});
|
|
2355
|
+
if (!response.ok) {
|
|
2356
|
+
consola.error("Failed to create chat completions", response);
|
|
2357
|
+
throw await HTTPError.fromResponse("Failed to create chat completions", response);
|
|
2358
|
+
}
|
|
2359
|
+
if (payload.stream) return events(response);
|
|
2360
|
+
return await response.json();
|
|
2361
|
+
};
|
|
2362
|
+
|
|
2363
|
+
//#endregion
|
|
2364
|
+
//#region src/routes/shared.ts
|
|
2365
|
+
/** Helper to update tracker model */
|
|
2366
|
+
function updateTrackerModel(trackingId, model) {
|
|
2367
|
+
if (!trackingId) return;
|
|
2368
|
+
const request = requestTracker.getRequest(trackingId);
|
|
2369
|
+
if (request) request.model = model;
|
|
2370
|
+
}
|
|
2371
|
+
/** Helper to update tracker status */
|
|
2372
|
+
function updateTrackerStatus(trackingId, status) {
|
|
2373
|
+
if (!trackingId) return;
|
|
2374
|
+
requestTracker.updateRequest(trackingId, { status });
|
|
2375
|
+
}
|
|
2376
|
+
/** Record error response to history */
|
|
2377
|
+
function recordErrorResponse(ctx, model, error) {
|
|
2378
|
+
recordResponse(ctx.historyId, {
|
|
2379
|
+
success: false,
|
|
2380
|
+
model,
|
|
2381
|
+
usage: {
|
|
2382
|
+
input_tokens: 0,
|
|
2383
|
+
output_tokens: 0
|
|
2384
|
+
},
|
|
2385
|
+
error: error instanceof Error ? error.message : "Unknown error",
|
|
2386
|
+
content: null
|
|
2387
|
+
}, Date.now() - ctx.startTime);
|
|
2388
|
+
}
|
|
2389
|
+
/** Complete TUI tracking */
|
|
2390
|
+
function completeTracking(trackingId, inputTokens, outputTokens, queueWaitMs) {
|
|
2391
|
+
if (!trackingId) return;
|
|
2392
|
+
requestTracker.updateRequest(trackingId, {
|
|
2393
|
+
inputTokens,
|
|
2394
|
+
outputTokens,
|
|
2395
|
+
queueWaitMs
|
|
2396
|
+
});
|
|
2397
|
+
requestTracker.completeRequest(trackingId, 200, {
|
|
2398
|
+
inputTokens,
|
|
2399
|
+
outputTokens
|
|
2400
|
+
});
|
|
2401
|
+
}
|
|
2402
|
+
/** Fail TUI tracking */
|
|
2403
|
+
function failTracking(trackingId, error) {
|
|
2404
|
+
if (!trackingId) return;
|
|
2405
|
+
requestTracker.failRequest(trackingId, error instanceof Error ? error.message : "Stream error");
|
|
2406
|
+
}
|
|
2407
|
+
/** Record streaming error to history (works with any accumulator type) */
|
|
2408
|
+
function recordStreamError(opts) {
|
|
2409
|
+
const { acc, fallbackModel, ctx, error } = opts;
|
|
2410
|
+
recordResponse(ctx.historyId, {
|
|
2411
|
+
success: false,
|
|
2412
|
+
model: acc.model || fallbackModel,
|
|
2413
|
+
usage: {
|
|
2414
|
+
input_tokens: 0,
|
|
2415
|
+
output_tokens: 0
|
|
2416
|
+
},
|
|
2417
|
+
error: error instanceof Error ? error.message : "Stream error",
|
|
2418
|
+
content: null
|
|
2419
|
+
}, Date.now() - ctx.startTime);
|
|
2420
|
+
}
|
|
2421
|
+
/** Type guard for non-streaming responses */
|
|
2422
|
+
function isNonStreaming(response) {
|
|
2423
|
+
return Object.hasOwn(response, "choices");
|
|
2424
|
+
}
|
|
2425
|
+
/** Build final payload with auto-compact if needed */
|
|
2426
|
+
async function buildFinalPayload(payload, model) {
|
|
2427
|
+
if (!state.autoCompact || !model) {
|
|
2428
|
+
if (state.autoCompact && !model) consola.warn(`Auto-compact: Model '${payload.model}' not found in cached models, skipping`);
|
|
2429
|
+
return {
|
|
2430
|
+
finalPayload: payload,
|
|
2431
|
+
compactResult: null
|
|
2432
|
+
};
|
|
2433
|
+
}
|
|
2434
|
+
try {
|
|
2435
|
+
const check = await checkNeedsCompaction(payload, model);
|
|
2436
|
+
consola.debug(`Auto-compact check: ${check.currentTokens} tokens, limit ${check.limit}, needed: ${check.needed}`);
|
|
2437
|
+
if (!check.needed) return {
|
|
2438
|
+
finalPayload: payload,
|
|
2439
|
+
compactResult: null
|
|
2440
|
+
};
|
|
2441
|
+
consola.info(`Auto-compact triggered: ${check.currentTokens} tokens > ${check.limit} limit`);
|
|
2442
|
+
const compactResult = await autoCompact(payload, model);
|
|
2443
|
+
return {
|
|
2444
|
+
finalPayload: compactResult.payload,
|
|
2445
|
+
compactResult
|
|
2446
|
+
};
|
|
2447
|
+
} catch (error) {
|
|
2448
|
+
consola.warn("Auto-compact failed, proceeding with original payload:", error instanceof Error ? error.message : error);
|
|
2449
|
+
return {
|
|
2450
|
+
finalPayload: payload,
|
|
2451
|
+
compactResult: null
|
|
2452
|
+
};
|
|
1539
2453
|
}
|
|
1540
|
-
return {
|
|
1541
|
-
payload: newPayload,
|
|
1542
|
-
wasCompacted: true,
|
|
1543
|
-
originalTokens,
|
|
1544
|
-
compactedTokens: newTokenCount.input,
|
|
1545
|
-
removedMessageCount: removedMessages.length
|
|
1546
|
-
};
|
|
1547
2454
|
}
|
|
1548
2455
|
/**
|
|
1549
|
-
*
|
|
2456
|
+
* Log helpful debugging information when a 413 error occurs.
|
|
1550
2457
|
*/
|
|
1551
|
-
function
|
|
1552
|
-
|
|
1553
|
-
const
|
|
1554
|
-
const
|
|
1555
|
-
|
|
1556
|
-
|
|
1557
|
-
|
|
1558
|
-
|
|
1559
|
-
|
|
1560
|
-
|
|
1561
|
-
|
|
1562
|
-
|
|
1563
|
-
lastRequestTime = 0;
|
|
1564
|
-
async enqueue(execute, rateLimitSeconds) {
|
|
1565
|
-
return new Promise((resolve, reject) => {
|
|
1566
|
-
this.queue.push({
|
|
1567
|
-
execute,
|
|
1568
|
-
resolve,
|
|
1569
|
-
reject
|
|
1570
|
-
});
|
|
1571
|
-
if (this.queue.length > 1) {
|
|
1572
|
-
const position = this.queue.length;
|
|
1573
|
-
const waitTime = Math.ceil((position - 1) * rateLimitSeconds);
|
|
1574
|
-
(waitTime > 10 ? consola.warn : consola.info)(`Rate limit: request queued (position ${position}, ~${waitTime}s wait)`);
|
|
1575
|
-
}
|
|
1576
|
-
this.processQueue(rateLimitSeconds);
|
|
1577
|
-
});
|
|
1578
|
-
}
|
|
1579
|
-
async processQueue(rateLimitSeconds) {
|
|
1580
|
-
if (this.processing) return;
|
|
1581
|
-
this.processing = true;
|
|
1582
|
-
while (this.queue.length > 0) {
|
|
1583
|
-
const elapsedMs = Date.now() - this.lastRequestTime;
|
|
1584
|
-
const requiredMs = rateLimitSeconds * 1e3;
|
|
1585
|
-
if (this.lastRequestTime > 0 && elapsedMs < requiredMs) {
|
|
1586
|
-
const waitMs = requiredMs - elapsedMs;
|
|
1587
|
-
const waitSec = Math.ceil(waitMs / 1e3);
|
|
1588
|
-
(waitSec > 10 ? consola.warn : consola.info)(`Rate limit: waiting ${waitSec}s before next request...`);
|
|
1589
|
-
await new Promise((resolve) => setTimeout(resolve, waitMs));
|
|
1590
|
-
}
|
|
1591
|
-
const request = this.queue.shift();
|
|
1592
|
-
if (!request) break;
|
|
1593
|
-
this.lastRequestTime = Date.now();
|
|
1594
|
-
try {
|
|
1595
|
-
const result = await request.execute();
|
|
1596
|
-
request.resolve(result);
|
|
1597
|
-
} catch (error) {
|
|
1598
|
-
request.reject(error);
|
|
2458
|
+
async function logPayloadSizeInfo(payload, model) {
|
|
2459
|
+
const messageCount = payload.messages.length;
|
|
2460
|
+
const bodySize = JSON.stringify(payload).length;
|
|
2461
|
+
const bodySizeKB = Math.round(bodySize / 1024);
|
|
2462
|
+
let imageCount = 0;
|
|
2463
|
+
let largeMessages = 0;
|
|
2464
|
+
let totalImageSize = 0;
|
|
2465
|
+
for (const msg of payload.messages) {
|
|
2466
|
+
if (Array.isArray(msg.content)) {
|
|
2467
|
+
for (const part of msg.content) if (part.type === "image_url") {
|
|
2468
|
+
imageCount++;
|
|
2469
|
+
if (part.image_url.url.startsWith("data:")) totalImageSize += part.image_url.url.length;
|
|
1599
2470
|
}
|
|
1600
2471
|
}
|
|
1601
|
-
|
|
2472
|
+
if ((typeof msg.content === "string" ? msg.content.length : JSON.stringify(msg.content).length) > 5e4) largeMessages++;
|
|
1602
2473
|
}
|
|
1603
|
-
|
|
1604
|
-
|
|
2474
|
+
consola.info("");
|
|
2475
|
+
consola.info("╭─────────────────────────────────────────────────────────╮");
|
|
2476
|
+
consola.info("│ 413 Request Entity Too Large │");
|
|
2477
|
+
consola.info("╰─────────────────────────────────────────────────────────╯");
|
|
2478
|
+
consola.info("");
|
|
2479
|
+
consola.info(` Request body size: ${bodySizeKB} KB (${bodySize.toLocaleString()} bytes)`);
|
|
2480
|
+
consola.info(` Message count: ${messageCount}`);
|
|
2481
|
+
if (model) try {
|
|
2482
|
+
const tokenCount = await getTokenCount(payload, model);
|
|
2483
|
+
const limit = model.capabilities?.limits?.max_prompt_tokens ?? 128e3;
|
|
2484
|
+
consola.info(` Estimated tokens: ${tokenCount.input.toLocaleString()} / ${limit.toLocaleString()}`);
|
|
2485
|
+
} catch {}
|
|
2486
|
+
if (imageCount > 0) {
|
|
2487
|
+
const imageSizeKB = Math.round(totalImageSize / 1024);
|
|
2488
|
+
consola.info(` Images: ${imageCount} (${imageSizeKB} KB base64 data)`);
|
|
1605
2489
|
}
|
|
1606
|
-
};
|
|
1607
|
-
|
|
1608
|
-
|
|
1609
|
-
|
|
1610
|
-
|
|
1611
|
-
|
|
1612
|
-
|
|
1613
|
-
|
|
1614
|
-
return requestQueue.enqueue(execute, state$1.rateLimitSeconds);
|
|
2490
|
+
if (largeMessages > 0) consola.info(` Large messages (>50KB): ${largeMessages}`);
|
|
2491
|
+
consola.info("");
|
|
2492
|
+
consola.info(" Suggestions:");
|
|
2493
|
+
if (!state.autoCompact) consola.info(" • Enable --auto-compact to automatically truncate history");
|
|
2494
|
+
if (imageCount > 0) consola.info(" • Remove or resize large images in the conversation");
|
|
2495
|
+
consola.info(" • Start a new conversation with /clear or /reset");
|
|
2496
|
+
consola.info(" • Reduce conversation history by deleting old messages");
|
|
2497
|
+
consola.info("");
|
|
1615
2498
|
}
|
|
1616
2499
|
|
|
1617
|
-
//#endregion
|
|
1618
|
-
//#region src/services/copilot/create-chat-completions.ts
|
|
1619
|
-
const createChatCompletions = async (payload) => {
|
|
1620
|
-
if (!state.copilotToken) throw new Error("Copilot token not found");
|
|
1621
|
-
const enableVision = payload.messages.some((x) => typeof x.content !== "string" && x.content?.some((x$1) => x$1.type === "image_url"));
|
|
1622
|
-
const isAgentCall = payload.messages.some((msg) => ["assistant", "tool"].includes(msg.role));
|
|
1623
|
-
const headers = {
|
|
1624
|
-
...copilotHeaders(state, enableVision),
|
|
1625
|
-
"X-Initiator": isAgentCall ? "agent" : "user"
|
|
1626
|
-
};
|
|
1627
|
-
const response = await fetch(`${copilotBaseUrl(state)}/chat/completions`, {
|
|
1628
|
-
method: "POST",
|
|
1629
|
-
headers,
|
|
1630
|
-
body: JSON.stringify(payload)
|
|
1631
|
-
});
|
|
1632
|
-
if (!response.ok) {
|
|
1633
|
-
consola.error("Failed to create chat completions", response);
|
|
1634
|
-
throw await HTTPError.fromResponse("Failed to create chat completions", response);
|
|
1635
|
-
}
|
|
1636
|
-
if (payload.stream) return events(response);
|
|
1637
|
-
return await response.json();
|
|
1638
|
-
};
|
|
1639
|
-
|
|
1640
2500
|
//#endregion
|
|
1641
2501
|
//#region src/routes/chat-completions/handler.ts
|
|
1642
|
-
function getModelMaxOutputTokens(model) {
|
|
1643
|
-
return model?.capabilities?.limits?.max_output_tokens;
|
|
1644
|
-
}
|
|
1645
2502
|
async function handleCompletion$1(c) {
|
|
1646
2503
|
const originalPayload = await c.req.json();
|
|
1647
2504
|
consola.debug("Request payload:", JSON.stringify(originalPayload).slice(-400));
|
|
1648
2505
|
const trackingId = c.get("trackingId");
|
|
1649
2506
|
const startTime = (trackingId ? requestTracker.getRequest(trackingId) : void 0)?.startTime ?? Date.now();
|
|
1650
|
-
updateTrackerModel
|
|
2507
|
+
updateTrackerModel(trackingId, originalPayload.model);
|
|
1651
2508
|
const ctx = {
|
|
1652
2509
|
historyId: recordRequest("openai", {
|
|
1653
2510
|
model: originalPayload.model,
|
|
@@ -1665,19 +2522,33 @@ async function handleCompletion$1(c) {
|
|
|
1665
2522
|
};
|
|
1666
2523
|
const selectedModel = state.models?.data.find((model) => model.id === originalPayload.model);
|
|
1667
2524
|
await logTokenCount(originalPayload, selectedModel);
|
|
1668
|
-
const { finalPayload, compactResult } = await buildFinalPayload
|
|
2525
|
+
const { finalPayload, compactResult } = await buildFinalPayload(originalPayload, selectedModel);
|
|
1669
2526
|
if (compactResult) ctx.compactResult = compactResult;
|
|
1670
2527
|
const payload = isNullish(finalPayload.max_tokens) ? {
|
|
1671
2528
|
...finalPayload,
|
|
1672
|
-
max_tokens:
|
|
2529
|
+
max_tokens: selectedModel?.capabilities?.limits?.max_output_tokens
|
|
1673
2530
|
} : finalPayload;
|
|
1674
2531
|
if (isNullish(originalPayload.max_tokens)) consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens));
|
|
1675
2532
|
if (state.manualApprove) await awaitApproval();
|
|
2533
|
+
return executeRequest({
|
|
2534
|
+
c,
|
|
2535
|
+
payload,
|
|
2536
|
+
selectedModel,
|
|
2537
|
+
ctx,
|
|
2538
|
+
trackingId
|
|
2539
|
+
});
|
|
2540
|
+
}
|
|
2541
|
+
/**
|
|
2542
|
+
* Execute the API call with enhanced error handling for 413 errors.
|
|
2543
|
+
*/
|
|
2544
|
+
async function executeRequest(opts) {
|
|
2545
|
+
const { c, payload, selectedModel, ctx, trackingId } = opts;
|
|
1676
2546
|
try {
|
|
1677
|
-
const response = await
|
|
1678
|
-
|
|
2547
|
+
const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(payload));
|
|
2548
|
+
ctx.queueWaitMs = queueWaitMs;
|
|
2549
|
+
if (isNonStreaming(response)) return handleNonStreamingResponse$1(c, response, ctx);
|
|
1679
2550
|
consola.debug("Streaming response");
|
|
1680
|
-
updateTrackerStatus
|
|
2551
|
+
updateTrackerStatus(trackingId, "streaming");
|
|
1681
2552
|
return streamSSE(c, async (stream) => {
|
|
1682
2553
|
await handleStreamingResponse$1({
|
|
1683
2554
|
stream,
|
|
@@ -1687,39 +2558,11 @@ async function handleCompletion$1(c) {
|
|
|
1687
2558
|
});
|
|
1688
2559
|
});
|
|
1689
2560
|
} catch (error) {
|
|
1690
|
-
|
|
2561
|
+
if (error instanceof HTTPError && error.status === 413) await logPayloadSizeInfo(payload, selectedModel);
|
|
2562
|
+
recordErrorResponse(ctx, payload.model, error);
|
|
1691
2563
|
throw error;
|
|
1692
2564
|
}
|
|
1693
2565
|
}
|
|
1694
|
-
async function buildFinalPayload$1(payload, model) {
|
|
1695
|
-
if (!state.autoCompact || !model) {
|
|
1696
|
-
if (state.autoCompact && !model) consola.warn(`Auto-compact: Model '${payload.model}' not found in cached models, skipping`);
|
|
1697
|
-
return {
|
|
1698
|
-
finalPayload: payload,
|
|
1699
|
-
compactResult: null
|
|
1700
|
-
};
|
|
1701
|
-
}
|
|
1702
|
-
try {
|
|
1703
|
-
const check = await checkNeedsCompaction(payload, model);
|
|
1704
|
-
consola.debug(`Auto-compact check: ${check.currentTokens} tokens, limit ${check.limit}, needed: ${check.needed}`);
|
|
1705
|
-
if (!check.needed) return {
|
|
1706
|
-
finalPayload: payload,
|
|
1707
|
-
compactResult: null
|
|
1708
|
-
};
|
|
1709
|
-
consola.info(`Auto-compact triggered: ${check.currentTokens} tokens > ${check.limit} limit`);
|
|
1710
|
-
const compactResult = await autoCompact(payload, model);
|
|
1711
|
-
return {
|
|
1712
|
-
finalPayload: compactResult.payload,
|
|
1713
|
-
compactResult
|
|
1714
|
-
};
|
|
1715
|
-
} catch (error) {
|
|
1716
|
-
consola.warn("Auto-compact failed, proceeding with original payload:", error);
|
|
1717
|
-
return {
|
|
1718
|
-
finalPayload: payload,
|
|
1719
|
-
compactResult: null
|
|
1720
|
-
};
|
|
1721
|
-
}
|
|
1722
|
-
}
|
|
1723
2566
|
async function logTokenCount(payload, selectedModel) {
|
|
1724
2567
|
try {
|
|
1725
2568
|
if (selectedModel) {
|
|
@@ -1730,27 +2573,6 @@ async function logTokenCount(payload, selectedModel) {
|
|
|
1730
2573
|
consola.debug("Failed to calculate token count:", error);
|
|
1731
2574
|
}
|
|
1732
2575
|
}
|
|
1733
|
-
function updateTrackerModel$1(trackingId, model) {
|
|
1734
|
-
if (!trackingId) return;
|
|
1735
|
-
const request = requestTracker.getRequest(trackingId);
|
|
1736
|
-
if (request) request.model = model;
|
|
1737
|
-
}
|
|
1738
|
-
function updateTrackerStatus$1(trackingId, status) {
|
|
1739
|
-
if (!trackingId) return;
|
|
1740
|
-
requestTracker.updateRequest(trackingId, { status });
|
|
1741
|
-
}
|
|
1742
|
-
function recordErrorResponse$1(ctx, model, error) {
|
|
1743
|
-
recordResponse(ctx.historyId, {
|
|
1744
|
-
success: false,
|
|
1745
|
-
model,
|
|
1746
|
-
usage: {
|
|
1747
|
-
input_tokens: 0,
|
|
1748
|
-
output_tokens: 0
|
|
1749
|
-
},
|
|
1750
|
-
error: error instanceof Error ? error.message : "Unknown error",
|
|
1751
|
-
content: null
|
|
1752
|
-
}, Date.now() - ctx.startTime);
|
|
1753
|
-
}
|
|
1754
2576
|
function handleNonStreamingResponse$1(c, originalResponse, ctx) {
|
|
1755
2577
|
consola.debug("Non-streaming response:", JSON.stringify(originalResponse));
|
|
1756
2578
|
let response = originalResponse;
|
|
@@ -1782,7 +2604,8 @@ function handleNonStreamingResponse$1(c, originalResponse, ctx) {
|
|
|
1782
2604
|
}, Date.now() - ctx.startTime);
|
|
1783
2605
|
if (ctx.trackingId && usage) requestTracker.updateRequest(ctx.trackingId, {
|
|
1784
2606
|
inputTokens: usage.prompt_tokens,
|
|
1785
|
-
outputTokens: usage.completion_tokens
|
|
2607
|
+
outputTokens: usage.completion_tokens,
|
|
2608
|
+
queueWaitMs: ctx.queueWaitMs
|
|
1786
2609
|
});
|
|
1787
2610
|
return c.json(response);
|
|
1788
2611
|
}
|
|
@@ -1848,7 +2671,7 @@ async function handleStreamingResponse$1(opts) {
|
|
|
1848
2671
|
acc.content += marker;
|
|
1849
2672
|
}
|
|
1850
2673
|
recordStreamSuccess(acc, payload.model, ctx);
|
|
1851
|
-
completeTracking
|
|
2674
|
+
completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
|
|
1852
2675
|
} catch (error) {
|
|
1853
2676
|
recordStreamError({
|
|
1854
2677
|
acc,
|
|
@@ -1856,7 +2679,7 @@ async function handleStreamingResponse$1(opts) {
|
|
|
1856
2679
|
ctx,
|
|
1857
2680
|
error
|
|
1858
2681
|
});
|
|
1859
|
-
failTracking
|
|
2682
|
+
failTracking(ctx.trackingId, error);
|
|
1860
2683
|
throw error;
|
|
1861
2684
|
}
|
|
1862
2685
|
}
|
|
@@ -1864,42 +2687,31 @@ function parseStreamChunk(chunk, acc) {
|
|
|
1864
2687
|
if (!chunk.data || chunk.data === "[DONE]") return;
|
|
1865
2688
|
try {
|
|
1866
2689
|
const parsed = JSON.parse(chunk.data);
|
|
1867
|
-
|
|
1868
|
-
|
|
1869
|
-
|
|
1870
|
-
|
|
1871
|
-
}
|
|
1872
|
-
function accumulateModel(parsed, acc) {
|
|
1873
|
-
if (parsed.model && !acc.model) acc.model = parsed.model;
|
|
1874
|
-
}
|
|
1875
|
-
function accumulateUsage(parsed, acc) {
|
|
1876
|
-
if (parsed.usage) {
|
|
1877
|
-
acc.inputTokens = parsed.usage.prompt_tokens;
|
|
1878
|
-
acc.outputTokens = parsed.usage.completion_tokens;
|
|
1879
|
-
}
|
|
1880
|
-
}
|
|
1881
|
-
function accumulateChoice(choice, acc) {
|
|
1882
|
-
if (!choice) return;
|
|
1883
|
-
if (choice.delta.content) acc.content += choice.delta.content;
|
|
1884
|
-
if (choice.delta.tool_calls) accumulateToolCalls(choice.delta.tool_calls, acc);
|
|
1885
|
-
if (choice.finish_reason) acc.finishReason = choice.finish_reason;
|
|
1886
|
-
}
|
|
1887
|
-
function accumulateToolCalls(toolCalls, acc) {
|
|
1888
|
-
if (!toolCalls) return;
|
|
1889
|
-
for (const tc of toolCalls) {
|
|
1890
|
-
const idx = tc.index;
|
|
1891
|
-
if (!acc.toolCallMap.has(idx)) acc.toolCallMap.set(idx, {
|
|
1892
|
-
id: tc.id ?? "",
|
|
1893
|
-
name: tc.function?.name ?? "",
|
|
1894
|
-
arguments: ""
|
|
1895
|
-
});
|
|
1896
|
-
const item = acc.toolCallMap.get(idx);
|
|
1897
|
-
if (item) {
|
|
1898
|
-
if (tc.id) item.id = tc.id;
|
|
1899
|
-
if (tc.function?.name) item.name = tc.function.name;
|
|
1900
|
-
if (tc.function?.arguments) item.arguments += tc.function.arguments;
|
|
2690
|
+
if (parsed.model && !acc.model) acc.model = parsed.model;
|
|
2691
|
+
if (parsed.usage) {
|
|
2692
|
+
acc.inputTokens = parsed.usage.prompt_tokens;
|
|
2693
|
+
acc.outputTokens = parsed.usage.completion_tokens;
|
|
1901
2694
|
}
|
|
1902
|
-
|
|
2695
|
+
const choice = parsed.choices[0];
|
|
2696
|
+
if (choice) {
|
|
2697
|
+
if (choice.delta.content) acc.content += choice.delta.content;
|
|
2698
|
+
if (choice.delta.tool_calls) for (const tc of choice.delta.tool_calls) {
|
|
2699
|
+
const idx = tc.index;
|
|
2700
|
+
if (!acc.toolCallMap.has(idx)) acc.toolCallMap.set(idx, {
|
|
2701
|
+
id: tc.id ?? "",
|
|
2702
|
+
name: tc.function?.name ?? "",
|
|
2703
|
+
arguments: ""
|
|
2704
|
+
});
|
|
2705
|
+
const item = acc.toolCallMap.get(idx);
|
|
2706
|
+
if (item) {
|
|
2707
|
+
if (tc.id) item.id = tc.id;
|
|
2708
|
+
if (tc.function?.name) item.name = tc.function.name;
|
|
2709
|
+
if (tc.function?.arguments) item.arguments += tc.function.arguments;
|
|
2710
|
+
}
|
|
2711
|
+
}
|
|
2712
|
+
if (choice.finish_reason) acc.finishReason = choice.finish_reason;
|
|
2713
|
+
}
|
|
2714
|
+
} catch {}
|
|
1903
2715
|
}
|
|
1904
2716
|
function recordStreamSuccess(acc, fallbackModel, ctx) {
|
|
1905
2717
|
for (const tc of acc.toolCallMap.values()) if (tc.id && tc.name) acc.toolCalls.push(tc);
|
|
@@ -1931,35 +2743,6 @@ function recordStreamSuccess(acc, fallbackModel, ctx) {
|
|
|
1931
2743
|
})) : void 0
|
|
1932
2744
|
}, Date.now() - ctx.startTime);
|
|
1933
2745
|
}
|
|
1934
|
-
function recordStreamError(opts) {
|
|
1935
|
-
const { acc, fallbackModel, ctx, error } = opts;
|
|
1936
|
-
recordResponse(ctx.historyId, {
|
|
1937
|
-
success: false,
|
|
1938
|
-
model: acc.model || fallbackModel,
|
|
1939
|
-
usage: {
|
|
1940
|
-
input_tokens: 0,
|
|
1941
|
-
output_tokens: 0
|
|
1942
|
-
},
|
|
1943
|
-
error: error instanceof Error ? error.message : "Stream error",
|
|
1944
|
-
content: null
|
|
1945
|
-
}, Date.now() - ctx.startTime);
|
|
1946
|
-
}
|
|
1947
|
-
function completeTracking$1(trackingId, inputTokens, outputTokens) {
|
|
1948
|
-
if (!trackingId) return;
|
|
1949
|
-
requestTracker.updateRequest(trackingId, {
|
|
1950
|
-
inputTokens,
|
|
1951
|
-
outputTokens
|
|
1952
|
-
});
|
|
1953
|
-
requestTracker.completeRequest(trackingId, 200, {
|
|
1954
|
-
inputTokens,
|
|
1955
|
-
outputTokens
|
|
1956
|
-
});
|
|
1957
|
-
}
|
|
1958
|
-
function failTracking$1(trackingId, error) {
|
|
1959
|
-
if (!trackingId) return;
|
|
1960
|
-
requestTracker.failRequest(trackingId, error instanceof Error ? error.message : "Stream error");
|
|
1961
|
-
}
|
|
1962
|
-
const isNonStreaming$1 = (response) => Object.hasOwn(response, "choices");
|
|
1963
2746
|
function convertOpenAIMessages(messages) {
|
|
1964
2747
|
return messages.map((msg) => {
|
|
1965
2748
|
const result = {
|
|
@@ -1987,7 +2770,7 @@ completionRoutes.post("/", async (c) => {
|
|
|
1987
2770
|
try {
|
|
1988
2771
|
return await handleCompletion$1(c);
|
|
1989
2772
|
} catch (error) {
|
|
1990
|
-
return
|
|
2773
|
+
return forwardError(c, error);
|
|
1991
2774
|
}
|
|
1992
2775
|
});
|
|
1993
2776
|
|
|
@@ -2013,7 +2796,7 @@ embeddingRoutes.post("/", async (c) => {
|
|
|
2013
2796
|
const response = await createEmbeddings(payload);
|
|
2014
2797
|
return c.json(response);
|
|
2015
2798
|
} catch (error) {
|
|
2016
|
-
return
|
|
2799
|
+
return forwardError(c, error);
|
|
2017
2800
|
}
|
|
2018
2801
|
});
|
|
2019
2802
|
|
|
@@ -3160,6 +3943,15 @@ function mapOpenAIStopReasonToAnthropic(finishReason) {
|
|
|
3160
3943
|
//#endregion
|
|
3161
3944
|
//#region src/routes/messages/non-stream-translation.ts
|
|
3162
3945
|
const OPENAI_TOOL_NAME_LIMIT = 64;
|
|
3946
|
+
/**
|
|
3947
|
+
* Ensure all tool_use blocks have corresponding tool_result responses.
|
|
3948
|
+
* This handles edge cases where conversation history may be incomplete:
|
|
3949
|
+
* - Session interruptions where tool execution was cut off
|
|
3950
|
+
* - Previous request failures
|
|
3951
|
+
* - Client sending truncated history
|
|
3952
|
+
*
|
|
3953
|
+
* Adding placeholder responses prevents API errors and maintains protocol compliance.
|
|
3954
|
+
*/
|
|
3163
3955
|
function fixMessageSequence(messages) {
|
|
3164
3956
|
const fixedMessages = [];
|
|
3165
3957
|
for (let i = 0; i < messages.length; i++) {
|
|
@@ -3318,7 +4110,7 @@ function getTruncatedToolName(originalName, toolNameMapping) {
|
|
|
3318
4110
|
for (let i = 0; i < originalName.length; i++) {
|
|
3319
4111
|
const char = originalName.codePointAt(i) ?? 0;
|
|
3320
4112
|
hash = (hash << 5) - hash + char;
|
|
3321
|
-
hash = hash
|
|
4113
|
+
hash = Math.trunc(hash);
|
|
3322
4114
|
}
|
|
3323
4115
|
const hashSuffix = Math.abs(hash).toString(36).slice(0, 8);
|
|
3324
4116
|
const truncatedName = originalName.slice(0, OPENAI_TOOL_NAME_LIMIT - 9) + "_" + hashSuffix;
|
|
@@ -3636,7 +4428,8 @@ async function handleCompletion(c) {
|
|
|
3636
4428
|
if (compactResult) ctx.compactResult = compactResult;
|
|
3637
4429
|
if (state.manualApprove) await awaitApproval();
|
|
3638
4430
|
try {
|
|
3639
|
-
const response = await
|
|
4431
|
+
const { result: response, queueWaitMs } = await executeWithAdaptiveRateLimit(() => createChatCompletions(openAIPayload));
|
|
4432
|
+
ctx.queueWaitMs = queueWaitMs;
|
|
3640
4433
|
if (isNonStreaming(response)) return handleNonStreamingResponse({
|
|
3641
4434
|
c,
|
|
3642
4435
|
response,
|
|
@@ -3655,60 +4448,11 @@ async function handleCompletion(c) {
|
|
|
3655
4448
|
});
|
|
3656
4449
|
});
|
|
3657
4450
|
} catch (error) {
|
|
4451
|
+
if (error instanceof HTTPError && error.status === 413) await logPayloadSizeInfo(openAIPayload, selectedModel);
|
|
3658
4452
|
recordErrorResponse(ctx, anthropicPayload.model, error);
|
|
3659
4453
|
throw error;
|
|
3660
4454
|
}
|
|
3661
4455
|
}
|
|
3662
|
-
function updateTrackerModel(trackingId, model) {
|
|
3663
|
-
if (!trackingId) return;
|
|
3664
|
-
const request = requestTracker.getRequest(trackingId);
|
|
3665
|
-
if (request) request.model = model;
|
|
3666
|
-
}
|
|
3667
|
-
async function buildFinalPayload(payload, model) {
|
|
3668
|
-
if (!state.autoCompact || !model) {
|
|
3669
|
-
if (state.autoCompact && !model) consola.warn(`Auto-compact: Model '${payload.model}' not found in cached models, skipping`);
|
|
3670
|
-
return {
|
|
3671
|
-
finalPayload: payload,
|
|
3672
|
-
compactResult: null
|
|
3673
|
-
};
|
|
3674
|
-
}
|
|
3675
|
-
try {
|
|
3676
|
-
const check = await checkNeedsCompaction(payload, model);
|
|
3677
|
-
consola.debug(`Auto-compact check: ${check.currentTokens} tokens, limit ${check.limit}, needed: ${check.needed}`);
|
|
3678
|
-
if (!check.needed) return {
|
|
3679
|
-
finalPayload: payload,
|
|
3680
|
-
compactResult: null
|
|
3681
|
-
};
|
|
3682
|
-
consola.info(`Auto-compact triggered: ${check.currentTokens} tokens > ${check.limit} limit`);
|
|
3683
|
-
const compactResult = await autoCompact(payload, model);
|
|
3684
|
-
return {
|
|
3685
|
-
finalPayload: compactResult.payload,
|
|
3686
|
-
compactResult
|
|
3687
|
-
};
|
|
3688
|
-
} catch (error) {
|
|
3689
|
-
consola.warn("Auto-compact failed, proceeding with original payload:", error);
|
|
3690
|
-
return {
|
|
3691
|
-
finalPayload: payload,
|
|
3692
|
-
compactResult: null
|
|
3693
|
-
};
|
|
3694
|
-
}
|
|
3695
|
-
}
|
|
3696
|
-
function updateTrackerStatus(trackingId, status) {
|
|
3697
|
-
if (!trackingId) return;
|
|
3698
|
-
requestTracker.updateRequest(trackingId, { status });
|
|
3699
|
-
}
|
|
3700
|
-
function recordErrorResponse(ctx, model, error) {
|
|
3701
|
-
recordResponse(ctx.historyId, {
|
|
3702
|
-
success: false,
|
|
3703
|
-
model,
|
|
3704
|
-
usage: {
|
|
3705
|
-
input_tokens: 0,
|
|
3706
|
-
output_tokens: 0
|
|
3707
|
-
},
|
|
3708
|
-
error: error instanceof Error ? error.message : "Unknown error",
|
|
3709
|
-
content: null
|
|
3710
|
-
}, Date.now() - ctx.startTime);
|
|
3711
|
-
}
|
|
3712
4456
|
function handleNonStreamingResponse(opts) {
|
|
3713
4457
|
const { c, response, toolNameMapping, ctx } = opts;
|
|
3714
4458
|
consola.debug("Non-streaming response from Copilot:", JSON.stringify(response).slice(-400));
|
|
@@ -3743,7 +4487,8 @@ function handleNonStreamingResponse(opts) {
|
|
|
3743
4487
|
}, Date.now() - ctx.startTime);
|
|
3744
4488
|
if (ctx.trackingId) requestTracker.updateRequest(ctx.trackingId, {
|
|
3745
4489
|
inputTokens: anthropicResponse.usage.input_tokens,
|
|
3746
|
-
outputTokens: anthropicResponse.usage.output_tokens
|
|
4490
|
+
outputTokens: anthropicResponse.usage.output_tokens,
|
|
4491
|
+
queueWaitMs: ctx.queueWaitMs
|
|
3747
4492
|
});
|
|
3748
4493
|
return c.json(anthropicResponse);
|
|
3749
4494
|
}
|
|
@@ -3799,10 +4544,10 @@ async function handleStreamingResponse(opts) {
|
|
|
3799
4544
|
acc.content += marker;
|
|
3800
4545
|
}
|
|
3801
4546
|
recordStreamingResponse(acc, anthropicPayload.model, ctx);
|
|
3802
|
-
completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens);
|
|
4547
|
+
completeTracking(ctx.trackingId, acc.inputTokens, acc.outputTokens, ctx.queueWaitMs);
|
|
3803
4548
|
} catch (error) {
|
|
3804
4549
|
consola.error("Stream error:", error);
|
|
3805
|
-
|
|
4550
|
+
recordStreamError({
|
|
3806
4551
|
acc,
|
|
3807
4552
|
fallbackModel: anthropicPayload.model,
|
|
3808
4553
|
ctx,
|
|
@@ -3942,34 +4687,6 @@ function recordStreamingResponse(acc, fallbackModel, ctx) {
|
|
|
3942
4687
|
toolCalls: acc.toolCalls.length > 0 ? acc.toolCalls : void 0
|
|
3943
4688
|
}, Date.now() - ctx.startTime);
|
|
3944
4689
|
}
|
|
3945
|
-
function recordStreamingError(opts) {
|
|
3946
|
-
const { acc, fallbackModel, ctx, error } = opts;
|
|
3947
|
-
recordResponse(ctx.historyId, {
|
|
3948
|
-
success: false,
|
|
3949
|
-
model: acc.model || fallbackModel,
|
|
3950
|
-
usage: {
|
|
3951
|
-
input_tokens: 0,
|
|
3952
|
-
output_tokens: 0
|
|
3953
|
-
},
|
|
3954
|
-
error: error instanceof Error ? error.message : "Stream error",
|
|
3955
|
-
content: null
|
|
3956
|
-
}, Date.now() - ctx.startTime);
|
|
3957
|
-
}
|
|
3958
|
-
function completeTracking(trackingId, inputTokens, outputTokens) {
|
|
3959
|
-
if (!trackingId) return;
|
|
3960
|
-
requestTracker.updateRequest(trackingId, {
|
|
3961
|
-
inputTokens,
|
|
3962
|
-
outputTokens
|
|
3963
|
-
});
|
|
3964
|
-
requestTracker.completeRequest(trackingId, 200, {
|
|
3965
|
-
inputTokens,
|
|
3966
|
-
outputTokens
|
|
3967
|
-
});
|
|
3968
|
-
}
|
|
3969
|
-
function failTracking(trackingId, error) {
|
|
3970
|
-
if (!trackingId) return;
|
|
3971
|
-
requestTracker.failRequest(trackingId, error instanceof Error ? error.message : "Stream error");
|
|
3972
|
-
}
|
|
3973
4690
|
function convertAnthropicMessages(messages) {
|
|
3974
4691
|
return messages.map((msg) => {
|
|
3975
4692
|
if (typeof msg.content === "string") return {
|
|
@@ -4017,7 +4734,6 @@ function extractToolCallsFromContent(content) {
|
|
|
4017
4734
|
});
|
|
4018
4735
|
return tools.length > 0 ? tools : void 0;
|
|
4019
4736
|
}
|
|
4020
|
-
const isNonStreaming = (response) => Object.hasOwn(response, "choices");
|
|
4021
4737
|
|
|
4022
4738
|
//#endregion
|
|
4023
4739
|
//#region src/routes/messages/route.ts
|
|
@@ -4026,14 +4742,14 @@ messageRoutes.post("/", async (c) => {
|
|
|
4026
4742
|
try {
|
|
4027
4743
|
return await handleCompletion(c);
|
|
4028
4744
|
} catch (error) {
|
|
4029
|
-
return
|
|
4745
|
+
return forwardError(c, error);
|
|
4030
4746
|
}
|
|
4031
4747
|
});
|
|
4032
4748
|
messageRoutes.post("/count_tokens", async (c) => {
|
|
4033
4749
|
try {
|
|
4034
4750
|
return await handleCountTokens(c);
|
|
4035
4751
|
} catch (error) {
|
|
4036
|
-
return
|
|
4752
|
+
return forwardError(c, error);
|
|
4037
4753
|
}
|
|
4038
4754
|
});
|
|
4039
4755
|
|
|
@@ -4072,18 +4788,18 @@ modelRoutes.get("/", async (c) => {
|
|
|
4072
4788
|
has_more: false
|
|
4073
4789
|
});
|
|
4074
4790
|
} catch (error) {
|
|
4075
|
-
return
|
|
4791
|
+
return forwardError(c, error);
|
|
4076
4792
|
}
|
|
4077
4793
|
});
|
|
4078
4794
|
|
|
4079
4795
|
//#endregion
|
|
4080
4796
|
//#region src/routes/token/route.ts
|
|
4081
4797
|
const tokenRoute = new Hono();
|
|
4082
|
-
tokenRoute.get("/",
|
|
4798
|
+
tokenRoute.get("/", (c) => {
|
|
4083
4799
|
try {
|
|
4084
4800
|
return c.json({ token: state.copilotToken });
|
|
4085
4801
|
} catch (error) {
|
|
4086
|
-
return
|
|
4802
|
+
return forwardError(c, error);
|
|
4087
4803
|
}
|
|
4088
4804
|
});
|
|
4089
4805
|
|
|
@@ -4095,7 +4811,7 @@ usageRoute.get("/", async (c) => {
|
|
|
4095
4811
|
const usage = await getCopilotUsage();
|
|
4096
4812
|
return c.json(usage);
|
|
4097
4813
|
} catch (error) {
|
|
4098
|
-
return
|
|
4814
|
+
return forwardError(c, error);
|
|
4099
4815
|
}
|
|
4100
4816
|
});
|
|
4101
4817
|
|
|
@@ -4147,10 +4863,15 @@ async function runServer(options) {
|
|
|
4147
4863
|
state.accountType = options.accountType;
|
|
4148
4864
|
if (options.accountType !== "individual") consola.info(`Using ${options.accountType} plan GitHub account`);
|
|
4149
4865
|
state.manualApprove = options.manual;
|
|
4150
|
-
state.rateLimitSeconds = options.rateLimit;
|
|
4151
|
-
state.rateLimitWait = options.rateLimitWait;
|
|
4152
4866
|
state.showToken = options.showToken;
|
|
4153
4867
|
state.autoCompact = options.autoCompact;
|
|
4868
|
+
if (options.rateLimit) initAdaptiveRateLimiter({
|
|
4869
|
+
baseRetryIntervalSeconds: options.retryInterval,
|
|
4870
|
+
requestIntervalSeconds: options.requestInterval,
|
|
4871
|
+
recoveryTimeoutMinutes: options.recoveryTimeout,
|
|
4872
|
+
consecutiveSuccessesForRecovery: options.consecutiveSuccesses
|
|
4873
|
+
});
|
|
4874
|
+
else consola.info("Rate limiting disabled");
|
|
4154
4875
|
if (options.autoCompact) consola.info("Auto-compact enabled: will compress context when exceeding token limits");
|
|
4155
4876
|
initHistory(options.history, options.historyLimit);
|
|
4156
4877
|
if (options.history) {
|
|
@@ -4237,16 +4958,30 @@ const start = defineCommand({
|
|
|
4237
4958
|
default: false,
|
|
4238
4959
|
description: "Enable manual request approval"
|
|
4239
4960
|
},
|
|
4240
|
-
"rate-limit": {
|
|
4241
|
-
alias: "r",
|
|
4242
|
-
type: "string",
|
|
4243
|
-
description: "Rate limit in seconds between requests"
|
|
4244
|
-
},
|
|
4245
|
-
wait: {
|
|
4246
|
-
alias: "w",
|
|
4961
|
+
"no-rate-limit": {
|
|
4247
4962
|
type: "boolean",
|
|
4248
4963
|
default: false,
|
|
4249
|
-
description: "
|
|
4964
|
+
description: "Disable adaptive rate limiting"
|
|
4965
|
+
},
|
|
4966
|
+
"retry-interval": {
|
|
4967
|
+
type: "string",
|
|
4968
|
+
default: "10",
|
|
4969
|
+
description: "Seconds to wait before retrying after rate limit error (default: 10)"
|
|
4970
|
+
},
|
|
4971
|
+
"request-interval": {
|
|
4972
|
+
type: "string",
|
|
4973
|
+
default: "10",
|
|
4974
|
+
description: "Seconds between requests in rate-limited mode (default: 10)"
|
|
4975
|
+
},
|
|
4976
|
+
"recovery-timeout": {
|
|
4977
|
+
type: "string",
|
|
4978
|
+
default: "10",
|
|
4979
|
+
description: "Minutes before attempting to recover from rate-limited mode (default: 10)"
|
|
4980
|
+
},
|
|
4981
|
+
"consecutive-successes": {
|
|
4982
|
+
type: "string",
|
|
4983
|
+
default: "5",
|
|
4984
|
+
description: "Number of consecutive successes needed to recover from rate-limited mode (default: 5)"
|
|
4250
4985
|
},
|
|
4251
4986
|
"github-token": {
|
|
4252
4987
|
alias: "g",
|
|
@@ -4269,10 +5004,10 @@ const start = defineCommand({
|
|
|
4269
5004
|
default: false,
|
|
4270
5005
|
description: "Initialize proxy from environment variables"
|
|
4271
5006
|
},
|
|
4272
|
-
history: {
|
|
5007
|
+
"no-history": {
|
|
4273
5008
|
type: "boolean",
|
|
4274
5009
|
default: false,
|
|
4275
|
-
description: "
|
|
5010
|
+
description: "Disable request history recording and Web UI"
|
|
4276
5011
|
},
|
|
4277
5012
|
"history-limit": {
|
|
4278
5013
|
type: "string",
|
|
@@ -4286,21 +5021,22 @@ const start = defineCommand({
|
|
|
4286
5021
|
}
|
|
4287
5022
|
},
|
|
4288
5023
|
run({ args }) {
|
|
4289
|
-
const rateLimitRaw = args["rate-limit"];
|
|
4290
|
-
const rateLimit = rateLimitRaw === void 0 ? void 0 : Number.parseInt(rateLimitRaw, 10);
|
|
4291
5024
|
return runServer({
|
|
4292
5025
|
port: Number.parseInt(args.port, 10),
|
|
4293
5026
|
host: args.host,
|
|
4294
5027
|
verbose: args.verbose,
|
|
4295
5028
|
accountType: args["account-type"],
|
|
4296
5029
|
manual: args.manual,
|
|
4297
|
-
rateLimit,
|
|
4298
|
-
|
|
5030
|
+
rateLimit: !args["no-rate-limit"],
|
|
5031
|
+
retryInterval: Number.parseInt(args["retry-interval"], 10),
|
|
5032
|
+
requestInterval: Number.parseInt(args["request-interval"], 10),
|
|
5033
|
+
recoveryTimeout: Number.parseInt(args["recovery-timeout"], 10),
|
|
5034
|
+
consecutiveSuccesses: Number.parseInt(args["consecutive-successes"], 10),
|
|
4299
5035
|
githubToken: args["github-token"],
|
|
4300
5036
|
claudeCode: args["claude-code"],
|
|
4301
5037
|
showToken: args["show-token"],
|
|
4302
5038
|
proxyEnv: args["proxy-env"],
|
|
4303
|
-
history: args
|
|
5039
|
+
history: !args["no-history"],
|
|
4304
5040
|
historyLimit: Number.parseInt(args["history-limit"], 10),
|
|
4305
5041
|
autoCompact: args["auto-compact"]
|
|
4306
5042
|
});
|
|
@@ -4320,7 +5056,8 @@ const main = defineCommand({
|
|
|
4320
5056
|
logout,
|
|
4321
5057
|
start,
|
|
4322
5058
|
"check-usage": checkUsage,
|
|
4323
|
-
debug
|
|
5059
|
+
debug,
|
|
5060
|
+
"patch-claude": patchClaude
|
|
4324
5061
|
}
|
|
4325
5062
|
});
|
|
4326
5063
|
await runMain(main);
|