ghc-proxy 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +189 -77
- package/dist/{main.js → main.mjs} +252 -233
- package/dist/main.mjs.map +1 -0
- package/package.json +18 -19
- package/dist/main.js.map +0 -1
|
@@ -83,20 +83,6 @@ async function writeConfigField(field, value) {
|
|
|
83
83
|
}
|
|
84
84
|
}
|
|
85
85
|
|
|
86
|
-
//#endregion
|
|
87
|
-
//#region src/lib/state.ts
|
|
88
|
-
const state = {
|
|
89
|
-
auth: {},
|
|
90
|
-
config: {
|
|
91
|
-
accountType: "individual",
|
|
92
|
-
manualApprove: false,
|
|
93
|
-
rateLimitWait: false,
|
|
94
|
-
showToken: false
|
|
95
|
-
},
|
|
96
|
-
cache: {},
|
|
97
|
-
rateLimit: {}
|
|
98
|
-
};
|
|
99
|
-
|
|
100
86
|
//#endregion
|
|
101
87
|
//#region src/lib/api-config.ts
|
|
102
88
|
function standardHeaders() {
|
|
@@ -112,9 +98,9 @@ const API_VERSION = "2025-04-01";
|
|
|
112
98
|
function copilotBaseUrl(config) {
|
|
113
99
|
return config.accountType === "individual" ? "https://api.githubcopilot.com" : `https://api.${config.accountType}.githubcopilot.com`;
|
|
114
100
|
}
|
|
115
|
-
function copilotHeaders(auth
|
|
101
|
+
function copilotHeaders(auth, config, vision = false) {
|
|
116
102
|
const headers = {
|
|
117
|
-
"Authorization": `Bearer ${auth
|
|
103
|
+
"Authorization": `Bearer ${auth.copilotToken}`,
|
|
118
104
|
"content-type": standardHeaders()["content-type"],
|
|
119
105
|
"copilot-integration-id": "vscode-chat",
|
|
120
106
|
"editor-version": `vscode/${config.vsCodeVersion ?? "unknown"}`,
|
|
@@ -129,10 +115,10 @@ function copilotHeaders(auth$1, config, vision = false) {
|
|
|
129
115
|
return headers;
|
|
130
116
|
}
|
|
131
117
|
const GITHUB_API_BASE_URL = "https://api.github.com";
|
|
132
|
-
function githubHeaders(auth
|
|
118
|
+
function githubHeaders(auth, config) {
|
|
133
119
|
return {
|
|
134
120
|
...standardHeaders(),
|
|
135
|
-
"authorization": `token ${auth
|
|
121
|
+
"authorization": `token ${auth.githubToken}`,
|
|
136
122
|
"editor-version": `vscode/${config.vsCodeVersion ?? "unknown"}`,
|
|
137
123
|
"editor-plugin-version": EDITOR_PLUGIN_VERSION,
|
|
138
124
|
"user-agent": USER_AGENT,
|
|
@@ -169,10 +155,6 @@ async function forwardError(c, error) {
|
|
|
169
155
|
type: "error"
|
|
170
156
|
} }, error.response.status);
|
|
171
157
|
}
|
|
172
|
-
if (error instanceof DOMException && error.name === "AbortError") return c.json({ error: {
|
|
173
|
-
message: "Upstream request was aborted",
|
|
174
|
-
type: "timeout_error"
|
|
175
|
-
} }, 504);
|
|
176
158
|
if (error instanceof Error && error.name === "AbortError") return c.json({ error: {
|
|
177
159
|
message: "Upstream request was aborted",
|
|
178
160
|
type: "timeout_error"
|
|
@@ -189,8 +171,8 @@ var CopilotClient = class {
|
|
|
189
171
|
auth;
|
|
190
172
|
config;
|
|
191
173
|
fetchImpl;
|
|
192
|
-
constructor(auth
|
|
193
|
-
this.auth = auth
|
|
174
|
+
constructor(auth, config, deps) {
|
|
175
|
+
this.auth = auth;
|
|
194
176
|
this.config = config;
|
|
195
177
|
this.fetchImpl = deps?.fetch ?? fetch;
|
|
196
178
|
}
|
|
@@ -231,35 +213,17 @@ var CopilotClient = class {
|
|
|
231
213
|
return await response.json();
|
|
232
214
|
}
|
|
233
215
|
};
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
//#region src/lib/client-config.ts
|
|
237
|
-
function getClientConfig(appState) {
|
|
238
|
-
return {
|
|
239
|
-
accountType: appState.config.accountType,
|
|
240
|
-
vsCodeVersion: appState.cache.vsCodeVersion
|
|
241
|
-
};
|
|
216
|
+
function isNonStreamingResponse(response) {
|
|
217
|
+
return Object.hasOwn(response, "choices");
|
|
242
218
|
}
|
|
243
219
|
|
|
244
220
|
//#endregion
|
|
245
|
-
//#region src/lib/
|
|
221
|
+
//#region src/lib/sleep.ts
|
|
246
222
|
function sleep(ms) {
|
|
247
223
|
return new Promise((resolve) => {
|
|
248
224
|
setTimeout(resolve, ms);
|
|
249
225
|
});
|
|
250
226
|
}
|
|
251
|
-
function isNullish(value) {
|
|
252
|
-
return value === null || value === void 0;
|
|
253
|
-
}
|
|
254
|
-
async function cacheModels(client) {
|
|
255
|
-
const models = await (client ?? new CopilotClient(state.auth, getClientConfig(state))).getModels();
|
|
256
|
-
state.cache.models = models;
|
|
257
|
-
}
|
|
258
|
-
async function cacheVSCodeVersion() {
|
|
259
|
-
const response = await getVSCodeVersion();
|
|
260
|
-
state.cache.vsCodeVersion = response;
|
|
261
|
-
consola.info(`Using VSCode version: ${response}`);
|
|
262
|
-
}
|
|
263
227
|
|
|
264
228
|
//#endregion
|
|
265
229
|
//#region src/clients/github-client.ts
|
|
@@ -267,8 +231,8 @@ var GitHubClient = class {
|
|
|
267
231
|
auth;
|
|
268
232
|
config;
|
|
269
233
|
fetchImpl;
|
|
270
|
-
constructor(auth
|
|
271
|
-
this.auth = auth
|
|
234
|
+
constructor(auth, config, deps) {
|
|
235
|
+
this.auth = auth;
|
|
272
236
|
this.config = config;
|
|
273
237
|
this.fetchImpl = deps?.fetch ?? fetch;
|
|
274
238
|
}
|
|
@@ -295,9 +259,10 @@ var GitHubClient = class {
|
|
|
295
259
|
return await response.json();
|
|
296
260
|
}
|
|
297
261
|
async pollAccessToken(deviceCode) {
|
|
262
|
+
const MAX_POLL_ATTEMPTS = 60;
|
|
298
263
|
const sleepDuration = (deviceCode.interval + 1) * 1e3;
|
|
299
264
|
consola.debug(`Polling access token with interval of ${sleepDuration}ms`);
|
|
300
|
-
|
|
265
|
+
for (let attempt = 0; attempt < MAX_POLL_ATTEMPTS; attempt++) {
|
|
301
266
|
const response = await this.fetchImpl(`${GITHUB_BASE_URL}/login/oauth/access_token`, {
|
|
302
267
|
method: "POST",
|
|
303
268
|
headers: standardHeaders(),
|
|
@@ -317,6 +282,7 @@ var GitHubClient = class {
|
|
|
317
282
|
if (json.access_token) return json.access_token;
|
|
318
283
|
await sleep(sleepDuration);
|
|
319
284
|
}
|
|
285
|
+
throw new Error("Device code authorization timed out");
|
|
320
286
|
}
|
|
321
287
|
async getGitHubUser() {
|
|
322
288
|
const response = await this.fetchImpl(`${GITHUB_API_BASE_URL}/user`, { headers: {
|
|
@@ -347,6 +313,35 @@ async function getVSCodeVersion() {
|
|
|
347
313
|
}
|
|
348
314
|
}
|
|
349
315
|
|
|
316
|
+
//#endregion
|
|
317
|
+
//#region src/lib/state.ts
|
|
318
|
+
const state = {
|
|
319
|
+
auth: {},
|
|
320
|
+
config: {
|
|
321
|
+
accountType: "individual",
|
|
322
|
+
manualApprove: false,
|
|
323
|
+
rateLimitWait: false,
|
|
324
|
+
showToken: false
|
|
325
|
+
},
|
|
326
|
+
cache: {},
|
|
327
|
+
rateLimit: {}
|
|
328
|
+
};
|
|
329
|
+
function getClientConfig() {
|
|
330
|
+
return {
|
|
331
|
+
accountType: state.config.accountType,
|
|
332
|
+
vsCodeVersion: state.cache.vsCodeVersion
|
|
333
|
+
};
|
|
334
|
+
}
|
|
335
|
+
async function cacheModels(client) {
|
|
336
|
+
const models = await (client ?? new CopilotClient(state.auth, getClientConfig())).getModels();
|
|
337
|
+
state.cache.models = models;
|
|
338
|
+
}
|
|
339
|
+
async function cacheVSCodeVersion() {
|
|
340
|
+
const response = await getVSCodeVersion();
|
|
341
|
+
state.cache.vsCodeVersion = response;
|
|
342
|
+
consola.info(`Using VSCode version: ${response}`);
|
|
343
|
+
}
|
|
344
|
+
|
|
350
345
|
//#endregion
|
|
351
346
|
//#region src/lib/token.ts
|
|
352
347
|
async function writeGithubToken(token) {
|
|
@@ -363,10 +358,10 @@ async function setupCopilotToken() {
|
|
|
363
358
|
const refreshCopilotToken = async () => {
|
|
364
359
|
consola.debug("Refreshing Copilot token");
|
|
365
360
|
try {
|
|
366
|
-
const { token
|
|
367
|
-
state.auth.copilotToken = token
|
|
361
|
+
const { token } = await githubClient.getCopilotToken();
|
|
362
|
+
state.auth.copilotToken = token;
|
|
368
363
|
consola.debug("Copilot token refreshed");
|
|
369
|
-
if (state.config.showToken) consola.info("Refreshed Copilot token:", token
|
|
364
|
+
if (state.config.showToken) consola.info("Refreshed Copilot token:", token);
|
|
370
365
|
} catch (error) {
|
|
371
366
|
consola.error("Failed to refresh Copilot token:", error);
|
|
372
367
|
}
|
|
@@ -421,7 +416,7 @@ async function logUser() {
|
|
|
421
416
|
consola.info(`Logged in as ${user.login}`);
|
|
422
417
|
}
|
|
423
418
|
function createGitHubClient() {
|
|
424
|
-
return new GitHubClient(state.auth, getClientConfig(
|
|
419
|
+
return new GitHubClient(state.auth, getClientConfig());
|
|
425
420
|
}
|
|
426
421
|
async function ensureVSCodeVersion() {
|
|
427
422
|
if (!state.cache.vsCodeVersion) await cacheVSCodeVersion();
|
|
@@ -480,7 +475,7 @@ const checkUsage = defineCommand({
|
|
|
480
475
|
await cacheVSCodeVersion();
|
|
481
476
|
await setupGitHubToken();
|
|
482
477
|
try {
|
|
483
|
-
const usage = await new GitHubClient(state.auth, getClientConfig(
|
|
478
|
+
const usage = await new GitHubClient(state.auth, getClientConfig()).getCopilotUsage();
|
|
484
479
|
const premium = usage.quota_snapshots.premium_interactions;
|
|
485
480
|
const premiumTotal = premium.entitlement;
|
|
486
481
|
const premiumUsed = premiumTotal - premium.remaining;
|
|
@@ -637,8 +632,7 @@ function getShell() {
|
|
|
637
632
|
const { platform, ppid, env } = process;
|
|
638
633
|
if (platform === "win32") {
|
|
639
634
|
try {
|
|
640
|
-
|
|
641
|
-
if (execSync(command, { stdio: "pipe" }).toString().toLowerCase().includes("powershell.exe")) return "powershell";
|
|
635
|
+
if (execSync(`wmic process get ParentProcessId,Name | findstr "${ppid}"`, { stdio: "pipe" }).toString().toLowerCase().includes("powershell.exe")) return "powershell";
|
|
642
636
|
} catch {
|
|
643
637
|
return "cmd";
|
|
644
638
|
}
|
|
@@ -686,8 +680,8 @@ function generateEnvScript(envVars, commandToRun = "") {
|
|
|
686
680
|
|
|
687
681
|
//#endregion
|
|
688
682
|
//#region src/lib/request-logger.ts
|
|
689
|
-
function formatElapsed(start
|
|
690
|
-
const delta = Date.now() - start
|
|
683
|
+
function formatElapsed(start) {
|
|
684
|
+
const delta = Date.now() - start;
|
|
691
685
|
return delta < 1e3 ? `${delta}ms` : `${Math.round(delta / 1e3)}s`;
|
|
692
686
|
}
|
|
693
687
|
function formatPath(rawUrl) {
|
|
@@ -725,17 +719,17 @@ function formatModelMapping(info) {
|
|
|
725
719
|
}
|
|
726
720
|
const requestLogger = async (c, next) => {
|
|
727
721
|
const { method, url } = c.req;
|
|
728
|
-
const path
|
|
729
|
-
const start
|
|
722
|
+
const path = formatPath(url);
|
|
723
|
+
const start = Date.now();
|
|
730
724
|
try {
|
|
731
725
|
await next();
|
|
732
726
|
} finally {
|
|
733
|
-
const elapsed = formatElapsed(start
|
|
727
|
+
const elapsed = formatElapsed(start);
|
|
734
728
|
const status = c.res.status;
|
|
735
729
|
const modelInfo = c.get("modelMappingInfo");
|
|
736
730
|
const line = [
|
|
737
731
|
colorizeMethod(method),
|
|
738
|
-
colorize("white", path
|
|
732
|
+
colorize("white", path),
|
|
739
733
|
colorizeStatus(status),
|
|
740
734
|
colorize("dim", elapsed)
|
|
741
735
|
].join(" ");
|
|
@@ -754,33 +748,33 @@ async function awaitApproval() {
|
|
|
754
748
|
|
|
755
749
|
//#endregion
|
|
756
750
|
//#region src/lib/rate-limit.ts
|
|
757
|
-
async function checkRateLimit(state
|
|
758
|
-
if (state
|
|
751
|
+
async function checkRateLimit(state) {
|
|
752
|
+
if (state.config.rateLimitSeconds === void 0) return;
|
|
759
753
|
const now = Date.now();
|
|
760
|
-
if (!state
|
|
761
|
-
state
|
|
754
|
+
if (!state.rateLimit.lastRequestTimestamp) {
|
|
755
|
+
state.rateLimit.lastRequestTimestamp = now;
|
|
762
756
|
return;
|
|
763
757
|
}
|
|
764
|
-
const elapsedSeconds = (now - state
|
|
765
|
-
if (elapsedSeconds > state
|
|
766
|
-
state
|
|
758
|
+
const elapsedSeconds = (now - state.rateLimit.lastRequestTimestamp) / 1e3;
|
|
759
|
+
if (elapsedSeconds > state.config.rateLimitSeconds) {
|
|
760
|
+
state.rateLimit.lastRequestTimestamp = now;
|
|
767
761
|
return;
|
|
768
762
|
}
|
|
769
|
-
const waitTimeSeconds = Math.ceil(state
|
|
770
|
-
if (!state
|
|
763
|
+
const waitTimeSeconds = Math.ceil(state.config.rateLimitSeconds - elapsedSeconds);
|
|
764
|
+
if (!state.config.rateLimitWait) {
|
|
771
765
|
consola.warn(`Rate limit exceeded. Need to wait ${waitTimeSeconds} more seconds.`);
|
|
772
766
|
throw new HTTPError("Rate limit exceeded", Response.json({ message: "Rate limit exceeded" }, { status: 429 }));
|
|
773
767
|
}
|
|
774
768
|
const waitTimeMs = waitTimeSeconds * 1e3;
|
|
775
769
|
consola.warn(`Rate limit reached. Waiting ${waitTimeSeconds} seconds before proceeding...`);
|
|
776
770
|
await sleep(waitTimeMs);
|
|
777
|
-
state
|
|
771
|
+
state.rateLimit.lastRequestTimestamp = now;
|
|
778
772
|
consola.info("Rate limit wait completed, proceeding with request");
|
|
779
773
|
}
|
|
780
774
|
|
|
781
775
|
//#endregion
|
|
782
776
|
//#region src/routes/middleware/request-guard.ts
|
|
783
|
-
const requestGuard = async (
|
|
777
|
+
const requestGuard = async (_c, next) => {
|
|
784
778
|
await checkRateLimit(state);
|
|
785
779
|
if (state.config.manualApprove) await awaitApproval();
|
|
786
780
|
await next();
|
|
@@ -796,16 +790,26 @@ const ENCODING_MAP = {
|
|
|
796
790
|
r50k_base: () => import("gpt-tokenizer/encoding/r50k_base")
|
|
797
791
|
};
|
|
798
792
|
const encodingCache = /* @__PURE__ */ new Map();
|
|
793
|
+
const TOKENS_PER_MESSAGE = 3;
|
|
794
|
+
const TOKENS_PER_NAME = 1;
|
|
795
|
+
const REPLY_PRIMING_TOKENS = 3;
|
|
796
|
+
const BASE_CONSTANTS = {
|
|
797
|
+
propertyInitOverhead: 3,
|
|
798
|
+
propertyKeyOverhead: 3,
|
|
799
|
+
enumOverhead: -3,
|
|
800
|
+
enumItemCost: 3,
|
|
801
|
+
functionEndOverhead: 12
|
|
802
|
+
};
|
|
799
803
|
/**
|
|
800
804
|
* Calculate tokens for tool calls
|
|
801
805
|
*/
|
|
802
806
|
function calculateToolCallsTokens(toolCalls, encoder, constants) {
|
|
803
807
|
let tokens = 0;
|
|
804
808
|
for (const toolCall of toolCalls) {
|
|
805
|
-
tokens += constants.
|
|
809
|
+
tokens += constants.functionInitOverhead;
|
|
806
810
|
tokens += encoder.encode(JSON.stringify(toolCall)).length;
|
|
807
811
|
}
|
|
808
|
-
tokens += constants.
|
|
812
|
+
tokens += constants.functionEndOverhead;
|
|
809
813
|
return tokens;
|
|
810
814
|
}
|
|
811
815
|
/**
|
|
@@ -821,12 +825,10 @@ function calculateContentPartsTokens(contentParts, encoder) {
|
|
|
821
825
|
* Calculate tokens for a single message
|
|
822
826
|
*/
|
|
823
827
|
function calculateMessageTokens(message, encoder, constants) {
|
|
824
|
-
|
|
825
|
-
const tokensPerName = 1;
|
|
826
|
-
let tokens = tokensPerMessage;
|
|
828
|
+
let tokens = TOKENS_PER_MESSAGE;
|
|
827
829
|
for (const [key, value] of Object.entries(message)) {
|
|
828
830
|
if (typeof value === "string") tokens += encoder.encode(value).length;
|
|
829
|
-
if (key === "name") tokens +=
|
|
831
|
+
if (key === "name") tokens += TOKENS_PER_NAME;
|
|
830
832
|
if (key === "tool_calls") tokens += calculateToolCallsTokens(value, encoder, constants);
|
|
831
833
|
if (key === "content" && Array.isArray(value)) tokens += calculateContentPartsTokens(value, encoder);
|
|
832
834
|
}
|
|
@@ -839,17 +841,15 @@ function calculateTokens(messages, encoder, constants) {
|
|
|
839
841
|
if (messages.length === 0) return 0;
|
|
840
842
|
let numTokens = 0;
|
|
841
843
|
for (const message of messages) numTokens += calculateMessageTokens(message, encoder, constants);
|
|
842
|
-
numTokens +=
|
|
844
|
+
numTokens += REPLY_PRIMING_TOKENS;
|
|
843
845
|
return numTokens;
|
|
844
846
|
}
|
|
845
847
|
/**
|
|
846
848
|
* Get the corresponding encoder module based on encoding type
|
|
847
849
|
*/
|
|
848
|
-
async function
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
if (cached) return cached;
|
|
852
|
-
}
|
|
850
|
+
async function getEncoder(encoding) {
|
|
851
|
+
const cached = encodingCache.get(encoding);
|
|
852
|
+
if (cached) return cached;
|
|
853
853
|
const supportedEncoding = encoding;
|
|
854
854
|
if (!(supportedEncoding in ENCODING_MAP)) {
|
|
855
855
|
const fallbackModule = await ENCODING_MAP.o200k_base();
|
|
@@ -870,20 +870,10 @@ function getTokenizerFromModel(model) {
|
|
|
870
870
|
* Get model-specific constants for token calculation
|
|
871
871
|
*/
|
|
872
872
|
function getModelConstants(model) {
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
enumInit: -3,
|
|
878
|
-
enumItem: 3,
|
|
879
|
-
funcEnd: 12
|
|
880
|
-
} : {
|
|
881
|
-
funcInit: 7,
|
|
882
|
-
propInit: 3,
|
|
883
|
-
propKey: 3,
|
|
884
|
-
enumInit: -3,
|
|
885
|
-
enumItem: 3,
|
|
886
|
-
funcEnd: 12
|
|
873
|
+
const isLegacy = model.id === "gpt-3.5-turbo" || model.id === "gpt-4";
|
|
874
|
+
return {
|
|
875
|
+
...BASE_CONSTANTS,
|
|
876
|
+
functionInitOverhead: isLegacy ? 10 : 7
|
|
887
877
|
};
|
|
888
878
|
}
|
|
889
879
|
/**
|
|
@@ -891,16 +881,16 @@ function getModelConstants(model) {
|
|
|
891
881
|
*/
|
|
892
882
|
function calculateParameterTokens(key, prop, context) {
|
|
893
883
|
const { encoder, constants } = context;
|
|
894
|
-
let tokens = constants.
|
|
884
|
+
let tokens = constants.propertyKeyOverhead;
|
|
895
885
|
if (typeof prop !== "object" || prop === null) return tokens;
|
|
896
886
|
const param = prop;
|
|
897
887
|
const paramName = key;
|
|
898
888
|
const paramType = param.type || "string";
|
|
899
889
|
let paramDesc = param.description || "";
|
|
900
890
|
if (param.enum && Array.isArray(param.enum)) {
|
|
901
|
-
tokens += constants.
|
|
891
|
+
tokens += constants.enumOverhead;
|
|
902
892
|
for (const item of param.enum) {
|
|
903
|
-
tokens += constants.
|
|
893
|
+
tokens += constants.enumItemCost;
|
|
904
894
|
tokens += encoder.encode(String(item)).length;
|
|
905
895
|
}
|
|
906
896
|
}
|
|
@@ -929,7 +919,7 @@ function calculateParametersTokens(parameters, encoder, constants) {
|
|
|
929
919
|
for (const [key, value] of Object.entries(params)) if (key === "properties") {
|
|
930
920
|
const properties = value;
|
|
931
921
|
if (Object.keys(properties).length > 0) {
|
|
932
|
-
tokens += constants.
|
|
922
|
+
tokens += constants.propertyInitOverhead;
|
|
933
923
|
for (const propKey of Object.keys(properties)) tokens += calculateParameterTokens(propKey, properties[propKey], {
|
|
934
924
|
encoder,
|
|
935
925
|
constants
|
|
@@ -945,12 +935,12 @@ function calculateParametersTokens(parameters, encoder, constants) {
|
|
|
945
935
|
* Calculate tokens for a single tool
|
|
946
936
|
*/
|
|
947
937
|
function calculateToolTokens(tool, encoder, constants) {
|
|
948
|
-
let tokens = constants.
|
|
938
|
+
let tokens = constants.functionInitOverhead;
|
|
949
939
|
const func = tool.function;
|
|
950
|
-
const
|
|
951
|
-
let
|
|
952
|
-
if (
|
|
953
|
-
const line = `${
|
|
940
|
+
const functionName = func.name;
|
|
941
|
+
let functionDescription = func.description || "";
|
|
942
|
+
if (functionDescription.endsWith(".")) functionDescription = functionDescription.slice(0, -1);
|
|
943
|
+
const line = `${functionName}:${functionDescription}`;
|
|
954
944
|
tokens += encoder.encode(line).length;
|
|
955
945
|
if (typeof func.parameters === "object" && func.parameters !== null) tokens += calculateParametersTokens(func.parameters, encoder, constants);
|
|
956
946
|
return tokens;
|
|
@@ -959,20 +949,18 @@ function calculateToolTokens(tool, encoder, constants) {
|
|
|
959
949
|
* Calculate token count for tools based on model
|
|
960
950
|
*/
|
|
961
951
|
function numTokensForTools(tools, encoder, constants) {
|
|
962
|
-
let
|
|
963
|
-
for (const tool of tools)
|
|
964
|
-
|
|
965
|
-
return
|
|
952
|
+
let toolTokenCount = 0;
|
|
953
|
+
for (const tool of tools) toolTokenCount += calculateToolTokens(tool, encoder, constants);
|
|
954
|
+
toolTokenCount += constants.functionEndOverhead;
|
|
955
|
+
return toolTokenCount;
|
|
966
956
|
}
|
|
967
957
|
/**
|
|
968
958
|
* Calculate the token count of messages, supporting multiple GPT encoders
|
|
969
959
|
*/
|
|
970
960
|
async function getTokenCount(payload, model) {
|
|
971
|
-
const
|
|
972
|
-
const
|
|
973
|
-
const
|
|
974
|
-
const inputMessages = simplifiedMessages.filter((msg) => msg.role !== "assistant");
|
|
975
|
-
const outputMessages = simplifiedMessages.filter((msg) => msg.role === "assistant");
|
|
961
|
+
const encoder = await getEncoder(getTokenizerFromModel(model));
|
|
962
|
+
const inputMessages = payload.messages.filter((msg) => msg.role !== "assistant");
|
|
963
|
+
const outputMessages = payload.messages.filter((msg) => msg.role === "assistant");
|
|
976
964
|
const constants = getModelConstants(model);
|
|
977
965
|
let inputTokens = calculateTokens(inputMessages, encoder, constants);
|
|
978
966
|
if (payload.tools && payload.tools.length > 0) inputTokens += numTokensForTools(payload.tools, encoder, constants);
|
|
@@ -986,28 +974,17 @@ async function getTokenCount(payload, model) {
|
|
|
986
974
|
//#endregion
|
|
987
975
|
//#region src/lib/upstream-signal.ts
|
|
988
976
|
const DEFAULT_TIMEOUT_MS = 3e5;
|
|
989
|
-
function createUpstreamSignal(
|
|
990
|
-
const { clientSignal, timeoutMs = DEFAULT_TIMEOUT_MS } = options ?? {};
|
|
977
|
+
function createUpstreamSignal(clientSignal, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
991
978
|
const controller = new AbortController();
|
|
992
|
-
const timeout = setTimeout(() =>
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
let listenerAdded = false;
|
|
996
|
-
let abortListener;
|
|
997
|
-
if (clientSignal && !clientSignal.aborted) {
|
|
998
|
-
abortListener = () => {
|
|
999
|
-
controller.abort();
|
|
1000
|
-
};
|
|
1001
|
-
clientSignal.addEventListener("abort", abortListener);
|
|
1002
|
-
listenerAdded = true;
|
|
1003
|
-
}
|
|
1004
|
-
const cleanup = () => {
|
|
1005
|
-
clearTimeout(timeout);
|
|
1006
|
-
if (listenerAdded && clientSignal && abortListener) clientSignal.removeEventListener("abort", abortListener);
|
|
1007
|
-
};
|
|
979
|
+
const timeout = timeoutMs > 0 ? setTimeout(() => controller.abort(), timeoutMs) : void 0;
|
|
980
|
+
const onAbort = () => controller.abort();
|
|
981
|
+
if (clientSignal && !clientSignal.aborted) clientSignal.addEventListener("abort", onAbort);
|
|
1008
982
|
return {
|
|
1009
983
|
signal: controller.signal,
|
|
1010
|
-
cleanup
|
|
984
|
+
cleanup: () => {
|
|
985
|
+
if (timeout) clearTimeout(timeout);
|
|
986
|
+
clientSignal?.removeEventListener("abort", onAbort);
|
|
987
|
+
}
|
|
1011
988
|
};
|
|
1012
989
|
}
|
|
1013
990
|
|
|
@@ -1084,19 +1061,16 @@ async function handleCompletion$1(c) {
|
|
|
1084
1061
|
} catch (error) {
|
|
1085
1062
|
consola.warn("Failed to calculate token count:", error);
|
|
1086
1063
|
}
|
|
1087
|
-
if (
|
|
1064
|
+
if (payload.max_tokens == null) {
|
|
1088
1065
|
payload = {
|
|
1089
1066
|
...payload,
|
|
1090
1067
|
max_tokens: selectedModel?.capabilities.limits.max_output_tokens
|
|
1091
1068
|
};
|
|
1092
1069
|
consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens));
|
|
1093
1070
|
}
|
|
1094
|
-
const { signal, cleanup } = createUpstreamSignal(
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
});
|
|
1098
|
-
const response = await new CopilotClient(state.auth, getClientConfig(state)).createChatCompletions(payload, { signal });
|
|
1099
|
-
if (isNonStreaming$1(response)) {
|
|
1071
|
+
const { signal, cleanup } = createUpstreamSignal(c.req.raw.signal, state.config.upstreamTimeoutSeconds !== void 0 ? state.config.upstreamTimeoutSeconds * 1e3 : void 0);
|
|
1072
|
+
const response = await new CopilotClient(state.auth, getClientConfig()).createChatCompletions(payload, { signal });
|
|
1073
|
+
if (isNonStreamingResponse(response)) {
|
|
1100
1074
|
consola.debug("Non-streaming response:", JSON.stringify(response));
|
|
1101
1075
|
cleanup();
|
|
1102
1076
|
return c.json(response);
|
|
@@ -1113,9 +1087,6 @@ async function handleCompletion$1(c) {
|
|
|
1113
1087
|
}
|
|
1114
1088
|
});
|
|
1115
1089
|
}
|
|
1116
|
-
function isNonStreaming$1(response) {
|
|
1117
|
-
return Object.hasOwn(response, "choices");
|
|
1118
|
-
}
|
|
1119
1090
|
|
|
1120
1091
|
//#endregion
|
|
1121
1092
|
//#region src/routes/chat-completions/route.ts
|
|
@@ -1127,7 +1098,7 @@ completionRoutes.post("/", requestGuard, (c) => handleCompletion$1(c));
|
|
|
1127
1098
|
const embeddingRoutes = new Hono();
|
|
1128
1099
|
embeddingRoutes.post("/", async (c) => {
|
|
1129
1100
|
const payload = parseEmbeddingRequest(await c.req.json());
|
|
1130
|
-
const response = await new CopilotClient(state.auth, getClientConfig(
|
|
1101
|
+
const response = await new CopilotClient(state.auth, getClientConfig()).createEmbeddings(payload);
|
|
1131
1102
|
return c.json(response);
|
|
1132
1103
|
});
|
|
1133
1104
|
|
|
@@ -1142,6 +1113,13 @@ function mapOpenAIStopReasonToAnthropic(finishReason) {
|
|
|
1142
1113
|
content_filter: "end_turn"
|
|
1143
1114
|
}[finishReason];
|
|
1144
1115
|
}
|
|
1116
|
+
function mapOpenAIUsageToAnthropic(usage) {
|
|
1117
|
+
return {
|
|
1118
|
+
input_tokens: (usage?.prompt_tokens ?? 0) - (usage?.prompt_tokens_details?.cached_tokens ?? 0),
|
|
1119
|
+
output_tokens: usage?.completion_tokens ?? 0,
|
|
1120
|
+
...usage?.prompt_tokens_details?.cached_tokens !== void 0 && { cache_read_input_tokens: usage.prompt_tokens_details.cached_tokens }
|
|
1121
|
+
};
|
|
1122
|
+
}
|
|
1145
1123
|
|
|
1146
1124
|
//#endregion
|
|
1147
1125
|
//#region src/translator/anthropic/anthropic-stream-translator.ts
|
|
@@ -1152,19 +1130,21 @@ var AnthropicStreamTranslator = class {
|
|
|
1152
1130
|
messageStartSent: false,
|
|
1153
1131
|
contentBlockIndex: 0,
|
|
1154
1132
|
contentBlockOpen: false,
|
|
1133
|
+
thinkingBlockOpen: false,
|
|
1155
1134
|
toolCalls: {}
|
|
1156
1135
|
};
|
|
1157
1136
|
}
|
|
1158
1137
|
onChunk(chunk) {
|
|
1159
1138
|
if (chunk.choices.length === 0) return [];
|
|
1160
|
-
const events
|
|
1139
|
+
const events = [];
|
|
1161
1140
|
const choice = chunk.choices[0];
|
|
1162
1141
|
const { delta } = choice;
|
|
1163
|
-
this.appendMessageStart(events
|
|
1164
|
-
this.
|
|
1165
|
-
this.
|
|
1166
|
-
this.
|
|
1167
|
-
|
|
1142
|
+
this.appendMessageStart(events, chunk);
|
|
1143
|
+
this.appendThinkingDelta(events, delta.reasoning_text);
|
|
1144
|
+
this.appendContentDelta(events, delta.content);
|
|
1145
|
+
this.appendToolCalls(events, delta.tool_calls);
|
|
1146
|
+
this.appendFinish(events, chunk, choice.finish_reason);
|
|
1147
|
+
return events;
|
|
1168
1148
|
}
|
|
1169
1149
|
onError(error) {
|
|
1170
1150
|
return [{
|
|
@@ -1190,9 +1170,9 @@ var AnthropicStreamTranslator = class {
|
|
|
1190
1170
|
return tc !== void 0 && tc.anthropicBlockIndex === this.state.contentBlockIndex;
|
|
1191
1171
|
});
|
|
1192
1172
|
}
|
|
1193
|
-
appendMessageStart(events
|
|
1173
|
+
appendMessageStart(events, chunk) {
|
|
1194
1174
|
if (this.state.messageStartSent) return;
|
|
1195
|
-
events
|
|
1175
|
+
events.push({
|
|
1196
1176
|
type: "message_start",
|
|
1197
1177
|
message: {
|
|
1198
1178
|
id: chunk.id,
|
|
@@ -1203,18 +1183,57 @@ var AnthropicStreamTranslator = class {
|
|
|
1203
1183
|
stop_reason: null,
|
|
1204
1184
|
stop_sequence: null,
|
|
1205
1185
|
usage: {
|
|
1206
|
-
|
|
1207
|
-
output_tokens: 0
|
|
1208
|
-
...chunk.usage?.prompt_tokens_details?.cached_tokens !== void 0 && { cache_read_input_tokens: chunk.usage.prompt_tokens_details.cached_tokens }
|
|
1186
|
+
...mapOpenAIUsageToAnthropic(chunk.usage),
|
|
1187
|
+
output_tokens: 0
|
|
1209
1188
|
}
|
|
1210
1189
|
}
|
|
1211
1190
|
});
|
|
1212
1191
|
this.state.messageStartSent = true;
|
|
1213
1192
|
}
|
|
1214
|
-
|
|
1193
|
+
appendThinkingDelta(events, reasoningText) {
|
|
1194
|
+
if (!reasoningText) return;
|
|
1195
|
+
if (this.state.contentBlockOpen && !this.state.thinkingBlockOpen) {
|
|
1196
|
+
events.push({
|
|
1197
|
+
type: "content_block_stop",
|
|
1198
|
+
index: this.state.contentBlockIndex
|
|
1199
|
+
});
|
|
1200
|
+
this.state.contentBlockIndex++;
|
|
1201
|
+
this.state.contentBlockOpen = false;
|
|
1202
|
+
}
|
|
1203
|
+
if (!this.state.thinkingBlockOpen) {
|
|
1204
|
+
events.push({
|
|
1205
|
+
type: "content_block_start",
|
|
1206
|
+
index: this.state.contentBlockIndex,
|
|
1207
|
+
content_block: {
|
|
1208
|
+
type: "thinking",
|
|
1209
|
+
thinking: ""
|
|
1210
|
+
}
|
|
1211
|
+
});
|
|
1212
|
+
this.state.contentBlockOpen = true;
|
|
1213
|
+
this.state.thinkingBlockOpen = true;
|
|
1214
|
+
}
|
|
1215
|
+
events.push({
|
|
1216
|
+
type: "content_block_delta",
|
|
1217
|
+
index: this.state.contentBlockIndex,
|
|
1218
|
+
delta: {
|
|
1219
|
+
type: "thinking_delta",
|
|
1220
|
+
thinking: reasoningText
|
|
1221
|
+
}
|
|
1222
|
+
});
|
|
1223
|
+
}
|
|
1224
|
+
appendContentDelta(events, content) {
|
|
1215
1225
|
if (!content) return;
|
|
1226
|
+
if (this.state.thinkingBlockOpen) {
|
|
1227
|
+
events.push({
|
|
1228
|
+
type: "content_block_stop",
|
|
1229
|
+
index: this.state.contentBlockIndex
|
|
1230
|
+
});
|
|
1231
|
+
this.state.contentBlockIndex++;
|
|
1232
|
+
this.state.contentBlockOpen = false;
|
|
1233
|
+
this.state.thinkingBlockOpen = false;
|
|
1234
|
+
}
|
|
1216
1235
|
if (this.isToolBlockOpen()) {
|
|
1217
|
-
events
|
|
1236
|
+
events.push({
|
|
1218
1237
|
type: "content_block_stop",
|
|
1219
1238
|
index: this.state.contentBlockIndex
|
|
1220
1239
|
});
|
|
@@ -1222,7 +1241,7 @@ var AnthropicStreamTranslator = class {
|
|
|
1222
1241
|
this.state.contentBlockOpen = false;
|
|
1223
1242
|
}
|
|
1224
1243
|
if (!this.state.contentBlockOpen) {
|
|
1225
|
-
events
|
|
1244
|
+
events.push({
|
|
1226
1245
|
type: "content_block_start",
|
|
1227
1246
|
index: this.state.contentBlockIndex,
|
|
1228
1247
|
content_block: {
|
|
@@ -1232,7 +1251,7 @@ var AnthropicStreamTranslator = class {
|
|
|
1232
1251
|
});
|
|
1233
1252
|
this.state.contentBlockOpen = true;
|
|
1234
1253
|
}
|
|
1235
|
-
events
|
|
1254
|
+
events.push({
|
|
1236
1255
|
type: "content_block_delta",
|
|
1237
1256
|
index: this.state.contentBlockIndex,
|
|
1238
1257
|
delta: {
|
|
@@ -1241,17 +1260,18 @@ var AnthropicStreamTranslator = class {
|
|
|
1241
1260
|
}
|
|
1242
1261
|
});
|
|
1243
1262
|
}
|
|
1244
|
-
appendToolCalls(events
|
|
1263
|
+
appendToolCalls(events, toolCalls) {
|
|
1245
1264
|
if (!toolCalls || toolCalls.length === 0) return;
|
|
1246
1265
|
for (const toolCall of toolCalls) {
|
|
1247
1266
|
if (toolCall.id && toolCall.function?.name) {
|
|
1248
1267
|
if (this.state.contentBlockOpen) {
|
|
1249
|
-
events
|
|
1268
|
+
events.push({
|
|
1250
1269
|
type: "content_block_stop",
|
|
1251
1270
|
index: this.state.contentBlockIndex
|
|
1252
1271
|
});
|
|
1253
1272
|
this.state.contentBlockIndex++;
|
|
1254
1273
|
this.state.contentBlockOpen = false;
|
|
1274
|
+
this.state.thinkingBlockOpen = false;
|
|
1255
1275
|
}
|
|
1256
1276
|
const anthropicBlockIndex = this.state.contentBlockIndex;
|
|
1257
1277
|
this.state.toolCalls[toolCall.index] = {
|
|
@@ -1259,7 +1279,7 @@ var AnthropicStreamTranslator = class {
|
|
|
1259
1279
|
name: toolCall.function.name,
|
|
1260
1280
|
anthropicBlockIndex
|
|
1261
1281
|
};
|
|
1262
|
-
events
|
|
1282
|
+
events.push({
|
|
1263
1283
|
type: "content_block_start",
|
|
1264
1284
|
index: anthropicBlockIndex,
|
|
1265
1285
|
content_block: {
|
|
@@ -1274,7 +1294,7 @@ var AnthropicStreamTranslator = class {
|
|
|
1274
1294
|
if (toolCall.function?.arguments) {
|
|
1275
1295
|
const toolCallInfo = this.state.toolCalls[toolCall.index];
|
|
1276
1296
|
if (!toolCallInfo) continue;
|
|
1277
|
-
events
|
|
1297
|
+
events.push({
|
|
1278
1298
|
type: "content_block_delta",
|
|
1279
1299
|
index: toolCallInfo.anthropicBlockIndex,
|
|
1280
1300
|
delta: {
|
|
@@ -1285,26 +1305,23 @@ var AnthropicStreamTranslator = class {
|
|
|
1285
1305
|
}
|
|
1286
1306
|
}
|
|
1287
1307
|
}
|
|
1288
|
-
appendFinish(events
|
|
1308
|
+
appendFinish(events, chunk, finishReason) {
|
|
1289
1309
|
if (!finishReason) return;
|
|
1290
1310
|
if (this.state.contentBlockOpen) {
|
|
1291
|
-
events
|
|
1311
|
+
events.push({
|
|
1292
1312
|
type: "content_block_stop",
|
|
1293
1313
|
index: this.state.contentBlockIndex
|
|
1294
1314
|
});
|
|
1295
1315
|
this.state.contentBlockOpen = false;
|
|
1316
|
+
this.state.thinkingBlockOpen = false;
|
|
1296
1317
|
}
|
|
1297
|
-
events
|
|
1318
|
+
events.push({
|
|
1298
1319
|
type: "message_delta",
|
|
1299
1320
|
delta: {
|
|
1300
1321
|
stop_reason: mapOpenAIStopReasonToAnthropic(finishReason),
|
|
1301
1322
|
stop_sequence: null
|
|
1302
1323
|
},
|
|
1303
|
-
usage:
|
|
1304
|
-
input_tokens: (chunk.usage?.prompt_tokens ?? 0) - (chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0),
|
|
1305
|
-
output_tokens: chunk.usage?.completion_tokens ?? 0,
|
|
1306
|
-
...chunk.usage?.prompt_tokens_details?.cached_tokens !== void 0 && { cache_read_input_tokens: chunk.usage.prompt_tokens_details.cached_tokens }
|
|
1307
|
-
}
|
|
1324
|
+
usage: mapOpenAIUsageToAnthropic(chunk.usage)
|
|
1308
1325
|
}, { type: "message_stop" });
|
|
1309
1326
|
}
|
|
1310
1327
|
};
|
|
@@ -1317,11 +1334,11 @@ const DEFAULT_FALLBACKS = {
|
|
|
1317
1334
|
claudeHaiku: "claude-haiku-4.5"
|
|
1318
1335
|
};
|
|
1319
1336
|
function getModelFallbackConfig() {
|
|
1320
|
-
const cachedConfig
|
|
1337
|
+
const cachedConfig = getCachedConfig();
|
|
1321
1338
|
return {
|
|
1322
|
-
claudeOpus: process.env.MODEL_FALLBACK_CLAUDE_OPUS || cachedConfig
|
|
1323
|
-
claudeSonnet: process.env.MODEL_FALLBACK_CLAUDE_SONNET || cachedConfig
|
|
1324
|
-
claudeHaiku: process.env.MODEL_FALLBACK_CLAUDE_HAIKU || cachedConfig
|
|
1339
|
+
claudeOpus: process.env.MODEL_FALLBACK_CLAUDE_OPUS || cachedConfig.modelFallback?.claudeOpus || DEFAULT_FALLBACKS.claudeOpus,
|
|
1340
|
+
claudeSonnet: process.env.MODEL_FALLBACK_CLAUDE_SONNET || cachedConfig.modelFallback?.claudeSonnet || DEFAULT_FALLBACKS.claudeSonnet,
|
|
1341
|
+
claudeHaiku: process.env.MODEL_FALLBACK_CLAUDE_HAIKU || cachedConfig.modelFallback?.claudeHaiku || DEFAULT_FALLBACKS.claudeHaiku
|
|
1325
1342
|
};
|
|
1326
1343
|
}
|
|
1327
1344
|
function resolveModel(modelId, knownModelIds, config) {
|
|
@@ -1336,17 +1353,20 @@ function resolveModel(modelId, knownModelIds, config) {
|
|
|
1336
1353
|
//#region src/translator/anthropic/anthropic-translator.ts
|
|
1337
1354
|
var AnthropicTranslator = class {
|
|
1338
1355
|
toOpenAI(payload) {
|
|
1356
|
+
const thinkingParams = this.translateThinking(payload.thinking, payload.model);
|
|
1357
|
+
const isThinkingActive = payload.thinking?.type === "enabled" || payload.thinking?.type === "adaptive";
|
|
1339
1358
|
return {
|
|
1340
1359
|
model: this.translateModelName(payload.model),
|
|
1341
1360
|
messages: this.translateAnthropicMessagesToOpenAI(payload.messages, payload.system),
|
|
1342
1361
|
max_tokens: payload.max_tokens,
|
|
1343
1362
|
stop: payload.stop_sequences,
|
|
1344
1363
|
stream: payload.stream,
|
|
1345
|
-
temperature: payload.temperature,
|
|
1346
|
-
top_p: payload.top_p,
|
|
1364
|
+
temperature: isThinkingActive ? void 0 : payload.temperature,
|
|
1365
|
+
top_p: isThinkingActive ? void 0 : payload.top_p,
|
|
1347
1366
|
user: payload.metadata?.user_id,
|
|
1348
1367
|
tools: this.translateAnthropicToolsToOpenAI(payload.tools),
|
|
1349
|
-
tool_choice: this.translateAnthropicToolChoiceToOpenAI(payload.tool_choice)
|
|
1368
|
+
tool_choice: this.translateAnthropicToolChoiceToOpenAI(payload.tool_choice),
|
|
1369
|
+
...thinkingParams
|
|
1350
1370
|
};
|
|
1351
1371
|
}
|
|
1352
1372
|
fromOpenAI(response) {
|
|
@@ -1369,20 +1389,32 @@ var AnthropicTranslator = class {
|
|
|
1369
1389
|
content: [...allTextBlocks, ...allToolUseBlocks],
|
|
1370
1390
|
stop_reason: mapOpenAIStopReasonToAnthropic(stopReason),
|
|
1371
1391
|
stop_sequence: null,
|
|
1372
|
-
usage:
|
|
1373
|
-
input_tokens: (response.usage?.prompt_tokens ?? 0) - (response.usage?.prompt_tokens_details?.cached_tokens ?? 0),
|
|
1374
|
-
output_tokens: response.usage?.completion_tokens ?? 0,
|
|
1375
|
-
...response.usage?.prompt_tokens_details?.cached_tokens !== void 0 && { cache_read_input_tokens: response.usage.prompt_tokens_details.cached_tokens }
|
|
1376
|
-
}
|
|
1392
|
+
usage: mapOpenAIUsageToAnthropic(response.usage)
|
|
1377
1393
|
};
|
|
1378
1394
|
}
|
|
1379
1395
|
createStreamTranslator() {
|
|
1380
1396
|
return new AnthropicStreamTranslator();
|
|
1381
1397
|
}
|
|
1382
1398
|
translateModelName(model) {
|
|
1383
|
-
|
|
1384
|
-
|
|
1385
|
-
|
|
1399
|
+
return resolveModel(model, state.cache.models ? new Set(state.cache.models.data.map((m) => m.id)) : void 0, getModelFallbackConfig());
|
|
1400
|
+
}
|
|
1401
|
+
translateThinking(thinking, model) {
|
|
1402
|
+
if (!thinking || thinking.type === "disabled") return {};
|
|
1403
|
+
const isClaude = model.startsWith("claude");
|
|
1404
|
+
if (thinking.type === "adaptive") return {
|
|
1405
|
+
reasoning_effort: "medium",
|
|
1406
|
+
...isClaude && { thinking_budget: 24e3 }
|
|
1407
|
+
};
|
|
1408
|
+
const budgetTokens = thinking.budget_tokens;
|
|
1409
|
+
return {
|
|
1410
|
+
reasoning_effort: this.budgetToReasoningEffort(budgetTokens),
|
|
1411
|
+
...isClaude && { thinking_budget: budgetTokens }
|
|
1412
|
+
};
|
|
1413
|
+
}
|
|
1414
|
+
budgetToReasoningEffort(budgetTokens) {
|
|
1415
|
+
if (budgetTokens <= 8e3) return "low";
|
|
1416
|
+
if (budgetTokens <= 24e3) return "medium";
|
|
1417
|
+
return "high";
|
|
1386
1418
|
}
|
|
1387
1419
|
translateAnthropicMessagesToOpenAI(anthropicMessages, system) {
|
|
1388
1420
|
const systemMessages = this.handleSystemPrompt(system);
|
|
@@ -1523,6 +1555,10 @@ var AnthropicTranslator = class {
|
|
|
1523
1555
|
|
|
1524
1556
|
//#endregion
|
|
1525
1557
|
//#region src/routes/messages/count-tokens-handler.ts
|
|
1558
|
+
const CLAUDE_TOOL_OVERHEAD_TOKENS = 346;
|
|
1559
|
+
const GROK_TOOL_OVERHEAD_TOKENS = 480;
|
|
1560
|
+
const CLAUDE_ESTIMATION_FACTOR = 1.15;
|
|
1561
|
+
const GROK_ESTIMATION_FACTOR = 1.03;
|
|
1526
1562
|
/**
|
|
1527
1563
|
* Handles token counting for Anthropic messages
|
|
1528
1564
|
*/
|
|
@@ -1541,13 +1577,13 @@ async function handleCountTokens(c) {
|
|
|
1541
1577
|
let mcpToolExist = false;
|
|
1542
1578
|
if (anthropicBeta?.startsWith("claude-code")) mcpToolExist = anthropicPayload.tools.some((tool) => tool.name.startsWith("mcp__"));
|
|
1543
1579
|
if (!mcpToolExist) {
|
|
1544
|
-
if (anthropicPayload.model.startsWith("claude")) tokenCount.input = tokenCount.input +
|
|
1545
|
-
else if (anthropicPayload.model.startsWith("grok")) tokenCount.input = tokenCount.input +
|
|
1580
|
+
if (anthropicPayload.model.startsWith("claude")) tokenCount.input = tokenCount.input + CLAUDE_TOOL_OVERHEAD_TOKENS;
|
|
1581
|
+
else if (anthropicPayload.model.startsWith("grok")) tokenCount.input = tokenCount.input + GROK_TOOL_OVERHEAD_TOKENS;
|
|
1546
1582
|
}
|
|
1547
1583
|
}
|
|
1548
1584
|
let finalTokenCount = tokenCount.input + tokenCount.output;
|
|
1549
|
-
if (anthropicPayload.model.startsWith("claude")) finalTokenCount = Math.round(finalTokenCount *
|
|
1550
|
-
else if (anthropicPayload.model.startsWith("grok")) finalTokenCount = Math.round(finalTokenCount *
|
|
1585
|
+
if (anthropicPayload.model.startsWith("claude")) finalTokenCount = Math.round(finalTokenCount * CLAUDE_ESTIMATION_FACTOR);
|
|
1586
|
+
else if (anthropicPayload.model.startsWith("grok")) finalTokenCount = Math.round(finalTokenCount * GROK_ESTIMATION_FACTOR);
|
|
1551
1587
|
consola.info("Token count:", finalTokenCount);
|
|
1552
1588
|
return c.json({ input_tokens: finalTokenCount });
|
|
1553
1589
|
}
|
|
@@ -1565,13 +1601,10 @@ async function handleCompletion(c) {
|
|
|
1565
1601
|
});
|
|
1566
1602
|
consola.debug("Claude Code requested model:", anthropicPayload.model, "-> Copilot model:", openAIPayload.model);
|
|
1567
1603
|
consola.debug("Translated OpenAI request payload:", JSON.stringify(openAIPayload));
|
|
1568
|
-
const { signal, cleanup } = createUpstreamSignal(
|
|
1569
|
-
|
|
1570
|
-
|
|
1571
|
-
|
|
1572
|
-
const response = await new CopilotClient(state.auth, getClientConfig(state)).createChatCompletions(openAIPayload, { signal });
|
|
1573
|
-
if (isNonStreaming(response)) {
|
|
1574
|
-
consola.debug("Non-streaming response from Copilot:", JSON.stringify(response).slice(-400));
|
|
1604
|
+
const { signal, cleanup } = createUpstreamSignal(c.req.raw.signal, state.config.upstreamTimeoutSeconds !== void 0 ? state.config.upstreamTimeoutSeconds * 1e3 : void 0);
|
|
1605
|
+
const response = await new CopilotClient(state.auth, getClientConfig()).createChatCompletions(openAIPayload, { signal });
|
|
1606
|
+
if (isNonStreamingResponse(response)) {
|
|
1607
|
+
consola.debug("Non-streaming response from Copilot (full):", JSON.stringify(response, null, 2));
|
|
1575
1608
|
const anthropicResponse = translator.fromOpenAI(response);
|
|
1576
1609
|
consola.debug("Translated Anthropic response:", JSON.stringify(anthropicResponse));
|
|
1577
1610
|
cleanup();
|
|
@@ -1586,8 +1619,8 @@ async function handleCompletion(c) {
|
|
|
1586
1619
|
if (rawEvent.data === "[DONE]") break;
|
|
1587
1620
|
if (!rawEvent.data) continue;
|
|
1588
1621
|
const chunk = JSON.parse(rawEvent.data);
|
|
1589
|
-
const events
|
|
1590
|
-
for (const event of events
|
|
1622
|
+
const events = streamTranslator.onChunk(chunk);
|
|
1623
|
+
for (const event of events) {
|
|
1591
1624
|
consola.debug("Translated Anthropic event:", JSON.stringify(event));
|
|
1592
1625
|
await stream.writeSSE({
|
|
1593
1626
|
event: event.type,
|
|
@@ -1611,9 +1644,6 @@ async function handleCompletion(c) {
|
|
|
1611
1644
|
}
|
|
1612
1645
|
});
|
|
1613
1646
|
}
|
|
1614
|
-
function isNonStreaming(response) {
|
|
1615
|
-
return Object.hasOwn(response, "choices");
|
|
1616
|
-
}
|
|
1617
1647
|
|
|
1618
1648
|
//#endregion
|
|
1619
1649
|
//#region src/routes/messages/route.ts
|
|
@@ -1625,10 +1655,7 @@ messageRoutes.post("/count_tokens", (c) => handleCountTokens(c));
|
|
|
1625
1655
|
//#region src/routes/models/route.ts
|
|
1626
1656
|
const modelRoutes = new Hono();
|
|
1627
1657
|
modelRoutes.get("/", async (c) => {
|
|
1628
|
-
if (!state.cache.models)
|
|
1629
|
-
const copilotClient = new CopilotClient(state.auth, getClientConfig(state));
|
|
1630
|
-
await cacheModels(copilotClient);
|
|
1631
|
-
}
|
|
1658
|
+
if (!state.cache.models) await cacheModels(new CopilotClient(state.auth, getClientConfig()));
|
|
1632
1659
|
const models = state.cache.models?.data.map((model) => ({
|
|
1633
1660
|
id: model.id,
|
|
1634
1661
|
object: "model",
|
|
@@ -1656,7 +1683,7 @@ tokenRoute.get("/", (c) => {
|
|
|
1656
1683
|
//#region src/routes/usage/route.ts
|
|
1657
1684
|
const usageRoute = new Hono();
|
|
1658
1685
|
usageRoute.get("/", async (c) => {
|
|
1659
|
-
const usage = await new GitHubClient(state.auth, getClientConfig(
|
|
1686
|
+
const usage = await new GitHubClient(state.auth, getClientConfig()).getCopilotUsage();
|
|
1660
1687
|
return c.json(usage);
|
|
1661
1688
|
});
|
|
1662
1689
|
|
|
@@ -1733,12 +1760,7 @@ async function runServer(options) {
|
|
|
1733
1760
|
await cacheVSCodeVersion();
|
|
1734
1761
|
if (!options.githubToken) await setupGitHubToken();
|
|
1735
1762
|
await setupCopilotToken();
|
|
1736
|
-
|
|
1737
|
-
...getClientConfig(state),
|
|
1738
|
-
accountType
|
|
1739
|
-
};
|
|
1740
|
-
const copilotClient = new CopilotClient(state.auth, clientConfig);
|
|
1741
|
-
await cacheModels(copilotClient);
|
|
1763
|
+
await cacheModels(new CopilotClient(state.auth, getClientConfig()));
|
|
1742
1764
|
consola.info(`Available models: \n${state.cache.models?.data.map((model) => `- ${model.id}`).join("\n")}`);
|
|
1743
1765
|
const serverUrl = `http://localhost:${options.port}`;
|
|
1744
1766
|
if (options.claudeCode) {
|
|
@@ -1751,6 +1773,15 @@ async function runServer(options) {
|
|
|
1751
1773
|
bun: options.idleTimeoutSeconds === void 0 ? void 0 : { idleTimeout: options.idleTimeoutSeconds }
|
|
1752
1774
|
});
|
|
1753
1775
|
}
|
|
1776
|
+
function parseIntArg(raw, name, fallbackMsg) {
|
|
1777
|
+
if (raw === void 0) return void 0;
|
|
1778
|
+
const n = Number.parseInt(raw, 10);
|
|
1779
|
+
if (Number.isNaN(n) || n < 0) {
|
|
1780
|
+
consola.warn(`Invalid --${name} value "${raw}". ${fallbackMsg}`);
|
|
1781
|
+
return;
|
|
1782
|
+
}
|
|
1783
|
+
return n;
|
|
1784
|
+
}
|
|
1754
1785
|
const start = defineCommand({
|
|
1755
1786
|
meta: {
|
|
1756
1787
|
name: "start",
|
|
@@ -1824,20 +1855,9 @@ const start = defineCommand({
|
|
|
1824
1855
|
}
|
|
1825
1856
|
},
|
|
1826
1857
|
run({ args }) {
|
|
1827
|
-
const
|
|
1828
|
-
const
|
|
1829
|
-
const
|
|
1830
|
-
let idleTimeoutSeconds = idleTimeoutRaw === void 0 ? void 0 : Number.parseInt(idleTimeoutRaw, 10);
|
|
1831
|
-
if (idleTimeoutSeconds !== void 0 && (Number.isNaN(idleTimeoutSeconds) || idleTimeoutSeconds < 0)) {
|
|
1832
|
-
consola.warn(`Invalid --idle-timeout value "${idleTimeoutRaw}". Falling back to Bun default.`);
|
|
1833
|
-
idleTimeoutSeconds = void 0;
|
|
1834
|
-
}
|
|
1835
|
-
const upstreamTimeoutRaw = args["upstream-timeout"];
|
|
1836
|
-
let upstreamTimeoutSeconds = upstreamTimeoutRaw === void 0 ? void 0 : Number.parseInt(upstreamTimeoutRaw, 10);
|
|
1837
|
-
if (upstreamTimeoutSeconds !== void 0 && (Number.isNaN(upstreamTimeoutSeconds) || upstreamTimeoutSeconds < 0)) {
|
|
1838
|
-
consola.warn(`Invalid --upstream-timeout value "${upstreamTimeoutRaw}". Falling back to default (300s).`);
|
|
1839
|
-
upstreamTimeoutSeconds = void 0;
|
|
1840
|
-
}
|
|
1858
|
+
const rateLimit = parseIntArg(args["rate-limit"], "rate-limit", "Rate limiting disabled.");
|
|
1859
|
+
const idleTimeoutSeconds = parseIntArg(args["idle-timeout"], "idle-timeout", "Falling back to Bun default.");
|
|
1860
|
+
const upstreamTimeoutSeconds = parseIntArg(args["upstream-timeout"], "upstream-timeout", "Falling back to default (300s).");
|
|
1841
1861
|
return runServer({
|
|
1842
1862
|
port: Number.parseInt(args.port, 10),
|
|
1843
1863
|
verbose: args.verbose,
|
|
@@ -1857,7 +1877,7 @@ const start = defineCommand({
|
|
|
1857
1877
|
|
|
1858
1878
|
//#endregion
|
|
1859
1879
|
//#region src/main.ts
|
|
1860
|
-
|
|
1880
|
+
runMain(defineCommand({
|
|
1861
1881
|
meta: {
|
|
1862
1882
|
name: "ghc-proxy",
|
|
1863
1883
|
description: "A wrapper around GitHub Copilot API to make it OpenAI compatible, making it usable for other tools."
|
|
@@ -1868,12 +1888,11 @@ const main = defineCommand({
|
|
|
1868
1888
|
"check-usage": checkUsage,
|
|
1869
1889
|
debug
|
|
1870
1890
|
}
|
|
1871
|
-
})
|
|
1872
|
-
runMain(main).catch((error) => {
|
|
1891
|
+
})).catch((error) => {
|
|
1873
1892
|
consola.error("Failed to start CLI:", error);
|
|
1874
1893
|
process.exitCode = 1;
|
|
1875
1894
|
});
|
|
1876
1895
|
|
|
1877
1896
|
//#endregion
|
|
1878
1897
|
export { };
|
|
1879
|
-
//# sourceMappingURL=main.
|
|
1898
|
+
//# sourceMappingURL=main.mjs.map
|