ghc-proxy 0.4.2 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +179 -105
- package/dist/main.mjs +225 -81
- package/dist/main.mjs.map +1 -1
- package/package.json +1 -1
package/dist/main.mjs
CHANGED
|
@@ -5380,7 +5380,12 @@ const configFileSchema = object({
|
|
|
5380
5380
|
useFunctionApplyPatch: boolean().optional(),
|
|
5381
5381
|
responsesApiContextManagementModels: array(string()).optional(),
|
|
5382
5382
|
modelReasoningEfforts: record(string(), reasoningEffortSchema).optional(),
|
|
5383
|
-
|
|
5383
|
+
modelRewrites: array(object({
|
|
5384
|
+
from: string(),
|
|
5385
|
+
to: string()
|
|
5386
|
+
})).optional(),
|
|
5387
|
+
contextUpgrade: boolean().optional(),
|
|
5388
|
+
contextUpgradeTokenThreshold: number().int().positive().optional()
|
|
5384
5389
|
}).passthrough();
|
|
5385
5390
|
const KNOWN_CONFIG_KEYS = new Set(Object.keys(configFileSchema.shape));
|
|
5386
5391
|
let cachedConfig = {};
|
|
@@ -5388,6 +5393,7 @@ const DEFAULT_REASONING_EFFORT = "high";
|
|
|
5388
5393
|
const DEFAULT_USE_FUNCTION_APPLY_PATCH = true;
|
|
5389
5394
|
const DEFAULT_COMPACT_USE_SMALL_MODEL = false;
|
|
5390
5395
|
const DEFAULT_CONTEXT_UPGRADE = true;
|
|
5396
|
+
const DEFAULT_CONTEXT_UPGRADE_TOKEN_THRESHOLD = 16e4;
|
|
5391
5397
|
async function readConfig() {
|
|
5392
5398
|
try {
|
|
5393
5399
|
const content = await fs.readFile(PATHS.CONFIG_PATH, "utf8");
|
|
@@ -5439,6 +5445,9 @@ function isResponsesApiContextManagementModel(model) {
|
|
|
5439
5445
|
/**
 * Whether context-upgrade routing is enabled.
 *
 * @returns {boolean} `contextUpgrade` from the cached config, or
 *   `DEFAULT_CONTEXT_UPGRADE` when that key is null/undefined.
 */
function shouldContextUpgrade() {
  const { contextUpgrade } = cachedConfig;
  return contextUpgrade ?? DEFAULT_CONTEXT_UPGRADE;
}
|
|
5448
|
+
/**
 * Token count above which a context upgrade is considered.
 *
 * @returns {number} `contextUpgradeTokenThreshold` from the cached config, or
 *   `DEFAULT_CONTEXT_UPGRADE_TOKEN_THRESHOLD` when that key is null/undefined.
 */
function getContextUpgradeTokenThreshold() {
  const { contextUpgradeTokenThreshold } = cachedConfig;
  return contextUpgradeTokenThreshold ?? DEFAULT_CONTEXT_UPGRADE_TOKEN_THRESHOLD;
}
|
|
5442
5451
|
/**
 * Look up the configured reasoning effort for a model.
 *
 * @param {string} model - Model ID used as the key into `modelReasoningEfforts`.
 * @returns The per-model value from the cached config, or
 *   `DEFAULT_REASONING_EFFORT` when the map or the entry is missing.
 */
function getReasoningEffortForModel(model) {
  const configuredEffort = cachedConfig.modelReasoningEfforts?.[model];
  return configuredEffort ?? DEFAULT_REASONING_EFFORT;
}
|
|
@@ -6216,7 +6225,7 @@ const checkUsage = defineCommand({
|
|
|
6216
6225
|
|
|
6217
6226
|
//#endregion
|
|
6218
6227
|
//#region src/lib/version.ts
|
|
6219
|
-
const VERSION = "0.
|
|
6228
|
+
const VERSION = "0.5.1";
|
|
6220
6229
|
|
|
6221
6230
|
//#endregion
|
|
6222
6231
|
//#region src/debug.ts
|
|
@@ -46561,12 +46570,20 @@ function colorizeMethod(method) {
|
|
|
46561
46570
|
}
|
|
46562
46571
|
/**
 * Build the ` model=…` suffix appended to a request log line.
 *
 * The chain always starts with the original model (or "-" when it is
 * null/undefined). A "~>" hop is added when the rewritten model differs from
 * what is displayed as the original, and a "→" hop when the mapped model
 * differs from the previous stage.
 *
 * @param {{originalModel?: string, rewrittenModel?: string, mappedModel?: string}} [info]
 * @returns {string} Empty string when there is nothing to show, otherwise the
 *   colorized suffix with a leading space.
 */
function formatModelMapping(info) {
  if (!info) return "";
  const { originalModel, rewrittenModel, mappedModel } = info;
  const hasAnyModel = Boolean(originalModel || rewrittenModel || mappedModel);
  if (!hasAnyModel) return "";

  const shownOriginal = originalModel ?? "-";
  const segments = [colorize("blueBright", shownOriginal)];

  if (rewrittenModel && rewrittenModel !== shownOriginal) {
    segments.push(colorize("dim", "~>"), colorize("cyanBright", rewrittenModel));
  }

  // The mapped model is compared against the stage that precedes it.
  const previousStage = rewrittenModel ?? shownOriginal;
  if (mappedModel && mappedModel !== previousStage) {
    segments.push(colorize("dim", "→"), colorize("greenBright", mappedModel));
  }

  const label = colorize("dim", "model=");
  return ` ${label}${segments.join(" ")}`;
}
|
|
46571
46588
|
/**
|
|
46572
46589
|
* Request logging function.
|
|
@@ -46585,6 +46602,19 @@ function logRequest(method, url, status, elapsed, modelInfo) {
|
|
|
46585
46602
|
console.log(`${line}${formatModelMapping(modelInfo)}`);
|
|
46586
46603
|
}
|
|
46587
46604
|
|
|
46605
|
+
//#endregion
|
|
46606
|
+
//#region src/lib/request-timeout.ts
|
|
46607
|
+
/**
 * Call `server.timeout(request, 0)` when the server exposes a `timeout`
 * function; otherwise do nothing.
 *
 * NOTE(review): a value of 0 presumably disables the per-request idle timeout
 * (Bun `Server.timeout` semantics) — confirm against the runtime in use.
 *
 * @param {object} [server] - Server handle; may be null/undefined.
 * @param {Request} request - The request whose timeout is adjusted.
 */
function disableIdleTimeout(server, request) {
  const supportsTimeout = typeof server?.timeout === "function";
  if (!supportsTimeout) return;
  server.timeout(request, 0);
}
|
|
46610
|
+
/**
 * Report whether a parsed request body asks for streaming.
 *
 * @param {unknown} body - Parsed request body.
 * @returns {boolean} True only when `body` is a non-null object whose
 *   `stream` property is strictly `true`.
 */
function hasStreamingFlag(body) {
  const isObjectBody = Boolean(body) && typeof body === "object";
  return isObjectBody ? body.stream === true : false;
}
|
|
46614
|
+
/**
 * Report whether the request URL carries `?stream=true`.
 *
 * @param {{url: string}} request - Object whose `url` can be parsed by `new URL()`.
 * @returns {boolean} True only when the `stream` query parameter is exactly "true".
 */
function hasStreamingResponsesQuery(request) {
  const { searchParams } = new URL(request.url);
  return searchParams.get("stream") === "true";
}
|
|
46617
|
+
|
|
46588
46618
|
//#endregion
|
|
46589
46619
|
//#region src/lib/sse-adapter.ts
|
|
46590
46620
|
/**
|
|
@@ -47208,10 +47238,10 @@ var AnthropicStreamTranslator = class {
|
|
|
47208
47238
|
}
|
|
47209
47239
|
onChunk(chunk) {
|
|
47210
47240
|
const deltas = this.toConversationDeltas(chunk);
|
|
47241
|
+
if (chunk.usage) this.state.lastUsage = chunk.usage;
|
|
47211
47242
|
if (deltas.length === 0) return [];
|
|
47212
47243
|
const events = [];
|
|
47213
47244
|
this.appendMessageStart(events, chunk);
|
|
47214
|
-
this.state.lastUsage = chunk.usage;
|
|
47215
47245
|
for (const delta of deltas) switch (delta.kind) {
|
|
47216
47246
|
case "message_start": break;
|
|
47217
47247
|
case "thinking_delta":
|
|
@@ -47244,8 +47274,7 @@ var AnthropicStreamTranslator = class {
|
|
|
47244
47274
|
...delta.metadata
|
|
47245
47275
|
};
|
|
47246
47276
|
this.state.pendingStopReason = delta.stopReason;
|
|
47247
|
-
|
|
47248
|
-
events.push(...this.onDone());
|
|
47277
|
+
this.closeAllBlocks(events);
|
|
47249
47278
|
break;
|
|
47250
47279
|
}
|
|
47251
47280
|
return events;
|
|
@@ -47253,9 +47282,7 @@ var AnthropicStreamTranslator = class {
|
|
|
47253
47282
|
onDone() {
|
|
47254
47283
|
if (!this.state.messageStartSent || this.state.messageStopSent) return [];
|
|
47255
47284
|
const events = [];
|
|
47256
|
-
this.
|
|
47257
|
-
this.textWriter.close(events);
|
|
47258
|
-
this.toolWriter.closeAll(events);
|
|
47285
|
+
this.closeAllBlocks(events);
|
|
47259
47286
|
events.push({
|
|
47260
47287
|
type: "message_delta",
|
|
47261
47288
|
delta: {
|
|
@@ -47276,6 +47303,11 @@ var AnthropicStreamTranslator = class {
|
|
|
47276
47303
|
}
|
|
47277
47304
|
}];
|
|
47278
47305
|
}
|
|
47306
|
+
closeAllBlocks(events) {
|
|
47307
|
+
this.thinkingWriter.close(events);
|
|
47308
|
+
this.textWriter.close(events);
|
|
47309
|
+
this.toolWriter.closeAll(events);
|
|
47310
|
+
}
|
|
47279
47311
|
appendMessageStart(events, chunk) {
|
|
47280
47312
|
if (this.state.messageStartSent) return;
|
|
47281
47313
|
events.push({
|
|
@@ -47940,6 +47972,106 @@ function modelSupportsOutputConfig(model) {
|
|
|
47940
47972
|
return !MODELS_REJECTING_OUTPUT_CONFIG.has(model.id);
|
|
47941
47973
|
}
|
|
47942
47974
|
|
|
47975
|
+
//#endregion
|
|
47976
|
+
//#region src/lib/model-rewrite.ts
|
|
47977
|
+
/**
 * Unified model rewrite: user rules → built-in normalization → pass-through.
 * Call once at handler entry, before any model lookup or policy.
 *
 * Resolution order:
 *  1. The first `modelRewrites` rule whose `from` glob matches wins; its `to`
 *     is normalized against the known model list when possible, otherwise
 *     used verbatim.
 *  2. Otherwise the requested ID is normalized against the known model list.
 *  3. Otherwise the requested ID is returned unchanged.
 *
 * @param {string} modelId - Model ID as sent by the client.
 * @returns {{originalModel: string, model: string}} The requested ID and the ID to use.
 */
function rewriteModel(modelId) {
  const userRules = getCachedConfig().modelRewrites || [];
  const matchedRule = userRules.find((rule) => matchesGlob(rule.from, modelId));
  if (matchedRule) {
    const target = matchedRule.to;
    return {
      originalModel: modelId,
      model: normalizeToKnownModel(target) ?? target
    };
  }

  // No user rule applied: fall back to the canonical ID, if one is known.
  const canonicalId = normalizeToKnownModel(modelId);
  return {
    originalModel: modelId,
    model: canonicalId || modelId
  };
}
|
|
47999
|
+
/**
 * Run `rewriteModel` on `payload.model` and, when the result differs, write
 * the new ID back onto the payload and emit a debug log line.
 *
 * @param {{model: string}} payload - Request payload; `model` is mutated in place on rewrite.
 * @returns {{originalModel: string, model: string}} The rewrite result, for downstream use.
 */
function applyModelRewrite(payload) {
  const rewrite = rewriteModel(payload.model);
  const { originalModel, model } = rewrite;
  if (model === originalModel) return rewrite;

  consola.debug(`Model rewritten: ${originalModel} ~> ${model}`);
  payload.model = model;
  return rewrite;
}
|
|
48011
|
+
const DOT_RE = /\./g;
/**
 * Resolve a model ID against the cached model list (`state.cache.models.data`),
 * treating "." and "-" as equivalent.
 *
 * @param {string} modelId - Model ID to resolve.
 * @returns {string | undefined} `modelId` itself on an exact match; otherwise
 *   the ID of the first cached model that is equal once dots are replaced by
 *   dashes; `undefined` when there is no match or no cached list.
 */
function normalizeToKnownModel(modelId) {
  const knownModels = state.cache.models?.data;
  if (!knownModels) return undefined;

  const hasExactMatch = knownModels.some(({ id }) => id === modelId);
  if (hasExactMatch) return modelId;

  const wanted = modelId.replace(DOT_RE, "-");
  const equivalent = knownModels.find(({ id }) => id.replace(DOT_RE, "-") === wanted);
  return equivalent?.id;
}
|
|
48023
|
+
// Every regex metacharacter except "*" (which is the glob wildcard). "?" must
// be in this set: left unescaped it becomes a quantifier, so "ab?*" would
// match "a-model" and "?*" would throw a SyntaxError at match time.
const GLOB_SPECIAL_RE = /[.+?^${}()|[\]\\]/g;
const GLOB_STAR_RE = /\*/g;
/**
 * Test a value against a simple glob pattern in which "*" matches any run of
 * characters (including none) and every other character is literal.
 *
 * @param {string} pattern - Glob pattern; without "*" it is compared by strict equality.
 * @param {string} value - Candidate string; the whole string must match.
 * @returns {boolean} True when `value` matches `pattern`.
 */
function matchesGlob(pattern, value) {
  if (!pattern.includes("*")) return pattern === value;
  // Escape literals first, then expand "*", so the inserted ".*" stays unescaped.
  const source = pattern.replace(GLOB_SPECIAL_RE, "\\$&").replace(GLOB_STAR_RE, ".*");
  return new RegExp(`^${source}$`).test(value);
}
|
|
48029
|
+
/** Data-driven upgrade rules: each entry maps a model ID (`from`) to its upgrade target (`to`). Add new entries to extend. */
const CONTEXT_UPGRADE_RULES = [
  { from: "claude-opus-4.6", to: "claude-opus-4.6-1m" }
];
/** Source model IDs of all rules, precomputed once so the eligibility check is a set lookup. */
const UPGRADE_ELIGIBLE_MODELS = new Set();
for (const { from } of CONTEXT_UPGRADE_RULES) UPGRADE_ELIGIBLE_MODELS.add(from);
/**
 * Quick check: is this model the `from` side of any context-upgrade rule?
 * Intended as a cheap guard before doing token estimation.
 *
 * @param {string} model - Model ID to test.
 * @returns {boolean}
 */
function hasContextUpgradeRule(model) {
  return UPGRADE_ELIGIBLE_MODELS.has(model);
}
|
|
48043
|
+
/**
 * Find the first upgrade rule whose `from` equals `model` and whose `to`
 * target is resolvable via `findModelById`.
 *
 * @param {string} model - Model ID to look up.
 * @returns {{from: string, to: string} | undefined} The matching rule, if any.
 */
function findUpgradeRule(model) {
  return CONTEXT_UPGRADE_RULES.find((rule) => rule.from === model && findModelById(rule.to));
}
|
|
48047
|
+
/**
 * Proactive upgrade: pick the upgrade target when the estimated prompt size
 * is strictly greater than the configured token threshold.
 *
 * @param {string} model - Currently requested model ID.
 * @param {number} estimatedTokens - Estimated input token count.
 * @returns {string | undefined} Target model ID, or undefined when no rule
 *   applies or the estimate does not exceed the threshold.
 */
function resolveContextUpgrade(model, estimatedTokens) {
  const rule = findUpgradeRule(model);
  if (!rule) return undefined;
  const exceedsThreshold = estimatedTokens > getContextUpgradeTokenThreshold();
  return exceedsThreshold ? rule.to : undefined;
}
|
|
48055
|
+
/**
 * Reactive upgrade: return the upgrade target for a model regardless of token
 * count (used after a context-length error).
 *
 * @param {string} model - Model ID that failed.
 * @returns {string | undefined} Target model ID, or undefined when no rule applies.
 */
function getContextUpgradeTarget(model) {
  const rule = findUpgradeRule(model);
  return rule?.to;
}
|
|
48062
|
+
/** Case-insensitive patterns tested against an upstream error message to detect a context-length failure. */
const CONTEXT_ERROR_PATTERNS = [
  /context.length/i,
  /too.long/i,
  /token.*(limit|maximum|exceed)/i,
  /(limit|maximum|exceed).*token/i
];
/**
 * Decide whether an error is an upstream context-length rejection.
 *
 * @param {unknown} error - Caught error value.
 * @returns {boolean} True only for an `HTTPError` with status 400 whose
 *   `body.error.message` matches one of `CONTEXT_ERROR_PATTERNS`.
 */
function isContextLengthError(error) {
  const isBadRequest = error instanceof HTTPError && error.status === 400;
  if (!isBadRequest) return false;

  const message = error.body?.error?.message;
  if (!message) return false;
  return CONTEXT_ERROR_PATTERNS.some((pattern) => pattern.test(message));
}
|
|
48074
|
+
|
|
47943
48075
|
//#endregion
|
|
47944
48076
|
//#region src/lib/tokenizer.ts
|
|
47945
48077
|
const ENCODING_MAP = {
|
|
@@ -48740,7 +48872,8 @@ async function handleCompletionCore({ body, signal, headers }) {
|
|
|
48740
48872
|
const adapter = new OpenAIChatAdapter();
|
|
48741
48873
|
let payload = parseOpenAIChatPayload(body);
|
|
48742
48874
|
consola.debug("Request payload:", JSON.stringify(payload).slice(-400));
|
|
48743
|
-
const
|
|
48875
|
+
const rewrite = applyModelRewrite(payload);
|
|
48876
|
+
const originalModel = rewrite.originalModel;
|
|
48744
48877
|
const selectedModel = findModelById(payload.model);
|
|
48745
48878
|
try {
|
|
48746
48879
|
if (selectedModel) {
|
|
@@ -48761,6 +48894,7 @@ async function handleCompletionCore({ body, signal, headers }) {
|
|
|
48761
48894
|
const plan = adapter.toCapiPlan(payload, { requestContext: readCapiRequestContext(headers) });
|
|
48762
48895
|
const modelMapping = {
|
|
48763
48896
|
originalModel,
|
|
48897
|
+
rewrittenModel: rewrite.model,
|
|
48764
48898
|
mappedModel: plan.resolvedModel
|
|
48765
48899
|
};
|
|
48766
48900
|
const transport = new CopilotTransport(createCopilotClient());
|
|
@@ -48774,7 +48908,8 @@ async function handleCompletionCore({ body, signal, headers }) {
|
|
|
48774
48908
|
//#endregion
|
|
48775
48909
|
//#region src/routes/chat-completions/route.ts
|
|
48776
48910
|
function createCompletionRoutes() {
|
|
48777
|
-
return new Elysia().use(requestGuardPlugin).post("/chat/completions", async function* ({ body, request }) {
|
|
48911
|
+
return new Elysia().use(requestGuardPlugin).post("/chat/completions", async function* ({ body, request, server }) {
|
|
48912
|
+
if (hasStreamingFlag(body)) disableIdleTimeout(server, request);
|
|
48778
48913
|
const { result, modelMapping } = await handleCompletionCore({
|
|
48779
48914
|
body,
|
|
48780
48915
|
signal: request.signal,
|
|
@@ -48860,60 +48995,15 @@ async function handleCountTokensCore({ body, headers }) {
|
|
|
48860
48995
|
return { input_tokens: finalTokenCount };
|
|
48861
48996
|
}
|
|
48862
48997
|
|
|
48863
|
-
//#endregion
|
|
48864
|
-
//#region src/lib/context-upgrade.ts
|
|
48865
|
-
/** Data-driven upgrade rules. Add new entries to extend. */
|
|
48866
|
-
const CONTEXT_UPGRADE_RULES = [{
|
|
48867
|
-
from: "claude-opus-4.6",
|
|
48868
|
-
to: "claude-opus-4.6-1m",
|
|
48869
|
-
tokenThreshold: 19e4
|
|
48870
|
-
}];
|
|
48871
|
-
/** Pre-computed set for fast model eligibility checks (avoids token estimation on non-eligible models). */
|
|
48872
|
-
const UPGRADE_ELIGIBLE_MODELS = new Set(CONTEXT_UPGRADE_RULES.map((r) => r.from));
|
|
48873
|
-
/**
|
|
48874
|
-
* Quick check: does this model have any context-upgrade rules?
|
|
48875
|
-
* Use to skip expensive token estimation for ineligible models.
|
|
48876
|
-
*/
|
|
48877
|
-
function hasContextUpgradeRule(model) {
|
|
48878
|
-
return UPGRADE_ELIGIBLE_MODELS.has(model);
|
|
48879
|
-
}
|
|
48880
|
-
/** Find the upgrade rule for a model whose target exists in Copilot's model list. */
|
|
48881
|
-
function findUpgradeRule(model) {
|
|
48882
|
-
for (const rule of CONTEXT_UPGRADE_RULES) if (model === rule.from && findModelById(rule.to)) return rule;
|
|
48883
|
-
}
|
|
48884
|
-
/**
|
|
48885
|
-
* Proactive: resolve the upgrade target model for a given model + token count.
|
|
48886
|
-
* Returns the target model ID, or undefined if no upgrade applies.
|
|
48887
|
-
*/
|
|
48888
|
-
function resolveContextUpgrade(model, estimatedTokens) {
|
|
48889
|
-
const rule = findUpgradeRule(model);
|
|
48890
|
-
if (rule && estimatedTokens > rule.tokenThreshold) return rule.to;
|
|
48891
|
-
}
|
|
48892
|
-
/**
|
|
48893
|
-
* Reactive: get the upgrade target for a model on context-length error.
|
|
48894
|
-
* Returns the target model ID, or undefined if no fallback applies.
|
|
48895
|
-
*/
|
|
48896
|
-
function getContextUpgradeTarget(model) {
|
|
48897
|
-
return findUpgradeRule(model)?.to;
|
|
48898
|
-
}
|
|
48899
|
-
/** Context-length error detection with pattern matching */
|
|
48900
|
-
const CONTEXT_ERROR_PATTERNS = [
|
|
48901
|
-
/context.length/i,
|
|
48902
|
-
/too.long/i,
|
|
48903
|
-
/token.*(limit|maximum|exceed)/i,
|
|
48904
|
-
/(limit|maximum|exceed).*token/i
|
|
48905
|
-
];
|
|
48906
|
-
function isContextLengthError(error) {
|
|
48907
|
-
if (!(error instanceof HTTPError) || error.status !== 400) return false;
|
|
48908
|
-
const message = error.body?.error?.message;
|
|
48909
|
-
return message ? CONTEXT_ERROR_PATTERNS.some((pattern) => pattern.test(message)) : false;
|
|
48910
|
-
}
|
|
48911
|
-
|
|
48912
48998
|
//#endregion
|
|
48913
48999
|
//#region src/lib/request-model-policy.ts
|
|
48914
49000
|
const COMPACT_SYSTEM_PROMPT_START = "You are a helpful AI assistant tasked with summarizing conversations";
|
|
48915
|
-
function applyMessagesModelPolicy(payload) {
|
|
49001
|
+
function applyMessagesModelPolicy(payload, options) {
|
|
48916
49002
|
const originalModel = payload.model;
|
|
49003
|
+
if (options?.betaUpgraded) return {
|
|
49004
|
+
originalModel,
|
|
49005
|
+
routedModel: originalModel
|
|
49006
|
+
};
|
|
48917
49007
|
if (shouldContextUpgrade() && hasContextUpgradeRule(payload.model)) {
|
|
48918
49008
|
const contextUpgradeTarget = resolveContextUpgrade(payload.model, estimateAnthropicInputTokens(payload));
|
|
48919
49009
|
if (contextUpgradeTarget) {
|
|
@@ -49396,6 +49486,13 @@ function createMessagesViaChatCompletionsStrategy(transport, adapter, plan, sign
|
|
|
49396
49486
|
data: JSON.stringify(event)
|
|
49397
49487
|
}));
|
|
49398
49488
|
},
|
|
49489
|
+
onStreamDone() {
|
|
49490
|
+
if (!streamTranslator) return null;
|
|
49491
|
+
return streamTranslator.onDone().map((event) => ({
|
|
49492
|
+
event: event.type,
|
|
49493
|
+
data: JSON.stringify(event)
|
|
49494
|
+
}));
|
|
49495
|
+
},
|
|
49399
49496
|
shouldBreakStream() {
|
|
49400
49497
|
return done;
|
|
49401
49498
|
},
|
|
@@ -50048,7 +50145,10 @@ function createMessagesViaResponsesStrategy(copilotClient, responsesPayload, opt
|
|
|
50048
50145
|
//#endregion
|
|
50049
50146
|
//#region src/routes/messages/strategy-registry.ts
|
|
50050
50147
|
/**
 * Pick the first registry entry whose `canHandle(model)` is truthy and log the
 * choice at debug level; when none matches, return the last registry entry.
 *
 * @param {Array<{name: string, canHandle: Function}>} registry - Strategy entries, in priority order.
 * @param {{id?: string} | undefined} model - Resolved model, if any.
 * @returns The selected strategy entry.
 */
function selectStrategy(registry, model) {
  const selected = registry.find((entry) => entry.canHandle(model));
  // The fallback path is intentionally silent, like the no-match case before.
  if (!selected) return registry.at(-1);

  consola.debug(`Strategy selected: ${selected.name} for model: ${model?.id ?? "(unknown)"}`);
  return selected;
}
|
|
50054
50154
|
function filterThinkingBlocksForNativeMessages(anthropicPayload) {
|
|
@@ -50142,6 +50242,30 @@ const defaultStrategyRegistry = [
|
|
|
50142
50242
|
|
|
50143
50243
|
//#endregion
|
|
50144
50244
|
//#region src/routes/messages/handler.ts
|
|
50245
|
+
/** Matches `anthropic-beta` values of the form `context-<digits><k|m>-…`. */
const CONTEXT_BETA_RE = /^context-\d+[km]-/;
/**
 * Split an `anthropic-beta` header, drop every context-window beta value, and
 * work out whether one of them should trigger a model upgrade.
 *
 * Context beta values are always removed from the forwarded header. An
 * upgrade target is reported only when at least one was present, context
 * upgrades are enabled, and `getContextUpgradeTarget(model)` yields a target.
 *
 * @param {string | null | undefined} rawHeader - Raw comma-separated header value.
 * @param {string} model - Model ID used to look up the upgrade target.
 * @returns {{header: string | undefined, upgradeTarget: string | undefined}}
 *   `header` is the remaining values joined with "," (undefined when none remain).
 */
function processAnthropicBetaHeader(rawHeader, model) {
  if (!rawHeader) return { header: undefined, upgradeTarget: undefined };

  const betas = rawHeader.split(",").map((beta) => beta.trim()).filter(Boolean);
  const forwarded = betas.filter((beta) => !CONTEXT_BETA_RE.test(beta));
  const requestsLargeContext = forwarded.length < betas.length;

  let upgradeTarget;
  if (requestsLargeContext && shouldContextUpgrade()) {
    upgradeTarget = getContextUpgradeTarget(model) || undefined;
  }

  return {
    header: forwarded.length > 0 ? forwarded.join(",") : undefined,
    upgradeTarget
  };
}
|
|
50145
50269
|
/**
|
|
50146
50270
|
* Core handler for Anthropic messages endpoint.
|
|
50147
50271
|
* Returns both the execution result and model mapping info.
|
|
@@ -50149,10 +50273,17 @@ const defaultStrategyRegistry = [
|
|
|
50149
50273
|
async function handleMessagesCore({ body, signal, headers }) {
|
|
50150
50274
|
const anthropicPayload = parseAnthropicMessagesPayload(body);
|
|
50151
50275
|
if (consola.level >= 4) consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload));
|
|
50152
|
-
const
|
|
50153
|
-
const
|
|
50276
|
+
const rewrite = applyModelRewrite(anthropicPayload);
|
|
50277
|
+
const betaResult = processAnthropicBetaHeader(headers.get("anthropic-beta"), anthropicPayload.model);
|
|
50278
|
+
if (betaResult.upgradeTarget) {
|
|
50279
|
+
consola.debug(`Beta header context upgrade: ${anthropicPayload.model} → ${betaResult.upgradeTarget}`);
|
|
50280
|
+
anthropicPayload.model = betaResult.upgradeTarget;
|
|
50281
|
+
}
|
|
50282
|
+
const anthropicBetaHeader = betaResult.header;
|
|
50283
|
+
const modelRouting = applyMessagesModelPolicy(anthropicPayload, { betaUpgraded: !!betaResult.upgradeTarget });
|
|
50154
50284
|
const modelMapping = {
|
|
50155
|
-
originalModel:
|
|
50285
|
+
originalModel: rewrite.originalModel,
|
|
50286
|
+
rewrittenModel: rewrite.model,
|
|
50156
50287
|
mappedModel: modelRouting.routedModel
|
|
50157
50288
|
};
|
|
50158
50289
|
if (modelRouting.reason) consola.debug(`Routed anthropic request via ${modelRouting.reason}:`, `${modelRouting.originalModel} -> ${modelRouting.routedModel}`);
|
|
@@ -50186,7 +50317,8 @@ async function handleMessagesCore({ body, signal, headers }) {
|
|
|
50186
50317
|
selectedModel: retryModel,
|
|
50187
50318
|
upstreamSignal: retrySignal,
|
|
50188
50319
|
modelMapping: {
|
|
50189
|
-
originalModel:
|
|
50320
|
+
originalModel: rewrite.originalModel,
|
|
50321
|
+
rewrittenModel: rewrite.model,
|
|
50190
50322
|
mappedModel: upgradeTarget
|
|
50191
50323
|
}
|
|
50192
50324
|
});
|
|
@@ -50200,7 +50332,8 @@ async function handleMessagesCore({ body, signal, headers }) {
|
|
|
50200
50332
|
//#endregion
|
|
50201
50333
|
//#region src/routes/messages/route.ts
|
|
50202
50334
|
function createMessageRoutes() {
|
|
50203
|
-
return new Elysia().use(requestGuardPlugin).post("/messages", async function* ({ body, request }) {
|
|
50335
|
+
return new Elysia().use(requestGuardPlugin).post("/messages", async function* ({ body, request, server }) {
|
|
50336
|
+
if (hasStreamingFlag(body)) disableIdleTimeout(server, request);
|
|
50204
50337
|
const { result, modelMapping } = await handleMessagesCore({
|
|
50205
50338
|
body,
|
|
50206
50339
|
signal: request.signal,
|
|
@@ -50305,6 +50438,7 @@ const HTTP_URL_RE = /^https?:\/\//i;
|
|
|
50305
50438
|
*/
|
|
50306
50439
|
async function handleResponsesCore({ body, signal, headers }) {
|
|
50307
50440
|
const payload = parseResponsesPayload(body);
|
|
50441
|
+
const rewrite = applyModelRewrite(payload);
|
|
50308
50442
|
applyResponsesToolTransforms(payload);
|
|
50309
50443
|
applyResponsesInputPolicies(payload);
|
|
50310
50444
|
compactInputByLatestCompaction(payload);
|
|
@@ -50314,12 +50448,19 @@ async function handleResponsesCore({ body, signal, headers }) {
|
|
|
50314
50448
|
applyContextManagement(payload, selectedModel.capabilities.limits.max_prompt_tokens);
|
|
50315
50449
|
const { vision, initiator } = getResponsesRequestOptions(payload);
|
|
50316
50450
|
const upstreamSignal = createUpstreamSignalFromConfig(signal);
|
|
50317
|
-
return
|
|
50318
|
-
|
|
50319
|
-
|
|
50320
|
-
|
|
50321
|
-
|
|
50322
|
-
|
|
50451
|
+
return {
|
|
50452
|
+
result: await runStrategy(createResponsesPassthroughStrategy(createCopilotClient(), payload, {
|
|
50453
|
+
vision,
|
|
50454
|
+
initiator,
|
|
50455
|
+
requestContext: readCapiRequestContext(headers),
|
|
50456
|
+
signal: upstreamSignal.signal
|
|
50457
|
+
}), upstreamSignal),
|
|
50458
|
+
modelMapping: {
|
|
50459
|
+
originalModel: rewrite.originalModel,
|
|
50460
|
+
rewrittenModel: rewrite.model,
|
|
50461
|
+
mappedModel: payload.model
|
|
50462
|
+
}
|
|
50463
|
+
};
|
|
50323
50464
|
}
|
|
50324
50465
|
function applyResponsesToolTransforms(payload) {
|
|
50325
50466
|
applyFunctionApplyPatch(payload);
|
|
@@ -50454,12 +50595,14 @@ function parseBooleanParam(value) {
|
|
|
50454
50595
|
//#endregion
|
|
50455
50596
|
//#region src/routes/responses/route.ts
|
|
50456
50597
|
function createResponsesRoutes() {
|
|
50457
|
-
return new Elysia().use(requestGuardPlugin).post("/responses", async function* ({ body, request }) {
|
|
50458
|
-
|
|
50598
|
+
return new Elysia().use(requestGuardPlugin).post("/responses", async function* ({ body, request, server }) {
|
|
50599
|
+
if (hasStreamingFlag(body)) disableIdleTimeout(server, request);
|
|
50600
|
+
const { result, modelMapping } = await handleResponsesCore({
|
|
50459
50601
|
body,
|
|
50460
50602
|
signal: request.signal,
|
|
50461
50603
|
headers: request.headers
|
|
50462
50604
|
});
|
|
50605
|
+
if (modelMapping) setRequestModelMapping(request, modelMapping);
|
|
50463
50606
|
if (result.kind === "json") return result.data;
|
|
50464
50607
|
yield* sseAdapter(result.generator);
|
|
50465
50608
|
}, { guarded: true }).post("/responses/input_tokens", async ({ body, request }) => {
|
|
@@ -50475,7 +50618,8 @@ function createResponsesRoutes() {
|
|
|
50475
50618
|
headers: request.headers,
|
|
50476
50619
|
signal: request.signal
|
|
50477
50620
|
});
|
|
50478
|
-
}).get("/responses/:responseId", async ({ params, request }) => {
|
|
50621
|
+
}).get("/responses/:responseId", async ({ params, request, server }) => {
|
|
50622
|
+
if (hasStreamingResponsesQuery(request)) disableIdleTimeout(server, request);
|
|
50479
50623
|
return handleRetrieveResponseCore({
|
|
50480
50624
|
params,
|
|
50481
50625
|
url: request.url,
|