ghc-proxy 0.3.2 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -5
- package/dist/main.mjs +135 -47
- package/dist/main.mjs.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -261,16 +261,13 @@ Or in the proxy's **config file** (`~/.local/share/ghc-proxy/config.json`):
|
|
|
261
261
|
|
|
262
262
|
- `smallModel`: the model to reroute to
|
|
263
263
|
- `compactUseSmallModel`: reroute recognized compact/summarization requests
|
|
264
|
-
- `warmupUseSmallModel`: reroute explicitly marked warmup/probe requests
|
|
265
264
|
|
|
266
|
-
|
|
265
|
+
The switch defaults to `false`. Routing is conservative:
|
|
267
266
|
|
|
268
267
|
- the target `smallModel` must exist in Copilot's model list
|
|
269
268
|
- it must preserve the original model's declared endpoint support
|
|
270
269
|
- tool, thinking, and vision requests are not rerouted to a model that lacks the required capabilities
|
|
271
270
|
|
|
272
|
-
Warmup routing is intentionally narrow. Requests must look like explicit warmup/probe traffic; ordinary tool-free chat requests are not rerouted just because they include `anthropic-beta`.
|
|
273
|
-
|
|
274
271
|
### Responses Compatibility
|
|
275
272
|
|
|
276
273
|
`/v1/responses` is designed to stay close to the OpenAI wire format while making Copilot limitations explicit:
|
|
@@ -300,7 +297,6 @@ Example `config.json`:
|
|
|
300
297
|
{
|
|
301
298
|
"smallModel": "gpt-4.1-mini",
|
|
302
299
|
"compactUseSmallModel": true,
|
|
303
|
-
"warmupUseSmallModel": false,
|
|
304
300
|
"useFunctionApplyPatch": true,
|
|
305
301
|
"responsesApiContextManagementModels": ["gpt-5", "gpt-5-mini"],
|
|
306
302
|
"modelReasoningEfforts": {
|
package/dist/main.mjs
CHANGED
|
@@ -5377,17 +5377,17 @@ const configFileSchema = object({
|
|
|
5377
5377
|
}).optional(),
|
|
5378
5378
|
smallModel: string().optional(),
|
|
5379
5379
|
compactUseSmallModel: boolean().optional(),
|
|
5380
|
-
warmupUseSmallModel: boolean().optional(),
|
|
5381
5380
|
useFunctionApplyPatch: boolean().optional(),
|
|
5382
5381
|
responsesApiContextManagementModels: array(string()).optional(),
|
|
5383
|
-
modelReasoningEfforts: record(string(), reasoningEffortSchema).optional()
|
|
5382
|
+
modelReasoningEfforts: record(string(), reasoningEffortSchema).optional(),
|
|
5383
|
+
contextUpgrade: boolean().optional()
|
|
5384
5384
|
}).passthrough();
|
|
5385
5385
|
const KNOWN_CONFIG_KEYS = new Set(Object.keys(configFileSchema.shape));
|
|
5386
5386
|
let cachedConfig = {};
|
|
5387
5387
|
const DEFAULT_REASONING_EFFORT = "high";
|
|
5388
5388
|
const DEFAULT_USE_FUNCTION_APPLY_PATCH = true;
|
|
5389
5389
|
const DEFAULT_COMPACT_USE_SMALL_MODEL = false;
|
|
5390
|
-
const DEFAULT_WARMUP_USE_SMALL_MODEL = false;
|
|
5390
|
+
const DEFAULT_CONTEXT_UPGRADE = true;
|
|
5391
5391
|
async function readConfig() {
|
|
5392
5392
|
try {
|
|
5393
5393
|
const content = await fs.readFile(PATHS.CONFIG_PATH, "utf8");
|
|
@@ -5430,15 +5430,15 @@ function getSmallModel() {
|
|
|
5430
5430
|
function shouldCompactUseSmallModel() {
|
|
5431
5431
|
return cachedConfig.compactUseSmallModel ?? DEFAULT_COMPACT_USE_SMALL_MODEL;
|
|
5432
5432
|
}
|
|
5433
|
-
function shouldWarmupUseSmallModel() {
|
|
5434
|
-
return cachedConfig.warmupUseSmallModel ?? DEFAULT_WARMUP_USE_SMALL_MODEL;
|
|
5435
|
-
}
|
|
5436
5433
|
function shouldUseFunctionApplyPatch() {
|
|
5437
5434
|
return cachedConfig.useFunctionApplyPatch ?? DEFAULT_USE_FUNCTION_APPLY_PATCH;
|
|
5438
5435
|
}
|
|
5439
5436
|
function isResponsesApiContextManagementModel(model) {
|
|
5440
5437
|
return cachedConfig.responsesApiContextManagementModels?.includes(model) ?? false;
|
|
5441
5438
|
}
|
|
5439
|
+
function shouldContextUpgrade() {
|
|
5440
|
+
return cachedConfig.contextUpgrade ?? DEFAULT_CONTEXT_UPGRADE;
|
|
5441
|
+
}
|
|
5442
5442
|
function getReasoningEffortForModel(model) {
|
|
5443
5443
|
return cachedConfig.modelReasoningEfforts?.[model] ?? DEFAULT_REASONING_EFFORT;
|
|
5444
5444
|
}
|
|
@@ -6216,7 +6216,7 @@ const checkUsage = defineCommand({
|
|
|
6216
6216
|
|
|
6217
6217
|
//#endregion
|
|
6218
6218
|
//#region src/lib/version.ts
|
|
6219
|
-
const VERSION = "0.3.2";
|
|
6219
|
+
const VERSION = "0.4.1";
|
|
6220
6220
|
|
|
6221
6221
|
//#endregion
|
|
6222
6222
|
//#region src/debug.ts
|
|
@@ -48130,6 +48130,41 @@ async function getTokenCount(payload, model) {
|
|
|
48130
48130
|
output: outputTokens
|
|
48131
48131
|
};
|
|
48132
48132
|
}
|
|
48133
|
+
/**
|
|
48134
|
+
* Fast character-based token estimate for Anthropic payloads.
|
|
48135
|
+
* Uses ~3.5 chars/token ratio (conservative for Claude's tokenizer).
|
|
48136
|
+
* Intentionally over-estimates to favor proactive routing.
|
|
48137
|
+
*/
|
|
48138
|
+
function estimateAnthropicInputTokens(payload) {
|
|
48139
|
+
let chars = 0;
|
|
48140
|
+
if (typeof payload.system === "string") chars += payload.system.length;
|
|
48141
|
+
else if (Array.isArray(payload.system)) for (const block of payload.system) chars += block.text?.length ?? 0;
|
|
48142
|
+
for (const msg of payload.messages) if (typeof msg.content === "string") chars += msg.content.length;
|
|
48143
|
+
else if (Array.isArray(msg.content)) chars += estimateContentBlockChars(msg.content);
|
|
48144
|
+
if (payload.tools?.length) chars += JSON.stringify(payload.tools).length;
|
|
48145
|
+
return Math.ceil(chars / 3.5);
|
|
48146
|
+
}
|
|
48147
|
+
function estimateContentBlockChars(blocks) {
|
|
48148
|
+
let chars = 0;
|
|
48149
|
+
for (const block of blocks) switch (block.type) {
|
|
48150
|
+
case "text":
|
|
48151
|
+
chars += block.text.length;
|
|
48152
|
+
break;
|
|
48153
|
+
case "thinking":
|
|
48154
|
+
chars += block.thinking.length;
|
|
48155
|
+
break;
|
|
48156
|
+
case "tool_use":
|
|
48157
|
+
chars += JSON.stringify(block.input).length;
|
|
48158
|
+
break;
|
|
48159
|
+
case "tool_result":
|
|
48160
|
+
chars += typeof block.content === "string" ? block.content.length : JSON.stringify(block.content ?? "").length;
|
|
48161
|
+
break;
|
|
48162
|
+
case "image":
|
|
48163
|
+
chars += 1e3;
|
|
48164
|
+
break;
|
|
48165
|
+
}
|
|
48166
|
+
return chars;
|
|
48167
|
+
}
|
|
48133
48168
|
|
|
48134
48169
|
//#endregion
|
|
48135
48170
|
//#region src/lib/upstream-signal.ts
|
|
@@ -48825,24 +48860,77 @@ async function handleCountTokensCore({ body, headers }) {
|
|
|
48825
48860
|
return { input_tokens: finalTokenCount };
|
|
48826
48861
|
}
|
|
48827
48862
|
|
|
48863
|
+
//#endregion
|
|
48864
|
+
//#region src/lib/context-upgrade.ts
|
|
48865
|
+
/** Data-driven upgrade rules. Add new entries to extend. */
|
|
48866
|
+
const CONTEXT_UPGRADE_RULES = [{
|
|
48867
|
+
from: "claude-opus-4.6",
|
|
48868
|
+
to: "claude-opus-4.6-1m",
|
|
48869
|
+
tokenThreshold: 19e4
|
|
48870
|
+
}];
|
|
48871
|
+
/** Pre-computed set for fast model eligibility checks (avoids token estimation on non-eligible models). */
|
|
48872
|
+
const UPGRADE_ELIGIBLE_MODELS = new Set(CONTEXT_UPGRADE_RULES.map((r) => r.from));
|
|
48873
|
+
/**
|
|
48874
|
+
* Quick check: does this model have any context-upgrade rules?
|
|
48875
|
+
* Use to skip expensive token estimation for ineligible models.
|
|
48876
|
+
*/
|
|
48877
|
+
function hasContextUpgradeRule(model) {
|
|
48878
|
+
return UPGRADE_ELIGIBLE_MODELS.has(model);
|
|
48879
|
+
}
|
|
48880
|
+
/** Find the upgrade rule for a model whose target exists in Copilot's model list. */
|
|
48881
|
+
function findUpgradeRule(model) {
|
|
48882
|
+
for (const rule of CONTEXT_UPGRADE_RULES) if (model === rule.from && findModelById(rule.to)) return rule;
|
|
48883
|
+
}
|
|
48884
|
+
/**
|
|
48885
|
+
* Proactive: resolve the upgrade target model for a given model + token count.
|
|
48886
|
+
* Returns the target model ID, or undefined if no upgrade applies.
|
|
48887
|
+
*/
|
|
48888
|
+
function resolveContextUpgrade(model, estimatedTokens) {
|
|
48889
|
+
const rule = findUpgradeRule(model);
|
|
48890
|
+
if (rule && estimatedTokens > rule.tokenThreshold) return rule.to;
|
|
48891
|
+
}
|
|
48892
|
+
/**
|
|
48893
|
+
* Reactive: get the upgrade target for a model on context-length error.
|
|
48894
|
+
* Returns the target model ID, or undefined if no fallback applies.
|
|
48895
|
+
*/
|
|
48896
|
+
function getContextUpgradeTarget(model) {
|
|
48897
|
+
return findUpgradeRule(model)?.to;
|
|
48898
|
+
}
|
|
48899
|
+
/** Context-length error detection with pattern matching */
|
|
48900
|
+
const CONTEXT_ERROR_PATTERNS = [
|
|
48901
|
+
/context.length/i,
|
|
48902
|
+
/too.long/i,
|
|
48903
|
+
/token.*(limit|maximum|exceed)/i,
|
|
48904
|
+
/(limit|maximum|exceed).*token/i
|
|
48905
|
+
];
|
|
48906
|
+
function isContextLengthError(error) {
|
|
48907
|
+
if (!(error instanceof HTTPError) || error.status !== 400) return false;
|
|
48908
|
+
const message = error.body?.error?.message;
|
|
48909
|
+
return message ? CONTEXT_ERROR_PATTERNS.some((pattern) => pattern.test(message)) : false;
|
|
48910
|
+
}
|
|
48911
|
+
|
|
48828
48912
|
//#endregion
|
|
48829
48913
|
//#region src/lib/request-model-policy.ts
|
|
48830
48914
|
const COMPACT_SYSTEM_PROMPT_START = "You are a helpful AI assistant tasked with summarizing conversations";
|
|
48831
|
-
|
|
48832
|
-
"warmup",
|
|
48833
|
-
"probe",
|
|
48834
|
-
"preflight"
|
|
48835
|
-
];
|
|
48836
|
-
function applyMessagesModelPolicy(payload, anthropicBetaHeader) {
|
|
48915
|
+
function applyMessagesModelPolicy(payload) {
|
|
48837
48916
|
const originalModel = payload.model;
|
|
48917
|
+
if (shouldContextUpgrade() && hasContextUpgradeRule(payload.model)) {
|
|
48918
|
+
const contextUpgradeTarget = resolveContextUpgrade(payload.model, estimateAnthropicInputTokens(payload));
|
|
48919
|
+
if (contextUpgradeTarget) {
|
|
48920
|
+
payload.model = contextUpgradeTarget;
|
|
48921
|
+
return {
|
|
48922
|
+
originalModel,
|
|
48923
|
+
routedModel: contextUpgradeTarget,
|
|
48924
|
+
reason: "context-upgrade"
|
|
48925
|
+
};
|
|
48926
|
+
}
|
|
48927
|
+
}
|
|
48838
48928
|
const smallModel = getSmallModel();
|
|
48839
|
-
if (!smallModel) return {
|
|
48929
|
+
if (!smallModel || !shouldCompactUseSmallModel() || !isCompactRequest(payload)) return {
|
|
48840
48930
|
originalModel,
|
|
48841
48931
|
routedModel: originalModel
|
|
48842
48932
|
};
|
|
48843
|
-
|
|
48844
|
-
const smallSelection = findModelById(smallModel);
|
|
48845
|
-
if (shouldCompactUseSmallModel() && isCompactRequest(payload) && canRouteToSmallModel(payload, originalSelection, smallSelection)) {
|
|
48933
|
+
if (canRouteToSmallModel(payload, findModelById(originalModel), findModelById(smallModel))) {
|
|
48846
48934
|
payload.model = smallModel;
|
|
48847
48935
|
return {
|
|
48848
48936
|
originalModel,
|
|
@@ -48850,14 +48938,6 @@ function applyMessagesModelPolicy(payload, anthropicBetaHeader) {
|
|
|
48850
48938
|
reason: "compact"
|
|
48851
48939
|
};
|
|
48852
48940
|
}
|
|
48853
|
-
if (shouldWarmupUseSmallModel() && isWarmupRequest(payload, anthropicBetaHeader) && canRouteToSmallModel(payload, originalSelection, smallSelection)) {
|
|
48854
|
-
payload.model = smallModel;
|
|
48855
|
-
return {
|
|
48856
|
-
originalModel,
|
|
48857
|
-
routedModel: smallModel,
|
|
48858
|
-
reason: "warmup"
|
|
48859
|
-
};
|
|
48860
|
-
}
|
|
48861
48941
|
return {
|
|
48862
48942
|
originalModel,
|
|
48863
48943
|
routedModel: originalModel
|
|
@@ -48868,15 +48948,6 @@ function isCompactRequest(payload) {
|
|
|
48868
48948
|
if (!Array.isArray(payload.system)) return false;
|
|
48869
48949
|
return payload.system.some((block) => typeof block.text === "string" && block.text.startsWith(COMPACT_SYSTEM_PROMPT_START));
|
|
48870
48950
|
}
|
|
48871
|
-
function isWarmupRequest(payload, anthropicBetaHeader) {
|
|
48872
|
-
if (!anthropicBetaHeader || isCompactRequest(payload)) return false;
|
|
48873
|
-
const normalizedBeta = anthropicBetaHeader.toLowerCase();
|
|
48874
|
-
if (!WARMUP_BETA_MARKERS.some((marker) => normalizedBeta.includes(marker))) return false;
|
|
48875
|
-
if (payload.system !== void 0 || payload.thinking !== void 0) return false;
|
|
48876
|
-
if (payload.tools && payload.tools.length > 0) return false;
|
|
48877
|
-
if (payload.max_tokens > 64) return false;
|
|
48878
|
-
return hasSingleShortUserTextMessage(payload);
|
|
48879
|
-
}
|
|
48880
48951
|
function canRouteToSmallModel(payload, originalModel, smallModel) {
|
|
48881
48952
|
if (!originalModel || !smallModel) return false;
|
|
48882
48953
|
const originalEndpoints = new Set(originalModel.supported_endpoints ?? []);
|
|
@@ -48887,15 +48958,6 @@ function canRouteToSmallModel(payload, originalModel, smallModel) {
|
|
|
48887
48958
|
if (hasVisionInput$1(payload) && !modelSupportsVision(smallModel)) return false;
|
|
48888
48959
|
return true;
|
|
48889
48960
|
}
|
|
48890
|
-
function hasSingleShortUserTextMessage(payload) {
|
|
48891
|
-
if (payload.messages.length !== 1) return false;
|
|
48892
|
-
const [message] = payload.messages;
|
|
48893
|
-
if (message.role !== "user") return false;
|
|
48894
|
-
if (typeof message.content === "string") return message.content.trim().length > 0 && message.content.length <= 64;
|
|
48895
|
-
if (message.content.length !== 1 || message.content[0]?.type !== "text") return false;
|
|
48896
|
-
const text = message.content[0].text.trim();
|
|
48897
|
-
return text.length > 0 && text.length <= 64;
|
|
48898
|
-
}
|
|
48899
48961
|
function hasVisionInput$1(payload) {
|
|
48900
48962
|
return payload.messages.some((message) => containsVisionContent$1(message.content));
|
|
48901
48963
|
}
|
|
@@ -50088,16 +50150,17 @@ async function handleMessagesCore({ body, signal, headers }) {
|
|
|
50088
50150
|
const anthropicPayload = parseAnthropicMessagesPayload(body);
|
|
50089
50151
|
if (consola.level >= 4) consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload));
|
|
50090
50152
|
const anthropicBetaHeader = headers.get("anthropic-beta") ?? void 0;
|
|
50091
|
-
const modelRouting = applyMessagesModelPolicy(anthropicPayload, anthropicBetaHeader);
|
|
50153
|
+
const modelRouting = applyMessagesModelPolicy(anthropicPayload);
|
|
50092
50154
|
const modelMapping = {
|
|
50093
50155
|
originalModel: modelRouting.originalModel,
|
|
50094
50156
|
mappedModel: modelRouting.routedModel
|
|
50095
50157
|
};
|
|
50096
|
-
if (modelRouting.reason) consola.debug(`Routed anthropic request
|
|
50158
|
+
if (modelRouting.reason) consola.debug(`Routed anthropic request via ${modelRouting.reason}:`, `${modelRouting.originalModel} -> ${modelRouting.routedModel}`);
|
|
50097
50159
|
const selectedModel = findModelById(anthropicPayload.model);
|
|
50098
50160
|
const upstreamSignal = createUpstreamSignalFromConfig(signal);
|
|
50099
50161
|
const copilotClient = createCopilotClient();
|
|
50100
|
-
|
|
50162
|
+
const entry = selectStrategy(defaultStrategyRegistry, selectedModel);
|
|
50163
|
+
const strategyCtx = {
|
|
50101
50164
|
copilotClient,
|
|
50102
50165
|
anthropicPayload,
|
|
50103
50166
|
anthropicBetaHeader,
|
|
@@ -50106,7 +50169,32 @@ async function handleMessagesCore({ body, signal, headers }) {
|
|
|
50106
50169
|
headers,
|
|
50107
50170
|
requestContext: readCapiRequestContext(headers),
|
|
50108
50171
|
modelMapping
|
|
50109
|
-
}
|
|
50172
|
+
};
|
|
50173
|
+
let strategyResult;
|
|
50174
|
+
try {
|
|
50175
|
+
strategyResult = await entry.execute(strategyCtx);
|
|
50176
|
+
} catch (error) {
|
|
50177
|
+
const upgradeTarget = shouldContextUpgrade() && isContextLengthError(error) ? getContextUpgradeTarget(anthropicPayload.model) : void 0;
|
|
50178
|
+
if (!upgradeTarget) throw error;
|
|
50179
|
+
consola.info(`Context length exceeded, retrying: ${anthropicPayload.model} → ${upgradeTarget}`);
|
|
50180
|
+
anthropicPayload.model = upgradeTarget;
|
|
50181
|
+
const retryModel = findModelById(upgradeTarget);
|
|
50182
|
+
const retrySignal = createUpstreamSignalFromConfig(signal);
|
|
50183
|
+
strategyResult = await selectStrategy(defaultStrategyRegistry, retryModel).execute({
|
|
50184
|
+
...strategyCtx,
|
|
50185
|
+
anthropicPayload,
|
|
50186
|
+
selectedModel: retryModel,
|
|
50187
|
+
upstreamSignal: retrySignal,
|
|
50188
|
+
modelMapping: {
|
|
50189
|
+
originalModel: modelRouting.originalModel,
|
|
50190
|
+
mappedModel: upgradeTarget
|
|
50191
|
+
}
|
|
50192
|
+
});
|
|
50193
|
+
}
|
|
50194
|
+
return {
|
|
50195
|
+
result: strategyResult.result,
|
|
50196
|
+
modelMapping: strategyResult.modelMapping
|
|
50197
|
+
};
|
|
50110
50198
|
}
|
|
50111
50199
|
|
|
50112
50200
|
//#endregion
|