ghc-proxy 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -261,16 +261,13 @@ Or in the proxy's **config file** (`~/.local/share/ghc-proxy/config.json`):
261
261
 
262
262
  - `smallModel`: the model to reroute to
263
263
  - `compactUseSmallModel`: reroute recognized compact/summarization requests
264
- - `warmupUseSmallModel`: reroute explicitly marked warmup/probe requests
265
264
 
266
- Both switches default to `false`. Routing is conservative:
265
+ The switch defaults to `false`. Routing is conservative:
267
266
 
268
267
  - the target `smallModel` must exist in Copilot's model list
269
268
  - it must preserve the original model's declared endpoint support
270
269
  - tool, thinking, and vision requests are not rerouted to a model that lacks the required capabilities
271
270
 
272
- Warmup routing is intentionally narrow. Requests must look like explicit warmup/probe traffic; ordinary tool-free chat requests are not rerouted just because they include `anthropic-beta`.
273
-
274
271
  ### Responses Compatibility
275
272
 
276
273
  `/v1/responses` is designed to stay close to the OpenAI wire format while making Copilot limitations explicit:
@@ -300,7 +297,6 @@ Example `config.json`:
300
297
  {
301
298
  "smallModel": "gpt-4.1-mini",
302
299
  "compactUseSmallModel": true,
303
- "warmupUseSmallModel": false,
304
300
  "useFunctionApplyPatch": true,
305
301
  "responsesApiContextManagementModels": ["gpt-5", "gpt-5-mini"],
306
302
  "modelReasoningEfforts": {
package/dist/main.mjs CHANGED
@@ -5377,17 +5377,17 @@ const configFileSchema = object({
5377
5377
  }).optional(),
5378
5378
  smallModel: string().optional(),
5379
5379
  compactUseSmallModel: boolean().optional(),
5380
- warmupUseSmallModel: boolean().optional(),
5381
5380
  useFunctionApplyPatch: boolean().optional(),
5382
5381
  responsesApiContextManagementModels: array(string()).optional(),
5383
- modelReasoningEfforts: record(string(), reasoningEffortSchema).optional()
5382
+ modelReasoningEfforts: record(string(), reasoningEffortSchema).optional(),
5383
+ contextUpgrade: boolean().optional()
5384
5384
  }).passthrough();
5385
5385
  const KNOWN_CONFIG_KEYS = new Set(Object.keys(configFileSchema.shape));
5386
5386
  let cachedConfig = {};
5387
5387
  const DEFAULT_REASONING_EFFORT = "high";
5388
5388
  const DEFAULT_USE_FUNCTION_APPLY_PATCH = true;
5389
5389
  const DEFAULT_COMPACT_USE_SMALL_MODEL = false;
5390
- const DEFAULT_WARMUP_USE_SMALL_MODEL = false;
5390
+ const DEFAULT_CONTEXT_UPGRADE = true;
5391
5391
  async function readConfig() {
5392
5392
  try {
5393
5393
  const content = await fs.readFile(PATHS.CONFIG_PATH, "utf8");
@@ -5430,15 +5430,15 @@ function getSmallModel() {
5430
5430
  function shouldCompactUseSmallModel() {
5431
5431
  return cachedConfig.compactUseSmallModel ?? DEFAULT_COMPACT_USE_SMALL_MODEL;
5432
5432
  }
5433
- function shouldWarmupUseSmallModel() {
5434
- return cachedConfig.warmupUseSmallModel ?? DEFAULT_WARMUP_USE_SMALL_MODEL;
5435
- }
5436
5433
  function shouldUseFunctionApplyPatch() {
5437
5434
  return cachedConfig.useFunctionApplyPatch ?? DEFAULT_USE_FUNCTION_APPLY_PATCH;
5438
5435
  }
5439
5436
  function isResponsesApiContextManagementModel(model) {
5440
5437
  return cachedConfig.responsesApiContextManagementModels?.includes(model) ?? false;
5441
5438
  }
5439
+ function shouldContextUpgrade() {
5440
+ return cachedConfig.contextUpgrade ?? DEFAULT_CONTEXT_UPGRADE;
5441
+ }
5442
5442
  function getReasoningEffortForModel(model) {
5443
5443
  return cachedConfig.modelReasoningEfforts?.[model] ?? DEFAULT_REASONING_EFFORT;
5444
5444
  }
@@ -6216,7 +6216,7 @@ const checkUsage = defineCommand({
6216
6216
 
6217
6217
  //#endregion
6218
6218
  //#region src/lib/version.ts
6219
- const VERSION = "0.3.2";
6219
+ const VERSION = "0.4.0";
6220
6220
 
6221
6221
  //#endregion
6222
6222
  //#region src/debug.ts
@@ -48130,6 +48130,41 @@ async function getTokenCount(payload, model) {
48130
48130
  output: outputTokens
48131
48131
  };
48132
48132
  }
48133
+ /**
48134
+ * Fast character-based token estimate for Anthropic payloads.
48135
+ * Uses ~3.5 chars/token ratio (conservative for Claude's tokenizer).
48136
+ * Intentionally over-estimates to favor proactive routing.
48137
+ */
48138
+ function estimateAnthropicInputTokens(payload) {
48139
+ let chars = 0;
48140
+ if (typeof payload.system === "string") chars += payload.system.length;
48141
+ else if (Array.isArray(payload.system)) for (const block of payload.system) chars += block.text?.length ?? 0;
48142
+ for (const msg of payload.messages) if (typeof msg.content === "string") chars += msg.content.length;
48143
+ else if (Array.isArray(msg.content)) chars += estimateContentBlockChars(msg.content);
48144
+ if (payload.tools?.length) chars += JSON.stringify(payload.tools).length;
48145
+ return Math.ceil(chars / 3.5);
48146
+ }
48147
+ function estimateContentBlockChars(blocks) {
48148
+ let chars = 0;
48149
+ for (const block of blocks) switch (block.type) {
48150
+ case "text":
48151
+ chars += block.text.length;
48152
+ break;
48153
+ case "thinking":
48154
+ chars += block.thinking.length;
48155
+ break;
48156
+ case "tool_use":
48157
+ chars += JSON.stringify(block.input).length;
48158
+ break;
48159
+ case "tool_result":
48160
+ chars += typeof block.content === "string" ? block.content.length : JSON.stringify(block.content ?? "").length;
48161
+ break;
48162
+ case "image":
48163
+ chars += 1e3;
48164
+ break;
48165
+ }
48166
+ return chars;
48167
+ }
48133
48168
 
48134
48169
  //#endregion
48135
48170
  //#region src/lib/upstream-signal.ts
@@ -48825,24 +48860,77 @@ async function handleCountTokensCore({ body, headers }) {
48825
48860
  return { input_tokens: finalTokenCount };
48826
48861
  }
48827
48862
 
48863
+ //#endregion
48864
+ //#region src/lib/context-upgrade.ts
48865
+ /** Data-driven upgrade rules. Add new entries to extend. */
48866
+ const CONTEXT_UPGRADE_RULES = [{
48867
+ from: "claude-opus-4.6",
48868
+ to: "claude-opus-4.6-1m",
48869
+ tokenThreshold: 19e4
48870
+ }];
48871
+ /** Pre-computed set for fast model eligibility checks (avoids token estimation on non-eligible models). */
48872
+ const UPGRADE_ELIGIBLE_MODELS = new Set(CONTEXT_UPGRADE_RULES.map((r) => r.from));
48873
+ /**
48874
+ * Quick check: does this model have any context-upgrade rules?
48875
+ * Use to skip expensive token estimation for ineligible models.
48876
+ */
48877
+ function hasContextUpgradeRule(model) {
48878
+ return UPGRADE_ELIGIBLE_MODELS.has(model);
48879
+ }
48880
+ /** Find the upgrade rule for a model whose target exists in Copilot's model list. */
48881
+ function findUpgradeRule(model) {
48882
+ for (const rule of CONTEXT_UPGRADE_RULES) if (model === rule.from && findModelById(rule.to)) return rule;
48883
+ }
48884
+ /**
48885
+ * Proactive: resolve the upgrade target model for a given model + token count.
48886
+ * Returns the target model ID, or undefined if no upgrade applies.
48887
+ */
48888
+ function resolveContextUpgrade(model, estimatedTokens) {
48889
+ const rule = findUpgradeRule(model);
48890
+ if (rule && estimatedTokens > rule.tokenThreshold) return rule.to;
48891
+ }
48892
+ /**
48893
+ * Reactive: get the upgrade target for a model on context-length error.
48894
+ * Returns the target model ID, or undefined if no fallback applies.
48895
+ */
48896
+ function getContextUpgradeTarget(model) {
48897
+ return findUpgradeRule(model)?.to;
48898
+ }
48899
+ /** Context-length error detection with pattern matching */
48900
+ const CONTEXT_ERROR_PATTERNS = [
48901
+ /context.length/i,
48902
+ /too.long/i,
48903
+ /token.*(limit|maximum|exceed)/i,
48904
+ /(limit|maximum|exceed).*token/i
48905
+ ];
48906
+ function isContextLengthError(error) {
48907
+ if (!(error instanceof HTTPError) || error.status !== 400) return false;
48908
+ const message = error.body?.error?.message;
48909
+ return message ? CONTEXT_ERROR_PATTERNS.some((pattern) => pattern.test(message)) : false;
48910
+ }
48911
+
48828
48912
  //#endregion
48829
48913
  //#region src/lib/request-model-policy.ts
48830
48914
  const COMPACT_SYSTEM_PROMPT_START = "You are a helpful AI assistant tasked with summarizing conversations";
48831
- const WARMUP_BETA_MARKERS = [
48832
- "warmup",
48833
- "probe",
48834
- "preflight"
48835
- ];
48836
- function applyMessagesModelPolicy(payload, anthropicBetaHeader) {
48915
+ function applyMessagesModelPolicy(payload) {
48837
48916
  const originalModel = payload.model;
48917
+ if (shouldContextUpgrade() && hasContextUpgradeRule(payload.model)) {
48918
+ const contextUpgradeTarget = resolveContextUpgrade(payload.model, estimateAnthropicInputTokens(payload));
48919
+ if (contextUpgradeTarget) {
48920
+ payload.model = contextUpgradeTarget;
48921
+ return {
48922
+ originalModel,
48923
+ routedModel: contextUpgradeTarget,
48924
+ reason: "context-upgrade"
48925
+ };
48926
+ }
48927
+ }
48838
48928
  const smallModel = getSmallModel();
48839
- if (!smallModel) return {
48929
+ if (!smallModel || !shouldCompactUseSmallModel() || !isCompactRequest(payload)) return {
48840
48930
  originalModel,
48841
48931
  routedModel: originalModel
48842
48932
  };
48843
- const originalSelection = findModelById(originalModel);
48844
- const smallSelection = findModelById(smallModel);
48845
- if (shouldCompactUseSmallModel() && isCompactRequest(payload) && canRouteToSmallModel(payload, originalSelection, smallSelection)) {
48933
+ if (canRouteToSmallModel(payload, findModelById(originalModel), findModelById(smallModel))) {
48846
48934
  payload.model = smallModel;
48847
48935
  return {
48848
48936
  originalModel,
@@ -48850,14 +48938,6 @@ function applyMessagesModelPolicy(payload, anthropicBetaHeader) {
48850
48938
  reason: "compact"
48851
48939
  };
48852
48940
  }
48853
- if (shouldWarmupUseSmallModel() && isWarmupRequest(payload, anthropicBetaHeader) && canRouteToSmallModel(payload, originalSelection, smallSelection)) {
48854
- payload.model = smallModel;
48855
- return {
48856
- originalModel,
48857
- routedModel: smallModel,
48858
- reason: "warmup"
48859
- };
48860
- }
48861
48941
  return {
48862
48942
  originalModel,
48863
48943
  routedModel: originalModel
@@ -48868,15 +48948,6 @@ function isCompactRequest(payload) {
48868
48948
  if (!Array.isArray(payload.system)) return false;
48869
48949
  return payload.system.some((block) => typeof block.text === "string" && block.text.startsWith(COMPACT_SYSTEM_PROMPT_START));
48870
48950
  }
48871
- function isWarmupRequest(payload, anthropicBetaHeader) {
48872
- if (!anthropicBetaHeader || isCompactRequest(payload)) return false;
48873
- const normalizedBeta = anthropicBetaHeader.toLowerCase();
48874
- if (!WARMUP_BETA_MARKERS.some((marker) => normalizedBeta.includes(marker))) return false;
48875
- if (payload.system !== void 0 || payload.thinking !== void 0) return false;
48876
- if (payload.tools && payload.tools.length > 0) return false;
48877
- if (payload.max_tokens > 64) return false;
48878
- return hasSingleShortUserTextMessage(payload);
48879
- }
48880
48951
  function canRouteToSmallModel(payload, originalModel, smallModel) {
48881
48952
  if (!originalModel || !smallModel) return false;
48882
48953
  const originalEndpoints = new Set(originalModel.supported_endpoints ?? []);
@@ -48887,15 +48958,6 @@ function canRouteToSmallModel(payload, originalModel, smallModel) {
48887
48958
  if (hasVisionInput$1(payload) && !modelSupportsVision(smallModel)) return false;
48888
48959
  return true;
48889
48960
  }
48890
- function hasSingleShortUserTextMessage(payload) {
48891
- if (payload.messages.length !== 1) return false;
48892
- const [message] = payload.messages;
48893
- if (message.role !== "user") return false;
48894
- if (typeof message.content === "string") return message.content.trim().length > 0 && message.content.length <= 64;
48895
- if (message.content.length !== 1 || message.content[0]?.type !== "text") return false;
48896
- const text = message.content[0].text.trim();
48897
- return text.length > 0 && text.length <= 64;
48898
- }
48899
48961
  function hasVisionInput$1(payload) {
48900
48962
  return payload.messages.some((message) => containsVisionContent$1(message.content));
48901
48963
  }
@@ -50088,16 +50150,17 @@ async function handleMessagesCore({ body, signal, headers }) {
50088
50150
  const anthropicPayload = parseAnthropicMessagesPayload(body);
50089
50151
  if (consola.level >= 4) consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload));
50090
50152
  const anthropicBetaHeader = headers.get("anthropic-beta") ?? void 0;
50091
- const modelRouting = applyMessagesModelPolicy(anthropicPayload, anthropicBetaHeader);
50153
+ const modelRouting = applyMessagesModelPolicy(anthropicPayload);
50092
50154
  const modelMapping = {
50093
50155
  originalModel: modelRouting.originalModel,
50094
50156
  mappedModel: modelRouting.routedModel
50095
50157
  };
50096
- if (modelRouting.reason) consola.debug(`Routed anthropic request to small model via ${modelRouting.reason}:`, `${modelRouting.originalModel} -> ${modelRouting.routedModel}`);
50158
+ if (modelRouting.reason) consola.debug(`Routed anthropic request via ${modelRouting.reason}:`, `${modelRouting.originalModel} -> ${modelRouting.routedModel}`);
50097
50159
  const selectedModel = findModelById(anthropicPayload.model);
50098
50160
  const upstreamSignal = createUpstreamSignalFromConfig(signal);
50099
50161
  const copilotClient = createCopilotClient();
50100
- return selectStrategy(defaultStrategyRegistry, selectedModel).execute({
50162
+ const entry = selectStrategy(defaultStrategyRegistry, selectedModel);
50163
+ const strategyCtx = {
50101
50164
  copilotClient,
50102
50165
  anthropicPayload,
50103
50166
  anthropicBetaHeader,
@@ -50106,7 +50169,32 @@ async function handleMessagesCore({ body, signal, headers }) {
50106
50169
  headers,
50107
50170
  requestContext: readCapiRequestContext(headers),
50108
50171
  modelMapping
50109
- });
50172
+ };
50173
+ let strategyResult;
50174
+ try {
50175
+ strategyResult = await entry.execute(strategyCtx);
50176
+ } catch (error) {
50177
+ const upgradeTarget = shouldContextUpgrade() && isContextLengthError(error) ? getContextUpgradeTarget(anthropicPayload.model) : void 0;
50178
+ if (!upgradeTarget) throw error;
50179
+ consola.info(`Context length exceeded, retrying: ${anthropicPayload.model} → ${upgradeTarget}`);
50180
+ anthropicPayload.model = upgradeTarget;
50181
+ const retryModel = findModelById(upgradeTarget);
50182
+ const retrySignal = createUpstreamSignalFromConfig(signal);
50183
+ strategyResult = await selectStrategy(defaultStrategyRegistry, retryModel).execute({
50184
+ ...strategyCtx,
50185
+ anthropicPayload,
50186
+ selectedModel: retryModel,
50187
+ upstreamSignal: retrySignal,
50188
+ modelMapping: {
50189
+ originalModel: modelRouting.originalModel,
50190
+ mappedModel: upgradeTarget
50191
+ }
50192
+ });
50193
+ }
50194
+ return {
50195
+ result: strategyResult.result,
50196
+ modelMapping: strategyResult.modelMapping
50197
+ };
50110
50198
  }
50111
50199
 
50112
50200
  //#endregion