npm - ghc-proxy - Versions diffs - 0.3.2 → 0.4.0 - Mend

ghc-proxy 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/README.md CHANGED

@@ -261,16 +261,13 @@ Or in the proxy's **config file** (`~/.local/share/ghc-proxy/config.json`):
 - `smallModel`: the model to reroute to
 - `compactUseSmallModel`: reroute recognized compact/summarization requests
-- `warmupUseSmallModel`: reroute explicitly marked warmup/probe requests
-Both switches default to `false`. Routing is conservative:
+The switch defaults to `false`. Routing is conservative:
 - the target `smallModel` must exist in Copilot's model list
 - it must preserve the original model's declared endpoint support
 - tool, thinking, and vision requests are not rerouted to a model that lacks the required capabilities
-Warmup routing is intentionally narrow. Requests must look like explicit warmup/probe traffic; ordinary tool-free chat requests are not rerouted just because they include `anthropic-beta`.
 ### Responses Compatibility
 `/v1/responses` is designed to stay close to the OpenAI wire format while making Copilot limitations explicit:
@@ -300,7 +297,6 @@ Example `config.json`:
 {
   "smallModel": "gpt-4.1-mini",
   "compactUseSmallModel": true,
-  "warmupUseSmallModel": false,
   "useFunctionApplyPatch": true,
   "responsesApiContextManagementModels": ["gpt-5", "gpt-5-mini"],
   "modelReasoningEfforts": {

package/dist/main.mjs CHANGED

@@ -5377,17 +5377,17 @@ const configFileSchema = object({
 	}).optional(),
 	smallModel: string().optional(),
 	compactUseSmallModel: boolean().optional(),
-	warmupUseSmallModel: boolean().optional(),
 	useFunctionApplyPatch: boolean().optional(),
 	responsesApiContextManagementModels: array(string()).optional(),
-	modelReasoningEfforts: record(string(), reasoningEffortSchema).optional()
+	modelReasoningEfforts: record(string(), reasoningEffortSchema).optional(),
+	contextUpgrade: boolean().optional()
 }).passthrough();
 const KNOWN_CONFIG_KEYS = new Set(Object.keys(configFileSchema.shape));
 let cachedConfig = {};
 const DEFAULT_REASONING_EFFORT = "high";
 const DEFAULT_USE_FUNCTION_APPLY_PATCH = true;
 const DEFAULT_COMPACT_USE_SMALL_MODEL = false;
-const DEFAULT_WARMUP_USE_SMALL_MODEL = false;
+const DEFAULT_CONTEXT_UPGRADE = true;
 async function readConfig() {
 	try {
 		const content = await fs.readFile(PATHS.CONFIG_PATH, "utf8");
@@ -5430,15 +5430,15 @@ function getSmallModel() {
 function shouldCompactUseSmallModel() {
 	return cachedConfig.compactUseSmallModel ?? DEFAULT_COMPACT_USE_SMALL_MODEL;
 }
-function shouldWarmupUseSmallModel() {
-	return cachedConfig.warmupUseSmallModel ?? DEFAULT_WARMUP_USE_SMALL_MODEL;
-}
 function shouldUseFunctionApplyPatch() {
 	return cachedConfig.useFunctionApplyPatch ?? DEFAULT_USE_FUNCTION_APPLY_PATCH;
 }
 function isResponsesApiContextManagementModel(model) {
 	return cachedConfig.responsesApiContextManagementModels?.includes(model) ?? false;
 }
+function shouldContextUpgrade() {
+	return cachedConfig.contextUpgrade ?? DEFAULT_CONTEXT_UPGRADE;
+}
 function getReasoningEffortForModel(model) {
 	return cachedConfig.modelReasoningEfforts?.[model] ?? DEFAULT_REASONING_EFFORT;
 }
@@ -6216,7 +6216,7 @@ const checkUsage = defineCommand({
 //#endregion
 //#region src/lib/version.ts
-const VERSION = "0.3.2";
+const VERSION = "0.4.0";
 //#endregion
 //#region src/debug.ts
@@ -48130,6 +48130,41 @@ async function getTokenCount(payload, model) {
 		output: outputTokens
 	};
 }
+/**
+* Fast character-based token estimate for Anthropic payloads.
+* Uses ~3.5 chars/token ratio (conservative for Claude's tokenizer).
+* Intentionally over-estimates to favor proactive routing.
+*/
+function estimateAnthropicInputTokens(payload) {
+	let chars = 0;
+	if (typeof payload.system === "string") chars += payload.system.length;
+	else if (Array.isArray(payload.system)) for (const block of payload.system) chars += block.text?.length ?? 0;
+	for (const msg of payload.messages) if (typeof msg.content === "string") chars += msg.content.length;
+	else if (Array.isArray(msg.content)) chars += estimateContentBlockChars(msg.content);
+	if (payload.tools?.length) chars += JSON.stringify(payload.tools).length;
+	return Math.ceil(chars / 3.5);
+}
+function estimateContentBlockChars(blocks) {
+	let chars = 0;
+	for (const block of blocks) switch (block.type) {
+		case "text":
+			chars += block.text.length;
+			break;
+		case "thinking":
+			chars += block.thinking.length;
+			break;
+		case "tool_use":
+			chars += JSON.stringify(block.input).length;
+			break;
+		case "tool_result":
+			chars += typeof block.content === "string" ? block.content.length : JSON.stringify(block.content ?? "").length;
+			break;
+		case "image":
+			chars += 1e3;
+			break;
+	}
+	return chars;
+}
 //#endregion
 //#region src/lib/upstream-signal.ts
@@ -48825,24 +48860,77 @@ async function handleCountTokensCore({ body, headers }) {
 	return { input_tokens: finalTokenCount };
 }
+//#endregion
+//#region src/lib/context-upgrade.ts
+/** Data-driven upgrade rules. Add new entries to extend. */
+const CONTEXT_UPGRADE_RULES = [{
+	from: "claude-opus-4.6",
+	to: "claude-opus-4.6-1m",
+	tokenThreshold: 19e4
+}];
+/** Pre-computed set for fast model eligibility checks (avoids token estimation on non-eligible models). */
+const UPGRADE_ELIGIBLE_MODELS = new Set(CONTEXT_UPGRADE_RULES.map((r) => r.from));
+/**
+* Quick check: does this model have any context-upgrade rules?
+* Use to skip expensive token estimation for ineligible models.
+*/
+function hasContextUpgradeRule(model) {
+	return UPGRADE_ELIGIBLE_MODELS.has(model);
+}
+/** Find the upgrade rule for a model whose target exists in Copilot's model list. */
+function findUpgradeRule(model) {
+	for (const rule of CONTEXT_UPGRADE_RULES) if (model === rule.from && findModelById(rule.to)) return rule;
+}
+/**
+* Proactive: resolve the upgrade target model for a given model + token count.
+* Returns the target model ID, or undefined if no upgrade applies.
+*/
+function resolveContextUpgrade(model, estimatedTokens) {
+	const rule = findUpgradeRule(model);
+	if (rule && estimatedTokens > rule.tokenThreshold) return rule.to;
+}
+/**
+* Reactive: get the upgrade target for a model on context-length error.
+* Returns the target model ID, or undefined if no fallback applies.
+*/
+function getContextUpgradeTarget(model) {
+	return findUpgradeRule(model)?.to;
+}
+/** Context-length error detection with pattern matching */
+const CONTEXT_ERROR_PATTERNS = [
+	/context.length/i,
+	/too.long/i,
+	/token.*(limit|maximum|exceed)/i,
+	/(limit|maximum|exceed).*token/i
+];
+function isContextLengthError(error) {
+	if (!(error instanceof HTTPError) || error.status !== 400) return false;
+	const message = error.body?.error?.message;
+	return message ? CONTEXT_ERROR_PATTERNS.some((pattern) => pattern.test(message)) : false;
+}
 //#endregion
 //#region src/lib/request-model-policy.ts
 const COMPACT_SYSTEM_PROMPT_START = "You are a helpful AI assistant tasked with summarizing conversations";
-const WARMUP_BETA_MARKERS = [
-	"warmup",
-	"probe",
-	"preflight"
-];
-function applyMessagesModelPolicy(payload, anthropicBetaHeader) {
+function applyMessagesModelPolicy(payload) {
 	const originalModel = payload.model;
+	if (shouldContextUpgrade() && hasContextUpgradeRule(payload.model)) {
+		const contextUpgradeTarget = resolveContextUpgrade(payload.model, estimateAnthropicInputTokens(payload));
+		if (contextUpgradeTarget) {
+			payload.model = contextUpgradeTarget;
+			return {
+				originalModel,
+				routedModel: contextUpgradeTarget,
+				reason: "context-upgrade"
+			};
+		}
+	}
 	const smallModel = getSmallModel();
-	if (!smallModel) return {
+	if (!smallModel || !shouldCompactUseSmallModel() || !isCompactRequest(payload)) return {
 		originalModel,
 		routedModel: originalModel
 	};
-	const originalSelection = findModelById(originalModel);
-	const smallSelection = findModelById(smallModel);
-	if (shouldCompactUseSmallModel() && isCompactRequest(payload) && canRouteToSmallModel(payload, originalSelection, smallSelection)) {
+	if (canRouteToSmallModel(payload, findModelById(originalModel), findModelById(smallModel))) {
 		payload.model = smallModel;
 		return {
 			originalModel,
@@ -48850,14 +48938,6 @@ function applyMessagesModelPolicy(payload, anthropicBetaHeader) {
 			reason: "compact"
 		};
 	}
-	if (shouldWarmupUseSmallModel() && isWarmupRequest(payload, anthropicBetaHeader) && canRouteToSmallModel(payload, originalSelection, smallSelection)) {
-		payload.model = smallModel;
-		return {
-			originalModel,
-			routedModel: smallModel,
-			reason: "warmup"
-		};
-	}
 	return {
 		originalModel,
 		routedModel: originalModel
@@ -48868,15 +48948,6 @@ function isCompactRequest(payload) {
 	if (!Array.isArray(payload.system)) return false;
 	return payload.system.some((block) => typeof block.text === "string" && block.text.startsWith(COMPACT_SYSTEM_PROMPT_START));
 }
-function isWarmupRequest(payload, anthropicBetaHeader) {
-	if (!anthropicBetaHeader || isCompactRequest(payload)) return false;
-	const normalizedBeta = anthropicBetaHeader.toLowerCase();
-	if (!WARMUP_BETA_MARKERS.some((marker) => normalizedBeta.includes(marker))) return false;
-	if (payload.system !== void 0 || payload.thinking !== void 0) return false;
-	if (payload.tools && payload.tools.length > 0) return false;
-	if (payload.max_tokens > 64) return false;
-	return hasSingleShortUserTextMessage(payload);
-}
 function canRouteToSmallModel(payload, originalModel, smallModel) {
 	if (!originalModel || !smallModel) return false;
 	const originalEndpoints = new Set(originalModel.supported_endpoints ?? []);
@@ -48887,15 +48958,6 @@ function canRouteToSmallModel(payload, originalModel, smallModel) {
 	if (hasVisionInput$1(payload) && !modelSupportsVision(smallModel)) return false;
 	return true;
 }
-function hasSingleShortUserTextMessage(payload) {
-	if (payload.messages.length !== 1) return false;
-	const [message] = payload.messages;
-	if (message.role !== "user") return false;
-	if (typeof message.content === "string") return message.content.trim().length > 0 && message.content.length <= 64;
-	if (message.content.length !== 1 || message.content[0]?.type !== "text") return false;
-	const text = message.content[0].text.trim();
-	return text.length > 0 && text.length <= 64;
-}
 function hasVisionInput$1(payload) {
 	return payload.messages.some((message) => containsVisionContent$1(message.content));
 }
@@ -50088,16 +50150,17 @@ async function handleMessagesCore({ body, signal, headers }) {
 	const anthropicPayload = parseAnthropicMessagesPayload(body);
 	if (consola.level >= 4) consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload));
 	const anthropicBetaHeader = headers.get("anthropic-beta") ?? void 0;
-	const modelRouting = applyMessagesModelPolicy(anthropicPayload, anthropicBetaHeader);
+	const modelRouting = applyMessagesModelPolicy(anthropicPayload);
 	const modelMapping = {
 		originalModel: modelRouting.originalModel,
 		mappedModel: modelRouting.routedModel
 	};
-	if (modelRouting.reason) consola.debug(`Routed anthropic request to small model via ${modelRouting.reason}:`, `${modelRouting.originalModel} -> ${modelRouting.routedModel}`);
+	if (modelRouting.reason) consola.debug(`Routed anthropic request via ${modelRouting.reason}:`, `${modelRouting.originalModel} -> ${modelRouting.routedModel}`);
 	const selectedModel = findModelById(anthropicPayload.model);
 	const upstreamSignal = createUpstreamSignalFromConfig(signal);
 	const copilotClient = createCopilotClient();
-	return selectStrategy(defaultStrategyRegistry, selectedModel).execute({
+	const entry = selectStrategy(defaultStrategyRegistry, selectedModel);
+	const strategyCtx = {
 		copilotClient,
 		anthropicPayload,
 		anthropicBetaHeader,
@@ -50106,7 +50169,32 @@ async function handleMessagesCore({ body, signal, headers }) {
 		headers,
 		requestContext: readCapiRequestContext(headers),
 		modelMapping
-	});
+	};
+	let strategyResult;
+	try {
+		strategyResult = await entry.execute(strategyCtx);
+	} catch (error) {
+		const upgradeTarget = shouldContextUpgrade() && isContextLengthError(error) ? getContextUpgradeTarget(anthropicPayload.model) : void 0;
+		if (!upgradeTarget) throw error;
+		consola.info(`Context length exceeded, retrying: ${anthropicPayload.model} → ${upgradeTarget}`);
+		anthropicPayload.model = upgradeTarget;
+		const retryModel = findModelById(upgradeTarget);
+		const retrySignal = createUpstreamSignalFromConfig(signal);
+		strategyResult = await selectStrategy(defaultStrategyRegistry, retryModel).execute({
+			...strategyCtx,
+			anthropicPayload,
+			selectedModel: retryModel,
+			upstreamSignal: retrySignal,
+			modelMapping: {
+				originalModel: modelRouting.originalModel,
+				mappedModel: upgradeTarget
+			}
+		});
+	}
+	return {
+		result: strategyResult.result,
+		modelMapping: strategyResult.modelMapping
+	};
 }
 //#endregion