npm - ghc-proxy - Versions diffs - 0.4.2 → 0.5.1 - Mend

ghc-proxy 0.4.2 → 0.5.1

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/main.mjs CHANGED Viewed

@@ -5380,7 +5380,12 @@ const configFileSchema = object({
 	useFunctionApplyPatch: boolean().optional(),
 	responsesApiContextManagementModels: array(string()).optional(),
 	modelReasoningEfforts: record(string(), reasoningEffortSchema).optional(),
-	contextUpgrade: boolean().optional()
+	modelRewrites: array(object({
+		from: string(),
+		to: string()
+	})).optional(),
+	contextUpgrade: boolean().optional(),
+	contextUpgradeTokenThreshold: number().int().positive().optional()
 }).passthrough();
 const KNOWN_CONFIG_KEYS = new Set(Object.keys(configFileSchema.shape));
 let cachedConfig = {};
@@ -5388,6 +5393,7 @@ const DEFAULT_REASONING_EFFORT = "high";
 const DEFAULT_USE_FUNCTION_APPLY_PATCH = true;
 const DEFAULT_COMPACT_USE_SMALL_MODEL = false;
 const DEFAULT_CONTEXT_UPGRADE = true;
+const DEFAULT_CONTEXT_UPGRADE_TOKEN_THRESHOLD = 16e4;
 async function readConfig() {
 	try {
 		const content = await fs.readFile(PATHS.CONFIG_PATH, "utf8");
@@ -5439,6 +5445,9 @@ function isResponsesApiContextManagementModel(model) {
 function shouldContextUpgrade() {
 	return cachedConfig.contextUpgrade ?? DEFAULT_CONTEXT_UPGRADE;
 }
+function getContextUpgradeTokenThreshold() {
+	return cachedConfig.contextUpgradeTokenThreshold ?? DEFAULT_CONTEXT_UPGRADE_TOKEN_THRESHOLD;
+}
 function getReasoningEffortForModel(model) {
 	return cachedConfig.modelReasoningEfforts?.[model] ?? DEFAULT_REASONING_EFFORT;
 }
@@ -6216,7 +6225,7 @@ const checkUsage = defineCommand({
 //#endregion
 //#region src/lib/version.ts
-const VERSION = "0.4.2";
+const VERSION = "0.5.1";
 //#endregion
 //#region src/debug.ts
@@ -46561,12 +46570,20 @@ function colorizeMethod(method) {
 }
 function formatModelMapping(info) {
 	if (!info) return "";
-	const { originalModel, mappedModel } = info;
-	if (!originalModel && !mappedModel) return "";
-	const original = originalModel ?? "-";
-	const mapped = mappedModel ?? "-";
-	if (original === mapped) return ` ${colorize("dim", "model=")}${colorize("blueBright", original)}`;
-	return ` ${colorize("dim", "model=")}${colorize("blueBright", original)} ${colorize("dim", "→")} ${colorize("greenBright", mapped)}`;
+	const { originalModel, rewrittenModel, mappedModel } = info;
+	if (!originalModel && !rewrittenModel && !mappedModel) return "";
+	const parts = [];
+	const displayOriginal = originalModel ?? "-";
+	parts.push(colorize("blueBright", displayOriginal));
+	if (rewrittenModel && rewrittenModel !== displayOriginal) {
+		parts.push(colorize("dim", "~>"));
+		parts.push(colorize("cyanBright", rewrittenModel));
+	}
+	if (mappedModel && mappedModel !== (rewrittenModel ?? displayOriginal)) {
+		parts.push(colorize("dim", "→"));
+		parts.push(colorize("greenBright", mappedModel));
+	}
+	return ` ${colorize("dim", "model=")}${parts.join(" ")}`;
 }
 /**
 * Request logging function.
@@ -46585,6 +46602,19 @@ function logRequest(method, url, status, elapsed, modelInfo) {
 	console.log(`${line}${formatModelMapping(modelInfo)}`);
 }
+//#endregion
+//#region src/lib/request-timeout.ts
+function disableIdleTimeout(server, request) {
+	if (typeof server?.timeout === "function") server.timeout(request, 0);
+}
+function hasStreamingFlag(body) {
+	if (!body || typeof body !== "object") return false;
+	return body.stream === true;
+}
+function hasStreamingResponsesQuery(request) {
+	return new URL(request.url).searchParams.get("stream") === "true";
+}
 //#endregion
 //#region src/lib/sse-adapter.ts
 /**
@@ -47208,10 +47238,10 @@ var AnthropicStreamTranslator = class {
 	}
 	onChunk(chunk) {
 		const deltas = this.toConversationDeltas(chunk);
+		if (chunk.usage) this.state.lastUsage = chunk.usage;
 		if (deltas.length === 0) return [];
 		const events = [];
 		this.appendMessageStart(events, chunk);
-		this.state.lastUsage = chunk.usage;
 		for (const delta of deltas) switch (delta.kind) {
 			case "message_start": break;
 			case "thinking_delta":
@@ -47244,8 +47274,7 @@ var AnthropicStreamTranslator = class {
 					...delta.metadata
 				};
 				this.state.pendingStopReason = delta.stopReason;
-				if (delta.usage) this.state.lastUsage = delta.usage;
-				events.push(...this.onDone());
+				this.closeAllBlocks(events);
 				break;
 		}
 		return events;
@@ -47253,9 +47282,7 @@ var AnthropicStreamTranslator = class {
 	onDone() {
 		if (!this.state.messageStartSent || this.state.messageStopSent) return [];
 		const events = [];
-		this.thinkingWriter.close(events);
-		this.textWriter.close(events);
-		this.toolWriter.closeAll(events);
+		this.closeAllBlocks(events);
 		events.push({
 			type: "message_delta",
 			delta: {
@@ -47276,6 +47303,11 @@ var AnthropicStreamTranslator = class {
 			}
 		}];
 	}
+	closeAllBlocks(events) {
+		this.thinkingWriter.close(events);
+		this.textWriter.close(events);
+		this.toolWriter.closeAll(events);
+	}
 	appendMessageStart(events, chunk) {
 		if (this.state.messageStartSent) return;
 		events.push({
@@ -47940,6 +47972,106 @@ function modelSupportsOutputConfig(model) {
 	return !MODELS_REJECTING_OUTPUT_CONFIG.has(model.id);
 }
+//#endregion
+//#region src/lib/model-rewrite.ts
+/**
+* Unified model rewrite: user rules → built-in normalization → pass-through.
+* Call once at handler entry, before any model lookup or policy.
+*/
+function rewriteModel(modelId) {
+	const userRules = getCachedConfig().modelRewrites;
+	if (userRules) {
+		for (const rule of userRules) if (matchesGlob(rule.from, modelId)) return {
+			originalModel: modelId,
+			model: normalizeToKnownModel(rule.to) ?? rule.to
+		};
+	}
+	const normalized = normalizeToKnownModel(modelId);
+	if (normalized && normalized !== modelId) return {
+		originalModel: modelId,
+		model: normalized
+	};
+	return {
+		originalModel: modelId,
+		model: modelId
+	};
+}
+/**
+* Apply model rewrite to a mutable model field and log if changed.
+* Returns the rewrite result for downstream use.
+*/
+function applyModelRewrite(payload) {
+	const result = rewriteModel(payload.model);
+	if (result.model !== result.originalModel) {
+		consola.debug(`Model rewritten: ${result.originalModel} ~> ${result.model}`);
+		payload.model = result.model;
+	}
+	return result;
+}
+const DOT_RE = /\./g;
+/**
+* Resolve a model ID against Copilot's cached model list using
+* dash/dot equivalence. Returns the canonical ID if found.
+*/
+function normalizeToKnownModel(modelId) {
+	const models = state.cache.models?.data;
+	if (!models) return void 0;
+	if (models.some((m) => m.id === modelId)) return modelId;
+	const normalized = modelId.replace(DOT_RE, "-");
+	for (const model of models) if (model.id.replace(DOT_RE, "-") === normalized) return model.id;
+}
+const GLOB_SPECIAL_RE = /[.+^${}()|[\]\\]/g;
+const GLOB_STAR_RE = /\*/g;
+function matchesGlob(pattern, value) {
+	if (!pattern.includes("*")) return pattern === value;
+	return new RegExp(`^${pattern.replace(GLOB_SPECIAL_RE, "\\$&").replace(GLOB_STAR_RE, ".*")}$`).test(value);
+}
+/** Data-driven upgrade rules. Add new entries to extend. */
+const CONTEXT_UPGRADE_RULES = [{
+	from: "claude-opus-4.6",
+	to: "claude-opus-4.6-1m"
+}];
+/** Pre-computed set for fast model eligibility checks (avoids token estimation on non-eligible models). */
+const UPGRADE_ELIGIBLE_MODELS = new Set(CONTEXT_UPGRADE_RULES.map((r) => r.from));
+/**
+* Quick check: does this model have any context-upgrade rules?
+* Use to skip expensive token estimation for ineligible models.
+*/
+function hasContextUpgradeRule(model) {
+	return UPGRADE_ELIGIBLE_MODELS.has(model);
+}
+/** Find the upgrade rule for a model whose target exists in Copilot's model list. */
+function findUpgradeRule(model) {
+	for (const rule of CONTEXT_UPGRADE_RULES) if (model === rule.from && findModelById(rule.to)) return rule;
+}
+/**
+* Proactive: resolve the upgrade target model for a given model + token count.
+* Returns the target model ID, or undefined if no upgrade applies.
+*/
+function resolveContextUpgrade(model, estimatedTokens) {
+	const rule = findUpgradeRule(model);
+	if (rule && estimatedTokens > getContextUpgradeTokenThreshold()) return rule.to;
+}
+/**
+* Reactive: get the upgrade target for a model on context-length error.
+* Returns the target model ID, or undefined if no fallback applies.
+*/
+function getContextUpgradeTarget(model) {
+	return findUpgradeRule(model)?.to;
+}
+/** Context-length error detection with pattern matching */
+const CONTEXT_ERROR_PATTERNS = [
+	/context.length/i,
+	/too.long/i,
+	/token.*(limit|maximum|exceed)/i,
+	/(limit|maximum|exceed).*token/i
+];
+function isContextLengthError(error) {
+	if (!(error instanceof HTTPError) || error.status !== 400) return false;
+	const message = error.body?.error?.message;
+	return message ? CONTEXT_ERROR_PATTERNS.some((pattern) => pattern.test(message)) : false;
+}
 //#endregion
 //#region src/lib/tokenizer.ts
 const ENCODING_MAP = {
@@ -48740,7 +48872,8 @@ async function handleCompletionCore({ body, signal, headers }) {
 	const adapter = new OpenAIChatAdapter();
 	let payload = parseOpenAIChatPayload(body);
 	consola.debug("Request payload:", JSON.stringify(payload).slice(-400));
-	const originalModel = payload.model;
+	const rewrite = applyModelRewrite(payload);
+	const originalModel = rewrite.originalModel;
 	const selectedModel = findModelById(payload.model);
 	try {
 		if (selectedModel) {
@@ -48761,6 +48894,7 @@ async function handleCompletionCore({ body, signal, headers }) {
 	const plan = adapter.toCapiPlan(payload, { requestContext: readCapiRequestContext(headers) });
 	const modelMapping = {
 		originalModel,
+		rewrittenModel: rewrite.model,
 		mappedModel: plan.resolvedModel
 	};
 	const transport = new CopilotTransport(createCopilotClient());
@@ -48774,7 +48908,8 @@ async function handleCompletionCore({ body, signal, headers }) {
 //#endregion
 //#region src/routes/chat-completions/route.ts
 function createCompletionRoutes() {
-	return new Elysia().use(requestGuardPlugin).post("/chat/completions", async function* ({ body, request }) {
+	return new Elysia().use(requestGuardPlugin).post("/chat/completions", async function* ({ body, request, server }) {
+		if (hasStreamingFlag(body)) disableIdleTimeout(server, request);
 		const { result, modelMapping } = await handleCompletionCore({
 			body,
 			signal: request.signal,
@@ -48860,60 +48995,15 @@ async function handleCountTokensCore({ body, headers }) {
 	return { input_tokens: finalTokenCount };
 }
-//#endregion
-//#region src/lib/context-upgrade.ts
-/** Data-driven upgrade rules. Add new entries to extend. */
-const CONTEXT_UPGRADE_RULES = [{
-	from: "claude-opus-4.6",
-	to: "claude-opus-4.6-1m",
-	tokenThreshold: 19e4
-}];
-/** Pre-computed set for fast model eligibility checks (avoids token estimation on non-eligible models). */
-const UPGRADE_ELIGIBLE_MODELS = new Set(CONTEXT_UPGRADE_RULES.map((r) => r.from));
-/**
-* Quick check: does this model have any context-upgrade rules?
-* Use to skip expensive token estimation for ineligible models.
-*/
-function hasContextUpgradeRule(model) {
-	return UPGRADE_ELIGIBLE_MODELS.has(model);
-}
-/** Find the upgrade rule for a model whose target exists in Copilot's model list. */
-function findUpgradeRule(model) {
-	for (const rule of CONTEXT_UPGRADE_RULES) if (model === rule.from && findModelById(rule.to)) return rule;
-}
-/**
-* Proactive: resolve the upgrade target model for a given model + token count.
-* Returns the target model ID, or undefined if no upgrade applies.
-*/
-function resolveContextUpgrade(model, estimatedTokens) {
-	const rule = findUpgradeRule(model);
-	if (rule && estimatedTokens > rule.tokenThreshold) return rule.to;
-}
-/**
-* Reactive: get the upgrade target for a model on context-length error.
-* Returns the target model ID, or undefined if no fallback applies.
-*/
-function getContextUpgradeTarget(model) {
-	return findUpgradeRule(model)?.to;
-}
-/** Context-length error detection with pattern matching */
-const CONTEXT_ERROR_PATTERNS = [
-	/context.length/i,
-	/too.long/i,
-	/token.*(limit|maximum|exceed)/i,
-	/(limit|maximum|exceed).*token/i
-];
-function isContextLengthError(error) {
-	if (!(error instanceof HTTPError) || error.status !== 400) return false;
-	const message = error.body?.error?.message;
-	return message ? CONTEXT_ERROR_PATTERNS.some((pattern) => pattern.test(message)) : false;
-}
 //#endregion
 //#region src/lib/request-model-policy.ts
 const COMPACT_SYSTEM_PROMPT_START = "You are a helpful AI assistant tasked with summarizing conversations";
-function applyMessagesModelPolicy(payload) {
+function applyMessagesModelPolicy(payload, options) {
 	const originalModel = payload.model;
+	if (options?.betaUpgraded) return {
+		originalModel,
+		routedModel: originalModel
+	};
 	if (shouldContextUpgrade() && hasContextUpgradeRule(payload.model)) {
 		const contextUpgradeTarget = resolveContextUpgrade(payload.model, estimateAnthropicInputTokens(payload));
 		if (contextUpgradeTarget) {
@@ -49396,6 +49486,13 @@ function createMessagesViaChatCompletionsStrategy(transport, adapter, plan, sign
 				data: JSON.stringify(event)
 			}));
 		},
+		onStreamDone() {
+			if (!streamTranslator) return null;
+			return streamTranslator.onDone().map((event) => ({
+				event: event.type,
+				data: JSON.stringify(event)
+			}));
+		},
 		shouldBreakStream() {
 			return done;
 		},
@@ -50048,7 +50145,10 @@ function createMessagesViaResponsesStrategy(copilotClient, responsesPayload, opt
 //#endregion
 //#region src/routes/messages/strategy-registry.ts
 function selectStrategy(registry, model) {
-	for (const entry of registry) if (entry.canHandle(model)) return entry;
+	for (const entry of registry) if (entry.canHandle(model)) {
+		consola.debug(`Strategy selected: ${entry.name} for model: ${model?.id ?? "(unknown)"}`);
+		return entry;
+	}
 	return registry.at(-1);
 }
 function filterThinkingBlocksForNativeMessages(anthropicPayload) {
@@ -50142,6 +50242,30 @@ const defaultStrategyRegistry = [
 //#endregion
 //#region src/routes/messages/handler.ts
+const CONTEXT_BETA_RE = /^context-\d+[km]-/;
+function processAnthropicBetaHeader(rawHeader, model) {
+	if (!rawHeader) return {
+		header: void 0,
+		upgradeTarget: void 0
+	};
+	const values = rawHeader.split(",").map((v) => v.trim()).filter(Boolean);
+	let upgradeTarget;
+	const filtered = [];
+	for (const value of values) {
+		if (CONTEXT_BETA_RE.test(value)) {
+			if (!upgradeTarget && shouldContextUpgrade()) {
+				const target = getContextUpgradeTarget(model);
+				if (target) upgradeTarget = target;
+			}
+			continue;
+		}
+		filtered.push(value);
+	}
+	return {
+		header: filtered.length > 0 ? filtered.join(",") : void 0,
+		upgradeTarget
+	};
+}
 /**
 * Core handler for Anthropic messages endpoint.
 * Returns both the execution result and model mapping info.
@@ -50149,10 +50273,17 @@ const defaultStrategyRegistry = [
 async function handleMessagesCore({ body, signal, headers }) {
 	const anthropicPayload = parseAnthropicMessagesPayload(body);
 	if (consola.level >= 4) consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload));
-	const anthropicBetaHeader = headers.get("anthropic-beta") ?? void 0;
-	const modelRouting = applyMessagesModelPolicy(anthropicPayload);
+	const rewrite = applyModelRewrite(anthropicPayload);
+	const betaResult = processAnthropicBetaHeader(headers.get("anthropic-beta"), anthropicPayload.model);
+	if (betaResult.upgradeTarget) {
+		consola.debug(`Beta header context upgrade: ${anthropicPayload.model} → ${betaResult.upgradeTarget}`);
+		anthropicPayload.model = betaResult.upgradeTarget;
+	}
+	const anthropicBetaHeader = betaResult.header;
+	const modelRouting = applyMessagesModelPolicy(anthropicPayload, { betaUpgraded: !!betaResult.upgradeTarget });
 	const modelMapping = {
-		originalModel: modelRouting.originalModel,
+		originalModel: rewrite.originalModel,
+		rewrittenModel: rewrite.model,
 		mappedModel: modelRouting.routedModel
 	};
 	if (modelRouting.reason) consola.debug(`Routed anthropic request via ${modelRouting.reason}:`, `${modelRouting.originalModel} -> ${modelRouting.routedModel}`);
@@ -50186,7 +50317,8 @@ async function handleMessagesCore({ body, signal, headers }) {
 			selectedModel: retryModel,
 			upstreamSignal: retrySignal,
 			modelMapping: {
-				originalModel: modelRouting.originalModel,
+				originalModel: rewrite.originalModel,
+				rewrittenModel: rewrite.model,
 				mappedModel: upgradeTarget
 			}
 		});
@@ -50200,7 +50332,8 @@ async function handleMessagesCore({ body, signal, headers }) {
 //#endregion
 //#region src/routes/messages/route.ts
 function createMessageRoutes() {
-	return new Elysia().use(requestGuardPlugin).post("/messages", async function* ({ body, request }) {
+	return new Elysia().use(requestGuardPlugin).post("/messages", async function* ({ body, request, server }) {
+		if (hasStreamingFlag(body)) disableIdleTimeout(server, request);
 		const { result, modelMapping } = await handleMessagesCore({
 			body,
 			signal: request.signal,
@@ -50305,6 +50438,7 @@ const HTTP_URL_RE = /^https?:\/\//i;
 */
 async function handleResponsesCore({ body, signal, headers }) {
 	const payload = parseResponsesPayload(body);
+	const rewrite = applyModelRewrite(payload);
 	applyResponsesToolTransforms(payload);
 	applyResponsesInputPolicies(payload);
 	compactInputByLatestCompaction(payload);
@@ -50314,12 +50448,19 @@ async function handleResponsesCore({ body, signal, headers }) {
 	applyContextManagement(payload, selectedModel.capabilities.limits.max_prompt_tokens);
 	const { vision, initiator } = getResponsesRequestOptions(payload);
 	const upstreamSignal = createUpstreamSignalFromConfig(signal);
-	return runStrategy(createResponsesPassthroughStrategy(createCopilotClient(), payload, {
-		vision,
-		initiator,
-		requestContext: readCapiRequestContext(headers),
-		signal: upstreamSignal.signal
-	}), upstreamSignal);
+	return {
+		result: await runStrategy(createResponsesPassthroughStrategy(createCopilotClient(), payload, {
+			vision,
+			initiator,
+			requestContext: readCapiRequestContext(headers),
+			signal: upstreamSignal.signal
+		}), upstreamSignal),
+		modelMapping: {
+			originalModel: rewrite.originalModel,
+			rewrittenModel: rewrite.model,
+			mappedModel: payload.model
+		}
+	};
 }
 function applyResponsesToolTransforms(payload) {
 	applyFunctionApplyPatch(payload);
@@ -50454,12 +50595,14 @@ function parseBooleanParam(value) {
 //#endregion
 //#region src/routes/responses/route.ts
 function createResponsesRoutes() {
-	return new Elysia().use(requestGuardPlugin).post("/responses", async function* ({ body, request }) {
-		const result = await handleResponsesCore({
+	return new Elysia().use(requestGuardPlugin).post("/responses", async function* ({ body, request, server }) {
+		if (hasStreamingFlag(body)) disableIdleTimeout(server, request);
+		const { result, modelMapping } = await handleResponsesCore({
 			body,
 			signal: request.signal,
 			headers: request.headers
 		});
+		if (modelMapping) setRequestModelMapping(request, modelMapping);
 		if (result.kind === "json") return result.data;
 		yield* sseAdapter(result.generator);
 	}, { guarded: true }).post("/responses/input_tokens", async ({ body, request }) => {
@@ -50475,7 +50618,8 @@ function createResponsesRoutes() {
 			headers: request.headers,
 			signal: request.signal
 		});
-	}).get("/responses/:responseId", async ({ params, request }) => {
+	}).get("/responses/:responseId", async ({ params, request, server }) => {
+		if (hasStreamingResponsesQuery(request)) disableIdleTimeout(server, request);
 		return handleRetrieveResponseCore({
 			params,
 			url: request.url,