npm - ghc-proxy - Versions diffs - 0.6.0 → 0.6.1 - Mend

ghc-proxy 0.6.0 → 0.6.1

This diff shows the changes between publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (4)

package/README.md CHANGED Viewed

@@ -189,7 +189,8 @@ All fields are optional. The full schema:
 | `modelFallback.claudeHaiku` | `string` | `claude-haiku-4.5` | Fallback for `claude-haiku-*` models |
 | `smallModel` | `string` | -- | Target model for compact request routing (see [Small-Model Routing](#small-model-routing)) |
 | `compactUseSmallModel` | `boolean` | `false` | Route compact/summarization requests to `smallModel` |
-| `contextUpgrade` | `boolean` | `true` | Auto-upgrade to extended-context model variants (see [Context-1M Auto-Upgrade](#context-1m-auto-upgrade)) |
+| `contextUpgrade` | `boolean` | `true` | Enable configured extended-context upgrade rules (see [Context-1M Auto-Upgrade](#context-1m-auto-upgrade)) |
+| `contextUpgradeRules` | `{ from, to }[]` | `[]` | Glob-pattern context upgrade rules used for proactive, reactive, and beta-header upgrades |
 | `contextUpgradeTokenThreshold` | `number` | `160000` | Token threshold for proactive context upgrade |
 | `useFunctionApplyPatch` | `boolean` | `true` | Rewrite `apply_patch` custom tool as function tool on Responses path |
 | `responsesApiAutoCompactInput` | `boolean` | `false` | Automatically trim Responses `input` to the latest `compaction` item |
@@ -213,6 +214,9 @@ Example:
   "smallModel": "gpt-4.1-mini",
   "compactUseSmallModel": true,
   "contextUpgrade": true,
+  "contextUpgradeRules": [
+    { "from": "claude-opus-4.6", "to": "claude-opus-4.6-1m" }
+  ],
   "contextUpgradeTokenThreshold": 160000,
   "useFunctionApplyPatch": true,
   "responsesApiAutoCompactInput": false,
@@ -287,25 +291,45 @@ Rewrites run **before** any other model policy — context upgrades, small-model
 ### Context-1M Auto-Upgrade
-The proxy can automatically upgrade models to their extended-context (1M token) variants when the request is large. This is enabled by default.
-**Proactive upgrade:** Before sending the request, the proxy estimates the input token count. If it exceeds the configured threshold (default: 160,000 tokens), the model is upgraded to its 1M variant before the request is sent.
-**Reactive upgrade:** If the upstream returns a context-length error (e.g. "context length exceeded"), the proxy retries the request with the upgraded model automatically.
+The proxy can automatically upgrade models to extended-context variants when the request is large. Upgrade targets are config-driven so users only route to models their Copilot account can access.
-**Beta header support:** When a client sends an `anthropic-beta: context-*` header (e.g. `context-1m-2025-04-14`), the proxy strips the header (Copilot does not understand it) and upgrades the model to the 1M variant instead.
+**Proactive upgrade:** Before sending the request, the proxy estimates the input token count. If it exceeds the configured threshold (default: 160,000 tokens), the first matching `contextUpgradeRules` entry is applied before the request is sent.
-Current upgrade rules:
+**Reactive upgrade:** If the upstream returns a context-length error (e.g. "context length exceeded"), the proxy retries the request with the configured upgraded model automatically.
-| Source Model | Upgraded Model |
-|-------------|----------------|
-| `claude-opus-4.6` | `claude-opus-4.6-1m` |
+**Beta header support:** When a client sends an `anthropic-beta: context-*` header (e.g. `context-1m-2025-04-14`), the proxy strips the header (Copilot does not understand it) and applies the configured context upgrade rule instead.
 Configuration:
-- `contextUpgrade` (boolean, default `true`) — enable or disable auto-upgrade
+- `contextUpgrade` (boolean, default `true`) — enable or disable configured auto-upgrade rules
+- `contextUpgradeRules` (`{ from, to }[]`, default `[]`) — glob-pattern model upgrade rules; first match wins
 - `contextUpgradeTokenThreshold` (number, default `160000`) — token count threshold for proactive upgrade
+Example for the public Opus 4.6 1M model:
+```json
+{
+  "contextUpgradeRules": [
+    { "from": "claude-opus-4.6", "to": "claude-opus-4.6-1m" }
+  ]
+}
+```
+Example for an enterprise account with access to the Opus 4.7 internal 1M model:
+```json
+{
+  "modelRewrites": [
+    { "from": "claude-opus-*", "to": "claude-opus-4.7" }
+  ],
+  "contextUpgrade": true,
+  "contextUpgradeRules": [
+    { "from": "claude-opus-4.7", "to": "claude-opus-4.7-1m-internal" }
+  ],
+  "contextUpgradeTokenThreshold": 160000
+}
+```
 ### Small-Model Routing
 `/v1/messages` can optionally reroute specific low-value requests to a cheaper model:

package/dist/main.mjs CHANGED Viewed

@@ -5405,6 +5405,10 @@ const configFileSchema = object({
 		from: string(),
 		to: string()
 	})).optional(),
+	contextUpgradeRules: array(object({
+		from: string(),
+		to: string()
+	})).optional(),
 	contextUpgrade: boolean().optional(),
 	contextUpgradeTokenThreshold: number().int().positive().optional(),
 	upstreamQueueConcurrency: number().int().positive().optional(),
@@ -5535,6 +5539,9 @@ var ConfigStore = class {
 	getModelRewrites() {
 		return getCachedConfig().modelRewrites ?? [];
 	}
+	getContextUpgradeRules() {
+		return getCachedConfig().contextUpgradeRules ?? [];
+	}
 	getModelFallback() {
 		return getCachedConfig().modelFallback;
 	}
@@ -7049,7 +7056,7 @@ const checkUsage = defineCommand({
 //#endregion
 //#region src/lib/version.ts
-const VERSION = "0.6.0";
+const VERSION = "0.6.1";
 //#endregion
 //#region src/debug.ts
@@ -47810,23 +47817,19 @@ function matchesGlob(pattern, value) {
 	if (!pattern.includes("*")) return pattern === value;
 	return new RegExp(`^${pattern.replace(GLOB_SPECIAL_RE, "\\$&").replace(GLOB_STAR_RE, ".*")}$`).test(value);
 }
-/** Data-driven upgrade rules. Add new entries to extend. */
-const CONTEXT_UPGRADE_RULES = [{
-	from: "claude-opus-4.6",
-	to: "claude-opus-4.6-1m"
-}];
-/** Pre-computed set for fast model eligibility checks (avoids token estimation on non-eligible models). */
-const UPGRADE_ELIGIBLE_MODELS = new Set(CONTEXT_UPGRADE_RULES.map((r) => r.from));
 /**
-* Quick check: does this model have any context-upgrade rules?
+* Quick check: does this model have any configured context-upgrade rules?
 * Use to skip expensive token estimation for ineligible models.
 */
 function hasContextUpgradeRule(model) {
-	return UPGRADE_ELIGIBLE_MODELS.has(model);
+	return configStore.getContextUpgradeRules().some((rule) => matchesGlob(rule.from, model));
 }
-/** Find the upgrade rule for a model whose target exists in Copilot's model list. */
+/** Find the first configured upgrade rule for a model. */
 function findUpgradeRule(model) {
-	for (const rule of CONTEXT_UPGRADE_RULES) if (model === rule.from && modelCache.findById(rule.to)) return rule;
+	for (const rule of configStore.getContextUpgradeRules()) if (matchesGlob(rule.from, model)) return {
+		from: rule.from,
+		to: normalizeToKnownModel(rule.to) ?? rule.to
+	};
 }
 /**
 * Proactive: resolve the upgrade target model for a given model + token count.
@@ -47991,14 +47994,9 @@ function stripSubagentMarkerFromAnthropicPayload(payload) {
 		payload.system = result.text || void 0;
 		marker ??= result.marker;
 	} else if (Array.isArray(payload.system)) {
-		payload.system = payload.system.map((block) => {
-			const result = stripSubagentMarkerFromText(block.text);
-			marker ??= result.marker;
-			return result.text ? {
-				...block,
-				text: result.text
-			} : void 0;
-		}).filter((block) => block !== void 0);
+		const result = stripSubagentMarkerFromTextBlocks(payload.system);
+		payload.system = result.blocks;
+		marker ??= result.marker;
 		if (payload.system.length === 0) payload.system = void 0;
 	}
 	payload.messages = payload.messages.map((message) => {
@@ -48038,6 +48036,16 @@ function sanitizeAnthropicMessage(message) {
 			} : void 0
 		};
 	}
+	if (message.role === "system") {
+		const result = stripSubagentMarkerFromTextBlocks(message.content);
+		return {
+			marker: result.marker,
+			message: result.blocks.length > 0 ? {
+				...message,
+				content: result.blocks
+			} : void 0
+		};
+	}
 	let marker;
 	const content = message.content.map((block) => {
 		if (block.type !== "text") return block;
@@ -48056,6 +48064,20 @@ function sanitizeAnthropicMessage(message) {
 		} : void 0
 	};
 }
+function stripSubagentMarkerFromTextBlocks(blocks) {
+	let marker;
+	return {
+		blocks: blocks.map((block) => {
+			const result = stripSubagentMarkerFromText(block.text);
+			marker ??= result.marker;
+			return result.text ? {
+				...block,
+				text: result.text
+			} : void 0;
+		}).filter((block) => block !== void 0),
+		marker
+	};
+}
 function stripSubagentMarkerFromChatPayload(payload) {
 	let marker;
 	payload.messages = payload.messages.map((message) => {
@@ -48642,7 +48664,15 @@ const anthropicAssistantMessageSchema = object({
 		anthropicServerToolResultBlockSchema
 	]))])
 }).loose();
-const anthropicMessageSchema = union([anthropicUserMessageSchema, anthropicAssistantMessageSchema]);
+const anthropicSystemMessageSchema = object({
+	role: literal("system"),
+	content: union([string(), array(anthropicTextBlockSchema)])
+}).loose();
+const anthropicMessageSchema = union([
+	anthropicUserMessageSchema,
+	anthropicAssistantMessageSchema,
+	anthropicSystemMessageSchema
+]);
 const anthropicToolSchema = object({
 	name: string().min(1),
 	description: string().optional(),
@@ -48665,13 +48695,23 @@ const anthropicThinkingSchema = union([
 		budget_tokens: number().int().positive()
 	}).loose()
 ]);
-const anthropicOutputConfigSchema = object({ effort: _enum([
-	"low",
-	"medium",
-	"high",
-	"max",
-	"xhigh"
-]).nullable().optional() }).loose();
+const anthropicOutputFormatSchema = object({
+	type: literal("json_schema"),
+	schema: jsonObjectSchema,
+	name: string().min(1).optional(),
+	description: string().nullable().optional(),
+	strict: boolean().optional()
+}).strict();
+const anthropicOutputConfigSchema = object({
+	effort: _enum([
+		"low",
+		"medium",
+		"high",
+		"max",
+		"xhigh"
+	]).nullable().optional(),
+	format: anthropicOutputFormatSchema.optional()
+}).strict();
 const anthropicMessagesBasePayloadSchema = object({
 	model: string().min(1),
 	messages: array(anthropicMessageSchema).min(1),
@@ -49322,7 +49362,7 @@ async function runPipeline(params, config) {
 				...payload,
 				model
 			} : payload;
-			const currentModel = isRetry ? modelCache.findById(model) : selectedModel;
+			const currentModel = isRetry ? modelCache.findById(model) ?? selectedModel : selectedModel;
 			const ctx = config.buildStrategyContext({
 				payload: effectivePayload,
 				meta,
@@ -49332,7 +49372,7 @@ async function runPipeline(params, config) {
 				upstreamSignal: isRetry ? createUpstreamSignalFromConfig(params.signal) : upstreamSignal,
 				modelMapping: currentMapping
 			});
-			const entryResult = await config.strategyRegistry.select(currentModel).execute(ctx);
+			const entryResult = await config.strategyRegistry.select(currentModel, ctx).execute(ctx);
 			if (isRetry) modelMapping.steps = currentMapping.steps;
 			return entryResult;
 		}, {
@@ -49347,7 +49387,7 @@ async function runPipeline(params, config) {
 	};
 	const ctx = buildCtx();
 	return {
-		result: await config.strategyRegistry.select(selectedModel).execute(ctx),
+		result: await config.strategyRegistry.select(selectedModel, ctx).execute(ctx),
 		modelMapping
 	};
 }
@@ -49358,6 +49398,7 @@ const CONTEXT_BETA_RE = /^context-\d+[km]-/;
 //#endregion
 //#region src/transform/beta-headers.ts
+const COPILOT_UNSUPPORTED_BETA_RE = /^mid-conversation-system-\d{4}-\d{2}-\d{2}$/;
 function processAnthropicBetaHeader(rawHeader, model) {
 	if (!rawHeader) return {
 		header: void 0,
@@ -49374,6 +49415,7 @@ function processAnthropicBetaHeader(rawHeader, model) {
 			}
 			continue;
 		}
+		if (COPILOT_UNSUPPORTED_BETA_RE.test(value)) continue;
 		filtered.push(value);
 	}
 	return {
@@ -49383,12 +49425,14 @@ function processAnthropicBetaHeader(rawHeader, model) {
 }
 const betaHeaderStep = {
 	tag: "BETA_UPGRADE",
-	apply({ model, headers }) {
+	apply({ model, headers, resolvedModel }) {
 		if (!headers) return null;
 		const result = processAnthropicBetaHeader(headers.get("anthropic-beta"), model);
 		if (!result.upgradeTarget) return null;
 		return {
 			model: result.upgradeTarget,
+			tag: "BETA_UPGRADE",
+			resolvedModel: modelCache.findById(result.upgradeTarget) ?? resolvedModel ?? modelCache.findById(model),
 			mutatePayload(payload) {
 				if (payload && typeof payload === "object" && "model" in payload) payload.model = result.upgradeTarget;
 			}
@@ -49509,12 +49553,13 @@ function containsVisionContent$1(content) {
 //#region src/transform/policy.ts
 const modelPolicyStep = {
 	tag: "POLICY",
-	apply({ payload, meta }) {
+	apply({ model, payload, meta, resolvedModel }) {
 		const routing = applyMessagesModelPolicy(payload, { betaUpgraded: meta?.betaHeaders?.some((b) => CONTEXT_BETA_RE.test(b)) ?? false });
 		if (!routing.reason) return null;
 		return {
 			model: routing.routedModel,
-			tag: routing.reason === "context-upgrade" ? "CONTEXT_UPGRADE" : "COMPACT"
+			tag: routing.reason === "context-upgrade" ? "CONTEXT_UPGRADE" : "COMPACT",
+			resolvedModel: routing.reason === "context-upgrade" ? modelCache.findById(routing.routedModel) ?? resolvedModel ?? modelCache.findById(model) : void 0
 		};
 	}
 };
@@ -49623,6 +49668,9 @@ function normalizeOutputConfigEffort(effort, model) {
 		return (OUTPUT_CONFIG_EFFORT_RANK.get(current) ?? -1) > highestRank ? current : highest;
 	});
 }
+function hasOutputConfigFormat(payload) {
+	return payload?.output_config?.format != null;
+}
 function sanitizeOutputConfig(payload, model) {
 	if (!payload.output_config) return;
 	if (!modelCache.supportsOutputConfig(model)) {
@@ -49631,12 +49679,10 @@ function sanitizeOutputConfig(payload, model) {
 	}
 	const effort = payload.output_config.effort;
 	if (effort == null) {
-		delete payload.output_config.effort;
-		if (Object.keys(payload.output_config).length === 0) delete payload.output_config;
+		delete payload.output_config;
 		return;
 	}
-	const normalizedEffort = normalizeOutputConfigEffort(effort, model);
-	if (normalizedEffort) payload.output_config.effort = normalizedEffort;
+	payload.output_config = { effort: normalizeOutputConfigEffort(effort, model) ?? effort };
 }
 function normalizeCacheControlBlock(obj) {
 	if (obj.cache_control && typeof obj.cache_control === "object") obj.cache_control = { type: obj.cache_control.type };
@@ -50900,9 +50946,9 @@ var StrategyRegistry = class {
 	register(entry) {
 		this.entries.push(entry);
 	}
-	select(model) {
+	select(model, ctx) {
 		if (this.entries.length === 0) throw new Error("StrategyRegistry has no registered entries");
-		for (const entry of this.entries) if (entry.canHandle(model)) {
+		for (const entry of this.entries) if (entry.canHandle(model, ctx)) {
 			consola.debug(`Strategy selected: ${entry.name} for model: ${model?.id ?? "(unknown)"}`);
 			return entry;
 		}
@@ -51202,6 +51248,7 @@ function translateAnthropicToResponsesPayload(payload, options) {
 	for (const message of payload.messages) input.push(...translateMessage(message));
 	const { safetyIdentifier, promptCacheKey } = parseUserId(payload.metadata?.user_id);
 	const reasoning = resolveResponsesReasoningConfig(payload, options);
+	const text = resolveResponsesTextConfig(payload);
 	return {
 		model: payload.model,
 		input,
@@ -51217,6 +51264,7 @@ function translateAnthropicToResponsesPayload(payload, options) {
 		stream: payload.stream ?? null,
 		store: false,
 		parallel_tool_calls: true,
+		...text ? { text } : {},
 		...reasoning ? {
 			reasoning,
 			include: ["reasoning.encrypted_content"]
@@ -51227,8 +51275,16 @@ function decodeCompactionCarrierSignature(signature) {
 	return SignatureCodec.decodeCompaction(signature);
 }
 function translateMessage(message) {
-	if (message.role === "user") return translateUserMessage(message);
-	return translateAssistantMessage(message);
+	switch (message.role) {
+		case "user": return translateUserMessage(message);
+		case "assistant": return translateAssistantMessage(message);
+		case "system": return translateSystemMessage(message);
+	}
+}
+function translateSystemMessage(message) {
+	if (typeof message.content === "string") return [createMessage("system", message.content)];
+	if (!Array.isArray(message.content)) return [];
+	return [createMessage("system", message.content.map((block) => createTextContent(block.text)))];
 }
 function translateUserMessage(message) {
 	if (typeof message.content === "string") return [createMessage("user", message.content)];
@@ -51492,6 +51548,19 @@ function resolveResponsesReasoningConfig(payload, options) {
 		summary: effort === "none" ? null : "detailed"
 	};
 }
+function resolveResponsesTextConfig(payload) {
+	const format = payload.output_config?.format;
+	if (!format) return;
+	switch (format.type) {
+		case "json_schema": return { format: {
+			type: "json_schema",
+			name: format.name ?? "anthropic_output",
+			schema: format.schema,
+			...format.description !== void 0 ? { description: format.description } : {},
+			...format.strict !== void 0 ? { strict: format.strict } : {}
+		} };
+	}
+}
 function resolveResponsesReasoningEffort(payload, options) {
 	if (payload.thinking?.type === "disabled") return "none";
 	if (payload.output_config?.effort) return mapAnthropicEffortToResponses(payload.output_config.effort);
@@ -51683,6 +51752,7 @@ function sanitizeNativeMessagesPayloadForCopilot(payload) {
 				...message,
 				content: message.content.map(sanitizeUserContentBlock)
 			};
+			if (message.role === "system") return message;
 			return {
 				...message,
 				content: message.content.map(sanitizeAssistantContentBlock)
@@ -52352,7 +52422,7 @@ function createMessagesViaResponsesStrategy(copilotClient, responsesPayload, opt
 //#region src/routes/messages/strategy-registry.ts
 const nativeMessagesEntry = {
 	name: "native-messages",
-	canHandle: (model) => modelCache.supportsEndpoint(model, MESSAGES_ENDPOINT),
+	canHandle: (model, ctx) => modelCache.supportsEndpoint(model, MESSAGES_ENDPOINT) && !hasOutputConfigFormat(ctx?.anthropicPayload),
 	async execute(ctx) {
 		filterThinkingBlocksForNativeMessages(ctx.anthropicPayload);
 		sanitizeOutputConfig(ctx.anthropicPayload, ctx.selectedModel);
@@ -52383,6 +52453,7 @@ const chatCompletionsEntry = {
 	name: "chat-completions",
 	canHandle: () => true,
 	async execute(ctx) {
+		if (hasOutputConfigFormat(ctx.anthropicPayload)) throwInvalidRequestError("Anthropic output_config.format requires a model with Responses endpoint support.", "output_config.format", "unsupported_output_config_format");
 		const adapter = createAnthropicAdapter();
 		const plan = withTranslationErrors(() => adapter.toCapiPlan(ctx.anthropicPayload, { requestContext: ctx.requestContext }));
 		appendModelStepInPlace(ctx.modelMapping, "MODEL_RESOLVE", plan.resolvedModel);