ghc-proxy 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -189,7 +189,8 @@ All fields are optional. The full schema:
189
189
  | `modelFallback.claudeHaiku` | `string` | `claude-haiku-4.5` | Fallback for `claude-haiku-*` models |
190
190
  | `smallModel` | `string` | -- | Target model for compact request routing (see [Small-Model Routing](#small-model-routing)) |
191
191
  | `compactUseSmallModel` | `boolean` | `false` | Route compact/summarization requests to `smallModel` |
192
- | `contextUpgrade` | `boolean` | `true` | Auto-upgrade to extended-context model variants (see [Context-1M Auto-Upgrade](#context-1m-auto-upgrade)) |
192
+ | `contextUpgrade` | `boolean` | `true` | Enable configured extended-context upgrade rules (see [Context-1M Auto-Upgrade](#context-1m-auto-upgrade)) |
193
+ | `contextUpgradeRules` | `{ from, to }[]` | `[]` | Glob-pattern context upgrade rules used for proactive, reactive, and beta-header upgrades |
193
194
  | `contextUpgradeTokenThreshold` | `number` | `160000` | Token threshold for proactive context upgrade |
194
195
  | `useFunctionApplyPatch` | `boolean` | `true` | Rewrite `apply_patch` custom tool as function tool on Responses path |
195
196
  | `responsesApiAutoCompactInput` | `boolean` | `false` | Automatically trim Responses `input` to the latest `compaction` item |
@@ -213,6 +214,9 @@ Example:
213
214
  "smallModel": "gpt-4.1-mini",
214
215
  "compactUseSmallModel": true,
215
216
  "contextUpgrade": true,
217
+ "contextUpgradeRules": [
218
+ { "from": "claude-opus-4.6", "to": "claude-opus-4.6-1m" }
219
+ ],
216
220
  "contextUpgradeTokenThreshold": 160000,
217
221
  "useFunctionApplyPatch": true,
218
222
  "responsesApiAutoCompactInput": false,
@@ -287,25 +291,45 @@ Rewrites run **before** any other model policy — context upgrades, small-model
287
291
 
288
292
  ### Context-1M Auto-Upgrade
289
293
 
290
- The proxy can automatically upgrade models to their extended-context (1M token) variants when the request is large. This is enabled by default.
291
-
292
- **Proactive upgrade:** Before sending the request, the proxy estimates the input token count. If it exceeds the configured threshold (default: 160,000 tokens), the model is upgraded to its 1M variant before the request is sent.
293
-
294
- **Reactive upgrade:** If the upstream returns a context-length error (e.g. "context length exceeded"), the proxy retries the request with the upgraded model automatically.
294
+ The proxy can automatically upgrade models to extended-context variants when the request is large. Upgrade targets are config-driven so users only route to models their Copilot account can access.
295
295
 
296
- **Beta header support:** When a client sends an `anthropic-beta: context-*` header (e.g. `context-1m-2025-04-14`), the proxy strips the header (Copilot does not understand it) and upgrades the model to the 1M variant instead.
296
+ **Proactive upgrade:** Before sending the request, the proxy estimates the input token count. If it exceeds the configured threshold (default: 160,000 tokens), the first matching `contextUpgradeRules` entry is applied before the request is sent.
297
297
 
298
- Current upgrade rules:
298
+ **Reactive upgrade:** If the upstream returns a context-length error (e.g. "context length exceeded"), the proxy automatically retries the request with the target model of the first matching `contextUpgradeRules` entry.
299
299
 
300
- | Source Model | Upgraded Model |
301
- |-------------|----------------|
302
- | `claude-opus-4.6` | `claude-opus-4.6-1m` |
300
+ **Beta header support:** When a client sends an `anthropic-beta: context-*` header (e.g. `context-1m-2025-04-14`), the proxy strips the header (Copilot does not understand it) and applies the first matching `contextUpgradeRules` entry instead.
303
301
 
304
302
  Configuration:
305
303
 
306
- - `contextUpgrade` (boolean, default `true`) — enable or disable auto-upgrade
304
+ - `contextUpgrade` (boolean, default `true`) — enable or disable configured auto-upgrade rules
305
+ - `contextUpgradeRules` (`{ from, to }[]`, default `[]`) — glob-pattern model upgrade rules; first match wins. With the default empty list no model is upgraded, even when `contextUpgrade` is `true`
307
306
  - `contextUpgradeTokenThreshold` (number, default `160000`) — token count threshold for proactive upgrade
308
307
 
308
+ Example for the public Opus 4.6 1M model:
309
+
310
+ ```json
311
+ {
312
+ "contextUpgradeRules": [
313
+ { "from": "claude-opus-4.6", "to": "claude-opus-4.6-1m" }
314
+ ]
315
+ }
316
+ ```
317
+
318
+ Example for an enterprise account with access to the Opus 4.7 internal 1M model:
319
+
320
+ ```json
321
+ {
322
+ "modelRewrites": [
323
+ { "from": "claude-opus-*", "to": "claude-opus-4.7" }
324
+ ],
325
+ "contextUpgrade": true,
326
+ "contextUpgradeRules": [
327
+ { "from": "claude-opus-4.7", "to": "claude-opus-4.7-1m-internal" }
328
+ ],
329
+ "contextUpgradeTokenThreshold": 160000
330
+ }
331
+ ```
332
+
309
333
  ### Small-Model Routing
310
334
 
311
335
  `/v1/messages` can optionally reroute specific low-value requests to a cheaper model:
package/dist/main.mjs CHANGED
@@ -5405,6 +5405,10 @@ const configFileSchema = object({
5405
5405
  from: string(),
5406
5406
  to: string()
5407
5407
  })).optional(),
5408
+ contextUpgradeRules: array(object({
5409
+ from: string(),
5410
+ to: string()
5411
+ })).optional(),
5408
5412
  contextUpgrade: boolean().optional(),
5409
5413
  contextUpgradeTokenThreshold: number().int().positive().optional(),
5410
5414
  upstreamQueueConcurrency: number().int().positive().optional(),
@@ -5535,6 +5539,9 @@ var ConfigStore = class {
5535
5539
  getModelRewrites() {
5536
5540
  return getCachedConfig().modelRewrites ?? [];
5537
5541
  }
5542
+ getContextUpgradeRules() {
5543
+ return getCachedConfig().contextUpgradeRules ?? [];
5544
+ }
5538
5545
  getModelFallback() {
5539
5546
  return getCachedConfig().modelFallback;
5540
5547
  }
@@ -7049,7 +7056,7 @@ const checkUsage = defineCommand({
7049
7056
 
7050
7057
  //#endregion
7051
7058
  //#region src/lib/version.ts
7052
- const VERSION = "0.6.0";
7059
+ const VERSION = "0.6.1";
7053
7060
 
7054
7061
  //#endregion
7055
7062
  //#region src/debug.ts
@@ -47810,23 +47817,19 @@ function matchesGlob(pattern, value) {
47810
47817
  if (!pattern.includes("*")) return pattern === value;
47811
47818
  return new RegExp(`^${pattern.replace(GLOB_SPECIAL_RE, "\\$&").replace(GLOB_STAR_RE, ".*")}$`).test(value);
47812
47819
  }
47813
- /** Data-driven upgrade rules. Add new entries to extend. */
47814
- const CONTEXT_UPGRADE_RULES = [{
47815
- from: "claude-opus-4.6",
47816
- to: "claude-opus-4.6-1m"
47817
- }];
47818
- /** Pre-computed set for fast model eligibility checks (avoids token estimation on non-eligible models). */
47819
- const UPGRADE_ELIGIBLE_MODELS = new Set(CONTEXT_UPGRADE_RULES.map((r) => r.from));
47820
47820
  /**
47821
- * Quick check: does this model have any context-upgrade rules?
47821
+ * Quick check: does this model have any configured context-upgrade rules?
47822
47822
  * Use to skip expensive token estimation for ineligible models.
47823
47823
  */
47824
47824
  function hasContextUpgradeRule(model) {
47825
- return UPGRADE_ELIGIBLE_MODELS.has(model);
47825
+ return configStore.getContextUpgradeRules().some((rule) => matchesGlob(rule.from, model));
47826
47826
  }
47827
- /** Find the upgrade rule for a model whose target exists in Copilot's model list. */
47827
+ /** Find the first configured upgrade rule for a model. */
47828
47828
  function findUpgradeRule(model) {
47829
- for (const rule of CONTEXT_UPGRADE_RULES) if (model === rule.from && modelCache.findById(rule.to)) return rule;
47829
+ for (const rule of configStore.getContextUpgradeRules()) if (matchesGlob(rule.from, model)) return {
47830
+ from: rule.from,
47831
+ to: normalizeToKnownModel(rule.to) ?? rule.to
47832
+ };
47830
47833
  }
47831
47834
  /**
47832
47835
  * Proactive: resolve the upgrade target model for a given model + token count.
@@ -47991,14 +47994,9 @@ function stripSubagentMarkerFromAnthropicPayload(payload) {
47991
47994
  payload.system = result.text || void 0;
47992
47995
  marker ??= result.marker;
47993
47996
  } else if (Array.isArray(payload.system)) {
47994
- payload.system = payload.system.map((block) => {
47995
- const result = stripSubagentMarkerFromText(block.text);
47996
- marker ??= result.marker;
47997
- return result.text ? {
47998
- ...block,
47999
- text: result.text
48000
- } : void 0;
48001
- }).filter((block) => block !== void 0);
47997
+ const result = stripSubagentMarkerFromTextBlocks(payload.system);
47998
+ payload.system = result.blocks;
47999
+ marker ??= result.marker;
48002
48000
  if (payload.system.length === 0) payload.system = void 0;
48003
48001
  }
48004
48002
  payload.messages = payload.messages.map((message) => {
@@ -48038,6 +48036,16 @@ function sanitizeAnthropicMessage(message) {
48038
48036
  } : void 0
48039
48037
  };
48040
48038
  }
48039
+ if (message.role === "system") {
48040
+ const result = stripSubagentMarkerFromTextBlocks(message.content);
48041
+ return {
48042
+ marker: result.marker,
48043
+ message: result.blocks.length > 0 ? {
48044
+ ...message,
48045
+ content: result.blocks
48046
+ } : void 0
48047
+ };
48048
+ }
48041
48049
  let marker;
48042
48050
  const content = message.content.map((block) => {
48043
48051
  if (block.type !== "text") return block;
@@ -48056,6 +48064,20 @@ function sanitizeAnthropicMessage(message) {
48056
48064
  } : void 0
48057
48065
  };
48058
48066
  }
48067
+ function stripSubagentMarkerFromTextBlocks(blocks) {
48068
+ let marker;
48069
+ return {
48070
+ blocks: blocks.map((block) => {
48071
+ const result = stripSubagentMarkerFromText(block.text);
48072
+ marker ??= result.marker;
48073
+ return result.text ? {
48074
+ ...block,
48075
+ text: result.text
48076
+ } : void 0;
48077
+ }).filter((block) => block !== void 0),
48078
+ marker
48079
+ };
48080
+ }
48059
48081
  function stripSubagentMarkerFromChatPayload(payload) {
48060
48082
  let marker;
48061
48083
  payload.messages = payload.messages.map((message) => {
@@ -48642,7 +48664,15 @@ const anthropicAssistantMessageSchema = object({
48642
48664
  anthropicServerToolResultBlockSchema
48643
48665
  ]))])
48644
48666
  }).loose();
48645
- const anthropicMessageSchema = union([anthropicUserMessageSchema, anthropicAssistantMessageSchema]);
48667
+ const anthropicSystemMessageSchema = object({
48668
+ role: literal("system"),
48669
+ content: union([string(), array(anthropicTextBlockSchema)])
48670
+ }).loose();
48671
+ const anthropicMessageSchema = union([
48672
+ anthropicUserMessageSchema,
48673
+ anthropicAssistantMessageSchema,
48674
+ anthropicSystemMessageSchema
48675
+ ]);
48646
48676
  const anthropicToolSchema = object({
48647
48677
  name: string().min(1),
48648
48678
  description: string().optional(),
@@ -48665,13 +48695,23 @@ const anthropicThinkingSchema = union([
48665
48695
  budget_tokens: number().int().positive()
48666
48696
  }).loose()
48667
48697
  ]);
48668
- const anthropicOutputConfigSchema = object({ effort: _enum([
48669
- "low",
48670
- "medium",
48671
- "high",
48672
- "max",
48673
- "xhigh"
48674
- ]).nullable().optional() }).loose();
48698
+ const anthropicOutputFormatSchema = object({
48699
+ type: literal("json_schema"),
48700
+ schema: jsonObjectSchema,
48701
+ name: string().min(1).optional(),
48702
+ description: string().nullable().optional(),
48703
+ strict: boolean().optional()
48704
+ }).strict();
48705
+ const anthropicOutputConfigSchema = object({
48706
+ effort: _enum([
48707
+ "low",
48708
+ "medium",
48709
+ "high",
48710
+ "max",
48711
+ "xhigh"
48712
+ ]).nullable().optional(),
48713
+ format: anthropicOutputFormatSchema.optional()
48714
+ }).strict();
48675
48715
  const anthropicMessagesBasePayloadSchema = object({
48676
48716
  model: string().min(1),
48677
48717
  messages: array(anthropicMessageSchema).min(1),
@@ -49322,7 +49362,7 @@ async function runPipeline(params, config) {
49322
49362
  ...payload,
49323
49363
  model
49324
49364
  } : payload;
49325
- const currentModel = isRetry ? modelCache.findById(model) : selectedModel;
49365
+ const currentModel = isRetry ? modelCache.findById(model) ?? selectedModel : selectedModel;
49326
49366
  const ctx = config.buildStrategyContext({
49327
49367
  payload: effectivePayload,
49328
49368
  meta,
@@ -49332,7 +49372,7 @@ async function runPipeline(params, config) {
49332
49372
  upstreamSignal: isRetry ? createUpstreamSignalFromConfig(params.signal) : upstreamSignal,
49333
49373
  modelMapping: currentMapping
49334
49374
  });
49335
- const entryResult = await config.strategyRegistry.select(currentModel).execute(ctx);
49375
+ const entryResult = await config.strategyRegistry.select(currentModel, ctx).execute(ctx);
49336
49376
  if (isRetry) modelMapping.steps = currentMapping.steps;
49337
49377
  return entryResult;
49338
49378
  }, {
@@ -49347,7 +49387,7 @@ async function runPipeline(params, config) {
49347
49387
  };
49348
49388
  const ctx = buildCtx();
49349
49389
  return {
49350
- result: await config.strategyRegistry.select(selectedModel).execute(ctx),
49390
+ result: await config.strategyRegistry.select(selectedModel, ctx).execute(ctx),
49351
49391
  modelMapping
49352
49392
  };
49353
49393
  }
@@ -49358,6 +49398,7 @@ const CONTEXT_BETA_RE = /^context-\d+[km]-/;
49358
49398
 
49359
49399
  //#endregion
49360
49400
  //#region src/transform/beta-headers.ts
49401
+ const COPILOT_UNSUPPORTED_BETA_RE = /^mid-conversation-system-\d{4}-\d{2}-\d{2}$/;
49361
49402
  function processAnthropicBetaHeader(rawHeader, model) {
49362
49403
  if (!rawHeader) return {
49363
49404
  header: void 0,
@@ -49374,6 +49415,7 @@ function processAnthropicBetaHeader(rawHeader, model) {
49374
49415
  }
49375
49416
  continue;
49376
49417
  }
49418
+ if (COPILOT_UNSUPPORTED_BETA_RE.test(value)) continue;
49377
49419
  filtered.push(value);
49378
49420
  }
49379
49421
  return {
@@ -49383,12 +49425,14 @@ function processAnthropicBetaHeader(rawHeader, model) {
49383
49425
  }
49384
49426
  const betaHeaderStep = {
49385
49427
  tag: "BETA_UPGRADE",
49386
- apply({ model, headers }) {
49428
+ apply({ model, headers, resolvedModel }) {
49387
49429
  if (!headers) return null;
49388
49430
  const result = processAnthropicBetaHeader(headers.get("anthropic-beta"), model);
49389
49431
  if (!result.upgradeTarget) return null;
49390
49432
  return {
49391
49433
  model: result.upgradeTarget,
49434
+ tag: "BETA_UPGRADE",
49435
+ resolvedModel: modelCache.findById(result.upgradeTarget) ?? resolvedModel ?? modelCache.findById(model),
49392
49436
  mutatePayload(payload) {
49393
49437
  if (payload && typeof payload === "object" && "model" in payload) payload.model = result.upgradeTarget;
49394
49438
  }
@@ -49509,12 +49553,13 @@ function containsVisionContent$1(content) {
49509
49553
  //#region src/transform/policy.ts
49510
49554
  const modelPolicyStep = {
49511
49555
  tag: "POLICY",
49512
- apply({ payload, meta }) {
49556
+ apply({ model, payload, meta, resolvedModel }) {
49513
49557
  const routing = applyMessagesModelPolicy(payload, { betaUpgraded: meta?.betaHeaders?.some((b) => CONTEXT_BETA_RE.test(b)) ?? false });
49514
49558
  if (!routing.reason) return null;
49515
49559
  return {
49516
49560
  model: routing.routedModel,
49517
- tag: routing.reason === "context-upgrade" ? "CONTEXT_UPGRADE" : "COMPACT"
49561
+ tag: routing.reason === "context-upgrade" ? "CONTEXT_UPGRADE" : "COMPACT",
49562
+ resolvedModel: routing.reason === "context-upgrade" ? modelCache.findById(routing.routedModel) ?? resolvedModel ?? modelCache.findById(model) : void 0
49518
49563
  };
49519
49564
  }
49520
49565
  };
@@ -49623,6 +49668,9 @@ function normalizeOutputConfigEffort(effort, model) {
49623
49668
  return (OUTPUT_CONFIG_EFFORT_RANK.get(current) ?? -1) > highestRank ? current : highest;
49624
49669
  });
49625
49670
  }
49671
+ function hasOutputConfigFormat(payload) {
49672
+ return payload?.output_config?.format != null;
49673
+ }
49626
49674
  function sanitizeOutputConfig(payload, model) {
49627
49675
  if (!payload.output_config) return;
49628
49676
  if (!modelCache.supportsOutputConfig(model)) {
@@ -49631,12 +49679,10 @@ function sanitizeOutputConfig(payload, model) {
49631
49679
  }
49632
49680
  const effort = payload.output_config.effort;
49633
49681
  if (effort == null) {
49634
- delete payload.output_config.effort;
49635
- if (Object.keys(payload.output_config).length === 0) delete payload.output_config;
49682
+ delete payload.output_config;
49636
49683
  return;
49637
49684
  }
49638
- const normalizedEffort = normalizeOutputConfigEffort(effort, model);
49639
- if (normalizedEffort) payload.output_config.effort = normalizedEffort;
49685
+ payload.output_config = { effort: normalizeOutputConfigEffort(effort, model) ?? effort };
49640
49686
  }
49641
49687
  function normalizeCacheControlBlock(obj) {
49642
49688
  if (obj.cache_control && typeof obj.cache_control === "object") obj.cache_control = { type: obj.cache_control.type };
@@ -50900,9 +50946,9 @@ var StrategyRegistry = class {
50900
50946
  register(entry) {
50901
50947
  this.entries.push(entry);
50902
50948
  }
50903
- select(model) {
50949
+ select(model, ctx) {
50904
50950
  if (this.entries.length === 0) throw new Error("StrategyRegistry has no registered entries");
50905
- for (const entry of this.entries) if (entry.canHandle(model)) {
50951
+ for (const entry of this.entries) if (entry.canHandle(model, ctx)) {
50906
50952
  consola.debug(`Strategy selected: ${entry.name} for model: ${model?.id ?? "(unknown)"}`);
50907
50953
  return entry;
50908
50954
  }
@@ -51202,6 +51248,7 @@ function translateAnthropicToResponsesPayload(payload, options) {
51202
51248
  for (const message of payload.messages) input.push(...translateMessage(message));
51203
51249
  const { safetyIdentifier, promptCacheKey } = parseUserId(payload.metadata?.user_id);
51204
51250
  const reasoning = resolveResponsesReasoningConfig(payload, options);
51251
+ const text = resolveResponsesTextConfig(payload);
51205
51252
  return {
51206
51253
  model: payload.model,
51207
51254
  input,
@@ -51217,6 +51264,7 @@ function translateAnthropicToResponsesPayload(payload, options) {
51217
51264
  stream: payload.stream ?? null,
51218
51265
  store: false,
51219
51266
  parallel_tool_calls: true,
51267
+ ...text ? { text } : {},
51220
51268
  ...reasoning ? {
51221
51269
  reasoning,
51222
51270
  include: ["reasoning.encrypted_content"]
@@ -51227,8 +51275,16 @@ function decodeCompactionCarrierSignature(signature) {
51227
51275
  return SignatureCodec.decodeCompaction(signature);
51228
51276
  }
51229
51277
  function translateMessage(message) {
51230
- if (message.role === "user") return translateUserMessage(message);
51231
- return translateAssistantMessage(message);
51278
+ switch (message.role) {
51279
+ case "user": return translateUserMessage(message);
51280
+ case "assistant": return translateAssistantMessage(message);
51281
+ case "system": return translateSystemMessage(message);
51282
+ }
51283
+ }
51284
+ function translateSystemMessage(message) {
51285
+ if (typeof message.content === "string") return [createMessage("system", message.content)];
51286
+ if (!Array.isArray(message.content)) return [];
51287
+ return [createMessage("system", message.content.map((block) => createTextContent(block.text)))];
51232
51288
  }
51233
51289
  function translateUserMessage(message) {
51234
51290
  if (typeof message.content === "string") return [createMessage("user", message.content)];
@@ -51492,6 +51548,19 @@ function resolveResponsesReasoningConfig(payload, options) {
51492
51548
  summary: effort === "none" ? null : "detailed"
51493
51549
  };
51494
51550
  }
51551
+ function resolveResponsesTextConfig(payload) {
51552
+ const format = payload.output_config?.format;
51553
+ if (!format) return;
51554
+ switch (format.type) {
51555
+ case "json_schema": return { format: {
51556
+ type: "json_schema",
51557
+ name: format.name ?? "anthropic_output",
51558
+ schema: format.schema,
51559
+ ...format.description !== void 0 ? { description: format.description } : {},
51560
+ ...format.strict !== void 0 ? { strict: format.strict } : {}
51561
+ } };
51562
+ }
51563
+ }
51495
51564
  function resolveResponsesReasoningEffort(payload, options) {
51496
51565
  if (payload.thinking?.type === "disabled") return "none";
51497
51566
  if (payload.output_config?.effort) return mapAnthropicEffortToResponses(payload.output_config.effort);
@@ -51683,6 +51752,7 @@ function sanitizeNativeMessagesPayloadForCopilot(payload) {
51683
51752
  ...message,
51684
51753
  content: message.content.map(sanitizeUserContentBlock)
51685
51754
  };
51755
+ if (message.role === "system") return message;
51686
51756
  return {
51687
51757
  ...message,
51688
51758
  content: message.content.map(sanitizeAssistantContentBlock)
@@ -52352,7 +52422,7 @@ function createMessagesViaResponsesStrategy(copilotClient, responsesPayload, opt
52352
52422
  //#region src/routes/messages/strategy-registry.ts
52353
52423
  const nativeMessagesEntry = {
52354
52424
  name: "native-messages",
52355
- canHandle: (model) => modelCache.supportsEndpoint(model, MESSAGES_ENDPOINT),
52425
+ canHandle: (model, ctx) => modelCache.supportsEndpoint(model, MESSAGES_ENDPOINT) && !hasOutputConfigFormat(ctx?.anthropicPayload),
52356
52426
  async execute(ctx) {
52357
52427
  filterThinkingBlocksForNativeMessages(ctx.anthropicPayload);
52358
52428
  sanitizeOutputConfig(ctx.anthropicPayload, ctx.selectedModel);
@@ -52383,6 +52453,7 @@ const chatCompletionsEntry = {
52383
52453
  name: "chat-completions",
52384
52454
  canHandle: () => true,
52385
52455
  async execute(ctx) {
52456
+ if (hasOutputConfigFormat(ctx.anthropicPayload)) throwInvalidRequestError("Anthropic output_config.format requires a model with Responses endpoint support.", "output_config.format", "unsupported_output_config_format");
52386
52457
  const adapter = createAnthropicAdapter();
52387
52458
  const plan = withTranslationErrors(() => adapter.toCapiPlan(ctx.anthropicPayload, { requestContext: ctx.requestContext }));
52388
52459
  appendModelStepInPlace(ctx.modelMapping, "MODEL_RESOLVE", plan.resolvedModel);