ghc-proxy 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22):
  1. package/README.md +37 -12
  2. package/dist/{GptEncoding-DuDWxow_.mjs → GptEncoding-DdY2evDX.mjs} +15 -48
  3. package/dist/{GptEncoding-DuDWxow_.mjs.map → GptEncoding-DdY2evDX.mjs.map} +1 -1
  4. package/dist/{cl100k_base-YsziDpoU.mjs → cl100k_base-ChJqEXhP.mjs} +3 -5
  5. package/dist/{cl100k_base-YsziDpoU.mjs.map → cl100k_base-ChJqEXhP.mjs.map} +1 -1
  6. package/dist/{file-type-DlzWawJh.mjs → file-type-BwbWtW7C.mjs} +15 -52
  7. package/dist/{file-type-DlzWawJh.mjs.map → file-type-BwbWtW7C.mjs.map} +1 -1
  8. package/dist/main.mjs +2275 -2093
  9. package/dist/main.mjs.map +1 -1
  10. package/dist/{o200k_base-C_Bgi80R.mjs → o200k_base-DXNwToXP.mjs} +3 -5
  11. package/dist/{o200k_base-C_Bgi80R.mjs.map → o200k_base-DXNwToXP.mjs.map} +1 -1
  12. package/dist/{p50k_base-DRo0AxsG.mjs → p50k_base-BDWEQNXM.mjs} +2 -2
  13. package/dist/{p50k_base-DRo0AxsG.mjs.map → p50k_base-BDWEQNXM.mjs.map} +1 -1
  14. package/dist/{p50k_base-teVr-d1Y.mjs → p50k_base-Cab7w92R.mjs} +4 -5
  15. package/dist/{p50k_base-teVr-d1Y.mjs.map → p50k_base-Cab7w92R.mjs.map} +1 -1
  16. package/dist/{p50k_edit-nucqZWIv.mjs → p50k_edit-DkrRw_em.mjs} +4 -5
  17. package/dist/{p50k_edit-nucqZWIv.mjs.map → p50k_edit-DkrRw_em.mjs.map} +1 -1
  18. package/dist/{prompt-mE5xxWUf.mjs → prompt-DsMdjS4d.mjs} +7 -6
  19. package/dist/{prompt-mE5xxWUf.mjs.map → prompt-DsMdjS4d.mjs.map} +1 -1
  20. package/dist/{r50k_base-B2MFjxES.mjs → r50k_base-1vVxWqTY.mjs} +3 -5
  21. package/dist/{r50k_base-B2MFjxES.mjs.map → r50k_base-1vVxWqTY.mjs.map} +1 -1
  22. package/package.json +10 -10
package/README.md CHANGED
@@ -115,6 +115,7 @@ bunx ghc-proxy@latest debug # Print diagnostic info (version, paths, to
115
115
  | `--github-token` | `-g` | -- | Pass a GitHub token directly (from `auth`) |
116
116
  | `--claude-code` | `-c` | `false` | Generate a Claude Code launch command |
117
117
  | `--show-token` | -- | `false` | Display tokens on auth and refresh |
118
+ | `--dump-failed-payloads` | `-D` | `false` | Dump failed `/responses` payloads on upstream 400 errors for debugging. Can also be enabled with `DUMP_FAILED_PAYLOADS=1`. |
118
119
  | `--proxy-env` | -- | `false` | Use `HTTP_PROXY`/`HTTPS_PROXY` from env (Node.js only; Bun reads proxy env natively) |
119
120
  | `--idle-timeout` | -- | `120` | Bun server idle timeout in seconds (`0` disables; Bun max is `255`; streaming routes disable idle timeout automatically) |
120
121
  | `--upstream-timeout` | -- | `1800` | Upstream request timeout in seconds (0 to disable) |
@@ -189,7 +190,8 @@ All fields are optional. The full schema:
189
190
  | `modelFallback.claudeHaiku` | `string` | `claude-haiku-4.5` | Fallback for `claude-haiku-*` models |
190
191
  | `smallModel` | `string` | -- | Target model for compact request routing (see [Small-Model Routing](#small-model-routing)) |
191
192
  | `compactUseSmallModel` | `boolean` | `false` | Route compact/summarization requests to `smallModel` |
192
- | `contextUpgrade` | `boolean` | `true` | Auto-upgrade to extended-context model variants (see [Context-1M Auto-Upgrade](#context-1m-auto-upgrade)) |
193
+ | `contextUpgrade` | `boolean` | `true` | Enable configured extended-context upgrade rules (see [Context-1M Auto-Upgrade](#context-1m-auto-upgrade)) |
194
+ | `contextUpgradeRules` | `{ from, to }[]` | `[]` | Glob-pattern context upgrade rules used for proactive, reactive, and beta-header upgrades |
193
195
  | `contextUpgradeTokenThreshold` | `number` | `160000` | Token threshold for proactive context upgrade |
194
196
  | `useFunctionApplyPatch` | `boolean` | `true` | Rewrite `apply_patch` custom tool as function tool on Responses path |
195
197
  | `responsesApiAutoCompactInput` | `boolean` | `false` | Automatically trim Responses `input` to the latest `compaction` item |
@@ -213,6 +215,9 @@ Example:
213
215
  "smallModel": "gpt-4.1-mini",
214
216
  "compactUseSmallModel": true,
215
217
  "contextUpgrade": true,
218
+ "contextUpgradeRules": [
219
+ { "from": "claude-opus-4.6", "to": "claude-opus-4.6-1m" }
220
+ ],
216
221
  "contextUpgradeTokenThreshold": 160000,
217
222
  "useFunctionApplyPatch": true,
218
223
  "responsesApiAutoCompactInput": false,
@@ -287,25 +292,45 @@ Rewrites run **before** any other model policy — context upgrades, small-model
287
292
 
288
293
  ### Context-1M Auto-Upgrade
289
294
 
290
- The proxy can automatically upgrade models to their extended-context (1M token) variants when the request is large. This is enabled by default.
291
-
292
- **Proactive upgrade:** Before sending the request, the proxy estimates the input token count. If it exceeds the configured threshold (default: 160,000 tokens), the model is upgraded to its 1M variant before the request is sent.
293
-
294
- **Reactive upgrade:** If the upstream returns a context-length error (e.g. "context length exceeded"), the proxy retries the request with the upgraded model automatically.
295
+ The proxy can automatically upgrade models to extended-context variants when the request is large. Upgrade targets are config-driven so users only route to models their Copilot account can access.
295
296
 
296
- **Beta header support:** When a client sends an `anthropic-beta: context-*` header (e.g. `context-1m-2025-04-14`), the proxy strips the header (Copilot does not understand it) and upgrades the model to the 1M variant instead.
297
+ **Proactive upgrade:** Before sending the request, the proxy estimates the input token count. If it exceeds the configured threshold (default: 160,000 tokens), the first matching `contextUpgradeRules` entry is applied before the request is sent.
297
298
 
298
- Current upgrade rules:
299
+ **Reactive upgrade:** If the upstream returns a context-length error (e.g. "context length exceeded"), the proxy retries the request with the configured upgraded model automatically.
299
300
 
300
- | Source Model | Upgraded Model |
301
- |-------------|----------------|
302
- | `claude-opus-4.6` | `claude-opus-4.6-1m` |
301
+ **Beta header support:** When a client sends an `anthropic-beta: context-*` header (e.g. `context-1m-2025-04-14`), the proxy strips the header (Copilot does not understand it) and applies the configured context upgrade rule instead.
303
302
 
304
303
  Configuration:
305
304
 
306
- - `contextUpgrade` (boolean, default `true`) — enable or disable auto-upgrade
305
+ - `contextUpgrade` (boolean, default `true`) — enable or disable configured auto-upgrade rules
306
+ - `contextUpgradeRules` (`{ from, to }[]`, default `[]`) — glob-pattern model upgrade rules; first match wins
307
307
  - `contextUpgradeTokenThreshold` (number, default `160000`) — token count threshold for proactive upgrade
308
308
 
309
+ Example for the public Opus 4.6 1M model:
310
+
311
+ ```json
312
+ {
313
+ "contextUpgradeRules": [
314
+ { "from": "claude-opus-4.6", "to": "claude-opus-4.6-1m" }
315
+ ]
316
+ }
317
+ ```
318
+
319
+ Example for an enterprise account with access to the Opus 4.7 internal 1M model:
320
+
321
+ ```json
322
+ {
323
+ "modelRewrites": [
324
+ { "from": "claude-opus-*", "to": "claude-opus-4.7" }
325
+ ],
326
+ "contextUpgrade": true,
327
+ "contextUpgradeRules": [
328
+ { "from": "claude-opus-4.7", "to": "claude-opus-4.7-1m-internal" }
329
+ ],
330
+ "contextUpgradeTokenThreshold": 160000
331
+ }
332
+ ```
333
+
309
334
  ### Small-Model Routing
310
335
 
311
336
  `/v1/messages` can optionally reroute specific low-value requests to a cheaper model:
@@ -1,9 +1,7 @@
1
1
  import { n as __exportAll } from "./main.mjs";
2
-
3
2
  //#region node_modules/gpt-tokenizer/esm/constants.js
4
3
  const ALL_SPECIAL_TOKENS = "all";
5
4
  const DEFAULT_MERGE_CACHE_SIZE = 1e5;
6
-
7
5
  //#endregion
8
6
  //#region node_modules/gpt-tokenizer/esm/utfUtil.js
9
7
  const isAscii = (codePoint) => codePoint <= 127;
@@ -60,7 +58,6 @@ function compareUint8Arrays(a, b) {
60
58
  for (let i = 0; i < len; i++) if (a[i] !== b[i]) return a[i] - b[i];
61
59
  return a.length - b.length;
62
60
  }
63
-
64
61
  //#endregion
65
62
  //#region node_modules/gpt-tokenizer/esm/util.js
66
63
  function getMaxValueFromMap(map) {
@@ -77,7 +74,6 @@ function getSpecialTokenRegex(tokens) {
77
74
  const inner = [...tokens].map(escapeRegExp).join("|");
78
75
  return new RegExp(`(${inner})`);
79
76
  }
80
-
81
77
  //#endregion
82
78
  //#region node_modules/gpt-tokenizer/esm/BytePairEncodingCore.js
83
79
  const emptyBuffer = new Uint8Array(0);
@@ -374,32 +370,20 @@ var BytePairEncodingCore = class {
374
370
  return output;
375
371
  }
376
372
  };
377
-
378
- //#endregion
379
- //#region node_modules/gpt-tokenizer/esm/functionCalling.js
380
- const MESSAGE_TOKEN_OVERHEAD = 3;
381
- const MESSAGE_NAME_TOKEN_OVERHEAD = 1;
382
- const FUNCTION_ROLE_TOKEN_DISCOUNT = 2;
383
- const FUNCTION_CALL_METADATA_TOKEN_OVERHEAD = 3;
384
- const FUNCTION_DEFINITION_TOKEN_OVERHEAD = 9;
385
- const COMPLETION_REQUEST_TOKEN_OVERHEAD = 3;
386
- const FUNCTION_CALL_NAME_TOKEN_OVERHEAD = 4;
387
- const FUNCTION_CALL_NONE_TOKEN_OVERHEAD = 1;
388
- const SYSTEM_FUNCTION_TOKEN_DEDUCTION = 4;
389
373
  const NEWLINE = "\n";
390
374
  function countMessageTokens(message, countStringTokens) {
391
375
  let tokens = 0;
392
376
  if (message.role) tokens += countStringTokens(message.role);
393
377
  if (message.content) tokens += countStringTokens(message.content);
394
- if (message.name) tokens += countStringTokens(message.name) + MESSAGE_NAME_TOKEN_OVERHEAD;
378
+ if (message.name) tokens += countStringTokens(message.name) + 1;
395
379
  if (message.function_call) {
396
380
  const { name, arguments: args } = message.function_call;
397
381
  if (name) tokens += countStringTokens(name);
398
382
  if (args) tokens += countStringTokens(args);
399
- tokens += FUNCTION_CALL_METADATA_TOKEN_OVERHEAD;
383
+ tokens += 3;
400
384
  }
401
- tokens += MESSAGE_TOKEN_OVERHEAD;
402
- if (message.role === "function") tokens -= FUNCTION_ROLE_TOKEN_DISCOUNT;
385
+ tokens += 3;
386
+ if (message.role === "function") tokens -= 2;
403
387
  return tokens;
404
388
  }
405
389
  function formatObjectProperties(obj, indent, formatType) {
@@ -449,7 +433,7 @@ function formatFunctionDefinitions(functions) {
449
433
  }
450
434
  function estimateTokensInFunctions(functions, countStringTokens) {
451
435
  let tokens = countStringTokens(formatFunctionDefinitions(functions));
452
- tokens += FUNCTION_DEFINITION_TOKEN_OVERHEAD;
436
+ tokens += 9;
453
437
  return tokens;
454
438
  }
455
439
  function padSystemMessage(message, hasFunctions, isSystemPadded) {
@@ -471,18 +455,17 @@ function computeChatCompletionTokenCount(request, countStringTokens) {
471
455
  else if (message.role === "system" && hasFunctions && !paddedSystem) paddedSystem = true;
472
456
  total += countMessageTokens(messageToCount, countStringTokens);
473
457
  }
474
- total += COMPLETION_REQUEST_TOKEN_OVERHEAD;
458
+ total += 3;
475
459
  if (hasFunctions && functions) {
476
460
  total += estimateTokensInFunctions(functions, countStringTokens);
477
- if (messages.some((message) => message.role === "system")) total -= SYSTEM_FUNCTION_TOKEN_DEDUCTION;
461
+ if (messages.some((message) => message.role === "system")) total -= 4;
478
462
  }
479
463
  if (functionCall && functionCall !== "auto") {
480
- if (functionCall === "none") total += FUNCTION_CALL_NONE_TOKEN_OVERHEAD;
481
- else if (typeof functionCall === "object" && functionCall.name) total += countStringTokens(functionCall.name) + FUNCTION_CALL_NAME_TOKEN_OVERHEAD;
464
+ if (functionCall === "none") total += 1;
465
+ else if (typeof functionCall === "object" && functionCall.name) total += countStringTokens(functionCall.name) + 4;
482
466
  }
483
467
  return total;
484
468
  }
485
-
486
469
  //#endregion
487
470
  //#region node_modules/gpt-tokenizer/esm/modelsChatEnabled.gen.js
488
471
  const chatEnabledModels = [
@@ -569,7 +552,6 @@ const chatEnabledModels = [
569
552
  "o4-mini-deep-research",
570
553
  "o4-mini-deep-research-2025-06-26"
571
554
  ];
572
-
573
555
  //#endregion
574
556
  //#region node_modules/gpt-tokenizer/esm/modelsMap.js
575
557
  var modelsMap_exports = /* @__PURE__ */ __exportAll({
@@ -644,7 +626,6 @@ const cl100k_base = [
644
626
  ];
645
627
  const o200k_base$1 = [];
646
628
  const o200k_harmony = ["gpt-oss-20b", "gpt-oss-120b"];
647
-
648
629
  //#endregion
649
630
  //#region node_modules/gpt-tokenizer/esm/specialTokens.js
650
631
  const EndOfText = "<|endoftext|>";
@@ -663,11 +644,6 @@ const HarmonyChannel = "<|channel|>";
663
644
  const HarmonyReturn = "<|return|>";
664
645
  const HarmonyConstrain = "<|constrain|>";
665
646
  const HarmonyCall = "<|call|>";
666
-
667
- //#endregion
668
- //#region node_modules/gpt-tokenizer/esm/mapping.js
669
- const o200k_base = "o200k_base";
670
- const DEFAULT_ENCODING = o200k_base;
671
647
  /**
672
648
  * maps model names to encoding names
673
649
  * if a model is not listed, it uses the default encoding for new models
@@ -683,7 +659,6 @@ const gpt4params = {
683
659
  roleSeparator: ImSep
684
660
  };
685
661
  const chatModelParams = Object.fromEntries(chatEnabledModels.flatMap((modelName) => modelName.startsWith("gpt-3.5") ? [[modelName, gpt3params]] : [[modelName, gpt4params]]));
686
-
687
662
  //#endregion
688
663
  //#region node_modules/gpt-tokenizer/esm/encodingParams/constants.js
689
664
  const R50K_TOKEN_SPLIT_REGEX = /'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+/gu;
@@ -693,7 +668,6 @@ const CL100K_TOKEN_SPLIT_PATTERN = String.raw`${CONTRACTION_SUFFIX_PATTERN}|[^\r
693
668
  const CL100K_TOKEN_SPLIT_REGEX = new RegExp(CL100K_TOKEN_SPLIT_PATTERN, "gu");
694
669
  const O200K_TOKEN_SPLIT_PATTERN = String.raw`[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+${OPTIONAL_CONTRACTION_SUFFIX}|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*${OPTIONAL_CONTRACTION_SUFFIX}|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+`;
695
670
  const O200K_TOKEN_SPLIT_REGEX = new RegExp(O200K_TOKEN_SPLIT_PATTERN, "gu");
696
-
697
671
  //#endregion
698
672
  //#region node_modules/gpt-tokenizer/esm/encodingParams/cl100k_base.js
699
673
  function Cl100KBase(bytePairRankDecoder) {
@@ -712,7 +686,6 @@ function Cl100KBase(bytePairRankDecoder) {
712
686
  ])
713
687
  };
714
688
  }
715
-
716
689
  //#endregion
717
690
  //#region node_modules/gpt-tokenizer/esm/encodingParams/o200k_base.js
718
691
  const O200K_BASE_SPECIAL_TOKEN_ENTRIES = [
@@ -733,7 +706,6 @@ function O200KBase(bytePairRankDecoder) {
733
706
  specialTokensEncoder: createO200KSpecialTokenMap()
734
707
  };
735
708
  }
736
-
737
709
  //#endregion
738
710
  //#region node_modules/gpt-tokenizer/esm/encodingParams/o200k_harmony.js
739
711
  const RESERVED_TOKEN_RANGE_START = 200013;
@@ -766,7 +738,6 @@ function O200KHarmony(bytePairRankDecoder) {
766
738
  chatFormatter: "harmony"
767
739
  };
768
740
  }
769
-
770
741
  //#endregion
771
742
  //#region node_modules/gpt-tokenizer/esm/encodingParams/p50k_base.js
772
743
  function P50KBase(bytePairRankDecoder) {
@@ -777,7 +748,6 @@ function P50KBase(bytePairRankDecoder) {
777
748
  specialTokensEncoder: new Map([[EndOfText, 50256]])
778
749
  };
779
750
  }
780
-
781
751
  //#endregion
782
752
  //#region node_modules/gpt-tokenizer/esm/encodingParams/p50k_edit.js
783
753
  function P50KEdit(bytePairRankDecoder) {
@@ -792,7 +762,6 @@ function P50KEdit(bytePairRankDecoder) {
792
762
  ])
793
763
  };
794
764
  }
795
-
796
765
  //#endregion
797
766
  //#region node_modules/gpt-tokenizer/esm/encodingParams/r50k_base.js
798
767
  function R50KBase(bytePairRankDecoder) {
@@ -803,7 +772,6 @@ function R50KBase(bytePairRankDecoder) {
803
772
  specialTokensEncoder: new Map([[EndOfText, 50256]])
804
773
  };
805
774
  }
806
-
807
775
  //#endregion
808
776
  //#region node_modules/gpt-tokenizer/esm/modelParams.js
809
777
  function getEncodingParams(encodingName, getMergeableRanks) {
@@ -818,7 +786,6 @@ function getEncodingParams(encodingName, getMergeableRanks) {
818
786
  default: throw new Error(`Unknown encoding name: ${encodingName}`);
819
787
  }
820
788
  }
821
-
822
789
  //#endregion
823
790
  //#region node_modules/gpt-tokenizer/esm/GptEncoding.js
824
791
  var GptEncoding = class GptEncoding {
@@ -920,21 +887,21 @@ var GptEncoding = class GptEncoding {
920
887
  }
921
888
  static getEncodingApiForModel(modelName, getMergeableRanks, modelSpec) {
922
889
  return new GptEncoding({
923
- ...getEncodingParams(modelToEncodingMap[modelName] ?? DEFAULT_ENCODING, getMergeableRanks),
890
+ ...getEncodingParams(modelToEncodingMap[modelName] ?? "o200k_base", getMergeableRanks),
924
891
  modelName,
925
892
  modelSpec
926
893
  });
927
894
  }
928
895
  processSpecialTokens({ allowedSpecial, disallowedSpecial } = {}) {
929
896
  let regexPattern;
930
- if (allowedSpecial === ALL_SPECIAL_TOKENS || allowedSpecial?.has(ALL_SPECIAL_TOKENS)) {
897
+ if (allowedSpecial === "all" || allowedSpecial?.has("all")) {
931
898
  allowedSpecial = new Set(this.specialTokensSet);
932
899
  const allowedSpecialSet = allowedSpecial;
933
- if (disallowedSpecial === ALL_SPECIAL_TOKENS) throw new Error("allowedSpecial and disallowedSpecial cannot both be set to \"all\".");
900
+ if (disallowedSpecial === "all") throw new Error("allowedSpecial and disallowedSpecial cannot both be set to \"all\".");
934
901
  if (typeof disallowedSpecial === "object") disallowedSpecial.forEach((val) => allowedSpecialSet.delete(val));
935
902
  else disallowedSpecial = /* @__PURE__ */ new Set();
936
903
  }
937
- if (!disallowedSpecial || disallowedSpecial === ALL_SPECIAL_TOKENS || disallowedSpecial.has(ALL_SPECIAL_TOKENS)) {
904
+ if (!disallowedSpecial || disallowedSpecial === "all" || disallowedSpecial.has("all")) {
938
905
  disallowedSpecial = new Set(this.specialTokensSet);
939
906
  const disallowedSpecialSet = disallowedSpecial;
940
907
  if (allowedSpecial?.size) {
@@ -1128,7 +1095,7 @@ var GptEncoding = class GptEncoding {
1128
1095
  return result;
1129
1096
  }
1130
1097
  };
1131
-
1132
1098
  //#endregion
1133
1099
  export { ImStart as _, FimPrefix as a, HarmonyChannel as c, HarmonyMessage as d, HarmonyReturn as f, ImSep as g, ImEnd as h, FimMiddle as i, HarmonyConstrain as l, HarmonyStartOfText as m, EndOfPrompt as n, FimSuffix as o, HarmonyStart as p, EndOfText as r, HarmonyCall as s, GptEncoding as t, HarmonyEnd as u, ALL_SPECIAL_TOKENS as v, DEFAULT_MERGE_CACHE_SIZE as y };
1134
- //# sourceMappingURL=GptEncoding-DuDWxow_.mjs.map
1100
+
1101
+ //# sourceMappingURL=GptEncoding-DdY2evDX.mjs.map