npm - ghc-proxy - Versions diffs - 0.6.1 → 0.7.0 - Mend

ghc-proxy 0.6.1 → 0.7.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.

Files changed (22)

package/README.md CHANGED Viewed

@@ -19,7 +19,7 @@ bunx ghc-proxy@latest start
 Before you start, make sure you have:
-1. **Bun** (>= 1.2) -- a fast JavaScript runtime used to run the proxy
+1. **Bun** (>= 1.3) -- a fast JavaScript runtime used to run the proxy
    - **Windows:** `winget install --id Oven-sh.Bun`
    - **Other platforms:** see the [official installation guide](https://bun.com/docs/installation)
 2. **A GitHub Copilot subscription** -- individual, business, or enterprise
@@ -59,7 +59,7 @@ Create or edit `~/.claude/settings.json` (this applies globally to all projects)
   "env": {
     "ANTHROPIC_BASE_URL": "http://localhost:4141",
     "ANTHROPIC_AUTH_TOKEN": "dummy-token",
-    "ANTHROPIC_MODEL": "claude-opus-4.6",
+    "ANTHROPIC_MODEL": "claude-opus-4.8",
     "ANTHROPIC_DEFAULT_SONNET_MODEL": "claude-sonnet-4.6",
     "ANTHROPIC_DEFAULT_HAIKU_MODEL": "claude-haiku-4.5",
     "CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC": "1"
@@ -87,7 +87,7 @@ bunx ghc-proxy@latest start
 | `ANTHROPIC_DEFAULT_HAIKU_MODEL` | The model used for Haiku-tier (fast/cheap) tasks |
 | `CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC` | Disables telemetry and non-essential network traffic |
-> **Tip:** The model names above (e.g. `claude-opus-4.6`) are mapped to actual Copilot models by the proxy. See [Model Mapping](#model-mapping) below for details.
+> **Tip:** The model names above (e.g. `claude-opus-4.8`) are mapped to actual Copilot models by the proxy. See [Model Mapping](#model-mapping) below for details.
 See the [Claude Code settings docs](https://docs.anthropic.com/en/docs/claude-code/settings#environment-variables) for more options.
@@ -100,6 +100,7 @@ bunx ghc-proxy@latest start          # Start the proxy server
 bunx ghc-proxy@latest auth           # Run GitHub auth flow without starting the server
 bunx ghc-proxy@latest check-usage    # Show your Copilot usage/quota in the terminal
 bunx ghc-proxy@latest debug          # Print diagnostic info (version, paths, token status)
+bunx ghc-proxy@latest selfcheck      # Probe the packaged bundle (loads every tokenizer chunk; useful for install troubleshooting)
 ```
 ### `start` Options
@@ -115,6 +116,7 @@ bunx ghc-proxy@latest debug          # Print diagnostic info (version, paths, to
 | `--github-token` | `-g` | -- | Pass a GitHub token directly (from `auth`) |
 | `--claude-code` | `-c` | `false` | Generate a Claude Code launch command |
 | `--show-token` | -- | `false` | Display tokens on auth and refresh |
+| `--dump-failed-payloads` | `-D` | `false` | Dump failed `/responses` payloads on upstream 400 errors for debugging. Can also be enabled with `DUMP_FAILED_PAYLOADS=1`. |
 | `--proxy-env` | -- | `false` | Use `HTTP_PROXY`/`HTTPS_PROXY` from env (Node.js only; Bun reads proxy env natively) |
 | `--idle-timeout` | -- | `120` | Bun server idle timeout in seconds (`0` disables; Bun max is `255`; streaming routes disable idle timeout automatically) |
 | `--upstream-timeout` | -- | `1800` | Upstream request timeout in seconds (0 to disable) |
@@ -184,14 +186,11 @@ All fields are optional. The full schema:
 |-------|------|---------|-------------|
 | `modelRewrites` | `{ from, to }[]` | -- | Glob-pattern model substitution rules (see [Model Rewrites](#model-rewrites)) |
 | `modelFallback` | `object` | -- | Override default model fallbacks (see [Customizing Fallbacks](#customizing-fallbacks)) |
-| `modelFallback.claudeOpus` | `string` | `claude-opus-4.6` | Fallback for `claude-opus-*` models |
+| `modelFallback.claudeOpus` | `string` | `claude-opus-4.8` | Fallback for `claude-opus-*` models |
 | `modelFallback.claudeSonnet` | `string` | `claude-sonnet-4.6` | Fallback for `claude-sonnet-*` models |
 | `modelFallback.claudeHaiku` | `string` | `claude-haiku-4.5` | Fallback for `claude-haiku-*` models |
 | `smallModel` | `string` | -- | Target model for compact request routing (see [Small-Model Routing](#small-model-routing)) |
 | `compactUseSmallModel` | `boolean` | `false` | Route compact/summarization requests to `smallModel` |
-| `contextUpgrade` | `boolean` | `true` | Enable configured extended-context upgrade rules (see [Context-1M Auto-Upgrade](#context-1m-auto-upgrade)) |
-| `contextUpgradeRules` | `{ from, to }[]` | `[]` | Glob-pattern context upgrade rules used for proactive, reactive, and beta-header upgrades |
-| `contextUpgradeTokenThreshold` | `number` | `160000` | Token threshold for proactive context upgrade |
 | `useFunctionApplyPatch` | `boolean` | `true` | Rewrite `apply_patch` custom tool as function tool on Responses path |
 | `responsesApiAutoCompactInput` | `boolean` | `false` | Automatically trim Responses `input` to the latest `compaction` item |
 | `responsesApiAutoContextManagement` | `boolean` | `false` | Automatically inject Responses `context_management` for selected models |
@@ -208,16 +207,11 @@ Example:
     { "from": "claude-haiku-*", "to": "gpt-4.1-mini" }
   ],
   "modelFallback": {
-    "claudeOpus": "claude-opus-4.6",
+    "claudeOpus": "claude-opus-4.8",
     "claudeSonnet": "claude-sonnet-4.6"
   },
   "smallModel": "gpt-4.1-mini",
   "compactUseSmallModel": true,
-  "contextUpgrade": true,
-  "contextUpgradeRules": [
-    { "from": "claude-opus-4.6", "to": "claude-opus-4.6-1m" }
-  ],
-  "contextUpgradeTokenThreshold": 160000,
   "useFunctionApplyPatch": true,
   "responsesApiAutoCompactInput": false,
   "responsesApiAutoContextManagement": false,
@@ -244,7 +238,7 @@ When Claude Code sends a request for a model like `claude-sonnet-4.6`, the proxy
 | Prefix | Default Fallback |
 |--------|-----------------|
-| `claude-opus-*` | `claude-opus-4.6` |
+| `claude-opus-*` | `claude-opus-4.8` |
 | `claude-sonnet-*` | `claude-sonnet-4.6` |
 | `claude-haiku-*` | `claude-haiku-4.5` |
@@ -253,7 +247,7 @@ When Claude Code sends a request for a model like `claude-sonnet-4.6`, the proxy
 You can override the defaults with **environment variables**:
 ```bash
-MODEL_FALLBACK_CLAUDE_OPUS=claude-opus-4.6
+MODEL_FALLBACK_CLAUDE_OPUS=claude-opus-4.8
 MODEL_FALLBACK_CLAUDE_SONNET=claude-sonnet-4.6
 MODEL_FALLBACK_CLAUDE_HAIKU=claude-haiku-4.5
 ```
@@ -263,7 +257,7 @@ Or in the proxy's **config file** (`~/.local/share/ghc-proxy/config.json`):
 ```json
 {
   "modelFallback": {
-    "claudeOpus": "claude-opus-4.6",
+    "claudeOpus": "claude-opus-4.8",
     "claudeSonnet": "claude-sonnet-4.6",
     "claudeHaiku": "claude-haiku-4.5"
   }
@@ -287,48 +281,7 @@ For more general model substitution, use `modelRewrites` in the config file. Eac
 Unlike model fallbacks (which only apply to the chat completions path), rewrites are applied **uniformly to all three endpoints** — `/v1/messages`, `/v1/chat/completions`, and `/v1/responses`. Target model names are normalized against Copilot's known model list using dash/dot equivalence (e.g. `gpt-4.1` matches `gpt-4-1`).
-Rewrites run **before** any other model policy — context upgrades, small-model routing, and strategy selection all see the rewritten model. This means a rewritten model still benefits from context-1m upgrades if the target has an upgrade rule.
-### Context-1M Auto-Upgrade
-The proxy can automatically upgrade models to extended-context variants when the request is large. Upgrade targets are config-driven so users only route to models their Copilot account can access.
-**Proactive upgrade:** Before sending the request, the proxy estimates the input token count. If it exceeds the configured threshold (default: 160,000 tokens), the first matching `contextUpgradeRules` entry is applied before the request is sent.
-**Reactive upgrade:** If the upstream returns a context-length error (e.g. "context length exceeded"), the proxy retries the request with the configured upgraded model automatically.
-**Beta header support:** When a client sends an `anthropic-beta: context-*` header (e.g. `context-1m-2025-04-14`), the proxy strips the header (Copilot does not understand it) and applies the configured context upgrade rule instead.
-Configuration:
-- `contextUpgrade` (boolean, default `true`) — enable or disable configured auto-upgrade rules
-- `contextUpgradeRules` (`{ from, to }[]`, default `[]`) — glob-pattern model upgrade rules; first match wins
-- `contextUpgradeTokenThreshold` (number, default `160000`) — token count threshold for proactive upgrade
-Example for the public Opus 4.6 1M model:
-```json
-{
-  "contextUpgradeRules": [
-    { "from": "claude-opus-4.6", "to": "claude-opus-4.6-1m" }
-  ]
-}
-```
-Example for an enterprise account with access to the Opus 4.7 internal 1M model:
-```json
-{
-  "modelRewrites": [
-    { "from": "claude-opus-*", "to": "claude-opus-4.7" }
-  ],
-  "contextUpgrade": true,
-  "contextUpgradeRules": [
-    { "from": "claude-opus-4.7", "to": "claude-opus-4.7-1m-internal" }
-  ],
-  "contextUpgradeTokenThreshold": 160000
-}
-```
+Rewrites run **before** any other model policy — small-model routing and strategy selection both see the rewritten model.
 ### Small-Model Routing
@@ -521,9 +474,9 @@ bun run matrix:live --stateful-only --json --model=gpt-5.2-codex
 Tests which server-side tool types (bash, text_editor, web_search, memory, etc.) each Copilot model actually accepts. Useful for tracking backend changes over time.
 ```bash
-bun scripts/probe-all-copilot-tools.ts              # human-readable table
-bun scripts/probe-all-copilot-tools.ts --json        # JSON snapshot to stdout
-bun scripts/probe-all-copilot-tools.ts --model=claude-opus-4.6  # single model
+bun scripts/probes/copilot-tools.ts              # human-readable table
+bun scripts/probes/copilot-tools.ts --json        # JSON snapshot to stdout
+bun scripts/probes/copilot-tools.ts --model=claude-opus-4.6  # single model
 ```
 The JSON output is designed for weekly diffing — `generatedAt` is the only volatile field:

package/dist/{GptEncoding-DuDWxow_.mjs → GptEncoding-DdY2evDX.mjs} RENAMED Viewed

@@ -1,9 +1,7 @@
 import { n as __exportAll } from "./main.mjs";
 //#region node_modules/gpt-tokenizer/esm/constants.js
 const ALL_SPECIAL_TOKENS = "all";
 const DEFAULT_MERGE_CACHE_SIZE = 1e5;
 //#endregion
 //#region node_modules/gpt-tokenizer/esm/utfUtil.js
 const isAscii = (codePoint) => codePoint <= 127;
@@ -60,7 +58,6 @@ function compareUint8Arrays(a, b) {
 	for (let i = 0; i < len; i++) if (a[i] !== b[i]) return a[i] - b[i];
 	return a.length - b.length;
 }
 //#endregion
 //#region node_modules/gpt-tokenizer/esm/util.js
 function getMaxValueFromMap(map) {
@@ -77,7 +74,6 @@ function getSpecialTokenRegex(tokens) {
 	const inner = [...tokens].map(escapeRegExp).join("|");
 	return new RegExp(`(${inner})`);
 }
 //#endregion
 //#region node_modules/gpt-tokenizer/esm/BytePairEncodingCore.js
 const emptyBuffer = new Uint8Array(0);
@@ -374,32 +370,20 @@ var BytePairEncodingCore = class {
 		return output;
 	}
 };
-//#endregion
-//#region node_modules/gpt-tokenizer/esm/functionCalling.js
-const MESSAGE_TOKEN_OVERHEAD = 3;
-const MESSAGE_NAME_TOKEN_OVERHEAD = 1;
-const FUNCTION_ROLE_TOKEN_DISCOUNT = 2;
-const FUNCTION_CALL_METADATA_TOKEN_OVERHEAD = 3;
-const FUNCTION_DEFINITION_TOKEN_OVERHEAD = 9;
-const COMPLETION_REQUEST_TOKEN_OVERHEAD = 3;
-const FUNCTION_CALL_NAME_TOKEN_OVERHEAD = 4;
-const FUNCTION_CALL_NONE_TOKEN_OVERHEAD = 1;
-const SYSTEM_FUNCTION_TOKEN_DEDUCTION = 4;
 const NEWLINE = "\n";
 function countMessageTokens(message, countStringTokens) {
 	let tokens = 0;
 	if (message.role) tokens += countStringTokens(message.role);
 	if (message.content) tokens += countStringTokens(message.content);
-	if (message.name) tokens += countStringTokens(message.name) + MESSAGE_NAME_TOKEN_OVERHEAD;
+	if (message.name) tokens += countStringTokens(message.name) + 1;
 	if (message.function_call) {
 		const { name, arguments: args } = message.function_call;
 		if (name) tokens += countStringTokens(name);
 		if (args) tokens += countStringTokens(args);
-		tokens += FUNCTION_CALL_METADATA_TOKEN_OVERHEAD;
+		tokens += 3;
 	}
-	tokens += MESSAGE_TOKEN_OVERHEAD;
-	if (message.role === "function") tokens -= FUNCTION_ROLE_TOKEN_DISCOUNT;
+	tokens += 3;
+	if (message.role === "function") tokens -= 2;
 	return tokens;
 }
 function formatObjectProperties(obj, indent, formatType) {
@@ -449,7 +433,7 @@ function formatFunctionDefinitions(functions) {
 }
 function estimateTokensInFunctions(functions, countStringTokens) {
 	let tokens = countStringTokens(formatFunctionDefinitions(functions));
-	tokens += FUNCTION_DEFINITION_TOKEN_OVERHEAD;
+	tokens += 9;
 	return tokens;
 }
 function padSystemMessage(message, hasFunctions, isSystemPadded) {
@@ -471,18 +455,17 @@ function computeChatCompletionTokenCount(request, countStringTokens) {
 		else if (message.role === "system" && hasFunctions && !paddedSystem) paddedSystem = true;
 		total += countMessageTokens(messageToCount, countStringTokens);
 	}
-	total += COMPLETION_REQUEST_TOKEN_OVERHEAD;
+	total += 3;
 	if (hasFunctions && functions) {
 		total += estimateTokensInFunctions(functions, countStringTokens);
-		if (messages.some((message) => message.role === "system")) total -= SYSTEM_FUNCTION_TOKEN_DEDUCTION;
+		if (messages.some((message) => message.role === "system")) total -= 4;
 	}
 	if (functionCall && functionCall !== "auto") {
-		if (functionCall === "none") total += FUNCTION_CALL_NONE_TOKEN_OVERHEAD;
-		else if (typeof functionCall === "object" && functionCall.name) total += countStringTokens(functionCall.name) + FUNCTION_CALL_NAME_TOKEN_OVERHEAD;
+		if (functionCall === "none") total += 1;
+		else if (typeof functionCall === "object" && functionCall.name) total += countStringTokens(functionCall.name) + 4;
 	}
 	return total;
 }
 //#endregion
 //#region node_modules/gpt-tokenizer/esm/modelsChatEnabled.gen.js
 const chatEnabledModels = [
@@ -569,7 +552,6 @@ const chatEnabledModels = [
 	"o4-mini-deep-research",
 	"o4-mini-deep-research-2025-06-26"
 ];
 //#endregion
 //#region node_modules/gpt-tokenizer/esm/modelsMap.js
 var modelsMap_exports = /* @__PURE__ */ __exportAll({
@@ -644,7 +626,6 @@ const cl100k_base = [
 ];
 const o200k_base$1 = [];
 const o200k_harmony = ["gpt-oss-20b", "gpt-oss-120b"];
 //#endregion
 //#region node_modules/gpt-tokenizer/esm/specialTokens.js
 const EndOfText = "<|endoftext|>";
@@ -663,11 +644,6 @@ const HarmonyChannel = "<|channel|>";
 const HarmonyReturn = "<|return|>";
 const HarmonyConstrain = "<|constrain|>";
 const HarmonyCall = "<|call|>";
-//#endregion
-//#region node_modules/gpt-tokenizer/esm/mapping.js
-const o200k_base = "o200k_base";
-const DEFAULT_ENCODING = o200k_base;
 /**
 * maps model names to encoding names
 * if a model is not listed, it uses the default encoding for new models
@@ -683,7 +659,6 @@ const gpt4params = {
 	roleSeparator: ImSep
 };
 const chatModelParams = Object.fromEntries(chatEnabledModels.flatMap((modelName) => modelName.startsWith("gpt-3.5") ? [[modelName, gpt3params]] : [[modelName, gpt4params]]));
 //#endregion
 //#region node_modules/gpt-tokenizer/esm/encodingParams/constants.js
 const R50K_TOKEN_SPLIT_REGEX = /'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+/gu;
@@ -693,7 +668,6 @@ const CL100K_TOKEN_SPLIT_PATTERN = String.raw`${CONTRACTION_SUFFIX_PATTERN}|[^\r
 const CL100K_TOKEN_SPLIT_REGEX = new RegExp(CL100K_TOKEN_SPLIT_PATTERN, "gu");
 const O200K_TOKEN_SPLIT_PATTERN = String.raw`[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+${OPTIONAL_CONTRACTION_SUFFIX}|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*${OPTIONAL_CONTRACTION_SUFFIX}|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+`;
 const O200K_TOKEN_SPLIT_REGEX = new RegExp(O200K_TOKEN_SPLIT_PATTERN, "gu");
 //#endregion
 //#region node_modules/gpt-tokenizer/esm/encodingParams/cl100k_base.js
 function Cl100KBase(bytePairRankDecoder) {
@@ -712,7 +686,6 @@ function Cl100KBase(bytePairRankDecoder) {
 		])
 	};
 }
 //#endregion
 //#region node_modules/gpt-tokenizer/esm/encodingParams/o200k_base.js
 const O200K_BASE_SPECIAL_TOKEN_ENTRIES = [
@@ -733,7 +706,6 @@ function O200KBase(bytePairRankDecoder) {
 		specialTokensEncoder: createO200KSpecialTokenMap()
 	};
 }
 //#endregion
 //#region node_modules/gpt-tokenizer/esm/encodingParams/o200k_harmony.js
 const RESERVED_TOKEN_RANGE_START = 200013;
@@ -766,7 +738,6 @@ function O200KHarmony(bytePairRankDecoder) {
 		chatFormatter: "harmony"
 	};
 }
 //#endregion
 //#region node_modules/gpt-tokenizer/esm/encodingParams/p50k_base.js
 function P50KBase(bytePairRankDecoder) {
@@ -777,7 +748,6 @@ function P50KBase(bytePairRankDecoder) {
 		specialTokensEncoder: new Map([[EndOfText, 50256]])
 	};
 }
 //#endregion
 //#region node_modules/gpt-tokenizer/esm/encodingParams/p50k_edit.js
 function P50KEdit(bytePairRankDecoder) {
@@ -792,7 +762,6 @@ function P50KEdit(bytePairRankDecoder) {
 		])
 	};
 }
 //#endregion
 //#region node_modules/gpt-tokenizer/esm/encodingParams/r50k_base.js
 function R50KBase(bytePairRankDecoder) {
@@ -803,7 +772,6 @@ function R50KBase(bytePairRankDecoder) {
 		specialTokensEncoder: new Map([[EndOfText, 50256]])
 	};
 }
 //#endregion
 //#region node_modules/gpt-tokenizer/esm/modelParams.js
 function getEncodingParams(encodingName, getMergeableRanks) {
@@ -818,7 +786,6 @@ function getEncodingParams(encodingName, getMergeableRanks) {
 		default: throw new Error(`Unknown encoding name: ${encodingName}`);
 	}
 }
 //#endregion
 //#region node_modules/gpt-tokenizer/esm/GptEncoding.js
 var GptEncoding = class GptEncoding {
@@ -920,21 +887,21 @@ var GptEncoding = class GptEncoding {
 	}
 	static getEncodingApiForModel(modelName, getMergeableRanks, modelSpec) {
 		return new GptEncoding({
-			...getEncodingParams(modelToEncodingMap[modelName] ?? DEFAULT_ENCODING, getMergeableRanks),
+			...getEncodingParams(modelToEncodingMap[modelName] ?? "o200k_base", getMergeableRanks),
 			modelName,
 			modelSpec
 		});
 	}
 	processSpecialTokens({ allowedSpecial, disallowedSpecial } = {}) {
 		let regexPattern;
-		if (allowedSpecial === ALL_SPECIAL_TOKENS || allowedSpecial?.has(ALL_SPECIAL_TOKENS)) {
+		if (allowedSpecial === "all" || allowedSpecial?.has("all")) {
 			allowedSpecial = new Set(this.specialTokensSet);
 			const allowedSpecialSet = allowedSpecial;
-			if (disallowedSpecial === ALL_SPECIAL_TOKENS) throw new Error("allowedSpecial and disallowedSpecial cannot both be set to \"all\".");
+			if (disallowedSpecial === "all") throw new Error("allowedSpecial and disallowedSpecial cannot both be set to \"all\".");
 			if (typeof disallowedSpecial === "object") disallowedSpecial.forEach((val) => allowedSpecialSet.delete(val));
 			else disallowedSpecial = /* @__PURE__ */ new Set();
 		}
-		if (!disallowedSpecial || disallowedSpecial === ALL_SPECIAL_TOKENS || disallowedSpecial.has(ALL_SPECIAL_TOKENS)) {
+		if (!disallowedSpecial || disallowedSpecial === "all" || disallowedSpecial.has("all")) {
 			disallowedSpecial = new Set(this.specialTokensSet);
 			const disallowedSpecialSet = disallowedSpecial;
 			if (allowedSpecial?.size) {
@@ -1128,7 +1095,7 @@ var GptEncoding = class GptEncoding {
 		return result;
 	}
 };
 //#endregion
 export { ImStart as _, FimPrefix as a, HarmonyChannel as c, HarmonyMessage as d, HarmonyReturn as f, ImSep as g, ImEnd as h, FimMiddle as i, HarmonyConstrain as l, HarmonyStartOfText as m, EndOfPrompt as n, FimSuffix as o, HarmonyStart as p, EndOfText as r, HarmonyCall as s, GptEncoding as t, HarmonyEnd as u, ALL_SPECIAL_TOKENS as v, DEFAULT_MERGE_CACHE_SIZE as y };
-//# sourceMappingURL=GptEncoding-DuDWxow_.mjs.map
+//# sourceMappingURL=GptEncoding-DdY2evDX.mjs.map