github-router 0.3.39 → 0.3.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{lifecycle-Ho67_Rew.js → lifecycle-CpnAVVQ_.js} +2 -2
- package/dist/{lifecycle-Ho67_Rew.js.map → lifecycle-CpnAVVQ_.js.map} +1 -1
- package/dist/{lifecycle-CMnTe0W7.js → lifecycle-DpnTmHCo.js} +2 -2
- package/dist/main.js +371 -227
- package/dist/main.js.map +1 -1
- package/dist/{paths-DZwqh1p5.js → paths-B7jmIPYq.js} +1 -1
- package/dist/{paths-C-GyxwCW.js → paths-cZle37Jp.js} +13 -3
- package/dist/paths-cZle37Jp.js.map +1 -0
- package/package.json +1 -1
- package/dist/paths-C-GyxwCW.js.map +0 -1
package/dist/main.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { c as writeRuntimeFileSecure, i as removeOwnClaudeConfigMirror, n as ensureClaudeConfigMirror, r as ensurePaths, t as PATHS } from "./paths-
|
|
3
|
-
import { a as sweepRegistry, i as registerExitHandlers, n as getInstanceUuid, r as recordWorkerRepo, t as WorktreeRegistry } from "./lifecycle-
|
|
2
|
+
import { c as writeRuntimeFileSecure, i as removeOwnClaudeConfigMirror, n as ensureClaudeConfigMirror, r as ensurePaths, t as PATHS } from "./paths-cZle37Jp.js";
|
|
3
|
+
import { a as sweepRegistry, i as registerExitHandlers, n as getInstanceUuid, r as recordWorkerRepo, t as WorktreeRegistry } from "./lifecycle-CpnAVVQ_.js";
|
|
4
4
|
import { createRequire } from "node:module";
|
|
5
5
|
import { defineCommand, runMain } from "citty";
|
|
6
6
|
import consola from "consola";
|
|
@@ -3197,7 +3197,7 @@ function logAudit$1(record) {
|
|
|
3197
3197
|
try {
|
|
3198
3198
|
const fs$2 = await import("node:fs/promises");
|
|
3199
3199
|
const path$2 = await import("node:path");
|
|
3200
|
-
const { PATHS: PATHS$1 } = await import("./paths-
|
|
3200
|
+
const { PATHS: PATHS$1 } = await import("./paths-B7jmIPYq.js");
|
|
3201
3201
|
const dir = path$2.join(PATHS$1.APP_DIR, "browser-mcp");
|
|
3202
3202
|
await fs$2.mkdir(dir, { recursive: true });
|
|
3203
3203
|
const line = JSON.stringify({
|
|
@@ -5851,6 +5851,221 @@ function acquireInFlightSlot() {
|
|
|
5851
5851
|
};
|
|
5852
5852
|
}
|
|
5853
5853
|
|
|
5854
|
+
//#endregion
|
|
5855
|
+
//#region src/lib/tokenizer.ts
|
|
5856
|
+
/**
 * Lazy loaders for the supported `gpt-tokenizer` encodings, keyed by
 * encoding name. Each value is a thunk around a dynamic `import()` with a
 * literal specifier, so an encoding module is only requested when its
 * loader is actually invoked.
 */
const ENCODING_MAP = {
	o200k_base: () => import("gpt-tokenizer/encoding/o200k_base"),
	cl100k_base: () => import("gpt-tokenizer/encoding/cl100k_base"),
	p50k_base: () => import("gpt-tokenizer/encoding/p50k_base"),
	p50k_edit: () => import("gpt-tokenizer/encoding/p50k_edit"),
	r50k_base: () => import("gpt-tokenizer/encoding/r50k_base")
};
/** Module-level store for encoder modules that have already been loaded. */
const encodingCache = /* @__PURE__ */ new Map();
|
|
5864
|
+
/**
 * Estimate the tokens contributed by a message's tool calls.
 *
 * Each call costs `constants.funcInit` plus the encoded length of its JSON
 * serialization; `constants.funcEnd` is added once for the whole list
 * (including when the list is empty).
 *
 * @param toolCalls - iterable of tool-call objects
 * @param encoder - object exposing `encode(text)` that returns a token array
 * @param constants - overhead table providing `funcInit` and `funcEnd`
 * @returns estimated token count for the tool calls
 */
const calculateToolCallsTokens = (toolCalls, encoder, constants) => {
	const perCallTotal = [...toolCalls].reduce((sum, call) => {
		const serialized = JSON.stringify(call);
		return sum + constants.funcInit + encoder.encode(serialized).length;
	}, 0);
	return perCallTotal + constants.funcEnd;
};
|
|
5876
|
+
/**
 * Estimate the tokens contributed by an array-form message `content`.
 *
 * - `image_url` parts: encoded length of the URL string plus a fixed 85.
 * - any other part with a truthy `text`: encoded length of that text.
 * - everything else contributes nothing.
 *
 * @param contentParts - iterable of content-part objects
 * @param encoder - object exposing `encode(text)` that returns a token array
 * @returns estimated token count for the parts
 */
const calculateContentPartsTokens = (contentParts, encoder) => {
	let total = 0;
	for (const part of contentParts) {
		if (part.type === "image_url") {
			total += encoder.encode(part.image_url.url).length + 85;
			continue;
		}
		if (part.text) {
			total += encoder.encode(part.text).length;
		}
	}
	return total;
};
|
|
5885
|
+
/**
 * Estimate the tokens of one chat message.
 *
 * Starts from a fixed 3-token per-message overhead, then walks every own
 * field of the message:
 * - any string value adds its encoded length (regardless of key);
 * - a `name` key adds 1;
 * - a `tool_calls` key adds `calculateToolCallsTokens(...)`;
 * - an array-valued `content` key adds `calculateContentPartsTokens(...)`.
 *
 * @param message - chat message object
 * @param encoder - object exposing `encode(text)` that returns a token array
 * @param constants - overhead table forwarded to the tool-call counter
 * @returns estimated token count for the message
 */
const calculateMessageTokens = (message, encoder, constants) => {
	let total = 3;
	for (const [field, fieldValue] of Object.entries(message)) {
		if (typeof fieldValue === "string") {
			total += encoder.encode(fieldValue).length;
		}
		switch (field) {
			case "name":
				total += 1;
				break;
			case "tool_calls":
				total += calculateToolCallsTokens(fieldValue, encoder, constants);
				break;
			case "content":
				if (Array.isArray(fieldValue)) total += calculateContentPartsTokens(fieldValue, encoder);
				break;
			default:
				break;
		}
	}
	return total;
};
|
|
5900
|
+
/**
 * Estimate the tokens of a list of chat messages.
 *
 * An empty list counts as 0. Otherwise the result is the sum of
 * `calculateMessageTokens` over all messages plus a fixed 3-token trailer
 * (presumably the reply primer — TODO confirm against the upstream
 * token-counting recipe).
 *
 * @param messages - array of chat messages
 * @param encoder - object exposing `encode(text)` that returns a token array
 * @param constants - overhead table forwarded to the per-message counter
 * @returns estimated token count
 */
const calculateTokens = (messages, encoder, constants) => {
	if (messages.length === 0) {
		return 0;
	}
	let total = 3;
	for (const entry of messages) {
		total += calculateMessageTokens(entry, encoder, constants);
	}
	return total;
};
|
|
5910
|
+
/**
 * Load (and cache) the encoder module for an encoding name.
 *
 * Names that are not OWN keys of `ENCODING_MAP` fall back to `o200k_base`;
 * the fallback module is cached under the requested name so the lookup is
 * not repeated. An own-property check is used instead of `in` so that
 * inherited names such as "toString" or "constructor" are treated as
 * unknown encodings rather than being invoked as loaders.
 *
 * @param encoding - encoding name (e.g. "o200k_base")
 * @returns the loaded encoding module
 */
const getEncodeChatFunction = async (encoding) => {
	const cachedModule = encodingCache.get(encoding);
	if (cachedModule) return cachedModule;
	const isSupported = Object.prototype.hasOwnProperty.call(ENCODING_MAP, encoding);
	const loadModule = isSupported ? ENCODING_MAP[encoding] : ENCODING_MAP.o200k_base;
	const encodingModule = await loadModule();
	encodingCache.set(encoding, encodingModule);
	return encodingModule;
};
|
|
5928
|
+
/**
 * Read the tokenizer name a model declares.
 *
 * @param model - model entry; `capabilities.tokenizer` is consulted
 * @returns the declared tokenizer, or "o200k_base" when it is missing/falsy
 */
const getTokenizerFromModel = (model) => {
	const declared = model.capabilities?.tokenizer;
	if (declared) return declared;
	return "o200k_base";
};
|
|
5934
|
+
/**
 * Public entry point for obtaining an encoder by encoding name; a thin
 * wrapper over `getEncodeChatFunction` that lets callers count raw-text
 * tokens without building a chat payload.
 *
 * @param encoding - encoding name, defaults to "o200k_base"
 * @returns promise of the encoder module
 */
const loadEncoder = async (encoding = "o200k_base") => {
	return getEncodeChatFunction(encoding);
};
|
|
5940
|
+
/**
 * Count the tokens of a raw text string under the given encoding by
 * actually encoding it (no chars-per-token estimate).
 *
 * Falsy text (empty string, null, undefined) short-circuits to 0 without
 * loading an encoder.
 *
 * @param text - text to measure
 * @param encoding - encoding name, defaults to "o200k_base"
 * @returns promise of the token count
 */
const getTextTokenCount = async (text, encoding = "o200k_base") => {
	if (!text) return 0;
	const encoder = await getEncodeChatFunction(encoding);
	const tokenIds = encoder.encode(text);
	return tokenIds.length;
};
|
|
5953
|
+
/**
 * Build the overhead table used by the tool/function token estimators.
 *
 * All entries are the same for every model except `funcInit`, which is 10
 * for the ids "gpt-3.5-turbo" and "gpt-4" and 7 for everything else.
 *
 * @param model - model entry; only `id` is read
 * @returns a fresh constants object
 */
const getModelConstants = (model) => {
	const modelId = model.id;
	const usesLargerFuncInit = modelId === "gpt-3.5-turbo" || modelId === "gpt-4";
	return {
		funcInit: usesLargerFuncInit ? 10 : 7,
		propInit: 3,
		propKey: 3,
		enumInit: -3,
		enumItem: 3,
		funcEnd: 12
	};
};
|
|
5973
|
+
/**
 * Estimate the tokens of a single property inside a tool's JSON-schema
 * `properties` map.
 *
 * The count is `constants.propKey`, plus (for object schemas):
 * - when `enum` is an array: `constants.enumInit` and, per item,
 *   `constants.enumItem` + the encoded length of `String(item)`;
 * - the encoded length of `"<key>:<type>:<description>"`, where a missing
 *   type reads as "string" and one trailing "." is stripped from the
 *   description;
 * - the encoded length of `"<name>:<value>"` for every other schema key
 *   (non-string values are JSON-serialized).
 *
 * Robustness: a `description` that is not a string is treated as empty
 * instead of throwing on `.endsWith` — tool schemas come from clients and
 * are not guaranteed to be well-formed.
 *
 * @param key - property name
 * @param prop - property schema; non-objects/null only cost `propKey`
 * @param context - `{ encoder, constants }`
 * @returns estimated token count for the property
 */
const calculateParameterTokens = (key, prop, context) => {
	const { encoder, constants } = context;
	let tokens = constants.propKey;
	if (typeof prop !== "object" || prop === null) return tokens;
	const paramType = prop.type || "string";
	let paramDesc = typeof prop.description === "string" ? prop.description : "";
	if (Array.isArray(prop.enum)) {
		tokens += constants.enumInit;
		for (const item of prop.enum) {
			tokens += constants.enumItem;
			tokens += encoder.encode(String(item)).length;
		}
	}
	if (paramDesc.endsWith(".")) paramDesc = paramDesc.slice(0, -1);
	tokens += encoder.encode(`${key}:${paramType}:${paramDesc}`).length;
	// These keys are already accounted for above; everything else is
	// counted as a generic "name:value" line.
	const excludedKeys = new Set([
		"type",
		"description",
		"enum"
	]);
	for (const [propertyName, propertyValue] of Object.entries(prop)) {
		if (excludedKeys.has(propertyName)) continue;
		const propertyText = typeof propertyValue === "string" ? propertyValue : JSON.stringify(propertyValue);
		tokens += encoder.encode(`${propertyName}:${propertyText}`).length;
	}
	return tokens;
};
|
|
6006
|
+
/**
 * Estimate the tokens of a tool's `parameters` JSON schema.
 *
 * - The `properties` key: when it holds a non-empty object, costs
 *   `constants.propInit` plus `calculateParameterTokens` for each property.
 * - Every other top-level key costs the encoded length of `"<key>:<value>"`
 *   (non-string values are JSON-serialized).
 *
 * Robustness: a `properties` value that is null or not an object is
 * skipped instead of throwing in `Object.keys` — schemas are supplied by
 * clients and may be malformed.
 *
 * @param parameters - schema object; falsy or non-object yields 0
 * @param encoder - object exposing `encode(text)` that returns a token array
 * @param constants - overhead table (`propInit`, plus those used per property)
 * @returns estimated token count for the schema
 */
const calculateParametersTokens = (parameters, encoder, constants) => {
	if (!parameters || typeof parameters !== "object") return 0;
	let tokens = 0;
	for (const [key, value] of Object.entries(parameters)) {
		if (key === "properties") {
			if (typeof value !== "object" || value === null) continue;
			const propertyEntries = Object.entries(value);
			if (propertyEntries.length === 0) continue;
			tokens += constants.propInit;
			for (const [propKey, propSchema] of propertyEntries) {
				tokens += calculateParameterTokens(propKey, propSchema, {
					encoder,
					constants
				});
			}
			continue;
		}
		const paramText = typeof value === "string" ? value : JSON.stringify(value);
		tokens += encoder.encode(`${key}:${paramText}`).length;
	}
	return tokens;
};
|
|
6028
|
+
/**
 * Estimate the tokens of one tool definition.
 *
 * Sums `constants.funcInit`, the encoded length of `"<name>:<description>"`
 * (one trailing "." stripped from the description, missing description
 * read as empty), and — when `function.parameters` is a non-null object —
 * `calculateParametersTokens` for that schema.
 *
 * @param tool - tool definition with a `function` member
 * @param encoder - object exposing `encode(text)` that returns a token array
 * @param constants - overhead table
 * @returns estimated token count for the tool
 */
const calculateToolTokens = (tool, encoder, constants) => {
	const { name: fnName, description: fnDescription, parameters: fnSchema } = tool.function;
	const rawDesc = fnDescription || "";
	const trimmedDesc = rawDesc.endsWith(".") ? rawDesc.slice(0, -1) : rawDesc;
	const headerTokens = encoder.encode(`${fnName}:${trimmedDesc}`).length;
	const hasSchema = typeof fnSchema === "object" && fnSchema !== null;
	const schemaTokens = hasSchema ? calculateParametersTokens(fnSchema, encoder, constants) : 0;
	return constants.funcInit + headerTokens + schemaTokens;
};
|
|
6042
|
+
/**
 * Estimate the tokens of a request's whole tool list: the per-tool
 * estimates from `calculateToolTokens` plus one `constants.funcEnd`.
 *
 * @param tools - iterable of tool definitions
 * @param encoder - object exposing `encode(text)` that returns a token array
 * @param constants - overhead table
 * @returns estimated token count for all tools
 */
const numTokensForTools = (tools, encoder, constants) => {
	const perTool = [...tools].map((tool) => calculateToolTokens(tool, encoder, constants));
	const toolsTotal = perTool.reduce((sum, count) => sum + count, 0);
	return toolsTotal + constants.funcEnd;
};
|
|
6051
|
+
/**
 * Estimate input/output token counts for a chat payload.
 *
 * Messages whose role is "assistant" are counted as output; all other
 * messages, plus the tool definitions when `payload.tools` is non-empty,
 * are counted as input. The encoder is chosen from the model's declared
 * tokenizer.
 *
 * @param payload - chat payload with `messages` and optional `tools`
 * @param model - model entry used for tokenizer and overhead constants
 * @returns promise of `{ input, output }`
 */
const getTokenCount = async (payload, model) => {
	const tokenizerName = getTokenizerFromModel(model);
	const encoder = await getEncodeChatFunction(tokenizerName);
	const isAssistant = (msg) => msg.role === "assistant";
	const allMessages = payload.messages;
	const promptSide = allMessages.filter((msg) => !isAssistant(msg));
	const replySide = allMessages.filter(isAssistant);
	const constants = getModelConstants(model);
	let input = calculateTokens(promptSide, encoder, constants);
	const hasTools = payload.tools && payload.tools.length > 0;
	if (hasTools) {
		input += numTokensForTools(payload.tools, encoder, constants);
	}
	const output = calculateTokens(replySide, encoder, constants);
	return { input, output };
};
|
|
6068
|
+
|
|
5854
6069
|
//#endregion
|
|
5855
6070
|
//#region src/services/copilot/create-messages.ts
|
|
5856
6071
|
/**
|
|
@@ -6220,8 +6435,24 @@ function browserToolsEnabled() {
|
|
|
6220
6435
|
if (!(state.browseEnabled || process.env.GH_ROUTER_ENABLE_BROWSE === "1")) return false;
|
|
6221
6436
|
return hasSupportedBrowserInstalled();
|
|
6222
6437
|
}
|
|
6438
|
+
/**
 * Pattern for catalog ids of the 1M-context Opus 4.7 variant (e.g.
 * `claude-opus-4.7-1m-internal`): "opus-4.7" followed anywhere later by
 * "1m", case-insensitive.
 */
const OPUS_1M_RE = /opus-4\.7.*1m/i;
/**
 * Pick the model id the `opus_critic` persona should run on.
 *
 * Returns the id of the first entry in the cached model catalog
 * (`state.models.data`) that matches `OPUS_1M_RE`, so the persona gets the
 * large-window variant when the catalog carries one; otherwise (no match,
 * or no catalog loaded) returns the plain "claude-opus-4-7".
 *
 * @returns model id for opus_critic
 */
function resolveOpusCriticModel() {
	const catalog = state.models?.data;
	const largeWindowEntry = catalog?.find((candidate) => OPUS_1M_RE.test(candidate.id));
	if (largeWindowEntry) {
		return largeWindowEntry.id;
	}
	return "claude-opus-4-7";
}
|
|
6223
6451
|
function activePersonas() {
|
|
6224
|
-
return PERSONAS_READ.filter((p) => !p.requiresGeminiCatalog || geminiAvailable())
|
|
6452
|
+
return PERSONAS_READ.filter((p) => !p.requiresGeminiCatalog || geminiAvailable()).map((p) => p.toolNameHttp === "opus_critic" ? {
|
|
6453
|
+
...p,
|
|
6454
|
+
model: resolveOpusCriticModel()
|
|
6455
|
+
} : p);
|
|
6225
6456
|
}
|
|
6226
6457
|
function toolEntries() {
|
|
6227
6458
|
const personaEntries = activePersonas().map((p) => ({
|
|
@@ -6355,6 +6586,46 @@ function predictedTooLong(persona, effort, briefBytes) {
|
|
|
6355
6586
|
return { tooLong: false };
|
|
6356
6587
|
}
|
|
6357
6588
|
/**
 * Tokens reserved below a peer model's `max_prompt_tokens` for the
 * per-call message framing (role wrappers, output_config, etc.) and any
 * discrepancy between our o200k count and Copilot's full-payload count.
 * The prompt-window guard subtracts this from the model limit to obtain
 * the budget a brief must fit in.
 */
const PEER_PROMPT_TOKEN_RESERVE = 2e3;
|
|
6594
|
+
/**
 * Prompt-window guard for peer (persona) calls.
 *
 * Looks up the persona's resolved model in the cached catalog, derives a
 * token budget (`max_prompt_tokens` minus `PEER_PROMPT_TOKEN_RESERVE`) and
 * checks the text that will be sent — the persona's base instructions plus
 * the user text built from `prompt` and `context` — against it.
 *
 * The UTF-8 byte length is checked first: when the text has no more bytes
 * than the budget, tokenization is skipped and the call is allowed.
 * Otherwise the text is tokenized with the model's declared tokenizer.
 *
 * The guard fails open: an unknown model, a missing/invalid limit, or a
 * tokenizer failure all allow the call.
 *
 * @param persona - persona definition (`model`, `toolNameHttp`, `baseInstructions`)
 * @param prompt - caller-supplied prompt
 * @param context - caller-supplied context
 * @returns a rejection message when over budget, otherwise undefined
 */
async function predictedWindowOverflow(persona, prompt, context) {
	const id = resolveModel(persona.model);
	const entry = state.models?.data?.find((m) => m.id === id);
	if (!entry) return undefined;
	const maxPromptTokens = entry.capabilities?.limits?.max_prompt_tokens;
	const limitIsUsable = typeof maxPromptTokens === "number" && Number.isFinite(maxPromptTokens) && maxPromptTokens > 0;
	if (!limitIsUsable) return undefined;
	const budget = maxPromptTokens - PEER_PROMPT_TOKEN_RESERVE;
	const inputText = `${persona.baseInstructions}\n${buildUserText(prompt, context)}`;
	// Cheap pre-check: fewer bytes than budget means no tokenization needed.
	const byteLength = Buffer.byteLength(inputText, "utf8");
	if (byteLength <= budget) return undefined;
	let tokens;
	try {
		tokens = await getTextTokenCount(inputText, getTokenizerFromModel(entry));
	} catch (err) {
		consola.debug("[mcp] window-guard tokenization failed; allowing call:", err);
		return undefined;
	}
	if (tokens <= budget) return undefined;
	// Only suggest opus_critic when the caller is not already on the 1M Opus.
	let opusHint = " / `opus_critic` (Opus-4.7 1M ≈ 936K tokens, when the enterprise catalog carries it)";
	if (OPUS_1M_RE.test(id)) opusHint = "";
	return `pre-flight rejected: this ${persona.toolNameHttp} brief is ≈${tokens} tokens, over the ${budget}-token budget for ${persona.model} (its ${maxPromptTokens}-token prompt window minus a ${PEER_PROMPT_TOKEN_RESERVE}-token framing reserve). Do NOT summarize or truncate the artifact to fit. Route the full artifact to a larger-window peer — \`codex_critic\` (gpt-5.5 ≈ 922K tokens)${opusHint} — or split it into focused sub-calls BY CONCERN and call them in parallel, then aggregate.`;
}
|
|
6628
|
+
/**
|
|
6358
6629
|
* JSON-path pre-flight predictedTooLong gate. Returns a JSON-RPC result
|
|
6359
6630
|
* body wrapping a tool-error envelope when the call would bust the 60s
|
|
6360
6631
|
* tools/call ceiling on the JSON path; returns undefined when the call
|
|
@@ -6516,6 +6787,10 @@ async function handleToolsCall(body) {
|
|
|
6516
6787
|
if (requestedEffort !== void 0 && !persona.allowedEfforts.includes(requestedEffort)) return rpcError(body.id, RPC_INVALID_PARAMS, `tools/call: persona "${persona.toolNameHttp}" does not accept effort="${requestedEffort}". Allowed: ${persona.allowedEfforts.join("|")}.`);
|
|
6517
6788
|
personaEffort = requestedEffort ?? persona.defaultEffort;
|
|
6518
6789
|
}
|
|
6790
|
+
if (persona && personaPrompt !== void 0) {
|
|
6791
|
+
const overflow = await predictedWindowOverflow(persona, personaPrompt, personaContext);
|
|
6792
|
+
if (overflow) return rpcResult(body.id, toolError(overflow));
|
|
6793
|
+
}
|
|
6519
6794
|
const release = acquireInFlightSlot();
|
|
6520
6795
|
if (!release) return rpcResult(body.id, {
|
|
6521
6796
|
content: [{
|
|
@@ -6755,10 +7030,13 @@ function acceptsEventStream(accept) {
|
|
|
6755
7030
|
/**
|
|
6756
7031
|
* SSE-streamed response for a single tools/call. Delegates the actual
|
|
6757
7032
|
* upstream call to `handleToolsCall` (so the per-persona effort gate,
|
|
6758
|
-
*
|
|
6759
|
-
* inFlight slot accounting all run identically); wraps
|
|
6760
|
-
* result in an SSE envelope with periodic heartbeats while
|
|
6761
|
-
* fetch is in flight.
|
|
7033
|
+
* the token-exact prompt-window guard, AbortController registration,
|
|
7034
|
+
* telemetry, and inFlight slot accounting all run identically); wraps
|
|
7035
|
+
* the awaited result in an SSE envelope with periodic heartbeats while
|
|
7036
|
+
* the upstream fetch is in flight. NOTE: the JSON-path `predictedTooLong`
|
|
7037
|
+
* byte cap is NOT applied here — it lives in `jsonPathPreflightCap`
|
|
7038
|
+
* (JSON path only); SSE bypasses it intentionally because heartbeats
|
|
7039
|
+
* keep the call alive past the ~60s tools/call ceiling it guards.
|
|
6762
7040
|
*
|
|
6763
7041
|
* SSE event format (per MCP Streamable HTTP):
|
|
6764
7042
|
* event: message
|
|
@@ -7145,15 +7423,39 @@ function injectAdvisorTool(rawBody) {
|
|
|
7145
7423
|
}];
|
|
7146
7424
|
return JSON.stringify(parsed);
|
|
7147
7425
|
}
|
|
7148
|
-
/**
|
|
7149
|
-
*
|
|
7150
|
-
*
|
|
7151
|
-
*
|
|
7152
|
-
*
|
|
7153
|
-
*
|
|
7154
|
-
*
|
|
7155
|
-
* and the advisor falls back silently. */
|
|
7426
|
+
/**
 * Fallback CHARACTER budget for `renderConversationAsText` when called
 * without a token `measure` (unit-agnostic default = char length). Also
 * the conservative no-catalog floor: 720,000 chars ≈ 240,000 tokens at
 * ~3 chars/token, which fits even the smaller `/responses` models. The
 * live path measures EXACT o200k tokens (see `runAdvisor`) and budgets
 * against the model's real `max_prompt_tokens`, so this constant is only
 * a safety net, never the normal path.
 */
const ADVISOR_MAX_CONVERSATION_CHARS = 72e4;
/**
 * Token budget used when the advisor model's `max_prompt_tokens` can't
 * be resolved from the live catalog. ≈ the 720K-char fallback expressed
 * in tokens.
 */
const ADVISOR_FALLBACK_MAX_TOKENS = 24e4;
/**
 * Tokens reserved below the model's `max_prompt_tokens` for the advisor
 * system prompt + per-call framing + any encode/wire discrepancy between
 * our o200k count and Copilot's full-payload count. The transcript token
 * budget is `max_prompt_tokens - reserve`. Generous on purpose: a 400
 * `model_max_prompt_tokens_exceeded` degrades to a silent advisor
 * fallback, and the marginal window we give up is irrelevant next to
 * gpt-5.5's 922K.
 */
const ADVISOR_PROMPT_TOKEN_RESERVE = 8e3;
|
|
7445
|
+
/**
 * Compute the TOKEN budget for the advisor transcript.
 *
 * Resolves the advisor model id, reads that model's `max_prompt_tokens`
 * from the cached catalog (`state.models.data`) and subtracts
 * `ADVISOR_PROMPT_TOKEN_RESERVE`, never going below 1. When the catalog,
 * the model entry, or a positive finite limit is unavailable, returns
 * `ADVISOR_FALLBACK_MAX_TOKENS`.
 *
 * @param advisorModel - requested advisor model name
 * @returns token budget for the rendered transcript
 */
function resolveAdvisorMaxTokens(advisorModel) {
	const id = resolveModel(advisorModel);
	const catalogEntry = state.models?.data?.find((m) => m.id === id);
	const maxPromptTokens = catalogEntry?.capabilities?.limits?.max_prompt_tokens;
	const limitIsUsable = typeof maxPromptTokens === "number" && Number.isFinite(maxPromptTokens) && maxPromptTokens > 0;
	if (!limitIsUsable) {
		return ADVISOR_FALLBACK_MAX_TOKENS;
	}
	const afterReserve = maxPromptTokens - ADVISOR_PROMPT_TOKEN_RESERVE;
	return Math.max(1, afterReserve);
}
|
|
7157
7459
|
/**
|
|
7158
7460
|
* Render an Anthropic-shape conversation (messages array with
|
|
7159
7461
|
* role/content blocks) as a single human-readable text blob. Used
|
|
@@ -7163,14 +7465,20 @@ const ADVISOR_MAX_CONVERSATION_CHARS = 72e4;
|
|
|
7163
7465
|
* just needs to READ the conversation, not produce more of it).
|
|
7164
7466
|
*
|
|
7165
7467
|
* Front-truncates oldest turns when the rendered output would exceed
|
|
7166
|
-
* `
|
|
7468
|
+
* `maxUnits`. The advisor cares more about current state (latest
|
|
7167
7469
|
* tool calls, errors, in-flight task) than the original prompt —
|
|
7168
7470
|
* mirrors Claude Code's own context-truncation strategy. When any
|
|
7169
7471
|
* turns are dropped, prepends a `[TRUNCATED: N earlier turn(s)
|
|
7170
7472
|
* omitted ...]` notice so the advisor knows the transcript is
|
|
7171
7473
|
* partial and can flag if it needs the missing context.
|
|
7474
|
+
*
|
|
7475
|
+
* Unit-agnostic via the injected `measure` function: production passes
|
|
7476
|
+
* an EXACT o200k token counter and a token budget (so truncation tracks
|
|
7477
|
+
* the model's real `max_prompt_tokens`); the default `measure` is char
|
|
7478
|
+
* length, so callers/tests that pass a plain numeric budget get the
|
|
7479
|
+
* historical character-budget behavior.
|
|
7172
7480
|
*/
|
|
7173
|
-
function renderConversationAsText(conversation,
|
|
7481
|
+
function renderConversationAsText(conversation, maxUnits = ADVISOR_MAX_CONVERSATION_CHARS, measure = (s) => s.length) {
|
|
7174
7482
|
const turnBlocks = [];
|
|
7175
7483
|
for (let i = 0; i < conversation.length; i++) {
|
|
7176
7484
|
const msg = conversation[i];
|
|
@@ -7191,23 +7499,42 @@ function renderConversationAsText(conversation, maxChars = ADVISOR_MAX_CONVERSAT
|
|
|
7191
7499
|
block.push("");
|
|
7192
7500
|
turnBlocks.push(block.join("\n"));
|
|
7193
7501
|
}
|
|
7194
|
-
let
|
|
7502
|
+
let totalUnits = 0;
|
|
7195
7503
|
let firstKeptIdx = turnBlocks.length;
|
|
7196
7504
|
for (let i = turnBlocks.length - 1; i >= 0; i--) {
|
|
7197
|
-
const len = turnBlocks[i]
|
|
7198
|
-
if (
|
|
7199
|
-
|
|
7505
|
+
const len = measure(turnBlocks[i]) + 1;
|
|
7506
|
+
if (totalUnits + len > maxUnits) break;
|
|
7507
|
+
totalUnits += len;
|
|
7200
7508
|
firstKeptIdx = i;
|
|
7201
7509
|
}
|
|
7202
7510
|
if (firstKeptIdx === turnBlocks.length && turnBlocks.length > 0) {
|
|
7203
|
-
const
|
|
7204
|
-
|
|
7511
|
+
const last = turnBlocks[turnBlocks.length - 1];
|
|
7512
|
+
const notice = `[TRUNCATED: conversation too long for advisor model context; only the tail of the latest (turn ${turnBlocks.length}) is shown]\n\n`;
|
|
7513
|
+
return notice + truncateTailToUnits(last, Math.max(0, maxUnits - measure(notice)), measure);
|
|
7205
7514
|
}
|
|
7206
7515
|
const kept = turnBlocks.slice(firstKeptIdx);
|
|
7207
7516
|
if (firstKeptIdx > 0) kept.unshift(`[TRUNCATED: ${firstKeptIdx} earlier turn(s) omitted to fit advisor model context budget; ${turnBlocks.length - firstKeptIdx} most-recent turn(s) shown below]\n`);
|
|
7208
7517
|
return kept.join("\n");
|
|
7209
7518
|
}
|
|
7210
7519
|
/**
 * Return the longest suffix of `text` whose `measure(...)` is ≤ `maxUnits`.
 *
 * Binary search on the suffix length, so `measure` is called O(log n)
 * times and the result is exact for any `measure` that does not decrease
 * as the suffix grows (character length always; token counts in
 * practice). Unit-agnostic: works with a token counter or plain length.
 *
 * The cut point is never left in the middle of a UTF-16 surrogate pair:
 * if the chosen suffix would start with the low half of a pair, that
 * orphan code unit is dropped so the result is well-formed text (a lone
 * surrogate would turn into U+FFFD when serialized for the wire).
 *
 * @param text - text to truncate from the front
 * @param maxUnits - budget in `measure` units; ≤ 0 yields ""
 * @param measure - function mapping a string to its size in units
 * @returns the kept tail of `text`
 */
function truncateTailToUnits(text, maxUnits, measure) {
	if (maxUnits <= 0) return "";
	if (measure(text) <= maxUnits) return text;
	// Invariant: a suffix of length `lo` fits; no length above `hi` fits.
	let lo = 0;
	let hi = text.length;
	while (lo < hi) {
		// Upper midpoint so the loop always makes progress when lo = hi - 1.
		const mid = lo + Math.ceil((hi - lo) / 2);
		if (measure(text.slice(text.length - mid)) <= maxUnits) lo = mid;
		else hi = mid - 1;
	}
	let start = text.length - lo;
	if (start > 0 && start < text.length) {
		const first = text.charCodeAt(start);
		const previous = text.charCodeAt(start - 1);
		const startsOnLowSurrogate = first >= 56320 && first <= 57343;
		const previousIsHighSurrogate = previous >= 55296 && previous <= 56319;
		if (startsOnLowSurrogate && previousIsHighSurrogate) start += 1;
	}
	return text.slice(start);
}
|
|
7537
|
+
/**
|
|
7211
7538
|
* Run the advisor model with the full conversation context. Returns
|
|
7212
7539
|
* the advisor's text response.
|
|
7213
7540
|
*
|
|
@@ -7227,8 +7554,20 @@ function renderConversationAsText(conversation, maxChars = ADVISOR_MAX_CONVERSAT
|
|
|
7227
7554
|
async function runAdvisor(conversation, advisorModel, advisorEffort, signal) {
|
|
7228
7555
|
if (signal?.aborted) throw new Error("advisor call aborted before dispatch");
|
|
7229
7556
|
const advisorSystem = "You are an expert advisor reviewing an in-progress Claude Code session. The transcript below is the work-in-progress (turns numbered, with tool calls and results inlined). Read carefully and provide concrete, actionable advice on the next step or course-correction. Be specific — cite the parts of the transcript you're responding to. If the assistant is on the right track, say so explicitly. If they're stuck or off-track, name the specific assumption or step to revisit. Aim for 2-5 paragraphs of substantive guidance.";
|
|
7230
|
-
const conversationText = renderConversationAsText(conversation);
|
|
7231
7557
|
const resolvedAdvisorModel = resolveModel(advisorModel);
|
|
7558
|
+
let measure;
|
|
7559
|
+
let maxUnits;
|
|
7560
|
+
try {
|
|
7561
|
+
const modelEntry = state.models?.data?.find((m) => m.id === resolvedAdvisorModel);
|
|
7562
|
+
const encoder = await loadEncoder(modelEntry ? getTokenizerFromModel(modelEntry) : "o200k_base");
|
|
7563
|
+
measure = (s) => encoder.encode(s).length;
|
|
7564
|
+
maxUnits = resolveAdvisorMaxTokens(advisorModel);
|
|
7565
|
+
} catch (err) {
|
|
7566
|
+
consola.debug("advisor: tokenizer load failed; using char-length budget:", err);
|
|
7567
|
+
measure = (s) => s.length;
|
|
7568
|
+
maxUnits = ADVISOR_MAX_CONVERSATION_CHARS;
|
|
7569
|
+
}
|
|
7570
|
+
const conversationText = renderConversationAsText(conversation, maxUnits, measure);
|
|
7232
7571
|
if (/^(gpt-|o\d|.*codex)/i.test(resolvedAdvisorModel)) {
|
|
7233
7572
|
const response = await createResponses({
|
|
7234
7573
|
model: resolvedAdvisorModel,
|
|
@@ -9898,7 +10237,7 @@ const PERSONAS_READ = Object.freeze([
|
|
|
9898
10237
|
toolNameHttp: "codex_critic",
|
|
9899
10238
|
model: "gpt-5.5",
|
|
9900
10239
|
endpoint: "/v1/responses",
|
|
9901
|
-
description: "Adversarial second opinion on plans, designs, or code tradeoffs. Backed by gpt-5.5 (OpenAI,
|
|
10240
|
+
description: "Adversarial second opinion on plans, designs, or code tradeoffs. Backed by gpt-5.5 (OpenAI, ≈922K-token input window) — strongest reasoning model in the critic lineup, different lab than Opus. Best for architecture decisions, design reviews, and tradeoff analysis where cross-lab diversity matters. Not for line-level code review (use codex_reviewer). Pass artifact verbatim.",
|
|
9902
10241
|
baseInstructions: CRITIC_BASE,
|
|
9903
10242
|
agentPrompt: "",
|
|
9904
10243
|
writeCapable: false,
|
|
@@ -9934,7 +10273,7 @@ const PERSONAS_READ = Object.freeze([
|
|
|
9934
10273
|
toolNameHttp: "codex_reviewer",
|
|
9935
10274
|
model: "gpt-5.3-codex",
|
|
9936
10275
|
endpoint: "/v1/responses",
|
|
9937
|
-
description: "Line-level review of a concrete diff or single file. Backed by gpt-5.3-codex (OpenAI,
|
|
10276
|
+
description: "Line-level review of a concrete diff or single file. Backed by gpt-5.3-codex (OpenAI, ≈272K-token input window) — code-specialist, fastest critic (~16s). Surfaces bugs, edge cases, security issues, and idiom violations at specific line numbers. Not suited for architecture or design review (use codex_critic for plans). Pass artifact verbatim.",
|
|
9938
10277
|
baseInstructions: REVIEWER_BASE,
|
|
9939
10278
|
agentPrompt: "",
|
|
9940
10279
|
writeCapable: false,
|
|
@@ -9952,7 +10291,7 @@ const PERSONAS_READ = Object.freeze([
|
|
|
9952
10291
|
toolNameHttp: "opus_critic",
|
|
9953
10292
|
model: "claude-opus-4-7",
|
|
9954
10293
|
endpoint: "/v1/messages",
|
|
9955
|
-
description: "Adversarial second opinion from a fresh-context Opus 4.7 — same lab as the lead, limited blind-spot diversity vs cross-lab critics
|
|
10294
|
+
description: "Adversarial second opinion from a fresh-context Opus 4.7 — same lab as the lead, limited blind-spot diversity vs cross-lab critics. On enterprise catalogs that carry Opus-4.7-1M it runs with a ≈936K-token input window and handles large artifacts without decomposition; otherwise ≈168K. Fast (~22s), catches confabulation and motivated reasoning. Pass artifact verbatim.",
|
|
9956
10295
|
baseInstructions: OPUS_CRITIC_BASE,
|
|
9957
10296
|
agentPrompt: "",
|
|
9958
10297
|
writeCapable: false,
|
|
@@ -10615,14 +10954,14 @@ function buildCoordinatorAgent(opts) {
|
|
|
10615
10954
|
"",
|
|
10616
10955
|
"- **Plan / design / architecture choice** → fan out to `codex-critic` (gpt-5.5, strongest reasoning, cross-lab)" + (opts.geminiAvailable ? " AND `gemini-critic` (third-lab triangulation, strong on formal reasoning) in parallel" : "") + ". codex-reviewer is the wrong tool for plans (it's a code-specialist, not an architecture critic).",
|
|
10617
10956
|
"- **Concrete diff or single file** → fan out to `codex-reviewer` (gpt-5.3-codex, line-level code specialist, fastest at ~16s)" + (opts.geminiAvailable ? " AND `gemini-critic` for cross-lab triangulation" : "") + ". For very small changes (<20 lines), one `codex-reviewer` call is enough.",
|
|
10618
|
-
"- **Large artifact (
|
|
10957
|
+
"- **Large artifact** → the only peers that take a large artifact WHOLE are `codex-critic` (gpt-5.5, ≈922K-token input window) and `opus-critic` (Opus-4.7-1M, ≈936K-token input on enterprise catalogs; ≈168K otherwise). Route the full artifact to those for cross-lab coverage. `codex-reviewer` (≈272K) and `gemini-critic` (≈136K) have small windows — see Decomposition below: never summarize or downsize the request to squeeze a large artifact into a small-window peer.",
|
|
10619
10958
|
"- **Formal reasoning, proofs, or invariants** → prefer `gemini-critic`" + (opts.geminiAvailable ? " (gemini-3.1-pro, strong on math and formally-stated properties)" : " (NOT REGISTERED in this session — gemini-3.x not in catalog)") + ".",
|
|
10620
10959
|
"- **Tie-breaker after codex-critic has weighed in** → call `gemini-critic`" + (opts.geminiAvailable ? "" : " (NOT REGISTERED in this session)") + " or `opus-critic` with the artifact AND codex-critic's verdict for cross-check.",
|
|
10621
10960
|
"- **Fast sanity check** → `opus-critic` (~22s, same lab as lead but fresh context — catches confabulation and motivated reasoning).",
|
|
10622
10961
|
"",
|
|
10623
10962
|
"## Decomposition for large artifacts",
|
|
10624
10963
|
"",
|
|
10625
|
-
"
|
|
10964
|
+
"Route by the peer's real PROMPT WINDOW (input tokens): `codex-critic` gpt-5.5 ≈922K · `opus-critic` Opus-4.7-1M ≈936K (enterprise catalogs; ≈168K otherwise) · `codex-reviewer` gpt-5.3-codex ≈272K · `gemini-critic` gemini-3.1-pro ≈136K. The proxy REJECTS (with an actionable message) any single call whose brief exceeds the target peer's window — it will NOT silently truncate, because dropping lines from a review artifact is worse than a clear error. So: send the full artifact only to peers whose window fits it (large artifacts → `codex-critic` and/or `opus-critic`). When a peer's window is too small (commonly `gemini-critic` at ≈136K, or `codex-reviewer` at ≈272K), do NOT summarize or downsize the request to include it — either skip that peer, or split the artifact into 2-4 logical batches BY CONCERN (not by raw size — semantic batches give better per-batch reviews) that each fit, and call in parallel. Use the big-window peers for the whole and reserve a small-window peer like gemini for the concerns it can actually hold. The proxy's MCP cap allows up to 8 in-flight calls. Aggregate findings yourself before reporting back. (Separately, on the JSON transport a per-effort `predictedTooLong` byte cap still guards the ~60s tools/call timeout for non-SSE clients; Claude Code uses SSE, which streams with heartbeats and isn't subject to that cap.)",
|
|
10626
10965
|
"",
|
|
10627
10966
|
"## Aggregation contract",
|
|
10628
10967
|
"",
|
|
@@ -11120,7 +11459,7 @@ function initProxyFromEnv() {
|
|
|
11120
11459
|
//#endregion
|
|
11121
11460
|
//#region package.json
|
|
11122
11461
|
var name = "github-router";
|
|
11123
|
-
var version = "0.3.39";
|
|
11462
|
+
var version = "0.3.41";
|
|
11124
11463
|
|
|
11125
11464
|
//#endregion
|
|
11126
11465
|
//#region src/lib/approval.ts
|
|
@@ -11276,202 +11615,6 @@ function collectToolFieldKeys(body) {
|
|
|
11276
11615
|
return [...seen].sort();
|
|
11277
11616
|
}
|
|
11278
11617
|
|
|
11279
|
-
//#endregion
|
|
11280
|
-
//#region src/lib/tokenizer.ts
|
|
11281
|
-
const ENCODING_MAP = {
|
|
11282
|
-
o200k_base: () => import("gpt-tokenizer/encoding/o200k_base"),
|
|
11283
|
-
cl100k_base: () => import("gpt-tokenizer/encoding/cl100k_base"),
|
|
11284
|
-
p50k_base: () => import("gpt-tokenizer/encoding/p50k_base"),
|
|
11285
|
-
p50k_edit: () => import("gpt-tokenizer/encoding/p50k_edit"),
|
|
11286
|
-
r50k_base: () => import("gpt-tokenizer/encoding/r50k_base")
|
|
11287
|
-
};
|
|
11288
|
-
const encodingCache = /* @__PURE__ */ new Map();
|
|
11289
|
-
/**
|
|
11290
|
-
* Calculate tokens for tool calls
|
|
11291
|
-
*/
|
|
11292
|
-
const calculateToolCallsTokens = (toolCalls, encoder, constants) => {
|
|
11293
|
-
let tokens = 0;
|
|
11294
|
-
for (const toolCall of toolCalls) {
|
|
11295
|
-
tokens += constants.funcInit;
|
|
11296
|
-
tokens += encoder.encode(JSON.stringify(toolCall)).length;
|
|
11297
|
-
}
|
|
11298
|
-
tokens += constants.funcEnd;
|
|
11299
|
-
return tokens;
|
|
11300
|
-
};
|
|
11301
|
-
/**
|
|
11302
|
-
* Calculate tokens for content parts
|
|
11303
|
-
*/
|
|
11304
|
-
const calculateContentPartsTokens = (contentParts, encoder) => {
|
|
11305
|
-
let tokens = 0;
|
|
11306
|
-
for (const part of contentParts) if (part.type === "image_url") tokens += encoder.encode(part.image_url.url).length + 85;
|
|
11307
|
-
else if (part.text) tokens += encoder.encode(part.text).length;
|
|
11308
|
-
return tokens;
|
|
11309
|
-
};
|
|
11310
|
-
/**
|
|
11311
|
-
* Calculate tokens for a single message
|
|
11312
|
-
*/
|
|
11313
|
-
const calculateMessageTokens = (message, encoder, constants) => {
|
|
11314
|
-
const tokensPerMessage = 3;
|
|
11315
|
-
const tokensPerName = 1;
|
|
11316
|
-
let tokens = tokensPerMessage;
|
|
11317
|
-
for (const [key, value] of Object.entries(message)) {
|
|
11318
|
-
if (typeof value === "string") tokens += encoder.encode(value).length;
|
|
11319
|
-
if (key === "name") tokens += tokensPerName;
|
|
11320
|
-
if (key === "tool_calls") tokens += calculateToolCallsTokens(value, encoder, constants);
|
|
11321
|
-
if (key === "content" && Array.isArray(value)) tokens += calculateContentPartsTokens(value, encoder);
|
|
11322
|
-
}
|
|
11323
|
-
return tokens;
|
|
11324
|
-
};
|
|
11325
|
-
/**
|
|
11326
|
-
* Calculate tokens using custom algorithm
|
|
11327
|
-
*/
|
|
11328
|
-
const calculateTokens = (messages, encoder, constants) => {
|
|
11329
|
-
if (messages.length === 0) return 0;
|
|
11330
|
-
let numTokens = 0;
|
|
11331
|
-
for (const message of messages) numTokens += calculateMessageTokens(message, encoder, constants);
|
|
11332
|
-
numTokens += 3;
|
|
11333
|
-
return numTokens;
|
|
11334
|
-
};
|
|
11335
|
-
/**
|
|
11336
|
-
* Get the corresponding encoder module based on encoding type
|
|
11337
|
-
*/
|
|
11338
|
-
const getEncodeChatFunction = async (encoding) => {
|
|
11339
|
-
if (encodingCache.has(encoding)) {
|
|
11340
|
-
const cached$1 = encodingCache.get(encoding);
|
|
11341
|
-
if (cached$1) return cached$1;
|
|
11342
|
-
}
|
|
11343
|
-
const supportedEncoding = encoding;
|
|
11344
|
-
if (!(supportedEncoding in ENCODING_MAP)) {
|
|
11345
|
-
const fallbackModule = await ENCODING_MAP.o200k_base();
|
|
11346
|
-
encodingCache.set(encoding, fallbackModule);
|
|
11347
|
-
return fallbackModule;
|
|
11348
|
-
}
|
|
11349
|
-
const encodingModule = await ENCODING_MAP[supportedEncoding]();
|
|
11350
|
-
encodingCache.set(encoding, encodingModule);
|
|
11351
|
-
return encodingModule;
|
|
11352
|
-
};
|
|
11353
|
-
/**
|
|
11354
|
-
* Get tokenizer type from model information
|
|
11355
|
-
*/
|
|
11356
|
-
const getTokenizerFromModel = (model) => {
|
|
11357
|
-
return model.capabilities?.tokenizer || "o200k_base";
|
|
11358
|
-
};
|
|
11359
|
-
/**
|
|
11360
|
-
* Get model-specific constants for token calculation
|
|
11361
|
-
*/
|
|
11362
|
-
const getModelConstants = (model) => {
|
|
11363
|
-
return model.id === "gpt-3.5-turbo" || model.id === "gpt-4" ? {
|
|
11364
|
-
funcInit: 10,
|
|
11365
|
-
propInit: 3,
|
|
11366
|
-
propKey: 3,
|
|
11367
|
-
enumInit: -3,
|
|
11368
|
-
enumItem: 3,
|
|
11369
|
-
funcEnd: 12
|
|
11370
|
-
} : {
|
|
11371
|
-
funcInit: 7,
|
|
11372
|
-
propInit: 3,
|
|
11373
|
-
propKey: 3,
|
|
11374
|
-
enumInit: -3,
|
|
11375
|
-
enumItem: 3,
|
|
11376
|
-
funcEnd: 12
|
|
11377
|
-
};
|
|
11378
|
-
};
|
|
11379
|
-
/**
|
|
11380
|
-
* Calculate tokens for a single parameter
|
|
11381
|
-
*/
|
|
11382
|
-
const calculateParameterTokens = (key, prop, context) => {
|
|
11383
|
-
const { encoder, constants } = context;
|
|
11384
|
-
let tokens = constants.propKey;
|
|
11385
|
-
if (typeof prop !== "object" || prop === null) return tokens;
|
|
11386
|
-
const param = prop;
|
|
11387
|
-
const paramName = key;
|
|
11388
|
-
const paramType = param.type || "string";
|
|
11389
|
-
let paramDesc = param.description || "";
|
|
11390
|
-
if (param.enum && Array.isArray(param.enum)) {
|
|
11391
|
-
tokens += constants.enumInit;
|
|
11392
|
-
for (const item of param.enum) {
|
|
11393
|
-
tokens += constants.enumItem;
|
|
11394
|
-
tokens += encoder.encode(String(item)).length;
|
|
11395
|
-
}
|
|
11396
|
-
}
|
|
11397
|
-
if (paramDesc.endsWith(".")) paramDesc = paramDesc.slice(0, -1);
|
|
11398
|
-
const line = `${paramName}:${paramType}:${paramDesc}`;
|
|
11399
|
-
tokens += encoder.encode(line).length;
|
|
11400
|
-
const excludedKeys = new Set([
|
|
11401
|
-
"type",
|
|
11402
|
-
"description",
|
|
11403
|
-
"enum"
|
|
11404
|
-
]);
|
|
11405
|
-
for (const propertyName of Object.keys(param)) if (!excludedKeys.has(propertyName)) {
|
|
11406
|
-
const propertyValue = param[propertyName];
|
|
11407
|
-
const propertyText = typeof propertyValue === "string" ? propertyValue : JSON.stringify(propertyValue);
|
|
11408
|
-
tokens += encoder.encode(`${propertyName}:${propertyText}`).length;
|
|
11409
|
-
}
|
|
11410
|
-
return tokens;
|
|
11411
|
-
};
|
|
11412
|
-
/**
|
|
11413
|
-
* Calculate tokens for function parameters
|
|
11414
|
-
*/
|
|
11415
|
-
const calculateParametersTokens = (parameters, encoder, constants) => {
|
|
11416
|
-
if (!parameters || typeof parameters !== "object") return 0;
|
|
11417
|
-
const params = parameters;
|
|
11418
|
-
let tokens = 0;
|
|
11419
|
-
for (const [key, value] of Object.entries(params)) if (key === "properties") {
|
|
11420
|
-
const properties = value;
|
|
11421
|
-
if (Object.keys(properties).length > 0) {
|
|
11422
|
-
tokens += constants.propInit;
|
|
11423
|
-
for (const propKey of Object.keys(properties)) tokens += calculateParameterTokens(propKey, properties[propKey], {
|
|
11424
|
-
encoder,
|
|
11425
|
-
constants
|
|
11426
|
-
});
|
|
11427
|
-
}
|
|
11428
|
-
} else {
|
|
11429
|
-
const paramText = typeof value === "string" ? value : JSON.stringify(value);
|
|
11430
|
-
tokens += encoder.encode(`${key}:${paramText}`).length;
|
|
11431
|
-
}
|
|
11432
|
-
return tokens;
|
|
11433
|
-
};
|
|
11434
|
-
/**
|
|
11435
|
-
* Calculate tokens for a single tool
|
|
11436
|
-
*/
|
|
11437
|
-
const calculateToolTokens = (tool, encoder, constants) => {
|
|
11438
|
-
let tokens = constants.funcInit;
|
|
11439
|
-
const func = tool.function;
|
|
11440
|
-
const fName = func.name;
|
|
11441
|
-
let fDesc = func.description || "";
|
|
11442
|
-
if (fDesc.endsWith(".")) fDesc = fDesc.slice(0, -1);
|
|
11443
|
-
const line = fName + ":" + fDesc;
|
|
11444
|
-
tokens += encoder.encode(line).length;
|
|
11445
|
-
if (typeof func.parameters === "object" && func.parameters !== null) tokens += calculateParametersTokens(func.parameters, encoder, constants);
|
|
11446
|
-
return tokens;
|
|
11447
|
-
};
|
|
11448
|
-
/**
|
|
11449
|
-
* Calculate token count for tools based on model
|
|
11450
|
-
*/
|
|
11451
|
-
const numTokensForTools = (tools, encoder, constants) => {
|
|
11452
|
-
let funcTokenCount = 0;
|
|
11453
|
-
for (const tool of tools) funcTokenCount += calculateToolTokens(tool, encoder, constants);
|
|
11454
|
-
funcTokenCount += constants.funcEnd;
|
|
11455
|
-
return funcTokenCount;
|
|
11456
|
-
};
|
|
11457
|
-
/**
|
|
11458
|
-
* Calculate the token count of messages, supporting multiple GPT encoders
|
|
11459
|
-
*/
|
|
11460
|
-
const getTokenCount = async (payload, model) => {
|
|
11461
|
-
const encoder = await getEncodeChatFunction(getTokenizerFromModel(model));
|
|
11462
|
-
const simplifiedMessages = payload.messages;
|
|
11463
|
-
const inputMessages = simplifiedMessages.filter((msg) => msg.role !== "assistant");
|
|
11464
|
-
const outputMessages = simplifiedMessages.filter((msg) => msg.role === "assistant");
|
|
11465
|
-
const constants = getModelConstants(model);
|
|
11466
|
-
let inputTokens = calculateTokens(inputMessages, encoder, constants);
|
|
11467
|
-
if (payload.tools && payload.tools.length > 0) inputTokens += numTokensForTools(payload.tools, encoder, constants);
|
|
11468
|
-
const outputTokens = calculateTokens(outputMessages, encoder, constants);
|
|
11469
|
-
return {
|
|
11470
|
-
input: inputTokens,
|
|
11471
|
-
output: outputTokens
|
|
11472
|
-
};
|
|
11473
|
-
};
|
|
11474
|
-
|
|
11475
11618
|
//#endregion
|
|
11476
11619
|
//#region src/routes/chat-completions/handler.ts
|
|
11477
11620
|
const ENCODER$1 = new TextEncoder();
|
|
@@ -13192,6 +13335,7 @@ function getClaudeCodeEnvVars(serverUrl, model) {
|
|
|
13192
13335
|
if (process.env.ANTHROPIC_DEFAULT_SONNET_MODEL === void 0) vars.ANTHROPIC_DEFAULT_SONNET_MODEL = "claude-sonnet-4-6";
|
|
13193
13336
|
if (process.env.ANTHROPIC_DEFAULT_HAIKU_MODEL === void 0) vars.ANTHROPIC_DEFAULT_HAIKU_MODEL = "claude-haiku-4-5";
|
|
13194
13337
|
if (process.env.ANTHROPIC_DEFAULT_OPUS_MODEL === void 0) vars.ANTHROPIC_DEFAULT_OPUS_MODEL = "claude-opus-4-7";
|
|
13338
|
+
if (process.env.CLAUDE_CODE_PLAN_V2_AGENT_COUNT === void 0) vars.CLAUDE_CODE_PLAN_V2_AGENT_COUNT = "7";
|
|
13195
13339
|
for (const key of [
|
|
13196
13340
|
"CLAUDE_CODE_ENABLE_EXPERIMENTAL_ADVISOR_TOOL",
|
|
13197
13341
|
"CLAUDE_CODE_FORK_SUBAGENT",
|