github-router 0.3.40 → 0.3.42
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{lifecycle-CpnAVVQ_.js → lifecycle-DU0UI2t5.js} +2 -2
- package/dist/{lifecycle-CpnAVVQ_.js.map → lifecycle-DU0UI2t5.js.map} +1 -1
- package/dist/{lifecycle-DpnTmHCo.js → lifecycle-zr19Ot-e.js} +2 -2
- package/dist/main.js +558 -266
- package/dist/main.js.map +1 -1
- package/dist/{paths-cZle37Jp.js → paths-lwEqM5-i.js} +293 -2
- package/dist/paths-lwEqM5-i.js.map +1 -0
- package/dist/{paths-B7jmIPYq.js → paths-nd-94lLq.js} +1 -1
- package/package.json +1 -1
- package/dist/paths-cZle37Jp.js.map +0 -1
package/dist/main.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { c as writeRuntimeFileSecure, i as removeOwnClaudeConfigMirror, n as ensureClaudeConfigMirror, r as ensurePaths, t as PATHS } from "./paths-
|
|
3
|
-
import { a as sweepRegistry, i as registerExitHandlers, n as getInstanceUuid, r as recordWorkerRepo, t as WorktreeRegistry } from "./lifecycle-
|
|
2
|
+
import { c as writeRuntimeFileSecure, i as removeOwnClaudeConfigMirror, n as ensureClaudeConfigMirror, r as ensurePaths, t as PATHS } from "./paths-lwEqM5-i.js";
|
|
3
|
+
import { a as sweepRegistry, i as registerExitHandlers, n as getInstanceUuid, r as recordWorkerRepo, t as WorktreeRegistry } from "./lifecycle-DU0UI2t5.js";
|
|
4
4
|
import { createRequire } from "node:module";
|
|
5
5
|
import { defineCommand, runMain } from "citty";
|
|
6
6
|
import consola from "consola";
|
|
@@ -447,8 +447,8 @@ function normalizeModelId(id) {
|
|
|
447
447
|
* 6. Return as-is with a warning
|
|
448
448
|
*/
|
|
449
449
|
function resolveModel(modelId) {
|
|
450
|
-
const models = state.models?.data;
|
|
451
|
-
if (!models) return modelId;
|
|
450
|
+
const models$1 = state.models?.data;
|
|
451
|
+
if (!models$1) return modelId;
|
|
452
452
|
const oneMMatch = modelId.match(/^(.*)\[1m\]$/i);
|
|
453
453
|
if (oneMMatch) {
|
|
454
454
|
const stripped = oneMMatch[1];
|
|
@@ -456,31 +456,31 @@ function resolveModel(modelId) {
|
|
|
456
456
|
if (!/-1m(?:$|-)/.test(resolved)) consola.warn(`Model "${modelId}" requested 1M context but no -1m backend is in Copilot's catalog for this tier/family; downgrading upstream to "${resolved}" (200K). Claude Code's local context accounting will still assume 1M — expect premature auto-compact. Drop the [1m] suffix (or unset CLAUDE_CODE_DISABLE_1M_CONTEXT if you set it) to silence.`);
|
|
457
457
|
return resolved;
|
|
458
458
|
}
|
|
459
|
-
if (models.some((m) => m.id === modelId)) return modelId;
|
|
459
|
+
if (models$1.some((m) => m.id === modelId)) return modelId;
|
|
460
460
|
const lower = modelId.toLowerCase();
|
|
461
|
-
const ciMatch = models.find((m) => m.id.toLowerCase() === lower);
|
|
461
|
+
const ciMatch = models$1.find((m) => m.id.toLowerCase() === lower);
|
|
462
462
|
if (ciMatch) return ciMatch.id;
|
|
463
463
|
if (lower.includes("opus")) {
|
|
464
|
-
const oneMs = models.filter((m) => m.id.includes("opus") && /-1m(?:$|-)/.test(m.id));
|
|
464
|
+
const oneMs = models$1.filter((m) => m.id.includes("opus") && /-1m(?:$|-)/.test(m.id));
|
|
465
465
|
const versionMatch = lower.match(/opus-(\d+)[.-](\d+)/);
|
|
466
466
|
const requestedVersion = versionMatch ? `${versionMatch[1]}.${versionMatch[2]}` : void 0;
|
|
467
467
|
const oneM = (requestedVersion ? oneMs.find((m) => m.id.includes(`opus-${requestedVersion}-`)) : void 0) ?? (requestedVersion ? void 0 : oneMs[0]);
|
|
468
468
|
if (oneM) return oneM.id;
|
|
469
469
|
}
|
|
470
470
|
if (lower.includes("codex")) {
|
|
471
|
-
const codexModels = models.filter((m) => m.id.includes("codex") && !m.id.includes("mini"));
|
|
471
|
+
const codexModels = models$1.filter((m) => m.id.includes("codex") && !m.id.includes("mini"));
|
|
472
472
|
if (codexModels.length > 0) {
|
|
473
473
|
codexModels.sort((a, b) => b.id.localeCompare(a.id));
|
|
474
474
|
return codexModels[0].id;
|
|
475
475
|
}
|
|
476
476
|
}
|
|
477
477
|
const normalized = normalizeModelId(modelId);
|
|
478
|
-
const normMatch = models.find((m) => normalizeModelId(m.id) === normalized);
|
|
478
|
+
const normMatch = models$1.find((m) => normalizeModelId(m.id) === normalized);
|
|
479
479
|
if (normMatch) return normMatch.id;
|
|
480
480
|
const dateStripped = modelId.replace(/^(claude-[\w.-]+)-20\d{6}$/i, "$1");
|
|
481
481
|
if (dateStripped !== modelId) {
|
|
482
482
|
const retried = resolveModel(dateStripped);
|
|
483
|
-
if (retried !== dateStripped || models.some((m) => m.id === dateStripped)) {
|
|
483
|
+
if (retried !== dateStripped || models$1.some((m) => m.id === dateStripped)) {
|
|
484
484
|
consola.info(`Resolved Anthropic dated slug "${modelId}" → "${retried}" (stripped -YYYYMMDD; pass an explicit catalog id to pin a snapshot)`);
|
|
485
485
|
return retried;
|
|
486
486
|
}
|
|
@@ -490,7 +490,7 @@ function resolveModel(modelId) {
|
|
|
490
490
|
const matchHaiku = /(?:^|-)haiku(?:-|$)/.test(lower);
|
|
491
491
|
if (matchSonnet || matchHaiku) {
|
|
492
492
|
const family = matchSonnet ? "sonnet" : "haiku";
|
|
493
|
-
const familyMembers = models.filter((m) => (/* @__PURE__ */ new RegExp(`(?:^|-)${family}(?:-|$|\\.)`)).test(m.id));
|
|
493
|
+
const familyMembers = models$1.filter((m) => (/* @__PURE__ */ new RegExp(`(?:^|-)${family}(?:-|$|\\.)`)).test(m.id));
|
|
494
494
|
if (familyMembers.length > 0) {
|
|
495
495
|
familyMembers.sort((a, b) => b.id.localeCompare(a.id, void 0, { numeric: true }));
|
|
496
496
|
const best = familyMembers[0].id;
|
|
@@ -499,7 +499,7 @@ function resolveModel(modelId) {
|
|
|
499
499
|
}
|
|
500
500
|
}
|
|
501
501
|
}
|
|
502
|
-
consola.warn(`Model "${modelId}" not found in Copilot model list. Available: ${models.map((m) => m.id).join(", ")}`);
|
|
502
|
+
consola.warn(`Model "${modelId}" not found in Copilot model list. Available: ${models$1.map((m) => m.id).join(", ")}`);
|
|
503
503
|
return modelId;
|
|
504
504
|
}
|
|
505
505
|
/**
|
|
@@ -508,10 +508,10 @@ function resolveModel(modelId) {
|
|
|
508
508
|
*/
|
|
509
509
|
function resolveCodexModel(modelId) {
|
|
510
510
|
const resolved = resolveModel(modelId);
|
|
511
|
-
const models = state.models?.data;
|
|
512
|
-
if (!models) return resolved;
|
|
513
|
-
if (models.some((m) => m.id === resolved)) return resolved;
|
|
514
|
-
const candidates = models.filter((m) => {
|
|
511
|
+
const models$1 = state.models?.data;
|
|
512
|
+
if (!models$1) return resolved;
|
|
513
|
+
if (models$1.some((m) => m.id === resolved)) return resolved;
|
|
514
|
+
const candidates = models$1.filter((m) => {
|
|
515
515
|
const endpoints = m.supported_endpoints ?? [];
|
|
516
516
|
if (m.id.includes("mini") || m.id.includes("nano")) return false;
|
|
517
517
|
return endpoints.length === 0 || endpoints.includes("/responses");
|
|
@@ -971,9 +971,9 @@ function pickClaudeDefault(opusFamily = DEFAULT_OPUS_FAMILY) {
|
|
|
971
971
|
const versionPattern = dotted.replace(/\./g, "[.-]");
|
|
972
972
|
const oneMRegex = new RegExp(`opus-${versionPattern}-1m(?:$|-)`, "i");
|
|
973
973
|
const familyRegex = new RegExp(`opus-${versionPattern}(?:$|[-.])`, "i");
|
|
974
|
-
const models = state.models?.data ?? [];
|
|
975
|
-
const has1m = models.some((m) => oneMRegex.test(m.id));
|
|
976
|
-
if (opusFamily !== DEFAULT_OPUS_FAMILY && state.models && models.length > 0 && !models.some((m) => familyRegex.test(m.id))) consola.warn(`Requested Opus family "${dotted}" not found in Copilot catalog; using "${bareSlug}" anyway (resolveModel may not find a backend for it).`);
|
|
974
|
+
const models$1 = state.models?.data ?? [];
|
|
975
|
+
const has1m = models$1.some((m) => oneMRegex.test(m.id));
|
|
976
|
+
if (opusFamily !== DEFAULT_OPUS_FAMILY && state.models && models$1.length > 0 && !models$1.some((m) => familyRegex.test(m.id))) consola.warn(`Requested Opus family "${dotted}" not found in Copilot catalog; using "${bareSlug}" anyway (resolveModel may not find a backend for it).`);
|
|
977
977
|
if (has1m) {
|
|
978
978
|
consola.info(`Catalog contains opus-${dotted}-1m variant; defaulting ANTHROPIC_MODEL to "${bareSlug}[1m]" so Claude Code accounts for 1M context locally. Set CLAUDE_CODE_DISABLE_1M_CONTEXT=1 to opt out (HIPAA), or pass --model ${bareSlug} to pin 200K.`);
|
|
979
979
|
return `${bareSlug}[1m]`;
|
|
@@ -3197,7 +3197,7 @@ function logAudit$1(record) {
|
|
|
3197
3197
|
try {
|
|
3198
3198
|
const fs$2 = await import("node:fs/promises");
|
|
3199
3199
|
const path$2 = await import("node:path");
|
|
3200
|
-
const { PATHS: PATHS$1 } = await import("./paths-
|
|
3200
|
+
const { PATHS: PATHS$1 } = await import("./paths-nd-94lLq.js");
|
|
3201
3201
|
const dir = path$2.join(PATHS$1.APP_DIR, "browser-mcp");
|
|
3202
3202
|
await fs$2.mkdir(dir, { recursive: true });
|
|
3203
3203
|
const line = JSON.stringify({
|
|
@@ -3651,9 +3651,9 @@ const MODELS = {};
|
|
|
3651
3651
|
//#endregion
|
|
3652
3652
|
//#region src/vendor/pi/ai/models.ts
|
|
3653
3653
|
const modelRegistry = /* @__PURE__ */ new Map();
|
|
3654
|
-
for (const [provider, models] of Object.entries(MODELS)) {
|
|
3654
|
+
for (const [provider, models$1] of Object.entries(MODELS)) {
|
|
3655
3655
|
const providerModels = /* @__PURE__ */ new Map();
|
|
3656
|
-
for (const [id, model] of Object.entries(models)) providerModels.set(id, model);
|
|
3656
|
+
for (const [id, model] of Object.entries(models$1)) providerModels.set(id, model);
|
|
3657
3657
|
modelRegistry.set(provider, providerModels);
|
|
3658
3658
|
}
|
|
3659
3659
|
|
|
@@ -5851,6 +5851,221 @@ function acquireInFlightSlot() {
|
|
|
5851
5851
|
};
|
|
5852
5852
|
}
|
|
5853
5853
|
|
|
5854
|
+
//#endregion
|
|
5855
|
+
//#region src/lib/tokenizer.ts
|
|
5856
|
+
const ENCODING_MAP = {
|
|
5857
|
+
o200k_base: () => import("gpt-tokenizer/encoding/o200k_base"),
|
|
5858
|
+
cl100k_base: () => import("gpt-tokenizer/encoding/cl100k_base"),
|
|
5859
|
+
p50k_base: () => import("gpt-tokenizer/encoding/p50k_base"),
|
|
5860
|
+
p50k_edit: () => import("gpt-tokenizer/encoding/p50k_edit"),
|
|
5861
|
+
r50k_base: () => import("gpt-tokenizer/encoding/r50k_base")
|
|
5862
|
+
};
|
|
5863
|
+
const encodingCache = /* @__PURE__ */ new Map();
|
|
5864
|
+
/**
 * Sum the token cost of a list of tool calls.
 *
 * Every call contributes `constants.funcInit` plus the encoded length of
 * its JSON serialization; `constants.funcEnd` is added exactly once after
 * the loop (also when the list is empty).
 *
 * @param toolCalls iterable of tool-call objects
 * @param encoder   object exposing `encode(text)` that returns an array-like
 * @param constants per-model constants (`funcInit`, `funcEnd`)
 * @returns the accumulated token count
 */
const calculateToolCallsTokens = (toolCalls, encoder, constants) => {
	let total = 0;
	for (const call of toolCalls) {
		total += constants.funcInit;
		const serialized = JSON.stringify(call);
		total += encoder.encode(serialized).length;
	}
	return total + constants.funcEnd;
};
|
|
5876
|
+
/**
 * Sum the token cost of an array-form message `content`.
 *
 * `image_url` parts cost the encoded length of their URL plus a flat 85;
 * any other part with a truthy `text` costs the encoded length of that
 * text. Parts matching neither rule contribute nothing.
 *
 * @param contentParts iterable of content-part objects
 * @param encoder      object exposing `encode(text)` that returns an array-like
 * @returns the accumulated token count
 */
const calculateContentPartsTokens = (contentParts, encoder) => {
	let total = 0;
	for (const part of contentParts) {
		if (part.type === "image_url") {
			total += encoder.encode(part.image_url.url).length + 85;
			continue;
		}
		if (part.text) total += encoder.encode(part.text).length;
	}
	return total;
};
|
|
5885
|
+
/**
 * Calculate tokens for a single chat message.
 *
 * Starts from a fixed per-message overhead of 3, then for every own
 * enumerable field of the message:
 *   - any string value adds its encoded length;
 *   - a `name` key adds 1 on top of that;
 *   - an array-valued `tool_calls` adds `calculateToolCallsTokens(...)`;
 *   - an array-valued `content` adds `calculateContentPartsTokens(...)`.
 *
 * @param message   chat message object
 * @param encoder   object exposing `encode(text)` that returns an array-like
 * @param constants per-model constants forwarded to the tool-call counter
 * @returns the token count for this message
 */
const calculateMessageTokens = (message, encoder, constants) => {
	const tokensPerMessage = 3;
	const tokensPerName = 1;
	let tokens = tokensPerMessage;
	for (const [key, value] of Object.entries(message)) {
		if (typeof value === "string") tokens += encoder.encode(value).length;
		if (key === "name") tokens += tokensPerName;
		// Clients commonly serialize an absent tool-call list as
		// `tool_calls: null`; iterating that would throw, so only count
		// real arrays (same guard the `content` branch already uses).
		if (key === "tool_calls" && Array.isArray(value)) tokens += calculateToolCallsTokens(value, encoder, constants);
		if (key === "content" && Array.isArray(value)) tokens += calculateContentPartsTokens(value, encoder);
	}
	return tokens;
};
|
|
5900
|
+
/**
 * Total token count for a list of messages.
 *
 * An empty list costs 0. Otherwise the result is the sum of
 * `calculateMessageTokens` over every message plus a flat 3 added once.
 *
 * @param messages  array of chat messages
 * @param encoder   encoder forwarded to the per-message counter
 * @param constants per-model constants forwarded to the per-message counter
 * @returns the accumulated token count
 */
const calculateTokens = (messages, encoder, constants) => {
	if (messages.length === 0) return 0;
	const perMessageTotal = messages.reduce(
		(sum, message) => sum + calculateMessageTokens(message, encoder, constants),
		0
	);
	return perMessageTotal + 3;
};
|
|
5910
|
+
/**
 * Load (and cache) the encoder module for an encoding name.
 *
 * Results are memoized in `encodingCache` under the REQUESTED name.
 * Names that are not own keys of `ENCODING_MAP` resolve to the
 * `o200k_base` module, and that fallback is cached under the requested
 * name too, so repeated unknown lookups do not re-import.
 *
 * @param encoding encoding name (e.g. "o200k_base", "cl100k_base")
 * @returns a promise for the encoder module
 */
const getEncodeChatFunction = async (encoding) => {
	const cached$1 = encodingCache.get(encoding);
	if (cached$1) return cached$1;
	// Own-property check, not `in`: `in` also matches inherited names such
	// as "toString" or "constructor", which would invoke an
	// Object.prototype method and cache its result as if it were an encoder.
	const isSupported = Object.prototype.hasOwnProperty.call(ENCODING_MAP, encoding);
	const loader = isSupported ? ENCODING_MAP[encoding] : ENCODING_MAP.o200k_base;
	const encodingModule = await loader();
	encodingCache.set(encoding, encodingModule);
	return encodingModule;
};
|
|
5928
|
+
/**
 * Read the tokenizer name a model declares in `capabilities.tokenizer`.
 *
 * @param model catalog model entry
 * @returns the declared tokenizer, or "o200k_base" when it is missing or falsy
 */
const getTokenizerFromModel = (model) => {
	const declared = model.capabilities?.tokenizer;
	return declared ? declared : "o200k_base";
};
|
|
5934
|
+
/**
 * Public entry point for obtaining an encoder by encoding name.
 *
 * Thin wrapper over `getEncodeChatFunction`, which does the loading,
 * caching and unknown-name fallback. Lets callers that only need to
 * count raw text skip the chat-payload path.
 *
 * @param encoding encoding name; defaults to "o200k_base"
 * @returns a promise for the encoder module
 */
const loadEncoder = async (encoding = "o200k_base") => {
	return getEncodeChatFunction(encoding);
};
|
|
5940
|
+
/**
 * Count the tokens of a raw text string under the given encoding.
 *
 * Falsy text (empty string, null, undefined) short-circuits to 0 without
 * loading an encoder. Otherwise the encoder is obtained through
 * `getEncodeChatFunction` and the length of `encode(text)` is returned.
 *
 * @param text     the string to measure
 * @param encoding encoding name; defaults to "o200k_base"
 * @returns a promise for the token count
 */
const getTextTokenCount = async (text, encoding = "o200k_base") => {
	if (!text) return 0;
	const encoder = await getEncodeChatFunction(encoding);
	const encoded = encoder.encode(text);
	return encoded.length;
};
|
|
5953
|
+
/**
 * Per-model constants used by the tool/function token estimators.
 *
 * Only `funcInit` depends on the model: 10 for the ids "gpt-3.5-turbo"
 * and "gpt-4", 7 for every other id. The remaining fields are the same
 * in both cases. A fresh object is returned on every call.
 *
 * @param model catalog model entry (only `id` is read)
 * @returns `{ funcInit, propInit, propKey, enumInit, enumItem, funcEnd }`
 */
const getModelConstants = (model) => {
	const usesLegacyFuncInit = model.id === "gpt-3.5-turbo" || model.id === "gpt-4";
	return {
		funcInit: usesLegacyFuncInit ? 10 : 7,
		propInit: 3,
		propKey: 3,
		enumInit: -3,
		enumItem: 3,
		funcEnd: 12
	};
};
|
|
5973
|
+
/**
 * Estimate the tokens contributed by one property of a tool's parameter
 * schema.
 *
 * Cost breakdown:
 *   - `constants.propKey` always (and nothing else when `prop` is not a
 *     non-null object);
 *   - if `prop.enum` is an array: `constants.enumInit` once, plus
 *     `constants.enumItem` and the encoded length of each stringified item;
 *   - the encoded length of `key:type:description`, where `type` falls
 *     back to "string", `description` to "", and one trailing "." is
 *     stripped from the description;
 *   - for every other own key of `prop`, the encoded length of
 *     `name:value` (non-string values are JSON-serialized).
 *
 * @param key     the property name
 * @param prop    the property's schema fragment
 * @param context `{ encoder, constants }`
 * @returns the token estimate for this property
 */
const calculateParameterTokens = (key, prop, context) => {
	const { encoder, constants } = context;
	const countOf = (text) => encoder.encode(text).length;
	if (prop === null || typeof prop !== "object") return constants.propKey;
	let total = constants.propKey;
	const typeName = prop.type || "string";
	const rawDescription = prop.description || "";
	if (prop.enum && Array.isArray(prop.enum)) {
		total += constants.enumInit;
		for (const choice of prop.enum) {
			total += constants.enumItem;
			total += countOf(String(choice));
		}
	}
	const description = rawDescription.endsWith(".") ? rawDescription.slice(0, -1) : rawDescription;
	total += countOf(`${key}:${typeName}:${description}`);
	for (const fieldName of Object.keys(prop)) {
		if (fieldName === "type" || fieldName === "description" || fieldName === "enum") continue;
		const fieldValue = prop[fieldName];
		const fieldText = typeof fieldValue === "string" ? fieldValue : JSON.stringify(fieldValue);
		total += countOf(`${fieldName}:${fieldText}`);
	}
	return total;
};
|
|
6006
|
+
/**
 * Estimate the tokens contributed by a tool's `parameters` schema.
 *
 * Returns 0 for a falsy or non-object schema. Otherwise walks the
 * schema's own entries:
 *   - `properties`: when it has at least one key, adds
 *     `constants.propInit` once plus `calculateParameterTokens` for each
 *     property;
 *   - any other key: adds the encoded length of `key:value`, with
 *     non-string values JSON-serialized.
 *
 * @param parameters the tool's parameter schema
 * @param encoder    object exposing `encode(text)` that returns an array-like
 * @param constants  per-model constants
 * @returns the token estimate for the schema
 */
const calculateParametersTokens = (parameters, encoder, constants) => {
	if (!parameters || typeof parameters !== "object") return 0;
	let total = 0;
	for (const [key, value] of Object.entries(parameters)) {
		if (key !== "properties") {
			const valueText = typeof value === "string" ? value : JSON.stringify(value);
			total += encoder.encode(`${key}:${valueText}`).length;
			continue;
		}
		const propertyNames = Object.keys(value);
		if (propertyNames.length === 0) continue;
		total += constants.propInit;
		for (const propertyName of propertyNames) {
			total += calculateParameterTokens(propertyName, value[propertyName], {
				encoder,
				constants
			});
		}
	}
	return total;
};
|
|
6028
|
+
/**
 * Estimate the tokens contributed by one tool definition.
 *
 * Cost is `constants.funcInit`, plus the encoded length of
 * `name:description` (description defaults to "" and loses one trailing
 * "."), plus `calculateParametersTokens` when `function.parameters` is a
 * non-null object.
 *
 * @param tool      tool definition; reads `tool.function.{name,description,parameters}`
 * @param encoder   object exposing `encode(text)` that returns an array-like
 * @param constants per-model constants
 * @returns the token estimate for the tool
 */
const calculateToolTokens = (tool, encoder, constants) => {
	const { function: definition } = tool;
	const rawDescription = definition.description || "";
	const description = rawDescription.endsWith(".") ? rawDescription.slice(0, -1) : rawDescription;
	const header = `${definition.name}:${description}`;
	let total = constants.funcInit + encoder.encode(header).length;
	const schema = definition.parameters;
	if (schema !== null && typeof schema === "object") {
		total += calculateParametersTokens(schema, encoder, constants);
	}
	return total;
};
|
|
6042
|
+
/**
 * Total token estimate for a list of tool definitions.
 *
 * Sums `calculateToolTokens` over every tool and adds
 * `constants.funcEnd` once at the end.
 *
 * @param tools     iterable of tool definitions
 * @param encoder   encoder forwarded to the per-tool estimator
 * @param constants per-model constants
 * @returns the token estimate for the whole tool list
 */
const numTokensForTools = (tools, encoder, constants) => {
	let total = 0;
	for (const definition of tools) {
		total += calculateToolTokens(definition, encoder, constants);
	}
	return total + constants.funcEnd;
};
|
|
6051
|
+
/**
 * Estimate input and output token counts for a chat payload.
 *
 * Messages with role "assistant" are counted as output; every other
 * role is counted as input. A non-empty `payload.tools` list adds its
 * estimate to the input side. The encoder is chosen from the model's
 * declared tokenizer.
 *
 * @param payload chat payload; reads `messages` and optional `tools`
 * @param model   catalog model entry
 * @returns a promise for `{ input, output }`
 */
const getTokenCount = async (payload, model) => {
	const tokenizerName = getTokenizerFromModel(model);
	const encoder = await getEncodeChatFunction(tokenizerName);
	const isAssistant = (msg) => msg.role === "assistant";
	const inputMessages = payload.messages.filter((msg) => !isAssistant(msg));
	const outputMessages = payload.messages.filter(isAssistant);
	const constants = getModelConstants(model);
	const messageInput = calculateTokens(inputMessages, encoder, constants);
	const hasTools = Boolean(payload.tools && payload.tools.length > 0);
	const toolInput = hasTools ? numTokensForTools(payload.tools, encoder, constants) : 0;
	const output = calculateTokens(outputMessages, encoder, constants);
	return {
		input: messageInput + toolInput,
		output
	};
};
|
|
6068
|
+
|
|
5854
6069
|
//#endregion
|
|
5855
6070
|
//#region src/services/copilot/create-messages.ts
|
|
5856
6071
|
/**
|
|
@@ -6123,9 +6338,9 @@ function checkAuth(c) {
|
|
|
6123
6338
|
return { ok: true };
|
|
6124
6339
|
}
|
|
6125
6340
|
function geminiAvailable() {
|
|
6126
|
-
const models = state.models?.data;
|
|
6127
|
-
if (!models) return false;
|
|
6128
|
-
return models.some((m) => /^gemini-3\..*pro/i.test(m.id));
|
|
6341
|
+
const models$1 = state.models?.data;
|
|
6342
|
+
if (!models$1) return false;
|
|
6343
|
+
return models$1.some((m) => /^gemini-3\..*pro/i.test(m.id));
|
|
6129
6344
|
}
|
|
6130
6345
|
/**
|
|
6131
6346
|
* Gate for the `stand_in` tool.
|
|
@@ -6149,11 +6364,11 @@ function geminiAvailable() {
|
|
|
6149
6364
|
* land under the dotted slug, so we match by Copilot's actual id shape.
|
|
6150
6365
|
*/
|
|
6151
6366
|
function standInToolEnabled() {
|
|
6152
|
-
const models = state.models?.data;
|
|
6153
|
-
if (!models) return false;
|
|
6154
|
-
const hasGpt55 = models.some((m) => m.id === "gpt-5.5");
|
|
6155
|
-
const hasOpus = models.some((m) => m.id === "claude-opus-4-7" || m.id === "claude-opus-4.7");
|
|
6156
|
-
const hasGeminiPro = models.some((m) => /^gemini-3\..*pro/i.test(m.id));
|
|
6367
|
+
const models$1 = state.models?.data;
|
|
6368
|
+
if (!models$1) return false;
|
|
6369
|
+
const hasGpt55 = models$1.some((m) => m.id === "gpt-5.5");
|
|
6370
|
+
const hasOpus = models$1.some((m) => m.id === "claude-opus-4-7" || m.id === "claude-opus-4.7");
|
|
6371
|
+
const hasGeminiPro = models$1.some((m) => /^gemini-3\..*pro/i.test(m.id));
|
|
6157
6372
|
return hasGpt55 && hasOpus && hasGeminiPro;
|
|
6158
6373
|
}
|
|
6159
6374
|
/**
|
|
@@ -6183,9 +6398,9 @@ function standInToolEnabled() {
|
|
|
6183
6398
|
*/
|
|
6184
6399
|
function workerToolsEnabled() {
|
|
6185
6400
|
if (process.env.GH_ROUTER_DISABLE_WORKER_TOOLS === "1") return false;
|
|
6186
|
-
const models = state.models?.data;
|
|
6187
|
-
if (!models) return false;
|
|
6188
|
-
const found = models.find((m) => m.id === DEFAULT_MODEL);
|
|
6401
|
+
const models$1 = state.models?.data;
|
|
6402
|
+
if (!models$1) return false;
|
|
6403
|
+
const found = models$1.find((m) => m.id === DEFAULT_MODEL);
|
|
6189
6404
|
if (!found) return false;
|
|
6190
6405
|
return found.capabilities?.supports?.tool_calls === true;
|
|
6191
6406
|
}
|
|
@@ -6220,8 +6435,24 @@ function browserToolsEnabled() {
|
|
|
6220
6435
|
if (!(state.browseEnabled || process.env.GH_ROUTER_ENABLE_BROWSE === "1")) return false;
|
|
6221
6436
|
return hasSupportedBrowserInstalled();
|
|
6222
6437
|
}
|
|
6438
|
+
/**
 * The 1M-context Opus variant (`claude-opus-4.7-1m-internal`,
 * `max_prompt_tokens` 936K), gated `restricted_to: ["enterprise"]`.
 * opus_critic prefers it so it can take large artifacts in one shot
 * (the whole point of pairing it with gpt-5.5 as the big-window peers);
 * falls back to the 200K `claude-opus-4-7` when the catalog (non-
 * enterprise) doesn't carry a 1M opus slug.
 *
 * The version separator is matched as `.` OR `-` because catalog ids are
 * seen in both shapes elsewhere in this file (`claude-opus-4-7` and
 * `claude-opus-4.7`); matching only the dotted form would silently miss
 * a hyphenated 1M slug and pin opus_critic to the 200K model.
 */
const OPUS_1M_RE = /opus-4[.-]7.*1m/i;
/**
 * Pick the model id opus_critic should use.
 *
 * @returns the id of the first catalog entry matching `OPUS_1M_RE`, or
 *   the literal "claude-opus-4-7" when the catalog is missing or has no
 *   such entry
 */
function resolveOpusCriticModel() {
	const oneM = state.models?.data?.find((m) => OPUS_1M_RE.test(m.id));
	return oneM ? oneM.id : "claude-opus-4-7";
}
|
|
6223
6451
|
function activePersonas() {
|
|
6224
|
-
return PERSONAS_READ.filter((p) => !p.requiresGeminiCatalog || geminiAvailable())
|
|
6452
|
+
return PERSONAS_READ.filter((p) => !p.requiresGeminiCatalog || geminiAvailable()).map((p) => p.toolNameHttp === "opus_critic" ? {
|
|
6453
|
+
...p,
|
|
6454
|
+
model: resolveOpusCriticModel()
|
|
6455
|
+
} : p);
|
|
6225
6456
|
}
|
|
6226
6457
|
function toolEntries() {
|
|
6227
6458
|
const personaEntries = activePersonas().map((p) => ({
|
|
@@ -6355,6 +6586,46 @@ function predictedTooLong(persona, effort, briefBytes) {
|
|
|
6355
6586
|
return { tooLong: false };
|
|
6356
6587
|
}
|
|
6357
6588
|
/**
|
|
6589
|
+
* Tokens reserved below a peer model's `max_prompt_tokens` for the
|
|
6590
|
+
* per-call message framing (role wrappers, output_config, etc.) and any
|
|
6591
|
+
* discrepancy between our o200k count and Copilot's full-payload count.
|
|
6592
|
+
*/
|
|
6593
|
+
const PEER_PROMPT_TOKEN_RESERVE = 2e3;
|
|
6594
|
+
/**
 * Prompt-window guard for peer tool calls.
 *
 * Distinct from `predictedTooLong`: this check is about the model's
 * context window, not a timeout. It measures the text actually sent
 * (`persona.baseInstructions`, a newline, then
 * `buildUserText(prompt, context)`) and compares it with the persona
 * model's `capabilities.limits.max_prompt_tokens` from the cached
 * catalog, minus `PEER_PROMPT_TOKEN_RESERVE`.
 *
 * The call is allowed (returns undefined) when:
 *   - the resolved model has no catalog entry;
 *   - the limit is absent, non-numeric, non-finite or not positive;
 *   - the UTF-8 byte length of the text is within budget (cheap
 *     pre-check that avoids tokenizing small briefs);
 *   - tokenization throws (logged at debug level);
 *   - the token count is within budget.
 *
 * Otherwise it returns an actionable rejection message. The brief is
 * never truncated here.
 *
 * NOTE(review): when `max_prompt_tokens` is at or below the reserve the
 * budget is <= 0 and every non-empty brief is rejected — confirm that is
 * the intended behavior for very small windows.
 *
 * @param persona persona descriptor; reads `model`, `baseInstructions`, `toolNameHttp`
 * @param prompt  caller-supplied prompt
 * @param context caller-supplied context
 * @returns a promise for the rejection message, or undefined to allow
 */
async function predictedWindowOverflow(persona, prompt, context) {
	const resolvedId = resolveModel(persona.model);
	const catalogEntry = state.models?.data?.find((m) => m.id === resolvedId);
	if (!catalogEntry) return void 0;

	const windowTokens = catalogEntry.capabilities?.limits?.max_prompt_tokens;
	const windowIsUsable = typeof windowTokens === "number" && Number.isFinite(windowTokens) && windowTokens > 0;
	if (!windowIsUsable) return void 0;

	const budget = windowTokens - PEER_PROMPT_TOKEN_RESERVE;
	const inputText = `${persona.baseInstructions}\n${buildUserText(prompt, context)}`;
	const byteLength = Buffer.byteLength(inputText, "utf8");
	if (byteLength <= budget) return void 0;

	let tokens;
	try {
		const tokenizerName = getTokenizerFromModel(catalogEntry);
		tokens = await getTextTokenCount(inputText, tokenizerName);
	} catch (err) {
		consola.debug("[mcp] window-guard tokenization failed; allowing call:", err);
		return void 0;
	}
	if (tokens <= budget) return void 0;

	// Only suggest opus_critic when the rejected model is not already the 1M Opus.
	const opusHint = OPUS_1M_RE.test(resolvedId) ? "" : " / `opus_critic` (Opus-4.7 1M ≈ 936K tokens, when the enterprise catalog carries it)";
	return `pre-flight rejected: this ${persona.toolNameHttp} brief is ≈${tokens} tokens, over the ${budget}-token budget for ${persona.model} (its ${windowTokens}-token prompt window minus a ${PEER_PROMPT_TOKEN_RESERVE}-token framing reserve). Do NOT summarize or truncate the artifact to fit. Route the full artifact to a larger-window peer — \`codex_critic\` (gpt-5.5 ≈ 922K tokens)${opusHint} — or split it into focused sub-calls BY CONCERN and call them in parallel, then aggregate.`;
}
|
|
6628
|
+
/**
|
|
6358
6629
|
* JSON-path pre-flight predictedTooLong gate. Returns a JSON-RPC result
|
|
6359
6630
|
* body wrapping a tool-error envelope when the call would bust the 60s
|
|
6360
6631
|
* tools/call ceiling on the JSON path; returns undefined when the call
|
|
@@ -6516,6 +6787,10 @@ async function handleToolsCall(body) {
|
|
|
6516
6787
|
if (requestedEffort !== void 0 && !persona.allowedEfforts.includes(requestedEffort)) return rpcError(body.id, RPC_INVALID_PARAMS, `tools/call: persona "${persona.toolNameHttp}" does not accept effort="${requestedEffort}". Allowed: ${persona.allowedEfforts.join("|")}.`);
|
|
6517
6788
|
personaEffort = requestedEffort ?? persona.defaultEffort;
|
|
6518
6789
|
}
|
|
6790
|
+
if (persona && personaPrompt !== void 0) {
|
|
6791
|
+
const overflow = await predictedWindowOverflow(persona, personaPrompt, personaContext);
|
|
6792
|
+
if (overflow) return rpcResult(body.id, toolError(overflow));
|
|
6793
|
+
}
|
|
6519
6794
|
const release = acquireInFlightSlot();
|
|
6520
6795
|
if (!release) return rpcResult(body.id, {
|
|
6521
6796
|
content: [{
|
|
@@ -6755,10 +7030,13 @@ function acceptsEventStream(accept) {
|
|
|
6755
7030
|
/**
|
|
6756
7031
|
* SSE-streamed response for a single tools/call. Delegates the actual
|
|
6757
7032
|
* upstream call to `handleToolsCall` (so the per-persona effort gate,
|
|
6758
|
-
*
|
|
6759
|
-
* inFlight slot accounting all run identically); wraps
|
|
6760
|
-
* result in an SSE envelope with periodic heartbeats while
|
|
6761
|
-
* fetch is in flight.
|
|
7033
|
+
* the token-exact prompt-window guard, AbortController registration,
|
|
7034
|
+
* telemetry, and inFlight slot accounting all run identically); wraps
|
|
7035
|
+
* the awaited result in an SSE envelope with periodic heartbeats while
|
|
7036
|
+
* the upstream fetch is in flight. NOTE: the JSON-path `predictedTooLong`
|
|
7037
|
+
* byte cap is NOT applied here — it lives in `jsonPathPreflightCap`
|
|
7038
|
+
* (JSON path only); SSE bypasses it intentionally because heartbeats
|
|
7039
|
+
* keep the call alive past the ~60s tools/call ceiling it guards.
|
|
6762
7040
|
*
|
|
6763
7041
|
* SSE event format (per MCP Streamable HTTP):
|
|
6764
7042
|
* event: message
|
|
@@ -7145,15 +7423,39 @@ function injectAdvisorTool(rawBody) {
|
|
|
7145
7423
|
}];
|
|
7146
7424
|
return JSON.stringify(parsed);
|
|
7147
7425
|
}
|
|
7148
|
-
/**
|
|
7149
|
-
*
|
|
7150
|
-
*
|
|
7151
|
-
*
|
|
7152
|
-
*
|
|
7153
|
-
*
|
|
7154
|
-
*
|
|
7155
|
-
* and the advisor falls back silently. */
|
|
7426
|
+
/** Fallback CHARACTER budget for `renderConversationAsText` when called
|
|
7427
|
+
* without a token `measure` (unit-agnostic default = char length). Also
|
|
7428
|
+
* the conservative no-catalog floor: 720,000 chars ≈ 240,000 tokens at
|
|
7429
|
+
* ~3 chars/token, which fits even the smaller `/responses` models. The
|
|
7430
|
+
* live path measures EXACT o200k tokens (see `runAdvisor`) and budgets
|
|
7431
|
+
* against the model's real `max_prompt_tokens`, so this constant is only
|
|
7432
|
+
* a safety net, never the normal path. */
|
|
7156
7433
|
const ADVISOR_MAX_CONVERSATION_CHARS = 72e4;
|
|
7434
|
+
/** Token budget used when the advisor model's `max_prompt_tokens` can't
|
|
7435
|
+
* be resolved from the live catalog. ≈ the 720K-char fallback in tokens. */
|
|
7436
|
+
const ADVISOR_FALLBACK_MAX_TOKENS = 24e4;
|
|
7437
|
+
/** Tokens reserved below the model's `max_prompt_tokens` for the advisor
|
|
7438
|
+
* system prompt + per-call framing + any encode/wire discrepancy between
|
|
7439
|
+
* our o200k count and Copilot's full-payload count. The transcript token
|
|
7440
|
+
* budget is `max_prompt_tokens - reserve`. Generous on purpose: a 400
|
|
7441
|
+
* `model_max_prompt_tokens_exceeded` degrades to a silent advisor
|
|
7442
|
+
* fallback, and the marginal window we give up is irrelevant next to
|
|
7443
|
+
* gpt-5.5's 922K. */
|
|
7444
|
+
const ADVISOR_PROMPT_TOKEN_RESERVE = 8e3;
|
|
7445
|
+
/**
 * Compute the token budget for the advisor's rendered transcript.
 *
 * Looks up the resolved advisor model in the cached catalog
 * (`state.models`) and reads `capabilities.limits.max_prompt_tokens`.
 * When that value is a positive finite number the budget is the limit
 * minus `ADVISOR_PROMPT_TOKEN_RESERVE`, floored at 1. In every other
 * case (no catalog, unknown model, missing or invalid limit) the budget
 * is `ADVISOR_FALLBACK_MAX_TOKENS`.
 *
 * @param advisorModel model id or alias, passed through `resolveModel`
 * @returns the transcript token budget (always >= 1)
 */
function resolveAdvisorMaxTokens(advisorModel) {
	const resolvedId = resolveModel(advisorModel);
	const catalogEntry = state.models?.data?.find((m) => m.id === resolvedId);
	const windowTokens = catalogEntry?.capabilities?.limits?.max_prompt_tokens;
	const windowIsUsable = typeof windowTokens === "number" && Number.isFinite(windowTokens) && windowTokens > 0;
	if (!windowIsUsable) return ADVISOR_FALLBACK_MAX_TOKENS;
	const budget = windowTokens - ADVISOR_PROMPT_TOKEN_RESERVE;
	return Math.max(1, budget);
}
|
|
7157
7459
|
/**
|
|
7158
7460
|
* Render an Anthropic-shape conversation (messages array with
|
|
7159
7461
|
* role/content blocks) as a single human-readable text blob. Used
|
|
@@ -7163,14 +7465,20 @@ const ADVISOR_MAX_CONVERSATION_CHARS = 72e4;
|
|
|
7163
7465
|
* just needs to READ the conversation, not produce more of it).
|
|
7164
7466
|
*
|
|
7165
7467
|
* Front-truncates oldest turns when the rendered output would exceed
|
|
7166
|
-
* `
|
|
7468
|
+
* `maxUnits`. The advisor cares more about current state (latest
|
|
7167
7469
|
* tool calls, errors, in-flight task) than the original prompt —
|
|
7168
7470
|
* mirrors Claude Code's own context-truncation strategy. When any
|
|
7169
7471
|
* turns are dropped, prepends a `[TRUNCATED: N earlier turn(s)
|
|
7170
7472
|
* omitted ...]` notice so the advisor knows the transcript is
|
|
7171
7473
|
* partial and can flag if it needs the missing context.
|
|
7474
|
+
*
|
|
7475
|
+
* Unit-agnostic via the injected `measure` function: production passes
|
|
7476
|
+
* an EXACT o200k token counter and a token budget (so truncation tracks
|
|
7477
|
+
* the model's real `max_prompt_tokens`); the default `measure` is char
|
|
7478
|
+
* length, so callers/tests that pass a plain numeric budget get the
|
|
7479
|
+
* historical character-budget behavior.
|
|
7172
7480
|
*/
|
|
7173
|
-
function renderConversationAsText(conversation,
|
|
7481
|
+
function renderConversationAsText(conversation, maxUnits = ADVISOR_MAX_CONVERSATION_CHARS, measure = (s) => s.length) {
|
|
7174
7482
|
const turnBlocks = [];
|
|
7175
7483
|
for (let i = 0; i < conversation.length; i++) {
|
|
7176
7484
|
const msg = conversation[i];
|
|
@@ -7191,23 +7499,42 @@ function renderConversationAsText(conversation, maxChars = ADVISOR_MAX_CONVERSAT
|
|
|
7191
7499
|
block.push("");
|
|
7192
7500
|
turnBlocks.push(block.join("\n"));
|
|
7193
7501
|
}
|
|
7194
|
-
let
|
|
7502
|
+
let totalUnits = 0;
|
|
7195
7503
|
let firstKeptIdx = turnBlocks.length;
|
|
7196
7504
|
for (let i = turnBlocks.length - 1; i >= 0; i--) {
|
|
7197
|
-
const len = turnBlocks[i]
|
|
7198
|
-
if (
|
|
7199
|
-
|
|
7505
|
+
const len = measure(turnBlocks[i]) + 1;
|
|
7506
|
+
if (totalUnits + len > maxUnits) break;
|
|
7507
|
+
totalUnits += len;
|
|
7200
7508
|
firstKeptIdx = i;
|
|
7201
7509
|
}
|
|
7202
7510
|
if (firstKeptIdx === turnBlocks.length && turnBlocks.length > 0) {
|
|
7203
|
-
const
|
|
7204
|
-
|
|
7511
|
+
const last = turnBlocks[turnBlocks.length - 1];
|
|
7512
|
+
const notice = `[TRUNCATED: conversation too long for advisor model context; only the tail of the latest (turn ${turnBlocks.length}) is shown]\n\n`;
|
|
7513
|
+
return notice + truncateTailToUnits(last, Math.max(0, maxUnits - measure(notice)), measure);
|
|
7205
7514
|
}
|
|
7206
7515
|
const kept = turnBlocks.slice(firstKeptIdx);
|
|
7207
7516
|
if (firstKeptIdx > 0) kept.unshift(`[TRUNCATED: ${firstKeptIdx} earlier turn(s) omitted to fit advisor model context budget; ${turnBlocks.length - firstKeptIdx} most-recent turn(s) shown below]\n`);
|
|
7208
7517
|
return kept.join("\n");
|
|
7209
7518
|
}
|
|
7210
7519
|
/**
 * Return the longest suffix of `text` whose `measure(...)` is ≤ `maxUnits`.
 *
 * Binary search on the number of trailing UTF-16 code units to keep, so it
 * is unit-agnostic (works with a token-counting `measure` as well as plain
 * character length) and calls `measure` O(log n) times. The search assumes
 * `measure` does not shrink as the suffix grows.
 *
 * The cut point is a code-unit index, so it can land between the two halves
 * of a surrogate pair. In that case the cut is moved one unit forward so the
 * result never starts with an orphaned low surrogate (a broken character).
 *
 * @param text Text to truncate from the front.
 * @param maxUnits Budget, in whatever unit `measure` returns.
 * @param measure Function mapping a string to its size in budget units.
 * @returns `text` unchanged when it already fits, `""` when the budget is
 *   not positive, otherwise the fitting suffix.
 */
function truncateTailToUnits(text, maxUnits, measure) {
  if (maxUnits <= 0) return "";
  if (measure(text) <= maxUnits) return text;
  // `lo` is the largest suffix length known to fit; `hi` the largest that might.
  let lo = 0;
  let hi = text.length;
  while (lo < hi) {
    // Upper-biased midpoint: always in (lo, hi], so the loop makes progress.
    const mid = Math.ceil((lo + hi + 1) / 2);
    if (measure(text.slice(text.length - mid)) <= maxUnits) lo = mid;
    else hi = mid - 1;
  }
  let start = text.length - lo;
  if (start > 0 && start < text.length) {
    const head = text.charCodeAt(start);
    const before = text.charCodeAt(start - 1);
    const splitsPair = head >= 0xdc00 && head <= 0xdfff && before >= 0xd800 && before <= 0xdbff;
    // Dropping the orphaned low surrogate only shortens the suffix.
    if (splitsPair) start += 1;
  }
  return text.slice(start);
}
|
|
7537
|
+
/**
|
|
7211
7538
|
* Run the advisor model with the full conversation context. Returns
|
|
7212
7539
|
* the advisor's text response.
|
|
7213
7540
|
*
|
|
@@ -7227,8 +7554,20 @@ function renderConversationAsText(conversation, maxChars = ADVISOR_MAX_CONVERSAT
|
|
|
7227
7554
|
async function runAdvisor(conversation, advisorModel, advisorEffort, signal) {
|
|
7228
7555
|
if (signal?.aborted) throw new Error("advisor call aborted before dispatch");
|
|
7229
7556
|
const advisorSystem = "You are an expert advisor reviewing an in-progress Claude Code session. The transcript below is the work-in-progress (turns numbered, with tool calls and results inlined). Read carefully and provide concrete, actionable advice on the next step or course-correction. Be specific — cite the parts of the transcript you're responding to. If the assistant is on the right track, say so explicitly. If they're stuck or off-track, name the specific assumption or step to revisit. Aim for 2-5 paragraphs of substantive guidance.";
|
|
7230
|
-
const conversationText = renderConversationAsText(conversation);
|
|
7231
7557
|
const resolvedAdvisorModel = resolveModel(advisorModel);
|
|
7558
|
+
let measure;
|
|
7559
|
+
let maxUnits;
|
|
7560
|
+
try {
|
|
7561
|
+
const modelEntry = state.models?.data?.find((m) => m.id === resolvedAdvisorModel);
|
|
7562
|
+
const encoder = await loadEncoder(modelEntry ? getTokenizerFromModel(modelEntry) : "o200k_base");
|
|
7563
|
+
measure = (s) => encoder.encode(s).length;
|
|
7564
|
+
maxUnits = resolveAdvisorMaxTokens(advisorModel);
|
|
7565
|
+
} catch (err) {
|
|
7566
|
+
consola.debug("advisor: tokenizer load failed; using char-length budget:", err);
|
|
7567
|
+
measure = (s) => s.length;
|
|
7568
|
+
maxUnits = ADVISOR_MAX_CONVERSATION_CHARS;
|
|
7569
|
+
}
|
|
7570
|
+
const conversationText = renderConversationAsText(conversation, maxUnits, measure);
|
|
7232
7571
|
if (/^(gpt-|o\d|.*codex)/i.test(resolvedAdvisorModel)) {
|
|
7233
7572
|
const response = await createResponses({
|
|
7234
7573
|
model: resolvedAdvisorModel,
|
|
@@ -8734,9 +9073,9 @@ function lookupPersona(critic) {
|
|
|
8734
9073
|
return persona;
|
|
8735
9074
|
}
|
|
8736
9075
|
/**
 * Report whether the cached model catalog (`state.models.data`) lists a
 * model whose id starts with `gemini-3.` and later contains `pro`
 * (case-insensitive). Returns `false` when no catalog is cached.
 */
function geminiInCatalog() {
  const catalog = state.models?.data;
  return catalog ? catalog.some((entry) => /^gemini-3\..*pro/i.test(entry.id)) : false;
}
|
|
8741
9080
|
const ADVISOR_PARAMS = Type.Object({ concern: Type.String({
|
|
8742
9081
|
description: "What you want a second pair of eyes on — your current approach, the blocker you're stuck on, or the decision you're about to commit. Required: the advisor needs a focal point.",
|
|
@@ -9898,7 +10237,7 @@ const PERSONAS_READ = Object.freeze([
|
|
|
9898
10237
|
toolNameHttp: "codex_critic",
|
|
9899
10238
|
model: "gpt-5.5",
|
|
9900
10239
|
endpoint: "/v1/responses",
|
|
9901
|
-
description: "Adversarial second opinion on plans, designs, or code tradeoffs. Backed by gpt-5.5 (OpenAI,
|
|
10240
|
+
description: "Adversarial second opinion on plans, designs, or code tradeoffs. Backed by gpt-5.5 (OpenAI, ≈922K-token input window) — strongest reasoning model in the critic lineup, different lab than Opus. Best for architecture decisions, design reviews, and tradeoff analysis where cross-lab diversity matters. Not for line-level code review (use codex_reviewer). Pass artifact verbatim.",
|
|
9902
10241
|
baseInstructions: CRITIC_BASE,
|
|
9903
10242
|
agentPrompt: "",
|
|
9904
10243
|
writeCapable: false,
|
|
@@ -9934,7 +10273,7 @@ const PERSONAS_READ = Object.freeze([
|
|
|
9934
10273
|
toolNameHttp: "codex_reviewer",
|
|
9935
10274
|
model: "gpt-5.3-codex",
|
|
9936
10275
|
endpoint: "/v1/responses",
|
|
9937
|
-
description: "Line-level review of a concrete diff or single file. Backed by gpt-5.3-codex (OpenAI,
|
|
10276
|
+
description: "Line-level review of a concrete diff or single file. Backed by gpt-5.3-codex (OpenAI, ≈272K-token input window) — code-specialist, fastest critic (~16s). Surfaces bugs, edge cases, security issues, and idiom violations at specific line numbers. Not suited for architecture or design review (use codex_critic for plans). Pass artifact verbatim.",
|
|
9938
10277
|
baseInstructions: REVIEWER_BASE,
|
|
9939
10278
|
agentPrompt: "",
|
|
9940
10279
|
writeCapable: false,
|
|
@@ -9952,7 +10291,7 @@ const PERSONAS_READ = Object.freeze([
|
|
|
9952
10291
|
toolNameHttp: "opus_critic",
|
|
9953
10292
|
model: "claude-opus-4-7",
|
|
9954
10293
|
endpoint: "/v1/messages",
|
|
9955
|
-
description: "Adversarial second opinion from a fresh-context Opus 4.7 — same lab as the lead, limited blind-spot diversity vs cross-lab critics
|
|
10294
|
+
description: "Adversarial second opinion from a fresh-context Opus 4.7 — same lab as the lead, limited blind-spot diversity vs cross-lab critics. On enterprise catalogs that carry Opus-4.7-1M it runs with a ≈936K-token input window and handles large artifacts without decomposition; otherwise ≈168K. Fast (~22s), catches confabulation and motivated reasoning. Pass artifact verbatim.",
|
|
9956
10295
|
baseInstructions: OPUS_CRITIC_BASE,
|
|
9957
10296
|
agentPrompt: "",
|
|
9958
10297
|
writeCapable: false,
|
|
@@ -10615,14 +10954,14 @@ function buildCoordinatorAgent(opts) {
|
|
|
10615
10954
|
"",
|
|
10616
10955
|
"- **Plan / design / architecture choice** → fan out to `codex-critic` (gpt-5.5, strongest reasoning, cross-lab)" + (opts.geminiAvailable ? " AND `gemini-critic` (third-lab triangulation, strong on formal reasoning) in parallel" : "") + ". codex-reviewer is the wrong tool for plans (it's a code-specialist, not an architecture critic).",
|
|
10617
10956
|
"- **Concrete diff or single file** → fan out to `codex-reviewer` (gpt-5.3-codex, line-level code specialist, fastest at ~16s)" + (opts.geminiAvailable ? " AND `gemini-critic` for cross-lab triangulation" : "") + ". For very small changes (<20 lines), one `codex-reviewer` call is enough.",
|
|
10618
|
-
"- **Large artifact (
|
|
10957
|
+
"- **Large artifact** → the only peers that take a large artifact WHOLE are `codex-critic` (gpt-5.5, ≈922K-token input window) and `opus-critic` (Opus-4.7-1M, ≈936K-token input on enterprise catalogs; ≈168K otherwise). Route the full artifact to those for cross-lab coverage. `codex-reviewer` (≈272K) and `gemini-critic` (≈136K) have small windows — see Decomposition below: never summarize or downsize the request to squeeze a large artifact into a small-window peer.",
|
|
10619
10958
|
"- **Formal reasoning, proofs, or invariants** → prefer `gemini-critic`" + (opts.geminiAvailable ? " (gemini-3.1-pro, strong on math and formally-stated properties)" : " (NOT REGISTERED in this session — gemini-3.x not in catalog)") + ".",
|
|
10620
10959
|
"- **Tie-breaker after codex-critic has weighed in** → call `gemini-critic`" + (opts.geminiAvailable ? "" : " (NOT REGISTERED in this session)") + " or `opus-critic` with the artifact AND codex-critic's verdict for cross-check.",
|
|
10621
10960
|
"- **Fast sanity check** → `opus-critic` (~22s, same lab as lead but fresh context — catches confabulation and motivated reasoning).",
|
|
10622
10961
|
"",
|
|
10623
10962
|
"## Decomposition for large artifacts",
|
|
10624
10963
|
"",
|
|
10625
|
-
"
|
|
10964
|
+
"Route by the peer's real PROMPT WINDOW (input tokens): `codex-critic` gpt-5.5 ≈922K · `opus-critic` Opus-4.7-1M ≈936K (enterprise catalogs; ≈168K otherwise) · `codex-reviewer` gpt-5.3-codex ≈272K · `gemini-critic` gemini-3.1-pro ≈136K. The proxy REJECTS (with an actionable message) any single call whose brief exceeds the target peer's window — it will NOT silently truncate, because dropping lines from a review artifact is worse than a clear error. So: send the full artifact only to peers whose window fits it (large artifacts → `codex-critic` and/or `opus-critic`). When a peer's window is too small (commonly `gemini-critic` at ≈136K, or `codex-reviewer` at ≈272K), do NOT summarize or downsize the request to include it — either skip that peer, or split the artifact into 2-4 logical batches BY CONCERN (not by raw size — semantic batches give better per-batch reviews) that each fit, and call in parallel. Use the big-window peers for the whole and reserve a small-window peer like gemini for the concerns it can actually hold. The proxy's MCP cap allows up to 8 in-flight calls. Aggregate findings yourself before reporting back. (Separately, on the JSON transport a per-effort `predictedTooLong` byte cap still guards the ~60s tools/call timeout for non-SSE clients; Claude Code uses SSE, which streams with heartbeats and isn't subject to that cap.)",
|
|
10626
10965
|
"",
|
|
10627
10966
|
"## Aggregation contract",
|
|
10628
10967
|
"",
|
|
@@ -11120,7 +11459,7 @@ function initProxyFromEnv() {
|
|
|
11120
11459
|
//#endregion
|
|
11121
11460
|
//#region package.json
|
|
11122
11461
|
var name = "github-router";
|
|
11123
|
-
var version = "0.3.
|
|
11462
|
+
var version = "0.3.42";
|
|
11124
11463
|
|
|
11125
11464
|
//#endregion
|
|
11126
11465
|
//#region src/lib/approval.ts
|
|
@@ -11179,7 +11518,7 @@ async function doCheck(state$1, ticket) {
|
|
|
11179
11518
|
/**
|
|
11180
11519
|
* Format a number with K/M suffix for compact display.
|
|
11181
11520
|
*/
|
|
11182
|
-
function formatTokens(n) {
|
|
11521
|
+
function formatTokens$1(n) {
|
|
11183
11522
|
if (n >= 1e6) return `${(n / 1e6).toFixed(1)}M`;
|
|
11184
11523
|
if (n >= 1e3) return `${(n / 1e3).toFixed(1)}K`;
|
|
11185
11524
|
return String(n);
|
|
@@ -11193,9 +11532,9 @@ function formatTokenInfo(inputTokens, outputTokens, model) {
|
|
|
11193
11532
|
const maxPrompt = model?.capabilities?.limits?.max_prompt_tokens;
|
|
11194
11533
|
if (maxPrompt) {
|
|
11195
11534
|
const pct = (inputTokens / maxPrompt * 100).toFixed(1);
|
|
11196
|
-
parts.push(`in:${formatTokens(inputTokens)}/${formatTokens(maxPrompt)} (${pct}%)`);
|
|
11197
|
-
} else parts.push(`in:${formatTokens(inputTokens)}`);
|
|
11198
|
-
if (outputTokens !== void 0) parts.push(`out:${formatTokens(outputTokens)}`);
|
|
11535
|
+
parts.push(`in:${formatTokens$1(inputTokens)}/${formatTokens$1(maxPrompt)} (${pct}%)`);
|
|
11536
|
+
} else parts.push(`in:${formatTokens$1(inputTokens)}`);
|
|
11537
|
+
if (outputTokens !== void 0) parts.push(`out:${formatTokens$1(outputTokens)}`);
|
|
11199
11538
|
return parts.join(" ");
|
|
11200
11539
|
}
|
|
11201
11540
|
/**
|
|
@@ -11276,202 +11615,6 @@ function collectToolFieldKeys(body) {
|
|
|
11276
11615
|
return [...seen].sort();
|
|
11277
11616
|
}
|
|
11278
11617
|
|
|
11279
|
-
//#endregion
|
|
11280
|
-
//#region src/lib/tokenizer.ts
|
|
11281
|
-
const ENCODING_MAP = {
|
|
11282
|
-
o200k_base: () => import("gpt-tokenizer/encoding/o200k_base"),
|
|
11283
|
-
cl100k_base: () => import("gpt-tokenizer/encoding/cl100k_base"),
|
|
11284
|
-
p50k_base: () => import("gpt-tokenizer/encoding/p50k_base"),
|
|
11285
|
-
p50k_edit: () => import("gpt-tokenizer/encoding/p50k_edit"),
|
|
11286
|
-
r50k_base: () => import("gpt-tokenizer/encoding/r50k_base")
|
|
11287
|
-
};
|
|
11288
|
-
const encodingCache = /* @__PURE__ */ new Map();
|
|
11289
|
-
/**
|
|
11290
|
-
* Calculate tokens for tool calls
|
|
11291
|
-
*/
|
|
11292
|
-
const calculateToolCallsTokens = (toolCalls, encoder, constants) => {
|
|
11293
|
-
let tokens = 0;
|
|
11294
|
-
for (const toolCall of toolCalls) {
|
|
11295
|
-
tokens += constants.funcInit;
|
|
11296
|
-
tokens += encoder.encode(JSON.stringify(toolCall)).length;
|
|
11297
|
-
}
|
|
11298
|
-
tokens += constants.funcEnd;
|
|
11299
|
-
return tokens;
|
|
11300
|
-
};
|
|
11301
|
-
/**
|
|
11302
|
-
* Calculate tokens for content parts
|
|
11303
|
-
*/
|
|
11304
|
-
const calculateContentPartsTokens = (contentParts, encoder) => {
|
|
11305
|
-
let tokens = 0;
|
|
11306
|
-
for (const part of contentParts) if (part.type === "image_url") tokens += encoder.encode(part.image_url.url).length + 85;
|
|
11307
|
-
else if (part.text) tokens += encoder.encode(part.text).length;
|
|
11308
|
-
return tokens;
|
|
11309
|
-
};
|
|
11310
|
-
/**
|
|
11311
|
-
* Calculate tokens for a single message
|
|
11312
|
-
*/
|
|
11313
|
-
const calculateMessageTokens = (message, encoder, constants) => {
|
|
11314
|
-
const tokensPerMessage = 3;
|
|
11315
|
-
const tokensPerName = 1;
|
|
11316
|
-
let tokens = tokensPerMessage;
|
|
11317
|
-
for (const [key, value] of Object.entries(message)) {
|
|
11318
|
-
if (typeof value === "string") tokens += encoder.encode(value).length;
|
|
11319
|
-
if (key === "name") tokens += tokensPerName;
|
|
11320
|
-
if (key === "tool_calls") tokens += calculateToolCallsTokens(value, encoder, constants);
|
|
11321
|
-
if (key === "content" && Array.isArray(value)) tokens += calculateContentPartsTokens(value, encoder);
|
|
11322
|
-
}
|
|
11323
|
-
return tokens;
|
|
11324
|
-
};
|
|
11325
|
-
/**
|
|
11326
|
-
* Calculate tokens using custom algorithm
|
|
11327
|
-
*/
|
|
11328
|
-
const calculateTokens = (messages, encoder, constants) => {
|
|
11329
|
-
if (messages.length === 0) return 0;
|
|
11330
|
-
let numTokens = 0;
|
|
11331
|
-
for (const message of messages) numTokens += calculateMessageTokens(message, encoder, constants);
|
|
11332
|
-
numTokens += 3;
|
|
11333
|
-
return numTokens;
|
|
11334
|
-
};
|
|
11335
|
-
/**
|
|
11336
|
-
* Get the corresponding encoder module based on encoding type
|
|
11337
|
-
*/
|
|
11338
|
-
const getEncodeChatFunction = async (encoding) => {
|
|
11339
|
-
if (encodingCache.has(encoding)) {
|
|
11340
|
-
const cached$1 = encodingCache.get(encoding);
|
|
11341
|
-
if (cached$1) return cached$1;
|
|
11342
|
-
}
|
|
11343
|
-
const supportedEncoding = encoding;
|
|
11344
|
-
if (!(supportedEncoding in ENCODING_MAP)) {
|
|
11345
|
-
const fallbackModule = await ENCODING_MAP.o200k_base();
|
|
11346
|
-
encodingCache.set(encoding, fallbackModule);
|
|
11347
|
-
return fallbackModule;
|
|
11348
|
-
}
|
|
11349
|
-
const encodingModule = await ENCODING_MAP[supportedEncoding]();
|
|
11350
|
-
encodingCache.set(encoding, encodingModule);
|
|
11351
|
-
return encodingModule;
|
|
11352
|
-
};
|
|
11353
|
-
/**
|
|
11354
|
-
* Get tokenizer type from model information
|
|
11355
|
-
*/
|
|
11356
|
-
const getTokenizerFromModel = (model) => {
|
|
11357
|
-
return model.capabilities?.tokenizer || "o200k_base";
|
|
11358
|
-
};
|
|
11359
|
-
/**
|
|
11360
|
-
* Get model-specific constants for token calculation
|
|
11361
|
-
*/
|
|
11362
|
-
const getModelConstants = (model) => {
|
|
11363
|
-
return model.id === "gpt-3.5-turbo" || model.id === "gpt-4" ? {
|
|
11364
|
-
funcInit: 10,
|
|
11365
|
-
propInit: 3,
|
|
11366
|
-
propKey: 3,
|
|
11367
|
-
enumInit: -3,
|
|
11368
|
-
enumItem: 3,
|
|
11369
|
-
funcEnd: 12
|
|
11370
|
-
} : {
|
|
11371
|
-
funcInit: 7,
|
|
11372
|
-
propInit: 3,
|
|
11373
|
-
propKey: 3,
|
|
11374
|
-
enumInit: -3,
|
|
11375
|
-
enumItem: 3,
|
|
11376
|
-
funcEnd: 12
|
|
11377
|
-
};
|
|
11378
|
-
};
|
|
11379
|
-
/**
|
|
11380
|
-
* Calculate tokens for a single parameter
|
|
11381
|
-
*/
|
|
11382
|
-
const calculateParameterTokens = (key, prop, context) => {
|
|
11383
|
-
const { encoder, constants } = context;
|
|
11384
|
-
let tokens = constants.propKey;
|
|
11385
|
-
if (typeof prop !== "object" || prop === null) return tokens;
|
|
11386
|
-
const param = prop;
|
|
11387
|
-
const paramName = key;
|
|
11388
|
-
const paramType = param.type || "string";
|
|
11389
|
-
let paramDesc = param.description || "";
|
|
11390
|
-
if (param.enum && Array.isArray(param.enum)) {
|
|
11391
|
-
tokens += constants.enumInit;
|
|
11392
|
-
for (const item of param.enum) {
|
|
11393
|
-
tokens += constants.enumItem;
|
|
11394
|
-
tokens += encoder.encode(String(item)).length;
|
|
11395
|
-
}
|
|
11396
|
-
}
|
|
11397
|
-
if (paramDesc.endsWith(".")) paramDesc = paramDesc.slice(0, -1);
|
|
11398
|
-
const line = `${paramName}:${paramType}:${paramDesc}`;
|
|
11399
|
-
tokens += encoder.encode(line).length;
|
|
11400
|
-
const excludedKeys = new Set([
|
|
11401
|
-
"type",
|
|
11402
|
-
"description",
|
|
11403
|
-
"enum"
|
|
11404
|
-
]);
|
|
11405
|
-
for (const propertyName of Object.keys(param)) if (!excludedKeys.has(propertyName)) {
|
|
11406
|
-
const propertyValue = param[propertyName];
|
|
11407
|
-
const propertyText = typeof propertyValue === "string" ? propertyValue : JSON.stringify(propertyValue);
|
|
11408
|
-
tokens += encoder.encode(`${propertyName}:${propertyText}`).length;
|
|
11409
|
-
}
|
|
11410
|
-
return tokens;
|
|
11411
|
-
};
|
|
11412
|
-
/**
|
|
11413
|
-
* Calculate tokens for function parameters
|
|
11414
|
-
*/
|
|
11415
|
-
const calculateParametersTokens = (parameters, encoder, constants) => {
|
|
11416
|
-
if (!parameters || typeof parameters !== "object") return 0;
|
|
11417
|
-
const params = parameters;
|
|
11418
|
-
let tokens = 0;
|
|
11419
|
-
for (const [key, value] of Object.entries(params)) if (key === "properties") {
|
|
11420
|
-
const properties = value;
|
|
11421
|
-
if (Object.keys(properties).length > 0) {
|
|
11422
|
-
tokens += constants.propInit;
|
|
11423
|
-
for (const propKey of Object.keys(properties)) tokens += calculateParameterTokens(propKey, properties[propKey], {
|
|
11424
|
-
encoder,
|
|
11425
|
-
constants
|
|
11426
|
-
});
|
|
11427
|
-
}
|
|
11428
|
-
} else {
|
|
11429
|
-
const paramText = typeof value === "string" ? value : JSON.stringify(value);
|
|
11430
|
-
tokens += encoder.encode(`${key}:${paramText}`).length;
|
|
11431
|
-
}
|
|
11432
|
-
return tokens;
|
|
11433
|
-
};
|
|
11434
|
-
/**
|
|
11435
|
-
* Calculate tokens for a single tool
|
|
11436
|
-
*/
|
|
11437
|
-
const calculateToolTokens = (tool, encoder, constants) => {
|
|
11438
|
-
let tokens = constants.funcInit;
|
|
11439
|
-
const func = tool.function;
|
|
11440
|
-
const fName = func.name;
|
|
11441
|
-
let fDesc = func.description || "";
|
|
11442
|
-
if (fDesc.endsWith(".")) fDesc = fDesc.slice(0, -1);
|
|
11443
|
-
const line = fName + ":" + fDesc;
|
|
11444
|
-
tokens += encoder.encode(line).length;
|
|
11445
|
-
if (typeof func.parameters === "object" && func.parameters !== null) tokens += calculateParametersTokens(func.parameters, encoder, constants);
|
|
11446
|
-
return tokens;
|
|
11447
|
-
};
|
|
11448
|
-
/**
|
|
11449
|
-
* Calculate token count for tools based on model
|
|
11450
|
-
*/
|
|
11451
|
-
const numTokensForTools = (tools, encoder, constants) => {
|
|
11452
|
-
let funcTokenCount = 0;
|
|
11453
|
-
for (const tool of tools) funcTokenCount += calculateToolTokens(tool, encoder, constants);
|
|
11454
|
-
funcTokenCount += constants.funcEnd;
|
|
11455
|
-
return funcTokenCount;
|
|
11456
|
-
};
|
|
11457
|
-
/**
|
|
11458
|
-
* Calculate the token count of messages, supporting multiple GPT encoders
|
|
11459
|
-
*/
|
|
11460
|
-
const getTokenCount = async (payload, model) => {
|
|
11461
|
-
const encoder = await getEncodeChatFunction(getTokenizerFromModel(model));
|
|
11462
|
-
const simplifiedMessages = payload.messages;
|
|
11463
|
-
const inputMessages = simplifiedMessages.filter((msg) => msg.role !== "assistant");
|
|
11464
|
-
const outputMessages = simplifiedMessages.filter((msg) => msg.role === "assistant");
|
|
11465
|
-
const constants = getModelConstants(model);
|
|
11466
|
-
let inputTokens = calculateTokens(inputMessages, encoder, constants);
|
|
11467
|
-
if (payload.tools && payload.tools.length > 0) inputTokens += numTokensForTools(payload.tools, encoder, constants);
|
|
11468
|
-
const outputTokens = calculateTokens(outputMessages, encoder, constants);
|
|
11469
|
-
return {
|
|
11470
|
-
input: inputTokens,
|
|
11471
|
-
output: outputTokens
|
|
11472
|
-
};
|
|
11473
|
-
};
|
|
11474
|
-
|
|
11475
11618
|
//#endregion
|
|
11476
11619
|
//#region src/routes/chat-completions/handler.ts
|
|
11477
11620
|
const ENCODER$1 = new TextEncoder();
|
|
@@ -12560,7 +12703,7 @@ const modelRoutes = new Hono();
|
|
|
12560
12703
|
modelRoutes.get("/", async (c) => {
|
|
12561
12704
|
try {
|
|
12562
12705
|
if (!state.models) await cacheModels();
|
|
12563
|
-
const models = state.models?.data.map((model) => {
|
|
12706
|
+
const models$1 = state.models?.data.map((model) => {
|
|
12564
12707
|
const { requestHeaders,...rest } = model;
|
|
12565
12708
|
return {
|
|
12566
12709
|
...rest,
|
|
@@ -12574,7 +12717,7 @@ modelRoutes.get("/", async (c) => {
|
|
|
12574
12717
|
});
|
|
12575
12718
|
return c.json({
|
|
12576
12719
|
object: "list",
|
|
12577
|
-
data: models,
|
|
12720
|
+
data: models$1,
|
|
12578
12721
|
has_more: false
|
|
12579
12722
|
});
|
|
12580
12723
|
} catch (error) {
|
|
@@ -13530,6 +13673,154 @@ const debug = defineCommand({
|
|
|
13530
13673
|
}
|
|
13531
13674
|
});
|
|
13532
13675
|
|
|
13676
|
+
//#endregion
|
|
13677
|
+
//#region src/models.ts
|
|
13678
|
+
/**
 * `models` CLI subcommand: fetch the Copilot model catalog and print it.
 *
 * Flow: ensure runtime paths, set up the GitHub and Copilot tokens, fetch
 * the catalog with `getModels()`, optionally filter it with `filterModels`,
 * then either dump the (filtered) entries as JSON (`--json`) or print a
 * vendor-grouped listing built from `groupByVendor` / `formatModel`.
 *
 * Exits with status 1 when the Copilot token or the catalog cannot be
 * obtained, and when there is nothing to list in the pretty layout.
 */
const models = defineCommand({
  meta: {
    name: "models",
    description: "List available GitHub Copilot models and their capabilities. Pass an optional pattern to filter (case-insensitive substring match on id, name, vendor, family)."
  },
  args: {
    pattern: {
      type: "positional",
      required: false,
      description: "Substring to filter models by (matches id, name, vendor, or family)."
    },
    json: {
      type: "boolean",
      default: false,
      description: "Emit raw JSON instead of the pretty layout."
    }
  },
  async run({ args }) {
    await ensurePaths();
    await setupGitHubToken();
    try {
      await setupCopilotToken();
    } catch (err) {
      consola.error("Failed to obtain Copilot token:", err);
      process.exit(1);
    }
    let catalog;
    try {
      catalog = await getModels();
    } catch (err) {
      consola.error("Failed to fetch Copilot model catalog:", err);
      process.exit(1);
    }
    const all = catalog.data;
    const pattern = args.pattern?.toString().trim();
    // An empty or whitespace-only pattern is treated as "no filter".
    const filtered = pattern ? filterModels(all, pattern) : all;
    if (args.json) {
      process.stdout.write(`${JSON.stringify(filtered, null, 2)}\n`);
      return;
    }
    if (filtered.length === 0) {
      // Without a pattern the catalog itself is empty; don't report a
      // failed match against "undefined".
      if (pattern) consola.warn(`No models matched "${pattern}". ${all.length} models available — try a different substring or run without an argument to list everything.`);
      else consola.warn("The Copilot model catalog returned no models.");
      process.exit(1);
    }
    const grouped = groupByVendor(filtered);
    const lines = [];
    const header = pattern ? `${filtered.length}/${all.length} models match "${pattern}"` : `${all.length} models available`;
    lines.push(header);
    lines.push("");
    for (const [vendor, list] of grouped) {
      lines.push(`▾ ${vendor} (${list.length})`);
      for (const model of list) lines.push(...formatModel(model));
      // Blank separator after each vendor group; also yields the trailing newline.
      lines.push("");
    }
    process.stdout.write(lines.join("\n"));
  }
});
|
|
13735
|
+
/**
 * Keep the models matching `pattern`, case-insensitively.
 *
 * A model matches when `pattern` is a substring of its id, name, vendor,
 * capability family, capability type and model-picker category joined with
 * single spaces. Missing fields contribute an empty string, so a catalog
 * entry without `capabilities` is still searchable instead of throwing.
 *
 * @param models$1 Catalog entries to search.
 * @param pattern Non-empty search string.
 * @returns A new array with the matching entries, in their original order.
 */
function filterModels(models$1, pattern) {
  const needle = pattern.toLowerCase();
  return models$1.filter((m) => {
    // `Array.prototype.join` renders undefined/null elements as "".
    const haystack = [
      m.id,
      m.name,
      m.vendor,
      m.capabilities?.family,
      m.capabilities?.type,
      m.model_picker_category ?? ""
    ].join(" ").toLowerCase();
    return haystack.includes(needle);
  });
}
|
|
13748
|
+
/**
 * Bucket models by their `vendor` field.
 *
 * Entries with an empty or missing vendor are grouped under
 * "(unknown vendor)". Inside a bucket the input order is preserved.
 *
 * @param models$1 Catalog entries to group.
 * @returns An array of `[vendor, models]` pairs sorted by vendor name using
 *   `localeCompare`.
 */
function groupByVendor(models$1) {
  const buckets = /* @__PURE__ */ new Map();
  for (const entry of models$1) {
    const vendor = entry.vendor || "(unknown vendor)";
    if (!buckets.has(vendor)) buckets.set(vendor, []);
    buckets.get(vendor).push(entry);
  }
  return Array.from(buckets).sort((left, right) => left[0].localeCompare(right[0]));
}
|
|
13758
|
+
/**
 * Render one catalog entry as the lines of the pretty `models` listing.
 *
 * Output, in order: a bullet line with the id and bracketed tags, an
 * optional name line (only when it differs from the id), a family/type/
 * tokenizer/version line, then optional `limits:`, `supports:`,
 * `endpoints:` and `billing:` lines when there is something to show.
 *
 * `capabilities`, `capabilities.limits` and `capabilities.supports` are
 * treated as optional: a sparse entry produces fewer lines instead of
 * throwing and aborting the whole listing.
 *
 * @param model Catalog entry to render.
 * @returns Array of display lines (without trailing newlines).
 */
function formatModel(model) {
  const lines = [];
  const tags = [];
  if (model.preview) tags.push("preview");
  if (model.is_chat_default) tags.push("chat-default");
  if (model.is_chat_fallback) tags.push("chat-fallback");
  if (model.billing?.is_premium) tags.push("premium");
  if (model.billing?.restricted_to?.length) tags.push(`restricted:${model.billing.restricted_to.join("/")}`);
  if (model.policy && model.policy.state !== "enabled") tags.push(`policy:${model.policy.state}`);
  const tagStr = tags.length > 0 ? ` [${tags.join(", ")}]` : "";
  lines.push(` • ${model.id}${tagStr}`);
  if (model.name && model.name !== model.id) lines.push(` name: ${model.name}`);
  // Default missing sub-objects to {} so every later read is a plain
  // property lookup.
  const capabilities = model.capabilities ?? {};
  const meta = [`family: ${capabilities.family}`, `type: ${capabilities.type}`];
  if (capabilities.tokenizer) meta.push(`tokenizer: ${capabilities.tokenizer}`);
  if (model.version) meta.push(`version: ${model.version}`);
  lines.push(` ${meta.join(" · ")}`);
  const limits = capabilities.limits ?? {};
  const limitParts = [];
  // The context window takes precedence; the prompt limit is shown only
  // when no context window is reported.
  if (limits.max_context_window_tokens) limitParts.push(`ctx ${formatTokens(limits.max_context_window_tokens)}`);
  else if (limits.max_prompt_tokens) limitParts.push(`prompt ${formatTokens(limits.max_prompt_tokens)}`);
  if (limits.max_output_tokens) limitParts.push(`out ${formatTokens(limits.max_output_tokens)}`);
  if (limits.max_non_streaming_output_tokens && limits.max_non_streaming_output_tokens !== limits.max_output_tokens) limitParts.push(`out-non-stream ${formatTokens(limits.max_non_streaming_output_tokens)}`);
  if (limits.max_inputs) limitParts.push(`inputs ${limits.max_inputs}`);
  if (limits.vision?.max_prompt_images) limitParts.push(`images ${limits.vision.max_prompt_images}`);
  if (limitParts.length > 0) lines.push(` limits: ${limitParts.join(" · ")}`);
  const supports = capabilities.supports ?? {};
  const supportFlags = [];
  if (supports.tool_calls) supportFlags.push("tools");
  if (supports.parallel_tool_calls) supportFlags.push("parallel-tools");
  if (supports.streaming) supportFlags.push("streaming");
  if (supports.vision) supportFlags.push("vision");
  if (supports.structured_outputs) supportFlags.push("structured-outputs");
  if (supports.dimensions) supportFlags.push("dimensions");
  if (supports.adaptive_thinking) {
    const min = supports.min_thinking_budget;
    const max = supports.max_thinking_budget;
    // The budget range is shown only when both bounds are reported.
    const range = min !== void 0 && max !== void 0 ? `(${formatTokens(min)}-${formatTokens(max)})` : "";
    supportFlags.push(`adaptive-thinking${range}`);
  }
  if (supports.reasoning_effort && supports.reasoning_effort.length > 0) supportFlags.push(`reasoning:${supports.reasoning_effort.join("/")}`);
  if (supportFlags.length > 0) lines.push(` supports: ${supportFlags.join(", ")}`);
  if (model.supported_endpoints && model.supported_endpoints.length > 0) lines.push(` endpoints: ${model.supported_endpoints.join(", ")}`);
  if (model.billing) {
    const billParts = [];
    if (model.billing.is_premium) billParts.push("premium");
    if (typeof model.billing.multiplier === "number") billParts.push(`×${model.billing.multiplier}`);
    if (billParts.length > 0) lines.push(` billing: ${billParts.join(" ")}`);
  }
  return lines;
}
|
|
13808
|
+
/**
 * Format a token count in a compact human-readable form.
 *
 * Clean decimal multiples are tried first (`64000` → `64k`, `128000` →
 * `128k`, `1000000` → `1M`), then clean binary multiples (`4096` → `4k`,
 * `131072` → `128k`, `1048576` → `1M`). Anything else is printed as the
 * plain number.
 *
 * Decimal goes first because every multiple of 128000 is also a multiple of
 * 1024: checking binary first would render `128000` as `125k` and
 * `16000000` as `15625k`. No power of 1024 is a multiple of 1000, so the
 * binary forms of values such as 131072 and 1048576 are unaffected.
 *
 * @param n Token count.
 * @returns Compact label with a `k` or `M` suffix, or `String(n)`.
 */
function formatTokens(n) {
  if (n >= 1e6 && n % 1e6 === 0) return `${n / 1e6}M`;
  if (n >= 1e3 && n % 1e3 === 0) return `${n / 1e3}k`;
  if (n >= 1048576 && n % 1048576 === 0) return `${n / 1048576}M`;
  if (n >= 1024 && n % 1024 === 0) return `${n / 1024}k`;
  return `${n}`;
}
|
|
13823
|
+
|
|
13533
13824
|
//#endregion
|
|
13534
13825
|
//#region src/lib/shell.ts
|
|
13535
13826
|
function getShell() {
|
|
@@ -13666,6 +13957,7 @@ await runMain(defineCommand({
|
|
|
13666
13957
|
start,
|
|
13667
13958
|
claude,
|
|
13668
13959
|
codex,
|
|
13960
|
+
models,
|
|
13669
13961
|
"check-usage": checkUsage,
|
|
13670
13962
|
debug
|
|
13671
13963
|
}
|