github-router 0.3.30 → 0.3.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.js +595 -43
- package/dist/main.js.map +1 -1
- package/package.json +1 -1
package/dist/main.js
CHANGED
|
@@ -4874,6 +4874,35 @@ function geminiAvailable() {
|
|
|
4874
4874
|
return models.some((m) => /^gemini-3\..*pro/i.test(m.id));
|
|
4875
4875
|
}
|
|
4876
4876
|
/**
 * Gate for the `stand_in` tool.
 *
 * Reports whether the live model catalog (`state.models?.data`) currently
 * lists every peer model the consensus protocol depends on:
 *   - `gpt-5.5`
 *   - Claude Opus 4.7, accepted under either the dashed slug
 *     `claude-opus-4-7` or the dotted slug `claude-opus-4.7`
 *   - any id matching `/^gemini-3\..*pro/i` (so a rename such as
 *     `gemini-3.1-pro-preview` → `gemini-3.1-pro` still passes)
 *
 * When this returns false the tool is filtered out of `tools/list` and a
 * `tools/call` for it is rejected as an unknown tool.
 *
 * @returns {boolean} true only when all three requirements are met; false
 *   when the catalog is not loaded or any one model is missing.
 */
function standInToolEnabled() {
  const catalog = state.models?.data;
  if (!catalog) return false;

  // One predicate per required peer model, each applied to a catalog id.
  const requirements = [
    (id) => id === "gpt-5.5",
    (id) => id === "claude-opus-4-7" || id === "claude-opus-4.7",
    (id) => /^gemini-3\..*pro/i.test(id),
  ];

  const catalogHas = (matches) => catalog.some((entry) => matches(entry.id));
  return requirements.every((matches) => catalogHas(matches));
}
|
|
4905
|
+
/**
|
|
4877
4906
|
* Gate for the worker tools (`worker_explore`, `worker_implement`).
|
|
4878
4907
|
*
|
|
4879
4908
|
* Returns true iff BOTH:
|
|
@@ -4934,7 +4963,11 @@ function toolEntries() {
|
|
|
4934
4963
|
}
|
|
4935
4964
|
}
|
|
4936
4965
|
}));
|
|
4937
|
-
const nonPersonaEntries = NON_PERSONA_MCP_TOOLS.filter((t) =>
|
|
4966
|
+
const nonPersonaEntries = NON_PERSONA_MCP_TOOLS.filter((t) => {
|
|
4967
|
+
if (t.capability === "worker") return workerToolsEnabled();
|
|
4968
|
+
if (t.capability === "stand_in") return standInToolEnabled();
|
|
4969
|
+
return true;
|
|
4970
|
+
}).map((t) => ({
|
|
4938
4971
|
name: t.toolNameHttp,
|
|
4939
4972
|
description: t.description,
|
|
4940
4973
|
inputSchema: t.inputSchema
|
|
@@ -5054,10 +5087,21 @@ function jsonPathPreflightCap(body) {
|
|
|
5054
5087
|
const params = body.params ?? {};
|
|
5055
5088
|
const name$1 = typeof params.name === "string" ? params.name : "";
|
|
5056
5089
|
const args = params.arguments ?? {};
|
|
5090
|
+
if (!name$1) return void 0;
|
|
5091
|
+
if (name$1 === "stand_in") {
|
|
5092
|
+
const decision = typeof args.decision === "string" ? args.decision : "";
|
|
5093
|
+
const optionsRaw = Array.isArray(args.options) ? args.options : [];
|
|
5094
|
+
const standInContext = typeof args.context === "string" ? args.context : "";
|
|
5095
|
+
if (!decision || optionsRaw.length === 0) return void 0;
|
|
5096
|
+
const briefBytes$1 = Buffer.byteLength(decision + JSON.stringify(optionsRaw) + standInContext, "utf8");
|
|
5097
|
+
const STAND_IN_CAP_BYTES = 6 * 1024;
|
|
5098
|
+
if (briefBytes$1 > STAND_IN_CAP_BYTES) return rpcResult(body.id, toolError(`pre-flight rejected: stand_in on a ${briefBytes$1}-byte input is predicted to exceed the JSON tools/call timeout (cap=${STAND_IN_CAP_BYTES} bytes). stand_in runs two sequential voting rounds across three frontier models — wall-clock is typically 2-3 minutes regardless of input size. Send Accept: text/event-stream to use the SSE path which bypasses this cap, or trim the decision/options/context.`));
|
|
5099
|
+
return;
|
|
5100
|
+
}
|
|
5057
5101
|
const prompt = typeof args.prompt === "string" ? args.prompt : "";
|
|
5058
5102
|
const context = typeof args.context === "string" ? args.context : void 0;
|
|
5059
5103
|
const rawEffort = args.effort;
|
|
5060
|
-
if (!
|
|
5104
|
+
if (!prompt) return void 0;
|
|
5061
5105
|
const persona = activePersonas().find((p) => p.toolNameHttp === name$1);
|
|
5062
5106
|
if (!persona) return void 0;
|
|
5063
5107
|
if (rawEffort !== void 0 && !isEffort(rawEffort)) return void 0;
|
|
@@ -5069,60 +5113,81 @@ function jsonPathPreflightCap(body) {
|
|
|
5069
5113
|
if (!verdict.tooLong) return void 0;
|
|
5070
5114
|
return rpcResult(body.id, toolError(`pre-flight rejected: ${persona.toolNameHttp} at effort=${effort} on a ${briefBytes}-byte brief is empirically predicted to exceed the JSON tools/call timeout (cap=${verdict.capBytes} bytes for this tier). Either drop to a lower effort tier, split the brief into 2-4 parallel sub-calls per the decomposition guidance, or send Accept: text/event-stream to use the SSE path which bypasses this cap.`));
|
|
5071
5115
|
}
|
|
5072
|
-
|
|
5073
|
-
|
|
5074
|
-
|
|
5075
|
-
|
|
5076
|
-
|
|
5077
|
-
|
|
5078
|
-
|
|
5079
|
-
|
|
5080
|
-
|
|
5081
|
-
|
|
5082
|
-
|
|
5083
|
-
|
|
5084
|
-
|
|
5085
|
-
|
|
5086
|
-
|
|
5087
|
-
|
|
5088
|
-
|
|
5089
|
-
|
|
5090
|
-
|
|
5091
|
-
|
|
5092
|
-
|
|
5093
|
-
|
|
5094
|
-
|
|
5095
|
-
|
|
5096
|
-
|
|
5097
|
-
|
|
5116
|
+
/**
|
|
5117
|
+
* Per-endpoint wire dispatch for a single peer-model call. Returns the
|
|
5118
|
+
* assistant's raw text (possibly empty — caller decides what "empty"
|
|
5119
|
+
* means in their context). Upstream errors (network, 4xx, 5xx) propagate
|
|
5120
|
+
* as exceptions via `await`.
|
|
5121
|
+
*
|
|
5122
|
+
* Extracted from `callPersona()` so non-persona callers — specifically
|
|
5123
|
+
* the `stand_in` orchestrator in `src/lib/stand-in.ts` — can reuse the
|
|
5124
|
+
* same per-endpoint request shaping without re-implementing it. The
|
|
5125
|
+
* stand_in tool needs to drive its own per-round system prompts across
|
|
5126
|
+
* three concrete models (gpt-5.5, claude-opus-4-7, gemini-3.1-pro-preview),
|
|
5127
|
+
* each on a different endpoint; doing that with a `PersonaSpec` would
|
|
5128
|
+
* require either inventing throwaway personas per round or duplicating
|
|
5129
|
+
* the dispatch switch.
|
|
5130
|
+
*
|
|
5131
|
+
* NOTE on consumer-cancel signal: we deliberately do NOT pass
|
|
5132
|
+
* c.req.raw.signal into the upstream fetch. Bun/srvx aborts the
|
|
5133
|
+
* request signal as soon as the request body is fully consumed
|
|
5134
|
+
* (after `await c.req.json()`), which would make every call fail
|
|
5135
|
+
* immediately with "This operation was aborted". The caller creates
|
|
5136
|
+
* its own AbortController and threads it through `signal`. See CLAUDE.md
|
|
5137
|
+
* "Bun request-signal quirk" for full context.
|
|
5138
|
+
*/
|
|
5139
|
+
async function dispatchModelCall(args) {
  const model = resolveModel(args.model);
  const { endpoint, instructions, userText, effort, signal } = args;

  switch (endpoint) {
    case "/v1/responses": {
      // Responses API: system text travels as `instructions`, user text as an input_text part.
      const response = await createResponses(
        {
          model,
          instructions,
          input: [
            {
              role: "user",
              content: [{ type: "input_text", text: userText }],
            },
          ],
          stream: false,
          reasoning: { effort },
        },
        undefined,
        signal,
      );
      return extractResponsesText(response);
    }

    case "/v1/messages": {
      // Output-token budget grows with effort; anything above "high" gets the largest budget.
      let maxTokens;
      switch (effort) {
        case "low":
          maxTokens = 4096;
          break;
        case "medium":
          maxTokens = 8192;
          break;
        case "high":
          maxTokens = 16384;
          break;
        default:
          maxTokens = 32768;
      }
      // createMessages takes a pre-serialized body and resolves to an object exposing `.json()`.
      const requestBody = JSON.stringify({
        model,
        max_tokens: maxTokens,
        system: instructions,
        thinking: { type: "adaptive" },
        output_config: { effort },
        messages: [{ role: "user", content: userText }],
      });
      const httpResponse = await createMessages(requestBody, undefined, signal);
      return extractMessagesText(await httpResponse.json());
    }

    default: {
      // Every other endpoint value is sent through the chat-completions shape.
      const completion = await createChatCompletions(
        {
          model,
          messages: [
            { role: "system", content: instructions },
            { role: "user", content: userText },
          ],
          stream: false,
          reasoning_effort: effort,
        },
        undefined,
        signal,
      );
      return extractChatCompletionText(completion);
    }
  }
}
|
|
5181
|
+
async function callPersona(persona, prompt, context, effort, signal) {
|
|
5182
|
+
const userText = buildUserText(prompt, context);
|
|
5183
|
+
const text = await dispatchModelCall({
|
|
5184
|
+
model: persona.model,
|
|
5185
|
+
endpoint: persona.endpoint,
|
|
5186
|
+
instructions: persona.baseInstructions,
|
|
5187
|
+
userText,
|
|
5188
|
+
effort,
|
|
5189
|
+
signal
|
|
5190
|
+
});
|
|
5126
5191
|
if (!text) return toolError(`persona ${persona.agentName}: empty assistant output`);
|
|
5127
5192
|
return { content: [{
|
|
5128
5193
|
type: "text",
|
|
@@ -5150,6 +5215,7 @@ async function handleToolsCall(body) {
|
|
|
5150
5215
|
const nonPersonaTool = persona ? void 0 : NON_PERSONA_MCP_TOOLS.find((t) => t.toolNameHttp === name$1);
|
|
5151
5216
|
if (!persona && !nonPersonaTool) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
|
|
5152
5217
|
if (nonPersonaTool && nonPersonaTool.capability === "worker" && !workerToolsEnabled()) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
|
|
5218
|
+
if (nonPersonaTool && nonPersonaTool.capability === "stand_in" && !standInToolEnabled()) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
|
|
5153
5219
|
let personaPrompt;
|
|
5154
5220
|
let personaContext;
|
|
5155
5221
|
let personaEffort;
|
|
@@ -7980,6 +8046,341 @@ async function runWorkerAgent(opts) {
|
|
|
7980
8046
|
}
|
|
7981
8047
|
}
|
|
7982
8048
|
|
|
8049
|
+
//#endregion
|
|
8050
|
+
//#region src/lib/stand-in.ts
|
|
8051
|
+
/**
|
|
8052
|
+
* The three frontier peers. Effort is FIXED per model — not caller-tunable.
|
|
8053
|
+
* The tool's purpose is "give me the best 3-lab judgment available";
|
|
8054
|
+
* exposing effort knobs would invite the caller to cheap out and would
|
|
8055
|
+
* muddy the consensus signal.
|
|
8056
|
+
*
|
|
8057
|
+
* gemini-3.1-pro-preview is pinned to `high` because the model rejects
|
|
8058
|
+
* `xhigh` at the wire with a Copilot 400. `high` is the realistic ceiling.
|
|
8059
|
+
*/
|
|
8060
|
+
const STAND_IN_MODELS = Object.freeze([
  // Per entry: `key` labels the model in vote records and in the round-2
  // "You are <key>" line; `model`, `endpoint` and `effort` are forwarded
  // unchanged to `dispatchModelCall`. Object.freeze is shallow: the array
  // is frozen, the entry objects themselves are not.
  {
    key: "gpt-5.5",
    model: "gpt-5.5",
    endpoint: "/v1/responses",
    effort: "xhigh"
  },
  {
    key: "claude-opus-4-7",
    model: "claude-opus-4-7",
    endpoint: "/v1/messages",
    effort: "xhigh"
  },
  {
    key: "gemini-3.1-pro-preview",
    model: "gemini-3.1-pro-preview",
    endpoint: "/v1/chat/completions",
    effort: "high"
  }
]);
|
|
8080
|
+
const SYSTEM_PROMPT_R1 = `You are one of three frontier reasoning models the user has authorized to stand in for them on a bounded decision while they are unavailable. Your task: pick the best option from those provided.
|
|
8081
|
+
|
|
8082
|
+
Respond with ONLY a single JSON object — no prose, no markdown fences, no preamble. Schema:
|
|
8083
|
+
|
|
8084
|
+
{
|
|
8085
|
+
"choice": "<option.id>" | null,
|
|
8086
|
+
"confidence": <number between 0.0 and 1.0>,
|
|
8087
|
+
"reasoning": "<one short sentence>",
|
|
8088
|
+
"need_more_info": "<what context is missing, if you cannot decide>"
|
|
8089
|
+
}
|
|
8090
|
+
|
|
8091
|
+
Calibration rules:
|
|
8092
|
+
- "confidence" reflects how sure you are this is the better option (not how confident you are in your prose). 0.5 = coin flip. 0.9 = clear winner. Be honestly calibrated; the orchestrator weighs your number directly.
|
|
8093
|
+
- If the question is genuinely under-specified — you'd need information you don't have to choose well — set "choice": null AND populate "need_more_info" with the specific gap. Do NOT guess.
|
|
8094
|
+
- One sentence of reasoning. Not a paragraph.
|
|
8095
|
+
- The other two models will vote independently and you will see their votes in round 2. There is no benefit to anticipating what they'll pick; vote on the merits.
|
|
8096
|
+
|
|
8097
|
+
Output ONLY the JSON object. No preamble, no markdown fences, no closing remarks.`;
|
|
8098
|
+
const SYSTEM_PROMPT_R2 = `You are one of three frontier reasoning models standing in for the user on a bounded decision. Round 1 voting is complete; you will now see the other models' votes and reasoning. Reconsider with their input visible.
|
|
8099
|
+
|
|
8100
|
+
Same JSON schema as round 1:
|
|
8101
|
+
|
|
8102
|
+
{
|
|
8103
|
+
"choice": "<option.id>" | null,
|
|
8104
|
+
"confidence": <number between 0.0 and 1.0>,
|
|
8105
|
+
"reasoning": "<one short sentence>",
|
|
8106
|
+
"need_more_info": "<gap, if any>"
|
|
8107
|
+
}
|
|
8108
|
+
|
|
8109
|
+
Calibration rules:
|
|
8110
|
+
- You may keep your round-1 vote OR change it. Do NOT change just to agree — agreement is not the goal, the right answer is. Capitulating to peer pressure when you still believe your original choice is better is a failure mode, not a success.
|
|
8111
|
+
- If a peer's reasoning identifies a consideration you missed or weighed wrong, update freely. The blind round was the anti-anchor mechanism; this round is where genuine evidence can move you.
|
|
8112
|
+
- If round 1 left you genuinely uncertain and peer reasoning hasn't resolved it, "choice": null is still the honest answer.
|
|
8113
|
+
|
|
8114
|
+
Output ONLY the JSON object.`;
|
|
8115
|
+
const RETRY_PROMPT_SUFFIX = `\n\nYour previous response was not valid JSON matching the schema. Respond with ONLY the JSON object — no preamble, no markdown fences, no closing remarks. Schema reminder: {"choice": "<id>" | null, "confidence": 0.0-1.0, "reasoning": "<one sentence>", "need_more_info": "<gap, if any>"}`;
|
|
8116
|
+
/**
 * Run the two-round stand-in protocol and return a structured verdict
 * envelope `{ verdict, recommendation, confidence, votes, notes }`.
 *
 * Flow:
 *   1. Round 1 polls every `STAND_IN_MODELS` entry in parallel (blind vote).
 *   2. If every model abstained with a `needMoreInfo` gap, return
 *      `need_more_info` listing the gaps.
 *   3. If round 1 is unanimous with mean confidence >= 0.8, return
 *      `consensus` and skip round 2.
 *   4. If fewer than two round-1 votes parsed, return `no_consensus`.
 *   5. Round 2 re-polls every model with the round-1 votes visible, then
 *      maps the aggregate to `consensus`, `majority` or `no_consensus`.
 *
 * Per-model failures are expected to arrive as `{ error, ... }` vote entries
 * from `callAndParse` and are reported inside `votes`; they do not abort the
 * run. This function contains no `throw` of its own, so it rejects only if
 * one of the helpers it awaits rejects.
 *
 * @param {{decision: string, options: Array<{id: string, summary: string, detail?: string}>, context?: string}} input
 *   Validated tool input.
 * @param {AbortSignal} [signal] Forwarded to every upstream model call.
 * @returns {Promise<object>} The verdict envelope described above.
 */
async function runStandIn(input, signal) {
  const r1UserText = buildRound1UserText(input);
  const r1 = await Promise.all(STAND_IN_MODELS.map((cfg) => callAndParse(cfg, SYSTEM_PROMPT_R1, r1UserText, signal)));
  const successfulR1 = r1.filter((r) => isVote(r.vote));

  // Unanimous abstention with stated gaps: surface the gaps instead of forcing a re-vote.
  if (successfulR1.length === STAND_IN_MODELS.length && successfulR1.every((r) => r.vote.needMoreInfo && r.vote.choice === null)) {
    const gaps = successfulR1.map((r) => `- ${r.key}: ${r.vote.needMoreInfo}`).join("\n");
    return {
      verdict: "need_more_info",
      recommendation: null,
      confidence: 0,
      votes: voteRecord(r1, null),
      notes: `All three models reported they need more context to decide:\n${gaps}`
    };
  }

  // Fast path: unanimous and confident blind votes need no second round.
  const r1Decision = aggregateVotes(successfulR1);
  if (r1Decision.verdict === "consensus" && r1Decision.meanConfidence >= .8) return {
    verdict: "consensus",
    recommendation: r1Decision.winner,
    confidence: round2(r1Decision.meanConfidence),
    votes: voteRecord(r1, null),
    notes: `All three models picked ${r1Decision.winner} in round 1 with high confidence (skipped round 2).`
  };

  if (successfulR1.length < 2) return {
    verdict: "no_consensus",
    recommendation: null,
    confidence: 0,
    votes: voteRecord(r1, null),
    notes: `Only ${successfulR1.length} of 3 models returned a parseable round-1 vote; insufficient signal to run round 2.`
  };

  // Round 2: every model (including any that failed round 1) sees the round-1 summary.
  const r2UserTextBase = buildRound2UserTextBase(input, r1);
  const r2 = await Promise.all(STAND_IN_MODELS.map((cfg) => callAndParse(cfg, SYSTEM_PROMPT_R2, r2UserTextBase + `\n\nYou are ${cfg.key}. Reconsider and vote.`, signal)));
  const successfulR2 = r2.filter((r) => isVote(r.vote));
  if (successfulR2.length < 2) return {
    verdict: "no_consensus",
    recommendation: null,
    confidence: 0,
    votes: voteRecord(r1, r2),
    notes: `Only ${successfulR2.length} of 3 models returned a parseable round-2 vote; deferring to user.`
  };

  const r2Decision = aggregateVotes(successfulR2);
  if (r2Decision.verdict === "consensus") return {
    verdict: "consensus",
    recommendation: r2Decision.winner,
    confidence: round2(r2Decision.meanConfidence),
    votes: voteRecord(r1, r2),
    notes: `All three models picked ${r2Decision.winner} in round 2.`
  };

  if (r2Decision.verdict === "majority") {
    const dissenters = successfulR2
      .filter((r) => r.vote.choice !== r2Decision.winner)
      .map((r) => `${r.key} picked ${r.vote.choice ?? "abstain"} (${r.vote.reasoning})`);
    // A 2-vote majority can also arise because the third model produced no
    // valid vote at all; say so rather than emitting an empty "Dissent: .".
    const remainder = dissenters.length > 0
      ? `Dissent: ${dissenters.join("; ")}.`
      : "The remaining model did not return a valid round-2 vote.";
    return {
      verdict: "majority",
      recommendation: r2Decision.winner,
      confidence: round2(r2Decision.meanConfidence),
      votes: voteRecord(r1, r2),
      notes: `Majority (2 of 3) picked ${r2Decision.winner}. ${remainder}`
    };
  }

  return {
    verdict: "no_consensus",
    recommendation: null,
    confidence: 0,
    votes: voteRecord(r1, r2),
    notes: `Models did not converge in round 2 (votes split). Defer to user.`
  };
}
|
|
8187
|
+
/**
 * Ask one peer model for a vote and parse it, retrying once on a parse failure.
 *
 * The first call sends `userText` as-is. If the reply does not parse as a
 * vote, a second call is made with `RETRY_PROMPT_SUFFIX` appended. Upstream
 * exceptions from either call are caught and converted into an
 * `upstream_error` entry, so this function resolves rather than rejects for
 * model-level failures.
 *
 * @param {{key: string, model: string, endpoint: string, effort: string}} cfg
 *   Model descriptor (an entry of `STAND_IN_MODELS`).
 * @param {string} instructions System prompt for the round.
 * @param {string} userText User-turn text for the round.
 * @param {AbortSignal} [signal] Forwarded to `dispatchModelCall`.
 * @returns {Promise<{key: string, vote: object}>} `vote` is either a parsed
 *   vote or `{ error: "upstream_error" | "parse_failure", message, raw? }`.
 */
async function callAndParse(cfg, instructions, userText, signal) {
  const failure = (error, message, extra = {}) => ({
    key: cfg.key,
    vote: { error, message, ...extra },
  });
  const ask = (text) => dispatchModelCall({
    model: cfg.model,
    endpoint: cfg.endpoint,
    instructions,
    userText: text,
    effort: cfg.effort,
    signal,
  });

  let raw;
  try {
    raw = await ask(userText);
  } catch (err) {
    return failure("upstream_error", String(err));
  }
  const first = tryParseVote(raw);
  if (first.ok) return { key: cfg.key, vote: first.vote };

  // One retry with an explicit schema reminder appended to the user text.
  let retryRaw;
  try {
    retryRaw = await ask(userText + RETRY_PROMPT_SUFFIX);
  } catch (err) {
    return failure("upstream_error", `retry after parse failure: ${String(err)}`);
  }
  const second = tryParseVote(retryRaw);
  if (second.ok) return { key: cfg.key, vote: second.vote };

  // tryParseVote tolerates a missing reply, so the excerpt must too: calling
  // .slice on a non-string would throw and reject the caller's Promise.all.
  const excerpt = typeof retryRaw === "string" ? retryRaw.slice(0, 500) : "";
  return failure(
    "parse_failure",
    `Could not parse vote JSON after one retry. Last error: ${second.error}.`,
    { raw: excerpt },
  );
}
|
|
8245
|
+
/**
 * Parse a model reply into a vote.
 *
 * JSON is looked for in three places, in order: the whole trimmed reply, the
 * contents of the first Markdown code fence, and the span from the first `{`
 * to the last `}`. The last form covers replies that wrap the object in
 * unfenced prose, which would otherwise cost a full retry round-trip.
 *
 * Validation: `choice` must be `null` or a non-empty string; `confidence`
 * must be a finite number and is clamped to [0, 1]; `reasoning` must be a
 * non-empty string; `need_more_info` is optional and kept only when it is a
 * non-empty string.
 *
 * @param {string} raw Raw assistant text (may be empty or missing).
 * @returns {{ok: true, vote: {choice: string|null, confidence: number, reasoning: string, needMoreInfo: string|undefined}} | {ok: false, error: string}}
 */
function tryParseVote(raw) {
  if (!raw || !raw.trim()) return {
    ok: false,
    error: "empty response"
  };

  // Wrap the result so a successfully parsed `null` is distinguishable from a parse failure.
  const parseJson = (text) => {
    try {
      return { value: JSON.parse(text) };
    } catch {
      return undefined;
    }
  };

  let attempt = parseJson(raw.trim());
  if (!attempt) {
    const fence = /```(?:json)?\s*([\s\S]*?)\s*```/.exec(raw);
    if (fence) attempt = parseJson(fence[1]);
    if (!attempt) {
      // Last resort: the outermost brace-delimited span inside surrounding prose.
      const start = raw.indexOf("{");
      const end = raw.lastIndexOf("}");
      if (start !== -1 && end > start) attempt = parseJson(raw.slice(start, end + 1));
    }
    if (!attempt) return {
      ok: false,
      error: fence ? "code fence content was not valid JSON" : "not valid JSON and no code fence found"
    };
  }

  const parsed = attempt.value;
  if (typeof parsed !== "object" || parsed === null) return {
    ok: false,
    error: "parsed value is not an object"
  };
  const obj = parsed;

  const choice = obj.choice === null ? null : typeof obj.choice === "string" && obj.choice.length > 0 ? obj.choice : undefined;
  if (choice === undefined) return {
    ok: false,
    error: "missing or invalid 'choice' field (string or null required)"
  };

  const confidenceRaw = obj.confidence;
  const confidence = typeof confidenceRaw === "number" && Number.isFinite(confidenceRaw) ? Math.max(0, Math.min(1, confidenceRaw)) : undefined;
  if (confidence === undefined) return {
    ok: false,
    error: "missing or invalid 'confidence' field (number 0-1 required)"
  };

  const reasoning = typeof obj.reasoning === "string" ? obj.reasoning : "";
  if (!reasoning) return {
    ok: false,
    error: "missing or empty 'reasoning' field"
  };

  return {
    ok: true,
    vote: {
      choice,
      confidence,
      reasoning,
      needMoreInfo: typeof obj.need_more_info === "string" && obj.need_more_info.length > 0 ? obj.need_more_info : undefined
    }
  };
}
|
|
8299
|
+
/**
 * Tally parsed votes and classify the outcome.
 *
 * Abstentions (`choice === null`) are not counted. The leader is the choice
 * with the strictly highest count (first seen wins a tie). The outcome is
 * "consensus" when the leader's count equals `STAND_IN_MODELS.length`,
 * "majority" when it is at least 2, and "split" otherwise.
 *
 * @param {Array<{vote: {choice: string|null, confidence: number}}>} results
 *   Successfully parsed votes only.
 * @returns {{verdict: "consensus"|"majority"|"split", winner: string|null, meanConfidence: number}}
 *   `meanConfidence` averages only the votes cast for the winner.
 */
function aggregateVotes(results) {
  const tally = new Map();
  results.forEach(({ vote }) => {
    if (vote.choice === null) return;
    const prior = tally.get(vote.choice);
    tally.set(vote.choice, {
      count: (prior?.count ?? 0) + 1,
      sumConfidence: (prior?.sumConfidence ?? 0) + vote.confidence,
    });
  });

  let leader = { choice: null, count: 0, sumConfidence: 0 };
  tally.forEach((stats, choice) => {
    if (stats.count > leader.count) leader = { choice, ...stats };
  });

  const panelSize = STAND_IN_MODELS.length;
  const unanimous = leader.count === panelSize;
  if (leader.choice && (unanimous || leader.count >= 2)) {
    return {
      verdict: unanimous ? "consensus" : "majority",
      winner: leader.choice,
      meanConfidence: leader.sumConfidence / leader.count,
    };
  }
  return { verdict: "split", winner: null, meanConfidence: 0 };
}
|
|
8336
|
+
/**
 * Render the round-1 user message: the decision, one bullet per option
 * (`- id: summary`, plus ` — detail` when a detail is present), and an
 * optional trailing "Context:" section.
 *
 * @param {{decision: string, options: Array<{id: string, summary: string, detail?: string}>, context?: string}} input
 * @returns {string} Newline-joined prompt text.
 */
function buildRound1UserText(input) {
  const optionBullets = input.options.map((opt) => {
    const detailPart = opt.detail ? ` — ${opt.detail}` : "";
    return `- ${opt.id}: ${opt.summary}${detailPart}`;
  });

  const sections = [`Decision: ${input.decision}`, "", "Options:", ...optionBullets];
  // An empty or missing context adds no section at all.
  if (input.context) sections.push("", "Context:", input.context);
  return sections.join("\n");
}
|
|
8352
|
+
/**
 * Render the shared part of the round-2 user message: the round-1 text
 * followed by a "Round 1 votes:" list with one line per model. Parsed votes
 * show choice (or "abstain"), confidence to two decimals, reasoning, and any
 * stated information gap; failed entries show their error code.
 *
 * @param {object} input Same input passed to `buildRound1UserText`.
 * @param {Array<{key: string, vote: object}>} r1 Round-1 results, one per model.
 * @returns {string} Newline-joined prompt text (without the per-model suffix).
 */
function buildRound2UserTextBase(input, r1) {
  const describe = ({ key, vote }) => {
    if (!isVote(vote)) return `- ${key} did not return a valid round-1 vote (${vote.error}).`;
    const picked = vote.choice === null ? "abstain" : vote.choice;
    const gap = vote.needMoreInfo ? ` (needs: ${vote.needMoreInfo})` : "";
    return `- ${key} picked ${picked}, confidence ${vote.confidence.toFixed(2)}, reasoning: ${vote.reasoning}${gap}`;
  };

  return [buildRound1UserText(input), "", "Round 1 votes:", ...r1.map(describe)].join("\n");
}
|
|
8362
|
+
/**
 * Tell a parsed vote apart from a failure entry.
 *
 * @param {object} v A vote or a failure record.
 * @returns {boolean} true when `v` has no `error` property.
 */
function isVote(v) {
  const carriesError = "error" in v;
  return !carriesError;
}
|
|
8365
|
+
/**
 * Build the per-model vote record returned to the caller.
 *
 * @param {Array<{key: string, vote: object}>} r1 Round-1 results.
 * @param {Array<{key: string, vote: object}>|null} r2 Round-2 results, or
 *   null when round 2 did not run.
 * @returns {Object<string, {round1: object, round2: object|null}>} One entry
 *   per `STAND_IN_MODELS` key. A model with no round-1 result gets a
 *   placeholder `upstream_error`; a model with no round-2 result gets `null`.
 */
function voteRecord(r1, r2) {
  const rows = STAND_IN_MODELS.map(({ key }) => {
    const first = r1.find((entry) => entry.key === key);
    const second = r2?.find((entry) => entry.key === key) ?? null;
    const round1 = first?.vote ?? {
      error: "upstream_error",
      message: "no round-1 result recorded",
    };
    return [key, { round1, round2: second ? second.vote : null }];
  });
  return Object.fromEntries(rows);
}
|
|
8380
|
+
/**
 * Round a number to two decimal places.
 *
 * @param {number} n Value to round.
 * @returns {number} `n` rounded to the nearest hundredth via `Math.round`.
 */
function round2(n) {
  const HUNDREDTHS = 100;
  const scaled = Math.round(n * HUNDREDTHS);
  return scaled / HUNDREDTHS;
}
|
|
8383
|
+
|
|
7983
8384
|
//#endregion
|
|
7984
8385
|
//#region src/lib/peer-mcp-personas.ts
|
|
7985
8386
|
/**
|
|
@@ -8526,6 +8927,54 @@ const NON_PERSONA_MCP_TOOLS = Object.freeze([
|
|
|
8526
8927
|
signal
|
|
8527
8928
|
});
|
|
8528
8929
|
}
|
|
8930
|
+
},
|
|
8931
|
+
{
|
|
8932
|
+
toolNameHttp: "stand_in",
|
|
8933
|
+
capability: "stand_in",
|
|
8934
|
+
description: "**Away-mode decision tiebreak.** Three-lab advisor (gpt-5.5 xhigh, opus-4.7 xhigh, gemini-3.1-pro high) for **when the user is unavailable and you are stuck between two or more concrete options**. Polls all three across two structured rounds (blind vote → informed re-vote with peer reasoning visible) and returns a ranked-choice verdict. Use when: you would otherwise halt and wait for the user. Do NOT use for: code review (use `peer-review-coordinator`), open-ended exploration, single-model second opinions (use `codex_critic` / `gemini_critic` / `opus_critic` directly), or as a substitute for user confirmation on irreversible actions (push, delete, drop, deploy — those still require the user even with three-lab consensus).",
|
|
8935
|
+
inputSchema: {
|
|
8936
|
+
type: "object",
|
|
8937
|
+
required: ["decision", "options"],
|
|
8938
|
+
additionalProperties: false,
|
|
8939
|
+
properties: {
|
|
8940
|
+
decision: {
|
|
8941
|
+
type: "string",
|
|
8942
|
+
description: "One-sentence framing of the choice the user would otherwise make. Be specific about what's being decided, not why."
|
|
8943
|
+
},
|
|
8944
|
+
options: {
|
|
8945
|
+
type: "array",
|
|
8946
|
+
minItems: 2,
|
|
8947
|
+
maxItems: 6,
|
|
8948
|
+
description: "2-6 concrete options for the panel to vote on. Caller-provided — do NOT ask the panel to generate options. The verdict cites the chosen option by `id`.",
|
|
8949
|
+
items: {
|
|
8950
|
+
type: "object",
|
|
8951
|
+
required: ["id", "summary"],
|
|
8952
|
+
additionalProperties: false,
|
|
8953
|
+
properties: {
|
|
8954
|
+
id: {
|
|
8955
|
+
type: "string",
|
|
8956
|
+
description: "Short stable identifier the verdict refers to (e.g., \"A\", \"lib-x\")."
|
|
8957
|
+
},
|
|
8958
|
+
summary: {
|
|
8959
|
+
type: "string",
|
|
8960
|
+
description: "One-line description of the option."
|
|
8961
|
+
},
|
|
8962
|
+
detail: {
|
|
8963
|
+
type: "string",
|
|
8964
|
+
description: "Optional longer context for the option (constraints, trade-offs)."
|
|
8965
|
+
}
|
|
8966
|
+
}
|
|
8967
|
+
}
|
|
8968
|
+
},
|
|
8969
|
+
context: {
|
|
8970
|
+
type: "string",
|
|
8971
|
+
description: "Task / code background that informs the decision. Keep tight — the input is capped at ~6KB total across decision + options + context."
|
|
8972
|
+
}
|
|
8973
|
+
}
|
|
8974
|
+
},
|
|
8975
|
+
async handler(args, signal) {
|
|
8976
|
+
return runStandInToolCall(args, signal);
|
|
8977
|
+
}
|
|
8529
8978
|
}
|
|
8530
8979
|
]);
|
|
8531
8980
|
/**
|
|
@@ -8609,6 +9058,109 @@ async function runWorkerToolCall(call) {
|
|
|
8609
9058
|
isError: result.isError
|
|
8610
9059
|
};
|
|
8611
9060
|
}
|
|
9061
|
+
/**
|
|
9062
|
+
* Shared closure body for the `stand_in` MCP tool. Validates the input
|
|
9063
|
+
* shape ({decision, options, context}) then calls `runStandIn`. The
|
|
9064
|
+
* orchestrator never throws — failure modes (upstream errors, parse
|
|
9065
|
+
* failures, abstains) all surface inside the structured `StandInResult`
|
|
9066
|
+
* envelope, which we JSON-stringify into the single MCP text block.
|
|
9067
|
+
*
|
|
9068
|
+
* Arg-validation policy mirrors `runWorkerToolCall` and `web_search`:
|
|
9069
|
+
* shape errors surface as `isError: true` tool-result envelopes (NOT
|
|
9070
|
+
* JSON-RPC -32602). The `tools/list` JSON schema documents required
|
|
9071
|
+
* fields; this runtime check is defense against a schema-ignoring
|
|
9072
|
+
* client.
|
|
9073
|
+
*
|
|
9074
|
+
* `isError` is FALSE for the no_consensus / need_more_info verdicts —
|
|
9075
|
+
* those are valid protocol outcomes the caller acts on, not errors.
|
|
9076
|
+
* `isError` is TRUE only for input-shape failures (bad arg types,
|
|
9077
|
+
* missing required fields).
|
|
9078
|
+
*/
|
|
9079
|
+
async function runStandInToolCall(args, signal) {
  // Every shape failure is reported as an `isError: true` tool-result envelope.
  const reject = (text) => ({
    content: [{ type: "text", text }],
    isError: true,
  });

  const decision = typeof args.decision === "string" ? args.decision : "";
  if (!decision) return reject("stand_in: arguments.decision is required (non-empty string)");

  const optionsRaw = args.options;
  if (!Array.isArray(optionsRaw)) return reject("stand_in: arguments.options must be an array (2-6 entries)");
  if (optionsRaw.length < 2 || optionsRaw.length > 6) {
    return reject(`stand_in: arguments.options must contain 2-6 entries; got ${optionsRaw.length}`);
  }

  // Normalize each option to { id, summary, detail } and enforce unique ids.
  const options = [];
  const seenIds = new Set();
  for (const [i, entry] of optionsRaw.entries()) {
    if (typeof entry !== "object" || entry === null) {
      return reject(`stand_in: arguments.options[${i}] must be an object`);
    }
    const id = typeof entry.id === "string" ? entry.id : "";
    const summary = typeof entry.summary === "string" ? entry.summary : "";
    if (!id) return reject(`stand_in: arguments.options[${i}].id is required (non-empty string)`);
    if (!summary) return reject(`stand_in: arguments.options[${i}].summary is required (non-empty string)`);
    if (seenIds.has(id)) {
      return reject(`stand_in: arguments.options[${i}].id="${id}" is duplicated; ids must be unique`);
    }
    seenIds.add(id);
    // An empty or non-string detail is normalized to undefined.
    const detail = typeof entry.detail === "string" && entry.detail.length > 0 ? entry.detail : undefined;
    options.push({ id, summary, detail });
  }

  // Context is optional, but when supplied it must be a string.
  let context;
  if (args.context !== undefined) {
    if (typeof args.context !== "string") {
      return reject("stand_in: arguments.context must be a string when provided");
    }
    context = args.context;
  }

  // Any verdict, including no_consensus / need_more_info, is a normal (non-error) result.
  const result = await runStandIn({ decision, options, context }, signal);
  return {
    content: [{ type: "text", text: JSON.stringify(result) }],
  };
}
|
|
8612
9164
|
|
|
8613
9165
|
//#endregion
|
|
8614
9166
|
//#region src/lib/codex-mcp-config.ts
|
|
@@ -9194,7 +9746,7 @@ function initProxyFromEnv() {
|
|
|
9194
9746
|
//#endregion
|
|
9195
9747
|
//#region package.json
|
|
9196
9748
|
var name = "github-router";
|
|
9197
|
-
var version = "0.3.
|
|
9749
|
+
var version = "0.3.31";
|
|
9198
9750
|
|
|
9199
9751
|
//#endregion
|
|
9200
9752
|
//#region src/lib/approval.ts
|