@booplex/bpx-consult 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/council.ts ADDED
@@ -0,0 +1,429 @@
1
+ /**
2
+ * council — the multi-model consensus mode.
3
+ *
4
+ * N personas run in parallel (Promise.allSettled, my-zen's asyncio.gather
5
+ * pattern), each with its own model + stance-injected system prompt. A
6
+ * synthesizer model merges their verdicts into one recommendation, annotated
7
+ * with a confidence score and any disagreement.
8
+ *
9
+ * Shares callAdvisor with solo — that factoring was deliberate, and it pays off
10
+ * here: each member is just `callAdvisor` with a different persona prompt.
11
+ */
12
+
13
+ import type { AgentToolResult, AgentToolUpdateCallback, ExtensionContext } from "@earendil-works/pi-coding-agent";
14
+ import type { Message, ThinkingLevel } from "@earendil-works/pi-ai";
15
+ import { buildSessionContext, convertToLlm } from "@earendil-works/pi-coding-agent";
16
+ import { callAdvisor, resolveAdvisor, type ResolvedAdvisor } from "./advisor.js";
17
+ import { linkSignal, withTimeout } from "./timeout.js";
18
+ import { buildConsultContext, type ContextBudget } from "./context-engine.js";
19
+ import type { BpxConsultConfig } from "./config.js";
20
+ import {
21
+ computeConfidence,
22
+ detectDisagreement,
23
+ type MemberResult,
24
+ validateStance,
25
+ } from "./consensus.js";
26
+ import { personaSystemPrompt, resolvePersona, type Persona } from "./personas.js";
27
+
28
/**
 * Structured details attached to every council tool result and progress update.
 */
export interface CouncilDetails {
  /** Discriminator identifying this result as coming from council mode. */
  mode: "council";
  /**
   * One summary entry per council member: the persona name, the model it was
   * (or would have been) run on, and a status string. In this file the status
   * is "pending" in the progress update and "ok" / "error" in final results.
   */
  members: Array<{ persona: string; model: string; status: string }>;
  /** Estimated input tokens each member saw (post context-engine re-fit). */
  fittedTokens?: number;
  /** Number of items the context engine omitted while fitting the shared context. */
  omitted?: number;
  /** Label of the synthesizer model, or "(none)" when none was resolved. */
  synthesizer: string;
  /** Overall confidence score; 0 on the early-exit and all-failed paths. */
  confidence: number;
  /** Components of the confidence score, as reported by computeConfidence. */
  confidenceBreakdown?: { successRatio: number; agreementRatio: number; avgAlignment: number };
  /** Disagreement note from detectDisagreement, when there is one. */
  disagreement?: string;
  /** Token usage reported for the synthesizer call. */
  usage?: { input: number; output: number; total: number };
  /** Stop reason reported for the synthesizer call. */
  stopReason?: string;
  /** Failure description; populated on the error paths that set it. */
  errorMessage?: string;
}
42
+
43
// System prompt for the synthesizer call. It is passed verbatim as
// `systemPrompt` when the member replies are merged; the "### <persona> [<stance>]"
// header it describes matches the per-member headers built in executeCouncil.
const SYNTHESIZER_SYSTEM_PROMPT = `You are a synthesizer model. Several advisor personas have reviewed the same coding task, each from a different stance (advocating, critiquing, or weighing). Your job is to merge their views into ONE recommendation for the executor.

Rules:
- The user message contains MULTIPLE replies, each under a "### <persona> [<stance>]" header. READ EVERY SECTION before synthesizing. Do not begin your synthesis until you have read all of them — if you think you only saw one, re-read the message; they are all there.
- Weigh the replies by substance, not by count.
- If the members agreed, say so plainly and give the consensus recommendation.
- If they disagreed, SURFACE the disagreement. Do not paper over it. State what each side argued, then give your best call on which is right and why. A false consensus is worse than an honest split.
- Be concrete. The executor needs a PLAN, a CORRECTION, or a STOP signal — give it one, not a summary of opinions.
- You never call tools. You synthesize and advise.`;
52
+
53
/** Arguments accepted by executeCouncil. */
export interface ExecuteCouncilInput {
  /** Extension context; used for the session manager, model resolution and UI notifications. */
  ctx: ExtensionContext;
  /** Loaded bpx-consult configuration (council roster, synthesizer, personas, context budget). */
  config: BpxConsultConfig;
  /** Caller's abort signal; forwarded to every member call and to the synthesizer call. */
  signal: AbortSignal | undefined;
  /** Optional progress callback; invoked once before the members are consulted. */
  onUpdate: AgentToolUpdateCallback<CouncilDetails> | undefined;
  /** Optional specific question from the executor; when non-blank it is passed to the context builder as a directive. */
  question?: string;
}
60
+
61
/**
 * Run one council consultation.
 *
 * Steps, in order:
 *  1. Resolve the roster's personas and the synthesizer model; return an error
 *     result immediately if the roster is empty, a persona is unknown, or the
 *     synthesizer cannot be resolved.
 *  2. Resolve each persona's model. Personas whose model cannot be resolved are
 *     recorded as failed members instead of aborting the council; if none
 *     resolve, return an error result.
 *  3. Build the shared context once, fitted to the smallest context window
 *     among the resolved members and the synthesizer.
 *  4. Run every resolved member (concurrently, or one at a time when
 *     `modes.council.parallel` is false) and collect a MemberResult for each.
 *  5. If at least one member succeeded, ask the synthesizer to merge the
 *     replies and return its text together with confidence/disagreement details.
 *
 * @param input - Context, configuration, abort signal, progress callback and
 *   optional executor question.
 * @returns A tool result whose text is the synthesized recommendation or a
 *   description of the failure, with CouncilDetails describing each member.
 */
export async function executeCouncil(input: ExecuteCouncilInput): Promise<AgentToolResult<CouncilDetails>> {
  const { ctx, config, signal, onUpdate, question } = input;

  const councilConfig = config.modes?.council;
  const roster = councilConfig?.members ?? [];
  const synthesizerKey = councilConfig?.synthesizer?.model;
  const parallel = councilConfig?.parallel ?? true;

  // Details payload for the early exits that happen before any member or
  // synthesizer has been resolved.
  const emptyDetails: CouncilDetails = { mode: "council", members: [], synthesizer: "(none)", confidence: 0 };

  // Per-member summary used in every details payload below.
  const summarize = (results: MemberResult[]): CouncilDetails["members"] =>
    results.map((r) => ({ persona: r.persona, model: r.model, status: r.status }));

  if (roster.length === 0) {
    return err(
      "No council members configured. Set modes.council.members in ~/.pi/agent/bpx-consult.json.",
      emptyDetails,
    );
  }

  // Resolve personas (defaults + user overrides).
  const personas: Persona[] = [];
  for (const name of roster) {
    const p = resolvePersona(name, config.personas as never);
    if (!p) {
      return err(
        `Unknown persona "${name}". Check modes.council.members or personas in config.`,
        emptyDetails,
      );
    }
    personas.push(p);
  }

  // Resolve the synthesizer model.
  const synth = resolveAdvisor(ctx, synthesizerKey);
  if (!synth) {
    return err(
      `No synthesizer model configured (got "${synthesizerKey ?? "(none)"}"). Set modes.council.synthesizer.model.`,
      emptyDetails,
    );
  }

  // Resolve member models UPFRONT so the shared context can be fitted to the
  // smallest window among them + synthesizer (§I: every member must fit).
  const sessionId = ctx.sessionManager.getSessionId();
  const memberAdvisors: Array<{ persona: Persona; advisor: ResolvedAdvisor }> = [];
  // Members whose model failed to resolve are pre-failed here rather than
  // aborting the whole council. One bad model (typo, deprovisioned, wrong
  // provider) must not kill the other members. The pre-failed entries flow
  // into the final results alongside the settled ones, so they count against
  // the success ratio in the confidence score.
  const preFailed: MemberResult[] = [];
  for (const persona of personas) {
    const modelKey = persona.defaultModel ?? config.modes?.solo?.model;
    const advisor = resolveAdvisor(ctx, modelKey);
    if (!advisor) {
      preFailed.push({
        persona: persona.name,
        stance: persona.stance,
        model: modelKey ?? "(none)",
        status: "error",
        text: "",
        errorMessage: `Could not resolve model "${modelKey ?? "(none)"}" for persona ${persona.name}.`,
        alignment: 0,
      });
      continue;
    }
    memberAdvisors.push({ persona, advisor });
  }

  // If EVERY member failed to resolve, bail — there's no council to run.
  if (memberAdvisors.length === 0) {
    return err(
      `No council members could resolve their models:\n${preFailed.map((r) => "- " + r.errorMessage).join("\n")}`,
      { mode: "council", members: summarize(preFailed), synthesizer: synth.label, confidence: 0 },
    );
  }

  // Build the shared context once, fitted to the SMALLEST window in the council.
  // Every member sees the same payload, and the smallest-window member is
  // guaranteed to fit — that's what closes the §I breach.
  const contextBudget = config.contextBudget as ContextBudget;
  const { messages: sessionMessages } = buildSessionContext(
    ctx.sessionManager.getEntries(),
    ctx.sessionManager.getLeafId(),
  );
  const branchMessages: Message[] = convertToLlm(sessionMessages);
  const directive = question?.trim() ? `Specific question from the executor: ${question.trim()}` : undefined;

  const minWindow = Math.min(
    synth.model.contextWindow,
    ...memberAdvisors.map((m) => m.advisor.model.contextWindow),
  );

  const fit = buildConsultContext({
    sessionMessages: branchMessages,
    advisorContextWindow: minWindow,
    budget: contextBudget,
    directive,
  });

  onUpdate?.({
    content: [{ type: "text", text: `Consulting council: ${personas.map((p) => p.name).join(", ")}…` }],
    details: {
      mode: "council",
      members: personas.map((p) => ({ persona: p.name, model: p.defaultModel ?? "(inherit)", status: "pending" })),
      synthesizer: synth.label,
      confidence: 0,
    },
  });

  // Provider-collision warning: if two or more resolved members share a provider,
  // parallel calls can trip that provider's QPM rate limits and silently kill
  // members (caught in live testing — two google/gemini-flash members, one died).
  // We warn rather than force-stagger because a paid tier with headroom can
  // handle it. The warning is specifically about PARALLEL calls, so it is
  // skipped when the user has already opted into sequential execution.
  if (parallel) {
    warnOnProviderCollision(ctx, memberAdvisors);
  }

  // Fan out — each member is a callAdvisor with its persona prompt + model.
  // runMember wraps each call in its own per-member timeout tied to the parent
  // signal, so a member's own timeout/abort drops only that member — not its
  // siblings. (rpiv-btw "Decision 8" pattern, per Claude's review.)
  // Build THUNKS (not promises) so parallel:false can genuinely await them
  // one-at-a-time; eagerly created promises would already be running
  // concurrently by the time runSequential awaited them.
  const memberTimeoutMs = councilConfig?.timeoutMs ?? 120000;
  const memberThunks: Array<() => Promise<MemberResult>> = memberAdvisors.map(
    ({ persona, advisor }) => () => runMember(ctx, persona, advisor, fit.messages, contextBudget, signal, sessionId, memberTimeoutMs),
  );

  // Promise.allSettled semantics: one flaky member never crashes the council.
  // parallel:false runs thunks sequentially (genuinely one-at-a-time) so the
  // knob users reach for to dodge provider rate limits actually works.
  const settled = parallel
    ? await Promise.allSettled(memberThunks.map((thunk) => thunk()))
    : await runSequential(memberThunks);

  // `settled` is index-aligned with `memberAdvisors` (both execution paths
  // preserve input order), so a rejected entry can still be attributed to the
  // persona and model that produced it instead of "(unknown)".
  const memberResults: MemberResult[] = [
    ...preFailed,
    ...settled.map((s, i): MemberResult => {
      if (s.status === "fulfilled") return s.value;
      const reason = s.reason instanceof Error ? s.reason.message : String(s.reason);
      const member = memberAdvisors[i];
      if (member) return memberErr(member.persona, member.advisor, reason);
      // Defensive fallback; not expected given the index alignment above.
      return {
        persona: "(unknown)",
        stance: "neutral",
        model: "(unknown)",
        status: "error",
        text: "",
        errorMessage: reason,
        alignment: 0,
      };
    }),
  ];

  const confidence = computeConfidence(memberResults);
  const disagreement = detectDisagreement(memberResults);

  // If every member failed, don't bother the synthesizer.
  const successful = memberResults.filter((r) => r.status === "ok");
  if (successful.length === 0) {
    const errs = memberResults.map((r) => `- ${r.persona} (${r.model}): ${r.errorMessage}`).join("\n");
    return err(
      `All council members failed:\n${errs}`,
      {
        mode: "council",
        members: summarize(memberResults),
        synthesizer: synth.label,
        confidence: 0,
        fittedTokens: fit.estimatedTokens,
        omitted: fit.omittedCount,
        errorMessage: "all members failed",
      },
    );
  }

  // Synthesize. Failed members are included as explicit ERROR sections so the
  // synthesizer can see who did not reply.
  const memberBlock = memberResults
    .map((r) => {
      const header = `### ${r.persona} [${r.stance}] — ${r.model} — ${r.status}`;
      if (r.status !== "ok") return `${header}\n(ERROR: ${r.errorMessage ?? "no reply"})`;
      return `${header}\n${r.text}`;
    })
    .join("\n\n---\n\n");

  const successfulCount = successful.length;
  const disagreementNote = disagreement ? `\n\nNOTE: ${disagreement}` : "";
  const synthUserPrompt = `The council has reviewed the task. Below are ${successfulCount} advisor ${successfulCount === 1 ? "reply" : "replies"}, each under a ### header. READ ALL OF THEM before synthesizing.\n\n${memberBlock}${disagreementNote}\n\nConfidence in the consensus: ${confidence.confidence} (success ${confidence.successRatio}, agreement ${confidence.agreementRatio}, stance-alignment ${confidence.avgAlignment}).\n\nSynthesize ONE recommendation for the executor that weighs every reply above. Return a PLAN, a CORRECTION, or a STOP signal.`;

  // §I: fit the synthesizer input to ITS window. The grown member transcript
  // (memberBlock + disagreementNote) can exceed the synthesizer's context, so
  // it goes through buildConsultContext as well. Mirrors debate.ts.
  const synthFit = buildConsultContext({
    sessionMessages: [{ role: "user", content: synthUserPrompt, timestamp: Date.now() }],
    advisorContextWindow: synth.model.contextWindow,
    budget: contextBudget,
  });

  try {
    const synthResult = await callAdvisor({
      ctx,
      advisor: synth,
      systemPrompt: SYNTHESIZER_SYSTEM_PROMPT,
      messages: synthFit.messages,
      thinkingLevel: councilConfig?.synthesizer?.thinkingLevel,
      signal,
      sessionId,
      maxTokens: contextBudget.responseReserveTokens,
    });

    const details: CouncilDetails = {
      mode: "council",
      members: summarize(memberResults),
      fittedTokens: fit.estimatedTokens,
      omitted: fit.omittedCount,
      synthesizer: synth.label,
      confidence: confidence.confidence,
      confidenceBreakdown: {
        successRatio: confidence.successRatio,
        agreementRatio: confidence.agreementRatio,
        avgAlignment: confidence.avgAlignment,
      },
      disagreement,
      usage: synthResult.usage,
      stopReason: synthResult.stopReason,
      errorMessage: synthResult.errorMessage,
    };

    if (!synthResult.text) {
      return err("Council synthesizer returned no usable text.", { ...details, errorMessage: synthResult.errorMessage ?? "empty synthesis" });
    }

    return ok(synthResult.text, details);
  } catch (e) {
    // A throwing synthesizer is reported as an error result, keeping the
    // member summaries so the caller can still see who replied.
    const message = e instanceof Error ? e.message : String(e);
    return err(`Council synthesizer call threw: ${message}`, {
      mode: "council",
      members: summarize(memberResults),
      synthesizer: synth.label,
      confidence: confidence.confidence,
      fittedTokens: fit.estimatedTokens,
      omitted: fit.omittedCount,
      errorMessage: message,
    });
  }
}
304
+
305
+ // ---------------------------------------------------------------------------
306
+ // Helpers
307
+ // ---------------------------------------------------------------------------
308
+
309
/**
 * Consult a single council member and normalise the outcome into a MemberResult.
 *
 * The advisor call runs inside `withTimeout`, which receives the member's
 * wall-clock budget and the parent signal and hands the callback the signal to
 * pass on to `callAdvisor`. A timeout or a thrown error becomes a failed
 * member via `memberErr`, so one member's failure does not affect its
 * siblings. (No per-member circuit-breaker/backoff in v1 — see SPEC §M for the
 * v1.1 plan.)
 *
 * A reply counts as failed when its stop reason is "error" or "aborted", or
 * when it carries no text; only successful replies are scored with
 * `validateStance`.
 */
async function runMember(
  ctx: ExtensionContext,
  persona: Persona,
  advisor: ResolvedAdvisor,
  messages: Message[],
  contextBudget: ContextBudget,
  parentSignal: AbortSignal | undefined,
  sessionId: string | undefined,
  memberTimeoutMs: number,
): Promise<MemberResult> {
  // Per-member budget (council.timeoutMs): guards against a provider that
  // accepts the request and then never answers.
  const outcome = await withTimeout(memberTimeoutMs, parentSignal, async (signal) => {
    const thinkingLevel: ThinkingLevel | undefined = persona.thinkingLevel;
    return callAdvisor({
      ctx,
      advisor,
      systemPrompt: personaSystemPrompt(persona),
      messages,
      thinkingLevel,
      signal,
      sessionId,
      maxTokens: contextBudget.responseReserveTokens,
    });
  });

  if (outcome.timedOut) {
    return memberErr(persona, advisor, `timed out after ${memberTimeoutMs}ms`);
  }
  if (!outcome.ok) {
    const thrown = outcome.error;
    return memberErr(persona, advisor, thrown instanceof Error ? thrown.message : String(thrown));
  }

  const reply = outcome.value;
  const succeeded = reply.stopReason !== "error" && reply.stopReason !== "aborted" && Boolean(reply.text);
  const status: "ok" | "error" = succeeded ? "ok" : "error";

  return {
    persona: persona.name,
    stance: persona.stance,
    model: advisor.label,
    status,
    text: reply.text,
    errorMessage: succeeded ? undefined : reply.errorMessage ?? reply.stopReason,
    alignment: succeeded ? validateStance(reply.text, persona.stance) : 0,
    usage: reply.usage,
  };
}
373
+
374
/**
 * Produce the MemberResult for a member that did not deliver a reply.
 * The result carries the persona's name and stance, the advisor's label,
 * status "error", empty text, the given message, and an alignment of 0.
 */
function memberErr(persona: Persona, advisor: ResolvedAdvisor, message: string): MemberResult {
  const { name, stance } = persona;
  const failure: MemberResult = {
    persona: name,
    stance,
    model: advisor.label,
    status: "error",
    text: "",
    errorMessage: message,
    alignment: 0,
  };
  return failure;
}
386
+
387
/**
 * Execute promise factories strictly in order, starting each one only after
 * the previous has settled, and report every outcome in the same
 * `PromiseSettledResult` shape that `Promise.allSettled` produces.
 *
 * It takes factories rather than promises because a promise is already
 * running by the time it exists; deferring the call is what makes
 * parallel:false an actual one-at-a-time run. A factory that throws
 * synchronously is recorded as rejected, just like one that returns a
 * rejecting promise.
 */
async function runSequential<T>(thunks: Array<() => Promise<T>>): Promise<PromiseSettledResult<T>[]> {
  // Invoke one factory and capture its outcome without ever rejecting.
  const settle = async (start: () => Promise<T>): Promise<PromiseSettledResult<T>> => {
    try {
      const value = await start();
      return { status: "fulfilled", value };
    } catch (reason) {
      return { status: "rejected", reason };
    }
  };

  const outcomes: PromiseSettledResult<T>[] = [];
  for (const start of thunks) {
    outcomes.push(await settle(start));
  }
  return outcomes;
}
401
+
402
/** Wrap a successful council outcome: one text content item plus the details. */
function ok(text: string, details: CouncilDetails): AgentToolResult<CouncilDetails> {
  const result: AgentToolResult<CouncilDetails> = {
    content: [{ type: "text", text }],
    details,
  };
  return result;
}
405
+
406
/**
 * Wrap a failed council outcome. The result has the same shape as `ok`
 * produces: the failure is conveyed by the text and by whatever the caller
 * put in `details` (for example `errorMessage`).
 */
function err(text: string, details: CouncilDetails): AgentToolResult<CouncilDetails> {
  const result: AgentToolResult<CouncilDetails> = {
    content: [{ type: "text", text }],
    details,
  };
  return result;
}
409
+
410
/**
 * Emit a single non-blocking UI warning when at least two council members
 * resolve to the same provider.
 *
 * Members are grouped by `advisor.model.provider`; every provider with more
 * than one member is listed in the warning together with the persona names
 * that share it, so the user can adjust the roster. Nothing is emitted when
 * no provider is shared.
 */
function warnOnProviderCollision(ctx: ExtensionContext, members: Array<{ persona: Persona; advisor: ResolvedAdvisor }>): void {
  // provider -> persona names, in first-seen order.
  const namesByProvider = new Map<string, string[]>();
  members.forEach(({ persona, advisor }) => {
    const provider = advisor.model.provider;
    const names = namesByProvider.get(provider);
    if (names) {
      names.push(persona.name);
    } else {
      namesByProvider.set(provider, [persona.name]);
    }
  });

  const shared: string[] = [];
  for (const [provider, names] of namesByProvider) {
    if (names.length > 1) {
      shared.push(`${provider} (${names.join(", ")})`);
    }
  }
  if (shared.length === 0) return;

  const detail = shared.join("; ");
  ctx.ui.notify(
    `bpx-consult: council members share a provider [${detail}]. Parallel calls may trip rate limits — consider distinct providers or tiers. See SPEC §V.`,
    "warning",
  );
}