@bramburn/pi-model-council 1.6.2 → 1.6.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,499 @@
1
+ import type {
2
+ CouncilDecision,
3
+ CouncilInput,
4
+ CouncilModelResult,
5
+ ModelOpinion,
6
+ } from "./types.js";
7
+ import type { ModelRegistry } from "@earendil-works/pi-coding-agent";
8
+ import {
9
+ buildProposalPrompts,
10
+ buildSynthesisPrompts,
11
+ } from "./prompts.js";
12
+ import {
13
+ callOpenRouterChat,
14
+ extractJsonObject,
15
+ pingOpenRouter,
16
+ fetchOpenRouterModels,
17
+ } from "./openrouterClient.js";
18
+ import {
19
+ modelOpinionJsonSchema,
20
+ councilDecisionJsonSchema,
21
+ repairModelOpinion,
22
+ repairCouncilDecision,
23
+ validateCouncilDecision,
24
+ } from "./structuredOutput.js";
25
+ import { withTimeout, retry, isStructuredOutputError } from "./retry.js";
26
+ import { renderCouncilDecisionMarkdown } from "./markdown.js";
27
+ import { loadSettings } from "./settings.js";
28
+ import { CouncilSetupError } from "./types.js";
29
+ import { resolveOpenRouterApiKey } from "./runnerHelpers.js";
30
+
31
/**
 * Builds a locally computed council decision without calling any model.
 *
 * Used when the synthesis step cannot produce a decision: the per-model
 * results are merged with simple rules instead of being reasoned over.
 *
 * - Confidence is "medium" when at least two models returned a parsed
 *   opinion, otherwise "low".
 * - The plan summary and steps come from the first successful opinion; generic
 *   defaults are used when that opinion has no usable approach or steps.
 * - Files to edit are de-duplicated by path (first mention wins) and
 *   verification commands are de-duplicated across all successful opinions.
 * - Errors of failed models are surfaced as `consensus.unknowns`.
 *
 * @param input   The normalized council input; only `mode` is read here.
 * @param results Per-model results, successful or failed, in council order.
 * @returns A decision whose metadata is flagged `degraded` and `fallbackUsed`.
 */
export function createFallbackDecision(input: CouncilInput, results: CouncilModelResult[]): CouncilDecision {
  const DEFAULT_SUMMARY = "Review the raw model outputs before implementation.";
  const DEFAULT_STEPS = [
    "Review the problem and relevant files.",
    "Choose the smallest safe change.",
    "Implement the change.",
    "Run verification checks.",
  ];

  const successfulResults = results.filter(r => r.ok && r.parsed);

  const decisionId = `council-${Date.now()}`;
  const confidence: "low" | "medium" | "high" =
    successfulResults.length >= 2 ? "medium" : "low";

  const firstSuccessful = successfulResults[0]?.parsed as ModelOpinion | undefined;

  const agreements: string[] = [];
  if (successfulResults.length > 0) {
    agreements.push("At least one model provided an implementation-oriented recommendation.");
  }

  const disagreements: string[] = [];
  const errors = results.filter(r => !r.ok).map(r => r.error).filter(Boolean);
  if (errors.length > 0) {
    disagreements.push("Some models failed, so disagreements were not fully resolved.");
  }

  const unknowns: string[] = errors.map(error => `API error: ${error}`);

  // A blank approach is as useless as a missing one, so both fall back to the
  // default summary. A non-blank approach is kept verbatim.
  const approach = firstSuccessful?.recommendedApproach;
  const recommendedPlanSummary =
    typeof approach === "string" && approach.trim().length > 0 ? approach : DEFAULT_SUMMARY;

  const recommendedPlanSteps = firstSuccessful?.steps?.length
    ? [...firstSuccessful.steps]
    : [...DEFAULT_STEPS];

  // First mention of a path wins; later models cannot override reason/action.
  const filesToEditMap = new Map<string, { path: string; reason: string; action: string }>();
  const testsToRunSet = new Set<string>();
  for (const result of successfulResults) {
    const parsed = result.parsed as ModelOpinion;
    for (const file of parsed.filesToConsider ?? []) {
      if (!filesToEditMap.has(file.path)) {
        filesToEditMap.set(file.path, {
          path: file.path,
          reason: file.reason,
          action: file.suggestedAction,
        });
      }
    }
    for (const verification of parsed.verification ?? []) {
      testsToRunSet.add(verification);
    }
  }

  const modelNotes = results.map(result => {
    const opinion = result.ok && result.parsed ? (result.parsed as ModelOpinion) : undefined;
    if (opinion) {
      // Copy the risks so the decision does not alias the raw model result.
      return { model: result.model, stance: opinion.stance, keyRisks: [...(opinion.risks ?? [])] };
    }
    return {
      model: result.model,
      stance: result.ok && result.rawText ? "Unstructured response" : "Failed to respond",
      keyRisks: result.error ? [result.error] : [],
    };
  });

  // The summary is embedded as a sentence. Only add a full stop when the
  // summary does not already end with sentence punctuation, otherwise the
  // default summary would render as "implementation.. Choose".
  const trimmedSummary = recommendedPlanSummary.trim();
  const approachSentence = /[.!?]$/.test(trimmedSummary) ? trimmedSummary : `${trimmedSummary}.`;

  return {
    decisionId,
    mode: input.mode,
    confidence,
    consensus: { agreements, disagreements, unknowns },
    recommendedPlan: { summary: recommendedPlanSummary, steps: recommendedPlanSteps },
    implementationGuidance: {
      filesToEdit: Array.from(filesToEditMap.values()),
      testsToRun: Array.from(testsToRunSet),
      guardrails: [
        "Do not broaden scope beyond the supplied problem.",
        "Prefer minimal, reversible changes.",
        "Run the listed verification commands before reporting success.",
      ],
    },
    modelNotes,
    handoffPrompt: `Review the council's model opinions. The recommended approach is: ${approachSentence} Choose the smallest safe change that addresses the problem. Verify the fix works before reporting success.`,
    metadata: {
      degraded: true,
      fallbackUsed: true,
      warnings: ["Used TypeScript fallback decision due to synthesis failure."],
    },
  };
}
126
+
127
/**
 * Runs a full council round: loads settings, performs pre-flight checks,
 * queries the three council models in parallel, then asks the synthesis
 * model to merge their opinions into one decision.
 *
 * Flow:
 *  1. Load settings; resolve and validate the OpenRouter API key.
 *  2. Check that every configured model id is still available. The pi model
 *     registry is tried first, then the REST model list; if neither yields a
 *     list the check is skipped.
 *  3. Normalize and validate the input.
 *  4. Call the council models concurrently. A model that rejects structured
 *     output is retried without the schema. A response that cannot be parsed
 *     is kept as a raw-text opinion.
 *  5. Synthesize. If synthesis fails for any reason, a locally computed
 *     fallback decision is used and the failure is recorded as a warning.
 *  6. Finalize metadata, map blind labels back to model ids, render markdown.
 *
 * @param args.input            The council request.
 * @param args.signal           Optional abort signal forwarded to every model call.
 * @param args.onStatus         Optional callback receiving progress messages.
 * @param args.cwd              Directory used to load settings; defaults to `process.cwd()`.
 * @param args.isProjectTrusted Passed through to `loadSettings`; defaults to `false`.
 * @param args.modelRegistry    Optional pi model registry (see the field comment below).
 * @returns The final decision, the raw per-model results and the rendered markdown.
 * @throws CouncilSetupError when settings, API key or configured models are unusable.
 * @throws Error when the input is invalid or all council models fail.
 */
export async function runCouncil(args: {
  input: CouncilInput;
  signal?: AbortSignal;
  onStatus?: (message: string) => void;
  cwd?: string;
  isProjectTrusted?: boolean;
  /** Optional pi extension context — when supplied we use it to (a) discover
   *  the OpenRouter API key from pi's auth storage if the settings file
   *  doesn't carry one and (b) validate the chosen model IDs against the
   *  live pi model registry. */
  modelRegistry?: ModelRegistry;
}): Promise<{
  decision: CouncilDecision;
  rawModelResults: CouncilModelResult[];
  markdown: string;
}> {
  const cwd = args.cwd ?? process.cwd();
  const isProjectTrusted = args.isProjectTrusted ?? false;

  // ── Load settings ────────────────────────────────────────────────────────
  const settings = await loadSettings(cwd, isProjectTrusted);

  if (!settings) {
    throw new CouncilSetupError(
      "Model Council is not configured.\n\n" +
      "Run /council-settings to:\n" +
      "  1. Enter your OpenRouter API key (or set OPENROUTER_API_KEY so pi\n" +
      "     picks it up automatically)\n" +
      "  2. Select your 3 council models\n" +
      "  3. Optionally pick a 4th synthesis model\n" +
      "  4. Save settings\n\n" +
      "Get your API key at: https://openrouter.ai/keys",
    );
  }

  const { model1, model2, model3 } = settings.openRouter.models;
  // Use the dedicated synthesis model when set, otherwise fall back to model1.
  const synthesisModelId = settings.synthesis?.modelId ?? model1;

  // ── Pre-flight: resolve API key (settings → registry → env) ─────────────
  args.onStatus?.("Council: resolving API key...");
  const resolvedApiKey = await resolveOpenRouterApiKey(settings, args.modelRegistry);

  if (!resolvedApiKey) {
    throw new CouncilSetupError(
      "Council cannot run: no OpenRouter API key found.\n\n" +
      "Fix: set OPENROUTER_API_KEY, run `/login openrouter` in pi, or save a\n" +
      "key via `/council-settings`.",
    );
  }

  // ── Pre-flight: validate API key ─────────────────────────────────────────
  args.onStatus?.("Council: validating API key...");
  const ping = await pingOpenRouter(resolvedApiKey);
  if (!ping.ok) {
    throw new CouncilSetupError(
      `Council cannot run: OpenRouter API key is invalid.\n` +
      `${ping.error}\n\n` +
      `Fix: run \`/council-settings\` to update your API key.`,
    );
  }

  // ── Pre-flight: validate models (registry first, REST fallback) ─────────
  args.onStatus?.("Council: verifying configured models are available...");
  let availableModels: string[] = [];
  if (args.modelRegistry) {
    try {
      const reg = await args.modelRegistry.getAvailable();
      availableModels = reg
        .filter((m) => m.provider === "openrouter")
        .map((m) => m.id);
    } catch {
      // Best effort: fall through to the REST fetch below.
    }
  }

  if (availableModels.length === 0) {
    try {
      const models = await fetchOpenRouterModels(resolvedApiKey);
      availableModels = models.map(m => m.id);
    } catch {
      // Best effort: without a model list the availability check is skipped.
    }
  }

  // De-duplicate so a model used both as council member and synthesizer is
  // reported only once.
  const configuredModels = Array.from(new Set([model1, model2, model3, synthesisModelId]));
  const missingModels = availableModels.length > 0
    ? configuredModels.filter(m => !availableModels.includes(m))
    : [];

  if (missingModels.length > 0) {
    throw new CouncilSetupError(
      `Some configured models are no longer available on OpenRouter:\n` +
      `${missingModels.map(m => `  - ${m}`).join("\n")}\n\n` +
      `Fix: run \`/council-settings\` to pick replacements.`,
    );
  }

  // ── Normalize input ─────────────────────────────────────────────────────
  const input: CouncilInput = {
    mode: args.input.mode,
    // Tolerate a missing problem here so the explicit validation below
    // reports it, instead of a TypeError from calling trim() on undefined.
    problem: (args.input.problem ?? "").trim(),
    currentUnderstanding: args.input.currentUnderstanding?.trim(),
    relevantFiles: args.input.relevantFiles ?? [],
    constraints: args.input.constraints ?? [],
    questionsToCouncil: args.input.questionsToCouncil ?? [],
  };

  if (!input.problem || input.problem.length === 0) {
    throw new Error("Problem is required and must be non-empty");
  }

  if (!["fix", "ask", "architecture"].includes(input.mode)) {
    throw new Error(`Invalid mode: ${input.mode}. Must be one of: fix, ask, architecture`);
  }

  // ── Config from settings ─────────────────────────────────────────────────
  const MODEL_TIMEOUT_MS = settings.options.modelTimeoutMs;
  const SYNTHESIS_TIMEOUT_MS = settings.options.synthesisTimeoutMs;
  const MODEL_RETRY_ATTEMPTS = settings.options.retryAttempts;
  const MODEL_RETRY_DELAY_MS = settings.options.retryDelayMs;
  const USE_STRUCTURED_OUTPUT = settings.options.useStructuredOutput;

  const COUNCIL_MODELS = [model1, model2, model3];
  const SYNTHESIZER_MODEL = synthesisModelId;

  // ── Call models ──────────────────────────────────────────────────────────
  const { systemPrompt: proposalSystem, userPrompt: proposalUser } = buildProposalPrompts(input);

  // Track per-model progress so the footer shows e.g.
  // "Council: 2/3 models responded (waiting on anthropic/claude-3.5-sonnet)".
  // Progress is keyed by council slot, not model id, so the count still
  // reaches 3/3 when the same model is configured in more than one slot.
  const respondedSlots = new Set<number>();
  const announceProgress = (): void => {
    const pending = COUNCIL_MODELS.filter((_, slot) => !respondedSlots.has(slot));
    args.onStatus?.(
      `Council: ${respondedSlots.size}/${COUNCIL_MODELS.length} models responded` +
      (pending.length > 0 ? ` (waiting on ${pending.join(", ")})` : ""),
    );
  };
  announceProgress();

  const modelPromises = COUNCIL_MODELS.map(async (model, slot): Promise<CouncilModelResult> => {
    const started = Date.now();
    let usedStructuredOutput = false;
    let totalAttempts = 0;
    const modelWarnings: string[] = [];

    // Flipped to false once the model rejects a structured-output request.
    let attemptWithStructuredOutput = USE_STRUCTURED_OUTPUT;

    const callModel = async (_attempt: number): Promise<string> => {
      // Count real invocations. The retry helper's own counter would restart
      // and hide the initial structured-output attempt.
      totalAttempts += 1;
      const structuredOutputSchema = attemptWithStructuredOutput ? modelOpinionJsonSchema : undefined;

      return withTimeout(
        (childSignal) => callOpenRouterChat({
          apiKey: resolvedApiKey,
          model,
          systemPrompt: proposalSystem,
          userPrompt: proposalUser,
          signal: childSignal,
          structuredOutputSchema,
          structuredOutputName: "model_opinion",
        }),
        MODEL_TIMEOUT_MS,
        args.signal,
      );
    };

    try {
      let rawText: string;

      try {
        rawText = await callModel(1);
        usedStructuredOutput = attemptWithStructuredOutput;
      } catch (firstError) {
        // NOTE(review): only structured-output rejections are retried; any
        // other first-attempt failure is reported as-is. Confirm this is the
        // intended use of the retryAttempts setting.
        if (attemptWithStructuredOutput && isStructuredOutputError(firstError)) {
          modelWarnings.push(`Model ${model} does not support structured output, using fallback mode.`);
          attemptWithStructuredOutput = false;
          const retryResult = await retry({
            attempts: MODEL_RETRY_ATTEMPTS,
            delayMs: MODEL_RETRY_DELAY_MS,
            operation: callModel,
          });
          rawText = retryResult.value;
          usedStructuredOutput = false;
        } else {
          throw firstError;
        }
      }

      try {
        const parsedRaw = extractJsonObject(rawText);
        const repaired = repairModelOpinion(parsedRaw, rawText);
        modelWarnings.push(...repaired.warnings);

        return {
          model,
          ok: true,
          rawText,
          parsed: repaired.value,
          metadata: {
            attemptCount: totalAttempts,
            durationMs: Date.now() - started,
            usedStructuredOutput,
            parseStatus: (repaired.parseStatus === "valid" ? "ok" : repaired.parseStatus) as "ok" | "repaired" | "fallback" | "failed",
            // Includes the structured-output downgrade warning, if any.
            warnings: [...modelWarnings],
          },
        };
      } catch {
        // The response could not be parsed or repaired: keep the raw text so
        // the synthesizer still sees what the model said.
        return {
          model,
          ok: true,
          rawText,
          parsed: {
            stance: "Unstructured response",
            recommendedApproach: rawText.substring(0, 200),
            steps: [],
            filesToConsider: [],
            risks: [],
            verification: [],
            confidence: "medium" as const,
          },
          metadata: {
            attemptCount: totalAttempts,
            durationMs: Date.now() - started,
            usedStructuredOutput,
            parseStatus: "fallback" as const,
            warnings: [...modelWarnings, "Failed to parse model opinion, using raw text fallback."],
          },
        };
      }
    } catch (apiError) {
      return {
        model,
        ok: false,
        error: apiError instanceof Error ? apiError.message : String(apiError),
        metadata: {
          attemptCount: totalAttempts,
          durationMs: Date.now() - started,
          usedStructuredOutput,
          parseStatus: "failed" as const,
          warnings: modelWarnings,
        },
      };
    } finally {
      // Track that this slot is done (success or failure) so the footer
      // status reflects aggregate progress.
      respondedSlots.add(slot);
      announceProgress();
    }
  });

  // Each promise resolves to a result object even on failure, so Promise.all
  // never rejects here.
  const modelResults = await Promise.all(modelPromises);

  const allFailed = modelResults.every(r => !r.ok);
  if (allFailed) {
    throw new Error(
      "All three council models failed to respond. " +
      "Check your API key and network connection.",
    );
  }

  // ── Synthesize ──────────────────────────────────────────────────────────
  args.onStatus?.("Council: synthesizing decision...");

  const { systemPrompt: synthesisSystem, userPrompt: synthesisUser, labelMap } = buildSynthesisPrompts(input, modelResults);

  let decision: CouncilDecision;
  const synthesisWarnings: string[] = [];

  try {
    // Flipped to false once the synthesizer rejects structured output, so the
    // retry really is sent without the schema.
    let synthesisStructuredOutput = USE_STRUCTURED_OUTPUT;

    const attemptSynthesis = async (): Promise<string> => {
      const structuredOutputSchema = synthesisStructuredOutput ? councilDecisionJsonSchema : undefined;
      return withTimeout(
        (childSignal) => callOpenRouterChat({
          apiKey: resolvedApiKey,
          model: SYNTHESIZER_MODEL,
          systemPrompt: synthesisSystem,
          userPrompt: synthesisUser,
          signal: childSignal,
          structuredOutputSchema,
          structuredOutputName: "council_decision",
        }),
        SYNTHESIS_TIMEOUT_MS,
        args.signal,
      );
    };

    let synthesisRaw: string;

    try {
      synthesisRaw = await attemptSynthesis();
    } catch (synthesisError) {
      if (synthesisStructuredOutput && isStructuredOutputError(synthesisError)) {
        synthesisWarnings.push("Synthesis does not support structured output, using fallback mode.");
        synthesisStructuredOutput = false;
        const retryResult = await retry({
          attempts: 2,
          delayMs: MODEL_RETRY_DELAY_MS,
          operation: attemptSynthesis,
        });
        synthesisRaw = retryResult.value;
      } else {
        throw synthesisError;
      }
    }

    try {
      const parsedRaw = extractJsonObject(synthesisRaw);
      const validation = validateCouncilDecision(parsedRaw);

      if (validation.ok && validation.value !== undefined) {
        decision = validation.value;
      } else {
        const repaired = repairCouncilDecision(parsedRaw, input);
        decision = repaired.value;
        synthesisWarnings.push(...repaired.warnings);
      }
    } catch {
      // No JSON object could be extracted: let the repair step work from the
      // raw text instead.
      const repaired = repairCouncilDecision(synthesisRaw, input);
      decision = repaired.value;
      synthesisWarnings.push(...repaired.warnings);
    }
  } catch (synthesisFailure) {
    // Synthesis is best effort, but record why it failed so the degraded
    // result can be diagnosed.
    const reason = synthesisFailure instanceof Error ? synthesisFailure.message : String(synthesisFailure);
    synthesisWarnings.push(`Synthesis failed: ${reason}`);
    decision = createFallbackDecision(input, modelResults);
  }

  // ── Finalize metadata ───────────────────────────────────────────────────
  const anyModelFailed = modelResults.some(r => !r.ok);
  const anyModelRepaired = modelResults.some(
    r => r.metadata?.parseStatus === "repaired" || r.metadata?.parseStatus === "fallback",
  );
  const fallbackUsed = decision.metadata?.fallbackUsed ?? false;
  const degraded = anyModelFailed || anyModelRepaired || fallbackUsed;

  const decisionWarnings = [
    ...synthesisWarnings,
    ...(decision.metadata?.warnings ?? []),
  ];

  if (anyModelFailed) {
    const failedModels = modelResults.filter(r => !r.ok).map(r => r.model);
    decisionWarnings.push(`Models that failed: ${failedModels.join(", ")}`);
  }

  decision.metadata = { degraded, fallbackUsed, warnings: decisionWarnings };

  // ── Persist ─────────────────────────────────────────────────────────────
  // Translate blind labels back to model ids. The synthesis prompt uses
  // blind labels (Opinion A/B/C) to avoid model prestige bias. The
  // chairman's modelNotes and other fields may reference those labels;
  // now that we know which label mapped to which model, resolve them so
  // the downstream report shows real model names.
  if (labelMap && labelMap.length > 0) {
    const lookup = new Map(labelMap.map((entry) => [entry.label, entry.model]));
    for (const note of decision.modelNotes ?? []) {
      const resolved = lookup.get(note.model);
      if (resolved) note.model = resolved;
    }
  }

  args.onStatus?.("Council: complete");

  const finalMarkdown = renderCouncilDecisionMarkdown(decision);

  return { decision, rawModelResults: modelResults, markdown: finalMarkdown };
}