@roodriigoooo/pi-scrutiny 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,333 @@
1
+ import { mkdir, readFile, writeFile } from "node:fs/promises";
2
+ import { dirname } from "node:path";
3
+ import type { ExtensionAPI, ExtensionCommandContext, ExtensionContext } from "@earendil-works/pi-coding-agent";
4
+ import { StringEnum } from "@earendil-works/pi-ai";
5
+ import { Type } from "typebox";
6
+ import { councilToParams, exampleConfigJson, projectConfigPath, readScrutinyConfig, userConfigPath } from "./scrutiny/config.js";
7
+ import { SCRUTINY_PACKET_PREVIEW_CANCELLED, runScrutiny } from "./scrutiny/engine.js";
8
+ import { historyText, showHistoryPicker } from "./scrutiny/history.js";
9
+ import { confirmPacketPreview } from "./scrutiny/preview.js";
10
+ import { activeProgresses, recentRuns } from "./scrutiny/registry.js";
11
+ import { showScrutinyPalette } from "./scrutiny/palette.js";
12
+ import type { ScrutinyParams, ScrutinySurface } from "./scrutiny/types.js";
13
+ import { scrutinyStatusText, renderScrutinyCall, renderScrutinyDock, renderScrutinyMessage, renderScrutinyResult } from "./scrutiny/ui.js";
14
+
15
/** Schema enum listing every surface the `scrutiny_consult` tool accepts. */
const SurfaceEnum = StringEnum([
  "consult",
  "hypotheses",
  "criteria",
  "repo-map",
  "risks",
  "verify",
] as const);

/** Schema enum for the `judgeMode` tool parameter. */
const JudgeModeEnum = StringEnum(["auto", "off", "on"] as const);
17
+
18
/**
 * Bring the "scrutiny" status entry and dock widget in line with the current run state.
 *
 * With at least one active run, the status shows the active count and the dock is
 * rendered below the editor. With no active run, the dock is removed and the status
 * is set from `latest` (or cleared when `latest` is falsy).
 *
 * @param ctx Extension context; nothing happens when it has no UI.
 * @param latest Most recent progress/result value, used only when no run is active.
 */
function refreshScrutinyChrome(ctx: ExtensionContext, latest?: unknown): void {
  if (!ctx.hasUI) return;

  const running = activeProgresses();
  if (running.length === 0) {
    const idleStatus = latest ? scrutinyStatusText(latest) : undefined;
    ctx.ui.setStatus("scrutiny", idleStatus);
    ctx.ui.setWidget("scrutiny", undefined);
    return;
  }

  ctx.ui.setStatus("scrutiny", `scrutiny [${running.length} active]`);
  const dock = renderScrutinyDock(running, ctx.ui.theme);
  ctx.ui.setWidget("scrutiny", dock, { placement: "belowEditor" });
}
29
+
30
/**
 * Remove the "scrutiny" status entry and dock widget once nothing is running.
 *
 * If a run is still active the chrome is refreshed instead of cleared, so the
 * active-run display is not wiped. Does nothing when the context has no UI.
 */
function clearScrutinyChrome(ctx: ExtensionContext): void {
  if (!ctx.hasUI) return;

  const stillRunning = activeProgresses().length > 0;
  if (stillRunning) {
    refreshScrutinyChrome(ctx);
    return;
  }

  ctx.ui.setStatus("scrutiny", undefined);
  ctx.ui.setWidget("scrutiny", undefined);
}
36
+
37
/**
 * Extension entry point: registers the `scrutiny-result` message renderer, the
 * `scrutiny_consult` tool, and the `/scrutiny` slash command on the given API.
 */
export default function (pi: ExtensionAPI) {
  pi.registerMessageRenderer("scrutiny-result", renderScrutinyMessage);

  pi.registerTool({
    name: "scrutiny_consult",
    label: "Scrutiny Consult",
    description: [
      "Run a multi-model panel deliberation surface, OR run objective repo verification. This is NOT patch scrutiny.",
      "Surfaces: consult/hypotheses/criteria use replicate mode (same prompt; agreement/disagreement signal); repo-map/risks use roles mode (separate lenses; coverage/gaps signal); verify runs tests/typecheck/lint as objective arbiter.",
      "The main Pi agent synthesizes and acts. Arbiter is objective repo tools + human review, never an LLM judge. Do not fuse patches.",
    ].join(" "),
    promptSnippet: "Consult a multi-model panel for hypotheses, criteria, context, or risk review; or run objective verify. Do not use to fuse patches.",
    promptGuidelines: [
      "Use scrutiny_consult for deliberation that benefits from independent perspectives: hypotheses, criteria, repo-map, risks, or bounded research synthesis.",
      "Use the verify surface to run objective repo checks (tests/typecheck/lint) as the real arbiter of a change.",
      "Never use scrutiny to merge patches from multiple models into one diff. Fuse uncertainty, evidence, tests, plans, context, risks — not final code.",
      "Treat sharp disagreement as a stop signal only on replicate surfaces (consult/hypotheses/criteria). On roles surfaces (repo-map/risks), treat non-overlap as coverage/gaps.",
      "Panelists run sequentially. Only one scrutiny run can be active at a time. Do not call scrutiny_consult in parallel.",
      "Panel deliberation can take time. Mention that input cost is replicated across panel models when proposing an expensive panel.",
    ],
    parameters: Type.Object({
      prompt: Type.String({ description: "Focused task for the panel or the verify check description." }),
      context: Type.Optional(Type.String({ description: "Extra compact context to include in the task packet." })),
      surface: Type.Optional(SurfaceEnum),
      panel: Type.Optional(Type.Array(Type.String({ description: "Model id, e.g. openai/gpt-5.5 or moonshotai/kimi-2.7-code." }), { description: "Panel models. Defaults to PI_SCRUTINY_PANEL." })),
      judge: Type.Optional(Type.String({ description: "Trade-off explainer model. Defaults to PI_SCRUTINY_JUDGE or first panel model. Only runs for consult by default." })),
      judgeMode: Type.Optional(JudgeModeEnum),
      maxPanelModels: Type.Optional(Type.Number({ description: "Clamp panel size. Default from PI_SCRUTINY_MAX_PANEL_MODELS." })),
      includeGitDiff: Type.Optional(Type.Boolean({ description: "Include current git diff in packet. Defaults per surface." })),
      verify: Type.Optional(Type.Boolean({ description: "Run objective repo checks after the panel. Defaults per surface (on for risks/verify)." })),
      tools: Type.Optional(Type.Array(Type.String(), { description: "Tools allowed to panel/judge. Default none. Prefer none." })),
    }),
    async execute(_toolCallId, params: ScrutinyParams, signal, onUpdate, ctx) {
      try {
        const { result, brief } = await runScrutiny({
          params,
          cwd: ctx.cwd,
          projectTrusted: ctx.isProjectTrusted(),
          // Fall back to the tool-call signal so cancelling the call also stops spawned commands.
          exec: (command, args, options) => pi.exec(command, args, { ...options, signal: options?.signal ?? signal }),
          signal,
          onProgress: (progress) => {
            onUpdate?.({ content: [{ type: "text", text: scrutinyStatusText(progress) }], details: progress });
            refreshScrutinyChrome(ctx, progress);
          },
        });
        return {
          content: [{ type: "text", text: brief }],
          details: result,
        };
      } finally {
        // Runs on success and on failure, so a throwing run never leaves a stale status/dock behind.
        clearScrutinyChrome(ctx);
      }
    },
    renderCall: renderScrutinyCall,
    renderResult: renderScrutinyResult,
  });

  pi.registerCommand("scrutiny", {
    description: "Run or inspect Pi Scrutiny (usage: /scrutiny | help | models | runs | history | panels | config | <surface>: <prompt> | @<panel>: <prompt> | ask <prompt>)",
    handler: async (args, ctx) => {
      // Runs one scrutiny request and publishes the brief as a message; failures become notifications.
      const runAndPublish = async (params: ScrutinyParams) => {
        try {
          if (ctx.hasUI) ctx.ui.setStatus("scrutiny", "scrutiny starting");
          const { result, brief } = await runScrutiny({
            params,
            cwd: ctx.cwd,
            projectTrusted: ctx.isProjectTrusted(),
            // Mirror the tool path: fall back to the command's signal when the caller supplies none.
            exec: (command, execArgs, options) => pi.exec(command, execArgs, { ...options, signal: options?.signal ?? ctx.signal }),
            signal: ctx.signal,
            confirmPacket: ctx.hasUI ? (preview) => confirmPacketPreview(ctx, preview) : undefined,
            onProgress: (progress) => {
              refreshScrutinyChrome(ctx, progress);
            },
          });
          clearScrutinyChrome(ctx);
          pi.sendMessage({ customType: "scrutiny-result", content: brief, display: true, details: result });
        } catch (error) {
          clearScrutinyChrome(ctx);
          // A declined packet preview is a user choice, not a failure.
          if (error instanceof Error && error.message === SCRUTINY_PACKET_PREVIEW_CANCELLED) {
            ctx.ui.notify("scrutiny cancelled before panel spend", "info");
            return;
          }
          ctx.ui.notify(`scrutiny failed: ${error instanceof Error ? error.message : String(error)}`, "error");
        }
      };

      const trimmed = args.trim();
      if (!trimmed || trimmed === "ui" || trimmed === "palette") {
        const params = await showScrutinyPalette(ctx);
        if (params) await runAndPublish(params);
        return;
      }
      if (trimmed === "help") {
        pi.sendMessage({ customType: "scrutiny-result", content: helpText(), display: true, details: { kind: "help" } });
        return;
      }
      if (trimmed === "models") {
        const config = readScrutinyConfig({ cwd: ctx.cwd, projectTrusted: ctx.isProjectTrusted() });
        pi.sendMessage({ customType: "scrutiny-result", content: modelsText(config), display: true, details: { kind: "models" } });
        return;
      }
      if (trimmed === "runs") {
        pi.sendMessage({ customType: "scrutiny-result", content: runsText(), display: true, details: { kind: "runs" } });
        return;
      }
      if (trimmed === "history") {
        const content = ctx.hasUI ? await showHistoryPicker(ctx) : await historyText(ctx.cwd, "");
        if (content) pi.sendMessage({ customType: "scrutiny-result", content, display: true, details: { kind: "history" } });
        return;
      }
      if (trimmed.startsWith("history ")) {
        const content = await historyText(ctx.cwd, trimmed.slice("history".length).trim());
        pi.sendMessage({ customType: "scrutiny-result", content, display: true, details: { kind: "history" } });
        return;
      }
      if (trimmed === "panels" || trimmed === "councils") {
        const config = readScrutinyConfig({ cwd: ctx.cwd, projectTrusted: ctx.isProjectTrusted() });
        pi.sendMessage({ customType: "scrutiny-result", content: panelsText(config), display: true, details: { kind: "panels" } });
        return;
      }
      if (trimmed === "config" || trimmed.startsWith("config ")) {
        await handleConfigCommand(trimmed.slice("config".length).trim(), ctx, pi);
        return;
      }

      // Anything else is an inline run: `<surface>: <prompt>`, `@<panel>: <prompt>`, or `ask <prompt>`.
      const config = readScrutinyConfig({ cwd: ctx.cwd, projectTrusted: ctx.isProjectTrusted() });
      const { params: parsed, prompt } = parseInline(trimmed, config);
      // Only the verify surface may run without a prompt.
      if (!prompt && parsed.surface !== "verify") {
        ctx.ui.notify("usage: /scrutiny ask <prompt> | /scrutiny <surface>: <prompt>", "warning");
        return;
      }
      await runAndPublish({ prompt: prompt || "run objective repo checks", ...parsed });
    },
  });
}
168
+
169
/** Surface names recognised as a `<surface>:` prefix in inline `/scrutiny` arguments. */
const SCRUTINY_SURFACE_SET = new Set<ScrutinySurface>(["consult", "hypotheses", "criteria", "repo-map", "risks", "verify"]);

/**
 * Parse inline `/scrutiny` arguments into run parameters and a prompt.
 *
 * Recognised forms, checked in this order:
 * - `@<name>: <prompt>` where `<name>` matches a configured council;
 * - `<surface>: <prompt>` where `<surface>` is in `SCRUTINY_SURFACE_SET`;
 * - `ask <prompt>` (a bare `ask` yields an empty prompt);
 * - anything else is used verbatim as the prompt.
 *
 * @param trimmed Command arguments, already trimmed by the caller.
 * @param config Loaded scrutiny config; only `config.councils` is read here.
 * @returns Parameters implied by the prefix (possibly empty) and the remaining prompt text.
 */
function parseInline(trimmed: string, config: ReturnType<typeof readScrutinyConfig>): { params: Partial<ScrutinyParams>; prompt: string } {
  const colonIdx = trimmed.indexOf(":");
  // `> 0` rather than `>= 0`: a leading colon leaves no head to interpret.
  if (colonIdx > 0) {
    const head = trimmed.slice(0, colonIdx).trim();
    const rest = trimmed.slice(colonIdx + 1).trim();
    if (head.startsWith("@")) {
      const council = config.councils.find((item) => item.name === head.slice(1));
      if (council) return { params: councilToParams(council, rest), prompt: rest };
    }
    if (head && SCRUTINY_SURFACE_SET.has(head as ScrutinySurface)) {
      return { params: { surface: head as ScrutinySurface }, prompt: rest };
    }
  }
  // Match `ask` as a whole word so a bare `ask` (or `ask` followed by any whitespace)
  // is not mistaken for the prompt itself, while words like `asking` are left alone.
  const askPrefix = /^ask(?:\s+|$)/.exec(trimmed);
  const prompt = askPrefix ? trimmed.slice(askPrefix[0].length).trim() : trimmed;
  return { params: {}, prompt };
}
187
+
188
/**
 * Dispatch a `/scrutiny config …` subcommand.
 *
 * - empty or `show`: publish the current config summary as a message;
 * - `edit`, `edit global`, `edit user`: edit the global config file;
 * - `edit project`: edit the project config file;
 * - anything else: show a usage warning.
 *
 * @param args Text following `config` in the command line.
 * @param ctx Command context used for config lookup and notifications.
 * @param pi Extension API used to publish the summary message.
 */
async function handleConfigCommand(args: string, ctx: ExtensionCommandContext, pi: ExtensionAPI): Promise<void> {
  const subcommand = args.trim();
  switch (subcommand) {
    case "":
    case "show": {
      const config = readScrutinyConfig({ cwd: ctx.cwd, projectTrusted: ctx.isProjectTrusted() });
      const content = configText(config);
      pi.sendMessage({ customType: "scrutiny-result", content, display: true, details: { kind: "config" } });
      return;
    }
    case "edit":
    case "edit global":
    case "edit user":
      await editConfigFile("global", ctx);
      return;
    case "edit project":
      await editConfigFile("project", ctx);
      return;
    default:
      ctx.ui.notify("usage: /scrutiny config | /scrutiny config edit [project]", "warning");
  }
}
205
+
206
/**
 * Open the global or project scrutiny config in the UI editor and save it if valid.
 *
 * The editor is seeded with the file's current contents, or with the example config
 * when the file does not exist yet. Any other read failure aborts the edit so an
 * unreadable existing config is never replaced by the example. The edited text is
 * saved only when it parses as JSON and its top level is an object.
 *
 * @param scope Which config file to edit; `"project"` requires a trusted project.
 * @param ctx Command context providing trust state, cwd, the editor, and notifications.
 */
async function editConfigFile(scope: "global" | "project", ctx: ExtensionCommandContext): Promise<void> {
  if (scope === "project" && !ctx.isProjectTrusted()) {
    ctx.ui.notify("project config skipped: project not trusted", "warning");
    return;
  }
  const file = scope === "project" ? projectConfigPath(ctx.cwd) : userConfigPath();

  let existing: string;
  try {
    existing = await readFile(file, "utf8");
  } catch (error) {
    const code = typeof error === "object" && error !== null && "code" in error ? error.code : undefined;
    // Only a missing file means "start from the example"; permission or I/O errors must surface.
    if (code !== "ENOENT") {
      ctx.ui.notify(`scrutiny config not opened: cannot read ${file} (${error instanceof Error ? error.message : String(error)})`, "error");
      return;
    }
    existing = exampleConfigJson();
  }

  const edited = await ctx.ui.editor(`Edit ${scope} scrutiny config`, existing);
  // `undefined` means the editor was dismissed without a result.
  if (edited === undefined) return;

  let parsed: unknown;
  try {
    parsed = JSON.parse(edited);
  } catch (error) {
    ctx.ui.notify(`scrutiny config not saved: invalid JSON (${error instanceof Error ? error.message : String(error)})`, "error");
    return;
  }
  // Valid JSON such as `42` or `[]` is still not a usable config document.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    ctx.ui.notify("scrutiny config not saved: invalid JSON (top level must be an object)", "error");
    return;
  }

  await mkdir(dirname(file), { recursive: true });
  // Mode 0o600 keeps a newly created config readable by the owner only.
  await writeFile(file, `${edited.trim()}\n`, { encoding: "utf8", mode: 0o600 });
  ctx.ui.notify(`saved ${file}. next scrutiny run will use it.`, "info");
}
225
+
226
/**
 * Render the `/scrutiny config` summary as markdown: where config was loaded from,
 * the active settings, and how to edit the config files.
 *
 * @param config Loaded scrutiny config.
 * @returns Newline-joined markdown text.
 */
function configText(config: ReturnType<typeof readScrutinyConfig>): string {
  const sourceLines: string[] = [];
  for (const source of config.configSources) {
    const reason = source.reason ? ` (${source.reason})` : "";
    const where = source.path ? ` — ${source.path}` : "";
    sourceLines.push(`- ${source.scope}: ${source.status}${reason}${where}`);
  }
  // With no recorded sources, show a single placeholder row instead of an empty section.
  const sources = sourceLines.length ? sourceLines : ["- env: no PI_SCRUTINY_* overrides"];

  const panel = config.panel.length ? config.panel.map(formatPanelMember).join(", ") : "not configured";
  const tools = config.tools.length ? config.tools.join(", ") : "none";
  const checks = config.verifyChecks.map((c) => c.name).join(", ") || "none";

  const header = [
    "# scrutiny config",
    "",
    "config files load in this order: global → trusted project → env overrides.",
    "",
  ];
  const active = [
    "## active",
    `- panel: ${panel}`,
    `- saved panels: ${config.councils.length}`,
    `- trade-off explainer: ${config.judge ?? "first panel model"}`,
    `- max panel: ${config.maxPanelModels}`,
    `- tools: ${tools}`,
    `- verify checks: ${checks}`,
  ];
  const edit = [
    "## edit",
    "- `/scrutiny config edit` edits global `~/.pi/agent/scrutiny.json`.",
    "- `/scrutiny config edit project` edits project `.pi/scrutiny.json` (trusted projects only).",
    "- env vars still override files for shell-specific experiments.",
  ];

  return [...header, "## sources", ...sources, "", ...active, "", ...edit].join("\n");
}
254
+
255
/**
 * Build the static `/scrutiny help` text: what the extension is, the available
 * surfaces, how runs behave, and example command lines.
 *
 * @returns Newline-joined markdown text.
 */
function helpText(): string {
  const intro = [
    "# pi-scrutiny",
    "",
    "multi-model panel for deliberation, plus objective repo verification. not patch scrutiny.",
  ];

  const surfaces = [
    "surfaces:",
    "- `consult` — replicate mode. bounded research/synthesis (validated use). trade-off explainer runs by default.",
    "- `hypotheses` — replicate mode. ranked root causes + confirming evidence + minimal distinguishing tests. disagreement is signal.",
    "- `criteria` — replicate mode. acceptance spec: edge cases, backward-compat, migration, test cases.",
    "- `repo-map` — roles mode. compact context (symbols, call paths, tests, config, invariants) for an upcoming edit.",
    "- `risks` — roles mode. per-class risk review of a patch (concurrency, reactive-chain, api-compat, security, perf, migration, null, flaky). runs verify.",
    "- `verify` — runs tests/typecheck/lint as the objective arbiter. no panel, no judge.",
  ];

  const behaviour = [
    "flow: surfaces run inline and stream a status footer while the panel works. press esc to cancel a run.",
    "mode: replicate means same prompt and disagreement signal; roles means lenses and coverage/gaps signal.",
    "panelists run sequentially. one scrutiny run at a time.",
    "arbiter is objective repo tools + human review, never an LLM judge. do not fuse patches.",
  ];

  const examples = [
    "```text",
    "/scrutiny # open palette",
    "/scrutiny models",
    "/scrutiny runs # recent runs this session",
    "/scrutiny history # interactive run history search",
    "/scrutiny history list [query] # text history for scripts",
    "/scrutiny history open <runId|latest> [result|summary|surface|packet|responses|verify]",
    "/scrutiny panels # list saved panel presets",
    "/scrutiny config # show config files + active settings",
    "/scrutiny config edit [project] # edit ~/.pi/agent/scrutiny.json or .pi/scrutiny.json",
    "/scrutiny verify: # run objective checks now",
    "/scrutiny @code-duo: review this patch # run a saved panel",
    "/scrutiny risks: review this webflux retry patch",
    "/scrutiny hypotheses: intermittent offset commit on kafka consumer",
    "/scrutiny ask compare these two implementation plans",
    "```",
  ];

  const footer = "Tool: `scrutiny_consult`. Preferred setup: `/scrutiny config edit`. Env vars still work (`PI_SCRUTINY_PANEL=provider/model,provider/model`).";

  // Sections are separated by a single blank line.
  return [...intro, "", ...surfaces, "", ...behaviour, "", ...examples, "", footer].join("\n");
}
294
+
295
/**
 * Render the `/scrutiny models` summary: configured panel, trade-off explainer,
 * panel size limit, allowed tools, and verify checks.
 *
 * @param config Loaded scrutiny config.
 * @returns Newline-joined markdown text.
 */
function modelsText(config: ReturnType<typeof readScrutinyConfig>): string {
  const panel = config.panel.length ? config.panel.map(formatPanelMember).join(", ") : "not configured";
  const explainer = config.judge ?? "first panel model";
  const tools = config.tools.length ? config.tools.join(", ") : "none";
  const checks = config.verifyChecks.map((c) => c.name).join(", ") || "none";

  const lines: string[] = [];
  lines.push("# scrutiny models", "");
  lines.push(`panel: ${panel}`);
  lines.push(`trade-off explainer: ${explainer}`);
  lines.push(`max panel: ${config.maxPanelModels}`);
  lines.push(`tools: ${tools}`);
  lines.push(`verify checks: ${checks}`);
  lines.push("");
  lines.push("Run `/scrutiny config edit` for persistent setup. `PI_SCRUTINY_*` env vars still override files.");
  return lines.join("\n");
}
308
+
309
/**
 * Render the `/scrutiny runs` listing from the in-session run registry.
 *
 * Each run becomes one bullet (id, surface, status, optional end time, optional
 * error, start time) followed by a line with its run directory.
 *
 * @returns Newline-joined markdown text, or a short notice when no runs exist.
 */
function runsText(): string {
  const runs = recentRuns();
  if (runs.length === 0) return "# scrutiny runs\n\nno runs yet in this session.";

  const lines: string[] = ["# scrutiny runs", ""];
  for (const run of runs) {
    const startedLabel = new Date(run.startedAt).toLocaleTimeString();
    let endedLabel = "";
    if (run.endedAt) endedLabel = ` · ${new Date(run.endedAt).toLocaleTimeString()}`;
    const errorLabel = run.error ? ` · ${run.error}` : "";
    const artifacts = run.runDir ?? "(no artifacts)";
    lines.push(`- ${run.runId} · ${run.surface} · ${run.status}${endedLabel}${errorLabel} · ${startedLabel}\n ${artifacts}`);
  }
  lines.push("", "artifacts (packet/responses/verify/result.json) live under each run dir.");
  return lines.join("\n");
}
320
+
321
/**
 * Render the `/scrutiny panels` listing of saved panel presets.
 *
 * @param config Loaded scrutiny config; presets are read from `config.councils`.
 * @returns Newline-joined markdown text, or a hint on how to add presets when none exist.
 */
function panelsText(config: ReturnType<typeof readScrutinyConfig>): string {
  const presets = config.councils;
  if (presets.length === 0) {
    return "# scrutiny saved panels\n\nno saved panels configured. run `/scrutiny config edit` and add a `panels` object.";
  }

  const lines: string[] = ["# scrutiny saved panels", ""];
  for (const preset of presets) {
    const members = preset.panelists.map(formatPanelMember).join(", ");
    let row = `- @${preset.name} · ${preset.surface} · ${members || "no members"}`;
    if (preset.judgeMode) row += ` · map:${preset.judgeMode}`;
    if (preset.verify) row += " · verify:on";
    lines.push(row);
  }
  lines.push("", "use: `/scrutiny @<name>: <prompt>`");
  return lines.join("\n");
}
330
+
331
/**
 * Format one panel member for display: the model id, followed by the lens in
 * parentheses and a `think:` suffix when those fields are set.
 *
 * @param member Panel member with a model id and optional lens / thinking level.
 * @returns Single-line label such as `provider/model (lens) think:level`.
 */
function formatPanelMember(member: { model: string; lens?: string; thinking?: string }): string {
  let label = `${member.model}`;
  if (member.lens) label += ` (${member.lens})`;
  if (member.thinking) label += ` think:${member.thinking}`;
  return label;
}
package/package.json ADDED
@@ -0,0 +1,48 @@
1
+ {
2
+ "name": "@roodriigoooo/pi-scrutiny",
3
+ "version": "0.1.0",
4
+ "description": "Pi Scrutiny: multi-model panel consultation for Pi coding agent",
5
+ "keywords": [
6
+ "pi-package",
7
+ "pi-extension",
8
+ "scrutiny",
9
+ "multi-model",
10
+ "coding-agent"
11
+ ],
12
+ "license": "MIT",
13
+ "type": "module",
14
+ "repository": {
15
+ "type": "git",
16
+ "url": "git+https://github.com/roodriigoooo/pi-scrutiny.git"
17
+ },
18
+ "files": [
19
+ "extensions",
20
+ "README.md"
21
+ ],
22
+ "pi": {
23
+ "extensions": ["./extensions/scrutiny.ts"]
24
+ },
25
+ "scripts": {
26
+ "check": "tsc --noEmit",
27
+ "eval:smoke": "node --experimental-strip-types eval/run-eval.ts smoke",
28
+ "pack:dry": "npm pack --dry-run",
29
+ "smoke:help": "pi --no-extensions -e ./extensions/scrutiny.ts --mode json --no-session \"/scrutiny help\""
30
+ },
31
+ "peerDependencies": {
32
+ "@earendil-works/pi-ai": "*",
33
+ "@earendil-works/pi-coding-agent": "*",
34
+ "@earendil-works/pi-tui": "*",
35
+ "typebox": "*"
36
+ },
37
+ "devDependencies": {
38
+ "@earendil-works/pi-ai": "*",
39
+ "@earendil-works/pi-coding-agent": "*",
40
+ "@earendil-works/pi-tui": "*",
41
+ "@types/node": "^22.0.0",
42
+ "typebox": "*",
43
+ "typescript": "^5.9.0"
44
+ },
45
+ "publishConfig": {
46
+ "access": "public"
47
+ }
48
+ }