@inceptionstack/pi-hard-no 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +287 -0
- package/architect.ts +128 -0
- package/changes.ts +404 -0
- package/commands.ts +635 -0
- package/context.ts +658 -0
- package/default-review-rules.md +150 -0
- package/git-roots.ts +94 -0
- package/helpers.ts +72 -0
- package/ignore.ts +105 -0
- package/index.ts +892 -0
- package/judge-skip-chain.ts +113 -0
- package/judge.ts +213 -0
- package/logger.ts +175 -0
- package/message-sender.ts +83 -0
- package/orchestrator.ts +521 -0
- package/package.json +55 -0
- package/prompt.ts +126 -0
- package/review-display.ts +571 -0
- package/reviewer.ts +433 -0
- package/scaffold.ts +120 -0
- package/session-kind.ts +139 -0
- package/settings.ts +332 -0
package/reviewer.ts
ADDED
|
@@ -0,0 +1,433 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* reviewer.ts — Review session runner
|
|
3
|
+
*
|
|
4
|
+
* The reviewer gets:
|
|
5
|
+
* - Per-file git diffs and recent commit messages
|
|
6
|
+
* - Full paths of changed files
|
|
7
|
+
* - Read-only tools to read files and explore the codebase
|
|
8
|
+
* - Live status updates shown in the main pi status bar
|
|
9
|
+
*
|
|
10
|
+
* The reviewer reads each file itself via read(path) tool calls.
|
|
11
|
+
* Uses the standardized file logger for all diagnostic output.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import {
|
|
15
|
+
createAgentSession,
|
|
16
|
+
SessionManager,
|
|
17
|
+
AuthStorage,
|
|
18
|
+
ModelRegistry,
|
|
19
|
+
type AgentSessionEvent,
|
|
20
|
+
} from "@mariozechner/pi-coding-agent";
|
|
21
|
+
|
|
22
|
+
import { log, logReview, safeStringify, type ReviewToolCall } from "./logger";
|
|
23
|
+
|
|
24
|
+
/** Outcome of one review run, as returned by `runReviewSession`. */
export interface ReviewResult {
  /** Cleaned review text shown to the user. */
  text: string;
  /** Raw LLM output before cleanup (for debugging / structured log). */
  rawText: string;
  /** True when the reviewer's final verdict was LGTM. */
  isLgtm: boolean;
  /** Wall-clock time from the start of the run until the result was assembled, in milliseconds. */
  durationMs: number;
  /** Every tool call the reviewer made during exploration. */
  toolCalls: ReviewToolCall[];
  /** Effective model used for the review. */
  model: string;
  /** Effective thinking level used. */
  thinkingLevel: string;
}
|
|
38
|
+
|
|
39
|
+
/** Options accepted by a review run (see `runReviewSession`). */
export interface ReviewOptions {
  /** Aborting this signal cancels the in-flight prompt; the run then rejects with "Review cancelled". */
  signal: AbortSignal;
  /** Working directory handed to the reviewer session. */
  cwd: string;
  /** "provider/model-id" to use for the reviewer */
  model?: string;
  /** Thinking level: "off" | "minimal" | "low" | "medium" | "high" | "xhigh" */
  thinkingLevel?: string;
  /** Max wall-clock for main prompt (ms). Default 120000. */
  timeoutMs?: number;
  /** Files being reviewed (used in the structured log record). */
  filesReviewed?: string[];
  /** Unique id for this review cycle — used as a log prefix and embedded in the structured record. */
  reviewId?: string;
  /** Called when the reviewer uses tools — for status bar updates */
  onActivity?: (description: string) => void;
  /** Called with structured tool call info — for display widget */
  onToolCall?: (toolName: string, targetPath: string | null) => void;
}
|
|
57
|
+
|
|
58
|
+
/** Shape of a function that runs one review: prompt text plus options in, review result out. */
export type ReviewRunner = (prompt: string, opts: ReviewOptions) => Promise<ReviewResult>;
|
|
59
|
+
|
|
60
|
+
/**
 * Review text markers that indicate where the actual review findings start.
 *
 * Order matters: `cleanReviewText` walks this list top to bottom and cuts at
 * the first PATTERN that matches past index 0 — not at the earliest position
 * in the text. Most patterns begin with `\n`, so they only match a marker that
 * starts a line after some preamble.
 */
const REVIEW_MARKERS = [
  // Markdown section headings
  /\n##\s*Review/i,
  /\n##\s*Issues/i,
  /\n##\s*Findings/i,
  // Conversational lead-ins
  /\nHere'?s my review/i,
  /\nHere are the issues/i,
  // Bulleted findings that open with a severity label
  /\n-\s*\*\*(High|Medium|Low)/i,
  /\n-\s*\[(High|Medium|Low)/i,
  /\n\*\*Issues found/i,
  // Explicit "nothing to report" sentence (may appear anywhere, not only at line start)
  /No issues found\./i,
];
|
|
72
|
+
|
|
73
|
+
/**
 * Reduce raw reviewer output to the text worth showing to the user.
 *
 * Steps, in order:
 *  1. Remove verdict tags, so they cannot influence marker detection.
 *  2. Drop any preamble before the first matching review marker
 *     (markers are tried in list order; a match at index 0 is ignored).
 *  3. Remove XML-style tool-call tags, both paired and self-closing.
 *
 * @param raw - Reviewer output exactly as produced by the model.
 * @returns The trimmed, cleaned review text.
 */
export function cleanReviewText(raw: string): string {
  const withoutVerdict = stripVerdict(raw);

  // Position of the first listed marker that matches somewhere past the start.
  const findingsStart = REVIEW_MARKERS.map((pattern) => withoutVerdict.search(pattern)).find(
    (position) => position > 0,
  );
  const findings =
    findingsStart === undefined ? withoutVerdict : withoutVerdict.slice(findingsStart).trim();

  const pairedToolTag = /<(bash|read_file|grep|find|ls)[^>]*>[\s\S]*?<\/\1>/g;
  const selfClosingToolTag = /<(bash|read_file|grep|find|ls)[^>]*\/>/g;
  return findings.replace(pairedToolTag, "").replace(selfClosingToolTag, "").trim();
}
|
|
95
|
+
|
|
96
|
+
/**
 * Severity markers that indicate the reviewer found issues.
 * If any of these appear in the review text, it is NOT LGTM.
 *
 * None of the patterns use the `g` flag, so `.test()` keeps no state between calls.
 */
const ISSUE_MARKERS = [
  // Severity word followed by "severity", an em dash, a hyphen or a colon
  /\bHigh\s*(?:severity|—|-|:)/i,
  /\bMedium\s*(?:severity|—|-|:)/i,
  /\bLow\s*(?:severity|—|-|:)/i,
  // Bullet that opens with a bold severity label
  /-\s*\*\*(High|Medium|Low)/i,
  // "##" / "###" heading that opens with a severity label (multiline: any line start)
  /^###?\s*(High|Medium|Low)/im,
  /\*\*Issues found/i,
];
|
|
108
|
+
|
|
109
|
+
/**
 * Extract the reviewer's verdict from its response.
 *
 * Looks for the first `<verdict>LGTM</verdict>` or
 * `<verdict>ISSUES_FOUND</verdict>` tag (case-insensitive, inner whitespace
 * allowed).
 *
 * @param text - Reviewer response to inspect.
 * @returns "lgtm" or "issues" for a recognised tag; null when no such tag is
 *          present (the caller is expected to retry).
 */
export function parseVerdict(text: string): "lgtm" | "issues" | null {
  const found = /<verdict>\s*(LGTM|ISSUES_FOUND)\s*<\/verdict>/i.exec(text);
  if (found === null) {
    return null;
  }
  const label = found[1].toUpperCase();
  if (label === "LGTM") {
    return "lgtm";
  }
  return "issues";
}
|
|
119
|
+
|
|
120
|
+
/**
 * Remove every recognised verdict tag from review text.
 *
 * The verdict is metadata, not something the user should see in the rendered
 * message. Only well-formed LGTM / ISSUES_FOUND tags are removed; the result
 * is trimmed.
 *
 * @param text - Review text that may contain verdict tags.
 * @returns The text without verdict tags and without surrounding whitespace.
 */
export function stripVerdict(text: string): string {
  const verdictTag = /<verdict>\s*(LGTM|ISSUES_FOUND)\s*<\/verdict>/gi;
  const withoutTags = text.replace(verdictTag, "");
  return withoutTags.trim();
}
|
|
127
|
+
|
|
128
|
+
/**
 * Heuristic LGTM check on already-cleaned review text.
 *
 * This is the fallback for responses without an explicit verdict tag; prefer
 * parseVerdict() when a tag is available.
 *
 * Rules, in order:
 *  - empty text counts as LGTM;
 *  - any severity / issue marker means NOT LGTM, even if "LGTM" is mentioned;
 *  - otherwise the text is LGTM only when it opens with "LGTM"
 *    (optionally preceded by dashes, whitespace or "Review:").
 *
 * Anything else is treated as NOT LGTM — showing unclear text is safer than
 * silently swallowing it.
 *
 * @param cleanedText - Review text after cleanup.
 * @returns True when the text reads as "no issues".
 */
export function isLgtmResult(cleanedText: string): boolean {
  const body = cleanedText.trim();
  if (body.length === 0) {
    return true;
  }

  const mentionsIssue = ISSUE_MARKERS.some((pattern) => pattern.test(body));
  if (mentionsIssue) {
    return false;
  }

  return /^[-\s]*(?:Review:\s*)?LGTM\b/i.test(body);
}
|
|
148
|
+
|
|
149
|
+
/**
 * Format a tool call event as a short activity string for the status bar.
 *
 * Tool arguments come from the model and are not guaranteed to be strings, so
 * every field is coerced with `String()` before truncation. This function runs
 * inside the session event subscriber and must never throw on odd input.
 *
 * @param name - Tool name ("read", "bash", "find", "grep", "ls", or anything else).
 * @param args - Raw tool arguments; any shape, including null/undefined.
 * @returns `reading <path>` for read, `$ <command>` (max 50 chars) for bash,
 *          `<name> <path-or-pattern>` (max 40 chars) for find/grep/ls, and
 *          `<name>…` for every other tool.
 */
function formatActivity(name: string, args: unknown): string {
  const fields: Record<string, unknown> =
    typeof args === "object" && args !== null ? (args as Record<string, unknown>) : {};

  // Present field → its string form; missing/null field → undefined so `??` can fall through.
  const field = (key: string): string | undefined => {
    const value = fields[key];
    return value === undefined || value === null ? undefined : String(value);
  };

  switch (name) {
    case "read":
      return `reading ${field("path") ?? "file"}`;
    case "bash":
      return `$ ${(field("command") ?? "").slice(0, 50)}`;
    case "find":
    case "grep":
    case "ls":
      return `${name} ${(field("path") ?? field("pattern") ?? "").slice(0, 40)}`;
    default:
      return `${name}…`;
  }
}
|
|
158
|
+
|
|
159
|
+
/**
 * Split a "provider/model-id" spec at the FIRST "/" only.
 *
 * `spec.split("/", 2)` would silently drop everything after a second slash, so
 * a model id that itself contains "/" could never be found. Everything after
 * the first slash is treated as the model id.
 *
 * @returns The two parts, or null when either side would be empty.
 */
function splitModelSpec(spec: string): { provider: string; modelId: string } | null {
  const slash = spec.indexOf("/");
  if (slash <= 0 || slash === spec.length - 1) return null;
  return { provider: spec.slice(0, slash), modelId: spec.slice(slash + 1) };
}

/**
 * Spawn a fresh pi reviewer session with read-only tools (plus bash), send the
 * review prompt and collect the response. The reviewer can read files and
 * explore the codebase as needed.
 *
 * Flow:
 *  1. Create auth storage, model registry and an in-memory agent session.
 *  2. Select `opts.model` when given; if it is malformed, unknown or cannot be
 *     selected, keep the session's default model and report that instead.
 *  3. Send the main prompt, bounded by `opts.timeoutMs` (default 120 s).
 *  4. If the reply carries no verdict tag, ask for one up to 2 more times
 *     (20 s each). With no verdict after that, default to "issues".
 *  5. Dispose the session, write the structured review record, return.
 *
 * @param prompt - Full review prompt sent to the reviewer.
 * @param opts - Abort signal, working directory, model/thinking overrides,
 *               timeout and progress callbacks.
 * @returns The cleaned and raw review text, verdict, timing, tool calls and
 *          the model / thinking level actually used.
 * @throws Error("Review cancelled") when `opts.signal` aborts during any prompt.
 * @throws Error("Review timed out") when the main prompt exceeds its timeout.
 * @throws Whatever session setup or the main `session.prompt()` rejects with.
 */
export async function runReviewSession(prompt: string, opts: ReviewOptions): Promise<ReviewResult> {
  const startTime = Date.now();
  const startedAt = new Date().toISOString();
  const idPrefix = opts.reviewId ? `[${opts.reviewId}] ` : "";
  // Use safeStringify (same circular-ref-safe serializer as log()) so rlog matches
  // log()'s safety contract even for non-string arguments.
  const rlog = (...args: any[]) => log(idPrefix + args.map(safeStringify).join(" "));
  rlog(`reviewer: starting (prompt=${(prompt.length / 1000).toFixed(1)}k chars, cwd=${opts.cwd})`);

  let authStorage: ReturnType<typeof AuthStorage.create>;
  let modelRegistry: ReturnType<typeof ModelRegistry.create>;
  try {
    authStorage = AuthStorage.create();
    modelRegistry = ModelRegistry.create(authStorage);
  } catch (err: any) {
    rlog(`reviewer: failed to create auth/model registry: ${err?.message ?? err}`);
    rlog(`reviewer: stack: ${err?.stack ?? "(no stack)"}`);
    throw err;
  }

  let session: Awaited<ReturnType<typeof createAgentSession>>["session"];
  try {
    const result = await createAgentSession({
      cwd: opts.cwd,
      sessionManager: SessionManager.inMemory(),
      authStorage,
      modelRegistry,
      // Allowlist only read-only tools + bash; no write/edit for the reviewer
      tools: ["read", "bash", "grep", "find", "ls"],
    });
    session = result.session;
  } catch (err: any) {
    rlog(`reviewer: createAgentSession failed: ${err?.message ?? err}`);
    rlog(`reviewer: stack: ${err?.stack ?? "(no stack)"}`);
    throw err;
  }
  rlog(`reviewer: session created, initial model=${session.model?.provider}/${session.model?.id}`);

  // ── Model selection ────────────────────────────────
  // effectiveModel always names the model the session is really using, so the
  // structured record never claims a model that was not applied.
  const currentModelName = (): string =>
    session.model ? `${session.model.provider}/${session.model.id}` : "unknown";
  let effectiveModel = currentModelName();
  if (opts.model) {
    const requested = opts.model;
    const fallBack = (reason: string): void => {
      const defaultName = currentModelName();
      rlog(`reviewer: model ${requested} ${reason}. Falling back to ${defaultName}`);
      effectiveModel = defaultName;
      opts.onActivity?.(`default model: ${defaultName}`);
    };

    const spec = splitModelSpec(requested);
    const model = spec ? modelRegistry.find(spec.provider, spec.modelId) : undefined;
    if (!spec) {
      fallBack(`is not in "provider/model-id" form`);
    } else if (!model) {
      fallBack("not found");
    } else {
      try {
        await session.setModel(model);
        effectiveModel = requested;
        rlog(`reviewer: using model ${requested}`);
      } catch (err: any) {
        // Keep the underlying reason in the log instead of swallowing it.
        rlog(`reviewer: setModel(${requested}) failed: ${err?.message ?? err}`);
        fallBack("has no API key");
      }
    }
  }

  // Set thinking level (default: off for fast reviews)
  type ThinkingLevel = "off" | "minimal" | "low" | "medium" | "high" | "xhigh";
  const thinkingLevel = (opts.thinkingLevel ?? "off") as ThinkingLevel;
  session.setThinkingLevel(thinkingLevel);
  rlog(`reviewer: thinking level = ${thinkingLevel}`);

  let currentText = ""; // always holds the latest assistant message (reset on message_start)
  let reviewText = ""; // set once after main sendPrompt completes; preserved through retries
  const toolCalls: ReviewToolCall[] = [];

  const unsub = session.subscribe((ev: AgentSessionEvent) => {
    // Reset on each new assistant message so we only keep the latest response.
    // (Agent loop may emit multiple messages within one prompt: reasoning, tool calls, final answer.)
    if (ev.type === "message_start" && (ev.message as any)?.role === "assistant") {
      currentText = "";
    }
    if (ev.type === "message_update" && ev.assistantMessageEvent.type === "text_delta") {
      currentText += ev.assistantMessageEvent.delta;
    }

    // Track + log every tool call the reviewer makes
    if (ev.type === "tool_execution_start") {
      const name = ev.toolName;
      const args = ev.args as any;
      const call: ReviewToolCall = {
        name,
        args,
        timestamp: new Date().toISOString(),
      };
      toolCalls.push(call);
      const activity = formatActivity(name, args);
      rlog(`reviewer tool: ${activity}`);
      opts.onActivity?.(activity);
      // Emit structured tool call for display widget
      const targetPath =
        name === "read"
          ? (args?.path ?? null)
          : name === "bash"
            ? (args?.command ?? null)
            : (args?.path ?? args?.pattern ?? null);
      opts.onToolCall?.(name, targetPath);
    }
    if (ev.type === "tool_execution_end") {
      opts.onActivity?.("analyzing…");
    }
  });

  // Helper: send a prompt to the existing session, wait for completion.
  // Respects the outer abort signal and has its own timeout.
  //
  // Exactly one of three paths decides the outcome: prompt settles, signal
  // aborts, or the timer fires. `settled` marks that a path has claimed it;
  // the other paths then do nothing, so a prompt that settles *because* we
  // aborted it cannot turn a timeout/cancel into a success or into a
  // differently-worded error.
  async function sendPrompt(text: string, timeoutMs: number): Promise<void> {
    await new Promise<void>((resolve, reject) => {
      let settled = false;
      // eslint-disable-next-line prefer-const
      let timeoutId: ReturnType<typeof setTimeout> | undefined;

      // Release the timer and the abort listener on every exit path.
      const cleanup = (): void => {
        if (timeoutId !== undefined) clearTimeout(timeoutId);
        opts.signal.removeEventListener("abort", onAbort);
      };

      // Await session.abort() so the reviewer agent actually stops
      // before we reject. dispose() alone only disconnects listeners.
      const stopThenReject = (message: string): void => {
        settled = true;
        cleanup();
        const fail = (): void => reject(new Error(message));
        session.abort().then(fail, fail);
      };

      function onAbort(): void {
        if (settled) return;
        stopThenReject("Review cancelled");
      }

      if (opts.signal.aborted) {
        onAbort();
        return;
      }

      opts.signal.addEventListener("abort", onAbort, { once: true });

      timeoutId = setTimeout(() => {
        if (settled) return;
        rlog(`reviewer: timed out after ${timeoutMs / 1000}s`);
        stopThenReject("Review timed out");
      }, timeoutMs);

      session.prompt(text).then(
        () => {
          if (settled) return; // abort/timeout already owns the outcome
          settled = true;
          cleanup();
          resolve();
        },
        (err: unknown) => {
          if (settled) return; // abort/timeout already owns the outcome
          settled = true;
          cleanup();
          reject(err);
        },
      );
    });
  }

  const MAIN_TIMEOUT_MS = opts.timeoutMs ?? 120 * 1000;
  const RETRY_TIMEOUT_MS = 20 * 1000;
  const MAX_VERDICT_RETRIES = 2;

  let verdict: "lgtm" | "issues" | null = null;
  try {
    rlog(`reviewer: session.prompt() starting`);
    try {
      await sendPrompt(prompt, MAIN_TIMEOUT_MS);
      rlog(`reviewer: session.prompt() resolved`);
    } catch (err) {
      // The structured record below is skipped when we re-throw, so write any
      // partial text we streamed to the log here; otherwise it is lost.
      reviewText = currentText;
      if (reviewText) {
        rlog(
          `reviewer: main prompt failed; partial response (${reviewText.length}c):\n${reviewText}`,
        );
      }
      throw err;
    }

    // Snapshot the main review (the final assistant message of the main prompt's agent loop).
    // Retry prompts will overwrite currentText but reviewText stays fixed on the real findings.
    reviewText = currentText;

    // Verdict lives in either the main response or a retry response
    verdict = parseVerdict(currentText);
    let retries = 0;
    while (!verdict && retries < MAX_VERDICT_RETRIES) {
      retries++;
      rlog(`reviewer: no verdict tag found, retry ${retries}/${MAX_VERDICT_RETRIES}`);
      opts.onActivity?.(`retry ${retries}: asking for verdict`);
      const followUp =
        `Your previous response did not include a verdict tag. ` +
        `Please respond with ONLY the final verdict on a single line:\n\n` +
        `<verdict>LGTM</verdict>\n\n` +
        `if no real bugs were found in your previous analysis, OR:\n\n` +
        `<verdict>ISSUES_FOUND</verdict>\n\n` +
        `if you found issues. Do not repeat the review, just output the verdict tag.`;
      try {
        await sendPrompt(followUp, RETRY_TIMEOUT_MS);
      } catch (err: any) {
        // Propagate cancellation — don't silently swallow user intent
        if (err?.message === "Review cancelled") throw err;
        // Other retry failures: keep reviewText (from main prompt) and fall back to default verdict
        rlog(
          `reviewer: retry ${retries} failed (${err?.message ?? err}), using current reviewText`,
        );
        break;
      }
      verdict = parseVerdict(currentText);
    }

    if (!verdict) {
      // After all retries, default to ISSUES_FOUND (safer to show findings than swallow them)
      rlog(`reviewer: no verdict after ${MAX_VERDICT_RETRIES} retries, defaulting to ISSUES_FOUND`);
      verdict = "issues";
    }
  } finally {
    unsub();
    session.dispose();
  }

  const cleanedText = cleanReviewText(reviewText);
  const isLgtm = verdict === "lgtm";
  const durationMs = Date.now() - startTime;

  rlog(
    `reviewer: done in ${(durationMs / 1000).toFixed(1)}s | ` +
      `prompt=${(prompt.length / 1000).toFixed(1)}k | ` +
      `raw=${reviewText.length}c | ` +
      `cleaned=${cleanedText.length}c | ` +
      `tools=${toolCalls.length} | ` +
      `lgtm=${isLgtm}`,
  );
  rlog(`reviewer raw response:\n${reviewText}`);

  // Structured review record
  const reviewPath = logReview({
    timestamp: startedAt,
    reviewId: opts.reviewId,
    durationMs,
    model: effectiveModel,
    thinkingLevel,
    isLgtm,
    promptLength: prompt.length,
    rawText: reviewText,
    cleanedText,
    filesReviewed: opts.filesReviewed ?? [],
    toolCalls,
  });
  if (reviewPath) rlog(`reviewer: wrote structured record ${reviewPath}`);

  return {
    text: cleanedText,
    rawText: reviewText,
    isLgtm,
    durationMs,
    toolCalls,
    model: effectiveModel,
    thinkingLevel,
  };
}
|
package/scaffold.ts
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* scaffold.ts — Template content for /scaffold-review-files
|
|
3
|
+
*
|
|
4
|
+
* Contains the actual default prompts used by the extension so users
|
|
5
|
+
* can see and customise exactly what the reviewer sees.
|
|
6
|
+
*
|
|
7
|
+
* The default review rules live in default-review-rules.md (plain markdown,
|
|
8
|
+
* no code). scaffold.ts reads that file at import time so the content is
|
|
9
|
+
* available as SCAFFOLD_REVIEW_RULES for copying into the user's config dir.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { readFileSync } from "node:fs";
|
|
13
|
+
import { join, dirname } from "node:path";
|
|
14
|
+
import { fileURLToPath } from "node:url";
|
|
15
|
+
import { DEFAULT_AUTO_REVIEW_RULES } from "./prompt";
|
|
16
|
+
|
|
17
|
+
// Directory containing this module, derived from import.meta.url.
// Used below to locate the bundled default-review-rules.md.
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
18
|
+
|
|
19
|
+
// ── auto-review.md ───────────────────────────────────
// Review criteria: what the reviewer should look for and what it should skip.
// Users override ONLY this part of the review prompt directly. Everything
// around it (tools, budget, workflow, response format) is always added
// automatically and cannot be changed.

/** Default auto-review rules followed by a single trailing newline. */
export const SCAFFOLD_AUTO_REVIEW = DEFAULT_AUTO_REVIEW_RULES + "\n";
|
|
27
|
+
|
|
28
|
+
// ── review-rules.md ──────────────────────────────────
// Pure review criteria with no operational instructions, loaded from
// default-review-rules.md. That markdown file is the single source of truth;
// scaffold copies its content into the user's config dir.

/**
 * Read the bundled default-review-rules.md that sits next to this module.
 * On any read failure, report the problem on stderr and yield an empty
 * string, so scaffolding still works (with an empty review-rules.md).
 */
function loadDefaultReviewRules(): string {
  const rulesPath = join(__dirname, "default-review-rules.md");
  try {
    return readFileSync(rulesPath, "utf8");
  } catch (err: any) {
    console.error(
      `[hard-no] Failed to read default-review-rules.md: ${err?.message ?? err}. ` +
        `Scaffold will create an empty review-rules.md. ` +
        `Expected at: ${rulesPath}`,
    );
    return "";
  }
}

/** Content of default-review-rules.md, read once at import time ("" if unreadable). */
export const SCAFFOLD_REVIEW_RULES: string = loadDefaultReviewRules();
|
|
44
|
+
|
|
45
|
+
// ── architect.md ─────────────────────────────────────

/**
 * Template content for architect.md: markdown checklists grouped under four
 * headings. Written one line per array entry; the final empty entry yields
 * the trailing newline.
 */
export const SCAFFOLD_ARCHITECT_RULES = [
  "## Architecture",
  "",
  "- Verify the module dependency graph has no unexpected cycles",
  "- Check that layering is respected (e.g. UI → Service → Repository → Database)",
  "- Flag any god-objects or god-modules that accumulated too many responsibilities",
  "",
  "## Cross-cutting concerns",
  "",
  "- Error handling strategy consistent across all modules",
  "- Logging follows the same patterns everywhere",
  "- Configuration accessed the same way in all files",
  "",
  "## Technical debt",
  "",
  "- Flag any TODO/FIXME/HACK comments that were added",
  "- Identify code that was clearly written in haste during fix loops",
  "- Check for dead code or unused imports that accumulated",
  "",
  "## Documentation",
  "",
  "- README still accurate after all changes",
  "- Architecture docs reflect current state",
  "- Changed public APIs have updated JSDoc/comments",
  "",
].join("\n");
|
|
71
|
+
|
|
72
|
+
// ── ignore ───────────────────────────────────────────

/**
 * Template content for the review ignore file (gitignore syntax).
 * Each inner array is one section; sections are separated by a blank line and
 * the file ends with a single newline.
 */
export const SCAFFOLD_IGNORE =
  [
    [
      "# Files to skip during review (gitignore syntax)",
      "# Blank lines and lines starting with # are ignored.",
      "# Patterns follow .gitignore rules: *, **, ?, !, trailing /",
    ],
    ["# Dependencies & lock files", "package-lock.json", "yarn.lock", "pnpm-lock.yaml", "bun.lockb"],
    ["# Build output", "dist/**", "build/**", "out/**", "*.min.js", "*.min.css"],
    ["# Generated files", "*.generated.ts", "*.d.ts"],
    ["# Snapshots", "*.snap"],
    ["# Large data / assets", "*.csv", "*.parquet"],
  ]
    .map((section) => section.join("\n"))
    .join("\n\n") + "\n";
|
|
102
|
+
|
|
103
|
+
// ── settings.json ────────────────────────────────────

/**
 * Template content for settings.json: the default settings object serialized
 * as pretty-printed JSON (2-space indent). Keys are emitted in the order
 * written here.
 */
export const SCAFFOLD_SETTINGS = JSON.stringify(
  {
    maxReviewLoops: 100,
    model: "amazon-bedrock/us.anthropic.claude-opus-4-6-v1",
    thinkingLevel: "off",
    architectEnabled: true,
    reviewTimeoutMs: 120000,
    toggleShortcut: "alt+r",
    cancelShortcut: "",
    judgeEnabled: false,
    judgeModel: "amazon-bedrock/us.anthropic.claude-haiku-4-5-20251001-v1:0",
    judgeTimeoutMs: 10000,
  },
  null,
  2,
);
|
package/session-kind.ts
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* session-kind.ts — detect whether pi-hard-no is loaded into the *main* agent
|
|
3
|
+
* session or into a spawned *sub-session* (e.g. the reviewer session created
|
|
4
|
+
* by `runReviewSession` in reviewer.ts).
|
|
5
|
+
*
|
|
6
|
+
* WHY THIS EXISTS
|
|
7
|
+
* ───────────────
|
|
8
|
+
* pi's extension loader calls our factory fresh for every session it creates.
|
|
9
|
+
* `reviewer.ts` calls `createAgentSession({...})` to spawn a separate reviewer
|
|
10
|
+
* pi instance; that triggers `DefaultResourceLoader.reload()` which calls
|
|
11
|
+
* `loadExtensions()` which calls our factory again with a new `pi`. So
|
|
12
|
+
* pi-hard-no is loaded twice per review: once in the main session, once inside
|
|
13
|
+
* each reviewer session.
|
|
14
|
+
*
|
|
15
|
+
* Without a guard, the reviewer-instance's `agent_end` handler fires when
|
|
16
|
+
* the reviewer's one-shot prompt finishes, tries to recursively review that
|
|
17
|
+
* session, then crashes with "ctx is stale after session replacement or
|
|
18
|
+
* reload" when `reviewer.ts:391 finally { session.dispose() }` invalidates
|
|
19
|
+
* the reviewer's runtime. Beyond the error, that recursion would double-review
|
|
20
|
+
* every turn — a real functional bug, not just noise.
|
|
21
|
+
*
|
|
22
|
+
* DETECTION
|
|
23
|
+
* ─────────
|
|
24
|
+
* `reviewer.ts` creates the reviewer session with a restricted tool set
|
|
25
|
+
* (`["read", "bash", "grep", "find", "ls"]`, no `write` / `edit`). pi's SDK
|
|
26
|
+
* passes this through as `allowedToolNames` which filters
|
|
27
|
+
* `AgentSession._toolDefinitions`, so `pi.getAllTools()` on the reviewer
|
|
28
|
+
* session returns those 5 tools and nothing else. The main interactive
|
|
29
|
+
* session always has `write` and `edit` available.
|
|
30
|
+
*
|
|
31
|
+
* "No write AND no edit" → definitely not a session we want to auto-review
|
|
32
|
+
* for. This is a stable invariant: a session without write/edit cannot be
|
|
33
|
+
* producing file changes that warrant review. Safe to no-op there.
|
|
34
|
+
*
|
|
35
|
+
* TIMING
|
|
36
|
+
* ──────
|
|
37
|
+
* `runtime.getAllTools` is bound during the `AgentSession` constructor,
|
|
38
|
+
* which runs AFTER the extension factory. So we cannot detect at activation
|
|
39
|
+
* time — we detect lazily on the first call and cache the result per-`pi`.
|
|
40
|
+
*
|
|
41
|
+
* FAIL-SAFE
|
|
42
|
+
* ─────────
|
|
43
|
+
* If the probe itself throws (runtime not yet bound, or ctx already stale
|
|
44
|
+
* at the instant we check), we default to `false` (main session) so the
|
|
45
|
+
* normal path still runs. Worst case is one extra stale-ctx log line — no
|
|
46
|
+
* worse than pre-fix behavior, and much rarer.
|
|
47
|
+
*
|
|
48
|
+
* TESTING
|
|
49
|
+
* ───────
|
|
50
|
+
* Pure TS. `pi` is passed as a parameter so tests can inject mocks without
|
|
51
|
+
* spinning up a real session. Cache is per-`pi` via `WeakMap` so tests
|
|
52
|
+
* using distinct mock objects stay isolated without an explicit reset.
|
|
53
|
+
*/
|
|
54
|
+
|
|
55
|
+
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
56
|
+
|
|
57
|
+
import { log } from "./logger";
|
|
58
|
+
|
|
59
|
+
/**
 * Per-`pi` cache of probe results. Whatever the first probe for a given `pi`
 * returns is remembered for the life of that extension instance — this
 * includes the fail-safe `false` produced when the probe could not run, so a
 * failed probe is not retried. WeakMap so GC'd pi instances (e.g. disposed
 * reviewer sessions) don't leak.
 */
const cache = new WeakMap<object, boolean>();
|
|
65
|
+
|
|
66
|
+
/**
 * Tool names whose presence marks a session as "main" (capable of producing
 * file changes we want to auto-review). If ALL of these are missing, the
 * session is treated as a spawned sub-session and pi-hard-no no-ops.
 * One match is enough to count as a main session.
 */
const MAIN_SESSION_WRITE_TOOLS = ["write", "edit"] as const;
|
|
72
|
+
|
|
73
|
+
/**
 * Tell whether this pi-hard-no instance lives in a spawned sub-session
 * (e.g. a reviewer session) instead of the main agent session.
 *
 * When this returns `true`, callers should short-circuit: do not trigger
 * reviews and do not update the status bar.
 *
 * The answer is computed once per `pi` object and then served from the cache,
 * so repeated calls are cheap and always agree with the first one.
 *
 * @param pi - Extension API object of the current session.
 * @returns `true` for a spawned sub-session, `false` for the main session.
 */
export function isSpawnedSubSession(pi: ExtensionAPI): boolean {
  const remembered = cache.get(pi);
  if (typeof remembered === "boolean") {
    return remembered;
  }

  const isSpawned = probeIsSpawned(pi);
  cache.set(pi, isSpawned);
  return isSpawned;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Uncached probe behind `isSpawnedSubSession`.
 *
 * Lists the session's tools and reports "spawned" when none of the
 * MAIN_SESSION_WRITE_TOOLS is among them. Never throws: if the tool list
 * cannot be obtained (no `getAllTools`, a non-array result, or an exception),
 * the answer is `false` — "main session" — so the normal path keeps running.
 *
 * @param pi - Extension API object of the current session.
 * @returns `true` only when a tool list was obtained and it has no write tool.
 */
function probeIsSpawned(pi: ExtensionAPI): boolean {
  // Shared tail of every fail-safe log line.
  const failSafe = "defaulting to main session (no-op guard disabled for this instance)";

  try {
    // A missing getAllTools means the runtime is not bound yet (or the mock is
    // malformed). Treating that as "empty tool list = spawned" would wrongly
    // no-op the real main session on an early call, so fail safe instead.
    if (typeof pi.getAllTools !== "function") {
      log(`session-kind: pi.getAllTools unavailable — ${failSafe}`);
      return false;
    }

    const listed = pi.getAllTools();
    if (!Array.isArray(listed)) {
      log(`session-kind: pi.getAllTools() returned non-array (${typeof listed}) — ${failSafe}`);
      return false;
    }

    // Collect tool names, ignoring entries without a string `name`.
    const names = new Set<string>();
    for (const tool of listed) {
      const candidate = (tool as { name?: unknown })?.name;
      if (typeof candidate === "string") {
        names.add(candidate);
      }
    }

    const canWrite = MAIN_SESSION_WRITE_TOOLS.some((toolName) => names.has(toolName));
    if (canWrite) {
      return false;
    }

    log(
      `session-kind: spawned sub-session detected (tools=[${Array.from(names).join(",")}]) — pi-hard-no hooks will no-op for this instance`,
    );
    return true;
  } catch (err: any) {
    // A throwing probe means runtime/ctx is not healthy right now. Falling
    // back to "main session" keeps the normal path alive; the worst outcome
    // is one stale-ctx log line later, which is the pre-fix baseline.
    log(`session-kind: probe failed (${err?.message ?? err}) — ${failSafe}`);
    return false;
  }
}
|