@inceptionstack/pi-hard-no 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/reviewer.ts ADDED
@@ -0,0 +1,433 @@
1
+ /**
2
+ * reviewer.ts — Review session runner
3
+ *
4
+ * The reviewer gets:
5
+ * - Per-file git diffs and recent commit messages
6
+ * - Full paths of changed files
7
+ * - Read-only tools to read files and explore the codebase
8
+ * - Live status updates shown in the main pi status bar
9
+ *
10
+ * The reviewer reads each file itself via read(path) tool calls.
11
+ * Uses the standardized file logger for all diagnostic output.
12
+ */
13
+
14
+ import {
15
+ createAgentSession,
16
+ SessionManager,
17
+ AuthStorage,
18
+ ModelRegistry,
19
+ type AgentSessionEvent,
20
+ } from "@mariozechner/pi-coding-agent";
21
+
22
+ import { log, logReview, safeStringify, type ReviewToolCall } from "./logger";
23
+
24
/**
 * Outcome of one reviewer run, as produced by `runReviewSession`.
 */
export interface ReviewResult {
  /** Review text after cleanup — the version rendered to the user. */
  text: string;

  /** Untouched LLM output, kept for debugging and for the structured log record. */
  rawText: string;

  /** `true` when the reviewer's final verdict was LGTM. */
  isLgtm: boolean;

  /** Wall-clock duration of the run, in milliseconds. */
  durationMs: number;

  /** Every tool call the reviewer issued while exploring the codebase, in order. */
  toolCalls: ReviewToolCall[];

  /** Model the review effectively ran on ("provider/model-id"). */
  model: string;

  /** Thinking level the review effectively ran with. */
  thinkingLevel: string;
}
38
+
39
/**
 * Inputs that control a single reviewer run.
 */
export interface ReviewOptions {
  /** Abort signal; aborting it cancels the review. */
  signal: AbortSignal;

  /** Working directory the reviewer session is created in. */
  cwd: string;

  /** Reviewer model as "provider/model-id". When omitted the session's own model is used. */
  model?: string;

  /**
   * Thinking level: "off" | "minimal" | "low" | "medium" | "high" | "xhigh".
   * Defaults to "off".
   */
  thinkingLevel?: string;

  /** Maximum wall-clock time for the main prompt, in milliseconds. Default 120000. */
  timeoutMs?: number;

  /** Files under review; copied into the structured log record. */
  filesReviewed?: string[];

  /** Unique id of this review cycle — prefixes log lines and is embedded in the structured record. */
  reviewId?: string;

  /** Invoked with a short human-readable description of what the reviewer is doing (status bar). */
  onActivity?: (description: string) => void;

  /** Invoked with structured info about each tool call (display widget). */
  onToolCall?: (toolName: string, targetPath: string | null) => void;
}
57
+
58
/**
 * Signature of a function that runs one review: it takes the full prompt plus
 * the run options and resolves with the review outcome.
 */
export type ReviewRunner = (
  prompt: string,
  opts: ReviewOptions,
) => Promise<ReviewResult>;
59
+
60
/** Patterns that indicate where the actual review findings start inside the raw reviewer output. */
const REVIEW_MARKERS = [
  /\n##\s*Review/i,
  /\n##\s*Issues/i,
  /\n##\s*Findings/i,
  /\nHere'?s my review/i,
  /\nHere are the issues/i,
  /\n-\s*\*\*(High|Medium|Low)/i,
  /\n-\s*\[(High|Medium|Low)/i,
  /\n\*\*Issues found/i,
  /No issues found\./i,
];

/**
 * Strip tool-call noise from raw review text.
 *
 * Steps, in order:
 *  1. remove verdict tags (so they cannot interfere with marker detection),
 *  2. drop everything before the EARLIEST review marker found in the text,
 *  3. remove XML-style tool tags.
 *
 * @param raw Raw reviewer output.
 * @returns The cleaned, trimmed review text.
 */
export function cleanReviewText(raw: string): string {
  let text = stripVerdict(raw);

  // Use the earliest marker position across ALL markers rather than the first
  // marker in list order. Taking the first pattern that matches could slice at
  // a later heading (e.g. "## Review") and silently drop findings such as
  // "- **High** ..." that appear before it.
  let start = -1;
  for (const marker of REVIEW_MARKERS) {
    const index = text.search(marker);
    if (index >= 0 && (start < 0 || index < start)) start = index;
  }
  // start === 0 means the text already begins with the review: nothing to drop.
  if (start > 0) {
    text = text.slice(start).trim();
  }

  // Strip XML-style tool tags: paired form first, then self-closing form.
  text = text.replace(/<(bash|read_file|grep|find|ls)[^>]*>[\s\S]*?<\/\1>/g, "");
  text = text.replace(/<(bash|read_file|grep|find|ls)[^>]*\/>/g, "");
  return text.trim();
}
95
+
96
/**
 * Patterns whose presence in review text means the reviewer reported issues.
 * A single match is enough for the text to be treated as NOT LGTM.
 */
const ISSUE_MARKERS: readonly RegExp[] = [
  // "<Severity> severity", "<Severity> —", "<Severity> -", "<Severity>:" — one pattern per level.
  ...["High", "Medium", "Low"].map(
    (level) => new RegExp(`\\b${level}\\s*(?:severity|—|-|:)`, "i"),
  ),
  // Bulleted bold severity, e.g. "- **High".
  /-\s*\*\*(High|Medium|Low)/i,
  // Level-2/3 markdown heading that starts with a severity.
  /^###?\s*(High|Medium|Low)/im,
  // Bold "Issues found" banner.
  /\*\*Issues found/i,
];
108
+
109
/**
 * Extract the reviewer's verdict from its response.
 *
 * Only the first `<verdict>…</verdict>` tag is considered; matching is
 * case-insensitive and tolerates whitespace around the value.
 *
 * @param text Reviewer response text.
 * @returns "lgtm" for `<verdict>LGTM</verdict>`, "issues" for
 *          `<verdict>ISSUES_FOUND</verdict>`, or `null` when no verdict tag is
 *          present (the caller is expected to retry).
 */
export function parseVerdict(text: string): "lgtm" | "issues" | null {
  const found = /<verdict>\s*(LGTM|ISSUES_FOUND)\s*<\/verdict>/i.exec(text);
  if (found === null) {
    return null;
  }
  const label = found[1].toUpperCase();
  if (label === "LGTM") {
    return "lgtm";
  }
  return "issues";
}
119
+
120
/**
 * Remove every verdict tag from review text and trim the result.
 * The verdict is metadata, so it should not appear in the message shown to the user.
 *
 * @param text Review text that may contain `<verdict>…</verdict>` tags.
 * @returns The text without verdict tags, with surrounding whitespace removed.
 */
export function stripVerdict(text: string): string {
  const verdictTag = /<verdict>\s*(LGTM|ISSUES_FOUND)\s*<\/verdict>/gi;
  const withoutTags = text.replace(verdictTag, "");
  return withoutTags.trim();
}
127
+
128
/**
 * Heuristic LGTM check on already-cleaned review text.
 * This is a fallback; explicit verdict tags (see parseVerdict) take precedence.
 *
 * @param cleanedText Review text after cleanup.
 * @returns `true` for empty text or text that opens with "LGTM" (optionally
 *          preceded by dashes/whitespace and a "Review:" prefix) and contains
 *          no issue marker; `false` otherwise.
 */
export function isLgtmResult(cleanedText: string): boolean {
  const body = cleanedText.trim();
  if (body.length === 0) {
    return true;
  }

  // A severity marker anywhere wins over any LGTM mention.
  const reportsIssue = ISSUE_MARKERS.some((pattern) => pattern.test(body));
  if (reportsIssue) {
    return false;
  }

  // Without an explicit leading LGTM the answer is "not LGTM": showing the
  // text is safer than silently swallowing it.
  return /^[-\s]*(?:Review:\s*)?LGTM\b/i.test(body);
}
148
+
149
/**
 * Format a tool call as a short activity string for the status bar.
 *
 * Tool arguments come from the model and are not validated here, so only
 * string-valued fields are used; anything else is treated as missing instead
 * of throwing from `.slice` inside the event subscriber.
 *
 * @param name Tool name ("read", "bash", "find", "grep", "ls", or anything else).
 * @param args Raw tool arguments; may be undefined or malformed.
 * @returns A short human-readable description of the call.
 */
function formatActivity(name: string, args: unknown): string {
  // Read a field from args only when args is an object and the field is a string.
  const field = (key: "path" | "command" | "pattern"): string | undefined => {
    if (typeof args !== "object" || args === null) return undefined;
    const value = (args as Record<string, unknown>)[key];
    return typeof value === "string" ? value : undefined;
  };

  if (name === "read") return `reading ${field("path") ?? "file"}`;
  if (name === "bash") return `$ ${(field("command") ?? "").slice(0, 50)}`;
  if (name === "find" || name === "grep" || name === "ls") {
    return `${name} ${(field("path") ?? field("pattern") ?? "").slice(0, 40)}`;
  }
  return `${name}…`;
}
158
+
159
/** Render a session model as "provider/id"; "unknown" when the session has no model. */
function describeSessionModel(
  model: { provider: string; id: string } | null | undefined,
): string {
  return model ? `${model.provider}/${model.id}` : "unknown";
}

/**
 * Split a "provider/model-id" spec at the FIRST slash only.
 *
 * `spec.split("/", 2)` is not equivalent: in JavaScript the limit truncates the
 * result, so "openrouter/anthropic/claude" would yield the model id "anthropic"
 * and the lookup would fail for every model id that itself contains a slash.
 *
 * @returns Both parts, or `null` when there is no slash or either side is empty.
 */
function splitModelSpec(spec: string): { provider: string; modelId: string } | null {
  const slash = spec.indexOf("/");
  if (slash <= 0 || slash === spec.length - 1) return null;
  return { provider: spec.slice(0, slash), modelId: spec.slice(slash + 1) };
}

/**
 * Spawn a fresh pi reviewer session with a restricted tool set, send it the
 * prompt, and collect the response. The reviewer can read files and explore
 * the codebase as needed.
 *
 * Flow: create session → select model / thinking level → main prompt →
 * up to two short follow-up prompts if the response carries no verdict tag →
 * clean the text, write the structured log record, return the result.
 * If no verdict is obtained the result defaults to "issues found".
 *
 * @param prompt Full review prompt sent to the reviewer.
 * @param opts   Run options (abort signal, cwd, model, timeouts, callbacks).
 * @returns The cleaned review, raw text, verdict, duration, tool calls and the
 *          model / thinking level that were effectively used.
 * @throws Whatever auth/registry/session creation throws; `Error("Review cancelled")`
 *         when `opts.signal` aborts; `Error("Review timed out")` when the main
 *         prompt exceeds its timeout; any error the main `session.prompt()` rejects with.
 */
export async function runReviewSession(prompt: string, opts: ReviewOptions): Promise<ReviewResult> {
  const startTime = Date.now();
  const startedAt = new Date().toISOString();
  const idPrefix = opts.reviewId ? `[${opts.reviewId}] ` : "";
  // Use safeStringify (same circular-ref-safe serializer as log()) so rlog matches
  // log()'s safety contract even for non-string arguments. It is wrapped in an
  // arrow so Array.prototype.map's extra (index, array) arguments never reach it.
  const rlog = (...args: any[]) => log(idPrefix + args.map((a) => safeStringify(a)).join(" "));
  rlog(`reviewer: starting (prompt=${(prompt.length / 1000).toFixed(1)}k chars, cwd=${opts.cwd})`);

  let authStorage: ReturnType<typeof AuthStorage.create>;
  let modelRegistry: ReturnType<typeof ModelRegistry.create>;
  try {
    authStorage = AuthStorage.create();
    modelRegistry = ModelRegistry.create(authStorage);
  } catch (err: any) {
    rlog(`reviewer: failed to create auth/model registry: ${err?.message ?? err}`);
    rlog(`reviewer: stack: ${err?.stack ?? "(no stack)"}`);
    throw err;
  }

  let session: Awaited<ReturnType<typeof createAgentSession>>["session"];
  try {
    const result = await createAgentSession({
      cwd: opts.cwd,
      sessionManager: SessionManager.inMemory(),
      authStorage,
      modelRegistry,
      // Allowlist only read-only tools + bash; no write/edit for the reviewer
      tools: ["read", "bash", "grep", "find", "ls"],
    });
    session = result.session;
  } catch (err: any) {
    rlog(`reviewer: createAgentSession failed: ${err?.message ?? err}`);
    rlog(`reviewer: stack: ${err?.stack ?? "(no stack)"}`);
    throw err;
  }
  rlog(`reviewer: session created, initial model=${session.model?.provider}/${session.model?.id}`);

  type ThinkingLevel = "off" | "minimal" | "low" | "medium" | "high" | "xhigh";
  // Default: off, for fast reviews.
  const thinkingLevel = (opts.thinkingLevel ?? "off") as ThinkingLevel;

  let effectiveModel = opts.model ?? describeSessionModel(session.model);
  let currentText = ""; // always holds the latest assistant message (reset on message_start)
  let reviewText = ""; // set once after the main prompt completes; preserved through retries
  const toolCalls: ReviewToolCall[] = [];
  let verdict: "lgtm" | "issues" | null = null;
  let unsub: (() => void) | undefined;

  const MAIN_TIMEOUT_MS = opts.timeoutMs ?? 120 * 1000;
  const RETRY_TIMEOUT_MS = 20 * 1000;
  const MAX_VERDICT_RETRIES = 2;

  // The requested model could not be applied: log why, report the session's
  // own model as the effective one, and surface the fallback in the status bar.
  const fallBackToDefaultModel = (why: string): void => {
    const defaultName = describeSessionModel(session.model);
    rlog(`reviewer: model ${opts.model} ${why}. Falling back to ${defaultName}`);
    effectiveModel = defaultName;
    opts.onActivity?.(`default model: ${defaultName}`);
  };

  // Helper: send a prompt to the existing session and wait for completion.
  // Respects the outer abort signal and has its own timeout. Exactly one of
  // {prompt settled, abort, timeout} decides the outcome; later events are ignored.
  async function sendPrompt(text: string, timeoutMs: number): Promise<void> {
    await new Promise<void>((resolve, reject) => {
      let settled = false;
      let timeoutId: ReturnType<typeof setTimeout> | undefined;

      // Release the timer and the abort listener, whichever path finished first.
      function cleanup(): void {
        if (timeoutId !== undefined) clearTimeout(timeoutId);
        opts.signal.removeEventListener("abort", onAbort);
      }

      // Claim the outcome, stop the agent, then reject. We wait for
      // session.abort() so the reviewer agent has actually stopped before the
      // caller continues; dispose() alone only disconnects listeners.
      function abortAndReject(reason: string): void {
        settled = true;
        cleanup();
        const fail = () => reject(new Error(reason));
        session.abort().then(fail, fail);
      }

      function onAbort(): void {
        if (settled) return;
        abortAndReject("Review cancelled");
      }

      if (opts.signal.aborted) {
        onAbort();
        return;
      }

      opts.signal.addEventListener("abort", onAbort, { once: true });

      timeoutId = setTimeout(() => {
        if (settled) return;
        rlog(`reviewer: timed out after ${timeoutMs / 1000}s`);
        abortAndReject("Review timed out");
      }, timeoutMs);

      session.prompt(text).then(
        () => {
          // Aborting the agent usually makes prompt() settle too. If abort or
          // timeout already claimed the outcome, ignore this so a cancelled or
          // timed-out review is never reported as a normal completion.
          if (settled) return;
          settled = true;
          cleanup();
          resolve();
        },
        (err) => {
          if (settled) return;
          settled = true;
          cleanup();
          reject(err);
        },
      );
    });
  }

  // Everything that touches the live session runs inside this try so the
  // session is disposed even if model selection, setThinkingLevel or
  // subscribe throws.
  try {
    // Set the reviewer model if specified
    if (opts.model) {
      const spec = splitModelSpec(opts.model);
      if (!spec) {
        fallBackToDefaultModel(`is not in "provider/model-id" form`);
      } else {
        const model = modelRegistry.find(spec.provider, spec.modelId);
        if (!model) {
          fallBackToDefaultModel("not found");
        } else {
          try {
            await session.setModel(model);
            rlog(`reviewer: using model ${opts.model}`);
          } catch (err: any) {
            fallBackToDefaultModel(
              `could not be selected (${err?.message ?? err}); it may have no API key`,
            );
          }
        }
      }
    }

    session.setThinkingLevel(thinkingLevel);
    rlog(`reviewer: thinking level = ${thinkingLevel}`);

    unsub = session.subscribe((ev: AgentSessionEvent) => {
      // Reset on each new assistant message so we only keep the latest response.
      // (Agent loop may emit multiple messages within one prompt: reasoning, tool calls, final answer.)
      if (ev.type === "message_start" && (ev.message as any)?.role === "assistant") {
        currentText = "";
      }
      if (ev.type === "message_update" && ev.assistantMessageEvent.type === "text_delta") {
        currentText += ev.assistantMessageEvent.delta;
      }

      // Track + log every tool call the reviewer makes
      if (ev.type === "tool_execution_start") {
        const name = ev.toolName;
        const args = ev.args as any;
        const call: ReviewToolCall = {
          name,
          args,
          timestamp: new Date().toISOString(),
        };
        toolCalls.push(call);
        const activity = formatActivity(name, args);
        rlog(`reviewer tool: ${activity}`);
        opts.onActivity?.(activity);
        // Emit structured tool call for display widget
        const targetPath =
          name === "read"
            ? (args?.path ?? null)
            : name === "bash"
              ? (args?.command ?? null)
              : (args?.path ?? args?.pattern ?? null);
        opts.onToolCall?.(name, targetPath);
      }
      if (ev.type === "tool_execution_end") {
        opts.onActivity?.("analyzing…");
      }
    });

    rlog(`reviewer: session.prompt() starting`);
    try {
      await sendPrompt(prompt, MAIN_TIMEOUT_MS);
      rlog(`reviewer: session.prompt() resolved`);
    } catch (err: any) {
      // We re-throw, so the structured record below is never written on this
      // path; log how much was streamed so the failure is still diagnosable.
      reviewText = currentText;
      rlog(
        `reviewer: session.prompt() failed (${err?.message ?? err}); partial response=${reviewText.length}c`,
      );
      throw err;
    }

    // Snapshot the main review (the final assistant message of the main prompt's agent loop).
    // Retry prompts will overwrite currentText but reviewText stays fixed on the real findings.
    reviewText = currentText;

    // Verdict lives in either the main response or a retry response
    verdict = parseVerdict(currentText);
    let retries = 0;
    while (!verdict && retries < MAX_VERDICT_RETRIES) {
      retries++;
      rlog(`reviewer: no verdict tag found, retry ${retries}/${MAX_VERDICT_RETRIES}`);
      opts.onActivity?.(`retry ${retries}: asking for verdict`);
      const followUp =
        `Your previous response did not include a verdict tag. ` +
        `Please respond with ONLY the final verdict on a single line:\n\n` +
        `<verdict>LGTM</verdict>\n\n` +
        `if no real bugs were found in your previous analysis, OR:\n\n` +
        `<verdict>ISSUES_FOUND</verdict>\n\n` +
        `if you found issues. Do not repeat the review, just output the verdict tag.`;
      try {
        await sendPrompt(followUp, RETRY_TIMEOUT_MS);
      } catch (err: any) {
        // Propagate cancellation — don't silently swallow user intent
        if (err?.message === "Review cancelled") throw err;
        // Other retry failures: keep reviewText (from main prompt) and fall back to default verdict
        rlog(
          `reviewer: retry ${retries} failed (${err?.message ?? err}), using current reviewText`,
        );
        break;
      }
      verdict = parseVerdict(currentText);
    }

    if (!verdict) {
      // After all retries, default to ISSUES_FOUND (safer to show findings than swallow them)
      rlog(`reviewer: no verdict after ${MAX_VERDICT_RETRIES} retries, defaulting to ISSUES_FOUND`);
      verdict = "issues";
    }
  } finally {
    unsub?.();
    session.dispose();
  }

  const cleanedText = cleanReviewText(reviewText);
  const isLgtm = verdict === "lgtm";
  const durationMs = Date.now() - startTime;

  rlog(
    `reviewer: done in ${(durationMs / 1000).toFixed(1)}s | ` +
      `prompt=${(prompt.length / 1000).toFixed(1)}k | ` +
      `raw=${reviewText.length}c | ` +
      `cleaned=${cleanedText.length}c | ` +
      `tools=${toolCalls.length} | ` +
      `lgtm=${isLgtm}`,
  );
  rlog(`reviewer raw response:\n${reviewText}`);

  // Structured review record
  const reviewPath = logReview({
    timestamp: startedAt,
    reviewId: opts.reviewId,
    durationMs,
    model: effectiveModel,
    thinkingLevel,
    isLgtm,
    promptLength: prompt.length,
    rawText: reviewText,
    cleanedText,
    filesReviewed: opts.filesReviewed ?? [],
    toolCalls,
  });
  if (reviewPath) rlog(`reviewer: wrote structured record ${reviewPath}`);

  return {
    text: cleanedText,
    rawText: reviewText,
    isLgtm,
    durationMs,
    toolCalls,
    model: effectiveModel,
    thinkingLevel,
  };
}
package/scaffold.ts ADDED
@@ -0,0 +1,120 @@
1
+ /**
2
+ * scaffold.ts — Template content for /scaffold-review-files
3
+ *
4
+ * Contains the actual default prompts used by the extension so users
5
+ * can see and customise exactly what the reviewer sees.
6
+ *
7
+ * The default review rules live in default-review-rules.md (plain markdown,
8
+ * no code). scaffold.ts reads that file at import time so the content is
9
+ * available as SCAFFOLD_REVIEW_RULES for copying into the user's config dir.
10
+ */
11
+
12
+ import { readFileSync } from "node:fs";
13
+ import { join, dirname } from "node:path";
14
+ import { fileURLToPath } from "node:url";
15
+ import { DEFAULT_AUTO_REVIEW_RULES } from "./prompt";
16
+
17
// Directory of this module, derived from import.meta.url in two steps:
// file URL → filesystem path → containing directory.
const thisModulePath = fileURLToPath(import.meta.url);
const __dirname = dirname(thisModulePath);
18
+
19
+ // ── auto-review.md ───────────────────────────────────
20
+ // The review criteria: what to look for and what to skip.
21
+ // This is the ONLY part of the review prompt that users override directly.
22
+ // The surrounding prompt (tools, budget, workflow, response format) is always
23
+ // included automatically and cannot be changed.
24
+
25
/** Scaffold content for auto-review.md: the default auto-review rules followed by a single newline. */
export const SCAFFOLD_AUTO_REVIEW = DEFAULT_AUTO_REVIEW_RULES + "\n";
27
+
28
+ // ── review-rules.md ──────────────────────────────────
29
+ // Loaded from default-review-rules.md — pure review criteria, no operational instructions.
30
+ // The markdown file is the single source of truth; scaffold copies it to the user's config dir.
31
+
32
/**
 * Read default-review-rules.md from this module's directory.
 * Never throws: on any read failure the problem is reported on stderr and an
 * empty string is returned, so the scaffold produces an empty review-rules.md.
 */
function loadDefaultReviewRules(): string {
  try {
    return readFileSync(join(__dirname, "default-review-rules.md"), "utf8");
  } catch (err: any) {
    console.error(
      `[hard-no] Failed to read default-review-rules.md: ${err?.message ?? err}. ` +
        `Scaffold will create an empty review-rules.md. ` +
        `Expected at: ${join(__dirname, "default-review-rules.md")}`,
    );
    return "";
  }
}

/** Scaffold content for review-rules.md, loaded once when this module is imported. */
export const SCAFFOLD_REVIEW_RULES: string = loadDefaultReviewRules();
44
+
45
+ // ── architect.md ─────────────────────────────────────
46
+
47
/** Scaffold content for architect.md, assembled line by line (the final "" yields the trailing newline). */
export const SCAFFOLD_ARCHITECT_RULES = [
  "## Architecture",
  "",
  "- Verify the module dependency graph has no unexpected cycles",
  "- Check that layering is respected (e.g. UI → Service → Repository → Database)",
  "- Flag any god-objects or god-modules that accumulated too many responsibilities",
  "",
  "## Cross-cutting concerns",
  "",
  "- Error handling strategy consistent across all modules",
  "- Logging follows the same patterns everywhere",
  "- Configuration accessed the same way in all files",
  "",
  "## Technical debt",
  "",
  "- Flag any TODO/FIXME/HACK comments that were added",
  "- Identify code that was clearly written in haste during fix loops",
  "- Check for dead code or unused imports that accumulated",
  "",
  "## Documentation",
  "",
  "- README still accurate after all changes",
  "- Architecture docs reflect current state",
  "- Changed public APIs have updated JSDoc/comments",
  "",
].join("\n");
71
+
72
+ // ── ignore ───────────────────────────────────────────
73
+
74
/** Scaffold content for the ignore file, assembled line by line (the final "" yields the trailing newline). */
export const SCAFFOLD_IGNORE = [
  "# Files to skip during review (gitignore syntax)",
  "# Blank lines and lines starting with # are ignored.",
  "# Patterns follow .gitignore rules: *, **, ?, !, trailing /",
  "",
  "# Dependencies & lock files",
  "package-lock.json",
  "yarn.lock",
  "pnpm-lock.yaml",
  "bun.lockb",
  "",
  "# Build output",
  "dist/**",
  "build/**",
  "out/**",
  "*.min.js",
  "*.min.css",
  "",
  "# Generated files",
  "*.generated.ts",
  "*.d.ts",
  "",
  "# Snapshots",
  "*.snap",
  "",
  "# Large data / assets",
  "*.csv",
  "*.parquet",
  "",
].join("\n");
102
+
103
+ // ── settings.json ────────────────────────────────────
104
+
105
// Default values written into the scaffolded settings.json; key order here is
// the key order of the generated file.
const scaffoldSettingsDefaults = {
  maxReviewLoops: 100,
  model: "amazon-bedrock/us.anthropic.claude-opus-4-6-v1",
  thinkingLevel: "off",
  architectEnabled: true,
  reviewTimeoutMs: 120000,
  toggleShortcut: "alt+r",
  cancelShortcut: "",
  judgeEnabled: false,
  judgeModel: "amazon-bedrock/us.anthropic.claude-haiku-4-5-20251001-v1:0",
  judgeTimeoutMs: 10000,
};

/** Scaffold content for settings.json: the defaults serialized as JSON with 2-space indentation. */
export const SCAFFOLD_SETTINGS = JSON.stringify(scaffoldSettingsDefaults, null, 2);
@@ -0,0 +1,139 @@
1
+ /**
2
+ * session-kind.ts — detect whether pi-hard-no is loaded into the *main* agent
3
+ * session or into a spawned *sub-session* (e.g. the reviewer session created
4
+ * by `runReviewSession` in reviewer.ts).
5
+ *
6
+ * WHY THIS EXISTS
7
+ * ───────────────
8
+ * pi's extension loader calls our factory fresh for every session it creates.
9
+ * `reviewer.ts` calls `createAgentSession({...})` to spawn a separate reviewer
10
+ * pi instance; that triggers `DefaultResourceLoader.reload()` which calls
11
+ * `loadExtensions()` which calls our factory again with a new `pi`. So
12
+ * pi-hard-no is loaded twice per review: once in the main session, once inside
13
+ * each reviewer session.
14
+ *
15
+ * Without a guard, the reviewer-instance's `agent_end` handler fires when
16
+ * the reviewer's one-shot prompt finishes, tries to recursively review that
17
+ * session, then crashes with "ctx is stale after session replacement or
18
+ * reload" when `reviewer.ts:391 finally { session.dispose() }` invalidates
19
+ * the reviewer's runtime. Beyond the error, that recursion would double-review
20
+ * every turn — a real functional bug, not just noise.
21
+ *
22
+ * DETECTION
23
+ * ─────────
24
+ * `reviewer.ts` creates the reviewer session with a restricted tool set
25
+ * (`["read", "bash", "grep", "find", "ls"]`, no `write` / `edit`). pi's SDK
26
+ * passes this through as `allowedToolNames` which filters
27
+ * `AgentSession._toolDefinitions`, so `pi.getAllTools()` on the reviewer
28
+ * session returns those 5 tools and nothing else. The main interactive
29
+ * session always has `write` and `edit` available.
30
+ *
31
+ * "No write AND no edit" → definitely not a session we want to auto-review
32
+ * for. This is a stable invariant: a session without write/edit cannot be
33
+ * producing file changes that warrant review. Safe to no-op there.
34
+ *
35
+ * TIMING
36
+ * ──────
37
+ * `runtime.getAllTools` is bound during the `AgentSession` constructor,
38
+ * which runs AFTER the extension factory. So we cannot detect at activation
39
+ * time — we detect lazily on the first call and cache the result per-`pi`.
40
+ *
41
+ * FAIL-SAFE
42
+ * ─────────
43
+ * If the probe itself throws (runtime not yet bound, or ctx already stale
44
+ * at the instant we check), we default to `false` (main session) so the
45
+ * normal path still runs. Worst case is one extra stale-ctx log line — no
46
+ * worse than pre-fix behavior, and much rarer.
47
+ *
48
+ * TESTING
49
+ * ───────
50
+ * Pure TS. `pi` is passed as a parameter so tests can inject mocks without
51
+ * spinning up a real session. Cache is per-`pi` via `WeakMap` so tests
52
+ * using distinct mock objects stay isolated without an explicit reset.
53
+ */
54
+
55
+ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
56
+
57
+ import { log } from "./logger";
58
+
59
/**
 * Probe results keyed by `pi` instance. Whatever the first probe returns for
 * an instance — including the fail-safe `false` — is stored and reused for
 * later calls. A WeakMap is used so entries do not keep `pi` objects alive
 * (e.g. those of disposed reviewer sessions).
 */
const cache: WeakMap<object, boolean> = new WeakMap();
65
+
66
/**
 * Tools that identify a "main" session, i.e. one able to produce file changes
 * worth auto-reviewing. A session exposing NONE of these is treated as a
 * spawned sub-session, and pi-hard-no no-ops there.
 */
const MAIN_SESSION_WRITE_TOOLS = ["write", "edit"] as const;
72
+
73
/**
 * Tell whether this pi-hard-no instance lives in a spawned sub-session
 * (e.g. a reviewer session) instead of the main agent session.
 *
 * When the answer is `true`, callers should short-circuit their work (skip
 * triggering reviews, skip status bar updates).
 *
 * The probe runs at most once per `pi`; subsequent calls return the stored answer.
 *
 * @param pi Extension API handle of the current instance.
 * @returns `true` for a spawned sub-session, `false` for the main session.
 */
export function isSpawnedSubSession(pi: ExtensionAPI): boolean {
  let answer = cache.get(pi);
  if (answer === undefined) {
    answer = probeIsSpawned(pi);
    cache.set(pi, answer);
  }
  return answer;
}
91
+
92
/**
 * Single uncached probe behind isSpawnedSubSession.
 *
 * Lists the session's tools via `pi.getAllTools()` and reports "spawned" when
 * none of MAIN_SESSION_WRITE_TOOLS is among them. Never throws: if the tool
 * list cannot be obtained (missing method, non-array result, exception) the
 * problem is logged and `false` ("main session") is returned so the normal
 * path keeps running.
 */
function probeIsSpawned(pi: ExtensionAPI): boolean {
  try {
    // Missing getAllTools must NOT be read as "empty tool list = spawned":
    // that would wrongly no-op the real main session on an early call.
    if (typeof pi.getAllTools !== "function") {
      log(
        `session-kind: pi.getAllTools unavailable — defaulting to main session (no-op guard disabled for this instance)`,
      );
      return false;
    }

    const listed = pi.getAllTools();
    if (!Array.isArray(listed)) {
      log(
        `session-kind: pi.getAllTools() returned non-array (${typeof listed}) — defaulting to main session (no-op guard disabled for this instance)`,
      );
      return false;
    }

    // Collect tool names, ignoring entries without a string `name`.
    const toolNames = new Set<string>();
    for (const tool of listed) {
      const candidate = (tool as { name?: unknown })?.name;
      if (typeof candidate === "string") toolNames.add(candidate);
    }

    // One write-capable tool is enough to call this the main session.
    for (const writeTool of MAIN_SESSION_WRITE_TOOLS) {
      if (toolNames.has(writeTool)) return false;
    }

    log(
      `session-kind: spawned sub-session detected (tools=[${[...toolNames].join(",")}]) — pi-hard-no hooks will no-op for this instance`,
    );
    return true;
  } catch (err: any) {
    // A throwing probe means the runtime/ctx is not healthy right now; stay on
    // the main-session path rather than disabling the extension.
    log(
      `session-kind: probe failed (${err?.message ?? err}) — defaulting to main session (no-op guard disabled for this instance)`,
    );
    return false;
  }
}