@inceptionstack/pi-hard-no 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,521 @@
1
+ import { createHash } from "node:crypto";
2
+
3
+ import { type ContentSizeLimits, FALLBACK_LIMITS, type ReviewContent } from "./context";
4
+ import { hasFileChanges, isFormattingOnlyTurn, collectModifiedPaths } from "./changes";
5
+ import type { TrackedToolCall } from "./changes";
6
+ import { createReviewId, computeReviewTimeoutMs } from "./helpers";
7
+ import type { BashClassification } from "./judge";
8
+ import { buildReviewPrompt } from "./prompt";
9
+ import type { AutoReviewSettings } from "./settings";
10
+ import { runArchitectReview, shouldRunArchitectReview } from "./architect";
11
+ import type { ReviewResult, ReviewRunner } from "./reviewer";
12
+ import { log } from "./logger";
13
+
14
/**
 * Minimum length, in characters after trimming, that built review content must
 * reach. Shorter content makes the orchestrator skip the review instead of
 * sending it to the reviewer.
 */
const MIN_REVIEW_CONTENT_LENGTH = 50;
15
+
16
/** Result of one review step (senior or architect) as returned inside a completed outcome. */
export type ReviewStepResult = {
  /** Result object produced by the review runner for this step. */
  result: ReviewResult;
  /** Display label; the orchestrator uses "" for the senior step and "Architect Review" for the architect step. */
  label?: string;
  /** Set to "loop <current>/<max>" when the senior review found issues; otherwise undefined. */
  loopInfo?: string;
  /** Unique id for this review step (senior review cycle or architect review). */
  reviewId: string;
};
23
+
24
/** Outcome of one `ReviewOrchestrator.handleAgentEnd` call, discriminated by `type`. */
export type ReviewOutcome =
  // No review was run; `reason` is a short code such as "disabled",
  // "no_file_changes", "formatting_only", "no_real_files", "judge_read_only",
  // "no_meaningful_changes", "duplicate_content" or "fallback_too_small".
  | { type: "skipped"; reason: string }
  // The review was aborted and its result (if any) was discarded.
  | { type: "cancelled" }
  // The review threw something other than a cancellation.
  | { type: "error"; error: Error }
  // The loop counter had already reached `settings.maxReviewLoops`.
  | { type: "max_loops" }
  | {
      // The senior review ran to completion (with or without issues).
      type: "completed";
      senior: ReviewStepResult;
      /** Present only when the architect review ran and returned a result. */
      architect?: ReviewStepResult;
      /** Populated when architect was supposed to run but failed (timeout, error).
       * Distinct from `architect` being undefined because it was skipped by the trigger logic. */
      architectFailure?: { reviewId: string; error: Error };
      /** Files covered by the senior review content. */
      files: string[];
    };
38
+
39
/** Arguments handed to a `ContentBuilder` when review content is assembled. */
export interface ContentBuilderInput {
  /** Tool calls from the agent turn being reviewed. */
  agentToolCalls: TrackedToolCall[];
  /** Optional status callback. NOTE(review): the orchestrator in this file never passes it — confirm which caller does. */
  onStatus?: (msg: string) => void;
  /** Patterns to ignore; presumably path patterns — exact semantics are defined by the builder. */
  ignorePatterns?: string[];
  /** Git roots forwarded from the orchestrator input. */
  gitRoots?: Set<string>;
  /** Size limits for the built content; the orchestrator passes `FALLBACK_LIMITS` on a context-overflow retry and omits it otherwise. */
  limits?: ContentSizeLimits;
}
46
+
47
/**
 * Builds the content to review for one agent turn. Resolving to `null` (or to
 * content with no files / too little text) makes the orchestrator skip the review.
 */
export type ContentBuilder = (input: ContentBuilderInput) => Promise<ReviewContent | null>;
48
+
49
/** Everything `ReviewOrchestrator.handleAgentEnd` needs to review one finished agent turn. */
export interface ReviewOrchestratorInput {
  /** Tool calls made during the turn; used by the gating checks, the judge gate and the content builder. */
  agentToolCalls: TrackedToolCall[];
  /** Paths reported as modified; entries equal to "(bash file op)" are ignored when looking for real files. */
  modifiedFiles: Set<string>;
  /** Forwarded unchanged to the content builder. */
  gitRoots: Set<string>;
  /** Working directory passed to the judge, the review runner and the architect review. */
  cwd: string;
  /** Review settings (loop limit, timeouts, models, judge and architect toggles). */
  settings: AutoReviewSettings;
  /** Passed to `buildReviewPrompt` as its `customRules` argument. */
  customRules: string | null;
  /** Passed to `buildReviewPrompt` as its `autoReviewRules` argument. */
  autoReviewRules: string | null;
  /** Forwarded to the content builder; `null` is converted to `undefined`. */
  ignorePatterns: string[] | null;
  /** Passed to the architect review as its `customRules`. */
  architectRules: string | null;
  /** Passed to `buildReviewPrompt` as its `userRequest` argument. */
  lastUserMessage: string | null;
  /** Forwarded to the senior review runner; also invoked with a status text before a context-overflow retry. */
  onActivity?: (description: string) => void;
  /** Forwarded to the senior review runner. */
  onToolCall?: (toolName: string, targetPath: string | null) => void;
  /** Forwarded to the architect review. */
  onArchitectActivity?: (description: string) => void;
  /** Forwarded to the architect review. */
  onArchitectToolCall?: (toolName: string, targetPath: string | null) => void;
  /** Called after content passed all skip checks, just before the senior review starts. */
  onContentReady?: (files: string[], loopCount: number, timeoutMs: number) => void;
  /** Called just before the architect review starts, with the session's changed files and the timeout. */
  onArchitectStart?: (files: string[], timeoutMs: number) => void;
  /** Check if a file still exists on disk. Used to prune deleted files from architect review. */
  fileExists?: (path: string) => Promise<boolean>;
}
69
+
70
/** Dependencies injected into `ReviewOrchestrator` at construction time. */
export interface ReviewOrchestratorOptions {
  /** Executes a review prompt; used for the senior review and handed to the architect review. */
  runner: ReviewRunner;
  /** Produces the content (files + text) to be reviewed for a turn. */
  contentBuilder: ContentBuilder;
  /**
   * Optional duplicate-review suppressor ("judge"). When provided AND
   * `settings.judgeEnabled` is true, the orchestrator asks the judge to
   * classify each bash tool call before building content. If ALL bash calls
   * classify as `inspection_vcs_noop` and no write/edit tool calls happened,
   * the review is skipped with reason="judge_read_only".
   *
   * Injected (not hard-imported) so tests can mock without touching the SDK.
   * Fail-open: missing judge, judge throws, or judge returns `modifying`/`unsure`
   * for any command → the review runs as normal.
   */
  judge?: JudgeClassifier;
}
86
+
87
/**
 * Classifier contract the orchestrator expects. Implementations must always
 * resolve (never reject); failures map to `"unsure"` which is treated as
 * "run the review". See `judge.ts` for the production implementation.
 *
 * The orchestrator calls it once per non-empty bash command, passing the
 * trimmed command text plus an abort signal, the working directory,
 * `settings.judgeModel` and `settings.judgeTimeoutMs`.
 */
export type JudgeClassifier = (
  command: string,
  opts: { signal: AbortSignal; cwd: string; model: string; timeoutMs: number },
) => Promise<BashClassification>;
96
+
97
/**
 * Orchestrates the automatic review that runs when an agent turn ends.
 *
 * One `handleAgentEnd` call applies cheap gating checks, optionally consults
 * the judge, builds the review content, runs the senior review (retrying once
 * with `FALLBACK_LIMITS` on a context-overflow error) and, after an LGTM
 * verdict, may run an architect review over everything changed in the session.
 *
 * The instance keeps state between calls: the loop counter, the hash of the
 * last reviewed content, and per-session accumulators (change summaries and
 * changed files) that feed the architect review.
 */
export class ReviewOrchestrator {
  private readonly runner: ReviewRunner;
  private readonly contentBuilder: ContentBuilder;
  private readonly judge?: JudgeClassifier;

  /** Controller of the in-flight judge gate or review cycle; null when idle. */
  private reviewAbort: AbortController | null = null;
  private isReviewingValue = false;
  private reviewEnabled = true;
  /** Number of review cycles started since the last LGTM / reset. */
  private loopCount = 0;
  private peakReviewLoopCount = 0;
  /** SHA-256 of the content most recently reviewed; "" when nothing was reviewed yet. */
  private lastReviewedContentHash = "";
  private architectDone = false;
  /** Truncated content of each reviewed cycle, joined into the architect's summary. */
  private sessionChangeSummaries: string[] = [];
  private sessionChangedFiles = new Set<string>();
  private sessionHasGitContent = false;
  private lastReviewHadIssues = false;

  constructor(opts: ReviewOrchestratorOptions) {
    this.runner = opts.runner;
    this.contentBuilder = opts.contentBuilder;
    this.judge = opts.judge;
  }

  /** True while a senior/architect review cycle is running (not set during the judge gate). */
  get isReviewing(): boolean {
    return this.isReviewingValue;
  }

  /** Whether automatic reviews are currently enabled. */
  get isEnabled(): boolean {
    return this.reviewEnabled;
  }

  /** Whether the most recent completed senior review reported issues. */
  get lastHadIssues(): boolean {
    return this.lastReviewHadIssues;
  }

  /** Current value of the review loop counter. */
  get currentLoopCount(): number {
    return this.loopCount;
  }

  /** Abort signal of the in-flight judge gate or review, or null when idle. */
  get abortSignal(): AbortSignal | null {
    return this.reviewAbort?.signal ?? null;
  }

  /** Turns reviews on or off; turning them on also clears the cycle state. */
  setEnabled(enabled: boolean): void {
    this.reviewEnabled = enabled;
    if (enabled) this.resetCycleState();
  }

  /** Aborts any in-flight work and clears all cycle and session state. */
  reset(): void {
    this.reviewAbort?.abort();
    this.reviewAbort = null;
    this.isReviewingValue = false;
    this.resetCycleState();
    this.lastReviewHadIssues = false;
  }

  /** Aborts any in-flight judge gate or review without clearing accumulated state. */
  cancel(): void {
    this.reviewAbort?.abort();
  }

  /**
   * Runs the review pipeline for one finished agent turn.
   *
   * @param input - tool calls, settings, rule texts and callbacks for this turn.
   * @returns the outcome: skipped (with a reason), cancelled, error, max_loops,
   *   or completed with the senior result and optional architect result/failure.
   */
  async handleAgentEnd(input: ReviewOrchestratorInput): Promise<ReviewOutcome> {
    if (!this.reviewEnabled) return { type: "skipped", reason: "disabled" };

    if (this.loopCount >= input.settings.maxReviewLoops) {
      return { type: "max_loops" };
    }

    if (!hasFileChanges(input.agentToolCalls)) {
      return { type: "skipped", reason: "no_file_changes" };
    }

    if (isFormattingOnlyTurn(input.agentToolCalls)) {
      log("skipping review: formatting/linting only");
      return { type: "skipped", reason: "formatting_only" };
    }

    // "(bash file op)" entries are excluded here; they do not count as real file paths.
    const realFiles = new Set([
      ...[...input.modifiedFiles].filter((f) => f !== "(bash file op)"),
      ...collectModifiedPaths(input.agentToolCalls),
    ]);
    if (realFiles.size === 0) {
      log("skipping review: no real file paths found");
      return { type: "skipped", reason: "no_real_files" };
    }

    // Judge gate: if enabled, ask a cheap LLM to classify each bash command.
    // If they're all read-only AND no write/edit tool call ran, skip the
    // full review entirely. See judge.ts + eval/RESULTS.md for the pick.
    if (input.settings.judgeEnabled && this.judge) {
      const judgeAbort = new AbortController();
      this.reviewAbort = judgeAbort;
      let readOnly = false;
      try {
        readOnly = await this.isTurnReadOnlyViaJudge(input, judgeAbort.signal);
      } catch (err: any) {
        // Fail-open: any judge-gate error → proceed with the normal review.
        log(`judge gate failed (${err?.message ?? err}) — proceeding with review`);
      } finally {
        // Only clear our own controller; reset() or a newer call may own the field now.
        if (this.reviewAbort === judgeAbort) this.reviewAbort = null;
      }
      // A cancel()/reset() issued during the gate must stop the pipeline here;
      // otherwise the fail-open path would start a full review on a fresh
      // controller and the cancellation would be silently lost.
      if (judgeAbort.signal.aborted) {
        log("review cancelled during judge gate");
        return { type: "cancelled" };
      }
      if (readOnly) {
        log("skipping review: judge classified turn as read-only");
        return { type: "skipped", reason: "judge_read_only" };
      }
    }

    this.loopCount++;
    this.isReviewingValue = true;
    // Keep a local reference: reset() nulls `this.reviewAbort`, so the field
    // alone cannot tell us afterwards whether THIS cycle was aborted.
    const cycleAbort = new AbortController();
    this.reviewAbort = cycleAbort;

    const seniorReviewId = createReviewId();
    log(
      `[${seniorReviewId}] review cycle started (loop ${this.loopCount}/${input.settings.maxReviewLoops})`,
    );

    try {
      let best = await this.buildContent(input);

      if (
        !best ||
        best.files.length === 0 ||
        best.content.trim().length < MIN_REVIEW_CONTENT_LENGTH
      ) {
        log(`[${seniorReviewId}] no meaningful changes, skipping`);
        // Previous issues are resolved (files deleted/changes gone) — clear indicators
        this.lastReviewHadIssues = false;
        this.loopCount = 0;
        return { type: "skipped", reason: "no_meaningful_changes" };
      }

      log(`[${seniorReviewId}] best:`, {
        label: best.label,
        files: best.files,
        contentLen: best.content.length,
      });

      const contentHash = hashContent(best.content);
      if (contentHash === this.lastReviewedContentHash) {
        log(`[${seniorReviewId}] Skipping — same content as last review`);
        return { type: "skipped", reason: "duplicate_content" };
      }

      const seniorTimeoutMs = computeReviewTimeoutMs(
        input.settings.reviewTimeoutMs,
        best.files.length,
      );
      input.onContentReady?.(best.files, this.loopCount, seniorTimeoutMs);
      log(
        `[${seniorReviewId}] Reviewing ${best.files.length} files via ${best.label || "git diff"}: ${best.files.join(", ")}`,
      );

      let result: ReviewResult;
      try {
        result = await this.runSeniorReview(input, best, seniorReviewId);
      } catch (retryErr: any) {
        if (!isContextOverflowError(retryErr)) throw retryErr;
        log(`[${seniorReviewId}] Context overflow, retrying with fallback limits`);
        input.onActivity?.("retrying with smaller context…");
        const smallBest = await this.buildContent(input, FALLBACK_LIMITS);
        if (
          !smallBest ||
          smallBest.files.length === 0 ||
          smallBest.content.trim().length < MIN_REVIEW_CONTENT_LENGTH
        ) {
          log(`[${seniorReviewId}] Fallback content too small, skipping review`);
          this.lastReviewHadIssues = false;
          this.loopCount = 0;
          return { type: "skipped", reason: "fallback_too_small" };
        }
        best = smallBest;
        result = await this.runSeniorReview(input, best, seniorReviewId);
      }

      // Check for late cancellation: if abort fired while runSeniorReview was
      // settling, discard the result instead of feeding it back to the agent.
      // Uses the local controller so an abort issued through reset() is seen too.
      if (cycleAbort.signal.aborted) {
        log(`[${seniorReviewId}] Review cancelled after review completed (race window)`);
        return { type: "cancelled" };
      }

      this.sessionChangeSummaries.push(best.content.slice(0, 5000));
      for (const f of best.files) this.sessionChangedFiles.add(f);
      if (best.isGitBased) this.sessionHasGitContent = true;
      // Re-hash: `best` may have been replaced by the fallback content above.
      this.lastReviewedContentHash = hashContent(best.content);

      const senior: ReviewStepResult = {
        result,
        label: "",
        loopInfo: undefined,
        reviewId: seniorReviewId,
      };

      if (result.isLgtm) {
        this.lastReviewHadIssues = false;
        this.loopCount = 0;

        const architectOutcome = await this.runArchitectIfNeeded(input);
        if (architectOutcome && "step" in architectOutcome) {
          return {
            type: "completed",
            senior,
            architect: architectOutcome.step,
            files: best.files,
          };
        }
        if (architectOutcome && "failure" in architectOutcome) {
          // Architect attempted but failed (timeout, error). Surface to the caller
          // so the user sees a message instead of a silent swallow.
          return {
            type: "completed",
            senior,
            architectFailure: architectOutcome.failure,
            files: best.files,
          };
        }
        // No architect ran — clear session accumulators so stale files
        // from this cycle don't leak into a future unrelated cycle.
        this.resetCycleState();
        return { type: "completed", senior, files: best.files };
      }

      this.peakReviewLoopCount = Math.max(this.peakReviewLoopCount, this.loopCount);
      this.lastReviewHadIssues = true;
      senior.loopInfo = `loop ${this.loopCount}/${input.settings.maxReviewLoops}`;
      return { type: "completed", senior, files: best.files };
    } catch (err: any) {
      if (err?.message === "Review cancelled") return { type: "cancelled" };
      return { type: "error", error: toError(err) };
    } finally {
      // Release the "reviewing" state only if this cycle still owns it. After
      // reset() the fields are already cleared, and a newer cycle must not have
      // its controller or flag wiped by an older one finishing late.
      if (this.reviewAbort === cycleAbort) {
        this.isReviewingValue = false;
        this.reviewAbort = null;
      }
    }
  }

  /**
   * Calls the injected content builder with the turn's tool calls, ignore
   * patterns and git roots.
   *
   * @param limits - optional size limits; omitted for the first attempt.
   */
  private async buildContent(
    input: ReviewOrchestratorInput,
    limits?: ContentSizeLimits,
  ): Promise<ReviewContent | null> {
    return await this.contentBuilder({
      agentToolCalls: input.agentToolCalls,
      ignorePatterns: input.ignorePatterns ?? undefined,
      gitRoots: input.gitRoots,
      limits,
    });
  }

  /**
   * Builds the senior review prompt (rules + "---" separator + content) and
   * runs it through the review runner.
   *
   * @throws Error("Review cancelled") when no abort controller is active, plus
   *   whatever the runner itself throws.
   */
  private async runSeniorReview(
    input: ReviewOrchestratorInput,
    content: ReviewContent,
    reviewId: string,
  ): Promise<ReviewResult> {
    const prompt = `${buildReviewPrompt(input.autoReviewRules, input.customRules, input.lastUserMessage)}\n\n---\n\n${content.content}`;
    log(`[${reviewId}] prompt length:`, prompt.length);
    const result = await this.runner(prompt, {
      signal: this.requiredSignal(),
      cwd: input.cwd,
      model: input.settings.model,
      thinkingLevel: input.settings.thinkingLevel,
      timeoutMs: computeReviewTimeoutMs(input.settings.reviewTimeoutMs, content.files.length),
      filesReviewed: content.files,
      reviewId,
      onActivity: input.onActivity,
      onToolCall: input.onToolCall,
    });
    log(`[${reviewId}] result:`, {
      isLgtm: result.isLgtm,
      durationMs: result.durationMs,
      textLen: result.text.length,
    });
    return result;
  }

  /**
   * Runs the architect review when it is enabled and the trigger accepts the
   * session's accumulated files.
   *
   * @returns `{ step }` on success, `{ failure }` when the review threw a
   *   non-cancellation error, or `undefined` when the architect did not run.
   * @throws rethrows errors whose message is "Review cancelled".
   */
  private async runArchitectIfNeeded(
    input: ReviewOrchestratorInput,
  ): Promise<
    { step: ReviewStepResult } | { failure: { reviewId: string; error: Error } } | undefined
  > {
    // Prune deleted files from session accumulator before checking architect trigger
    if (input.fileExists) {
      const existing = new Set<string>();
      for (const f of this.sessionChangedFiles) {
        if (await input.fileExists(f)) existing.add(f);
      }
      this.sessionChangedFiles = existing;
    }

    const willRunArchitect =
      input.settings.architectEnabled &&
      !this.architectDone &&
      shouldRunArchitectReview([...this.sessionChangedFiles], this.sessionHasGitContent);

    if (!willRunArchitect) return undefined;

    this.architectDone = true;
    const architectReviewId = createReviewId();
    const fileCount = this.sessionChangedFiles.size;
    // Architect explores the codebase with grep/read across many files; scale the timeout
    // with session file count like the senior review does.
    const architectTimeoutMs = computeReviewTimeoutMs(input.settings.reviewTimeoutMs, fileCount);
    log(
      `[${architectReviewId}] architect: running — ${fileCount} files reviewed across session (timeoutMs=${architectTimeoutMs})`,
    );
    input.onArchitectStart?.([...this.sessionChangedFiles], architectTimeoutMs);

    try {
      const summaryText = this.sessionChangeSummaries.join("\n\n---\n\n");
      const result = await runArchitectReview(this.runner, {
        signal: this.requiredSignal(),
        cwd: input.cwd,
        model: input.settings.model,
        customRules: input.architectRules,
        sessionChangeSummary: summaryText,
        reviewId: architectReviewId,
        timeoutMs: architectTimeoutMs,
        onActivity: input.onArchitectActivity,
        onToolCall: input.onArchitectToolCall,
      });
      return { step: { result, label: "Architect Review", reviewId: architectReviewId } };
    } catch (err: any) {
      if (err?.message === "Review cancelled") throw err;
      log(`[${architectReviewId}] ERROR: Architect review failed: ${err?.message ?? err}`);
      return { failure: { reviewId: architectReviewId, error: toError(err) } };
    } finally {
      // Whatever happened (success, failure, cancellation), the session
      // accumulators are consumed and the next session starts empty.
      this.sessionChangeSummaries = [];
      this.sessionChangedFiles = new Set();
      this.peakReviewLoopCount = 0;
      this.architectDone = false;
      this.sessionHasGitContent = false;
    }
  }

  /**
   * Ask the judge to classify each bash tool call in this turn. Returns true
   * only if the turn is confidently read-only:
   * - No write/edit tool calls happened.
   * - Every bash command classified as `inspection_vcs_noop`.
   *
   * Fail-open: any individual classification that returns `unsure` or
   * `modifying` (or throws, which is mapped to `unsure` inside
   * `classifyBashCommand`) flips the answer back to "run the review".
   *
   * Serial invocation keeps rate-limit risk low. Most real turns have <5
   * bash calls, so the latency cost is <~5s for the skip case — negligible
   * compared to the 30-90s main review we're avoiding.
   */
  private async isTurnReadOnlyViaJudge(
    input: ReviewOrchestratorInput,
    signal: AbortSignal,
  ): Promise<boolean> {
    if (!this.judge) return false;

    // Any explicit write/edit tool call is an unambiguous modification.
    // Don't waste a judge call on those — go straight to review.
    for (const tc of input.agentToolCalls) {
      if (tc.name === "write" || tc.name === "edit") return false;
    }

    const bashCalls = input.agentToolCalls.filter((tc) => tc.name === "bash");
    if (bashCalls.length === 0) {
      // No bash and no write/edit, but we got past `realFiles.size === 0`,
      // so something else pushed files into the set. Safer to review.
      return false;
    }

    let classifiedAny = false;

    for (const tc of bashCalls) {
      const cmd = String(tc.input?.command ?? "").trim();
      if (!cmd) continue;
      const classification = await this.judge(cmd, {
        signal,
        cwd: input.cwd,
        model: input.settings.judgeModel,
        timeoutMs: input.settings.judgeTimeoutMs,
      });
      log(`judge: ${classification} ← ${cmd.slice(0, 80).replace(/\n/g, " ")}`);
      if (classification !== "inspection_vcs_noop") return false;
      if (signal.aborted) return false;
      classifiedAny = true;
    }
    // Safety: only return true if we actually classified at least one command.
    // A turn with bash calls that all have empty command strings shouldn't be
    // treated as "confidently read-only" — bail to review instead.
    return classifiedAny;
  }

  /** Clears the loop counters, the last-content hash and all session accumulators. */
  private resetCycleState(): void {
    this.loopCount = 0;
    this.peakReviewLoopCount = 0;
    this.lastReviewedContentHash = "";
    this.architectDone = false;
    this.sessionChangeSummaries = [];
    this.sessionChangedFiles = new Set();
    this.sessionHasGitContent = false;
  }

  /**
   * Returns the active abort signal.
   *
   * @throws Error("Review cancelled") when no controller is active (e.g. after `reset()`).
   */
  private requiredSignal(): AbortSignal {
    if (!this.reviewAbort) throw new Error("Review cancelled");
    return this.reviewAbort.signal;
  }
}
496
+
497
/**
 * Computes the SHA-256 digest of `content`.
 *
 * @param content - text to fingerprint.
 * @returns the digest as a lowercase hexadecimal string.
 */
function hashContent(content: string): string {
  const hasher = createHash("sha256");
  hasher.update(content);
  const hexDigest = hasher.digest("hex");
  return hexDigest;
}
500
+
501
/**
 * Heuristically decides whether an error says the request exceeded the model's
 * context or size limit, by matching known phrases in its lowercased message.
 *
 * The message is taken from `err.message` when that is a string; a non-string
 * `message` (for example a numeric status) is stringified instead of crashing
 * on `.toLowerCase()`, and values without a message are stringified whole.
 *
 * @param err - anything that was thrown (Error, string, plain object, null, …).
 * @returns true when the message matches one of the overflow patterns.
 */
function isContextOverflowError(err: unknown): boolean {
  const rawMessage = (err as { message?: unknown } | null | undefined)?.message;
  const msg = (
    typeof rawMessage === "string" ? rawMessage : String(rawMessage ?? err)
  ).toLowerCase();

  // Phrases that are conclusive on their own.
  const phrases = ["too many tokens", "payload too large", "request too large"];
  // Word pairs that must both appear somewhere in the message.
  const wordPairs: ReadonlyArray<readonly [string, string]> = [
    ["context", "length"],
    ["context", "window"],
    ["context", "too long"],
    ["maximum", "token"],
    ["input", "too large"],
    ["prompt", "too long"],
    ["exceeds", "context"],
    ["exceeds", "token"],
  ];

  return (
    phrases.some((phrase) => msg.includes(phrase)) ||
    wordPairs.some(([first, second]) => msg.includes(first) && msg.includes(second))
  );
}
517
+
518
/**
 * Normalizes an arbitrary thrown value into an `Error`.
 *
 * - `Error` instances are returned unchanged (same object).
 * - Objects carrying a non-empty string `message` keep that message instead of
 *   collapsing to "[object Object]".
 * - Other objects are described by their JSON form, falling back to the
 *   `Object.prototype.toString` tag when they cannot be serialized.
 * - Primitives are stringified.
 *
 * @param err - the thrown value.
 * @returns an `Error` describing `err`; this function itself never throws.
 */
function toError(err: unknown): Error {
  if (err instanceof Error) return err;

  if (typeof err === "object" && err !== null) {
    const message = (err as { message?: unknown }).message;
    if (typeof message === "string" && message.length > 0) return new Error(message);

    try {
      const serialized: unknown = JSON.stringify(err);
      if (typeof serialized === "string") return new Error(serialized);
    } catch {
      // Circular structures or BigInt values cannot be serialized — use the tag below.
    }
    // Safe even for null-prototype objects, where String(err) would throw.
    return new Error(Object.prototype.toString.call(err));
  }

  return new Error(String(err));
}
package/package.json ADDED
@@ -0,0 +1,55 @@
1
+ {
2
+ "name": "@inceptionstack/pi-hard-no",
3
+ "version": "1.0.0",
4
+ "type": "module",
5
+ "description": "Pi extension — automatic code review after every agent turn",
6
+ "license": "MIT",
7
+ "publishConfig": {
8
+ "access": "public"
9
+ },
10
+ "repository": {
11
+ "type": "git",
12
+ "url": "https://github.com/inceptionstack/pi-hard-no.git"
13
+ },
14
+ "keywords": [
15
+ "pi",
16
+ "extension",
17
+ "code-review",
18
+ "ai",
19
+ "agent"
20
+ ],
21
+ "files": [
22
+ "*.ts",
23
+ "default-review-rules.md",
24
+ "LICENSE",
25
+ "README.md"
26
+ ],
27
+ "pi": {
28
+ "extensions": [
29
+ "./index.ts"
30
+ ]
31
+ },
32
+ "scripts": {
33
+ "lint": "eslint .",
34
+ "lint:fix": "eslint . --fix",
35
+ "format": "prettier --write .",
36
+ "format:check": "prettier --check .",
37
+ "typecheck": "tsc --noEmit",
38
+ "test": "vitest run",
39
+ "test:watch": "vitest",
40
+ "check": "npm run typecheck && npm run lint && npm run format:check && npm run test"
41
+ },
42
+ "peerDependencies": {
43
+ "@mariozechner/pi-coding-agent": "*"
44
+ },
45
+ "devDependencies": {
46
+ "@eslint/js": "^9.27.0",
47
+ "@mariozechner/pi-coding-agent": "^0.69.0",
48
+ "@types/node": "^22.15.17",
49
+ "eslint": "^9.27.0",
50
+ "prettier": "^3.5.3",
51
+ "typescript": "^5.8.3",
52
+ "typescript-eslint": "^8.32.1",
53
+ "vitest": "^4.1.5"
54
+ }
55
+ }
package/prompt.ts ADDED
@@ -0,0 +1,126 @@
1
+ /**
2
+ * prompt.ts — Review prompt construction
3
+ *
4
+ * The review prompt has three parts:
5
+ * 1. PROMPT_PREFIX — system preamble (tools, budget, workflow)
6
+ * 2. Auto-review rules — what to review / what not to report
7
+ * (default: DEFAULT_AUTO_REVIEW_RULES, overridable via .hardno/auto-review.md)
8
+ * 3. PROMPT_SUFFIX — response format, examples, verdict instructions
9
+ *
10
+ * The user can override ONLY part 2 via auto-review.md.
11
+ * review-rules.md still appends additional project-specific rules at the end.
12
+ */
13
+
14
+ // ── Part 1: Prefix (always included, not user-editable) ──
15
+
16
/**
 * Fixed opening of every review prompt: reviewer role, the requirement to read
 * each file, the available tools, the tool-call budget and the workflow.
 */
export const PROMPT_PREFIX = `You are a senior code reviewer. You will review files that were recently changed. For each file, you are given its full path, the git diff for that file, and related commit messages.

**You MUST read each file yourself** using the read(path) tool to see the full current contents. The diffs below show what changed, but you need the full file to understand context.

## Tools

- read(path) — read a file (USE THIS to read each reviewed file and any related files)
- bash(command) — run commands like grep/find/test
- grep, find, ls — for exploration

You do NOT have write or edit tools.
Do NOT output XML tags like <bash> or <read_file>. Use real function calls.

## Budget: 30 tool calls per reviewed file

You have a budget of **30 tool calls per file** being reviewed. For example, if 5 files are under review you may use up to 150 tool calls total.

## Workflow

1. Read each changed file with read(path) to see its full current contents.
2. Cross-reference with the per-file diffs and commit messages provided below.
3. Use additional tool calls for targeted verification (related files, tests, etc.).
4. Write your review. No more tool calls after that.`;
39
+
40
+ // ── Part 2: Default auto-review rules (user can override via auto-review.md) ──
41
+
42
/**
 * Default "what to review / what not to report" section. `buildReviewPrompt`
 * uses it whenever no non-blank auto-review rules are supplied.
 */
export const DEFAULT_AUTO_REVIEW_RULES = `## What to review (in priority order)

### Correctness bugs
- Off-by-one errors, boundary conditions (< vs <=, i=0 to length)
- Missing null/undefined checks, possible TypeError
- Missing error handling where a crash would propagate
- Logic bugs: inverted conditions, wrong operator, wrong variable
- Unhandled promise rejections, race conditions

### Security
- Hardcoded secrets, API keys, passwords
- SQL / shell / command injection (string interpolation into queries/commands)
- Path traversal, unsafe user input
- Auth bypasses

### Data loss or corruption
- Writes that could lose data
- Missing transactions where atomicity matters

### Architecture / Single Responsibility
- Functions or event handlers doing multiple unrelated things — recommend extraction
- Inline logic that should be a separate module/class for testability
- God functions (>50 lines mixing concerns) — suggest splitting

## What NOT to report
- Style / naming preferences
- Missing tests (unless the change is complex algorithmic logic)
- "Could be cleaner" opinions without a concrete SRP or DRY violation`;
70
+
71
+ // ── Part 3: Suffix (always included, not user-editable) ──
72
+
73
/**
 * Fixed closing of every review prompt: the required response structure, two
 * worked examples and the mandatory verdict tag.
 */
export const PROMPT_SUFFIX = `## Response format

Your response MUST follow this exact structure:

1. (If issues found) List of bullet points, each: - **<Severity>:** <file/location> — <one-line explanation>
Severity is one of: High, Medium, Low.
2. (If no issues) Write a single line: No issues found.
3. On the final line of your response, output exactly ONE of these verdict tags:
- <verdict>LGTM</verdict> — if no real bugs were found
- <verdict>ISSUES_FOUND</verdict> — if you flagged any issue above

## Example — issues found

- **High:** test-bugs.ts:12 — Off-by-one error: i <= items.length should be i < items.length.
- **High:** test-bugs.ts:6 — Hardcoded API key sk-prod-... leaks a secret.

<verdict>ISSUES_FOUND</verdict>

## Example — no issues

No issues found.

<verdict>LGTM</verdict>

The verdict tag is MANDATORY. Without it, your review is invalid and will be re-requested.

Caught bugs > silence. If something looks wrong and you're 70%+ confident, FLAG IT. The user can push back on false positives.`;
100
+
101
+ // ── Composite (for backwards compat / scaffold display) ──
102
+
103
/**
 * The complete default prompt: prefix, default auto-review rules and suffix,
 * separated by blank lines. Equals `buildReviewPrompt()` called with no arguments.
 */
export const DEFAULT_REVIEW_PROMPT = `${PROMPT_PREFIX}\n\n${DEFAULT_AUTO_REVIEW_RULES}\n\n${PROMPT_SUFFIX}`;
104
+
105
/**
 * Build the full review prompt.
 *
 * Sections are joined by blank lines in this order: fixed prefix, review rules,
 * fixed suffix, optional project-specific rules, optional quoted user request.
 * Every optional text is trimmed first, and a blank (empty or whitespace-only)
 * value is treated as absent, so no empty heading or dangling "> " line is emitted.
 *
 * @param autoReviewRules — contents of .hardno/auto-review.md, or null to use defaults
 * @param customRules — contents of .hardno/review-rules.md (appended at the end)
 * @param userRequest — the last user message that triggered the agent (what the user asked)
 * @returns the assembled prompt text.
 */
export function buildReviewPrompt(
  autoReviewRules?: string | null,
  customRules?: string | null,
  userRequest?: string | null,
): string {
  const reviewSection = autoReviewRules?.trim() || DEFAULT_AUTO_REVIEW_RULES;
  const sections = [PROMPT_PREFIX, reviewSection, PROMPT_SUFFIX];

  const projectRules = customRules?.trim();
  if (projectRules) {
    sections.push(`## Additional project-specific rules\n\n${projectRules}`);
  }

  const request = userRequest?.trim();
  if (request) {
    // Quote every line as a Markdown blockquote; accept both LF and CRLF input
    // so no stray "\r" ends up in the prompt.
    const quoted = request
      .split(/\r?\n/)
      .map((line) => `> ${line}`)
      .join("\n");
    sections.push(`## User request (what the agent was asked to do)\n\n${quoted}`);
  }

  return sections.join("\n\n");
}