@inceptionstack/pi-hard-no 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +287 -0
- package/architect.ts +128 -0
- package/changes.ts +404 -0
- package/commands.ts +635 -0
- package/context.ts +658 -0
- package/default-review-rules.md +150 -0
- package/git-roots.ts +94 -0
- package/helpers.ts +72 -0
- package/ignore.ts +105 -0
- package/index.ts +892 -0
- package/judge-skip-chain.ts +113 -0
- package/judge.ts +213 -0
- package/logger.ts +175 -0
- package/message-sender.ts +83 -0
- package/orchestrator.ts +521 -0
- package/package.json +55 -0
- package/prompt.ts +126 -0
- package/review-display.ts +571 -0
- package/reviewer.ts +433 -0
- package/scaffold.ts +120 -0
- package/session-kind.ts +139 -0
- package/settings.ts +332 -0
package/orchestrator.ts
ADDED
|
@@ -0,0 +1,521 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
|
|
3
|
+
import { type ContentSizeLimits, FALLBACK_LIMITS, type ReviewContent } from "./context";
|
|
4
|
+
import { hasFileChanges, isFormattingOnlyTurn, collectModifiedPaths } from "./changes";
|
|
5
|
+
import type { TrackedToolCall } from "./changes";
|
|
6
|
+
import { createReviewId, computeReviewTimeoutMs } from "./helpers";
|
|
7
|
+
import type { BashClassification } from "./judge";
|
|
8
|
+
import { buildReviewPrompt } from "./prompt";
|
|
9
|
+
import type { AutoReviewSettings } from "./settings";
|
|
10
|
+
import { runArchitectReview, shouldRunArchitectReview } from "./architect";
|
|
11
|
+
import type { ReviewResult, ReviewRunner } from "./reviewer";
|
|
12
|
+
import { log } from "./logger";
|
|
13
|
+
|
|
14
|
+
/**
 * Minimum length (after trimming) that built review content must reach.
 * Shorter content is treated as "no meaningful changes" and the review is skipped.
 */
const MIN_REVIEW_CONTENT_LENGTH = 50;
|
|
15
|
+
|
|
16
|
+
/** Result of one review step: either the senior review or the architect review. */
export type ReviewStepResult = {
  /** What the review runner returned for this step. */
  result: ReviewResult;
  /** Display label; the orchestrator uses "" for senior and "Architect Review" for architect steps. */
  label?: string;
  /** Loop progress text (`loop N/max`), set only when the senior review reported issues. */
  loopInfo?: string;
  /** Unique id for this review step (senior review cycle or architect review). */
  reviewId: string;
};
|
|
23
|
+
|
|
24
|
+
/**
 * Everything `ReviewOrchestrator.handleAgentEnd` can resolve to,
 * discriminated by `type`.
 */
export type ReviewOutcome =
  // No review ran; `reason` is a short machine-readable code such as
  // "disabled", "no_file_changes" or "duplicate_content".
  | { type: "skipped"; reason: string }
  // The review was aborted before its result could be used.
  | { type: "cancelled" }
  // The review failed with something other than a cancellation.
  | { type: "error"; error: Error }
  // The loop counter already reached `settings.maxReviewLoops`.
  | { type: "max_loops" }
  // The senior review produced a verdict.
  | {
      type: "completed";
      senior: ReviewStepResult;
      /** Present when the architect review ran and returned a result. */
      architect?: ReviewStepResult;
      /**
       * Populated when architect was supposed to run but failed (timeout, error).
       * Distinct from `architect` being undefined because it was skipped by the trigger logic.
       */
      architectFailure?: { reviewId: string; error: Error };
      /** Files covered by the senior review. */
      files: string[];
    };
|
|
38
|
+
|
|
39
|
+
/** Arguments handed to a `ContentBuilder`. */
export interface ContentBuilderInput {
  /** Tool calls recorded during the agent turn under review. */
  agentToolCalls: TrackedToolCall[];
  /** Optional progress callback; the orchestrator in this file does not supply one. */
  onStatus?: (msg: string) => void;
  /** Ignore patterns forwarded from the orchestrator input (omitted when that value is null). */
  ignorePatterns?: string[];
  /** Git roots forwarded from the orchestrator input. */
  gitRoots?: Set<string>;
  /** Size limits; the orchestrator passes `FALLBACK_LIMITS` when retrying after a context overflow. */
  limits?: ContentSizeLimits;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Async function that turns a turn's tool calls into reviewable content.
 * May resolve to `null`, which the orchestrator treats as "no meaningful changes".
 */
export type ContentBuilder = (input: ContentBuilderInput) => Promise<ReviewContent | null>;
|
|
48
|
+
|
|
49
|
+
/** Per-turn input to `ReviewOrchestrator.handleAgentEnd`. */
export interface ReviewOrchestratorInput {
  /** Tool calls recorded during the agent turn; drive the skip checks and the judge gate. */
  agentToolCalls: TrackedToolCall[];
  /** Paths reported as modified; the placeholder entry "(bash file op)" is ignored. */
  modifiedFiles: Set<string>;
  /** Forwarded unchanged to the content builder. */
  gitRoots: Set<string>;
  /** Working directory passed to the judge, the senior runner and the architect review. */
  cwd: string;
  settings: AutoReviewSettings;
  /** Passed to `buildReviewPrompt` as the custom rules argument. */
  customRules: string | null;
  /** Passed to `buildReviewPrompt` as the auto-review rules argument. */
  autoReviewRules: string | null;
  /** Forwarded to the content builder (null is converted to undefined). */
  ignorePatterns: string[] | null;
  /** Passed to the architect review as its custom rules. */
  architectRules: string | null;
  /** Passed to `buildReviewPrompt` as the user request. */
  lastUserMessage: string | null;
  /** Activity callback for the senior review; also told when a smaller-context retry starts. */
  onActivity?: (description: string) => void;
  /** Tool-call callback forwarded to the senior review runner. */
  onToolCall?: (toolName: string, targetPath: string | null) => void;
  /** Activity callback forwarded to the architect review. */
  onArchitectActivity?: (description: string) => void;
  /** Tool-call callback forwarded to the architect review. */
  onArchitectToolCall?: (toolName: string, targetPath: string | null) => void;
  /** Called once content is built and accepted, just before the senior review runs. */
  onContentReady?: (files: string[], loopCount: number, timeoutMs: number) => void;
  /** Called right before the architect review starts, with the accumulated session files. */
  onArchitectStart?: (files: string[], timeoutMs: number) => void;
  /** Check if a file still exists on disk. Used to prune deleted files from architect review. */
  fileExists?: (path: string) => Promise<boolean>;
}
|
|
69
|
+
|
|
70
|
+
/** Collaborators injected into `ReviewOrchestrator` at construction time. */
export interface ReviewOrchestratorOptions {
  /** Executes a review prompt and returns the verdict. */
  runner: ReviewRunner;
  /** Builds the content (diffs, file list) that gets reviewed. */
  contentBuilder: ContentBuilder;
  /**
   * Optional duplicate-review suppressor ("judge").
   *
   * Used only when it is provided AND `settings.judgeEnabled` is true. The
   * orchestrator then asks it to classify every bash tool call before any
   * content is built. When ALL bash calls come back as `inspection_vcs_noop`
   * and the turn contains no write/edit tool calls, the review is skipped
   * with reason="judge_read_only".
   *
   * It is injected rather than hard-imported so tests can mock it without
   * touching the SDK.
   *
   * Fail-open: a missing judge, a throwing judge, or a `modifying`/`unsure`
   * answer for any command means the review runs as normal.
   */
  judge?: JudgeClassifier;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Shape of the bash-command classifier the orchestrator calls.
 *
 * Implementations are expected to always resolve and never reject; a failure
 * should surface as `"unsure"`, which the orchestrator treats as "run the
 * review". See `judge.ts` for the production implementation.
 */
export type JudgeClassifier = (
  command: string,
  opts: { signal: AbortSignal; cwd: string; model: string; timeoutMs: number },
) => Promise<BashClassification>;
|
|
96
|
+
|
|
97
|
+
/**
 * Coordinates the review that follows an agent turn.
 *
 * `handleAgentEnd` applies a series of cheap skip checks, optionally asks the
 * injected judge whether the turn was read-only, runs the senior review
 * through the injected runner and, after an LGTM verdict, may run an architect
 * review over everything accumulated since the last architect run or reset.
 */
export class ReviewOrchestrator {
  private readonly runner: ReviewRunner;
  private readonly contentBuilder: ContentBuilder;
  private readonly judge?: JudgeClassifier;

  /** Controller of the in-flight judge gate or review; null when nothing is running. */
  private reviewAbort: AbortController | null = null;
  private isReviewingValue = false;
  private reviewEnabled = true;
  /** Review cycles started since the counter was last reset; compared with `settings.maxReviewLoops`. */
  private loopCount = 0;
  private peakReviewLoopCount = 0;
  /** SHA-256 of the content most recently reviewed; "" when nothing has been reviewed yet. */
  private lastReviewedContentHash = "";
  private architectDone = false;
  /** Truncated content of each senior review, handed to the architect as a summary. */
  private sessionChangeSummaries: string[] = [];
  private sessionChangedFiles = new Set<string>();
  private sessionHasGitContent = false;
  private lastReviewHadIssues = false;

  constructor(opts: ReviewOrchestratorOptions) {
    this.runner = opts.runner;
    this.contentBuilder = opts.contentBuilder;
    this.judge = opts.judge;
  }

  /** True while a senior/architect review is in flight (not set during the judge gate). */
  get isReviewing(): boolean {
    return this.isReviewingValue;
  }

  get isEnabled(): boolean {
    return this.reviewEnabled;
  }

  /** Whether the most recent senior review reported issues. */
  get lastHadIssues(): boolean {
    return this.lastReviewHadIssues;
  }

  get currentLoopCount(): number {
    return this.loopCount;
  }

  /** Signal of the in-flight judge gate or review, or null when idle. */
  get abortSignal(): AbortSignal | null {
    return this.reviewAbort?.signal ?? null;
  }

  /** Turns reviewing on or off; turning it on also clears the per-cycle state. */
  setEnabled(enabled: boolean): void {
    this.reviewEnabled = enabled;
    if (enabled) this.resetCycleState();
  }

  /** Aborts any in-flight work and returns the orchestrator to its initial state. */
  reset(): void {
    this.reviewAbort?.abort();
    this.reviewAbort = null;
    this.isReviewingValue = false;
    this.resetCycleState();
    this.lastReviewHadIssues = false;
  }

  /** Aborts the in-flight judge gate or review, if any, without touching other state. */
  cancel(): void {
    this.reviewAbort?.abort();
  }

  /**
   * Entry point called when an agent turn ends.
   *
   * @param input tool calls, settings, rules and callbacks for this turn.
   * @returns the outcome: skipped (with a reason), cancelled, error,
   *          max_loops, or completed with the senior (and possibly architect) result.
   */
  async handleAgentEnd(input: ReviewOrchestratorInput): Promise<ReviewOutcome> {
    if (!this.reviewEnabled) return { type: "skipped", reason: "disabled" };

    if (this.loopCount >= input.settings.maxReviewLoops) {
      return { type: "max_loops" };
    }

    if (!hasFileChanges(input.agentToolCalls)) {
      return { type: "skipped", reason: "no_file_changes" };
    }

    if (isFormattingOnlyTurn(input.agentToolCalls)) {
      log("skipping review: formatting/linting only");
      return { type: "skipped", reason: "formatting_only" };
    }

    const realFiles = new Set([
      ...[...input.modifiedFiles].filter((f) => f !== "(bash file op)"),
      ...collectModifiedPaths(input.agentToolCalls),
    ]);
    if (realFiles.size === 0) {
      log("skipping review: no real file paths found");
      return { type: "skipped", reason: "no_real_files" };
    }

    // Judge gate: if enabled, ask a cheap LLM to classify each bash command.
    // If they're all read-only AND no write/edit tool call ran, skip the
    // full review entirely. See judge.ts + eval/RESULTS.md for the pick.
    if (input.settings.judgeEnabled && this.judge) {
      const judgeAbort = new AbortController();
      this.reviewAbort = judgeAbort;
      try {
        const skip = await this.isTurnReadOnlyViaJudge(input, judgeAbort.signal);
        if (skip) {
          log("skipping review: judge classified turn as read-only");
          return { type: "skipped", reason: "judge_read_only" };
        }
      } catch (err: unknown) {
        // Fail-open: any judge-gate error → proceed with the normal review.
        log(
          `judge gate failed (${ReviewOrchestrator.describeError(err)}) — proceeding with review`,
        );
      } finally {
        this.reviewAbort = null;
      }
      // cancel()/reset() during the judge gate must stop the cycle. Without this
      // check the abort is lost, because a brand-new controller is created below.
      if (judgeAbort.signal.aborted) {
        log("review cancelled during judge gate");
        return { type: "cancelled" };
      }
    }

    this.loopCount++;
    this.isReviewingValue = true;
    // Keep a local reference: reset() nulls `this.reviewAbort`, so only the
    // local controller can tell us afterwards that this cycle was aborted.
    const abort = new AbortController();
    this.reviewAbort = abort;

    const seniorReviewId = createReviewId();
    log(
      `[${seniorReviewId}] review cycle started (loop ${this.loopCount}/${input.settings.maxReviewLoops})`,
    );

    try {
      let best = await this.buildContent(input);

      if (!ReviewOrchestrator.hasMeaningfulContent(best)) {
        log(`[${seniorReviewId}] no meaningful changes, skipping`);
        // Previous issues are resolved (files deleted/changes gone) — clear indicators
        this.lastReviewHadIssues = false;
        this.loopCount = 0;
        return { type: "skipped", reason: "no_meaningful_changes" };
      }

      log(`[${seniorReviewId}] best:`, {
        label: best.label,
        files: best.files,
        contentLen: best.content.length,
      });

      const contentHash = hashContent(best.content);
      if (contentHash === this.lastReviewedContentHash) {
        log(`[${seniorReviewId}] Skipping — same content as last review`);
        return { type: "skipped", reason: "duplicate_content" };
      }

      const seniorTimeoutMs = computeReviewTimeoutMs(
        input.settings.reviewTimeoutMs,
        best.files.length,
      );
      input.onContentReady?.(best.files, this.loopCount, seniorTimeoutMs);
      log(
        `[${seniorReviewId}] Reviewing ${best.files.length} files via ${best.label || "git diff"}: ${best.files.join(", ")}`,
      );

      let result: ReviewResult;
      try {
        result = await this.runSeniorReview(input, best, seniorReviewId);
      } catch (retryErr: unknown) {
        // Only a context overflow is retried (once) with smaller content.
        if (!isContextOverflowError(retryErr)) throw retryErr;
        log(`[${seniorReviewId}] Context overflow, retrying with fallback limits`);
        input.onActivity?.("retrying with smaller context…");
        const smallBest = await this.buildContent(input, FALLBACK_LIMITS);
        if (!ReviewOrchestrator.hasMeaningfulContent(smallBest)) {
          log(`[${seniorReviewId}] Fallback content too small, skipping review`);
          this.lastReviewHadIssues = false;
          this.loopCount = 0;
          return { type: "skipped", reason: "fallback_too_small" };
        }
        best = smallBest;
        result = await this.runSeniorReview(input, best, seniorReviewId);
      }

      // Check for late cancellation: if abort fired while runSeniorReview was
      // settling, discard the result instead of feeding it back to the agent.
      // The local controller is checked so a reset() (which clears
      // `this.reviewAbort`) is detected as well as a cancel().
      if (abort.signal.aborted) {
        log(`[${seniorReviewId}] Review cancelled after review completed (race window)`);
        return { type: "cancelled" };
      }

      this.sessionChangeSummaries.push(best.content.slice(0, 5000));
      for (const f of best.files) this.sessionChangedFiles.add(f);
      if (best.isGitBased) this.sessionHasGitContent = true;
      this.lastReviewedContentHash = hashContent(best.content);

      const senior: ReviewStepResult = {
        result,
        label: "",
        loopInfo: undefined,
        reviewId: seniorReviewId,
      };

      if (result.isLgtm) {
        this.lastReviewHadIssues = false;
        this.loopCount = 0;

        const architectOutcome = await this.runArchitectIfNeeded(input);
        if (architectOutcome && "step" in architectOutcome) {
          return {
            type: "completed",
            senior,
            architect: architectOutcome.step,
            files: best.files,
          };
        }
        if (architectOutcome && "failure" in architectOutcome) {
          // Architect attempted but failed (timeout, error). Surface to the caller
          // so the user sees a message instead of a silent swallow.
          return {
            type: "completed",
            senior,
            architectFailure: architectOutcome.failure,
            files: best.files,
          };
        }
        // No architect ran — clear session accumulators so stale files
        // from this cycle don't leak into a future unrelated cycle.
        this.resetCycleState();
        return { type: "completed", senior, files: best.files };
      }

      this.peakReviewLoopCount = Math.max(this.peakReviewLoopCount, this.loopCount);
      this.lastReviewHadIssues = true;
      senior.loopInfo = `loop ${this.loopCount}/${input.settings.maxReviewLoops}`;
      return { type: "completed", senior, files: best.files };
    } catch (err: unknown) {
      if (ReviewOrchestrator.isCancellation(err)) return { type: "cancelled" };
      return { type: "error", error: toError(err) };
    } finally {
      this.isReviewingValue = false;
      this.reviewAbort = null;
    }
  }

  /** Calls the injected content builder with this turn's inputs and optional size limits. */
  private async buildContent(
    input: ReviewOrchestratorInput,
    limits?: ContentSizeLimits,
  ): Promise<ReviewContent | null> {
    return await this.contentBuilder({
      agentToolCalls: input.agentToolCalls,
      ignorePatterns: input.ignorePatterns ?? undefined,
      gitRoots: input.gitRoots,
      limits,
    });
  }

  /**
   * Builds the senior prompt (rules + separator + content) and runs it.
   * Throws "Review cancelled" (via `requiredSignal`) when no controller is active.
   */
  private async runSeniorReview(
    input: ReviewOrchestratorInput,
    content: ReviewContent,
    reviewId: string,
  ): Promise<ReviewResult> {
    const prompt = `${buildReviewPrompt(input.autoReviewRules, input.customRules, input.lastUserMessage)}\n\n---\n\n${content.content}`;
    log(`[${reviewId}] prompt length:`, prompt.length);
    const result = await this.runner(prompt, {
      signal: this.requiredSignal(),
      cwd: input.cwd,
      model: input.settings.model,
      thinkingLevel: input.settings.thinkingLevel,
      timeoutMs: computeReviewTimeoutMs(input.settings.reviewTimeoutMs, content.files.length),
      filesReviewed: content.files,
      reviewId,
      onActivity: input.onActivity,
      onToolCall: input.onToolCall,
    });
    log(`[${reviewId}] result:`, {
      isLgtm: result.isLgtm,
      durationMs: result.durationMs,
      textLen: result.text.length,
    });
    return result;
  }

  /**
   * Runs the architect review when it is enabled and the trigger logic asks for it.
   *
   * @returns `{ step }` on success, `{ failure }` when the review was attempted
   *          but failed, or `undefined` when it was not triggered.
   * @throws the original error when the architect review was cancelled.
   */
  private async runArchitectIfNeeded(
    input: ReviewOrchestratorInput,
  ): Promise<
    { step: ReviewStepResult } | { failure: { reviewId: string; error: Error } } | undefined
  > {
    // Prune deleted files from session accumulator before checking architect trigger
    if (input.fileExists) {
      const existing = new Set<string>();
      for (const f of this.sessionChangedFiles) {
        if (await input.fileExists(f)) existing.add(f);
      }
      this.sessionChangedFiles = existing;
    }

    const willRunArchitect =
      input.settings.architectEnabled &&
      !this.architectDone &&
      shouldRunArchitectReview([...this.sessionChangedFiles], this.sessionHasGitContent);

    if (!willRunArchitect) return undefined;

    this.architectDone = true;
    const architectReviewId = createReviewId();
    const fileCount = this.sessionChangedFiles.size;
    // Architect explores the codebase with grep/read across many files; scale the timeout
    // with session file count like the senior review does.
    const architectTimeoutMs = computeReviewTimeoutMs(input.settings.reviewTimeoutMs, fileCount);
    log(
      `[${architectReviewId}] architect: running — ${fileCount} files reviewed across session (timeoutMs=${architectTimeoutMs})`,
    );
    input.onArchitectStart?.([...this.sessionChangedFiles], architectTimeoutMs);

    try {
      const summaryText = this.sessionChangeSummaries.join("\n\n---\n\n");
      const result = await runArchitectReview(this.runner, {
        signal: this.requiredSignal(),
        cwd: input.cwd,
        model: input.settings.model,
        customRules: input.architectRules,
        sessionChangeSummary: summaryText,
        reviewId: architectReviewId,
        timeoutMs: architectTimeoutMs,
        onActivity: input.onArchitectActivity,
        onToolCall: input.onArchitectToolCall,
      });
      return { step: { result, label: "Architect Review", reviewId: architectReviewId } };
    } catch (err: unknown) {
      if (ReviewOrchestrator.isCancellation(err)) throw err;
      log(
        `[${architectReviewId}] ERROR: Architect review failed: ${ReviewOrchestrator.describeError(err)}`,
      );
      return { failure: { reviewId: architectReviewId, error: toError(err) } };
    } finally {
      // Whatever happened, the session accumulators start over after an architect attempt.
      this.sessionChangeSummaries = [];
      this.sessionChangedFiles = new Set();
      this.peakReviewLoopCount = 0;
      this.architectDone = false;
      this.sessionHasGitContent = false;
    }
  }

  /**
   * Ask the judge to classify each bash tool call in this turn. Returns true
   * only if the turn is confidently read-only:
   *   - No write/edit tool calls happened.
   *   - Every bash command classified as `inspection_vcs_noop`.
   *
   * Fail-open: any classification other than `inspection_vcs_noop`, or an
   * aborted signal, flips the answer back to "run the review".
   *
   * Commands are classified one at a time, in order, and the loop stops at
   * the first non-read-only answer.
   */
  private async isTurnReadOnlyViaJudge(
    input: ReviewOrchestratorInput,
    signal: AbortSignal,
  ): Promise<boolean> {
    if (!this.judge) return false;

    // Any explicit write/edit tool call is an unambiguous modification.
    // Don't waste a judge call on those — go straight to review.
    if (input.agentToolCalls.some((tc) => tc.name === "write" || tc.name === "edit")) {
      return false;
    }

    const bashCalls = input.agentToolCalls.filter((tc) => tc.name === "bash");
    if (bashCalls.length === 0) {
      // No bash and no write/edit, but we got past `realFiles.size === 0`,
      // so something else pushed files into the set. Safer to review.
      return false;
    }

    let classifiedAny = false;

    for (const tc of bashCalls) {
      const cmd = String(tc.input?.command ?? "").trim();
      if (!cmd) continue;
      const classification = await this.judge(cmd, {
        signal,
        cwd: input.cwd,
        model: input.settings.judgeModel,
        timeoutMs: input.settings.judgeTimeoutMs,
      });
      log(`judge: ${classification} ← ${cmd.slice(0, 80).replace(/\n/g, " ")}`);
      if (classification !== "inspection_vcs_noop") return false;
      if (signal.aborted) return false;
      classifiedAny = true;
    }
    // Safety: only return true if we actually classified at least one command.
    // A turn with bash calls that all have empty command strings shouldn't be
    // treated as "confidently read-only" — bail to review instead.
    return classifiedAny;
  }

  /** Clears loop counters, the duplicate-content hash and the session accumulators. */
  private resetCycleState(): void {
    this.loopCount = 0;
    this.peakReviewLoopCount = 0;
    this.lastReviewedContentHash = "";
    this.architectDone = false;
    this.sessionChangeSummaries = [];
    this.sessionChangedFiles = new Set();
    this.sessionHasGitContent = false;
  }

  /** Returns the active abort signal, or throws "Review cancelled" when there is none. */
  private requiredSignal(): AbortSignal {
    if (!this.reviewAbort) throw new Error("Review cancelled");
    return this.reviewAbort.signal;
  }

  /** True when built content exists, lists at least one file and is long enough to review. */
  private static hasMeaningfulContent(content: ReviewContent | null): content is ReviewContent {
    return (
      !!content &&
      content.files.length > 0 &&
      content.content.trim().length >= MIN_REVIEW_CONTENT_LENGTH
    );
  }

  /** True when `err` carries the message `requiredSignal` uses to signal cancellation. */
  private static isCancellation(err: unknown): boolean {
    return (err as { message?: unknown } | null | undefined)?.message === "Review cancelled";
  }

  /** Text for log lines: the error's `message` when it has one, otherwise the value itself. */
  private static describeError(err: unknown): string {
    const message = (err as { message?: unknown } | null | undefined)?.message;
    return String(message ?? err);
  }
}
|
|
496
|
+
|
|
497
|
+
/** Returns the SHA-256 digest of `content` as a lowercase hex string. */
function hashContent(content: string): string {
  const hasher = createHash("sha256");
  hasher.update(content);
  return hasher.digest("hex");
}
|
|
500
|
+
|
|
501
|
+
/**
 * Each entry lists the lowercase fragments that must ALL appear in an error
 * message for it to count as a context/size overflow.
 */
const CONTEXT_OVERFLOW_PATTERNS: ReadonlyArray<readonly string[]> = [
  ["too many tokens"],
  ["context", "length"],
  ["context", "window"],
  ["context", "too long"],
  ["maximum", "token"],
  ["input", "too large"],
  ["prompt", "too long"],
  ["exceeds", "context"],
  ["exceeds", "token"],
  ["payload too large"],
  ["request too large"],
];

/**
 * Heuristically decides whether an error means the prompt was too large.
 *
 * The check is case-insensitive and looks at `err.message` when present,
 * otherwise at the stringified value. A non-string `message` is coerced
 * rather than dereferenced, so this never throws while classifying an error.
 *
 * @param err anything caught from the review runner.
 * @returns true when the message matches one of the known overflow phrasings.
 */
function isContextOverflowError(err: unknown): boolean {
  const rawMessage = (err as { message?: unknown } | null | undefined)?.message;
  const msg = String(rawMessage ?? err).toLowerCase();
  return CONTEXT_OVERFLOW_PATTERNS.some((fragments) =>
    fragments.every((fragment) => msg.includes(fragment)),
  );
}
|
|
517
|
+
|
|
518
|
+
/** Passes `Error` instances through untouched and wraps anything else in a new `Error` whose message is the stringified value. */
function toError(err: unknown): Error {
  return err instanceof Error ? err : new Error(String(err));
}
|
package/package.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@inceptionstack/pi-hard-no",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"description": "Pi extension — automatic code review after every agent turn",
|
|
6
|
+
"license": "MIT",
|
|
7
|
+
"publishConfig": {
|
|
8
|
+
"access": "public"
|
|
9
|
+
},
|
|
10
|
+
"repository": {
|
|
11
|
+
"type": "git",
|
|
12
|
+
"url": "https://github.com/inceptionstack/pi-hard-no.git"
|
|
13
|
+
},
|
|
14
|
+
"keywords": [
|
|
15
|
+
"pi",
|
|
16
|
+
"extension",
|
|
17
|
+
"code-review",
|
|
18
|
+
"ai",
|
|
19
|
+
"agent"
|
|
20
|
+
],
|
|
21
|
+
"files": [
|
|
22
|
+
"*.ts",
|
|
23
|
+
"default-review-rules.md",
|
|
24
|
+
"LICENSE",
|
|
25
|
+
"README.md"
|
|
26
|
+
],
|
|
27
|
+
"pi": {
|
|
28
|
+
"extensions": [
|
|
29
|
+
"./index.ts"
|
|
30
|
+
]
|
|
31
|
+
},
|
|
32
|
+
"scripts": {
|
|
33
|
+
"lint": "eslint .",
|
|
34
|
+
"lint:fix": "eslint . --fix",
|
|
35
|
+
"format": "prettier --write .",
|
|
36
|
+
"format:check": "prettier --check .",
|
|
37
|
+
"typecheck": "tsc --noEmit",
|
|
38
|
+
"test": "vitest run",
|
|
39
|
+
"test:watch": "vitest",
|
|
40
|
+
"check": "npm run typecheck && npm run lint && npm run format:check && npm run test"
|
|
41
|
+
},
|
|
42
|
+
"peerDependencies": {
|
|
43
|
+
"@mariozechner/pi-coding-agent": "*"
|
|
44
|
+
},
|
|
45
|
+
"devDependencies": {
|
|
46
|
+
"@eslint/js": "^9.27.0",
|
|
47
|
+
"@mariozechner/pi-coding-agent": "^0.69.0",
|
|
48
|
+
"@types/node": "^22.15.17",
|
|
49
|
+
"eslint": "^9.27.0",
|
|
50
|
+
"prettier": "^3.5.3",
|
|
51
|
+
"typescript": "^5.8.3",
|
|
52
|
+
"typescript-eslint": "^8.32.1",
|
|
53
|
+
"vitest": "^4.1.5"
|
|
54
|
+
}
|
|
55
|
+
}
|
package/prompt.ts
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* prompt.ts — Review prompt construction
|
|
3
|
+
*
|
|
4
|
+
* The review prompt has three parts:
|
|
5
|
+
* 1. PROMPT_PREFIX — system preamble (tools, budget, workflow)
|
|
6
|
+
* 2. Auto-review rules — what to review / what not to report
|
|
7
|
+
* (default: DEFAULT_AUTO_REVIEW_RULES, overridable via .hardno/auto-review.md)
|
|
8
|
+
* 3. PROMPT_SUFFIX — response format, examples, verdict instructions
|
|
9
|
+
*
|
|
10
|
+
* The user can override ONLY part 2 via auto-review.md.
|
|
11
|
+
* review-rules.md still appends additional project-specific rules at the end.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
// ── Part 1: Prefix (always included, not user-editable) ──
|
|
15
|
+
|
|
16
|
+
/**
 * Opening section of every review prompt: reviewer role, available tools,
 * the per-file tool-call budget and the expected workflow.
 */
export const PROMPT_PREFIX = `You are a senior code reviewer. You will review files that were recently changed. For each file, you are given its full path, the git diff for that file, and related commit messages.

**You MUST read each file yourself** using the read(path) tool to see the full current contents. The diffs below show what changed, but you need the full file to understand context.

## Tools

- read(path) — read a file (USE THIS to read each reviewed file and any related files)
- bash(command) — run commands like grep/find/test
- grep, find, ls — for exploration

You do NOT have write or edit tools.
Do NOT output XML tags like <bash> or <read_file>. Use real function calls.

## Budget: 30 tool calls per reviewed file

You have a budget of **30 tool calls per file** being reviewed. For example, if 5 files are under review you may use up to 150 tool calls total.

## Workflow

1. Read each changed file with read(path) to see its full current contents.
2. Cross-reference with the per-file diffs and commit messages provided below.
3. Use additional tool calls for targeted verification (related files, tests, etc.).
4. Write your review. No more tool calls after that.`;
|
|
39
|
+
|
|
40
|
+
// ── Part 2: Default auto-review rules (user can override via auto-review.md) ──
|
|
41
|
+
|
|
42
|
+
/**
 * Default middle section of the review prompt: what the reviewer should look
 * for, in priority order, and what it should not report. `buildReviewPrompt`
 * uses it whenever no non-blank override is supplied.
 */
export const DEFAULT_AUTO_REVIEW_RULES = `## What to review (in priority order)

### Correctness bugs
- Off-by-one errors, boundary conditions (< vs <=, i=0 to length)
- Missing null/undefined checks, possible TypeError
- Missing error handling where a crash would propagate
- Logic bugs: inverted conditions, wrong operator, wrong variable
- Unhandled promise rejections, race conditions

### Security
- Hardcoded secrets, API keys, passwords
- SQL / shell / command injection (string interpolation into queries/commands)
- Path traversal, unsafe user input
- Auth bypasses

### Data loss or corruption
- Writes that could lose data
- Missing transactions where atomicity matters

### Architecture / Single Responsibility
- Functions or event handlers doing multiple unrelated things — recommend extraction
- Inline logic that should be a separate module/class for testability
- God functions (>50 lines mixing concerns) — suggest splitting

## What NOT to report
- Style / naming preferences
- Missing tests (unless the change is complex algorithmic logic)
- "Could be cleaner" opinions without a concrete SRP or DRY violation`;
|
|
70
|
+
|
|
71
|
+
// ── Part 3: Suffix (always included, not user-editable) ──
|
|
72
|
+
|
|
73
|
+
/**
 * Closing section of every review prompt: the required response structure,
 * two worked examples and the mandatory `<verdict>` tag instructions.
 */
export const PROMPT_SUFFIX = `## Response format

Your response MUST follow this exact structure:

1. (If issues found) List of bullet points, each: - **<Severity>:** <file/location> — <one-line explanation>
Severity is one of: High, Medium, Low.
2. (If no issues) Write a single line: No issues found.
3. On the final line of your response, output exactly ONE of these verdict tags:
- <verdict>LGTM</verdict> — if no real bugs were found
- <verdict>ISSUES_FOUND</verdict> — if you flagged any issue above

## Example — issues found

- **High:** test-bugs.ts:12 — Off-by-one error: i <= items.length should be i < items.length.
- **High:** test-bugs.ts:6 — Hardcoded API key sk-prod-... leaks a secret.

<verdict>ISSUES_FOUND</verdict>

## Example — no issues

No issues found.

<verdict>LGTM</verdict>

The verdict tag is MANDATORY. Without it, your review is invalid and will be re-requested.

Caught bugs > silence. If something looks wrong and you're 70%+ confident, FLAG IT. The user can push back on false positives.`;
|
|
100
|
+
|
|
101
|
+
// ── Composite (for backwards compat / scaffold display) ──
|
|
102
|
+
|
|
103
|
+
/**
 * Prefix, default rules and suffix joined by blank lines.
 * Same text `buildReviewPrompt()` produces when called with no arguments.
 */
export const DEFAULT_REVIEW_PROMPT = `${PROMPT_PREFIX}\n\n${DEFAULT_AUTO_REVIEW_RULES}\n\n${PROMPT_SUFFIX}`;
|
|
104
|
+
|
|
105
|
+
/**
 * Build the full review prompt.
 *
 * Sections are joined by blank lines in this order: prefix, review rules,
 * suffix, then the optional project rules and user request. Values that are
 * null, undefined or whitespace-only are treated as absent, so they never
 * produce an empty section.
 *
 * @param autoReviewRules — contents of .hardno/auto-review.md, or null to use defaults
 *                          (used trimmed; blank falls back to DEFAULT_AUTO_REVIEW_RULES)
 * @param customRules — contents of .hardno/review-rules.md (appended at the end, unmodified)
 * @param userRequest — the last user message that triggered the agent (what the user asked);
 *                      rendered as a markdown block quote
 * @returns the assembled prompt text
 */
export function buildReviewPrompt(
  autoReviewRules?: string | null,
  customRules?: string | null,
  userRequest?: string | null,
): string {
  const reviewSection = autoReviewRules?.trim() || DEFAULT_AUTO_REVIEW_RULES;
  const sections = [PROMPT_PREFIX, reviewSection, PROMPT_SUFFIX];

  // Guard on the trimmed value, but append the text exactly as supplied.
  if (customRules?.trim()) {
    sections.push(`## Additional project-specific rules\n\n${customRules}`);
  }
  if (userRequest?.trim()) {
    const quoted = userRequest.split("\n").join("\n> ");
    sections.push(`## User request (what the agent was asked to do)\n\n> ${quoted}`);
  }

  return sections.join("\n\n");
}
|