@dreki-gg/pi-code-reviewer 0.5.0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
import { writeFile } from 'node:fs/promises';
|
|
2
|
+
import { tmpdir } from 'node:os';
|
|
3
|
+
import { join } from 'node:path';
|
|
1
4
|
import type { ExtensionAPI } from '@earendil-works/pi-coding-agent';
|
|
2
5
|
import { Type } from 'typebox';
|
|
3
6
|
|
|
@@ -7,16 +10,34 @@ import { discoverLenses, getLensContent } from '../lenses';
|
|
|
7
10
|
import { resolveModelPlan } from '../model-plan';
|
|
8
11
|
import { runPipeline } from '../passes';
|
|
9
12
|
import {
|
|
10
|
-
buildDiffSection,
|
|
11
13
|
buildLensResult,
|
|
14
|
+
buildPipelineResult,
|
|
12
15
|
buildReviewBasePrompt,
|
|
16
|
+
buildSinglePassResult,
|
|
13
17
|
pickLensToolOutputs,
|
|
14
|
-
renderPipelineReport,
|
|
15
18
|
runTools,
|
|
16
19
|
} from '../reviewer';
|
|
17
|
-
import type {
|
|
20
|
+
import type { ReviewPointer } from '../reviewer';
|
|
18
21
|
import type { LensResult, ReviewConfig } from '../types';
|
|
19
22
|
|
|
23
|
+
/**
 * Spill the full review context to a temp Markdown file and return a pointer
 * (path + byte size + line count). Both pi's tool-output and `read` caps are
 * ~50KB / 2000 lines, so large reviews would otherwise be truncated and lost
 * on compaction. The on-disk file survives compaction and can be paged.
 *
 * The file name combines the timestamp, the process id and a random suffix,
 * and the file is created exclusively (`wx`) with owner-only permissions, so
 * two reviews finishing in the same millisecond cannot overwrite each other
 * and an already-existing entry at the path makes the write fail instead of
 * being silently replaced.
 *
 * Node-only IO (no Bun) per the extension runtime constraint.
 *
 * @param content Full review context to persist.
 * @returns Path of the written file plus its UTF-8 byte size and line count.
 * @throws Rejects with the underlying fs error when the file cannot be created.
 */
async function writeReviewTempFile(content: string): Promise<ReviewPointer> {
  // Timestamp keeps files sortable; pid + random suffix make the name unique.
  const unique = `${Date.now()}-${process.pid}-${Math.random().toString(36).slice(2, 10)}`;
  const path = join(tmpdir(), `pi-code-review-${unique}.md`);
  await writeFile(path, content, { encoding: 'utf8', flag: 'wx', mode: 0o600 });
  return {
    path,
    bytes: Buffer.byteLength(content, 'utf8'),
    lines: content.split('\n').length,
  };
}
|
|
40
|
+
|
|
20
41
|
export function registerReviewTool(pi: ExtensionAPI) {
|
|
21
42
|
pi.registerTool({
|
|
22
43
|
name: 'code_review',
|
|
@@ -148,17 +169,33 @@ export function registerReviewTool(pi: ExtensionAPI) {
|
|
|
148
169
|
signal,
|
|
149
170
|
);
|
|
150
171
|
ctx.ui.setStatus('code-review', undefined);
|
|
151
|
-
|
|
152
|
-
|
|
172
|
+
// Every pass failed (e.g. the review model/pi-ai was unavailable for
|
|
173
|
+
// each call). The swallowed failures would render as a misleading
|
|
174
|
+
// "0 findings" report — instead, degrade to the single-pass prompt so
|
|
175
|
+
// the reviewing agent still produces a real review.
|
|
176
|
+
const allPassesFailed =
|
|
177
|
+
config.review.passes > 0 && pipeline.telemetry.failedPasses >= config.review.passes;
|
|
178
|
+
if (!allPassesFailed) {
|
|
179
|
+
return buildPipelineResult(
|
|
180
|
+
{
|
|
181
|
+
pipeline,
|
|
182
|
+
diff,
|
|
183
|
+
basePrompt,
|
|
184
|
+
lensNames,
|
|
185
|
+
availableLenses: [...available.keys()],
|
|
186
|
+
changedFiles,
|
|
187
|
+
},
|
|
188
|
+
writeReviewTempFile,
|
|
189
|
+
onUpdate,
|
|
190
|
+
);
|
|
191
|
+
}
|
|
192
|
+
onUpdate?.({
|
|
193
|
+
content: [{ type: 'text', text: 'all review passes failed — single-pass fallback' }],
|
|
153
194
|
details: {
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
availableLenses: [...available.keys()],
|
|
157
|
-
changedFiles,
|
|
158
|
-
findings: pipeline.findings,
|
|
159
|
-
telemetry: pipeline.telemetry,
|
|
195
|
+
failedPasses: pipeline.telemetry.failedPasses,
|
|
196
|
+
passError: pipeline.telemetry.passErrorSample,
|
|
160
197
|
},
|
|
161
|
-
};
|
|
198
|
+
});
|
|
162
199
|
} catch (cause) {
|
|
163
200
|
// Pipeline failed hard (e.g. model/pi-ai unavailable at runtime) —
|
|
164
201
|
// degrade to the single-pass prompt instead of failing the review.
|
|
@@ -172,20 +209,25 @@ export function registerReviewTool(pi: ExtensionAPI) {
|
|
|
172
209
|
|
|
173
210
|
ctx.ui.setStatus('code-review', undefined);
|
|
174
211
|
|
|
175
|
-
// Fallback:
|
|
176
|
-
//
|
|
177
|
-
//
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
212
|
+
// Fallback: spill the full single-pass review context to a temp file and
|
|
213
|
+
// return a compact summary + pointer (degrades gracefully on empty
|
|
214
|
+
// context or a write failure). Used when no model is available (e.g.
|
|
215
|
+
// print mode) or passes are disabled in config.
|
|
216
|
+
//
|
|
217
|
+
// This is the PRIMARY truncation culprit: the full context embeds the
|
|
218
|
+
// diff (up to 50KB) plus every lens's tool outputs (20KB each), which
|
|
219
|
+
// easily blows past pi's 50KB tool-output cap.
|
|
220
|
+
return buildSinglePassResult(
|
|
221
|
+
{
|
|
222
|
+
results,
|
|
223
|
+
diff,
|
|
224
|
+
lensNames,
|
|
185
225
|
availableLenses: [...available.keys()],
|
|
186
226
|
changedFiles,
|
|
187
227
|
},
|
|
188
|
-
|
|
228
|
+
writeReviewTempFile,
|
|
229
|
+
onUpdate,
|
|
230
|
+
);
|
|
189
231
|
},
|
|
190
232
|
});
|
|
191
233
|
}
|
|
@@ -204,42 +246,3 @@ function resolveLensNames(
|
|
|
204
246
|
return [...available.keys()];
|
|
205
247
|
}
|
|
206
248
|
|
|
207
|
-
/**
|
|
208
|
-
* Build the agent-facing review instructions appended to the report. The diff
|
|
209
|
-
* is embedded ONCE (not per lens) followed by each lens's section — large
|
|
210
|
-
* diffs would otherwise be repeated for every lens, bloating the tool output.
|
|
211
|
-
*/
|
|
212
|
-
function buildToolContext(results: LensResult[], diff: DiffSource): string {
|
|
213
|
-
const sections = results.map((r) => r._lensSection).filter(Boolean) as string[];
|
|
214
|
-
if (sections.length === 0) return '';
|
|
215
|
-
|
|
216
|
-
return [
|
|
217
|
-
`# Code Review — ${new Date().toISOString().slice(0, 10)}`,
|
|
218
|
-
'',
|
|
219
|
-
'## Changes',
|
|
220
|
-
'```',
|
|
221
|
-
diff.stat.trim() || '(no diffstat)',
|
|
222
|
-
'```',
|
|
223
|
-
'',
|
|
224
|
-
'Evaluate the diff through each lens below; the tool outputs are automated analysis.',
|
|
225
|
-
'',
|
|
226
|
-
buildDiffSection(diff),
|
|
227
|
-
'',
|
|
228
|
-
'## Lenses',
|
|
229
|
-
'',
|
|
230
|
-
...sections,
|
|
231
|
-
'',
|
|
232
|
-
'## Instructions',
|
|
233
|
-
'',
|
|
234
|
-
'For each lens above, review the diff against its criteria and output a JSON array of findings:',
|
|
235
|
-
'',
|
|
236
|
-
'```json',
|
|
237
|
-
'[',
|
|
238
|
-
' { "file": "path/to/file.ts", "line": 42, "severity": "warning", "message": "Description" }',
|
|
239
|
-
']',
|
|
240
|
-
'```',
|
|
241
|
-
'',
|
|
242
|
-
'After each lens JSON array, write a 2-3 sentence summary.',
|
|
243
|
-
'If a lens has no findings, return an empty array `[]` and note the code looks good.',
|
|
244
|
-
].join('\n');
|
|
245
|
-
}
|
|
@@ -23,6 +23,14 @@ export type DiffOptions = { base?: string; staged?: boolean };
|
|
|
23
23
|
* the whole review. */
|
|
24
24
|
const GIT_TIMEOUT_MS = 30_000;
|
|
25
25
|
|
|
26
|
+
/** Cap on untracked files diffed against /dev/null so a repo full of generated
|
|
27
|
+
* junk can't blow up the prompt. The whole diff is truncated downstream too. */
|
|
28
|
+
const MAX_UNTRACKED_FILES = 200;
|
|
29
|
+
|
|
30
|
+
/** Null-device path passed as the "old" side of `git diff --no-index`, so an
|
|
31
|
+
* untracked file is rendered as a full new-file diff. */
|
|
32
|
+
const NULL_DEVICE = '/dev/null';
|
|
33
|
+
|
|
26
34
|
function git(args: string[], cwd: string): Effect.Effect<string, ExecError, Executor> {
|
|
27
35
|
return Effect.gen(function* () {
|
|
28
36
|
const executor = yield* Executor;
|
|
@@ -31,6 +39,51 @@ function git(args: string[], cwd: string): Effect.Effect<string, ExecError, Exec
|
|
|
31
39
|
});
|
|
32
40
|
}
|
|
33
41
|
|
|
42
|
+
/**
 * Produce a new-file diff for every untracked (not yet `git add`ed) file by
 * diffing it against the null device, so brand-new files appear in a
 * working-directory review (`git diff HEAD` does not list them).
 *
 * Read-only: only `git ls-files` and `git diff --no-index` are run, so the
 * index is never modified. Every git call is wrapped so that a failure becomes
 * an empty string; one unreadable file therefore cannot fail the whole review.
 *
 * @param cwd Directory the git commands run in.
 * @returns `diff`: the non-empty per-file diffs joined by newlines (at most
 *   `MAX_UNTRACKED_FILES` files are diffed); `files`: every untracked path
 *   that was listed, uncapped.
 */
function collectUntrackedEffect(
  cwd: string,
): Effect.Effect<{ diff: string; files: string[] }, never, Executor> {
  // Run git and downgrade any failure to empty output.
  const quietGit = (args: string[]) => git(args, cwd).pipe(Effect.orElseSucceed(() => ''));

  return Effect.gen(function* () {
    const listing = yield* quietGit(['ls-files', '--others', '--exclude-standard']);
    const files = listing
      .split('\n')
      .map((name) => name.trim())
      .filter((name) => name.length > 0);
    if (files.length === 0) return { diff: '', files: [] };

    const perFile = yield* Effect.forEach(
      files.slice(0, MAX_UNTRACKED_FILES),
      (file) => quietGit(['diff', '--no-index', '--', NULL_DEVICE, file]),
      { concurrency: 4 },
    );
    const diff = perFile.filter((chunk) => chunk.trim().length > 0).join('\n');
    return { diff, files };
  });
}
|
|
76
|
+
|
|
77
|
+
/**
 * Append a one-line-per-file summary of untracked files to a `--stat` block so
 * the change overview reflects new files that git's own stat never lists.
 *
 * At most `MAX_UNTRACKED_FILES` files are listed. When the list is capped, the
 * closing note reports both the total found and how many were listed, instead
 * of claiming every file was included.
 *
 * @param stat Existing diffstat text (may be empty).
 * @param files Untracked file paths.
 * @returns `stat` unchanged when there are no untracked files; otherwise the
 *   stat followed by the per-file lines and a closing note.
 */
function appendUntrackedStat(stat: string, files: string[]): string {
  if (files.length === 0) return stat;
  const shown = files.slice(0, MAX_UNTRACKED_FILES);
  const lines = shown.map((file) => ` ${file} | (new, untracked)`);
  const note =
    shown.length < files.length
      ? `${files.length} untracked file(s) found; first ${shown.length} included`
      : `${files.length} untracked file(s) included`;
  // filter(Boolean) drops the leading entry when the original stat was empty.
  return [stat.trimEnd(), ...lines, note].filter(Boolean).join('\n');
}
|
|
86
|
+
|
|
34
87
|
/** Collect the diff from the working directory or a specific base ref. */
|
|
35
88
|
export function collectDiffEffect(
|
|
36
89
|
cwd: string,
|
|
@@ -49,20 +102,31 @@ export function collectDiffEffect(
|
|
|
49
102
|
return { diff, stat, label: `changes since ${options.base}` };
|
|
50
103
|
}
|
|
51
104
|
|
|
52
|
-
// Default:
|
|
53
|
-
//
|
|
105
|
+
// Default: EVERYTHING the agent is working on but hasn't committed —
|
|
106
|
+
// tracked changes (unstaged + staged) relative to HEAD, PLUS untracked
|
|
107
|
+
// (brand-new) files. `git diff HEAD` covers only the former; untracked
|
|
108
|
+
// files are collected separately and merged so new files are reviewed too.
|
|
109
|
+
// `git diff HEAD` also fails on a repo with no commits (HEAD is unborn), so
|
|
54
110
|
// tolerate that and fall back to the bare working-directory diff.
|
|
55
111
|
const headDiff = yield* git(['diff', 'HEAD'], cwd).pipe(Effect.either);
|
|
112
|
+
const untracked = yield* collectUntrackedEffect(cwd);
|
|
56
113
|
|
|
57
|
-
|
|
114
|
+
let tracked: string;
|
|
115
|
+
let stat: string;
|
|
116
|
+
let label: string;
|
|
58
117
|
if (headDiff._tag === 'Left' || !headDiff.right.trim()) {
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
118
|
+
// No HEAD (fresh repo) or no tracked changes → use the bare working dir.
|
|
119
|
+
tracked = yield* git(['diff'], cwd);
|
|
120
|
+
stat = yield* git(['diff', '--stat'], cwd);
|
|
121
|
+
label = 'working directory changes';
|
|
122
|
+
} else {
|
|
123
|
+
tracked = headDiff.right;
|
|
124
|
+
stat = yield* git(['diff', 'HEAD', '--stat'], cwd);
|
|
125
|
+
label = 'all uncommitted changes';
|
|
62
126
|
}
|
|
63
127
|
|
|
64
|
-
const
|
|
65
|
-
return { diff:
|
|
128
|
+
const diff = [tracked, untracked.diff].filter((part) => part.trim()).join('\n');
|
|
129
|
+
return { diff, stat: appendUntrackedStat(stat, untracked.files), label };
|
|
66
130
|
});
|
|
67
131
|
}
|
|
68
132
|
|
|
@@ -72,19 +136,31 @@ export function getChangedFilesEffect(
|
|
|
72
136
|
options: DiffOptions,
|
|
73
137
|
): Effect.Effect<string[], ExecError, Executor> {
|
|
74
138
|
return Effect.gen(function* () {
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
139
|
+
if (options.staged || options.base) {
|
|
140
|
+
const args = ['diff', '--name-only', options.staged ? '--staged' : options.base!];
|
|
141
|
+
const stdout = yield* git(args, cwd);
|
|
142
|
+
return splitPaths(stdout);
|
|
143
|
+
}
|
|
79
144
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
.
|
|
84
|
-
|
|
145
|
+
// Default: tracked changes vs HEAD (tolerate an unborn HEAD) plus untracked
|
|
146
|
+
// files, deduped, so the changed-file list mirrors the merged default diff.
|
|
147
|
+
const tracked = yield* git(['diff', '--name-only', 'HEAD'], cwd).pipe(
|
|
148
|
+
Effect.orElseSucceed(() => ''),
|
|
149
|
+
);
|
|
150
|
+
const untracked = yield* git(['ls-files', '--others', '--exclude-standard'], cwd).pipe(
|
|
151
|
+
Effect.orElseSucceed(() => ''),
|
|
152
|
+
);
|
|
153
|
+
return [...new Set([...splitPaths(tracked), ...splitPaths(untracked)])];
|
|
85
154
|
});
|
|
86
155
|
}
|
|
87
156
|
|
|
157
|
+
/** Split newline-separated command output into trimmed, non-empty paths. */
function splitPaths(stdout: string): string[] {
  const paths: string[] = [];
  for (const rawLine of stdout.split('\n')) {
    const candidate = rawLine.trim();
    if (candidate !== '') paths.push(candidate);
  }
  return paths;
}
|
|
163
|
+
|
|
88
164
|
// ── Promise wrappers (live Executor from pi) ──────────────────────────────────
|
|
89
165
|
|
|
90
166
|
export function collectDiff(
|
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
|
|
17
17
|
import { Effect } from 'effect';
|
|
18
18
|
|
|
19
|
+
import { causeMessage } from './errors';
|
|
19
20
|
import { type ModelResolution, Reviewer, makeReviewerService } from './effects/model';
|
|
20
21
|
import type {
|
|
21
22
|
CandidateFinding,
|
|
@@ -355,7 +356,11 @@ export function runPassesEffect(
|
|
|
355
356
|
config: ReviewPipelineConfig,
|
|
356
357
|
plan: ModelPlan,
|
|
357
358
|
signal?: AbortSignal,
|
|
358
|
-
): Effect.Effect<
|
|
359
|
+
): Effect.Effect<
|
|
360
|
+
{ perPass: RawFinding[][]; failedPasses: number; passErrorSample?: string },
|
|
361
|
+
never,
|
|
362
|
+
Reviewer
|
|
363
|
+
> {
|
|
359
364
|
return Effect.gen(function* () {
|
|
360
365
|
const reviewer = yield* Reviewer;
|
|
361
366
|
const indices = Array.from({ length: config.passes }, (_unused, index) => index);
|
|
@@ -380,19 +385,29 @@ export function runPassesEffect(
|
|
|
380
385
|
})
|
|
381
386
|
.pipe(Effect.either);
|
|
382
387
|
return result._tag === 'Right'
|
|
383
|
-
? { findings: parseFindings(result.right), failed: false }
|
|
384
|
-
: { findings: [] as RawFinding[], failed: true };
|
|
388
|
+
? { findings: parseFindings(result.right), failed: false, error: undefined }
|
|
389
|
+
: { findings: [] as RawFinding[], failed: true, error: describePassError(result.left) };
|
|
385
390
|
}),
|
|
386
391
|
{ concurrency: Math.max(1, config.concurrency) },
|
|
387
392
|
);
|
|
388
393
|
|
|
394
|
+
const failures = outcomes.filter((outcome) => outcome.failed);
|
|
389
395
|
return {
|
|
390
396
|
perPass: outcomes.map((outcome) => outcome.findings),
|
|
391
|
-
failedPasses:
|
|
397
|
+
failedPasses: failures.length,
|
|
398
|
+
passErrorSample: failures[0]?.error,
|
|
392
399
|
};
|
|
393
400
|
});
|
|
394
401
|
}
|
|
395
402
|
|
|
403
|
+
/**
 * Best-effort human message for a failed pass.
 *
 * Resolution order: a non-blank string failure is returned as-is; otherwise the
 * error's own non-blank `message`; otherwise `causeMessage` of its `cause`.
 * `null`/`undefined` failures are tolerated (treated as having neither field)
 * rather than throwing while reading `.message`.
 *
 * @param error The failure value of a pass (shape not guaranteed).
 * @returns A message describing the failure.
 */
function describePassError(error: unknown): string {
  if (typeof error === 'string' && error.trim()) return error;
  // Nullish errors become an empty record so the property reads below are safe.
  const details = (error ?? {}) as { message?: unknown; cause?: unknown };
  if (typeof details.message === 'string' && details.message.trim()) return details.message;
  return causeMessage(details.cause);
}
|
|
410
|
+
|
|
396
411
|
function buildValidatorUser(basePrompt: string, candidates: CandidateFinding[]): string {
|
|
397
412
|
const list = candidates
|
|
398
413
|
.map((candidate, index) => {
|
|
@@ -514,7 +529,12 @@ export function runPipelineEffect(
|
|
|
514
529
|
): Effect.Effect<PipelineResult, never, Reviewer> {
|
|
515
530
|
return Effect.gen(function* () {
|
|
516
531
|
hooks.onStage?.(`running ${config.passes} passes`);
|
|
517
|
-
const { perPass, failedPasses } = yield* runPassesEffect(
|
|
532
|
+
const { perPass, failedPasses, passErrorSample } = yield* runPassesEffect(
|
|
533
|
+
basePrompt,
|
|
534
|
+
config,
|
|
535
|
+
plan,
|
|
536
|
+
signal,
|
|
537
|
+
);
|
|
518
538
|
|
|
519
539
|
const buckets = bucketFindings(perPass);
|
|
520
540
|
const { kept, droppedLowSignal } = selectCandidates(buckets, config);
|
|
@@ -547,6 +567,7 @@ export function runPipelineEffect(
|
|
|
547
567
|
droppedFalsePositives,
|
|
548
568
|
droppedLowSignal,
|
|
549
569
|
failedPasses,
|
|
570
|
+
passErrorSample,
|
|
550
571
|
passModels: plan.passes.map((assignment) => assignment.label),
|
|
551
572
|
validatorModel: plan.validator.label,
|
|
552
573
|
};
|
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
import { platform } from 'node:os';
|
|
2
|
-
import type {
|
|
2
|
+
import type {
|
|
3
|
+
AgentToolResult,
|
|
4
|
+
AgentToolUpdateCallback,
|
|
5
|
+
ExtensionAPI,
|
|
6
|
+
} from '@earendil-works/pi-coding-agent';
|
|
3
7
|
import { Effect } from 'effect';
|
|
4
8
|
|
|
5
9
|
import type { DiffSource } from './diff';
|
|
@@ -118,6 +122,71 @@ export function buildReviewBasePrompt(lensSections: string[], diff: DiffSource):
|
|
|
118
122
|
].join('\n');
|
|
119
123
|
}
|
|
120
124
|
|
|
125
|
+
/** Pointer to the temp file holding the full review context. */
|
|
126
|
+
export type ReviewPointer = { path: string; bytes: number; lines: number };
|
|
127
|
+
|
|
128
|
+
/** Convert a byte count to whole kilobytes (nearest KB, never below 1). */
function toKb(bytes: number): number {
  const rounded = Math.round(bytes / 1024);
  return rounded < 1 ? 1 : rounded;
}
|
|
132
|
+
|
|
133
|
+
/**
 * Condense a git `--stat` block into a one-line "N files, +ins -del" summary.
 *
 * The summary line is searched for from the end of the block rather than
 * assumed to be the very last line, because extra lines (for example
 * untracked-file entries and a closing note) may have been appended after
 * git's own "N files changed" line.
 *
 * @param stat Diffstat text, possibly with extra trailing lines.
 * @returns e.g. `"2 files, +8, -2"`, or `''` when no summary line is found.
 */
function summarizeDiffStat(stat: string): string {
  const summaryLine = stat
    .split('\n')
    .map((line) => line.trim())
    .reverse()
    .find((line) => /\d+ files? changed/.test(line));
  if (!summaryLine) return '';

  const files = summaryLine.match(/(\d+) files? changed/)?.[1];
  if (!files) return '';
  const insertions = summaryLine.match(/(\d+) insertions?\(\+\)/)?.[1];
  const deletions = summaryLine.match(/(\d+) deletions?\(-\)/)?.[1];

  const parts = [`${files} file${files === '1' ? '' : 's'}`];
  if (insertions) parts.push(`+${insertions}`);
  if (deletions) parts.push(`-${deletions}`);
  return parts.join(', ');
}
|
|
148
|
+
|
|
149
|
+
/**
 * Short inline header for the single-pass fallback: a title, the lens names
 * and the diff scope (label plus condensed diffstat when one is available).
 * The full review context is not included here. Pure (no IO).
 *
 * @param lensNames Lenses taking part in the review; rendered as `(none)` when empty.
 * @param diff Diff whose `label` and `stat` describe the scope.
 * @returns A three-line Markdown summary.
 */
export function buildInlineSummary(lensNames: string[], diff: DiffSource): string {
  const condensed = summarizeDiffStat(diff.stat);
  let scope = diff.label;
  if (condensed) scope = `${diff.label} (${condensed})`;
  const lenses = lensNames.join(', ') || '(none)';
  return `# Code Review Summary\n- **Lenses**: ${lenses}\n- **Diff**: ${scope}`;
}
|
|
163
|
+
|
|
164
|
+
/**
 * Render the inline pointer to the temp file that holds the full review
 * context, including its size and a hint to page it with `read` offset/limit.
 * Pure (no IO).
 *
 * @param pointer Location and size of the saved context file.
 * @param mode `'single-pass'` tells the agent to read the file to perform the
 *   review; `'pipeline'` only offers it for inspecting the diff behind a finding.
 * @returns Multi-line text to append to the tool output.
 */
export function buildPointer(pointer: ReviewPointer, mode: 'single-pass' | 'pipeline'): string {
  const { path, lines, bytes } = pointer;
  const size = `(${lines} lines, ${toKb(bytes)}KB)`;
  const rows =
    mode === 'single-pass'
      ? [
          '📄 Full review context (diff, lens definitions, tool outputs, instructions)',
          `saved to: \`${path}\``,
          `${size}. **Read that file** to perform the review — page large content with`,
          '`read` offset/limit.',
        ]
      : [
          '---',
          `📄 Full diff + lens context saved to: \`${path}\``,
          `${size}. Use \`read\` (offset/limit) to inspect the diff behind a finding.`,
        ];
  return rows.join('\n');
}
|
|
189
|
+
|
|
121
190
|
const SEVERITY_EMOJI: Record<ValidatedFinding['severity'], string> = {
|
|
122
191
|
blocker: '🔴',
|
|
123
192
|
warning: '🟡',
|
|
@@ -159,10 +228,38 @@ export function renderPipelineReport(result: PipelineResult, diff: DiffSource):
|
|
|
159
228
|
'',
|
|
160
229
|
];
|
|
161
230
|
|
|
231
|
+
// A pass fails when its model call errors; failures are swallowed into 0
|
|
232
|
+
// findings, so an all-failed run must NOT masquerade as a clean review.
|
|
233
|
+
const someFailed = telemetry.failedPasses > 0;
|
|
234
|
+
const allFailed = telemetry.passes > 0 && telemetry.failedPasses >= telemetry.passes;
|
|
235
|
+
const errSuffix = telemetry.passErrorSample ? ` — e.g. ${telemetry.passErrorSample}` : '';
|
|
236
|
+
|
|
162
237
|
if (findings.length === 0) {
|
|
238
|
+
if (allFailed) {
|
|
239
|
+
return [
|
|
240
|
+
...header,
|
|
241
|
+
`> ⚠️ **Inconclusive — all ${telemetry.passes} review pass(es) failed${errSuffix}.**`,
|
|
242
|
+
'> No analysis actually ran; this is NOT a clean result. Re-run the review',
|
|
243
|
+
'> (check that the review model / pi-ai is available) before trusting it.',
|
|
244
|
+
].join('\n');
|
|
245
|
+
}
|
|
246
|
+
if (someFailed) {
|
|
247
|
+
return [
|
|
248
|
+
...header,
|
|
249
|
+
`> ⚠️ **Partial review — ${telemetry.failedPasses}/${telemetry.passes} pass(es) failed${errSuffix}.**`,
|
|
250
|
+
`> The ${telemetry.passes - telemetry.failedPasses} surviving pass(es) found nothing, but coverage was reduced.`,
|
|
251
|
+
].join('\n');
|
|
252
|
+
}
|
|
163
253
|
return [...header, 'No bugs found that survived validation. ✅'].join('\n');
|
|
164
254
|
}
|
|
165
255
|
|
|
256
|
+
const partialWarning = someFailed
|
|
257
|
+
? [
|
|
258
|
+
`> ⚠️ **Partial review — ${telemetry.failedPasses}/${telemetry.passes} pass(es) failed${errSuffix}; findings below may be incomplete.**`,
|
|
259
|
+
'',
|
|
260
|
+
]
|
|
261
|
+
: [];
|
|
262
|
+
|
|
166
263
|
// Only attribute models per finding when more than one distinct model ran
|
|
167
264
|
// (a bake-off); with a single model it's noise.
|
|
168
265
|
const multiModel = new Set(telemetry.passModels).size > 1;
|
|
@@ -180,7 +277,7 @@ export function renderPipelineReport(result: PipelineResult, diff: DiffSource):
|
|
|
180
277
|
return `- ${SEVERITY_EMOJI[finding.severity]} **${finding.severity}** ${where} — ${finding.message} _(${meta})_${justification}`;
|
|
181
278
|
});
|
|
182
279
|
|
|
183
|
-
return [...header, '## Findings', '', ...lines].join('\n');
|
|
280
|
+
return [...header, ...partialWarning, '## Findings', '', ...lines].join('\n');
|
|
184
281
|
}
|
|
185
282
|
|
|
186
283
|
/** Build the lens-specific section of the review prompt (no diff duplication). */
|
|
@@ -235,6 +332,158 @@ export function buildLensResult(
|
|
|
235
332
|
};
|
|
236
333
|
}
|
|
237
334
|
|
|
335
|
+
/**
 * Assemble the agent-facing review document for the single-pass fallback:
 * dated title, diffstat, the diff section (embedded once, not per lens), every
 * lens section, and the output-format instructions.
 *
 * @param results Lens results; only those carrying a `_lensSection` contribute.
 * @param diff Diff being reviewed.
 * @returns The Markdown document, or `''` when no lens produced a section.
 */
export function buildToolContext(results: LensResult[], diff: DiffSource): string {
  const lensSections: string[] = [];
  for (const result of results) {
    if (result._lensSection) lensSections.push(result._lensSection as string);
  }
  if (lensSections.length === 0) return '';

  const today = new Date().toISOString().slice(0, 10);
  const changes = ['## Changes', '```', diff.stat.trim() || '(no diffstat)', '```'];
  const diffSection = buildDiffSection(diff);
  const instructions = [
    '## Instructions',
    '',
    'For each lens above, review the diff against its criteria and output a JSON array of findings:',
    '',
    '```json',
    '[',
    ' { "file": "path/to/file.ts", "line": 42, "severity": "warning", "message": "Description" }',
    ']',
    '```',
    '',
    'After each lens JSON array, write a 2-3 sentence summary.',
    'If a lens has no findings, return an empty array `[]` and note the code looks good.',
  ];

  return [
    `# Code Review — ${today}`,
    '',
    ...changes,
    '',
    'Evaluate the diff through each lens below; the tool outputs are automated analysis.',
    '',
    diffSection,
    '',
    '## Lenses',
    '',
    ...lensSections,
    '',
    ...instructions,
  ].join('\n');
}
|
|
375
|
+
|
|
376
|
+
/** Persist the full review context somewhere durable, returning a pointer. */
|
|
377
|
+
export type ReviewTempWriter = (content: string) => Promise<ReviewPointer>;
|
|
378
|
+
|
|
379
|
+
type ReviewToolResult = AgentToolResult<Record<string, unknown>>;
|
|
380
|
+
|
|
381
|
+
/**
 * Build the tool result for the single-pass fallback.
 *
 * The full review context is handed to the injected writer and the inline
 * payload is reduced to a summary plus a pointer to the written file. Two
 * degraded paths exist: an empty context returns a "no applicable lenses"
 * notice without writing anything, and a failing writer is reported through
 * `onUpdate` and answered with the full context inline instead of throwing.
 *
 * @param args Lens results, diff and metadata for the review.
 * @param writeTemp Persists the context and resolves to its pointer.
 * @param onUpdate Optional progress callback, used to report a write failure.
 * @returns The tool result (text content plus details).
 */
export async function buildSinglePassResult(
  args: {
    results: LensResult[];
    diff: DiffSource;
    lensNames: string[];
    availableLenses: string[];
    changedFiles: string[];
  },
  writeTemp: ReviewTempWriter,
  onUpdate?: AgentToolUpdateCallback,
): Promise<ReviewToolResult> {
  const { results, diff, lensNames, availableLenses, changedFiles } = args;
  const fullContext = buildToolContext(results, diff);
  const baseDetails: Record<string, unknown> = {
    mode: 'single-pass',
    lensCount: lensNames.length,
    availableLenses,
    changedFiles,
  };

  // Nothing to review: answer directly rather than pointing at an empty file.
  if (fullContext.trim() === '') {
    const known = availableLenses.join(', ') || '(none)';
    return {
      content: [{ type: 'text', text: `No applicable lenses for this review. Available: ${known}.` }],
      details: baseDetails,
    };
  }

  try {
    const pointer = await writeTemp(fullContext);
    const header = buildInlineSummary(lensNames, diff);
    const directive = buildPointer(pointer, 'single-pass');
    return {
      content: [{ type: 'text', text: `${header}\n\n${directive}` }],
      details: { ...baseDetails, contextFile: pointer.path },
    };
  } catch (cause) {
    const writeError = cause instanceof Error ? cause.message : String(cause);
    onUpdate?.({
      content: [{ type: 'text', text: 'temp-file write failed — returning inline context' }],
      details: { writeError },
    });
    return { content: [{ type: 'text', text: fullContext }], details: baseDetails };
  }
}
|
|
438
|
+
|
|
439
|
+
/**
 * Build the tool result for a completed pipeline run.
 *
 * The rendered findings report always stays inline. The base prompt (diff +
 * lens context) is handed to the injected writer, and on success a pointer to
 * the written file is appended to the report and recorded as `contextFile`.
 * A failing writer is reported through `onUpdate` and otherwise ignored, so
 * the findings are still returned, just without a pointer.
 *
 * @param args Pipeline output, diff, base prompt and review metadata.
 * @param writeTemp Persists the context and resolves to its pointer.
 * @param onUpdate Optional progress callback, used to report a write failure.
 * @returns The tool result (report text plus details).
 */
export async function buildPipelineResult(
  args: {
    pipeline: PipelineResult;
    diff: DiffSource;
    basePrompt: string;
    lensNames: string[];
    availableLenses: string[];
    changedFiles: string[];
  },
  writeTemp: ReviewTempWriter,
  onUpdate?: AgentToolUpdateCallback,
): Promise<ReviewToolResult> {
  const report = renderPipelineReport(args.pipeline, args.diff);
  const details: Record<string, unknown> = {
    mode: 'pipeline',
    lensCount: args.lensNames.length,
    availableLenses: args.availableLenses,
    changedFiles: args.changedFiles,
    findings: args.pipeline.findings,
    telemetry: args.pipeline.telemetry,
  };

  let text = report;
  try {
    const pointer = await writeTemp(args.basePrompt);
    text = `${report}\n\n${buildPointer(pointer, 'pipeline')}`;
    // Only record the file when the writer actually reported a path.
    if (pointer.path) details.contextFile = pointer.path;
  } catch (cause) {
    const writeError = cause instanceof Error ? cause.message : String(cause);
    onUpdate?.({
      content: [
        { type: 'text', text: 'temp-file write failed — findings returned without diff pointer' },
      ],
      details: { writeError },
    });
  }

  return { content: [{ type: 'text', text }], details };
}
|
|
486
|
+
|
|
238
487
|
/** Promise wrapper: run a deduped tool set once, building a live Executor from `pi`. */
|
|
239
488
|
export function runTools(
|
|
240
489
|
pi: Pick<ExtensionAPI, 'exec'>,
|
|
@@ -95,6 +95,9 @@ export type PipelineTelemetry = {
|
|
|
95
95
|
droppedFalsePositives: number;
|
|
96
96
|
droppedLowSignal: number;
|
|
97
97
|
failedPasses: number;
|
|
98
|
+
/** A representative error message from the first failed pass, surfaced so a
|
|
99
|
+
* fully-failed run reports WHY instead of a misleading "0 findings". */
|
|
100
|
+
passErrorSample?: string;
|
|
98
101
|
/** Model key used for each pass (parallel to pass index). */
|
|
99
102
|
passModels: string[];
|
|
100
103
|
/** Model key used for the validator stage. */
|