@dreki-gg/pi-code-reviewer 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -0
- package/extensions/code-reviewer/commands/review-tool.ts +64 -61
- package/extensions/code-reviewer/config.ts +11 -0
- package/extensions/code-reviewer/passes.ts +14 -96
- package/extensions/code-reviewer/rejections.ts +115 -0
- package/extensions/code-reviewer/reviewer.ts +223 -1
- package/extensions/code-reviewer/similarity.ts +81 -0
- package/extensions/code-reviewer/types.ts +22 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -54,6 +54,14 @@ multi-stage pipeline modeled on Cursor's Bugbot:
|
|
|
54
54
|
candidate and drops false positives. It **fails open** — if the validator
|
|
55
55
|
errors, candidates are surfaced unvalidated rather than silently lost.
|
|
56
56
|
|
|
57
|
+
4. **Recorded rejections.** The candidates the validator refutes are appended to
|
|
58
|
+
`.code-review/rejections.jsonl`. On later runs, any finding that matches a
|
|
59
|
+
past rejection is **downranked and tagged** `⟲ previously rejected` — never
|
|
60
|
+
hidden, so nothing is silently suppressed, but resurfaced false-positives
|
|
61
|
+
sink below fresh findings. The store dedupes and is capped. Persisting is
|
|
62
|
+
best-effort: an FS error never breaks a review. Disable with
|
|
63
|
+
`review.recordRejections: false`.
|
|
64
|
+
|
|
57
65
|
The tool returns finished, validated findings as a Markdown report (vote count,
|
|
58
66
|
confidence, validator justification) plus structured `details`.
|
|
59
67
|
|
|
@@ -140,6 +148,8 @@ Run `/review-init` to scaffold these (customized for your project's tools) into
|
|
|
140
148
|
| `review.concurrency` | `= passes` | Max passes run concurrently. |
|
|
141
149
|
| `review.temperature` | `0.4` | Base sampling temperature; each pass adds a small jitter so passes diverge. |
|
|
142
150
|
| `review.maxFindings` | `50` | Hard cap on findings returned. |
|
|
151
|
+
| `review.recordRejections` | `true` | Persist validator false-positives and downrank+tag matches on later runs. |
|
|
152
|
+
| `rejectionsFile` | `.code-review/rejections.jsonl` | Path (relative to cwd) of the recorded-rejections store. |
|
|
143
153
|
| `review.passModel` | session model | Model for ALL passes: a spec string (`"provider/id"`, bare id, or name) or `{ "model", "reasoning" }`. |
|
|
144
154
|
| `review.passModels` | — | List of models **rotated round-robin across passes** — a bake-off in one run. Overrides `passModel`. |
|
|
145
155
|
| `review.validateModel` | session model | Model for the validator stage (string or `{ "model", "reasoning" }`). |
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
import { writeFile } from 'node:fs/promises';
|
|
2
|
+
import { tmpdir } from 'node:os';
|
|
3
|
+
import { join } from 'node:path';
|
|
1
4
|
import type { ExtensionAPI } from '@earendil-works/pi-coding-agent';
|
|
2
5
|
import { Type } from 'typebox';
|
|
3
6
|
|
|
@@ -7,16 +10,40 @@ import { discoverLenses, getLensContent } from '../lenses';
|
|
|
7
10
|
import { resolveModelPlan } from '../model-plan';
|
|
8
11
|
import { runPipeline } from '../passes';
|
|
9
12
|
import {
|
|
10
|
-
|
|
13
|
+
appendRejections,
|
|
14
|
+
applyRejections,
|
|
15
|
+
loadRejections,
|
|
16
|
+
toRejectionRecords,
|
|
17
|
+
} from '../rejections';
|
|
18
|
+
import {
|
|
11
19
|
buildLensResult,
|
|
20
|
+
buildPipelineResult,
|
|
12
21
|
buildReviewBasePrompt,
|
|
22
|
+
buildSinglePassResult,
|
|
13
23
|
pickLensToolOutputs,
|
|
14
|
-
renderPipelineReport,
|
|
15
24
|
runTools,
|
|
16
25
|
} from '../reviewer';
|
|
17
|
-
import type {
|
|
26
|
+
import type { ReviewPointer } from '../reviewer';
|
|
18
27
|
import type { LensResult, ReviewConfig } from '../types';
|
|
19
28
|
|
|
29
|
+
/**
 * Spill the full review context to a temp Markdown file and return a pointer
 * (path + byte size + line count). Both pi's tool-output and `read` caps are
 * ~50KB / 2000 lines, so large reviews would otherwise be truncated and lost
 * on compaction. The on-disk file survives compaction and can be paged.
 *
 * The file name combines the timestamp, the process id and a random suffix so
 * two reviews finishing in the same millisecond never share a path, and the
 * file is created exclusively (`wx`): an already-existing path makes the write
 * fail instead of silently overwriting someone else's file. The file is
 * created owner-readable only because it embeds the reviewed diff.
 *
 * Node-only IO (no Bun) per the extension runtime constraint.
 *
 * @param content Full Markdown review context to persist.
 * @returns Pointer holding the file path, its UTF-8 byte size and line count.
 * @throws Propagates any `writeFile` error; callers decide how to degrade.
 */
async function writeReviewTempFile(content: string): Promise<ReviewPointer> {
  const uniqueSuffix = `${Date.now()}-${process.pid}-${Math.random().toString(36).slice(2, 10)}`;
  const path = join(tmpdir(), `pi-code-review-${uniqueSuffix}.md`);
  // 'wx' = create-or-fail: never truncate or follow a pre-existing entry.
  await writeFile(path, content, { encoding: 'utf8', flag: 'wx', mode: 0o600 });
  return {
    path,
    bytes: Buffer.byteLength(content, 'utf8'),
    lines: content.split('\n').length,
  };
}
|
|
46
|
+
|
|
20
47
|
export function registerReviewTool(pi: ExtensionAPI) {
|
|
21
48
|
pi.registerTool({
|
|
22
49
|
name: 'code_review',
|
|
@@ -155,17 +182,27 @@ export function registerReviewTool(pi: ExtensionAPI) {
|
|
|
155
182
|
const allPassesFailed =
|
|
156
183
|
config.review.passes > 0 && pipeline.telemetry.failedPasses >= config.review.passes;
|
|
157
184
|
if (!allPassesFailed) {
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
185
|
+
// Recorded rejections: downrank+tag findings the validator refuted on
|
|
186
|
+
// a previous run, then persist this run's false-positives. All FS is
|
|
187
|
+
// best-effort — it must never break a completed review.
|
|
188
|
+
if (config.review.recordRejections) {
|
|
189
|
+
const rejectionsPath = join(cwd, config.rejectionsFile);
|
|
190
|
+
const past = await loadRejections(rejectionsPath);
|
|
191
|
+
pipeline.findings = applyRejections(pipeline.findings, past);
|
|
192
|
+
await appendRejections(rejectionsPath, toRejectionRecords(pipeline.rejected));
|
|
193
|
+
}
|
|
194
|
+
return buildPipelineResult(
|
|
195
|
+
{
|
|
196
|
+
pipeline,
|
|
197
|
+
diff,
|
|
198
|
+
basePrompt,
|
|
199
|
+
lensNames,
|
|
163
200
|
availableLenses: [...available.keys()],
|
|
164
201
|
changedFiles,
|
|
165
|
-
findings: pipeline.findings,
|
|
166
|
-
telemetry: pipeline.telemetry,
|
|
167
202
|
},
|
|
168
|
-
|
|
203
|
+
writeReviewTempFile,
|
|
204
|
+
onUpdate,
|
|
205
|
+
);
|
|
169
206
|
}
|
|
170
207
|
onUpdate?.({
|
|
171
208
|
content: [{ type: 'text', text: 'all review passes failed — single-pass fallback' }],
|
|
@@ -187,20 +224,25 @@ export function registerReviewTool(pi: ExtensionAPI) {
|
|
|
187
224
|
|
|
188
225
|
ctx.ui.setStatus('code-review', undefined);
|
|
189
226
|
|
|
190
|
-
// Fallback:
|
|
191
|
-
//
|
|
192
|
-
//
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
227
|
+
// Fallback: spill the full single-pass review context to a temp file and
|
|
228
|
+
// return a compact summary + pointer (degrades gracefully on empty
|
|
229
|
+
// context or a write failure). Used when no model is available (e.g.
|
|
230
|
+
// print mode) or passes are disabled in config.
|
|
231
|
+
//
|
|
232
|
+
// This is the PRIMARY truncation culprit: the full context embeds the
|
|
233
|
+
// diff (up to 50KB) plus every lens's tool outputs (20KB each), which
|
|
234
|
+
// easily blows past pi's 50KB tool-output cap.
|
|
235
|
+
return buildSinglePassResult(
|
|
236
|
+
{
|
|
237
|
+
results,
|
|
238
|
+
diff,
|
|
239
|
+
lensNames,
|
|
200
240
|
availableLenses: [...available.keys()],
|
|
201
241
|
changedFiles,
|
|
202
242
|
},
|
|
203
|
-
|
|
243
|
+
writeReviewTempFile,
|
|
244
|
+
onUpdate,
|
|
245
|
+
);
|
|
204
246
|
},
|
|
205
247
|
});
|
|
206
248
|
}
|
|
@@ -219,42 +261,3 @@ function resolveLensNames(
|
|
|
219
261
|
return [...available.keys()];
|
|
220
262
|
}
|
|
221
263
|
|
|
222
|
-
/**
|
|
223
|
-
* Build the agent-facing review instructions appended to the report. The diff
|
|
224
|
-
* is embedded ONCE (not per lens) followed by each lens's section — large
|
|
225
|
-
* diffs would otherwise be repeated for every lens, bloating the tool output.
|
|
226
|
-
*/
|
|
227
|
-
function buildToolContext(results: LensResult[], diff: DiffSource): string {
|
|
228
|
-
const sections = results.map((r) => r._lensSection).filter(Boolean) as string[];
|
|
229
|
-
if (sections.length === 0) return '';
|
|
230
|
-
|
|
231
|
-
return [
|
|
232
|
-
`# Code Review — ${new Date().toISOString().slice(0, 10)}`,
|
|
233
|
-
'',
|
|
234
|
-
'## Changes',
|
|
235
|
-
'```',
|
|
236
|
-
diff.stat.trim() || '(no diffstat)',
|
|
237
|
-
'```',
|
|
238
|
-
'',
|
|
239
|
-
'Evaluate the diff through each lens below; the tool outputs are automated analysis.',
|
|
240
|
-
'',
|
|
241
|
-
buildDiffSection(diff),
|
|
242
|
-
'',
|
|
243
|
-
'## Lenses',
|
|
244
|
-
'',
|
|
245
|
-
...sections,
|
|
246
|
-
'',
|
|
247
|
-
'## Instructions',
|
|
248
|
-
'',
|
|
249
|
-
'For each lens above, review the diff against its criteria and output a JSON array of findings:',
|
|
250
|
-
'',
|
|
251
|
-
'```json',
|
|
252
|
-
'[',
|
|
253
|
-
' { "file": "path/to/file.ts", "line": 42, "severity": "warning", "message": "Description" }',
|
|
254
|
-
']',
|
|
255
|
-
'```',
|
|
256
|
-
'',
|
|
257
|
-
'After each lens JSON array, write a 2-3 sentence summary.',
|
|
258
|
-
'If a lens has no findings, return an empty array `[]` and note the code looks good.',
|
|
259
|
-
].join('\n');
|
|
260
|
-
}
|
|
@@ -18,6 +18,7 @@ const CONFIG_FILE = '.code-review.json';
|
|
|
18
18
|
const DEFAULT_LENS_DIR = '.code-review/lenses';
|
|
19
19
|
const DEFAULT_TOOL_TIMEOUT_MS = 60_000;
|
|
20
20
|
const DEFAULT_TOOL_CONCURRENCY = 4;
|
|
21
|
+
const DEFAULT_REJECTIONS_FILE = '.code-review/rejections.jsonl';
|
|
21
22
|
|
|
22
23
|
const DEFAULT_PIPELINE: ReviewPipelineConfig = {
|
|
23
24
|
passes: 5,
|
|
@@ -26,6 +27,7 @@ const DEFAULT_PIPELINE: ReviewPipelineConfig = {
|
|
|
26
27
|
concurrency: 5,
|
|
27
28
|
temperature: 0.4,
|
|
28
29
|
maxFindings: 50,
|
|
30
|
+
recordRejections: true,
|
|
29
31
|
};
|
|
30
32
|
|
|
31
33
|
function defaultConfig(): ReviewConfig {
|
|
@@ -35,6 +37,7 @@ function defaultConfig(): ReviewConfig {
|
|
|
35
37
|
toolTimeoutMs: DEFAULT_TOOL_TIMEOUT_MS,
|
|
36
38
|
toolConcurrency: DEFAULT_TOOL_CONCURRENCY,
|
|
37
39
|
review: { ...DEFAULT_PIPELINE },
|
|
40
|
+
rejectionsFile: DEFAULT_REJECTIONS_FILE,
|
|
38
41
|
};
|
|
39
42
|
}
|
|
40
43
|
|
|
@@ -91,6 +94,10 @@ function parsePipeline(raw: unknown): ReviewPipelineConfig {
|
|
|
91
94
|
concurrency: positiveIntOr(review.concurrency, Math.max(1, passes)),
|
|
92
95
|
temperature: clampNumberOr(review.temperature, DEFAULT_PIPELINE.temperature, 0, 2),
|
|
93
96
|
maxFindings: positiveIntOr(review.maxFindings, DEFAULT_PIPELINE.maxFindings),
|
|
97
|
+
recordRejections:
|
|
98
|
+
typeof review.recordRejections === 'boolean'
|
|
99
|
+
? review.recordRejections
|
|
100
|
+
: DEFAULT_PIPELINE.recordRejections,
|
|
94
101
|
passModel: parseModelStep(review.passModel),
|
|
95
102
|
passModels: parseModelStepArray(review.passModels),
|
|
96
103
|
validateModel: parseModelStep(review.validateModel),
|
|
@@ -118,6 +125,10 @@ export function loadConfigEffect(cwd: string): Effect.Effect<ReviewConfig, never
|
|
|
118
125
|
toolTimeoutMs: positiveIntOr(parsed.toolTimeoutMs, DEFAULT_TOOL_TIMEOUT_MS),
|
|
119
126
|
toolConcurrency: positiveIntOr(parsed.toolConcurrency, DEFAULT_TOOL_CONCURRENCY),
|
|
120
127
|
review: parsePipeline((parsed as { review?: unknown }).review),
|
|
128
|
+
rejectionsFile:
|
|
129
|
+
typeof parsed.rejectionsFile === 'string' && parsed.rejectionsFile.trim()
|
|
130
|
+
? parsed.rejectionsFile.trim()
|
|
131
|
+
: DEFAULT_REJECTIONS_FILE,
|
|
121
132
|
};
|
|
122
133
|
} catch {
|
|
123
134
|
// Malformed config — fall back to defaults.
|
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
import { Effect } from 'effect';
|
|
18
18
|
|
|
19
19
|
import { causeMessage } from './errors';
|
|
20
|
+
import { sameBug, tokenize } from './similarity';
|
|
20
21
|
import { type ModelResolution, Reviewer, makeReviewerService } from './effects/model';
|
|
21
22
|
import type {
|
|
22
23
|
CandidateFinding,
|
|
@@ -85,95 +86,6 @@ const VALIDATOR_SYSTEM_PROMPT = [
|
|
|
85
86
|
'[{ "id": 0, "verdict": "real|false-positive", "confidence": 0.0, "justification": "..." }]',
|
|
86
87
|
].join('\n');
|
|
87
88
|
|
|
88
|
-
const STOPWORDS = new Set([
|
|
89
|
-
'the',
|
|
90
|
-
'and',
|
|
91
|
-
'for',
|
|
92
|
-
'with',
|
|
93
|
-
'that',
|
|
94
|
-
'this',
|
|
95
|
-
'when',
|
|
96
|
-
'from',
|
|
97
|
-
'into',
|
|
98
|
-
'will',
|
|
99
|
-
'would',
|
|
100
|
-
'could',
|
|
101
|
-
'should',
|
|
102
|
-
'have',
|
|
103
|
-
'has',
|
|
104
|
-
'not',
|
|
105
|
-
'but',
|
|
106
|
-
'are',
|
|
107
|
-
'was',
|
|
108
|
-
'were',
|
|
109
|
-
'its',
|
|
110
|
-
'his',
|
|
111
|
-
'her',
|
|
112
|
-
'than',
|
|
113
|
-
'then',
|
|
114
|
-
'which',
|
|
115
|
-
'what',
|
|
116
|
-
'where',
|
|
117
|
-
'while',
|
|
118
|
-
'use',
|
|
119
|
-
'used',
|
|
120
|
-
'using',
|
|
121
|
-
'can',
|
|
122
|
-
'may',
|
|
123
|
-
'might',
|
|
124
|
-
'a',
|
|
125
|
-
'an',
|
|
126
|
-
'is',
|
|
127
|
-
'of',
|
|
128
|
-
'to',
|
|
129
|
-
'in',
|
|
130
|
-
'on',
|
|
131
|
-
'it',
|
|
132
|
-
'be',
|
|
133
|
-
'as',
|
|
134
|
-
'at',
|
|
135
|
-
'or',
|
|
136
|
-
'if',
|
|
137
|
-
'so',
|
|
138
|
-
]);
|
|
139
|
-
|
|
140
|
-
/** Tokenize a finding message for similarity comparison. */
|
|
141
|
-
function tokenize(message: string): Set<string> {
|
|
142
|
-
const tokens = message
|
|
143
|
-
.toLowerCase()
|
|
144
|
-
.replace(/[^a-z0-9]+/g, ' ')
|
|
145
|
-
.split(' ')
|
|
146
|
-
.filter((token) => token.length > 2 && !STOPWORDS.has(token));
|
|
147
|
-
return new Set(tokens);
|
|
148
|
-
}
|
|
149
|
-
|
|
150
|
-
function jaccard(left: Set<string>, right: Set<string>): number {
|
|
151
|
-
if (left.size === 0 && right.size === 0) return 1;
|
|
152
|
-
let intersection = 0;
|
|
153
|
-
for (const token of left) if (right.has(token)) intersection += 1;
|
|
154
|
-
const union = left.size + right.size - intersection;
|
|
155
|
-
return union === 0 ? 0 : intersection / union;
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
/** Two findings are "the same bug" when they touch the same file and either sit
|
|
159
|
-
* within a few lines (a strong co-location signal, so only a MODEST text
|
|
160
|
-
* overlap is needed to fuse paraphrases) or — when a line is missing — read
|
|
161
|
-
* clearly similar. The lower co-located bar matters: independent passes word
|
|
162
|
-
* the same defect very differently, and Bugbot leans on an LLM to merge them;
|
|
163
|
-
* co-location is our deterministic stand-in for that judgment. */
|
|
164
|
-
function sameBug(
|
|
165
|
-
candidate: { file: string; line?: number; tokens: Set<string> },
|
|
166
|
-
bucket: { file: string; line?: number; tokens: Set<string> },
|
|
167
|
-
): boolean {
|
|
168
|
-
if (candidate.file !== bucket.file) return false;
|
|
169
|
-
const similarity = jaccard(candidate.tokens, bucket.tokens);
|
|
170
|
-
if (candidate.line !== undefined && bucket.line !== undefined) {
|
|
171
|
-
if (Math.abs(candidate.line - bucket.line) > 3) return false;
|
|
172
|
-
return similarity >= 0.25;
|
|
173
|
-
}
|
|
174
|
-
// One side has no line to anchor on — demand a clearer textual match.
|
|
175
|
-
return similarity >= 0.5;
|
|
176
|
-
}
|
|
177
89
|
|
|
178
90
|
type WorkingBucket = {
|
|
179
91
|
file: string;
|
|
@@ -468,9 +380,13 @@ export function validateCandidatesEffect(
|
|
|
468
380
|
candidates: CandidateFinding[],
|
|
469
381
|
plan: ModelPlan,
|
|
470
382
|
signal?: AbortSignal,
|
|
471
|
-
): Effect.Effect<
|
|
383
|
+
): Effect.Effect<
|
|
384
|
+
{ findings: ValidatedFinding[]; droppedFalsePositives: number; rejected: CandidateFinding[] },
|
|
385
|
+
never,
|
|
386
|
+
Reviewer
|
|
387
|
+
> {
|
|
472
388
|
return Effect.gen(function* () {
|
|
473
|
-
if (candidates.length === 0) return { findings: [], droppedFalsePositives: 0 };
|
|
389
|
+
if (candidates.length === 0) return { findings: [], droppedFalsePositives: 0, rejected: [] };
|
|
474
390
|
const reviewer = yield* Reviewer;
|
|
475
391
|
|
|
476
392
|
const result = yield* reviewer
|
|
@@ -494,17 +410,17 @@ export function validateCandidatesEffect(
|
|
|
494
410
|
justification: '(validator unavailable — surfaced unvalidated)',
|
|
495
411
|
models: contributingModels(candidate.passIndices, plan),
|
|
496
412
|
}));
|
|
497
|
-
return { findings, droppedFalsePositives: 0 };
|
|
413
|
+
return { findings, droppedFalsePositives: 0, rejected: [] };
|
|
498
414
|
}
|
|
499
415
|
|
|
500
416
|
const verdicts = parseVerdicts(result.right);
|
|
501
417
|
const findings: ValidatedFinding[] = [];
|
|
502
|
-
|
|
418
|
+
const rejected: CandidateFinding[] = [];
|
|
503
419
|
candidates.forEach((candidate, index) => {
|
|
504
420
|
const verdict = verdicts.get(index);
|
|
505
421
|
// A candidate with no verdict returned is kept (fail open), not dropped.
|
|
506
422
|
if (verdict && verdict.verdict === 'false-positive') {
|
|
507
|
-
|
|
423
|
+
rejected.push(candidate);
|
|
508
424
|
return;
|
|
509
425
|
}
|
|
510
426
|
findings.push({
|
|
@@ -515,7 +431,7 @@ export function validateCandidatesEffect(
|
|
|
515
431
|
models: contributingModels(candidate.passIndices, plan),
|
|
516
432
|
});
|
|
517
433
|
});
|
|
518
|
-
return { findings, droppedFalsePositives };
|
|
434
|
+
return { findings, droppedFalsePositives: rejected.length, rejected };
|
|
519
435
|
});
|
|
520
436
|
}
|
|
521
437
|
|
|
@@ -541,11 +457,13 @@ export function runPipelineEffect(
|
|
|
541
457
|
|
|
542
458
|
let validated: ValidatedFinding[];
|
|
543
459
|
let droppedFalsePositives = 0;
|
|
460
|
+
let rejected: CandidateFinding[] = [];
|
|
544
461
|
if (config.validate) {
|
|
545
462
|
hooks.onStage?.(`validating ${kept.length} candidates`);
|
|
546
463
|
const outcome = yield* validateCandidatesEffect(basePrompt, kept, plan, signal);
|
|
547
464
|
validated = outcome.findings;
|
|
548
465
|
droppedFalsePositives = outcome.droppedFalsePositives;
|
|
466
|
+
rejected = outcome.rejected;
|
|
549
467
|
} else {
|
|
550
468
|
validated = kept.map((candidate) => ({
|
|
551
469
|
...candidate,
|
|
@@ -571,7 +489,7 @@ export function runPipelineEffect(
|
|
|
571
489
|
passModels: plan.passes.map((assignment) => assignment.label),
|
|
572
490
|
validatorModel: plan.validator.label,
|
|
573
491
|
};
|
|
574
|
-
return { findings: capped, telemetry };
|
|
492
|
+
return { findings: capped, rejected, telemetry };
|
|
575
493
|
});
|
|
576
494
|
}
|
|
577
495
|
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Recorded-rejection store: persist validator false-positives and, on later
|
|
3
|
+
* runs, downrank+tag findings that match a past rejection (never hide them).
|
|
4
|
+
*
|
|
5
|
+
* Failure-tolerant by design — any FS or parse error degrades to "no
|
|
6
|
+
* rejections" so a review is never broken by a missing/garbled store. Node-only
|
|
7
|
+
* (node:fs/promises), never Bun, since extension source runs on Node via jiti.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { appendFile, mkdir, readFile, writeFile } from 'node:fs/promises';
|
|
11
|
+
import { dirname } from 'node:path';
|
|
12
|
+
|
|
13
|
+
import { sameBug, tokenize } from './similarity';
|
|
14
|
+
import type { CandidateFinding, RejectionRecord, ValidatedFinding } from './types';
|
|
15
|
+
|
|
16
|
+
/** Keep the store bounded; oldest records are dropped past this many. */
|
|
17
|
+
export const DEFAULT_REJECTION_CAP = 200;
|
|
18
|
+
|
|
19
|
+
/**
 * Read the JSONL store, tolerating a missing file or garbled lines.
 *
 * Each non-blank line is parsed independently. A line is kept only when it is
 * a JSON object with string `file`, `message` and `severity` fields; anything
 * else (invalid JSON, `null`, arrays, missing fields) is skipped without
 * discarding the rest of the store. A `line` field that is not a finite number
 * is dropped from the record so later line-distance comparisons never run
 * arithmetic on a string or NaN.
 *
 * @param path Location of the JSONL store.
 * @returns The usable records in file order; `[]` when the file cannot be read.
 */
export async function loadRejections(path: string): Promise<RejectionRecord[]> {
  let text: string;
  try {
    text = await readFile(path, 'utf8');
  } catch {
    // Missing or unreadable store degrades to "no rejections".
    return [];
  }
  const records: RejectionRecord[] = [];
  for (const raw of text.split(/\r?\n/)) {
    if (!raw.trim()) continue;
    try {
      const parsed: unknown = JSON.parse(raw);
      if (typeof parsed !== 'object' || parsed === null) continue;
      const fields = parsed as Record<string, unknown>;
      if (
        typeof fields.file !== 'string' ||
        typeof fields.message !== 'string' ||
        typeof fields.severity !== 'string'
      ) {
        continue;
      }
      // Keep `line` only when it is a real number; otherwise treat the record
      // as having no line anchor.
      const { line, ...rest } = fields;
      const record = typeof line === 'number' && Number.isFinite(line) ? { ...rest, line } : rest;
      records.push(record as unknown as RejectionRecord);
    } catch {
      // Skip an unparseable line rather than discard the whole store.
    }
  }
  return records;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Report whether `finding` is the same bug as any recorded rejection, as
 * judged by `sameBug` over the file, the optional line and the tokenized
 * message of each side.
 */
export function matchesRejection(
  finding: { file: string; line?: number; message: string },
  rejections: RejectionRecord[],
): boolean {
  const findingTokens = tokenize(finding.message);
  for (const past of rejections) {
    const probe = { file: finding.file, line: finding.line, tokens: findingTokens };
    const stored = { file: past.file, line: past.line, tokens: tokenize(past.message) };
    if (sameBug(probe, stored)) return true;
  }
  return false;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Mark every finding that matches a past rejection with
 * `previouslyRejected: true` and move the marked findings after the unmarked
 * ones. Relative order inside each group is untouched. Pure: the input array
 * and its elements are not mutated, and with no rejections the input array is
 * returned as-is.
 */
export function applyRejections(
  findings: ValidatedFinding[],
  rejections: RejectionRecord[],
): ValidatedFinding[] {
  if (rejections.length === 0) return findings;

  const fresh: ValidatedFinding[] = [];
  const sunk: ValidatedFinding[] = [];
  for (const finding of findings) {
    const entry = matchesRejection(finding, rejections)
      ? { ...finding, previouslyRejected: true }
      : finding;
    if (entry.previouslyRejected) {
      sunk.push(entry);
    } else {
      fresh.push(entry);
    }
  }
  return fresh.concat(sunk);
}
|
|
74
|
+
|
|
75
|
+
/**
 * Turn the candidates the validator refuted in this run into store records,
 * copying file, line, severity and message and stamping each with `now`
 * (defaults to the current time as an ISO string).
 */
export function toRejectionRecords(
  rejected: CandidateFinding[],
  now: string = new Date().toISOString(),
): RejectionRecord[] {
  const records: RejectionRecord[] = [];
  for (const { file, line, severity, message } of rejected) {
    records.push({ file, line, severity, message, recorded_at: now });
  }
  return records;
}
|
|
88
|
+
|
|
89
|
+
/**
 * Append new rejections, deduping and capping the total. Never throws — any
 * failure inside silently no-ops so persisting can never break a review.
 *
 * Entries are deduped against the records already on disk AND against the
 * entries accepted earlier in the same batch, so one run cannot store the same
 * rejection twice. When the combined total fits under the cap the new records
 * are appended; otherwise the whole store is rewritten with only the newest
 * `cap` records.
 *
 * @param path Location of the JSONL store; parent directories are created.
 * @param entries Candidate records from this run.
 * @param cap Maximum records to retain. Fractions are floored, negative values
 *   are treated as 0 (retain nothing), and NaN falls back to the default.
 */
export async function appendRejections(
  path: string,
  entries: RejectionRecord[],
  cap: number = DEFAULT_REJECTION_CAP,
): Promise<void> {
  if (entries.length === 0) return;
  try {
    // `slice(-0)` is `slice(0)` (keeps everything), so the cap must be
    // normalised and the zero case handled explicitly.
    const limit = Number.isNaN(cap) ? DEFAULT_REJECTION_CAP : Math.max(0, Math.floor(cap));

    const existing = await loadRejections(path);
    const fresh: RejectionRecord[] = [];
    for (const entry of entries) {
      const probe = { file: entry.file, line: entry.line, message: entry.message };
      if (matchesRejection(probe, existing) || matchesRejection(probe, fresh)) continue;
      fresh.push(entry);
    }
    if (fresh.length === 0) return;

    const combined = [...existing, ...fresh];
    const merged = limit === 0 ? [] : combined.slice(-limit);
    const serialize = (records: RejectionRecord[]): string =>
      records.map((record) => `${JSON.stringify(record)}\n`).join('');

    await mkdir(dirname(path), { recursive: true });
    if (merged.length === combined.length) {
      // Nothing was capped out — a plain append keeps the file append-only.
      await appendFile(path, serialize(fresh), 'utf8');
    } else {
      // Cap trimmed older records — rewrite the whole bounded store.
      await writeFile(path, serialize(merged), 'utf8');
    }
  } catch {
    // Persisting rejections must never break a review.
  }
}
|
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
import { platform } from 'node:os';
|
|
2
|
-
import type {
|
|
2
|
+
import type {
|
|
3
|
+
AgentToolResult,
|
|
4
|
+
AgentToolUpdateCallback,
|
|
5
|
+
ExtensionAPI,
|
|
6
|
+
} from '@earendil-works/pi-coding-agent';
|
|
3
7
|
import { Effect } from 'effect';
|
|
4
8
|
|
|
5
9
|
import type { DiffSource } from './diff';
|
|
@@ -118,6 +122,71 @@ export function buildReviewBasePrompt(lensSections: string[], diff: DiffSource):
|
|
|
118
122
|
].join('\n');
|
|
119
123
|
}
|
|
120
124
|
|
|
125
|
+
/** Pointer to the temp file holding the full review context. */
|
|
126
|
+
export type ReviewPointer = { path: string; bytes: number; lines: number };
|
|
127
|
+
|
|
128
|
+
/** Convert a byte count to whole kilobytes (nearest), never reporting less than 1KB. */
function toKb(bytes: number): number {
  const roundedKb = Math.round(bytes / 1024);
  return roundedKb < 1 ? 1 : roundedKb;
}
|
|
132
|
+
|
|
133
|
+
/**
 * Reduce a git `--stat` block to a single "N files, +ins, -del" phrase taken
 * from its final line. Insertion and deletion counts are included only when
 * that line mentions them. Yields '' when the final line carries no
 * "N file(s) changed" summary.
 */
function summarizeDiffStat(stat: string): string {
  const rows = stat.trim().split('\n');
  const summaryLine = (rows[rows.length - 1] ?? '').trim();

  const fileCount = /(\d+) files? changed/.exec(summaryLine)?.[1];
  if (!fileCount) return '';

  const added = /(\d+) insertions?\(\+\)/.exec(summaryLine)?.[1];
  const removed = /(\d+) deletions?\(-\)/.exec(summaryLine)?.[1];

  const noun = fileCount === '1' ? 'file' : 'files';
  const pieces = [`${fileCount} ${noun}`];
  if (added) pieces.push(`+${added}`);
  if (removed) pieces.push(`-${removed}`);
  return pieces.join(', ');
}
|
|
148
|
+
|
|
149
|
+
/**
 * Short inline header used by the single-pass fallback: it lists the lens
 * names and the diff label, with the condensed diffstat in parentheses when
 * one is available. The full review context is carried separately in a temp
 * file — see {@link buildPointer}. Pure (no IO).
 */
export function buildInlineSummary(lensNames: string[], diff: DiffSource): string {
  const condensed = summarizeDiffStat(diff.stat);
  let scope = diff.label;
  if (condensed) {
    scope = `${diff.label} (${condensed})`;
  }
  const lensList = lensNames.join(', ') || '(none)';
  return `# Code Review Summary\n- **Lenses**: ${lensList}\n- **Diff**: ${scope}`;
}
|
|
163
|
+
|
|
164
|
+
/**
 * Render the inline note that points the agent at the temp file holding the
 * full review context, including its size in lines and KB. pi's tool output /
 * `read` caps are both ~50KB / 2000 lines, so the note tells the agent to page
 * large content with `read` offset/limit. Pure (no IO).
 *
 * `mode` picks the wording: in 'single-pass' the agent must perform the whole
 * review from the file; in 'pipeline' the file is only for drilling into the
 * diff behind an already-rendered finding.
 */
export function buildPointer(pointer: ReviewPointer, mode: 'single-pass' | 'pipeline'): string {
  const sizeNote = `(${pointer.lines} lines, ${toKb(pointer.bytes)}KB)`;
  const quotedPath = `\`${pointer.path}\``;

  const noteLines =
    mode === 'single-pass'
      ? [
          '📄 Full review context (diff, lens definitions, tool outputs, instructions)',
          `saved to: ${quotedPath}`,
          `${sizeNote}. **Read that file** to perform the review — page large content with`,
          '`read` offset/limit.',
        ]
      : [
          '---',
          `📄 Full diff + lens context saved to: ${quotedPath}`,
          `${sizeNote}. Use \`read\` (offset/limit) to inspect the diff behind a finding.`,
        ];
  return noteLines.join('\n');
}
|
|
189
|
+
|
|
121
190
|
const SEVERITY_EMOJI: Record<ValidatedFinding['severity'], string> = {
|
|
122
191
|
blocker: '🔴',
|
|
123
192
|
warning: '🟡',
|
|
@@ -201,6 +270,7 @@ export function renderPipelineReport(result: PipelineResult, diff: DiffSource):
|
|
|
201
270
|
`${Math.round(finding.confidence * 100)}% conf`,
|
|
202
271
|
finding.category,
|
|
203
272
|
multiModel && finding.models.length > 0 ? `models: ${finding.models.join(', ')}` : undefined,
|
|
273
|
+
finding.previouslyRejected ? '⟲ previously rejected' : undefined,
|
|
204
274
|
]
|
|
205
275
|
.filter(Boolean)
|
|
206
276
|
.join(', ');
|
|
@@ -263,6 +333,158 @@ export function buildLensResult(
|
|
|
263
333
|
};
|
|
264
334
|
}
|
|
265
335
|
|
|
336
|
+
/**
 * Compose the agent-facing review document for the single-pass fallback:
 * dated title, diffstat, the diff section, every lens section, and the output
 * instructions. The diff appears ONCE rather than once per lens, so a large
 * diff is not repeated for every lens. Yields '' when no lens result carries a
 * section (nothing to review).
 */
export function buildToolContext(results: LensResult[], diff: DiffSource): string {
  const lensSections: string[] = [];
  for (const result of results) {
    if (result._lensSection) lensSections.push(result._lensSection);
  }
  if (lensSections.length === 0) return '';

  const today = new Date().toISOString().slice(0, 10);
  const diffStat = diff.stat.trim() || '(no diffstat)';

  const preamble = [
    `# Code Review — ${today}`,
    '',
    '## Changes',
    '```',
    diffStat,
    '```',
    '',
    'Evaluate the diff through each lens below; the tool outputs are automated analysis.',
    '',
    buildDiffSection(diff),
    '',
    '## Lenses',
    '',
  ];
  const instructions = [
    '',
    '## Instructions',
    '',
    'For each lens above, review the diff against its criteria and output a JSON array of findings:',
    '',
    '```json',
    '[',
    ' { "file": "path/to/file.ts", "line": 42, "severity": "warning", "message": "Description" }',
    ']',
    '```',
    '',
    'After each lens JSON array, write a 2-3 sentence summary.',
    'If a lens has no findings, return an empty array `[]` and note the code looks good.',
  ];

  return preamble.concat(lensSections, instructions).join('\n');
}
|
|
376
|
+
|
|
377
|
+
/** Persist the full review context somewhere durable, returning a pointer. */
|
|
378
|
+
export type ReviewTempWriter = (content: string) => Promise<ReviewPointer>;
|
|
379
|
+
|
|
380
|
+
type ReviewToolResult = AgentToolResult<Record<string, unknown>>;
|
|
381
|
+
|
|
382
|
+
/**
 * Produce the tool result for the single-pass fallback.
 *
 * The full review context is handed to the injected {@link ReviewTempWriter};
 * on success the inline payload is only a summary plus a pointer to that file,
 * and the file path is added to `details.contextFile`. Two degraded paths:
 *  - no lens produced any context: return a "no applicable lenses" notice and
 *    never call the writer;
 *  - the writer (or summary rendering) throws: report it through `onUpdate`
 *    and return the full context inline instead of throwing out of the tool.
 */
export async function buildSinglePassResult(
  args: {
    results: LensResult[];
    diff: DiffSource;
    lensNames: string[];
    availableLenses: string[];
    changedFiles: string[];
  },
  writeTemp: ReviewTempWriter,
  onUpdate?: AgentToolUpdateCallback,
): Promise<ReviewToolResult> {
  const { results, diff, lensNames, availableLenses, changedFiles } = args;
  const context = buildToolContext(results, diff);
  const details: Record<string, unknown> = {
    mode: 'single-pass',
    lensCount: lensNames.length,
    availableLenses,
    changedFiles,
  };

  // Nothing to review (e.g. the requested lenses matched none of the available
  // ones): say so instead of pointing the agent at an empty temp file.
  if (context.trim().length === 0) {
    const known = availableLenses.join(', ') || '(none)';
    return {
      content: [
        { type: 'text', text: `No applicable lenses for this review. Available: ${known}.` },
      ],
      details,
    };
  }

  try {
    const pointer = await writeTemp(context);
    const header = buildInlineSummary(lensNames, diff);
    const note = buildPointer(pointer, 'single-pass');
    return {
      content: [{ type: 'text', text: `${header}\n\n${note}` }],
      details: { ...details, contextFile: pointer.path },
    };
  } catch (cause) {
    const writeError = cause instanceof Error ? cause.message : String(cause);
    onUpdate?.({
      content: [{ type: 'text', text: 'temp-file write failed — returning inline context' }],
      details: { writeError },
    });
    return { content: [{ type: 'text', text: context }], details };
  }
}
|
|
439
|
+
|
|
440
|
+
/**
 * Build the tool result for a completed pipeline run.
 *
 * The rendered findings report is the valuable output and always stays inline.
 * The diff + lens context (`basePrompt`) is handed to the injected
 * {@link ReviewTempWriter} purely so the agent can drill into the diff behind a
 * finding. A write failure must NOT discard a completed pipeline: it is
 * reported through `onUpdate`, and the findings are returned WITHOUT a pointer.
 *
 * @param args - Pipeline output and diff used to render the report, the context
 *   text to persist, and the lens/file lists echoed in `details`.
 * @param writeTemp - Writer that stores the context and resolves to a pointer.
 * @param onUpdate - Optional callback; only invoked when the write path fails.
 * @returns The tool result; `details.contextFile` is present only when the
 *   write succeeded.
 */
export async function buildPipelineResult(
  args: {
    pipeline: PipelineResult;
    diff: DiffSource;
    basePrompt: string;
    lensNames: string[];
    availableLenses: string[];
    changedFiles: string[];
  },
  writeTemp: ReviewTempWriter,
  onUpdate?: AgentToolUpdateCallback,
): Promise<ReviewToolResult> {
  const { pipeline, diff, basePrompt, lensNames, availableLenses, changedFiles } = args;
  const report = renderPipelineReport(pipeline, diff);

  // Start from the bare report; the pointer is appended only if the write works.
  let text = report;
  let contextFile: string | undefined;

  try {
    const pointer = await writeTemp(basePrompt);
    const locator = buildPointer(pointer, 'pipeline');
    text = `${report}\n\n${locator}`;
    contextFile = pointer.path;
  } catch (cause) {
    // Only stringify the failure when somebody is listening for it.
    if (onUpdate) {
      const writeError = cause instanceof Error ? cause.message : String(cause);
      onUpdate({
        content: [
          { type: 'text', text: 'temp-file write failed — findings returned without diff pointer' },
        ],
        details: { writeError },
      });
    }
  }

  const details: Record<string, unknown> = {
    mode: 'pipeline',
    lensCount: lensNames.length,
    availableLenses,
    changedFiles,
    findings: pipeline.findings,
    telemetry: pipeline.telemetry,
  };
  if (contextFile) {
    details.contextFile = contextFile;
  }

  return { content: [{ type: 'text', text }], details };
}
|
|
487
|
+
|
|
266
488
|
/** Promise wrapper: run a deduped tool set once, building a live Executor from `pi`. */
|
|
267
489
|
export function runTools(
|
|
268
490
|
pi: Pick<ExtensionAPI, 'exec'>,
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deterministic finding-similarity helpers, shared by the pass bucketer and the
|
|
3
|
+
* recorded-rejection matcher. Kept dependency-free and pure so both the
|
|
4
|
+
* Bugbot-style vote pipeline and the rejection store reason about "is this the
|
|
5
|
+
* same bug?" identically.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * Common English filler words that carry no signal when comparing two finding
 * messages. The one- and two-letter entries are listed for completeness:
 * `tokenize` discards tokens of length <= 2 before it consults this set.
 */
const STOPWORDS = new Set<string>(
  (
    'the and for with that this from into when where which while will would could should ' +
    'using can may might a an is of to in on it be as at or if so'
  ).split(' '),
);
|
|
44
|
+
|
|
45
|
+
/**
 * Reduce a finding message to the set of significant words used for similarity
 * comparison.
 *
 * The message is lower-cased and split on every run of characters outside
 * `a-z0-9`; words of two characters or fewer and stopwords are dropped.
 * NOTE(review): only ASCII letters and digits survive, so a message written
 * entirely in another script yields an empty set.
 *
 * @param message - Free-text finding message.
 * @returns The distinct significant tokens, in order of first appearance.
 */
export function tokenize(message: string): Set<string> {
  const significant = new Set<string>();
  for (const word of message.toLowerCase().split(/[^a-z0-9]+/)) {
    if (word.length <= 2) continue;
    if (STOPWORDS.has(word)) continue;
    significant.add(word);
  }
  return significant;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Jaccard similarity of two token sets: |left ∩ right| / |left ∪ right|.
 *
 * @param left - First token set.
 * @param right - Second token set.
 * @returns A value in [0, 1]. Two empty sets are defined as identical (1);
 *   an empty set against a non-empty one scores 0.
 */
export function jaccard(left: Set<string>, right: Set<string>): number {
  // 0/0 case: treat two empty sets as a perfect match.
  if (left.size === 0 && right.size === 0) return 1;

  // Count shared tokens by walking the smaller set and probing the larger one.
  const [smaller, larger] = left.size <= right.size ? [left, right] : [right, left];
  let intersection = 0;
  for (const token of smaller) {
    if (larger.has(token)) intersection += 1;
  }

  // At least one set is non-empty here, so the union is always positive and no
  // divide-by-zero guard is needed.
  return intersection / (left.size + right.size - intersection);
}
|
|
62
|
+
|
|
63
|
+
/**
 * Decide whether two findings describe "the same bug".
 *
 * They must touch the same file, and then either:
 * - both carry a line: they must sit within a few lines of each other (a strong
 *   co-location signal, so only a MODEST text overlap is needed to fuse
 *   paraphrases); or
 * - a line is missing on either side: they must read clearly similar.
 *
 * The lower co-located bar matters: independent passes word the same defect
 * very differently, and Bugbot leans on an LLM to merge them; co-location is
 * our deterministic stand-in for that judgment.
 *
 * @param candidate - Finding being classified: file, optional line, message tokens.
 * @param bucket - Existing finding/bucket to compare against, same shape.
 * @returns `true` when the two should be treated as one bug.
 */
export function sameBug(
  candidate: { file: string; line?: number; tokens: Set<string> },
  bucket: { file: string; line?: number; tokens: Set<string> },
): boolean {
  /** Largest line gap still considered co-located. */
  const maxLineDistance = 3;
  /** Minimum token overlap when both findings are anchored close together. */
  const colocatedMinSimilarity = 0.25;
  /** Minimum token overlap when at least one finding has no line. */
  const unanchoredMinSimilarity = 0.5;

  if (candidate.file !== bucket.file) return false;

  if (candidate.line !== undefined && bucket.line !== undefined) {
    // Reject on distance first so the token comparison is skipped entirely for
    // findings that are too far apart to be the same defect.
    if (Math.abs(candidate.line - bucket.line) > maxLineDistance) return false;
    return jaccard(candidate.tokens, bucket.tokens) >= colocatedMinSimilarity;
  }

  // One side has no line to anchor on — demand a clearer textual match.
  return jaccard(candidate.tokens, bucket.tokens) >= unanchoredMinSimilarity;
}
|
|
@@ -56,6 +56,9 @@ export type ValidatedFinding = CandidateFinding & {
|
|
|
56
56
|
/** Validator confidence in `verdict`, 0..1. */
|
|
57
57
|
confidence: number;
|
|
58
58
|
justification?: string;
|
|
59
|
+
/** True when this finding matches a previously-recorded rejection. Downranked
|
|
60
|
+
* and tagged in the report; never hidden. */
|
|
61
|
+
previouslyRejected?: boolean;
|
|
59
62
|
/** Distinct model keys whose passes contributed to this finding (for the
|
|
60
63
|
* model bake-off: "which model caught this"). */
|
|
61
64
|
models: string[];
|
|
@@ -106,9 +109,24 @@ export type PipelineTelemetry = {
|
|
|
106
109
|
|
|
107
110
|
/** Output of one pipeline run: surfaced findings, refuted candidates, and run telemetry. */
export type PipelineResult = {
|
|
108
111
|
/** Findings surfaced by this run; these are what the pipeline tool result exposes as `details.findings`. */
findings: ValidatedFinding[];
|
|
112
|
+
/** Candidates the validator refuted this run. Surfaced (not just counted) so
|
|
113
|
+
* the command layer can persist them as recorded rejections. */
|
|
114
|
+
rejected: CandidateFinding[];
|
|
109
115
|
/** Telemetry for the run; exposed as `details.telemetry` in the pipeline tool result. */
telemetry: PipelineTelemetry;
|
|
110
116
|
};
|
|
111
117
|
|
|
118
|
+
/** A persisted record of a validator-refuted finding, matched against future
|
|
119
|
+
* runs so a refuted finding that resurfaces is downranked and tagged. */
|
|
120
|
+
export type RejectionRecord = {
|
|
121
|
+
/** File the refuted finding was reported against. */
file: string;
|
|
122
|
+
/** Line of the refuted finding; absent when the finding carried no line. */
line?: number;
|
|
123
|
+
/** Severity the refuted finding carried. */
severity: LensSeverity;
|
|
124
|
+
/** Message text of the refuted finding. */
message: string;
|
|
125
|
+
/** NOTE(review): presumably the validator's reason for refuting the finding — confirm against the code that writes records. */
justification?: string;
|
|
126
|
+
/** ISO timestamp the rejection was recorded. */
|
|
127
|
+
recorded_at: string;
|
|
128
|
+
};
|
|
129
|
+
|
|
112
130
|
/** Tunables for the self-driving pipeline (all overridable in config). */
|
|
113
131
|
export type ReviewPipelineConfig = {
|
|
114
132
|
/** Parallel adversarial bug-finding passes. 0 disables the pipeline
|
|
@@ -126,6 +144,8 @@ export type ReviewPipelineConfig = {
|
|
|
126
144
|
temperature: number;
|
|
127
145
|
/** Hard cap on findings returned (safety valve against runaway output). */
|
|
128
146
|
maxFindings: number;
|
|
147
|
+
/** Persist validator false-positives and downrank+tag matches on later runs. */
|
|
148
|
+
recordRejections: boolean;
|
|
129
149
|
/** Model for ALL passes — a spec string or `{ model, reasoning }`. Omitted →
|
|
130
150
|
* session model. Overridden per-pass by {@link passModels}. */
|
|
131
151
|
passModel?: ModelStepConfig;
|
|
@@ -153,4 +173,6 @@ export type ReviewConfig = {
|
|
|
153
173
|
toolConcurrency: number;
|
|
154
174
|
/** Self-driving pipeline tunables (see {@link ReviewPipelineConfig}). */
|
|
155
175
|
review: ReviewPipelineConfig;
|
|
176
|
+
/** Path (relative to cwd) of the recorded-rejections JSONL store. */
|
|
177
|
+
rejectionsFile: string;
|
|
156
178
|
};
|